Update Ollama AI Provider to Support Parsing "Thinking" Content From New Message Schema (#4587)

* Add a `className` property to `OllamaAILLM`

* Enhance `OllamaAILLM.handleStream` to support parsing thinking content from the `message.thinking` property.

* Refactor the thinking-property handler and patch the Ollama `@agent` flow calls

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
Marcello Fitton 2025-11-20 15:39:17 -08:00 committed by GitHub
parent c169193fc4
commit 7a7ec969d7
2 changed files with 109 additions and 28 deletions
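
For context: as of Ollama v0.9.0, reasoning-capable models can return their chain-of-thought in a separate `message.thinking` field instead of inlining it in `message.content`. A minimal sketch of the shape this patch consumes, assuming a local Ollama server on the default port; the model name is only an example:

    const { Ollama } = require("ollama");

    async function main() {
      const client = new Ollama({ host: "http://127.0.0.1:11434" });
      const res = await client.chat({
        model: "deepseek-r1:8b", // example; any reasoning-capable model works
        think: true, // ask the server to separate reasoning from the answer
        messages: [{ role: "user", content: "Why is the sky blue?" }],
      });
      console.log(res.message.thinking); // reasoning tokens (may be undefined)
      console.log(res.message.content); // the final answer only
    }
    main();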

server/utils/AiProviders/ollama/index.js

@@ -8,6 +8,7 @@ const {
   LLMPerformanceMonitor,
 } = require("../../helpers/chat/LLMPerformanceMonitor");
 const { Ollama } = require("ollama");
+const { v4: uuidv4 } = require("uuid");
 
 // Docs: https://github.com/jmorganca/ollama/blob/main/docs/api.md
 class OllamaAILLM {
@@ -18,6 +19,7 @@ class OllamaAILLM {
     if (!process.env.OLLAMA_BASE_PATH)
       throw new Error("No Ollama Base Path was set.");
 
+    this.className = "OllamaAILLM";
     this.authToken = process.env.OLLAMA_AUTH_TOKEN;
     this.basePath = process.env.OLLAMA_BASE_PATH;
     this.model = modelPreference || process.env.OLLAMA_MODEL_PREF;
@@ -257,8 +259,11 @@
         },
       })
       .then((res) => {
+        let content = res.message.content;
+        if (res.message.thinking)
+          content = `<think>${res.message.thinking}</think>${content}`;
         return {
-          content: res.message.content,
+          content,
           usage: {
             prompt_tokens: res.prompt_eval_count,
             completion_tokens: res.eval_count,
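
The non-streaming path simply folds any thinking text back into the returned string. A hypothetical input and result for the handler above:

    // Hypothetical response object for the .then() handler above.
    const res = {
      message: { thinking: "User wants a greeting.", content: "Hello!" },
    };
    let content = res.message.content;
    if (res.message.thinking)
      content = `<think>${res.message.thinking}</think>${content}`;
    // content === "<think>User wants a greeting.</think>Hello!"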
@@ -326,6 +331,7 @@
     return new Promise(async (resolve) => {
       let fullText = "";
+      let reasoningText = "";
       let usage = {
         prompt_tokens: 0,
         completion_tokens: 0,
       };
@@ -367,16 +373,59 @@
           }
 
           if (chunk.hasOwnProperty("message")) {
+            // As of Ollama v0.9.0+, thinking content comes in a separate property
+            // in the response object. If it exists, we need to handle it separately by wrapping it in <think> tags.
             const content = chunk.message.content;
-            fullText += content;
-            writeResponseChunk(response, {
-              uuid,
-              sources,
-              type: "textResponseChunk",
-              textResponse: content,
-              close: false,
-              error: false,
-            });
+            const reasoningToken = chunk.message.thinking;
+
+            if (reasoningToken) {
+              if (reasoningText.length === 0) {
+                const startTag = "<think>";
+                writeResponseChunk(response, {
+                  uuid,
+                  sources,
+                  type: "textResponseChunk",
+                  textResponse: startTag + reasoningToken,
+                  close: false,
+                  error: false,
+                });
+                reasoningText += startTag + reasoningToken;
+              } else {
+                writeResponseChunk(response, {
+                  uuid,
+                  sources,
+                  type: "textResponseChunk",
+                  textResponse: reasoningToken,
+                  close: false,
+                  error: false,
+                });
+                reasoningText += reasoningToken;
+              }
+            } else if (content.length > 0) {
+              // If we have reasoning text, we need to close the reasoning tag and then append the content.
+              if (reasoningText.length > 0) {
+                const endTag = "</think>";
+                writeResponseChunk(response, {
+                  uuid,
+                  sources,
+                  type: "textResponseChunk",
+                  textResponse: endTag,
+                  close: false,
+                  error: false,
+                });
+                fullText += reasoningText + endTag;
+                reasoningText = ""; // Reset reasoning buffer
+              }
+              fullText += content; // Append regular text
+              writeResponseChunk(response, {
+                uuid,
+                sources,
+                type: "textResponseChunk",
+                textResponse: content,
+                close: false,
+                error: false,
+              });
+            }
           }
         }
       } catch (error) {
@@ -386,9 +435,8 @@
             type: "textResponseChunk",
             textResponse: "",
             close: true,
-            error: `Ollama:streaming - could not stream chat. ${
-              error?.cause ?? error.message
-            }`,
+            error: `Ollama:streaming - could not stream chat. ${error?.cause ?? error.message
+              }`,
           });
           response.removeListener("close", handleAbort);
           stream?.endMeasurement(usage);
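
Taken together, the streaming handler is a small two-state machine: the first reasoning token opens a `<think>` tag, later reasoning tokens stream through as-is, and the first regular content token closes the tag exactly once. A standalone replay of that logic against fabricated chunks (`emit` stands in for `writeResponseChunk`):

    // Replay of the tagging logic above against a made-up chunk sequence.
    function replay(chunks, emit) {
      let fullText = "";
      let reasoningText = "";
      for (const { thinking, content = "" } of chunks) {
        if (thinking) {
          // Open the tag only on the first reasoning token.
          const prefix = reasoningText.length === 0 ? "<think>" : "";
          emit(prefix + thinking);
          reasoningText += prefix + thinking;
        } else if (content.length > 0) {
          if (reasoningText.length > 0) {
            // First regular token after reasoning: close the tag once.
            emit("</think>");
            fullText += reasoningText + "</think>";
            reasoningText = "";
          }
          fullText += content;
          emit(content);
        }
      }
      return fullText;
    }

    const out = [];
    replay(
      [{ thinking: "Step 1. " }, { thinking: "Step 2." }, { content: "Answer." }],
      (t) => out.push(t)
    );
    // out: ["<think>Step 1. ", "Step 2.", "</think>", "Answer."]
    // returned fullText: "<think>Step 1. Step 2.</think>Answer."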

server/utils/agents/aibitat/providers/ollama.js

@@ -88,7 +88,9 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
     if (history[history.length - 1].role !== "user") return null;
 
     const msgUUID = v4();
+    let token = "";
     let textResponse = "";
+    let reasoningText = "";
 
     const historyMessages = this.buildToolCallMessages(history, functions);
     const stream = await chatCb({ messages: historyMessages });
@@ -99,17 +101,32 @@
     });
 
     for await (const chunk of stream) {
-      if (
-        !chunk.hasOwnProperty("message") ||
-        !chunk.message.hasOwnProperty("content")
-      )
-        continue;
+      if (!chunk.hasOwnProperty("message")) continue;
 
-      textResponse += chunk.message.content;
+      const content = chunk.message?.content;
+      const reasoningToken = chunk.message?.thinking;
+      if (reasoningToken) {
+        if (reasoningText.length === 0) {
+          reasoningText = `Thinking:\n\n${reasoningToken}`;
+          token = reasoningText;
+        } else {
+          reasoningText += reasoningToken;
+          token = reasoningToken;
+        }
+      } else if (content.length > 0) {
+        if (reasoningText.length > 0) {
+          token = `\n\nDone thinking.\n\n${content}`;
+          reasoningText = "";
+        } else {
+          token = content;
+        }
+        textResponse += content;
+      }
+
       eventHandler?.("reportStreamEvent", {
         type: "statusResponse",
         uuid: msgUUID,
-        content: chunk.message.content,
+        content: token,
       });
     }
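
Since `@agent` status events are rendered as plain text, this path frames the reasoning with human-readable markers rather than `<think>` tags. An illustrative trace of the loop above (chunk values are made up):

    // Fabricated chunks -> status tokens reported by the loop above:
    //   { message: { thinking: "Need a tool. " } }   -> "Thinking:\n\nNeed a tool. "
    //   { message: { thinking: "Use web-search." } } -> "Use web-search."
    //   { message: { content: "Searching now." } }   -> "\n\nDone thinking.\n\nSearching now."
    // Only `content` accumulates into textResponse; the thinking text is display-only here.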
@@ -225,23 +242,39 @@
       );
 
       const msgUUID = v4();
       completion = { content: "" };
+      let reasoningText = "";
+      let token = "";
       const stream = await this.#handleFunctionCallStream({
        messages: this.cleanMsgs(messages),
       });
 
       for await (const chunk of stream) {
-        if (
-          !chunk.hasOwnProperty("message") ||
-          !chunk.message.hasOwnProperty("content")
-        )
-          continue;
+        if (!chunk.hasOwnProperty("message")) continue;
 
-        const delta = chunk.message.content;
-        completion.content += delta;
+        const content = chunk.message?.content;
+        const reasoningToken = chunk.message?.thinking;
+        if (reasoningToken) {
+          if (reasoningText.length === 0) {
+            reasoningText = `<think>${reasoningToken}`;
+            token = `<think>${reasoningToken}`;
+          } else {
+            reasoningText += reasoningToken;
+            token = reasoningToken;
+          }
+        } else if (content.length > 0) {
+          if (reasoningText.length > 0) {
+            token = `</think>${content}`;
+            reasoningText = "";
+          } else {
+            token = content;
+          }
+        }
+
+        completion.content += token;
         eventHandler?.("reportStreamEvent", {
           type: "textResponseChunk",
           uuid: msgUUID,
-          content: delta,
+          content: token,
         });
       }
     }
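
In this function-calling path the `<think>` tags are kept in `completion.content`, so the tagged reasoning survives into the accumulated completion rather than being display-only. A hypothetical downstream helper (not part of this patch) that strips the preserved block before further parsing:

    // Hypothetical helper; not part of this commit.
    function stripThinking(text) {
      // Remove any <think>...</think> blocks and trim leftover whitespace.
      return text.replace(/<think>[\s\S]*?<\/think>/g, "").trim();
    }

    const completion = { content: "<think>Check the schema.</think>Call web-search." };
    console.log(stripThinking(completion.content)); // "Call web-search."

One caveat worth noting: `token` is not reset between chunks, so this loop assumes Ollama never emits a message chunk carrying neither `thinking` nor non-empty `content`; such a chunk would append the previous token to `completion.content` a second time.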