Update Ollama AI Provider to Support Parsing "Thinking" Content From New Message Schema (#4587)

* add className prop to OllamaAILLM

* Enhance `OllamaAILLM.handleStream` to support parsing thinking content from the `message.thinking` property.

* Refactor the thinking-property handler and patch the Ollama `@agent` flow calls

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
Marcello Fitton 2025-11-20 15:39:17 -08:00 committed by GitHub
parent c169193fc4
commit 7a7ec969d7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 109 additions and 28 deletions

View File

@@ -8,6 +8,7 @@ const {
LLMPerformanceMonitor,
} = require("../../helpers/chat/LLMPerformanceMonitor");
const { Ollama } = require("ollama");
const { v4: uuidv4 } = require("uuid");
// Docs: https://github.com/jmorganca/ollama/blob/main/docs/api.md
class OllamaAILLM {
@@ -18,6 +19,7 @@ class OllamaAILLM {
if (!process.env.OLLAMA_BASE_PATH)
throw new Error("No Ollama Base Path was set.");
this.className = "OllamaAILLM";
this.authToken = process.env.OLLAMA_AUTH_TOKEN;
this.basePath = process.env.OLLAMA_BASE_PATH;
this.model = modelPreference || process.env.OLLAMA_MODEL_PREF;
@@ -257,8 +259,11 @@ class OllamaAILLM {
},
})
.then((res) => {
let content = res.message.content;
if (res.message.thinking)
content = `<think>${res.message.thinking}</think>${content}`;
return {
content: res.message.content,
content,
usage: {
prompt_tokens: res.prompt_eval_count,
completion_tokens: res.eval_count,
@@ -326,6 +331,7 @@ class OllamaAILLM {
return new Promise(async (resolve) => {
let fullText = "";
let reasoningText = "";
let usage = {
prompt_tokens: 0,
completion_tokens: 0,
@@ -367,8 +373,50 @@ class OllamaAILLM {
}
if (chunk.hasOwnProperty("message")) {
// As of Ollama v0.9.0+, thinking content comes in a separate property
// in the response object. If it exists, we need to handle it separately by wrapping it in <think> tags.
const content = chunk.message.content;
fullText += content;
const reasoningToken = chunk.message.thinking;
if (reasoningToken) {
if (reasoningText.length === 0) {
const startTag = "<think>";
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: startTag + reasoningToken,
close: false,
error: false,
});
reasoningText += startTag + reasoningToken;
} else {
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: reasoningToken,
close: false,
error: false,
});
reasoningText += reasoningToken;
}
} else if (content.length > 0) {
// If we have reasoning text, we need to close the reasoning tag and then append the content.
if (reasoningText.length > 0) {
const endTag = "</think>";
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: endTag,
close: false,
error: false,
});
fullText += reasoningText + endTag;
reasoningText = ""; // Reset reasoning buffer
}
fullText += content; // Append regular text
writeResponseChunk(response, {
uuid,
sources,
@@ -379,6 +427,7 @@ class OllamaAILLM {
});
}
}
}
} catch (error) {
writeResponseChunk(response, {
uuid,
@@ -386,8 +435,7 @@ class OllamaAILLM {
type: "textResponseChunk",
textResponse: "",
close: true,
error: `Ollama:streaming - could not stream chat. ${
error?.cause ?? error.message
error: `Ollama:streaming - could not stream chat. ${error?.cause ?? error.message
}`,
});
response.removeListener("close", handleAbort);

View File

@@ -88,7 +88,9 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
if (history[history.length - 1].role !== "user") return null;
const msgUUID = v4();
let token = "";
let textResponse = "";
let reasoningText = "";
const historyMessages = this.buildToolCallMessages(history, functions);
const stream = await chatCb({ messages: historyMessages });
@@ -99,17 +101,32 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
});
for await (const chunk of stream) {
if (
!chunk.hasOwnProperty("message") ||
!chunk.message.hasOwnProperty("content")
)
continue;
if (!chunk.hasOwnProperty("message")) continue;
const content = chunk.message?.content;
const reasoningToken = chunk.message?.thinking;
if (reasoningToken) {
if (reasoningText.length === 0) {
reasoningText = `Thinking:\n\n${reasoningToken}`;
token = reasoningText;
} else {
reasoningText += reasoningToken;
token = reasoningToken;
}
} else if (content.length > 0) {
if (reasoningText.length > 0) {
token = `\n\nDone thinking.\n\n${content}`;
reasoningText = "";
} else {
token = content;
}
textResponse += content;
}
textResponse += chunk.message.content;
eventHandler?.("reportStreamEvent", {
type: "statusResponse",
uuid: msgUUID,
content: chunk.message.content,
content: token,
});
}
@@ -225,23 +242,39 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
);
const msgUUID = v4();
completion = { content: "" };
let reasoningText = "";
let token = "";
const stream = await this.#handleFunctionCallStream({
messages: this.cleanMsgs(messages),
});
for await (const chunk of stream) {
if (
!chunk.hasOwnProperty("message") ||
!chunk.message.hasOwnProperty("content")
)
continue;
if (!chunk.hasOwnProperty("message")) continue;
const delta = chunk.message.content;
completion.content += delta;
const content = chunk.message?.content;
const reasoningToken = chunk.message?.thinking;
if (reasoningToken) {
if (reasoningText.length === 0) {
reasoningText = `<think>${reasoningToken}`;
token = `<think>${reasoningToken}`;
} else {
reasoningText += reasoningToken;
token = reasoningToken;
}
} else if (content.length > 0) {
if (reasoningText.length > 0) {
token = `</think>${content}`;
reasoningText = "";
} else {
token = content;
}
}
completion.content += token;
eventHandler?.("reportStreamEvent", {
type: "textResponseChunk",
uuid: msgUUID,
content: delta,
content: token,
});
}
}