Update Ollama AI Provider to Support Parsing "Thinking" Content From New Message Schema (#4587)
* add className prop to OllamaAILLM * Enhance `OllamaAILLM.handleStream` to support parsing thinking content from the `message.thinking` property. * refactor thinking property handler * patch ollama `@agent` flow calls --------- Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
parent
c169193fc4
commit
7a7ec969d7
@ -8,6 +8,7 @@ const {
|
||||
LLMPerformanceMonitor,
|
||||
} = require("../../helpers/chat/LLMPerformanceMonitor");
|
||||
const { Ollama } = require("ollama");
|
||||
const { v4: uuidv4 } = require("uuid");
|
||||
|
||||
// Docs: https://github.com/jmorganca/ollama/blob/main/docs/api.md
|
||||
class OllamaAILLM {
|
||||
@ -18,6 +19,7 @@ class OllamaAILLM {
|
||||
if (!process.env.OLLAMA_BASE_PATH)
|
||||
throw new Error("No Ollama Base Path was set.");
|
||||
|
||||
this.className = "OllamaAILLM";
|
||||
this.authToken = process.env.OLLAMA_AUTH_TOKEN;
|
||||
this.basePath = process.env.OLLAMA_BASE_PATH;
|
||||
this.model = modelPreference || process.env.OLLAMA_MODEL_PREF;
|
||||
@ -257,8 +259,11 @@ class OllamaAILLM {
|
||||
},
|
||||
})
|
||||
.then((res) => {
|
||||
let content = res.message.content;
|
||||
if (res.message.thinking)
|
||||
content = `<think>${res.message.thinking}</think>${content}`;
|
||||
return {
|
||||
content: res.message.content,
|
||||
content,
|
||||
usage: {
|
||||
prompt_tokens: res.prompt_eval_count,
|
||||
completion_tokens: res.eval_count,
|
||||
@ -326,6 +331,7 @@ class OllamaAILLM {
|
||||
|
||||
return new Promise(async (resolve) => {
|
||||
let fullText = "";
|
||||
let reasoningText = "";
|
||||
let usage = {
|
||||
prompt_tokens: 0,
|
||||
completion_tokens: 0,
|
||||
@ -367,8 +373,50 @@ class OllamaAILLM {
|
||||
}
|
||||
|
||||
if (chunk.hasOwnProperty("message")) {
|
||||
// As of Ollama v0.9.0+, thinking content comes in a separate property
|
||||
// in the response object. If it exists, we need to handle it separately by wrapping it in <think> tags.
|
||||
const content = chunk.message.content;
|
||||
fullText += content;
|
||||
const reasoningToken = chunk.message.thinking;
|
||||
|
||||
if (reasoningToken) {
|
||||
if (reasoningText.length === 0) {
|
||||
const startTag = "<think>";
|
||||
writeResponseChunk(response, {
|
||||
uuid,
|
||||
sources,
|
||||
type: "textResponseChunk",
|
||||
textResponse: startTag + reasoningToken,
|
||||
close: false,
|
||||
error: false,
|
||||
});
|
||||
reasoningText += startTag + reasoningToken;
|
||||
} else {
|
||||
writeResponseChunk(response, {
|
||||
uuid,
|
||||
sources,
|
||||
type: "textResponseChunk",
|
||||
textResponse: reasoningToken,
|
||||
close: false,
|
||||
error: false,
|
||||
});
|
||||
reasoningText += reasoningToken;
|
||||
}
|
||||
} else if (content.length > 0) {
|
||||
// If we have reasoning text, we need to close the reasoning tag and then append the content.
|
||||
if (reasoningText.length > 0) {
|
||||
const endTag = "</think>";
|
||||
writeResponseChunk(response, {
|
||||
uuid,
|
||||
sources,
|
||||
type: "textResponseChunk",
|
||||
textResponse: endTag,
|
||||
close: false,
|
||||
error: false,
|
||||
});
|
||||
fullText += reasoningText + endTag;
|
||||
reasoningText = ""; // Reset reasoning buffer
|
||||
}
|
||||
fullText += content; // Append regular text
|
||||
writeResponseChunk(response, {
|
||||
uuid,
|
||||
sources,
|
||||
@ -379,6 +427,7 @@ class OllamaAILLM {
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
writeResponseChunk(response, {
|
||||
uuid,
|
||||
@ -386,8 +435,7 @@ class OllamaAILLM {
|
||||
type: "textResponseChunk",
|
||||
textResponse: "",
|
||||
close: true,
|
||||
error: `Ollama:streaming - could not stream chat. ${
|
||||
error?.cause ?? error.message
|
||||
error: `Ollama:streaming - could not stream chat. ${error?.cause ?? error.message
|
||||
}`,
|
||||
});
|
||||
response.removeListener("close", handleAbort);
|
||||
|
||||
@ -88,7 +88,9 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
|
||||
if (history[history.length - 1].role !== "user") return null;
|
||||
|
||||
const msgUUID = v4();
|
||||
let token = "";
|
||||
let textResponse = "";
|
||||
let reasoningText = "";
|
||||
const historyMessages = this.buildToolCallMessages(history, functions);
|
||||
const stream = await chatCb({ messages: historyMessages });
|
||||
|
||||
@ -99,17 +101,32 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
|
||||
});
|
||||
|
||||
for await (const chunk of stream) {
|
||||
if (
|
||||
!chunk.hasOwnProperty("message") ||
|
||||
!chunk.message.hasOwnProperty("content")
|
||||
)
|
||||
continue;
|
||||
if (!chunk.hasOwnProperty("message")) continue;
|
||||
|
||||
const content = chunk.message?.content;
|
||||
const reasoningToken = chunk.message?.thinking;
|
||||
if (reasoningToken) {
|
||||
if (reasoningText.length === 0) {
|
||||
reasoningText = `Thinking:\n\n${reasoningToken}`;
|
||||
token = reasoningText;
|
||||
} else {
|
||||
reasoningText += reasoningToken;
|
||||
token = reasoningToken;
|
||||
}
|
||||
} else if (content.length > 0) {
|
||||
if (reasoningText.length > 0) {
|
||||
token = `\n\nDone thinking.\n\n${content}`;
|
||||
reasoningText = "";
|
||||
} else {
|
||||
token = content;
|
||||
}
|
||||
textResponse += content;
|
||||
}
|
||||
|
||||
textResponse += chunk.message.content;
|
||||
eventHandler?.("reportStreamEvent", {
|
||||
type: "statusResponse",
|
||||
uuid: msgUUID,
|
||||
content: chunk.message.content,
|
||||
content: token,
|
||||
});
|
||||
}
|
||||
|
||||
@ -225,23 +242,39 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
|
||||
);
|
||||
const msgUUID = v4();
|
||||
completion = { content: "" };
|
||||
let reasoningText = "";
|
||||
let token = "";
|
||||
const stream = await this.#handleFunctionCallStream({
|
||||
messages: this.cleanMsgs(messages),
|
||||
});
|
||||
|
||||
for await (const chunk of stream) {
|
||||
if (
|
||||
!chunk.hasOwnProperty("message") ||
|
||||
!chunk.message.hasOwnProperty("content")
|
||||
)
|
||||
continue;
|
||||
if (!chunk.hasOwnProperty("message")) continue;
|
||||
|
||||
const delta = chunk.message.content;
|
||||
completion.content += delta;
|
||||
const content = chunk.message?.content;
|
||||
const reasoningToken = chunk.message?.thinking;
|
||||
if (reasoningToken) {
|
||||
if (reasoningText.length === 0) {
|
||||
reasoningText = `<think>${reasoningToken}`;
|
||||
token = `<think>${reasoningToken}`;
|
||||
} else {
|
||||
reasoningText += reasoningToken;
|
||||
token = reasoningToken;
|
||||
}
|
||||
} else if (content.length > 0) {
|
||||
if (reasoningText.length > 0) {
|
||||
token = `</think>${content}`;
|
||||
reasoningText = "";
|
||||
} else {
|
||||
token = content;
|
||||
}
|
||||
}
|
||||
|
||||
completion.content += token;
|
||||
eventHandler?.("reportStreamEvent", {
|
||||
type: "textResponseChunk",
|
||||
uuid: msgUUID,
|
||||
content: delta,
|
||||
content: token,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user