Update Ollama AI Provider to Support Parsing "Thinking" Content From New Message Schema (#4587)
* Add `className` prop to `OllamaAILLM`.
* Enhance `OllamaAILLM.handleStream` to support parsing thinking content from the `message.thinking` property.
* Refactor the thinking property handler; patch Ollama `@agent` flow calls.
---------
Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
parent
c169193fc4
commit
7a7ec969d7
@ -8,6 +8,7 @@ const {
|
|||||||
LLMPerformanceMonitor,
|
LLMPerformanceMonitor,
|
||||||
} = require("../../helpers/chat/LLMPerformanceMonitor");
|
} = require("../../helpers/chat/LLMPerformanceMonitor");
|
||||||
const { Ollama } = require("ollama");
|
const { Ollama } = require("ollama");
|
||||||
|
const { v4: uuidv4 } = require("uuid");
|
||||||
|
|
||||||
// Docs: https://github.com/jmorganca/ollama/blob/main/docs/api.md
|
// Docs: https://github.com/jmorganca/ollama/blob/main/docs/api.md
|
||||||
class OllamaAILLM {
|
class OllamaAILLM {
|
||||||
@ -18,6 +19,7 @@ class OllamaAILLM {
|
|||||||
if (!process.env.OLLAMA_BASE_PATH)
|
if (!process.env.OLLAMA_BASE_PATH)
|
||||||
throw new Error("No Ollama Base Path was set.");
|
throw new Error("No Ollama Base Path was set.");
|
||||||
|
|
||||||
|
this.className = "OllamaAILLM";
|
||||||
this.authToken = process.env.OLLAMA_AUTH_TOKEN;
|
this.authToken = process.env.OLLAMA_AUTH_TOKEN;
|
||||||
this.basePath = process.env.OLLAMA_BASE_PATH;
|
this.basePath = process.env.OLLAMA_BASE_PATH;
|
||||||
this.model = modelPreference || process.env.OLLAMA_MODEL_PREF;
|
this.model = modelPreference || process.env.OLLAMA_MODEL_PREF;
|
||||||
@ -257,8 +259,11 @@ class OllamaAILLM {
|
|||||||
},
|
},
|
||||||
})
|
})
|
||||||
.then((res) => {
|
.then((res) => {
|
||||||
|
let content = res.message.content;
|
||||||
|
if (res.message.thinking)
|
||||||
|
content = `<think>${res.message.thinking}</think>${content}`;
|
||||||
return {
|
return {
|
||||||
content: res.message.content,
|
content,
|
||||||
usage: {
|
usage: {
|
||||||
prompt_tokens: res.prompt_eval_count,
|
prompt_tokens: res.prompt_eval_count,
|
||||||
completion_tokens: res.eval_count,
|
completion_tokens: res.eval_count,
|
||||||
@ -326,6 +331,7 @@ class OllamaAILLM {
|
|||||||
|
|
||||||
return new Promise(async (resolve) => {
|
return new Promise(async (resolve) => {
|
||||||
let fullText = "";
|
let fullText = "";
|
||||||
|
let reasoningText = "";
|
||||||
let usage = {
|
let usage = {
|
||||||
prompt_tokens: 0,
|
prompt_tokens: 0,
|
||||||
completion_tokens: 0,
|
completion_tokens: 0,
|
||||||
@ -367,16 +373,59 @@ class OllamaAILLM {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (chunk.hasOwnProperty("message")) {
|
if (chunk.hasOwnProperty("message")) {
|
||||||
|
// As of Ollama v0.9.0+, thinking content comes in a separate property
|
||||||
|
// in the response object. If it exists, we need to handle it separately by wrapping it in <think> tags.
|
||||||
const content = chunk.message.content;
|
const content = chunk.message.content;
|
||||||
fullText += content;
|
const reasoningToken = chunk.message.thinking;
|
||||||
writeResponseChunk(response, {
|
|
||||||
uuid,
|
if (reasoningToken) {
|
||||||
sources,
|
if (reasoningText.length === 0) {
|
||||||
type: "textResponseChunk",
|
const startTag = "<think>";
|
||||||
textResponse: content,
|
writeResponseChunk(response, {
|
||||||
close: false,
|
uuid,
|
||||||
error: false,
|
sources,
|
||||||
});
|
type: "textResponseChunk",
|
||||||
|
textResponse: startTag + reasoningToken,
|
||||||
|
close: false,
|
||||||
|
error: false,
|
||||||
|
});
|
||||||
|
reasoningText += startTag + reasoningToken;
|
||||||
|
} else {
|
||||||
|
writeResponseChunk(response, {
|
||||||
|
uuid,
|
||||||
|
sources,
|
||||||
|
type: "textResponseChunk",
|
||||||
|
textResponse: reasoningToken,
|
||||||
|
close: false,
|
||||||
|
error: false,
|
||||||
|
});
|
||||||
|
reasoningText += reasoningToken;
|
||||||
|
}
|
||||||
|
} else if (content.length > 0) {
|
||||||
|
// If we have reasoning text, we need to close the reasoning tag and then append the content.
|
||||||
|
if (reasoningText.length > 0) {
|
||||||
|
const endTag = "</think>";
|
||||||
|
writeResponseChunk(response, {
|
||||||
|
uuid,
|
||||||
|
sources,
|
||||||
|
type: "textResponseChunk",
|
||||||
|
textResponse: endTag,
|
||||||
|
close: false,
|
||||||
|
error: false,
|
||||||
|
});
|
||||||
|
fullText += reasoningText + endTag;
|
||||||
|
reasoningText = ""; // Reset reasoning buffer
|
||||||
|
}
|
||||||
|
fullText += content; // Append regular text
|
||||||
|
writeResponseChunk(response, {
|
||||||
|
uuid,
|
||||||
|
sources,
|
||||||
|
type: "textResponseChunk",
|
||||||
|
textResponse: content,
|
||||||
|
close: false,
|
||||||
|
error: false,
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
@ -386,9 +435,8 @@ class OllamaAILLM {
|
|||||||
type: "textResponseChunk",
|
type: "textResponseChunk",
|
||||||
textResponse: "",
|
textResponse: "",
|
||||||
close: true,
|
close: true,
|
||||||
error: `Ollama:streaming - could not stream chat. ${
|
error: `Ollama:streaming - could not stream chat. ${error?.cause ?? error.message
|
||||||
error?.cause ?? error.message
|
}`,
|
||||||
}`,
|
|
||||||
});
|
});
|
||||||
response.removeListener("close", handleAbort);
|
response.removeListener("close", handleAbort);
|
||||||
stream?.endMeasurement(usage);
|
stream?.endMeasurement(usage);
|
||||||
|
|||||||
@ -88,7 +88,9 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
|
|||||||
if (history[history.length - 1].role !== "user") return null;
|
if (history[history.length - 1].role !== "user") return null;
|
||||||
|
|
||||||
const msgUUID = v4();
|
const msgUUID = v4();
|
||||||
|
let token = "";
|
||||||
let textResponse = "";
|
let textResponse = "";
|
||||||
|
let reasoningText = "";
|
||||||
const historyMessages = this.buildToolCallMessages(history, functions);
|
const historyMessages = this.buildToolCallMessages(history, functions);
|
||||||
const stream = await chatCb({ messages: historyMessages });
|
const stream = await chatCb({ messages: historyMessages });
|
||||||
|
|
||||||
@ -99,17 +101,32 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
|
|||||||
});
|
});
|
||||||
|
|
||||||
for await (const chunk of stream) {
|
for await (const chunk of stream) {
|
||||||
if (
|
if (!chunk.hasOwnProperty("message")) continue;
|
||||||
!chunk.hasOwnProperty("message") ||
|
|
||||||
!chunk.message.hasOwnProperty("content")
|
const content = chunk.message?.content;
|
||||||
)
|
const reasoningToken = chunk.message?.thinking;
|
||||||
continue;
|
if (reasoningToken) {
|
||||||
|
if (reasoningText.length === 0) {
|
||||||
|
reasoningText = `Thinking:\n\n${reasoningToken}`;
|
||||||
|
token = reasoningText;
|
||||||
|
} else {
|
||||||
|
reasoningText += reasoningToken;
|
||||||
|
token = reasoningToken;
|
||||||
|
}
|
||||||
|
} else if (content.length > 0) {
|
||||||
|
if (reasoningText.length > 0) {
|
||||||
|
token = `\n\nDone thinking.\n\n${content}`;
|
||||||
|
reasoningText = "";
|
||||||
|
} else {
|
||||||
|
token = content;
|
||||||
|
}
|
||||||
|
textResponse += content;
|
||||||
|
}
|
||||||
|
|
||||||
textResponse += chunk.message.content;
|
|
||||||
eventHandler?.("reportStreamEvent", {
|
eventHandler?.("reportStreamEvent", {
|
||||||
type: "statusResponse",
|
type: "statusResponse",
|
||||||
uuid: msgUUID,
|
uuid: msgUUID,
|
||||||
content: chunk.message.content,
|
content: token,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -225,23 +242,39 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
|
|||||||
);
|
);
|
||||||
const msgUUID = v4();
|
const msgUUID = v4();
|
||||||
completion = { content: "" };
|
completion = { content: "" };
|
||||||
|
let reasoningText = "";
|
||||||
|
let token = "";
|
||||||
const stream = await this.#handleFunctionCallStream({
|
const stream = await this.#handleFunctionCallStream({
|
||||||
messages: this.cleanMsgs(messages),
|
messages: this.cleanMsgs(messages),
|
||||||
});
|
});
|
||||||
|
|
||||||
for await (const chunk of stream) {
|
for await (const chunk of stream) {
|
||||||
if (
|
if (!chunk.hasOwnProperty("message")) continue;
|
||||||
!chunk.hasOwnProperty("message") ||
|
|
||||||
!chunk.message.hasOwnProperty("content")
|
|
||||||
)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
const delta = chunk.message.content;
|
const content = chunk.message?.content;
|
||||||
completion.content += delta;
|
const reasoningToken = chunk.message?.thinking;
|
||||||
|
if (reasoningToken) {
|
||||||
|
if (reasoningText.length === 0) {
|
||||||
|
reasoningText = `<think>${reasoningToken}`;
|
||||||
|
token = `<think>${reasoningToken}`;
|
||||||
|
} else {
|
||||||
|
reasoningText += reasoningToken;
|
||||||
|
token = reasoningToken;
|
||||||
|
}
|
||||||
|
} else if (content.length > 0) {
|
||||||
|
if (reasoningText.length > 0) {
|
||||||
|
token = `</think>${content}`;
|
||||||
|
reasoningText = "";
|
||||||
|
} else {
|
||||||
|
token = content;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
completion.content += token;
|
||||||
eventHandler?.("reportStreamEvent", {
|
eventHandler?.("reportStreamEvent", {
|
||||||
type: "textResponseChunk",
|
type: "textResponseChunk",
|
||||||
uuid: msgUUID,
|
uuid: msgUUID,
|
||||||
content: delta,
|
content: token,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user