Update Ollama AI Provider to Support Parsing "Thinking" Content From New Message Schema (#4587)

* add className prop to OllamaAILLM
* Enhance `OllamaAILLM.handleStream` to support parsing thinking content from the `message.thinking` property.
* refactor thinking property handler; patched ollama `@agent` flow calls

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
2025-11-20 15:39:17 -08:00 · 2025-11-20 15:39:17 -08:00 · 7a7ec969d7
commit 7a7ec969d7
parent c169193fc4
2 changed files with 109 additions and 28 deletions
--- a/server/utils/AiProviders/ollama/index.js
+++ b/server/utils/AiProviders/ollama/index.js
@ -8,6 +8,7 @@ const {
  LLMPerformanceMonitor,
 } = require("../../helpers/chat/LLMPerformanceMonitor");
 const { Ollama } = require("ollama");
 const { v4: uuidv4 } = require("uuid");
 // Docs: https://github.com/jmorganca/ollama/blob/main/docs/api.md
 class OllamaAILLM {
@ -18,6 +19,7 @@ class OllamaAILLM {
    if (!process.env.OLLAMA_BASE_PATH)
      throw new Error("No Ollama Base Path was set.");
    this.className = "OllamaAILLM";
    this.authToken = process.env.OLLAMA_AUTH_TOKEN;
    this.basePath = process.env.OLLAMA_BASE_PATH;
    this.model = modelPreference || process.env.OLLAMA_MODEL_PREF;
@ -257,8 +259,11 @@ class OllamaAILLM {
          },
        })
        .then((res) => {
          let content = res.message.content;
          if (res.message.thinking)
            content = `<think>${res.message.thinking}</think>${content}`;
          return {
-            content: res.message.content,
+            content,
            usage: {
              prompt_tokens: res.prompt_eval_count,
              completion_tokens: res.eval_count,
@ -326,6 +331,7 @@ class OllamaAILLM {
    return new Promise(async (resolve) => {
      let fullText = "";
      let reasoningText = "";
      let usage = {
        prompt_tokens: 0,
        completion_tokens: 0,
@ -367,16 +373,59 @@ class OllamaAILLM {
          }
          if (chunk.hasOwnProperty("message")) {
            // As of Ollama v0.9.0+, thinking content comes in a separate property
            // in the response object. If it exists, we need to handle it separately by wrapping it in <think> tags.
            const content = chunk.message.content;
-            fullText += content;
+            const reasoningToken = chunk.message.thinking;
-            writeResponseChunk(response, {
+
-              uuid,
+            if (reasoningToken) {
-              sources,
+              if (reasoningText.length === 0) {
-              type: "textResponseChunk",
+                const startTag = "<think>";
-              textResponse: content,
+                writeResponseChunk(response, {
-              close: false,
+                  uuid,
-              error: false,
+                  sources,
-            });
+                  type: "textResponseChunk",
                  textResponse: startTag + reasoningToken,
                  close: false,
                  error: false,
                });
                reasoningText += startTag + reasoningToken;
              } else {
                writeResponseChunk(response, {
                  uuid,
                  sources,
                  type: "textResponseChunk",
                  textResponse: reasoningToken,
                  close: false,
                  error: false,
                });
                reasoningText += reasoningToken;
              }
            } else if (content.length > 0) {
              // If we have reasoning text, we need to close the reasoning tag and then append the content.
              if (reasoningText.length > 0) {
                const endTag = "</think>";
                writeResponseChunk(response, {
                  uuid,
                  sources,
                  type: "textResponseChunk",
                  textResponse: endTag,
                  close: false,
                  error: false,
                });
                fullText += reasoningText + endTag;
                reasoningText = ""; // Reset reasoning buffer
              }
              fullText += content; // Append regular text
              writeResponseChunk(response, {
                uuid,
                sources,
                type: "textResponseChunk",
                textResponse: content,
                close: false,
                error: false,
              });
            }
          }
        }
      } catch (error) {
@ -386,9 +435,8 @@ class OllamaAILLM {
          type: "textResponseChunk",
          textResponse: "",
          close: true,
-          error: `Ollama:streaming - could not stream chat. ${
+          error: `Ollama:streaming - could not stream chat. ${error?.cause ?? error.message
-            error?.cause ?? error.message
+            }`,
          }`,
        });
        response.removeListener("close", handleAbort);
        stream?.endMeasurement(usage);
--- a/server/utils/agents/aibitat/providers/ollama.js
+++ b/server/utils/agents/aibitat/providers/ollama.js
@ -88,7 +88,9 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
    if (history[history.length - 1].role !== "user") return null;
    const msgUUID = v4();
    let token = "";
    let textResponse = "";
    let reasoningText = "";
    const historyMessages = this.buildToolCallMessages(history, functions);
    const stream = await chatCb({ messages: historyMessages });
@ -99,17 +101,32 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
    });
    for await (const chunk of stream) {
-      if (
+      if (!chunk.hasOwnProperty("message")) continue;
-        !chunk.hasOwnProperty("message") ||
+
-        !chunk.message.hasOwnProperty("content")
+      const content = chunk.message?.content;
-      )
+      const reasoningToken = chunk.message?.thinking;
-        continue;
+      if (reasoningToken) {
        if (reasoningText.length === 0) {
          reasoningText = `Thinking:\n\n${reasoningToken}`;
          token = reasoningText;
        } else {
          reasoningText += reasoningToken;
          token = reasoningToken;
        }
      } else if (content.length > 0) {
        if (reasoningText.length > 0) {
          token = `\n\nDone thinking.\n\n${content}`;
          reasoningText = "";
        } else {
          token = content;
        }
        textResponse += content;
      }
      textResponse += chunk.message.content;
      eventHandler?.("reportStreamEvent", {
        type: "statusResponse",
        uuid: msgUUID,
-        content: chunk.message.content,
+        content: token,
      });
    }
@ -225,23 +242,39 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
        );
        const msgUUID = v4();
        completion = { content: "" };
        let reasoningText = "";
        let token = "";
        const stream = await this.#handleFunctionCallStream({
          messages: this.cleanMsgs(messages),
        });
        for await (const chunk of stream) {
-          if (
+          if (!chunk.hasOwnProperty("message")) continue;
            !chunk.hasOwnProperty("message") ||
            !chunk.message.hasOwnProperty("content")
          )
            continue;
-          const delta = chunk.message.content;
+          const content = chunk.message?.content;
-          completion.content += delta;
+          const reasoningToken = chunk.message?.thinking;
          if (reasoningToken) {
            if (reasoningText.length === 0) {
              reasoningText = `<think>${reasoningToken}`;
              token = `<think>${reasoningToken}`;
            } else {
              reasoningText += reasoningToken;
              token = reasoningToken;
            }
          } else if (content.length > 0) {
            if (reasoningText.length > 0) {
              token = `</think>${content}`;
              reasoningText = "";
            } else {
              token = content;
            }
          }
          completion.content += token;
          eventHandler?.("reportStreamEvent", {
            type: "textResponseChunk",
            uuid: msgUUID,
-            content: delta,
+            content: token,
          });
        }
      }