Update Ollama AI Provider to Support Parsing "Thinking" Content From New Message Schema (#4587)
* add className prop to OllamaAILLM * Enhance `OllamaAILLM.handleStream` to support parsing thinking content from the `message.thinking` property. * refactor thinking property handler * patch ollama `@agent` flow calls --------- Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
parent
c169193fc4
commit
7a7ec969d7
@ -8,6 +8,7 @@ const {
|
||||
LLMPerformanceMonitor,
|
||||
} = require("../../helpers/chat/LLMPerformanceMonitor");
|
||||
const { Ollama } = require("ollama");
|
||||
const { v4: uuidv4 } = require("uuid");
|
||||
|
||||
// Docs: https://github.com/jmorganca/ollama/blob/main/docs/api.md
|
||||
class OllamaAILLM {
|
||||
@ -18,6 +19,7 @@ class OllamaAILLM {
|
||||
if (!process.env.OLLAMA_BASE_PATH)
|
||||
throw new Error("No Ollama Base Path was set.");
|
||||
|
||||
this.className = "OllamaAILLM";
|
||||
this.authToken = process.env.OLLAMA_AUTH_TOKEN;
|
||||
this.basePath = process.env.OLLAMA_BASE_PATH;
|
||||
this.model = modelPreference || process.env.OLLAMA_MODEL_PREF;
|
||||
@ -257,8 +259,11 @@ class OllamaAILLM {
|
||||
},
|
||||
})
|
||||
.then((res) => {
|
||||
let content = res.message.content;
|
||||
if (res.message.thinking)
|
||||
content = `<think>${res.message.thinking}</think>${content}`;
|
||||
return {
|
||||
content: res.message.content,
|
||||
content,
|
||||
usage: {
|
||||
prompt_tokens: res.prompt_eval_count,
|
||||
completion_tokens: res.eval_count,
|
||||
@ -326,6 +331,7 @@ class OllamaAILLM {
|
||||
|
||||
return new Promise(async (resolve) => {
|
||||
let fullText = "";
|
||||
let reasoningText = "";
|
||||
let usage = {
|
||||
prompt_tokens: 0,
|
||||
completion_tokens: 0,
|
||||
@ -367,8 +373,50 @@ class OllamaAILLM {
|
||||
}
|
||||
|
||||
if (chunk.hasOwnProperty("message")) {
|
||||
// As of Ollama v0.9.0+, thinking content comes in a separate property
|
||||
// in the response object. If it exists, we need to handle it separately by wrapping it in <think> tags.
|
||||
const content = chunk.message.content;
|
||||
fullText += content;
|
||||
const reasoningToken = chunk.message.thinking;
|
||||
|
||||
if (reasoningToken) {
|
||||
if (reasoningText.length === 0) {
|
||||
const startTag = "<think>";
|
||||
writeResponseChunk(response, {
|
||||
uuid,
|
||||
sources,
|
||||
type: "textResponseChunk",
|
||||
textResponse: startTag + reasoningToken,
|
||||
close: false,
|
||||
error: false,
|
||||
});
|
||||
reasoningText += startTag + reasoningToken;
|
||||
} else {
|
||||
writeResponseChunk(response, {
|
||||
uuid,
|
||||
sources,
|
||||
type: "textResponseChunk",
|
||||
textResponse: reasoningToken,
|
||||
close: false,
|
||||
error: false,
|
||||
});
|
||||
reasoningText += reasoningToken;
|
||||
}
|
||||
} else if (content.length > 0) {
|
||||
// If we have reasoning text, we need to close the reasoning tag and then append the content.
|
||||
if (reasoningText.length > 0) {
|
||||
const endTag = "</think>";
|
||||
writeResponseChunk(response, {
|
||||
uuid,
|
||||
sources,
|
||||
type: "textResponseChunk",
|
||||
textResponse: endTag,
|
||||
close: false,
|
||||
error: false,
|
||||
});
|
||||
fullText += reasoningText + endTag;
|
||||
reasoningText = ""; // Reset reasoning buffer
|
||||
}
|
||||
fullText += content; // Append regular text
|
||||
writeResponseChunk(response, {
|
||||
uuid,
|
||||
sources,
|
||||
@ -379,6 +427,7 @@ class OllamaAILLM {
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
writeResponseChunk(response, {
|
||||
uuid,
|
||||
@ -386,8 +435,7 @@ class OllamaAILLM {
|
||||
type: "textResponseChunk",
|
||||
textResponse: "",
|
||||
close: true,
|
||||
error: `Ollama:streaming - could not stream chat. ${
|
||||
error?.cause ?? error.message
|
||||
error: `Ollama:streaming - could not stream chat. ${error?.cause ?? error.message
|
||||
}`,
|
||||
});
|
||||
response.removeListener("close", handleAbort);
|
||||
|
||||
@ -88,7 +88,9 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
|
||||
if (history[history.length - 1].role !== "user") return null;
|
||||
|
||||
const msgUUID = v4();
|
||||
let token = "";
|
||||
let textResponse = "";
|
||||
let reasoningText = "";
|
||||
const historyMessages = this.buildToolCallMessages(history, functions);
|
||||
const stream = await chatCb({ messages: historyMessages });
|
||||
|
||||
@ -99,17 +101,32 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
|
||||
});
|
||||
|
||||
for await (const chunk of stream) {
|
||||
if (
|
||||
!chunk.hasOwnProperty("message") ||
|
||||
!chunk.message.hasOwnProperty("content")
|
||||
)
|
||||
continue;
|
||||
if (!chunk.hasOwnProperty("message")) continue;
|
||||
|
||||
const content = chunk.message?.content;
|
||||
const reasoningToken = chunk.message?.thinking;
|
||||
if (reasoningToken) {
|
||||
if (reasoningText.length === 0) {
|
||||
reasoningText = `Thinking:\n\n${reasoningToken}`;
|
||||
token = reasoningText;
|
||||
} else {
|
||||
reasoningText += reasoningToken;
|
||||
token = reasoningToken;
|
||||
}
|
||||
} else if (content.length > 0) {
|
||||
if (reasoningText.length > 0) {
|
||||
token = `\n\nDone thinking.\n\n${content}`;
|
||||
reasoningText = "";
|
||||
} else {
|
||||
token = content;
|
||||
}
|
||||
textResponse += content;
|
||||
}
|
||||
|
||||
textResponse += chunk.message.content;
|
||||
eventHandler?.("reportStreamEvent", {
|
||||
type: "statusResponse",
|
||||
uuid: msgUUID,
|
||||
content: chunk.message.content,
|
||||
content: token,
|
||||
});
|
||||
}
|
||||
|
||||
@ -225,23 +242,39 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
|
||||
);
|
||||
const msgUUID = v4();
|
||||
completion = { content: "" };
|
||||
let reasoningText = "";
|
||||
let token = "";
|
||||
const stream = await this.#handleFunctionCallStream({
|
||||
messages: this.cleanMsgs(messages),
|
||||
});
|
||||
|
||||
for await (const chunk of stream) {
|
||||
if (
|
||||
!chunk.hasOwnProperty("message") ||
|
||||
!chunk.message.hasOwnProperty("content")
|
||||
)
|
||||
continue;
|
||||
if (!chunk.hasOwnProperty("message")) continue;
|
||||
|
||||
const delta = chunk.message.content;
|
||||
completion.content += delta;
|
||||
const content = chunk.message?.content;
|
||||
const reasoningToken = chunk.message?.thinking;
|
||||
if (reasoningToken) {
|
||||
if (reasoningText.length === 0) {
|
||||
reasoningText = `<think>${reasoningToken}`;
|
||||
token = `<think>${reasoningToken}`;
|
||||
} else {
|
||||
reasoningText += reasoningToken;
|
||||
token = reasoningToken;
|
||||
}
|
||||
} else if (content.length > 0) {
|
||||
if (reasoningText.length > 0) {
|
||||
token = `</think>${content}`;
|
||||
reasoningText = "";
|
||||
} else {
|
||||
token = content;
|
||||
}
|
||||
}
|
||||
|
||||
completion.content += token;
|
||||
eventHandler?.("reportStreamEvent", {
|
||||
type: "textResponseChunk",
|
||||
uuid: msgUUID,
|
||||
content: delta,
|
||||
content: token,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user