Enable custom HTTP response timeout for ollama (#4448)
parent ac444c8fa5
commit c8f13d5f27
@@ -44,6 +44,7 @@ GID='1000'
 # OLLAMA_MODEL_PREF='llama2'
 # OLLAMA_MODEL_TOKEN_LIMIT=4096
 # OLLAMA_AUTH_TOKEN='your-ollama-auth-token-here (optional, only for ollama running behind auth - Bearer token)'
+# OLLAMA_RESPONSE_TIMEOUT=7200000 (optional, max timeout in milliseconds for ollama response to conclude. Default is 5min before aborting)

 # LLM_PROVIDER='togetherai'
 # TOGETHER_AI_API_KEY='my-together-ai-key'
@@ -41,6 +41,7 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
 # OLLAMA_MODEL_PREF='llama2'
 # OLLAMA_MODEL_TOKEN_LIMIT=4096
 # OLLAMA_AUTH_TOKEN='your-ollama-auth-token-here (optional, only for ollama running behind auth - Bearer token)'
+# OLLAMA_RESPONSE_TIMEOUT=7200000 (optional, max timeout in milliseconds for ollama response to conclude. Default is 5min before aborting)

 # LLM_PROVIDER='togetherai'
 # TOGETHER_AI_API_KEY='my-together-ai-key'
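As a usage sketch, a 2-hour timeout would look like the excerpt below. The value is in milliseconds (7200000 ms = 2 hours), and per the validation added further down, anything unset, non-numeric, or at or below the 5-minute default (300000 ms) falls back to the stock fetch. The surrounding keys (LLM_PROVIDER, OLLAMA_BASE_PATH) are assumed from the usual Ollama provider config, not part of this diff:

LLM_PROVIDER='ollama'
OLLAMA_BASE_PATH='http://127.0.0.1:11434'
OLLAMA_MODEL_PREF='llama2'
OLLAMA_RESPONSE_TIMEOUT=7200000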
@@ -31,7 +31,11 @@ class OllamaAILLM {
     const headers = this.authToken
       ? { Authorization: `Bearer ${this.authToken}` }
       : {};
-    this.client = new Ollama({ host: this.basePath, headers: headers });
+    this.client = new Ollama({
+      host: this.basePath,
+      headers: headers,
+      fetch: this.#applyFetch(),
+    });
     this.embedder = embedder ?? new NativeEmbedder();
     this.defaultTemp = 0.7;
     this.#log(
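The constructor change works because the ollama npm client accepts a fetch override in its config, which is exactly the hook #applyFetch() plugs into. A minimal standalone sketch of that hook, using an illustrative logging wrapper rather than the timeout logic (host value and loggingFetch are assumptions, not from this diff):

// Sketch: inject a custom fetch into the ollama client.
const { Ollama } = require("ollama");

const loggingFetch = (input, init = {}) => {
  // Log each outgoing request, then delegate to the global fetch.
  console.log(`[ollama] ${init.method ?? "GET"} ${input}`);
  return fetch(input, init);
};

const client = new Ollama({
  host: "http://127.0.0.1:11434",
  fetch: loggingFetch,
});
client.list().then(({ models }) => console.log(models.map((m) => m.name)));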
@@ -55,6 +59,43 @@ class OllamaAILLM {
     );
   }

+  /**
+   * Apply a custom fetch function to the Ollama client.
+   * This is useful when we want to bypass the default 5m timeout for global fetch
+   * for machines which run responses very slowly.
+   * @returns {Function} The custom fetch function.
+   */
+  #applyFetch() {
+    try {
+      if (!("OLLAMA_RESPONSE_TIMEOUT" in process.env)) return fetch;
+      const { Agent } = require("undici");
+      const moment = require("moment");
+      let timeout = process.env.OLLAMA_RESPONSE_TIMEOUT;
+
+      if (!timeout || isNaN(Number(timeout)) || Number(timeout) <= 5 * 60_000) {
+        this.#log(
+          "Timeout option was not set, is not a number, or is less than 5 minutes in ms - falling back to default",
+          { timeout }
+        );
+        return fetch;
+      } else timeout = Number(timeout);
+
+      const noTimeoutFetch = (input, init = {}) => {
+        return fetch(input, {
+          ...init,
+          dispatcher: new Agent({ headersTimeout: timeout }),
+        });
+      };
+
+      const humanDiff = moment.duration(timeout).humanize();
+      this.#log(`Applying custom fetch w/timeout of ${humanDiff}.`);
+      return noTimeoutFetch;
+    } catch (error) {
+      this.#log("Error applying custom fetch - using default fetch", error);
+      return fetch;
+    }
+  }
+
   streamingEnabled() {
     return "streamGetChatCompletion" in this;
   }
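The core technique here is undici's dispatcher option: Node's built-in fetch is undici-backed, and its default headersTimeout of 300000 ms (5 minutes) aborts any request whose response headers take longer to arrive, which is easy to hit with slow local models. A minimal standalone sketch of the same pattern (assumes Node 18+ with the undici package installed; the URL and timeout value are illustrative):

// Sketch: raise fetch's headers timeout via a custom undici Agent.
const { Agent } = require("undici");

const HEADERS_TIMEOUT_MS = 7_200_000; // 2 hours instead of undici's 5-minute default

const patientFetch = (input, init = {}) =>
  fetch(input, {
    ...init,
    // dispatcher swaps in an Agent whose headersTimeout controls how long
    // fetch waits for response headers before aborting.
    dispatcher: new Agent({ headersTimeout: HEADERS_TIMEOUT_MS }),
  });

patientFetch("http://127.0.0.1:11434/api/tags")
  .then((res) => res.json())
  .then((json) => console.log(json))
  .catch((err) => console.error(err));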
@@ -1170,6 +1170,9 @@ function dumpENV() {

     // Specify Chromium args for collector
     "ANYTHINGLLM_CHROMIUM_ARGS",
+
+    // Allow setting a custom response timeout for Ollama
+    "OLLAMA_RESPONSE_TIMEOUT",
   ];

   // Simple sanitization of each value to prevent ENV injection via newline or quote escaping.
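This last hunk matters because dumpENV() only persists whitelisted keys: without adding "OLLAMA_RESPONSE_TIMEOUT" to the list, the setting would be dropped the next time the env file is rewritten. A hypothetical sketch of that whitelist-and-dump pattern (names and sanitization details are illustrative, not the actual dumpENV implementation):

// Sketch: persist only known keys, sanitizing values against ENV injection.
const PERSISTED_KEYS = ["ANYTHINGLLM_CHROMIUM_ARGS", "OLLAMA_RESPONSE_TIMEOUT"];

function dumpEnvSketch(env = process.env) {
  return PERSISTED_KEYS.filter((key) => key in env)
    .map((key) => {
      // Strip newlines and quotes so a value cannot break out of its line.
      const safe = String(env[key]).replace(/\n/g, " ").replace(/'/g, "");
      return `${key}='${safe}'`;
    })
    .join("\n");
}

console.log(dumpEnvSketch({ OLLAMA_RESPONSE_TIMEOUT: "7200000" }));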