Enable custom HTTP response timeout for ollama (#4448)
This commit is contained in:
parent
ac444c8fa5
commit
c8f13d5f27
@ -44,6 +44,7 @@ GID='1000'
|
|||||||
# OLLAMA_MODEL_PREF='llama2'
|
# OLLAMA_MODEL_PREF='llama2'
|
||||||
# OLLAMA_MODEL_TOKEN_LIMIT=4096
|
# OLLAMA_MODEL_TOKEN_LIMIT=4096
|
||||||
# OLLAMA_AUTH_TOKEN='your-ollama-auth-token-here (optional, only for ollama running behind auth - Bearer token)'
|
# OLLAMA_AUTH_TOKEN='your-ollama-auth-token-here (optional, only for ollama running behind auth - Bearer token)'
|
||||||
|
# OLLAMA_RESPONSE_TIMEOUT=7200000 (optional, maximum time in milliseconds to wait for an Ollama response to conclude before aborting. Defaults to 5 minutes; values must be greater than 5 minutes to take effect)
|
||||||
|
|
||||||
# LLM_PROVIDER='togetherai'
|
# LLM_PROVIDER='togetherai'
|
||||||
# TOGETHER_AI_API_KEY='my-together-ai-key'
|
# TOGETHER_AI_API_KEY='my-together-ai-key'
|
||||||
|
|||||||
@ -41,6 +41,7 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
|
|||||||
# OLLAMA_MODEL_PREF='llama2'
|
# OLLAMA_MODEL_PREF='llama2'
|
||||||
# OLLAMA_MODEL_TOKEN_LIMIT=4096
|
# OLLAMA_MODEL_TOKEN_LIMIT=4096
|
||||||
# OLLAMA_AUTH_TOKEN='your-ollama-auth-token-here (optional, only for ollama running behind auth - Bearer token)'
|
# OLLAMA_AUTH_TOKEN='your-ollama-auth-token-here (optional, only for ollama running behind auth - Bearer token)'
|
||||||
|
# OLLAMA_RESPONSE_TIMEOUT=7200000 (optional, maximum time in milliseconds to wait for an Ollama response to conclude before aborting. Defaults to 5 minutes; values must be greater than 5 minutes to take effect)
|
||||||
|
|
||||||
# LLM_PROVIDER='togetherai'
|
# LLM_PROVIDER='togetherai'
|
||||||
# TOGETHER_AI_API_KEY='my-together-ai-key'
|
# TOGETHER_AI_API_KEY='my-together-ai-key'
|
||||||
|
|||||||
@ -31,7 +31,11 @@ class OllamaAILLM {
|
|||||||
const headers = this.authToken
|
const headers = this.authToken
|
||||||
? { Authorization: `Bearer ${this.authToken}` }
|
? { Authorization: `Bearer ${this.authToken}` }
|
||||||
: {};
|
: {};
|
||||||
this.client = new Ollama({ host: this.basePath, headers: headers });
|
this.client = new Ollama({
|
||||||
|
host: this.basePath,
|
||||||
|
headers: headers,
|
||||||
|
fetch: this.#applyFetch(),
|
||||||
|
});
|
||||||
this.embedder = embedder ?? new NativeEmbedder();
|
this.embedder = embedder ?? new NativeEmbedder();
|
||||||
this.defaultTemp = 0.7;
|
this.defaultTemp = 0.7;
|
||||||
this.#log(
|
this.#log(
|
||||||
@ -55,6 +59,43 @@ class OllamaAILLM {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Apply a custom fetch function to the Ollama client.
|
||||||
|
* This is useful when we want to bypass the default 5m timeout for global fetch
|
||||||
|
* for machines which run responses very slowly.
|
||||||
|
* @returns {Function} The custom fetch function.
|
||||||
|
*/
|
||||||
|
#applyFetch() {
|
||||||
|
try {
|
||||||
|
if (!("OLLAMA_RESPONSE_TIMEOUT" in process.env)) return fetch;
|
||||||
|
const { Agent } = require("undici");
|
||||||
|
const moment = require("moment");
|
||||||
|
let timeout = process.env.OLLAMA_RESPONSE_TIMEOUT;
|
||||||
|
|
||||||
|
if (!timeout || isNaN(Number(timeout)) || Number(timeout) <= 5 * 60_000) {
|
||||||
|
this.#log(
|
||||||
|
"Timeout option was not set, is not a number, or is less than 5 minutes in ms - falling back to default",
|
||||||
|
{ timeout }
|
||||||
|
);
|
||||||
|
return fetch;
|
||||||
|
} else timeout = Number(timeout);
|
||||||
|
|
||||||
|
const noTimeoutFetch = (input, init = {}) => {
|
||||||
|
return fetch(input, {
|
||||||
|
...init,
|
||||||
|
dispatcher: new Agent({ headersTimeout: timeout }),
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
const humanDiff = moment.duration(timeout).humanize();
|
||||||
|
this.#log(`Applying custom fetch w/timeout of ${humanDiff}.`);
|
||||||
|
return noTimeoutFetch;
|
||||||
|
} catch (error) {
|
||||||
|
this.#log("Error applying custom fetch - using default fetch", error);
|
||||||
|
return fetch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
streamingEnabled() {
|
streamingEnabled() {
|
||||||
return "streamGetChatCompletion" in this;
|
return "streamGetChatCompletion" in this;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1170,6 +1170,9 @@ function dumpENV() {
|
|||||||
|
|
||||||
// Specify Chromium args for collector
|
// Specify Chromium args for collector
|
||||||
"ANYTHINGLLM_CHROMIUM_ARGS",
|
"ANYTHINGLLM_CHROMIUM_ARGS",
|
||||||
|
|
||||||
|
// Allow setting a custom response timeout for Ollama
|
||||||
|
"OLLAMA_RESPONSE_TIMEOUT",
|
||||||
];
|
];
|
||||||
|
|
||||||
// Simple sanitization of each value to prevent ENV injection via newline or quote escaping.
|
// Simple sanitization of each value to prevent ENV injection via newline or quote escaping.
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user