Add custom fetch to embedder for Ollama (#5180)

Refactor the Ollama response-timeout fetch helper so that it is shared. Also apply the custom fetch to the Ollama embedder.
This commit is contained in:
Timothy Carambat 2026-03-09 11:47:00 -07:00 committed by GitHub
parent dc0bdf112b
commit 4e3bcfc616
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 16 additions and 47 deletions

View File

@ -33,7 +33,7 @@ class OllamaAILLM {
this.client = new Ollama({ this.client = new Ollama({
host: this.basePath, host: this.basePath,
headers: headers, headers: headers,
fetch: this.#applyFetch(), fetch: OllamaAILLM.applyOllamaFetch(),
}); });
this.embedder = embedder ?? new NativeEmbedder(); this.embedder = embedder ?? new NativeEmbedder();
this.defaultTemp = 0.7; this.defaultTemp = 0.7;
@ -132,7 +132,7 @@ class OllamaAILLM {
* for machines which run responses very slowly. * for machines which run responses very slowly.
* @returns {Function} The custom fetch function. * @returns {Function} The custom fetch function.
*/ */
#applyFetch() { static applyOllamaFetch() {
try { try {
if (!("OLLAMA_RESPONSE_TIMEOUT" in process.env)) return fetch; if (!("OLLAMA_RESPONSE_TIMEOUT" in process.env)) return fetch;
const { Agent } = require("undici"); const { Agent } = require("undici");
@ -140,7 +140,7 @@ class OllamaAILLM {
let timeout = process.env.OLLAMA_RESPONSE_TIMEOUT; let timeout = process.env.OLLAMA_RESPONSE_TIMEOUT;
if (!timeout || isNaN(Number(timeout)) || Number(timeout) <= 5 * 60_000) { if (!timeout || isNaN(Number(timeout)) || Number(timeout) <= 5 * 60_000) {
this.#log( OllamaAILLM.#slog(
"Timeout option was not set, is not a number, or is less than 5 minutes in ms - falling back to default", "Timeout option was not set, is not a number, or is less than 5 minutes in ms - falling back to default",
{ timeout } { timeout }
); );
@ -155,10 +155,13 @@ class OllamaAILLM {
}; };
const humanDiff = moment.duration(timeout).humanize(); const humanDiff = moment.duration(timeout).humanize();
this.#log(`Applying custom fetch w/timeout of ${humanDiff}.`); OllamaAILLM.#slog(`Applying custom fetch w/timeout of ${humanDiff}.`);
return noTimeoutFetch; return noTimeoutFetch;
} catch (error) { } catch (error) {
this.#log("Error applying custom fetch - using default fetch", error); OllamaAILLM.#slog(
"Error applying custom fetch - using default fetch",
error
);
return fetch; return fetch;
} }
} }

View File

@ -1,5 +1,6 @@
const { maximumChunkLength } = require("../../helpers"); const { maximumChunkLength } = require("../../helpers");
const { Ollama } = require("ollama"); const { Ollama } = require("ollama");
const { OllamaAILLM } = require("../../AiProviders/ollama");
class OllamaEmbedder { class OllamaEmbedder {
constructor() { constructor() {
@ -16,10 +17,15 @@ class OllamaEmbedder {
: 1; : 1;
this.embeddingMaxChunkLength = maximumChunkLength(); this.embeddingMaxChunkLength = maximumChunkLength();
this.authToken = process.env.OLLAMA_AUTH_TOKEN; this.authToken = process.env.OLLAMA_AUTH_TOKEN;
const headers = this.authToken const headers = this.authToken
? { Authorization: `Bearer ${this.authToken}` } ? { Authorization: `Bearer ${this.authToken}` }
: {}; : {};
this.client = new Ollama({ host: this.basePath, headers }); this.client = new Ollama({
host: this.basePath,
headers,
fetch: OllamaAILLM.applyOllamaFetch(),
});
this.log( this.log(
`initialized with model ${this.model} at ${this.basePath}. Batch size: ${this.maxConcurrentChunks}, num_ctx: ${this.embeddingMaxChunkLength}` `initialized with model ${this.model} at ${this.basePath}. Batch size: ${this.maxConcurrentChunks}, num_ctx: ${this.embeddingMaxChunkLength}`
); );

View File

@ -28,7 +28,7 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
this._client = new Ollama({ this._client = new Ollama({
host: basePath, host: basePath,
headers: headers, headers: headers,
fetch: this.#applyFetch(), fetch: OllamaAILLM.applyOllamaFetch(),
}); });
this.model = model; this.model = model;
this.verbose = true; this.verbose = true;
@ -526,46 +526,6 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
getCost(_usage) { getCost(_usage) {
return 0; return 0;
} }
/**
* Apply a custom fetch function to the Ollama client.
* This is useful when we want to bypass the default 5m timeout for global fetch
* for machines which run responses very slowly.
* @returns {Function} The custom fetch function.
*/
#applyFetch() {
try {
if (!("OLLAMA_RESPONSE_TIMEOUT" in process.env)) return fetch;
const { Agent } = require("undici");
const moment = require("moment");
let timeout = process.env.OLLAMA_RESPONSE_TIMEOUT;
if (!timeout || isNaN(Number(timeout)) || Number(timeout) <= 5 * 60_000) {
this.providerLog(
"Timeout option was not set, is not a number, or is less than 5 minutes in ms - falling back to default",
{ timeout }
);
return fetch;
} else timeout = Number(timeout);
const noTimeoutFetch = (input, init = {}) => {
return fetch(input, {
...init,
dispatcher: new Agent({ headersTimeout: timeout }),
});
};
const humanDiff = moment.duration(timeout).humanize();
this.providerLog(`Applying custom fetch w/timeout of ${humanDiff}.`);
return noTimeoutFetch;
} catch (error) {
this.providerLog(
"Error applying custom fetch - using default fetch",
error
);
return fetch;
}
}
} }
module.exports = OllamaProvider; module.exports = OllamaProvider;