Refactor Ollama context window setting (#4909)
This commit is contained in:
parent
64bff91998
commit
fe78e1c667
@ -1,10 +1,10 @@
|
|||||||
import React, { useEffect, useState } from "react";
|
import React, { useEffect, useState } from "react";
|
||||||
import System from "@/models/system";
|
import System from "@/models/system";
|
||||||
import PreLoader from "@/components/Preloader";
|
|
||||||
import { OLLAMA_COMMON_URLS } from "@/utils/constants";
|
import { OLLAMA_COMMON_URLS } from "@/utils/constants";
|
||||||
import { CaretDown, CaretUp, Info } from "@phosphor-icons/react";
|
import { CaretDown, CaretUp, Info, CircleNotch } from "@phosphor-icons/react";
|
||||||
import useProviderEndpointAutoDiscovery from "@/hooks/useProviderEndpointAutoDiscovery";
|
import useProviderEndpointAutoDiscovery from "@/hooks/useProviderEndpointAutoDiscovery";
|
||||||
import { Tooltip } from "react-tooltip";
|
import { Tooltip } from "react-tooltip";
|
||||||
|
import { Link } from "react-router-dom";
|
||||||
|
|
||||||
export default function OllamaLLMOptions({ settings }) {
|
export default function OllamaLLMOptions({ settings }) {
|
||||||
const {
|
const {
|
||||||
@ -21,9 +21,6 @@ export default function OllamaLLMOptions({ settings }) {
|
|||||||
initialBasePath: settings?.OllamaLLMBasePath,
|
initialBasePath: settings?.OllamaLLMBasePath,
|
||||||
ENDPOINTS: OLLAMA_COMMON_URLS,
|
ENDPOINTS: OLLAMA_COMMON_URLS,
|
||||||
});
|
});
|
||||||
const [performanceMode, setPerformanceMode] = useState(
|
|
||||||
settings?.OllamaLLMPerformanceMode || "base"
|
|
||||||
);
|
|
||||||
const [maxTokens, setMaxTokens] = useState(
|
const [maxTokens, setMaxTokens] = useState(
|
||||||
settings?.OllamaLLMTokenLimit || ""
|
settings?.OllamaLLMTokenLimit || ""
|
||||||
);
|
);
|
||||||
@ -56,14 +53,36 @@ export default function OllamaLLMOptions({ settings }) {
|
|||||||
|
|
||||||
<div hidden={!showAdvancedControls}>
|
<div hidden={!showAdvancedControls}>
|
||||||
<div className="flex flex-col">
|
<div className="flex flex-col">
|
||||||
<div className="w-full flex items-start gap-4">
|
<div className="w-full flex items-start gap-4 mb-4">
|
||||||
<div className="flex flex-col w-60">
|
<div className="flex flex-col w-60">
|
||||||
<div className="flex justify-between items-center mb-2">
|
<div className="flex justify-between items-center mb-2">
|
||||||
<label className="text-white text-sm font-semibold">
|
<div className="flex items-center gap-1">
|
||||||
Ollama Base URL
|
<label className="text-white text-sm font-semibold">
|
||||||
</label>
|
Ollama Base URL
|
||||||
|
</label>
|
||||||
|
<Info
|
||||||
|
size={18}
|
||||||
|
className="text-theme-text-secondary cursor-pointer"
|
||||||
|
data-tooltip-id="ollama-base-url"
|
||||||
|
data-tooltip-content="Enter the URL where Ollama is running."
|
||||||
|
/>
|
||||||
|
<Tooltip
|
||||||
|
id="ollama-base-url"
|
||||||
|
place="top"
|
||||||
|
delayShow={300}
|
||||||
|
className="tooltip !text-xs !opacity-100"
|
||||||
|
style={{
|
||||||
|
maxWidth: "250px",
|
||||||
|
whiteSpace: "normal",
|
||||||
|
wordWrap: "break-word",
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
{loading ? (
|
{loading ? (
|
||||||
<PreLoader size="6" />
|
<CircleNotch
|
||||||
|
size={16}
|
||||||
|
className="text-theme-text-secondary animate-spin"
|
||||||
|
/>
|
||||||
) : (
|
) : (
|
||||||
<>
|
<>
|
||||||
{!basePathValue.value && (
|
{!basePathValue.value && (
|
||||||
@ -89,61 +108,45 @@ export default function OllamaLLMOptions({ settings }) {
|
|||||||
onChange={basePath.onChange}
|
onChange={basePath.onChange}
|
||||||
onBlur={basePath.onBlur}
|
onBlur={basePath.onBlur}
|
||||||
/>
|
/>
|
||||||
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
|
|
||||||
Enter the URL where Ollama is running.
|
|
||||||
</p>
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className="flex flex-col w-60">
|
<div className="flex flex-col w-60">
|
||||||
<label className="text-white text-sm font-semibold mb-2 flex items-center">
|
<div className="flex items-center mb-2 gap-x-1">
|
||||||
Performance Mode
|
<label className="text-white text-sm font-semibold block">
|
||||||
|
Ollama Keep Alive
|
||||||
|
</label>
|
||||||
<Info
|
<Info
|
||||||
size={16}
|
size={18}
|
||||||
className="ml-2 text-white"
|
className="text-theme-text-secondary cursor-pointer"
|
||||||
data-tooltip-id="performance-mode-tooltip"
|
data-tooltip-id="ollama-keep-alive"
|
||||||
/>
|
/>
|
||||||
</label>
|
<Tooltip
|
||||||
<select
|
id="ollama-keep-alive"
|
||||||
name="OllamaLLMPerformanceMode"
|
place="top"
|
||||||
required={true}
|
delayShow={300}
|
||||||
className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
|
delayHide={400}
|
||||||
value={performanceMode}
|
clickable={true}
|
||||||
onChange={(e) => setPerformanceMode(e.target.value)}
|
className="tooltip !text-xs !opacity-100"
|
||||||
>
|
style={{
|
||||||
<option value="base">Base (Default)</option>
|
maxWidth: "250px",
|
||||||
<option value="maximum">Maximum</option>
|
whiteSpace: "normal",
|
||||||
</select>
|
wordWrap: "break-word",
|
||||||
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
|
}}
|
||||||
Choose the performance mode for the Ollama model.
|
>
|
||||||
</p>
|
<p className="text-xs leading-[18px] font-base">
|
||||||
<Tooltip
|
Choose how long Ollama should keep your model in memory
|
||||||
id="performance-mode-tooltip"
|
before unloading.{" "}
|
||||||
place="bottom"
|
<Link
|
||||||
className="tooltip !text-xs max-w-xs"
|
className="underline text-blue-300"
|
||||||
>
|
to="https://docs.ollama.com/faq#how-do-i-keep-a-model-loaded-in-memory-or-make-it-unload-immediately"
|
||||||
<p className="text-red-500">
|
target="_blank"
|
||||||
<strong>Note:</strong> Be careful with the Maximum mode. It
|
rel="noreferrer"
|
||||||
may increase resource usage significantly.
|
>
|
||||||
</p>
|
Learn more →
|
||||||
<br />
|
</Link>
|
||||||
<p>
|
</p>
|
||||||
<strong>Base:</strong> Ollama automatically limits the context
|
</Tooltip>
|
||||||
to 2048 tokens, keeping resources usage low while maintaining
|
</div>
|
||||||
good performance. Suitable for most users and models.
|
|
||||||
</p>
|
|
||||||
<br />
|
|
||||||
<p>
|
|
||||||
<strong>Maximum:</strong> Uses the full context window (up to
|
|
||||||
Max Tokens). Will result in increased resource usage but
|
|
||||||
allows for larger context conversations. <br />
|
|
||||||
<br />
|
|
||||||
This is not recommended for most users.
|
|
||||||
</p>
|
|
||||||
</Tooltip>
|
|
||||||
</div>
|
|
||||||
<div className="flex flex-col w-60">
|
|
||||||
<label className="text-white text-sm font-semibold block mb-2">
|
|
||||||
Ollama Keep Alive
|
|
||||||
</label>
|
|
||||||
<select
|
<select
|
||||||
name="OllamaLLMKeepAliveSeconds"
|
name="OllamaLLMKeepAliveSeconds"
|
||||||
required={true}
|
required={true}
|
||||||
@ -155,26 +158,53 @@ export default function OllamaLLMOptions({ settings }) {
|
|||||||
<option value="3600">1 hour</option>
|
<option value="3600">1 hour</option>
|
||||||
<option value="-1">Forever</option>
|
<option value="-1">Forever</option>
|
||||||
</select>
|
</select>
|
||||||
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
|
|
||||||
Choose how long Ollama should keep your model in memory before
|
|
||||||
unloading.
|
|
||||||
<a
|
|
||||||
className="underline text-blue-300"
|
|
||||||
href="https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-keep-a-model-loaded-in-memory-or-make-it-unload-immediately"
|
|
||||||
target="_blank"
|
|
||||||
rel="noreferrer"
|
|
||||||
>
|
|
||||||
{" "}
|
|
||||||
Learn more →
|
|
||||||
</a>
|
|
||||||
</p>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div className="w-full flex items-start gap-4">
|
<div className="w-full flex items-start gap-4">
|
||||||
<div className="flex flex-col w-60">
|
<div className="flex flex-col w-60">
|
||||||
<label className="text-white text-sm font-semibold block mb-2">
|
<div className="flex items-center mb-2 gap-x-1">
|
||||||
Max Tokens (Optional)
|
<label className="text-white text-sm font-semibold block">
|
||||||
</label>
|
Model context window
|
||||||
|
</label>
|
||||||
|
<Info
|
||||||
|
size={18}
|
||||||
|
className="text-theme-text-secondary cursor-pointer"
|
||||||
|
data-tooltip-id="ollama-model-context-window"
|
||||||
|
/>
|
||||||
|
<Tooltip
|
||||||
|
id="ollama-model-context-window"
|
||||||
|
place="top"
|
||||||
|
delayShow={300}
|
||||||
|
delayHide={400}
|
||||||
|
clickable={true}
|
||||||
|
className="tooltip !text-xs !opacity-100"
|
||||||
|
style={{
|
||||||
|
maxWidth: "250px",
|
||||||
|
whiteSpace: "normal",
|
||||||
|
wordWrap: "break-word",
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<p className="text-xs leading-[18px] font-base">
|
||||||
|
Specify the maximum number of tokens that can be used for
|
||||||
|
the model context window.
|
||||||
|
<br /> <br />
|
||||||
|
If you leave this field blank, the context window limit will
|
||||||
|
be auto-detected from the model and applied to all chats. If
|
||||||
|
auto-detection fails, a fallback context window limit of
|
||||||
|
4096 will be used.
|
||||||
|
<br /> <br />
|
||||||
|
<b>Important:</b> Some models have very large context
|
||||||
|
windows using the full context window limit can dramatically
|
||||||
|
increase the memory usage of your system. For this reason,
|
||||||
|
we will automatically cap the context window limit to 16,384
|
||||||
|
tokens if the model supports more than that and no value is
|
||||||
|
specified.
|
||||||
|
<br /> <br />
|
||||||
|
If an invalid value is entered, AnythingLLM will handle this
|
||||||
|
for you so that chats do not fail.
|
||||||
|
</p>
|
||||||
|
</Tooltip>
|
||||||
|
</div>
|
||||||
<input
|
<input
|
||||||
type="number"
|
type="number"
|
||||||
name="OllamaLLMTokenLimit"
|
name="OllamaLLMTokenLimit"
|
||||||
@ -189,28 +219,44 @@ export default function OllamaLLMOptions({ settings }) {
|
|||||||
required={false}
|
required={false}
|
||||||
autoComplete="off"
|
autoComplete="off"
|
||||||
/>
|
/>
|
||||||
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
|
|
||||||
Override the context window limit. Leave empty to auto-detect
|
|
||||||
from the model (defaults to 4096 if detection fails).
|
|
||||||
</p>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
|
||||||
<div className="w-full flex items-start gap-4 mt-4">
|
<div className="flex flex-col w-60">
|
||||||
<div className="flex flex-col w-100">
|
<div className="flex items-center mb-2 gap-x-1">
|
||||||
<label className="text-white text-sm font-semibold">
|
<label className="text-white text-sm font-semibold">
|
||||||
Auth Token
|
Authentication Token
|
||||||
</label>
|
</label>
|
||||||
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
|
<Info
|
||||||
Enter a <code>Bearer</code> Auth Token for interacting with your
|
size={18}
|
||||||
Ollama server.
|
className="text-theme-text-secondary cursor-pointer"
|
||||||
<br />
|
data-tooltip-id="ollama-authentication-token"
|
||||||
Used <b>only</b> if running Ollama behind an authentication
|
/>
|
||||||
server.
|
<Tooltip
|
||||||
</p>
|
id="ollama-authentication-token"
|
||||||
|
place="top"
|
||||||
|
delayShow={300}
|
||||||
|
delayHide={400}
|
||||||
|
clickable={true}
|
||||||
|
className="tooltip !text-xs !opacity-100"
|
||||||
|
style={{
|
||||||
|
maxWidth: "250px",
|
||||||
|
whiteSpace: "normal",
|
||||||
|
wordWrap: "break-word",
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<p className="text-xs leading-[18px] font-base">
|
||||||
|
Enter a <code>Bearer</code> Auth Token for interacting with
|
||||||
|
your Ollama server.
|
||||||
|
<br /> <br />
|
||||||
|
Used <b>only</b> if running Ollama behind an authentication
|
||||||
|
server.
|
||||||
|
</p>
|
||||||
|
</Tooltip>
|
||||||
|
</div>
|
||||||
<input
|
<input
|
||||||
type="password"
|
type="password"
|
||||||
name="OllamaLLMAuthToken"
|
name="OllamaLLMAuthToken"
|
||||||
className="border-none bg-theme-settings-input-bg mt-2 text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg outline-none block w-full p-2.5 focus:outline-primary-button active:outline-primary-button"
|
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg outline-none block w-full p-2.5 focus:outline-primary-button active:outline-primary-button"
|
||||||
placeholder="Ollama Auth Token"
|
placeholder="Ollama Auth Token"
|
||||||
defaultValue={
|
defaultValue={
|
||||||
settings?.OllamaLLMAuthToken ? "*".repeat(20) : ""
|
settings?.OllamaLLMAuthToken ? "*".repeat(20) : ""
|
||||||
|
|||||||
@ -518,7 +518,6 @@ const SystemSettings = {
|
|||||||
OllamaLLMModelPref: process.env.OLLAMA_MODEL_PREF,
|
OllamaLLMModelPref: process.env.OLLAMA_MODEL_PREF,
|
||||||
OllamaLLMTokenLimit: process.env.OLLAMA_MODEL_TOKEN_LIMIT || null,
|
OllamaLLMTokenLimit: process.env.OLLAMA_MODEL_TOKEN_LIMIT || null,
|
||||||
OllamaLLMKeepAliveSeconds: process.env.OLLAMA_KEEP_ALIVE_TIMEOUT ?? 300,
|
OllamaLLMKeepAliveSeconds: process.env.OLLAMA_KEEP_ALIVE_TIMEOUT ?? 300,
|
||||||
OllamaLLMPerformanceMode: process.env.OLLAMA_PERFORMANCE_MODE ?? "base",
|
|
||||||
|
|
||||||
// Novita LLM Keys
|
// Novita LLM Keys
|
||||||
NovitaLLMApiKey: !!process.env.NOVITA_LLM_API_KEY,
|
NovitaLLMApiKey: !!process.env.NOVITA_LLM_API_KEY,
|
||||||
|
|||||||
@ -1,40 +0,0 @@
|
|||||||
# Common Issues with Ollama
|
|
||||||
|
|
||||||
If you encounter an error stating `llama:streaming - could not stream chat. Error: connect ECONNREFUSED 172.17.0.1:11434` when using AnythingLLM in a Docker container, it means the container cannot reach Ollama on port 11434 of the host system. By default, Ollama only listens on localhost (127.0.0.1), so connections arriving from the Docker network's host IP (for example, 172.17.0.1) are refused. To resolve this issue and ensure proper communication between the Dockerized AnythingLLM and the Ollama service, you must configure Ollama to bind to 0.0.0.0 or a specific IP address.
|
|
||||||
|
|
||||||
### Setting Environment Variables on Mac
|
|
||||||
|
|
||||||
If Ollama is run as a macOS application, environment variables should be set using `launchctl`:
|
|
||||||
|
|
||||||
1. For each environment variable, call `launchctl setenv`.
|
|
||||||
```bash
|
|
||||||
launchctl setenv OLLAMA_HOST "0.0.0.0"
|
|
||||||
```
|
|
||||||
2. Restart the Ollama application.
|
|
||||||
|
|
||||||
### Setting Environment Variables on Linux
|
|
||||||
|
|
||||||
If Ollama is run as a systemd service, environment variables should be set using `systemctl`:
|
|
||||||
|
|
||||||
1. Edit the systemd service by calling `systemctl edit ollama.service`. This will open an editor.
|
|
||||||
2. For each environment variable, add a line `Environment` under the section `[Service]`:
|
|
||||||
```ini
|
|
||||||
[Service]
|
|
||||||
Environment="OLLAMA_HOST=0.0.0.0"
|
|
||||||
```
|
|
||||||
3. Save and exit.
|
|
||||||
4. Reload `systemd` and restart Ollama:
|
|
||||||
```bash
|
|
||||||
systemctl daemon-reload
|
|
||||||
systemctl restart ollama
|
|
||||||
```
|
|
||||||
|
|
||||||
### Setting Environment Variables on Windows
|
|
||||||
|
|
||||||
On Windows, Ollama inherits your user and system environment variables.
|
|
||||||
|
|
||||||
1. First, quit Ollama by clicking on it in the taskbar.
|
|
||||||
2. Edit system environment variables from the Control Panel.
|
|
||||||
3. Edit or create new variable(s) for your user account for `OLLAMA_HOST`, `OLLAMA_MODELS`, etc.
|
|
||||||
4. Click OK/Apply to save.
|
|
||||||
5. Run `ollama` from a new terminal window.
|
|
||||||
@ -23,7 +23,6 @@ class OllamaAILLM {
|
|||||||
this.authToken = process.env.OLLAMA_AUTH_TOKEN;
|
this.authToken = process.env.OLLAMA_AUTH_TOKEN;
|
||||||
this.basePath = process.env.OLLAMA_BASE_PATH;
|
this.basePath = process.env.OLLAMA_BASE_PATH;
|
||||||
this.model = modelPreference || process.env.OLLAMA_MODEL_PREF;
|
this.model = modelPreference || process.env.OLLAMA_MODEL_PREF;
|
||||||
this.performanceMode = process.env.OLLAMA_PERFORMANCE_MODE || "base";
|
|
||||||
this.keepAlive = process.env.OLLAMA_KEEP_ALIVE_TIMEOUT
|
this.keepAlive = process.env.OLLAMA_KEEP_ALIVE_TIMEOUT
|
||||||
? Number(process.env.OLLAMA_KEEP_ALIVE_TIMEOUT)
|
? Number(process.env.OLLAMA_KEEP_ALIVE_TIMEOUT)
|
||||||
: 300; // Default 5-minute timeout for Ollama model loading.
|
: 300; // Default 5-minute timeout for Ollama model loading.
|
||||||
@ -43,9 +42,7 @@ class OllamaAILLM {
|
|||||||
this.limits = null;
|
this.limits = null;
|
||||||
|
|
||||||
OllamaAILLM.cacheContextWindows(true);
|
OllamaAILLM.cacheContextWindows(true);
|
||||||
this.#log(
|
this.#log(`initialized with\nmodel: ${this.model}`);
|
||||||
`initialized with\nmodel: ${this.model}\nperf: ${this.performanceMode}`
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#log(text, ...args) {
|
#log(text, ...args) {
|
||||||
@ -64,6 +61,9 @@ class OllamaAILLM {
|
|||||||
system: this.promptWindowLimit() * 0.15,
|
system: this.promptWindowLimit() * 0.15,
|
||||||
user: this.promptWindowLimit() * 0.7,
|
user: this.promptWindowLimit() * 0.7,
|
||||||
};
|
};
|
||||||
|
this.#log(
|
||||||
|
`model ${this.model} is using a max context window of ${this.promptWindowLimit()}/${OllamaAILLM.maxContextWindow(this.model)} tokens.`
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -176,8 +176,7 @@ class OllamaAILLM {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let userDefinedLimit = null;
|
let userDefinedLimit = null;
|
||||||
const systemDefinedLimit =
|
const systemDefinedLimit = OllamaAILLM.maxContextWindow(modelName);
|
||||||
Number(this.modelContextWindows[modelName]) || 4096;
|
|
||||||
|
|
||||||
if (
|
if (
|
||||||
process.env.OLLAMA_MODEL_TOKEN_LIMIT &&
|
process.env.OLLAMA_MODEL_TOKEN_LIMIT &&
|
||||||
@ -190,13 +189,23 @@ class OllamaAILLM {
|
|||||||
// so we return the minimum of the two, if there is no user defined limit, we return the system defined limit as-is.
|
// so we return the minimum of the two, if there is no user defined limit, we return the system defined limit as-is.
|
||||||
if (userDefinedLimit !== null)
|
if (userDefinedLimit !== null)
|
||||||
return Math.min(userDefinedLimit, systemDefinedLimit);
|
return Math.min(userDefinedLimit, systemDefinedLimit);
|
||||||
return systemDefinedLimit;
|
|
||||||
|
// Cap the context window limit to 16,384 tokens if the model supports more than that and no value is specified by the user.
|
||||||
|
// This prevents very large context windows from being used when the user does not specify a value,
|
||||||
|
// while still letting models with smaller context windows use their full context window limit.
|
||||||
|
return Math.min(systemDefinedLimit, 16384);
|
||||||
}
|
}
|
||||||
|
|
||||||
promptWindowLimit() {
|
promptWindowLimit() {
|
||||||
return this.constructor.promptWindowLimit(this.model);
|
return this.constructor.promptWindowLimit(this.model);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static maxContextWindow(modelName = null) {
|
||||||
|
if (Object.keys(OllamaAILLM.modelContextWindows).length === 0 || !modelName)
|
||||||
|
return 4096;
|
||||||
|
return Number(OllamaAILLM.modelContextWindows[modelName]) || 16384;
|
||||||
|
}
|
||||||
|
|
||||||
async isValidChatCompletionModel(_ = "") {
|
async isValidChatCompletionModel(_ = "") {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -266,10 +275,7 @@ class OllamaAILLM {
|
|||||||
options: {
|
options: {
|
||||||
temperature,
|
temperature,
|
||||||
use_mlock: true,
|
use_mlock: true,
|
||||||
// There are currently only two performance settings so if its not "base" - its max context.
|
num_ctx: this.promptWindowLimit(),
|
||||||
...(this.performanceMode === "base"
|
|
||||||
? {} // TODO: if in base mode, maybe we just use half the context window when below <10K?
|
|
||||||
: { num_ctx: this.promptWindowLimit() }),
|
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
.then((res) => {
|
.then((res) => {
|
||||||
@ -321,10 +327,7 @@ class OllamaAILLM {
|
|||||||
options: {
|
options: {
|
||||||
temperature,
|
temperature,
|
||||||
use_mlock: true,
|
use_mlock: true,
|
||||||
// There are currently only two performance settings so if its not "base" - its max context.
|
num_ctx: this.promptWindowLimit(),
|
||||||
...(this.performanceMode === "base"
|
|
||||||
? {}
|
|
||||||
: { num_ctx: this.promptWindowLimit() }),
|
|
||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
messages,
|
messages,
|
||||||
|
|||||||
@ -464,16 +464,10 @@ class OllamaLangchainChatModel {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
static performanceMode() {
|
|
||||||
return process.env.OLLAMA_PERFORMANCE_MODE || "base";
|
|
||||||
}
|
|
||||||
|
|
||||||
static queryOptions(config = {}) {
|
static queryOptions(config = {}) {
|
||||||
const model = config?.model || process.env.OLLAMA_MODEL_PREF;
|
const model = config?.model || process.env.OLLAMA_MODEL_PREF;
|
||||||
return {
|
return {
|
||||||
...(this.performanceMode() === "base"
|
num_ctx: OllamaAILLM.promptWindowLimit(model),
|
||||||
? {}
|
|
||||||
: { num_ctx: OllamaAILLM.promptWindowLimit(model) }),
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -25,6 +25,7 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
|
|||||||
this._client = new Ollama({
|
this._client = new Ollama({
|
||||||
host: process.env.OLLAMA_BASE_PATH,
|
host: process.env.OLLAMA_BASE_PATH,
|
||||||
headers: headers,
|
headers: headers,
|
||||||
|
fetch: this.#applyFetch(),
|
||||||
});
|
});
|
||||||
this.model = model;
|
this.model = model;
|
||||||
this.verbose = true;
|
this.verbose = true;
|
||||||
@ -38,15 +39,12 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
get performanceMode() {
|
|
||||||
return process.env.OLLAMA_PERFORMANCE_MODE || "base";
|
|
||||||
}
|
|
||||||
|
|
||||||
get queryOptions() {
|
get queryOptions() {
|
||||||
|
this.providerLog(
|
||||||
|
`${this.model} is using a max context window of ${OllamaAILLM.promptWindowLimit(this.model)}/${OllamaAILLM.maxContextWindow(this.model)} tokens.`
|
||||||
|
);
|
||||||
return {
|
return {
|
||||||
...(this.performanceMode === "base"
|
num_ctx: OllamaAILLM.promptWindowLimit(this.model),
|
||||||
? {}
|
|
||||||
: { num_ctx: OllamaAILLM.promptWindowLimit(this.model) }),
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -366,6 +364,46 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
|
|||||||
getCost(_usage) {
|
getCost(_usage) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Apply a custom fetch function to the Ollama client.
|
||||||
|
* This is useful when we want to bypass the default 5m timeout for global fetch
|
||||||
|
* for machines which run responses very slowly.
|
||||||
|
* @returns {Function} The custom fetch function.
|
||||||
|
*/
|
||||||
|
#applyFetch() {
|
||||||
|
try {
|
||||||
|
if (!("OLLAMA_RESPONSE_TIMEOUT" in process.env)) return fetch;
|
||||||
|
const { Agent } = require("undici");
|
||||||
|
const moment = require("moment");
|
||||||
|
let timeout = process.env.OLLAMA_RESPONSE_TIMEOUT;
|
||||||
|
|
||||||
|
if (!timeout || isNaN(Number(timeout)) || Number(timeout) <= 5 * 60_000) {
|
||||||
|
this.providerLog(
|
||||||
|
"Timeout option was not set, is not a number, or is less than 5 minutes in ms - falling back to default",
|
||||||
|
{ timeout }
|
||||||
|
);
|
||||||
|
return fetch;
|
||||||
|
} else timeout = Number(timeout);
|
||||||
|
|
||||||
|
const noTimeoutFetch = (input, init = {}) => {
|
||||||
|
return fetch(input, {
|
||||||
|
...init,
|
||||||
|
dispatcher: new Agent({ headersTimeout: timeout }),
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
const humanDiff = moment.duration(timeout).humanize();
|
||||||
|
this.providerLog(`Applying custom fetch w/timeout of ${humanDiff}.`);
|
||||||
|
return noTimeoutFetch;
|
||||||
|
} catch (error) {
|
||||||
|
this.providerLog(
|
||||||
|
"Error applying custom fetch - using default fetch",
|
||||||
|
error
|
||||||
|
);
|
||||||
|
return fetch;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = OllamaProvider;
|
module.exports = OllamaProvider;
|
||||||
|
|||||||
@ -125,10 +125,6 @@ const KEY_MAPPING = {
|
|||||||
envKey: "OLLAMA_MODEL_TOKEN_LIMIT",
|
envKey: "OLLAMA_MODEL_TOKEN_LIMIT",
|
||||||
checks: [],
|
checks: [],
|
||||||
},
|
},
|
||||||
OllamaLLMPerformanceMode: {
|
|
||||||
envKey: "OLLAMA_PERFORMANCE_MODE",
|
|
||||||
checks: [],
|
|
||||||
},
|
|
||||||
OllamaLLMKeepAliveSeconds: {
|
OllamaLLMKeepAliveSeconds: {
|
||||||
envKey: "OLLAMA_KEEP_ALIVE_TIMEOUT",
|
envKey: "OLLAMA_KEEP_ALIVE_TIMEOUT",
|
||||||
checks: [isInteger],
|
checks: [isInteger],
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user