2749 ollama client auth token (#3005)

* ollama auth token provision * auth token provision * ollama auth provision * ollama auth token * ollama auth provision * token input field css fix * Fix provider handler not using key sensible fallback to not break existing installs re-order of input fields null-check for API key and header optional insert on request linting * apply header and auth to agent invocations * upgrading to ollama 0.5.10 for passing headers to constructor * rename Auth systemSetting key to be more descriptive linting and copy * remove untracked files + update gitignore * remove debug * patch lockfile --------- Co-authored-by: timothycarambat <rambat1010@gmail.com>
2025-02-19 05:30:17 +05:30 · 2025-02-19 05:30:17 +05:30 · 3fd0fe8fc5
commit 3fd0fe8fc5
parent 3390ccf4b1
12 changed files with 212 additions and 129 deletions
--- a/docker/.env.example
+++ b/docker/.env.example
@ -42,6 +42,7 @@ GID='1000'
 # OLLAMA_BASE_PATH='http://host.docker.internal:11434'
 # OLLAMA_MODEL_PREF='llama2'
 # OLLAMA_MODEL_TOKEN_LIMIT=4096
 # OLLAMA_AUTH_TOKEN='your-ollama-auth-token-here (optional, only for ollama running behind auth - Bearer token)'
 # LLM_PROVIDER='togetherai'
 # TOGETHER_AI_API_KEY='my-together-ai-key'
--- a/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
+++ b/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
@ -11,12 +11,15 @@ export default function OllamaLLMOptions({ settings }) {
    autoDetecting: loading,
    basePath,
    basePathValue,
    authToken,
    authTokenValue,
    showAdvancedControls,
    setShowAdvancedControls,
    handleAutoDetectClick,
  } = useProviderEndpointAutoDiscovery({
    provider: "ollama",
    initialBasePath: settings?.OllamaLLMBasePath,
    initialAuthToken: settings?.OllamaLLMAuthToken,
    ENDPOINTS: OLLAMA_COMMON_URLS,
  });
  const [performanceMode, setPerformanceMode] = useState(
@ -32,6 +35,7 @@ export default function OllamaLLMOptions({ settings }) {
        <OllamaLLMModelSelection
          settings={settings}
          basePath={basePath.value}
          authToken={authToken.value}
        />
        <div className="flex flex-col w-60">
          <label className="text-white text-sm font-semibold block mb-2">
@ -73,120 +77,146 @@ export default function OllamaLLMOptions({ settings }) {
      </div>
      <div hidden={!showAdvancedControls}>
-        <div className="w-full flex items-start gap-4">
+        <div className="flex flex-col">
-          <div className="flex flex-col w-60">
+          <div className="w-full flex items-start gap-4">
-            <div className="flex justify-between items-center mb-2">
+            <div className="flex flex-col w-60">
-              <label className="text-white text-sm font-semibold">
+              <div className="flex justify-between items-center mb-2">
-                Ollama Base URL
+                <label className="text-white text-sm font-semibold">
-              </label>
+                  Ollama Base URL
-              {loading ? (
+                </label>
-                <PreLoader size="6" />
+                {loading ? (
-              ) : (
+                  <PreLoader size="6" />
-                <>
+                ) : (
-                  {!basePathValue.value && (
+                  <>
-                    <button
+                    {!basePathValue.value && (
-                      onClick={handleAutoDetectClick}
+                      <button
-                      className="bg-primary-button text-xs font-medium px-2 py-1 rounded-lg hover:bg-secondary hover:text-white shadow-[0_4px_14px_rgba(0,0,0,0.25)]"
+                        onClick={handleAutoDetectClick}
-                    >
+                        className="bg-primary-button text-xs font-medium px-2 py-1 rounded-lg hover:bg-secondary hover:text-white shadow-[0_4px_14px_rgba(0,0,0,0.25)]"
-                      Auto-Detect
+                      >
-                    </button>
+                        Auto-Detect
-                  )}
+                      </button>
-                </>
+                    )}
-              )}
+                  </>
-            </div>
+                )}
-            <input
+              </div>
-              type="url"
+              <input
-              name="OllamaLLMBasePath"
+                type="url"
-              className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+                name="OllamaLLMBasePath"
-              placeholder="http://127.0.0.1:11434"
+                className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
-              value={basePathValue.value}
+                placeholder="http://127.0.0.1:11434"
-              required={true}
+                value={basePathValue.value}
-              autoComplete="off"
+                required={true}
-              spellCheck={false}
+                autoComplete="off"
-              onChange={basePath.onChange}
+                spellCheck={false}
-              onBlur={basePath.onBlur}
+                onChange={basePath.onChange}
-            />
+                onBlur={basePath.onBlur}
            <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
              Enter the URL where Ollama is running.
            </p>
          </div>
          <div className="flex flex-col w-60">
            <label className="text-white text-sm font-semibold block mb-2">
              Ollama Keep Alive
            </label>
            <select
              name="OllamaLLMKeepAliveSeconds"
              required={true}
              className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
              defaultValue={settings?.OllamaLLMKeepAliveSeconds ?? "300"}
            >
              <option value="0">No cache</option>
              <option value="300">5 minutes</option>
              <option value="3600">1 hour</option>
              <option value="-1">Forever</option>
            </select>
            <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
              Choose how long Ollama should keep your model in memory before
              unloading.
              <a
                className="underline text-blue-300"
                href="https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-keep-a-model-loaded-in-memory-or-make-it-unload-immediately"
                target="_blank"
                rel="noreferrer"
              >
                {" "}
                Learn more &rarr;
              </a>
            </p>
          </div>
          <div className="flex flex-col w-60">
            <label className="text-white text-sm font-semibold mb-2 flex items-center">
              Performance Mode
              <Info
                size={16}
                className="ml-2 text-white"
                data-tooltip-id="performance-mode-tooltip"
              />
-            </label>
+              <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
-            <select
+                Enter the URL where Ollama is running.
              name="OllamaLLMPerformanceMode"
              required={true}
              className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
              value={performanceMode}
              onChange={(e) => setPerformanceMode(e.target.value)}
            >
              <option value="base">Base (Default)</option>
              <option value="maximum">Maximum</option>
            </select>
            <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
              Choose the performance mode for the Ollama model.
            </p>
            <Tooltip
              id="performance-mode-tooltip"
              place="bottom"
              className="tooltip !text-xs max-w-xs"
            >
              <p className="text-red-500">
                <strong>Note:</strong> Be careful with the Maximum mode. It may
                increase resource usage significantly.
              </p>
-              <br />
+            </div>
-              <p>
+            <div className="flex flex-col w-60">
-                <strong>Base:</strong> Ollama automatically limits the context
+              <label className="text-white text-sm font-semibold mb-2 flex items-center">
-                to 2048 tokens, keeping resources usage low while maintaining
+                Performance Mode
-                good performance. Suitable for most users and models.
+                <Info
                  size={16}
                  className="ml-2 text-white"
                  data-tooltip-id="performance-mode-tooltip"
                />
              </label>
              <select
                name="OllamaLLMPerformanceMode"
                required={true}
                className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
                value={performanceMode}
                onChange={(e) => setPerformanceMode(e.target.value)}
              >
                <option value="base">Base (Default)</option>
                <option value="maximum">Maximum</option>
              </select>
              <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
                Choose the performance mode for the Ollama model.
              </p>
-              <br />
+              <Tooltip
-              <p>
+                id="performance-mode-tooltip"
-                <strong>Maximum:</strong> Uses the full context window (up to
+                place="bottom"
-                Max Tokens). Will result in increased resource usage but allows
+                className="tooltip !text-xs max-w-xs"
-                for larger context conversations. <br />
+              >
                <p className="text-red-500">
                  <strong>Note:</strong> Be careful with the Maximum mode. It
                  may increase resource usage significantly.
                </p>
                <br />
-                This is not recommended for most users.
+                <p>
                  <strong>Base:</strong> Ollama automatically limits the context
                  to 2048 tokens, keeping resources usage low while maintaining
                  good performance. Suitable for most users and models.
                </p>
                <br />
                <p>
                  <strong>Maximum:</strong> Uses the full context window (up to
                  Max Tokens). Will result in increased resource usage but
                  allows for larger context conversations. <br />
                  <br />
                  This is not recommended for most users.
                </p>
              </Tooltip>
            </div>
            <div className="flex flex-col w-60">
              <label className="text-white text-sm font-semibold block mb-2">
                Ollama Keep Alive
              </label>
              <select
                name="OllamaLLMKeepAliveSeconds"
                required={true}
                className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
                defaultValue={settings?.OllamaLLMKeepAliveSeconds ?? "300"}
              >
                <option value="0">No cache</option>
                <option value="300">5 minutes</option>
                <option value="3600">1 hour</option>
                <option value="-1">Forever</option>
              </select>
              <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
                Choose how long Ollama should keep your model in memory before
                unloading.
                <a
                  className="underline text-blue-300"
                  href="https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-keep-a-model-loaded-in-memory-or-make-it-unload-immediately"
                  target="_blank"
                  rel="noreferrer"
                >
                  {" "}
                  Learn more &rarr;
                </a>
              </p>
-            </Tooltip>
+            </div>
          </div>
          <div className="w-full flex items-start gap-4">
            <div className="flex flex-col w-100">
              <label className="text-white text-sm font-semibold">
                Auth Token
              </label>
              <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
                Enter a <code>Bearer</code> Auth Token for interacting with your
                Ollama server.
                <br />
                Used <b>only</b> if running Ollama behind an authentication
                server.
              </p>
              <input
                type="password"
                name="OllamaLLMAuthToken"
                className="border-none bg-theme-settings-input-bg mt-2 text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg outline-none block w-full p-2.5"
                placeholder="Ollama Auth Token"
                value={authTokenValue.value}
                onChange={authToken.onChange}
                onBlur={authToken.onBlur}
                required={false}
                autoComplete="off"
                spellCheck={false}
              />
            </div>
          </div>
        </div>
      </div>
@ -194,7 +224,11 @@ export default function OllamaLLMOptions({ settings }) {
  );
 }
-function OllamaLLMModelSelection({ settings, basePath = null }) {
+function OllamaLLMModelSelection({
  settings,
  basePath = null,
  authToken = null,
 }) {
  const [customModels, setCustomModels] = useState([]);
  const [loading, setLoading] = useState(true);
@ -207,7 +241,11 @@ function OllamaLLMModelSelection({ settings, basePath = null }) {
      }
      setLoading(true);
      try {
-        const { models } = await System.customModels("ollama", null, basePath);
+        const { models } = await System.customModels(
          "ollama",
          authToken,
          basePath
        );
        setCustomModels(models || []);
      } catch (error) {
        console.error("Failed to fetch custom models:", error);
@ -216,7 +254,7 @@ function OllamaLLMModelSelection({ settings, basePath = null }) {
      setLoading(false);
    }
    findCustomModels();
-  }, [basePath]);
+  }, [basePath, authToken]);
  if (loading || customModels.length == 0) {
    return (
--- a/frontend/src/hooks/useProviderEndpointAutoDiscovery.js
+++ b/frontend/src/hooks/useProviderEndpointAutoDiscovery.js
@ -5,11 +5,15 @@ import showToast from "@/utils/toast";
 export default function useProviderEndpointAutoDiscovery({
  provider = null,
  initialBasePath = "",
  initialAuthToken = null,
  ENDPOINTS = [],
 }) {
  const [loading, setLoading] = useState(false);
  const [basePath, setBasePath] = useState(initialBasePath);
  const [basePathValue, setBasePathValue] = useState(initialBasePath);
  const [authToken, setAuthToken] = useState(initialAuthToken);
  const [authTokenValue, setAuthTokenValue] = useState(initialAuthToken);
  const [autoDetectAttempted, setAutoDetectAttempted] = useState(false);
  const [showAdvancedControls, setShowAdvancedControls] = useState(true);
@ -20,7 +24,7 @@ export default function useProviderEndpointAutoDiscovery({
    ENDPOINTS.forEach((endpoint) => {
      possibleEndpoints.push(
        new Promise((resolve, reject) => {
-          System.customModels(provider, null, endpoint, 2_000)
+          System.customModels(provider, authTokenValue, endpoint, 2_000)
            .then((results) => {
              if (!results?.models || results.models.length === 0)
                throw new Error("No models");
@ -74,9 +78,18 @@ export default function useProviderEndpointAutoDiscovery({
    setBasePath(basePathValue);
  }
  /**
   * Change handler for the auth token input: stores the input's current text
   * as the draft token value. Only the draft state is touched here.
   *
   * @param {{target: {value: string}}} e - Change event carrying the new text.
   */
  function handleAuthTokenChange(e) {
    setAuthTokenValue(e.target.value);
  }
  /**
   * Blur handler for the auth token input: copies the current draft token
   * value into the committed auth token state.
   */
  function handleAuthTokenBlur() {
    const draftToken = authTokenValue;
    setAuthToken(draftToken);
  }
  useEffect(() => {
    if (!initialBasePath && !autoDetectAttempted) autoDetect(true);
-  }, [initialBasePath, autoDetectAttempted]);
+  }, [initialBasePath, initialAuthToken, autoDetectAttempted]);
  return {
    autoDetecting: loading,
@ -93,6 +106,16 @@ export default function useProviderEndpointAutoDiscovery({
      value: basePathValue,
      set: setBasePathValue,
    },
    authToken: {
      value: authToken,
      set: setAuthTokenValue,
      onChange: handleAuthTokenChange,
      onBlur: handleAuthTokenBlur,
    },
    authTokenValue: {
      value: authTokenValue,
      set: setAuthTokenValue,
    },
    handleAutoDetectClick,
    runAutoDetect: autoDetect,
  };
--- a/server/.env.example
+++ b/server/.env.example
@ -39,6 +39,7 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
 # OLLAMA_BASE_PATH='http://host.docker.internal:11434'
 # OLLAMA_MODEL_PREF='llama2'
 # OLLAMA_MODEL_TOKEN_LIMIT=4096
 # OLLAMA_AUTH_TOKEN='your-ollama-auth-token-here (optional, only for ollama running behind auth - Bearer token)'
 # LLM_PROVIDER='togetherai'
 # TOGETHER_AI_API_KEY='my-together-ai-key'
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@ -471,6 +471,7 @@ const SystemSettings = {
      OllamaLLMTokenLimit: process.env.OLLAMA_MODEL_TOKEN_LIMIT,
      OllamaLLMKeepAliveSeconds: process.env.OLLAMA_KEEP_ALIVE_TIMEOUT ?? 300,
      OllamaLLMPerformanceMode: process.env.OLLAMA_PERFORMANCE_MODE ?? "base",
      OllamaLLMAuthToken: process.env.OLLAMA_AUTH_TOKEN ?? null,
      // Novita LLM Keys
      NovitaLLMApiKey: !!process.env.NOVITA_LLM_API_KEY,
--- a/server/package.json
+++ b/server/package.json
@ -63,7 +63,7 @@
    "mssql": "^10.0.2",
    "multer": "^1.4.5-lts.1",
    "mysql2": "^3.9.8",
-    "ollama": "^0.5.0",
+    "ollama": "^0.5.10",
    "openai": "4.38.5",
    "pg": "^8.11.5",
    "pinecone-client": "^1.1.0",
@ -97,4 +97,4 @@
    "nodemon": "^2.0.22",
    "prettier": "^3.0.3"
  }
-}
+}
--- a/server/storage/models/.gitignore
+++ b/server/storage/models/.gitignore
@ -7,4 +7,5 @@ novita
 mixedbread-ai*
 gemini
 togetherAi
-tesseract
+tesseract
 ppio
--- a/server/utils/AiProviders/ollama/index.js
+++ b/server/utils/AiProviders/ollama/index.js
@ -15,6 +15,7 @@ class OllamaAILLM {
    if (!process.env.OLLAMA_BASE_PATH)
      throw new Error("No Ollama Base Path was set.");
    this.authToken = process.env.OLLAMA_AUTH_TOKEN;
    this.basePath = process.env.OLLAMA_BASE_PATH;
    this.model = modelPreference || process.env.OLLAMA_MODEL_PREF;
    this.performanceMode = process.env.OLLAMA_PERFORMANCE_MODE || "base";
@ -27,7 +28,10 @@ class OllamaAILLM {
      user: this.promptWindowLimit() * 0.7,
    };
-    this.client = new Ollama({ host: this.basePath });
+    const headers = this.authToken
      ? { Authorization: `Bearer ${this.authToken}` }
      : {};
    this.client = new Ollama({ host: this.basePath, headers: headers });
    this.embedder = embedder ?? new NativeEmbedder();
    this.defaultTemp = 0.7;
    this.#log(
@ -273,9 +277,8 @@ class OllamaAILLM {
          type: "textResponseChunk",
          textResponse: "",
          close: true,
-          error: `Ollama:streaming - could not stream chat. ${
+          error: `Ollama:streaming - could not stream chat. ${error?.cause ?? error.message
-            error?.cause ?? error.message
+            }`,
          }`,
        });
        response.removeListener("close", handleAbort);
        stream?.endMeasurement(usage);
--- a/server/utils/agents/aibitat/providers/ollama.js
+++ b/server/utils/agents/aibitat/providers/ollama.js
@ -16,7 +16,13 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
    } = config;
    super();
-    this._client = new Ollama({ host: process.env.OLLAMA_BASE_PATH });
+    const headers = process.env.OLLAMA_AUTH_TOKEN
      ? { Authorization: `Bearer ${process.env.OLLAMA_AUTH_TOKEN}` }
      : {};
    this._client = new Ollama({
      host: process.env.OLLAMA_BASE_PATH,
      headers: headers,
    });
    this.model = model;
    this.verbose = true;
  }
--- a/server/utils/helpers/customModels.js
+++ b/server/utils/helpers/customModels.js
@ -41,7 +41,7 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
    case "localai":
      return await localAIModels(basePath, apiKey);
    case "ollama":
-      return await ollamaAIModels(basePath);
+      return await ollamaAIModels(basePath, apiKey);
    case "togetherai":
      return await getTogetherAiModels(apiKey);
    case "fireworksai":
@ -292,7 +292,7 @@ async function getKoboldCPPModels(basePath = null) {
  }
 }
-async function ollamaAIModels(basePath = null) {
+async function ollamaAIModels(basePath = null, _authToken = null) {
  let url;
  try {
    let urlPath = basePath ?? process.env.OLLAMA_BASE_PATH;
@ -304,7 +304,9 @@ async function ollamaAIModels(basePath = null) {
    return { models: [], error: "Not a valid URL." };
  }
-  const models = await fetch(`${url}/api/tags`)
+  const authToken = _authToken || process.env.OLLAMA_AUTH_TOKEN || null;
  const headers = authToken ? { Authorization: `Bearer ${authToken}` } : {};
  const models = await fetch(`${url}/api/tags`, { headers: headers })
    .then((res) => {
      if (!res.ok)
        throw new Error(`Could not reach Ollama server! ${res.status}`);
@ -321,6 +323,9 @@ async function ollamaAIModels(basePath = null) {
      return [];
    });
  // Auth token was successful, so let's save it for future use
  if (models.length > 0 && !!authToken)
    process.env.OLLAMA_AUTH_TOKEN = authToken;
  return { models, error: null };
 }
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@ -120,6 +120,10 @@ const KEY_MAPPING = {
    envKey: "OLLAMA_KEEP_ALIVE_TIMEOUT",
    checks: [isInteger],
  },
  OllamaLLMAuthToken: {
    envKey: "OLLAMA_AUTH_TOKEN",
    checks: [],
  },
  // Mistral AI API Settings
  MistralApiKey: {
--- a/server/yarn.lock
+++ b/server/yarn.lock
@ -5198,10 +5198,10 @@ object.values@^1.1.6, object.values@^1.1.7:
    define-properties "^1.2.1"
    es-object-atoms "^1.0.0"
-ollama@^0.5.0:
+ollama@^0.5.10:
-  version "0.5.0"
+  version "0.5.12"
-  resolved "https://registry.yarnpkg.com/ollama/-/ollama-0.5.0.tgz#cb9bc709d4d3278c9f484f751b0d9b98b06f4859"
+  resolved "https://registry.yarnpkg.com/ollama/-/ollama-0.5.12.tgz#d8aadfaff076b2852cf826d928a03d9a40f308b9"
-  integrity sha512-CRtRzsho210EGdK52GrUMohA2pU+7NbgEaBG3DcYeRmvQthDO7E2LHOkLlUUeaYUlNmEd8icbjC02ug9meSYnw==
+  integrity sha512-flVH1fn1c9NF7VV3bW9kSu0E+bYc40b4DxL/gS2Debhao35osJFRDiPOj9sIWTMvcyj78Paw1OuhfIe7uhDWfQ==
  dependencies:
    whatwg-fetch "^3.6.20"