diff --git a/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx b/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx index 67367c7f..bbe85ded 100644 --- a/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx +++ b/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx @@ -1,10 +1,10 @@ import React, { useEffect, useState } from "react"; import System from "@/models/system"; -import PreLoader from "@/components/Preloader"; import { OLLAMA_COMMON_URLS } from "@/utils/constants"; -import { CaretDown, CaretUp, Info } from "@phosphor-icons/react"; +import { CaretDown, CaretUp, Info, CircleNotch } from "@phosphor-icons/react"; import useProviderEndpointAutoDiscovery from "@/hooks/useProviderEndpointAutoDiscovery"; import { Tooltip } from "react-tooltip"; +import { Link } from "react-router-dom"; export default function OllamaLLMOptions({ settings }) { const { @@ -21,9 +21,6 @@ export default function OllamaLLMOptions({ settings }) { initialBasePath: settings?.OllamaLLMBasePath, ENDPOINTS: OLLAMA_COMMON_URLS, }); - const [performanceMode, setPerformanceMode] = useState( - settings?.OllamaLLMPerformanceMode || "base" - ); const [maxTokens, setMaxTokens] = useState( settings?.OllamaLLMTokenLimit || "" ); @@ -56,14 +53,36 @@ export default function OllamaLLMOptions({ settings }) {
- Enter the URL where Ollama is running. -
+ Specify the maximum number of tokens that can be used for
+ the model context window.
+
+ If you leave this field blank, the context window limit will
+ be auto-detected from the model and applied to all chats. If
+ auto-detection fails, a fallback context window limit of
+ 4096 will be used.
+
+ Important: Some models have very large context
+ windows; using the full context window limit can dramatically
+ increase the memory usage of your system. For this reason,
+ we will automatically cap the context window limit to 16,384
+ tokens if the model supports more than that and no value is
+ specified.
+
+ If an invalid value is entered, AnythingLLM will handle this
+ for you so that chats do not fail.
+
- Override the context window limit. Leave empty to auto-detect - from the model (defaults to 4096 if detection fails). -
- Enter a Bearer Auth Token for interacting with your
- Ollama server.
-
- Used only if running Ollama behind an authentication
- server.
-
+ Enter a Bearer Auth Token for interacting with
+ your Ollama server.
+
+ Used only if running Ollama behind an authentication
+ server.
+