diff --git a/frontend/src/components/LLMSelection/LemonadeOptions/index.jsx b/frontend/src/components/LLMSelection/LemonadeOptions/index.jsx
index c61bae1f..9fc5b850 100644
--- a/frontend/src/components/LLMSelection/LemonadeOptions/index.jsx
+++ b/frontend/src/components/LLMSelection/LemonadeOptions/index.jsx
@@ -106,7 +106,7 @@ export default function LemonadeOptions({ settings }) {
             type="url"
             name="LemonadeLLMBasePath"
             className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
-            placeholder="http://localhost:8000"
+            placeholder="http://localhost:13305"
             value={cleanBasePath(basePathValue.value)}
             required={true}
             autoComplete="off"
@@ -150,7 +150,7 @@ export default function LemonadeOptions({ settings }) {
             type="number"
             name="LemonadeLLMModelTokenLimit"
             className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
-            placeholder="4096"
+            placeholder="8192"
             min={1}
             value={maxTokens}
             onChange={(e) => setMaxTokens(Number(e.target.value))}
diff --git a/frontend/src/utils/constants.js b/frontend/src/utils/constants.js
index f6bdc7b9..b50621ad 100644
--- a/frontend/src/utils/constants.js
+++ b/frontend/src/utils/constants.js
@@ -67,6 +67,12 @@ export const LEMONADE_COMMON_URLS = [
   "http://127.0.0.1:8000/live",
   "http://host.docker.internal:8000/live",
   "http://172.17.0.1:8000/live",
+
+  // In Lemonade 10.1.0 the base port is 13305
+  "http://localhost:13305/live",
+  "http://127.0.0.1:13305/live",
+  "http://host.docker.internal:13305/live",
+  "http://172.17.0.1:13305/live",
 ];
 
 export function fullApiUrl() {
diff --git a/server/utils/AiProviders/lemonade/index.js b/server/utils/AiProviders/lemonade/index.js
index 206b1582..b7895340 100644
--- a/server/utils/AiProviders/lemonade/index.js
+++ b/server/utils/AiProviders/lemonade/index.js
@@ -22,7 +22,7 @@ class LemonadeLLM {
         process.env.LEMONADE_LLM_BASE_PATH,
         "openai"
       ),
-      apiKey: process.env.LEMONADE_LLM_API_KEY ?? null,
+      apiKey: process.env.LEMONADE_LLM_API_KEY || null,
     });
 
     this.model = modelPreference || process.env.LEMONADE_LLM_MODEL_PREF;
@@ -202,7 +202,7 @@ class LemonadeLLM {
           process.env.LEMONADE_LLM_BASE_PATH,
           "openai"
         ),
-        apiKey: process.env.LEMONADE_LLM_API_KEY ?? null,
+        apiKey: process.env.LEMONADE_LLM_API_KEY || null,
       });
 
       const { labels = [] } = await client.models.retrieve(this.model);
@@ -223,6 +223,41 @@ class LemonadeLLM {
     }
   }
 
+  /**
+   * Get the models currently loaded on the Lemonade server via its health endpoint.
+   * Each entry carries the model name and the context size reported for it.
+   * @param {string} [basePath] - Server base path; defaults to LEMONADE_LLM_BASE_PATH.
+   * @returns {Promise<{model_name: string, ctx_size: number}[]>} ctx_size falls back to 8192.
+   * @throws {Error} When the health endpoint responds with a non-OK status.
+   */
+  static async getCurrentlyLoadedModels(
+    basePath = process.env.LEMONADE_LLM_BASE_PATH
+  ) {
+    const endpoint = new URL(parseLemonadeServerEndpoint(basePath, "openai"));
+    endpoint.pathname += "/health";
+
+    // Only send the Authorization header when an API key is configured.
+    const apiKey = process.env.LEMONADE_LLM_API_KEY;
+    const response = await fetch(endpoint.toString(), {
+      method: "GET",
+      headers: apiKey ? { Authorization: `Bearer ${apiKey}` } : {},
+    });
+    if (!response.ok)
+      throw new Error(
+        `Failed to get currently loaded models: ${response.statusText}`
+      );
+
+    // Tolerate a null body or a missing/non-array `all_models_loaded` field.
+    const payload = await response.json();
+    const loaded = payload?.all_models_loaded;
+    if (!Array.isArray(loaded)) return [];
+
+    return loaded.map((model) => ({
+      model_name: model.model_name,
+      ctx_size: model?.recipe_options?.ctx_size ?? 8192,
+    }));
+  }
+
   /**
    * Utility function to load a model from the Lemonade server.
    * Does not check if the model is already loaded or unloads any models.
@@ -230,12 +265,33 @@ class LemonadeLLM {
    */
   static async loadModel(model, basePath = process.env.LEMONADE_LLM_BASE_PATH) {
     try {
+      const desiredCtxSize = Number(this.promptWindowLimit());
+      const currentlyLoadedModels =
+        await LemonadeLLM.getCurrentlyLoadedModels();
+      const modelAlreadyLoaded = currentlyLoadedModels.find(
+        (m) => m.model_name === model
+      );
+
+      if (modelAlreadyLoaded) {
+        if (modelAlreadyLoaded.ctx_size === desiredCtxSize) {
+          LemonadeLLM.slog(
+            `Model ${model} already loaded with ctx size ${desiredCtxSize}`
+          );
+          return true;
+        }
+
+        LemonadeLLM.slog(
+          `Model ${model} needs to be reloaded again with ctx size ${desiredCtxSize}`
+        );
+      }
+
       const endpoint = new URL(parseLemonadeServerEndpoint(basePath, "openai"));
       endpoint.pathname += "/load";
 
       LemonadeLLM.slog(
-        `Loading model ${model} with context size ${this.promptWindowLimit()}`
+        `Loading model ${model} with context size ${desiredCtxSize}`
       );
+
       await fetch(endpoint.toString(), {
         method: "POST",
         headers: {
@@ -246,7 +302,7 @@ class LemonadeLLM {
         },
         body: JSON.stringify({
           model_name: String(model),
-          ctx_size: Number(this.promptWindowLimit()),
+          ctx_size: desiredCtxSize,
         }),
       })
         .then((response) => {
diff --git a/server/utils/EmbeddingEngines/lemonade/index.js b/server/utils/EmbeddingEngines/lemonade/index.js
index e2da82f3..8d5a64f4 100644
--- a/server/utils/EmbeddingEngines/lemonade/index.js
+++ b/server/utils/EmbeddingEngines/lemonade/index.js
@@ -14,7 +14,7 @@ class LemonadeEmbedder {
         process.env.EMBEDDING_BASE_PATH,
         "openai"
       ),
-      apiKey: process.env.LEMONADE_LLM_API_KEY ?? null,
+      apiKey: process.env.LEMONADE_LLM_API_KEY || null,
     });
     this.model = process.env.EMBEDDING_MODEL_PREF;
 
diff --git a/server/utils/agentFlows/executors/llm-instruction.js b/server/utils/agentFlows/executors/llm-instruction.js
index d7e4e57f..d594c547 100644
--- a/server/utils/agentFlows/executors/llm-instruction.js
+++ b/server/utils/agentFlows/executors/llm-instruction.js
@@ -23,13 +23,17 @@ async function executeLLMInstruction(config, context) {
     if (typeof input === "object") input = JSON.stringify(input);
     if (typeof input !== "string") input = String(input);
 
+    let completion;
     const provider = aibitat.getProviderForConfig(aibitat.defaultProvider);
-    const completion = await provider.complete([
-      {
-        role: "user",
-        content: input,
-      },
-    ]);
+    if (provider.supportsAgentStreaming) {
+      completion = await provider.stream(
+        [{ role: "user", content: input }],
+        [],
+        null
+      );
+    } else {
+      completion = await provider.complete([{ role: "user", content: input }]);
+    }
 
     introspect(`Successfully received LLM response`);
     if (resultVariable) config.resultVariable = resultVariable;
diff --git a/server/utils/agents/aibitat/index.js b/server/utils/agents/aibitat/index.js
index 37a285b3..c372740f 100644
--- a/server/utils/agents/aibitat/index.js
+++ b/server/utils/agents/aibitat/index.js
@@ -843,25 +843,18 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection
     );
 
     if (completionStream.functionCall) {
-      if (depth >= this.maxToolCalls) {
-        this.handlerProps?.log?.(
-          `[warning]: Maximum tool call limit (${this.maxToolCalls}) reached. Making final response without tools.`
-        );
-        this?.introspect?.(
-          `Maximum tool call limit (${this.maxToolCalls}) reached. Generating a final response from what I have so far.`
-        );
-
-        const finalStream = await this.#safeProviderCall(() =>
-          provider.stream(messages, [], eventHandler)
-        );
-        const finalResponse =
-          finalStream?.textResponse ||
-          "I reached the maximum number of tool calls allowed for a single response. Here is what I have so far based on the tools I was able to run.";
-        return finalResponse;
-      }
-
       const { name, arguments: args } = completionStream.functionCall;
       const fn = this.functions.get(name);
+      const reachedToolLimit = depth >= this.maxToolCalls;
+
+      if (reachedToolLimit) {
+        this.handlerProps?.log?.(
+          `[warning]: Maximum tool call limit (${this.maxToolCalls}) reached. Executing final tool call then generating response.`
+        );
+        this?.introspect?.(
+          `Maximum tool call limit (${this.maxToolCalls}) reached. After this tool I will generate a final response.`
+        );
+      }
 
       if (!fn) {
         return await this.handleAsyncExecution(
@@ -875,7 +868,7 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection
               originalFunctionCall: completionStream.functionCall,
             },
           ],
-          functions,
+          reachedToolLimit ? [] : functions,
           byAgent,
           depth + 1
         );
@@ -951,7 +944,7 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection
       return await this.handleAsyncExecution(
         provider,
         newMessages,
-        functions,
+        reachedToolLimit ? [] : functions,
         byAgent,
         depth + 1
       );
@@ -1000,31 +993,18 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection
     );
 
     if (completion.functionCall) {
-      if (depth >= this.maxToolCalls) {
-        this.handlerProps?.log?.(
-          `[warning]: Maximum tool call limit (${this.maxToolCalls}) reached. Making final response without tools.`
-        );
-        this?.introspect?.(
-          `Maximum tool call limit (${this.maxToolCalls}) reached. Generating a final response from what I have so far.`
-        );
-
-        const finalCompletion = await this.#safeProviderCall(() =>
-          provider.complete(messages, [])
-        );
-        eventHandler?.("reportStreamEvent", {
-          type: "usageMetrics",
-          uuid: msgUUID,
-          metrics: provider.getUsage(),
-        });
-        this?.flushCitations?.(msgUUID);
-        return (
-          finalCompletion?.textResponse ||
-          "I reached the maximum number of tool calls allowed for a single response. Here is what I have so far based on the tools I was able to run."
-        );
-      }
-
       const { name, arguments: args } = completion.functionCall;
       const fn = this.functions.get(name);
+      const reachedToolLimit = depth >= this.maxToolCalls;
+
+      if (reachedToolLimit) {
+        this.handlerProps?.log?.(
+          `[warning]: Maximum tool call limit (${this.maxToolCalls}) reached. Executing final tool call then generating response.`
+        );
+        this?.introspect?.(
+          `Maximum tool call limit (${this.maxToolCalls}) reached. After this tool I will generate a final response.`
+        );
+      }
 
       if (!fn) {
         return await this.handleExecution(
@@ -1038,7 +1018,7 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection
               originalFunctionCall: completion.functionCall,
             },
           ],
-          functions,
+          reachedToolLimit ? [] : functions,
           byAgent,
           depth + 1,
           msgUUID
@@ -1103,7 +1083,7 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection
       return await this.handleExecution(
         provider,
         newMessages,
-        functions,
+        reachedToolLimit ? [] : functions,
         byAgent,
         depth + 1,
         msgUUID
diff --git a/server/utils/agents/aibitat/providers/ai-provider.js b/server/utils/agents/aibitat/providers/ai-provider.js
index 4d234767..ea8dc2b0 100644
--- a/server/utils/agents/aibitat/providers/ai-provider.js
+++ b/server/utils/agents/aibitat/providers/ai-provider.js
@@ -402,7 +402,7 @@ class Provider {
           configuration: {
             baseURL: process.env.LEMONADE_LLM_BASE_PATH,
           },
-          apiKey: process.env.LEMONADE_LLM_API_KEY ?? null,
+          apiKey: process.env.LEMONADE_LLM_API_KEY || null,
           ...config,
         });
       default:
diff --git a/server/utils/agents/aibitat/providers/lemonade.js b/server/utils/agents/aibitat/providers/lemonade.js
index 0e6715d5..1c47a2ac 100644
--- a/server/utils/agents/aibitat/providers/lemonade.js
+++ b/server/utils/agents/aibitat/providers/lemonade.js
@@ -27,7 +27,7 @@ class LemonadeProvider extends InheritMultiple([Provider, UnTooled]) {
         process.env.LEMONADE_LLM_BASE_PATH,
         "openai"
       ),
-      apiKey: process.env.LEMONADE_LLM_API_KEY ?? null,
+      apiKey: process.env.LEMONADE_LLM_API_KEY || null,
       maxRetries: 3,
     });