Merge branch 'master' of github.com:Mintplex-Labs/anything-llm

2026-04-08 14:17:49 -07:00 · 2026-04-08 14:17:49 -07:00 · 82ef164314
commit 82ef164314
parent 4af0257263 b2404801d1
8 changed files with 105 additions and 59 deletions
--- a/frontend/src/components/LLMSelection/LemonadeOptions/index.jsx
+++ b/frontend/src/components/LLMSelection/LemonadeOptions/index.jsx
@ -106,7 +106,7 @@ export default function LemonadeOptions({ settings }) {
            type="url"
            name="LemonadeLLMBasePath"
            className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
-            placeholder="http://localhost:8000"
+            placeholder="http://localhost:13305"
            value={cleanBasePath(basePathValue.value)}
            required={true}
            autoComplete="off"
@ -150,7 +150,7 @@ export default function LemonadeOptions({ settings }) {
            type="number"
            name="LemonadeLLMModelTokenLimit"
            className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
-            placeholder="4096"
+            placeholder="8192"
            min={1}
            value={maxTokens}
            onChange={(e) => setMaxTokens(Number(e.target.value))}
--- a/frontend/src/utils/constants.js
+++ b/frontend/src/utils/constants.js
@ -67,6 +67,12 @@ export const LEMONADE_COMMON_URLS = [
  "http://127.0.0.1:8000/live",
  "http://host.docker.internal:8000/live",
  "http://172.17.0.1:8000/live",
  // In Lemonade 10.1.0 the base port is 13305
  "http://localhost:13305/live",
  "http://127.0.0.1:13305/live",
  "http://host.docker.internal:13305/live",
  "http://172.17.0.1:13305/live",
 ];
 export function fullApiUrl() {
--- a/server/utils/AiProviders/lemonade/index.js
+++ b/server/utils/AiProviders/lemonade/index.js
@ -22,7 +22,7 @@ class LemonadeLLM {
        process.env.LEMONADE_LLM_BASE_PATH,
        "openai"
      ),
-      apiKey: process.env.LEMONADE_LLM_API_KEY ?? null,
+      apiKey: process.env.LEMONADE_LLM_API_KEY || null,
    });
    this.model = modelPreference || process.env.LEMONADE_LLM_MODEL_PREF;
@ -202,7 +202,7 @@ class LemonadeLLM {
          process.env.LEMONADE_LLM_BASE_PATH,
          "openai"
        ),
-        apiKey: process.env.LEMONADE_LLM_API_KEY ?? null,
+        apiKey: process.env.LEMONADE_LLM_API_KEY || null,
      });
      const { labels = [] } = await client.models.retrieve(this.model);
@ -223,6 +223,41 @@ class LemonadeLLM {
    }
  }
  /**
   * Get the currently loaded models from the Lemonade server.
   * @returns {Promise<string[]>}
   */
  static async getCurrentlyLoadedModels() {
    const endpoint = new URL(
      parseLemonadeServerEndpoint(process.env.LEMONADE_LLM_BASE_PATH, "openai")
    );
    endpoint.pathname += "/health";
    const loadedModels = await fetch(endpoint.toString(), {
      method: "GET",
      headers: {
        ...(process.env.LEMONADE_LLM_API_KEY
          ? { Authorization: `Bearer ${process.env.LEMONADE_LLM_API_KEY}` }
          : {}),
      },
    })
      .then((response) => {
        if (!response.ok)
          throw new Error(
            `Failed to get currently loaded models: ${response.statusText}`
          );
        return response.json();
      })
      .then(({ all_models_loaded = [] } = {}) => {
        return all_models_loaded.map((model) => {
          return {
            model_name: model.model_name,
            ctx_size: model?.recipe_options?.ctx_size ?? 8192,
          };
        });
      });
    return loadedModels;
  }
  /**
   * Utility function to load a model from the Lemonade server.
   * Skips the load when the model is already loaded with the desired context size; does not unload any models.
@ -230,12 +265,33 @@ class LemonadeLLM {
   */
  static async loadModel(model, basePath = process.env.LEMONADE_LLM_BASE_PATH) {
    try {
      const desiredCtxSize = Number(this.promptWindowLimit());
      const currentlyLoadedModels =
        await LemonadeLLM.getCurrentlyLoadedModels();
      const modelAlreadyLoaded = currentlyLoadedModels.find(
        (m) => m.model_name === model
      );
      if (modelAlreadyLoaded) {
        if (modelAlreadyLoaded.ctx_size === desiredCtxSize) {
          LemonadeLLM.slog(
            `Model ${model} already loaded with ctx size ${desiredCtxSize}`
          );
          return true;
        }
        LemonadeLLM.slog(
          `Model ${model} needs to be reloaded again with ctx size ${desiredCtxSize}`
        );
      }
      const endpoint = new URL(parseLemonadeServerEndpoint(basePath, "openai"));
      endpoint.pathname += "/load";
      LemonadeLLM.slog(
-        `Loading model ${model} with context size ${this.promptWindowLimit()}`
+        `Loading model ${model} with context size ${desiredCtxSize}`
      );
      await fetch(endpoint.toString(), {
        method: "POST",
        headers: {
@ -246,7 +302,7 @@ class LemonadeLLM {
        },
        body: JSON.stringify({
          model_name: String(model),
-          ctx_size: Number(this.promptWindowLimit()),
+          ctx_size: desiredCtxSize,
        }),
      })
        .then((response) => {
--- a/server/utils/EmbeddingEngines/lemonade/index.js
+++ b/server/utils/EmbeddingEngines/lemonade/index.js
@ -14,7 +14,7 @@ class LemonadeEmbedder {
        process.env.EMBEDDING_BASE_PATH,
        "openai"
      ),
-      apiKey: process.env.LEMONADE_LLM_API_KEY ?? null,
+      apiKey: process.env.LEMONADE_LLM_API_KEY || null,
    });
    this.model = process.env.EMBEDDING_MODEL_PREF;
--- a/server/utils/agentFlows/executors/llm-instruction.js
+++ b/server/utils/agentFlows/executors/llm-instruction.js
@ -23,13 +23,17 @@ async function executeLLMInstruction(config, context) {
    if (typeof input === "object") input = JSON.stringify(input);
    if (typeof input !== "string") input = String(input);
    let completion;
    const provider = aibitat.getProviderForConfig(aibitat.defaultProvider);
-    const completion = await provider.complete([
+    if (provider.supportsAgentStreaming) {
-      {
+      completion = await provider.stream(
-        role: "user",
+        [{ role: "user", content: input }],
-        content: input,
+        [],
-      },
+        null
-    ]);
+      );
    } else {
      completion = await provider.complete([{ role: "user", content: input }]);
    }
    introspect(`Successfully received LLM response`);
    if (resultVariable) config.resultVariable = resultVariable;
--- a/server/utils/agents/aibitat/index.js
+++ b/server/utils/agents/aibitat/index.js
@ -843,25 +843,18 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection
    );
    if (completionStream.functionCall) {
      if (depth >= this.maxToolCalls) {
        this.handlerProps?.log?.(
          `[warning]: Maximum tool call limit (${this.maxToolCalls}) reached. Making final response without tools.`
        );
        this?.introspect?.(
          `Maximum tool call limit (${this.maxToolCalls}) reached. Generating a final response from what I have so far.`
        );
        const finalStream = await this.#safeProviderCall(() =>
          provider.stream(messages, [], eventHandler)
        );
        const finalResponse =
          finalStream?.textResponse ||
          "I reached the maximum number of tool calls allowed for a single response. Here is what I have so far based on the tools I was able to run.";
        return finalResponse;
      }
      const { name, arguments: args } = completionStream.functionCall;
      const fn = this.functions.get(name);
      const reachedToolLimit = depth >= this.maxToolCalls;
      if (reachedToolLimit) {
        this.handlerProps?.log?.(
          `[warning]: Maximum tool call limit (${this.maxToolCalls}) reached. Executing final tool call then generating response.`
        );
        this?.introspect?.(
          `Maximum tool call limit (${this.maxToolCalls}) reached. After this tool I will generate a final response.`
        );
      }
      if (!fn) {
        return await this.handleAsyncExecution(
@ -875,7 +868,7 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection
              originalFunctionCall: completionStream.functionCall,
            },
          ],
-          functions,
+          reachedToolLimit ? [] : functions,
          byAgent,
          depth + 1
        );
@ -951,7 +944,7 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection
      return await this.handleAsyncExecution(
        provider,
        newMessages,
-        functions,
+        reachedToolLimit ? [] : functions,
        byAgent,
        depth + 1
      );
@ -1000,31 +993,18 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection
    );
    if (completion.functionCall) {
      if (depth >= this.maxToolCalls) {
        this.handlerProps?.log?.(
          `[warning]: Maximum tool call limit (${this.maxToolCalls}) reached. Making final response without tools.`
        );
        this?.introspect?.(
          `Maximum tool call limit (${this.maxToolCalls}) reached. Generating a final response from what I have so far.`
        );
        const finalCompletion = await this.#safeProviderCall(() =>
          provider.complete(messages, [])
        );
        eventHandler?.("reportStreamEvent", {
          type: "usageMetrics",
          uuid: msgUUID,
          metrics: provider.getUsage(),
        });
        this?.flushCitations?.(msgUUID);
        return (
          finalCompletion?.textResponse ||
          "I reached the maximum number of tool calls allowed for a single response. Here is what I have so far based on the tools I was able to run."
        );
      }
      const { name, arguments: args } = completion.functionCall;
      const fn = this.functions.get(name);
      const reachedToolLimit = depth >= this.maxToolCalls;
      if (reachedToolLimit) {
        this.handlerProps?.log?.(
          `[warning]: Maximum tool call limit (${this.maxToolCalls}) reached. Executing final tool call then generating response.`
        );
        this?.introspect?.(
          `Maximum tool call limit (${this.maxToolCalls}) reached. After this tool I will generate a final response.`
        );
      }
      if (!fn) {
        return await this.handleExecution(
@ -1038,7 +1018,7 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection
              originalFunctionCall: completion.functionCall,
            },
          ],
-          functions,
+          reachedToolLimit ? [] : functions,
          byAgent,
          depth + 1,
          msgUUID
@ -1103,7 +1083,7 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection
      return await this.handleExecution(
        provider,
        newMessages,
-        functions,
+        reachedToolLimit ? [] : functions,
        byAgent,
        depth + 1,
        msgUUID
--- a/server/utils/agents/aibitat/providers/ai-provider.js
+++ b/server/utils/agents/aibitat/providers/ai-provider.js
@ -402,7 +402,7 @@ class Provider {
          configuration: {
            baseURL: process.env.LEMONADE_LLM_BASE_PATH,
          },
-          apiKey: process.env.LEMONADE_LLM_API_KEY ?? null,
+          apiKey: process.env.LEMONADE_LLM_API_KEY || null,
          ...config,
        });
      default:
--- a/server/utils/agents/aibitat/providers/lemonade.js
+++ b/server/utils/agents/aibitat/providers/lemonade.js
@ -27,7 +27,7 @@ class LemonadeProvider extends InheritMultiple([Provider, UnTooled]) {
        process.env.LEMONADE_LLM_BASE_PATH,
        "openai"
      ),
-      apiKey: process.env.LEMONADE_LLM_API_KEY ?? null,
+      apiKey: process.env.LEMONADE_LLM_API_KEY || null,
      maxRetries: 3,
    });