diff --git a/frontend/src/components/LLMSelection/LemonadeOptions/index.jsx b/frontend/src/components/LLMSelection/LemonadeOptions/index.jsx index c61bae1f..9fc5b850 100644 --- a/frontend/src/components/LLMSelection/LemonadeOptions/index.jsx +++ b/frontend/src/components/LLMSelection/LemonadeOptions/index.jsx @@ -106,7 +106,7 @@ export default function LemonadeOptions({ settings }) { type="url" name="LemonadeLLMBasePath" className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5" - placeholder="http://localhost:8000" + placeholder="http://localhost:13305" value={cleanBasePath(basePathValue.value)} required={true} autoComplete="off" @@ -150,7 +150,7 @@ export default function LemonadeOptions({ settings }) { type="number" name="LemonadeLLMModelTokenLimit" className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5" - placeholder="4096" + placeholder="8192" min={1} value={maxTokens} onChange={(e) => setMaxTokens(Number(e.target.value))} diff --git a/frontend/src/utils/constants.js b/frontend/src/utils/constants.js index f6bdc7b9..b50621ad 100644 --- a/frontend/src/utils/constants.js +++ b/frontend/src/utils/constants.js @@ -67,6 +67,12 @@ export const LEMONADE_COMMON_URLS = [ "http://127.0.0.1:8000/live", "http://host.docker.internal:8000/live", "http://172.17.0.1:8000/live", + + // In Lemonade 10.1.0 the base port is 13305 + "http://localhost:13305/live", + "http://127.0.0.1:13305/live", + "http://host.docker.internal:13305/live", + "http://172.17.0.1:13305/live", ]; export function fullApiUrl() { diff --git a/server/utils/AiProviders/lemonade/index.js b/server/utils/AiProviders/lemonade/index.js index 206b1582..b7895340 100644 --- a/server/utils/AiProviders/lemonade/index.js +++ b/server/utils/AiProviders/lemonade/index.js @@ -22,7 +22,7 @@ class LemonadeLLM { process.env.LEMONADE_LLM_BASE_PATH, "openai" ), - apiKey: process.env.LEMONADE_LLM_API_KEY ?? null, + apiKey: process.env.LEMONADE_LLM_API_KEY || null, }); this.model = modelPreference || process.env.LEMONADE_LLM_MODEL_PREF; @@ -202,7 +202,7 @@ class LemonadeLLM { process.env.LEMONADE_LLM_BASE_PATH, "openai" ), - apiKey: process.env.LEMONADE_LLM_API_KEY ?? null, + apiKey: process.env.LEMONADE_LLM_API_KEY || null, }); const { labels = [] } = await client.models.retrieve(this.model); @@ -223,6 +223,41 @@ class LemonadeLLM { } } + /** + * Get the currently loaded models from the Lemonade server. + * @returns {Promise} + */ + static async getCurrentlyLoadedModels() { + const endpoint = new URL( + parseLemonadeServerEndpoint(process.env.LEMONADE_LLM_BASE_PATH, "openai") + ); + endpoint.pathname += "/health"; + const loadedModels = await fetch(endpoint.toString(), { + method: "GET", + headers: { + ...(process.env.LEMONADE_LLM_API_KEY + ? { Authorization: `Bearer ${process.env.LEMONADE_LLM_API_KEY}` } + : {}), + }, + }) + .then((response) => { + if (!response.ok) + throw new Error( + `Failed to get currently loaded models: ${response.statusText}` + ); + return response.json(); + }) + .then(({ all_models_loaded = [] } = {}) => { + return all_models_loaded.map((model) => { + return { + model_name: model.model_name, + ctx_size: model?.recipe_options?.ctx_size ?? 8192, + }; + }); + }); + return loadedModels; + } + /** * Utility function to load a model from the Lemonade server. * Does not check if the model is already loaded or unloads any models. @@ -230,12 +265,33 @@ class LemonadeLLM { */ static async loadModel(model, basePath = process.env.LEMONADE_LLM_BASE_PATH) { try { + const desiredCtxSize = Number(this.promptWindowLimit()); + const currentlyLoadedModels = + await LemonadeLLM.getCurrentlyLoadedModels(); + const modelAlreadyLoaded = currentlyLoadedModels.find( + (m) => m.model_name === model + ); + + if (modelAlreadyLoaded) { + if (modelAlreadyLoaded.ctx_size === desiredCtxSize) { + LemonadeLLM.slog( + `Model ${model} already loaded with ctx size ${desiredCtxSize}` + ); + return true; + } + + LemonadeLLM.slog( + `Model ${model} needs to be reloaded again with ctx size ${desiredCtxSize}` + ); + } + const endpoint = new URL(parseLemonadeServerEndpoint(basePath, "openai")); endpoint.pathname += "/load"; LemonadeLLM.slog( - `Loading model ${model} with context size ${this.promptWindowLimit()}` + `Loading model ${model} with context size ${desiredCtxSize}` ); + await fetch(endpoint.toString(), { method: "POST", headers: { @@ -246,7 +302,7 @@ class LemonadeLLM { }, body: JSON.stringify({ model_name: String(model), - ctx_size: Number(this.promptWindowLimit()), + ctx_size: desiredCtxSize, }), }) .then((response) => { diff --git a/server/utils/EmbeddingEngines/lemonade/index.js b/server/utils/EmbeddingEngines/lemonade/index.js index e2da82f3..8d5a64f4 100644 --- a/server/utils/EmbeddingEngines/lemonade/index.js +++ b/server/utils/EmbeddingEngines/lemonade/index.js @@ -14,7 +14,7 @@ class LemonadeEmbedder { process.env.EMBEDDING_BASE_PATH, "openai" ), - apiKey: process.env.LEMONADE_LLM_API_KEY ?? null, + apiKey: process.env.LEMONADE_LLM_API_KEY || null, }); this.model = process.env.EMBEDDING_MODEL_PREF; diff --git a/server/utils/agentFlows/executors/llm-instruction.js b/server/utils/agentFlows/executors/llm-instruction.js index d7e4e57f..d594c547 100644 --- a/server/utils/agentFlows/executors/llm-instruction.js +++ b/server/utils/agentFlows/executors/llm-instruction.js @@ -23,13 +23,17 @@ async function executeLLMInstruction(config, context) { if (typeof input === "object") input = JSON.stringify(input); if (typeof input !== "string") input = String(input); + let completion; const provider = aibitat.getProviderForConfig(aibitat.defaultProvider); - const completion = await provider.complete([ - { - role: "user", - content: input, - }, - ]); + if (provider.supportsAgentStreaming) { + completion = await provider.stream( + [{ role: "user", content: input }], + [], + null + ); + } else { + completion = await provider.complete([{ role: "user", content: input }]); + } introspect(`Successfully received LLM response`); if (resultVariable) config.resultVariable = resultVariable; diff --git a/server/utils/agents/aibitat/index.js b/server/utils/agents/aibitat/index.js index 37a285b3..c372740f 100644 --- a/server/utils/agents/aibitat/index.js +++ b/server/utils/agents/aibitat/index.js @@ -843,25 +843,18 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection ); if (completionStream.functionCall) { - if (depth >= this.maxToolCalls) { - this.handlerProps?.log?.( - `[warning]: Maximum tool call limit (${this.maxToolCalls}) reached. Making final response without tools.` - ); - this?.introspect?.( - `Maximum tool call limit (${this.maxToolCalls}) reached. Generating a final response from what I have so far.` - ); - - const finalStream = await this.#safeProviderCall(() => - provider.stream(messages, [], eventHandler) - ); - const finalResponse = - finalStream?.textResponse || - "I reached the maximum number of tool calls allowed for a single response. Here is what I have so far based on the tools I was able to run."; - return finalResponse; - } - const { name, arguments: args } = completionStream.functionCall; const fn = this.functions.get(name); + const reachedToolLimit = depth >= this.maxToolCalls; + + if (reachedToolLimit) { + this.handlerProps?.log?.( + `[warning]: Maximum tool call limit (${this.maxToolCalls}) reached. Executing final tool call then generating response.` + ); + this?.introspect?.( + `Maximum tool call limit (${this.maxToolCalls}) reached. After this tool I will generate a final response.` + ); + } if (!fn) { return await this.handleAsyncExecution( @@ -875,7 +868,7 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection originalFunctionCall: completionStream.functionCall, }, ], - functions, + reachedToolLimit ? [] : functions, byAgent, depth + 1 ); @@ -951,7 +944,7 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection return await this.handleAsyncExecution( provider, newMessages, - functions, + reachedToolLimit ? [] : functions, byAgent, depth + 1 ); @@ -1000,31 +993,18 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection ); if (completion.functionCall) { - if (depth >= this.maxToolCalls) { - this.handlerProps?.log?.( - `[warning]: Maximum tool call limit (${this.maxToolCalls}) reached. Making final response without tools.` - ); - this?.introspect?.( - `Maximum tool call limit (${this.maxToolCalls}) reached. Generating a final response from what I have so far.` - ); - - const finalCompletion = await this.#safeProviderCall(() => - provider.complete(messages, []) - ); - eventHandler?.("reportStreamEvent", { - type: "usageMetrics", - uuid: msgUUID, - metrics: provider.getUsage(), - }); - this?.flushCitations?.(msgUUID); - return ( - finalCompletion?.textResponse || - "I reached the maximum number of tool calls allowed for a single response. Here is what I have so far based on the tools I was able to run." - ); - } - const { name, arguments: args } = completion.functionCall; const fn = this.functions.get(name); + const reachedToolLimit = depth >= this.maxToolCalls; + + if (reachedToolLimit) { + this.handlerProps?.log?.( + `[warning]: Maximum tool call limit (${this.maxToolCalls}) reached. Executing final tool call then generating response.` + ); + this?.introspect?.( + `Maximum tool call limit (${this.maxToolCalls}) reached. After this tool I will generate a final response.` + ); + } if (!fn) { return await this.handleExecution( @@ -1038,7 +1018,7 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection originalFunctionCall: completion.functionCall, }, ], - functions, + reachedToolLimit ? [] : functions, byAgent, depth + 1, msgUUID @@ -1103,7 +1083,7 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection return await this.handleExecution( provider, newMessages, - functions, + reachedToolLimit ? [] : functions, byAgent, depth + 1, msgUUID diff --git a/server/utils/agents/aibitat/providers/ai-provider.js b/server/utils/agents/aibitat/providers/ai-provider.js index 4d234767..ea8dc2b0 100644 --- a/server/utils/agents/aibitat/providers/ai-provider.js +++ b/server/utils/agents/aibitat/providers/ai-provider.js @@ -402,7 +402,7 @@ class Provider { configuration: { baseURL: process.env.LEMONADE_LLM_BASE_PATH, }, - apiKey: process.env.LEMONADE_LLM_API_KEY ?? null, + apiKey: process.env.LEMONADE_LLM_API_KEY || null, ...config, }); default: diff --git a/server/utils/agents/aibitat/providers/lemonade.js b/server/utils/agents/aibitat/providers/lemonade.js index 0e6715d5..1c47a2ac 100644 --- a/server/utils/agents/aibitat/providers/lemonade.js +++ b/server/utils/agents/aibitat/providers/lemonade.js @@ -27,7 +27,7 @@ class LemonadeProvider extends InheritMultiple([Provider, UnTooled]) { process.env.LEMONADE_LLM_BASE_PATH, "openai" ), - apiKey: process.env.LEMONADE_LLM_API_KEY ?? null, + apiKey: process.env.LEMONADE_LLM_API_KEY || null, maxRetries: 3, });