Merge branch 'master' of github.com:Mintplex-Labs/anything-llm

This commit is contained in:
Timothy Carambat 2026-04-08 14:17:49 -07:00
commit 82ef164314
8 changed files with 105 additions and 59 deletions

View File

@ -106,7 +106,7 @@ export default function LemonadeOptions({ settings }) {
type="url" type="url"
name="LemonadeLLMBasePath" name="LemonadeLLMBasePath"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5" className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="http://localhost:8000" placeholder="http://localhost:13305"
value={cleanBasePath(basePathValue.value)} value={cleanBasePath(basePathValue.value)}
required={true} required={true}
autoComplete="off" autoComplete="off"
@ -150,7 +150,7 @@ export default function LemonadeOptions({ settings }) {
type="number" type="number"
name="LemonadeLLMModelTokenLimit" name="LemonadeLLMModelTokenLimit"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5" className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="4096" placeholder="8192"
min={1} min={1}
value={maxTokens} value={maxTokens}
onChange={(e) => setMaxTokens(Number(e.target.value))} onChange={(e) => setMaxTokens(Number(e.target.value))}

View File

@ -67,6 +67,12 @@ export const LEMONADE_COMMON_URLS = [
"http://127.0.0.1:8000/live", "http://127.0.0.1:8000/live",
"http://host.docker.internal:8000/live", "http://host.docker.internal:8000/live",
"http://172.17.0.1:8000/live", "http://172.17.0.1:8000/live",
// In Lemonade 10.1.0 the base port is 13305
"http://localhost:13305/live",
"http://127.0.0.1:13305/live",
"http://host.docker.internal:13305/live",
"http://172.17.0.1:13305/live",
]; ];
export function fullApiUrl() { export function fullApiUrl() {

View File

@ -22,7 +22,7 @@ class LemonadeLLM {
process.env.LEMONADE_LLM_BASE_PATH, process.env.LEMONADE_LLM_BASE_PATH,
"openai" "openai"
), ),
apiKey: process.env.LEMONADE_LLM_API_KEY ?? null, apiKey: process.env.LEMONADE_LLM_API_KEY || null,
}); });
this.model = modelPreference || process.env.LEMONADE_LLM_MODEL_PREF; this.model = modelPreference || process.env.LEMONADE_LLM_MODEL_PREF;
@ -202,7 +202,7 @@ class LemonadeLLM {
process.env.LEMONADE_LLM_BASE_PATH, process.env.LEMONADE_LLM_BASE_PATH,
"openai" "openai"
), ),
apiKey: process.env.LEMONADE_LLM_API_KEY ?? null, apiKey: process.env.LEMONADE_LLM_API_KEY || null,
}); });
const { labels = [] } = await client.models.retrieve(this.model); const { labels = [] } = await client.models.retrieve(this.model);
@ -223,6 +223,41 @@ class LemonadeLLM {
} }
} }
/**
* Get the currently loaded models from the Lemonade server.
* @returns {Promise<string[]>}
*/
static async getCurrentlyLoadedModels() {
const endpoint = new URL(
parseLemonadeServerEndpoint(process.env.LEMONADE_LLM_BASE_PATH, "openai")
);
endpoint.pathname += "/health";
const loadedModels = await fetch(endpoint.toString(), {
method: "GET",
headers: {
...(process.env.LEMONADE_LLM_API_KEY
? { Authorization: `Bearer ${process.env.LEMONADE_LLM_API_KEY}` }
: {}),
},
})
.then((response) => {
if (!response.ok)
throw new Error(
`Failed to get currently loaded models: ${response.statusText}`
);
return response.json();
})
.then(({ all_models_loaded = [] } = {}) => {
return all_models_loaded.map((model) => {
return {
model_name: model.model_name,
ctx_size: model?.recipe_options?.ctx_size ?? 8192,
};
});
});
return loadedModels;
}
/** /**
* Utility function to load a model from the Lemonade server. * Utility function to load a model from the Lemonade server.
* Does not check if the model is already loaded or unloads any models. * Does not check if the model is already loaded or unloads any models.
@ -230,12 +265,33 @@ class LemonadeLLM {
*/ */
static async loadModel(model, basePath = process.env.LEMONADE_LLM_BASE_PATH) { static async loadModel(model, basePath = process.env.LEMONADE_LLM_BASE_PATH) {
try { try {
const desiredCtxSize = Number(this.promptWindowLimit());
const currentlyLoadedModels =
await LemonadeLLM.getCurrentlyLoadedModels();
const modelAlreadyLoaded = currentlyLoadedModels.find(
(m) => m.model_name === model
);
if (modelAlreadyLoaded) {
if (modelAlreadyLoaded.ctx_size === desiredCtxSize) {
LemonadeLLM.slog(
`Model ${model} already loaded with ctx size ${desiredCtxSize}`
);
return true;
}
LemonadeLLM.slog(
`Model ${model} needs to be reloaded again with ctx size ${desiredCtxSize}`
);
}
const endpoint = new URL(parseLemonadeServerEndpoint(basePath, "openai")); const endpoint = new URL(parseLemonadeServerEndpoint(basePath, "openai"));
endpoint.pathname += "/load"; endpoint.pathname += "/load";
LemonadeLLM.slog( LemonadeLLM.slog(
`Loading model ${model} with context size ${this.promptWindowLimit()}` `Loading model ${model} with context size ${desiredCtxSize}`
); );
await fetch(endpoint.toString(), { await fetch(endpoint.toString(), {
method: "POST", method: "POST",
headers: { headers: {
@ -246,7 +302,7 @@ class LemonadeLLM {
}, },
body: JSON.stringify({ body: JSON.stringify({
model_name: String(model), model_name: String(model),
ctx_size: Number(this.promptWindowLimit()), ctx_size: desiredCtxSize,
}), }),
}) })
.then((response) => { .then((response) => {

View File

@ -14,7 +14,7 @@ class LemonadeEmbedder {
process.env.EMBEDDING_BASE_PATH, process.env.EMBEDDING_BASE_PATH,
"openai" "openai"
), ),
apiKey: process.env.LEMONADE_LLM_API_KEY ?? null, apiKey: process.env.LEMONADE_LLM_API_KEY || null,
}); });
this.model = process.env.EMBEDDING_MODEL_PREF; this.model = process.env.EMBEDDING_MODEL_PREF;

View File

@ -23,13 +23,17 @@ async function executeLLMInstruction(config, context) {
if (typeof input === "object") input = JSON.stringify(input); if (typeof input === "object") input = JSON.stringify(input);
if (typeof input !== "string") input = String(input); if (typeof input !== "string") input = String(input);
let completion;
const provider = aibitat.getProviderForConfig(aibitat.defaultProvider); const provider = aibitat.getProviderForConfig(aibitat.defaultProvider);
const completion = await provider.complete([ if (provider.supportsAgentStreaming) {
{ completion = await provider.stream(
role: "user", [{ role: "user", content: input }],
content: input, [],
}, null
]); );
} else {
completion = await provider.complete([{ role: "user", content: input }]);
}
introspect(`Successfully received LLM response`); introspect(`Successfully received LLM response`);
if (resultVariable) config.resultVariable = resultVariable; if (resultVariable) config.resultVariable = resultVariable;

View File

@ -843,25 +843,18 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection
); );
if (completionStream.functionCall) { if (completionStream.functionCall) {
if (depth >= this.maxToolCalls) {
this.handlerProps?.log?.(
`[warning]: Maximum tool call limit (${this.maxToolCalls}) reached. Making final response without tools.`
);
this?.introspect?.(
`Maximum tool call limit (${this.maxToolCalls}) reached. Generating a final response from what I have so far.`
);
const finalStream = await this.#safeProviderCall(() =>
provider.stream(messages, [], eventHandler)
);
const finalResponse =
finalStream?.textResponse ||
"I reached the maximum number of tool calls allowed for a single response. Here is what I have so far based on the tools I was able to run.";
return finalResponse;
}
const { name, arguments: args } = completionStream.functionCall; const { name, arguments: args } = completionStream.functionCall;
const fn = this.functions.get(name); const fn = this.functions.get(name);
const reachedToolLimit = depth >= this.maxToolCalls;
if (reachedToolLimit) {
this.handlerProps?.log?.(
`[warning]: Maximum tool call limit (${this.maxToolCalls}) reached. Executing final tool call then generating response.`
);
this?.introspect?.(
`Maximum tool call limit (${this.maxToolCalls}) reached. After this tool I will generate a final response.`
);
}
if (!fn) { if (!fn) {
return await this.handleAsyncExecution( return await this.handleAsyncExecution(
@ -875,7 +868,7 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection
originalFunctionCall: completionStream.functionCall, originalFunctionCall: completionStream.functionCall,
}, },
], ],
functions, reachedToolLimit ? [] : functions,
byAgent, byAgent,
depth + 1 depth + 1
); );
@ -951,7 +944,7 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection
return await this.handleAsyncExecution( return await this.handleAsyncExecution(
provider, provider,
newMessages, newMessages,
functions, reachedToolLimit ? [] : functions,
byAgent, byAgent,
depth + 1 depth + 1
); );
@ -1000,31 +993,18 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection
); );
if (completion.functionCall) { if (completion.functionCall) {
if (depth >= this.maxToolCalls) {
this.handlerProps?.log?.(
`[warning]: Maximum tool call limit (${this.maxToolCalls}) reached. Making final response without tools.`
);
this?.introspect?.(
`Maximum tool call limit (${this.maxToolCalls}) reached. Generating a final response from what I have so far.`
);
const finalCompletion = await this.#safeProviderCall(() =>
provider.complete(messages, [])
);
eventHandler?.("reportStreamEvent", {
type: "usageMetrics",
uuid: msgUUID,
metrics: provider.getUsage(),
});
this?.flushCitations?.(msgUUID);
return (
finalCompletion?.textResponse ||
"I reached the maximum number of tool calls allowed for a single response. Here is what I have so far based on the tools I was able to run."
);
}
const { name, arguments: args } = completion.functionCall; const { name, arguments: args } = completion.functionCall;
const fn = this.functions.get(name); const fn = this.functions.get(name);
const reachedToolLimit = depth >= this.maxToolCalls;
if (reachedToolLimit) {
this.handlerProps?.log?.(
`[warning]: Maximum tool call limit (${this.maxToolCalls}) reached. Executing final tool call then generating response.`
);
this?.introspect?.(
`Maximum tool call limit (${this.maxToolCalls}) reached. After this tool I will generate a final response.`
);
}
if (!fn) { if (!fn) {
return await this.handleExecution( return await this.handleExecution(
@ -1038,7 +1018,7 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection
originalFunctionCall: completion.functionCall, originalFunctionCall: completion.functionCall,
}, },
], ],
functions, reachedToolLimit ? [] : functions,
byAgent, byAgent,
depth + 1, depth + 1,
msgUUID msgUUID
@ -1103,7 +1083,7 @@ https://docs.anythingllm.com/agent/intelligent-tool-selection
return await this.handleExecution( return await this.handleExecution(
provider, provider,
newMessages, newMessages,
functions, reachedToolLimit ? [] : functions,
byAgent, byAgent,
depth + 1, depth + 1,
msgUUID msgUUID

View File

@ -402,7 +402,7 @@ class Provider {
configuration: { configuration: {
baseURL: process.env.LEMONADE_LLM_BASE_PATH, baseURL: process.env.LEMONADE_LLM_BASE_PATH,
}, },
apiKey: process.env.LEMONADE_LLM_API_KEY ?? null, apiKey: process.env.LEMONADE_LLM_API_KEY || null,
...config, ...config,
}); });
default: default:

View File

@ -27,7 +27,7 @@ class LemonadeProvider extends InheritMultiple([Provider, UnTooled]) {
process.env.LEMONADE_LLM_BASE_PATH, process.env.LEMONADE_LLM_BASE_PATH,
"openai" "openai"
), ),
apiKey: process.env.LEMONADE_LLM_API_KEY ?? null, apiKey: process.env.LEMONADE_LLM_API_KEY || null,
maxRetries: 3, maxRetries: 3,
}); });