Remove max_tokens from completion requests so it no longer caps output length

resolves #3421
This commit is contained in:
Timothy Carambat 2026-02-12 14:20:08 -08:00
parent 6f443ce11a
commit 5fb1281891
2 changed files with 2 additions and 4 deletions

View File

@ -40,7 +40,7 @@ export default function LiteLLMOptions({ settings }) {
type="number"
name="LiteLLMTokenLimit"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="4096"
placeholder="8192"
min={1}
onScroll={(e) => e.target.blur()}
defaultValue={settings?.LiteLLMTokenLimit}

View File

@ -22,7 +22,7 @@ class LiteLLM {
apiKey: process.env.LITE_LLM_API_KEY ?? null,
});
this.model = modelPreference ?? process.env.LITE_LLM_MODEL_PREF ?? null;
this.maxTokens = process.env.LITE_LLM_MODEL_TOKEN_LIMIT ?? 1024;
if (!this.model) throw new Error("LiteLLM must have a valid model set.");
this.limits = {
history: this.promptWindowLimit() * 0.15,
@ -132,7 +132,6 @@ class LiteLLM {
model: this.model,
messages,
temperature,
max_tokens: parseInt(this.maxTokens), // LiteLLM requires int
})
.catch((e) => {
throw new Error(e.message);
@ -168,7 +167,6 @@ class LiteLLM {
stream: true,
messages,
temperature,
max_tokens: parseInt(this.maxTokens), // LiteLLM requires int
}),
messages,
runPromptTokenCalculation: true,