Add option to control KoboldCPP max response tokens (#3746)

add option to control koboldcpp max response tokens
This commit is contained in:
Sean Hatfield 2025-05-02 14:12:06 -07:00 committed by GitHub
parent b4511ac222
commit 8912d0f0fc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 36 additions and 0 deletions

View File

@@ -22,11 +22,18 @@ export default function KoboldCPPOptions({ settings }) {
const [tokenLimit, setTokenLimit] = useState(
settings?.KoboldCPPTokenLimit || 4096
);
const [maxTokens, setMaxTokens] = useState(
settings?.KoboldCPPMaxTokens || 2048
);
const handleTokenLimitChange = (e) => {
setTokenLimit(Number(e.target.value));
};
const handleMaxTokensChange = (e) => {
setMaxTokens(Number(e.target.value));
};
return (
<div className="w-full flex flex-col gap-y-7">
<div className="w-full flex items-start gap-[36px] mt-1.5">
@@ -54,6 +61,26 @@ export default function KoboldCPPOptions({ settings }) {
Maximum number of tokens for context and response.
</p>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-2">
Max response tokens
</label>
<input
type="number"
name="KoboldCPPMaxTokens"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="2048"
min={1}
value={maxTokens}
onChange={handleMaxTokensChange}
onScroll={(e) => e.target.blur()}
required={true}
autoComplete="off"
/>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Maximum number of tokens for the response.
</p>
</div>
</div>
<div className="flex justify-start mt-4">
<button

View File

@@ -78,6 +78,7 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
# KOBOLD_CPP_BASE_PATH='http://127.0.0.1:5000/v1'
# KOBOLD_CPP_MODEL_PREF='koboldcpp/codellama-7b-instruct.Q4_K_S'
# KOBOLD_CPP_MODEL_TOKEN_LIMIT=4096
# KOBOLD_CPP_MAX_TOKENS=2048
# LLM_PROVIDER='textgenwebui'
# TEXT_GEN_WEB_UI_BASE_PATH='http://127.0.0.1:5000/v1'

View File

@@ -513,6 +513,7 @@ const SystemSettings = {
KoboldCPPModelPref: process.env.KOBOLD_CPP_MODEL_PREF,
KoboldCPPBasePath: process.env.KOBOLD_CPP_BASE_PATH,
KoboldCPPTokenLimit: process.env.KOBOLD_CPP_MODEL_TOKEN_LIMIT,
KoboldCPPMaxTokens: process.env.KOBOLD_CPP_MAX_TOKENS,
// Text Generation Web UI Keys
TextGenWebUIBasePath: process.env.TEXT_GEN_WEB_UI_BASE_PATH,

View File

@@ -32,6 +32,7 @@ class KoboldCPPLLM {
this.embedder = embedder ?? new NativeEmbedder();
this.defaultTemp = 0.7;
this.maxTokens = Number(process.env.KOBOLD_CPP_MAX_TOKENS) || 2048;
this.log(`Inference API: ${this.basePath} Model: ${this.model}`);
}
@@ -132,6 +133,7 @@ class KoboldCPPLLM {
model: this.model,
messages,
temperature,
max_tokens: this.maxTokens,
})
.catch((e) => {
throw new Error(e.message);
@@ -168,6 +170,7 @@ class KoboldCPPLLM {
stream: true,
messages,
temperature,
max_tokens: this.maxTokens,
}),
messages
);

View File

@@ -163,6 +163,10 @@ const KEY_MAPPING = {
envKey: "KOBOLD_CPP_MODEL_TOKEN_LIMIT",
checks: [nonZero],
},
KoboldCPPMaxTokens: {
envKey: "KOBOLD_CPP_MAX_TOKENS",
checks: [nonZero],
},
// Text Generation Web UI Settings
TextGenWebUIBasePath: {