Add option to control KoboldCPP max response tokens (#3746)
Add option to control KoboldCPP max response tokens.
This commit is contained in:
parent
b4511ac222
commit
8912d0f0fc
@ -22,11 +22,18 @@ export default function KoboldCPPOptions({ settings }) {
|
||||
// Token limit state, seeded from settings.KoboldCPPTokenLimit; falls back to
// 4096 when settings is nullish or that value is missing/falsy.
const [tokenLimit, setTokenLimit] = useState(
|
||||
settings?.KoboldCPPTokenLimit || 4096
|
||||
);
|
||||
// Max response tokens state, seeded from settings.KoboldCPPMaxTokens; falls
// back to 2048 when settings is nullish or that value is missing/falsy.
const [maxTokens, setMaxTokens] = useState(
|
||||
settings?.KoboldCPPMaxTokens || 2048
|
||||
);
|
||||
|
||||
// Change handler: coerces the event target's value with Number() and stores
// the result in the tokenLimit state. Note that Number("") is 0, so an empty
// field is stored as 0.
const handleTokenLimitChange = (e) => {
|
||||
setTokenLimit(Number(e.target.value));
|
||||
};
|
||||
|
||||
// Change handler: coerces the event target's value with Number() and stores
// the result in the maxTokens state. Note that Number("") is 0, so an empty
// field is stored as 0.
const handleMaxTokensChange = (e) => {
|
||||
setMaxTokens(Number(e.target.value));
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="w-full flex flex-col gap-y-7">
|
||||
<div className="w-full flex items-start gap-[36px] mt-1.5">
|
||||
@ -54,6 +61,26 @@ export default function KoboldCPPOptions({ settings }) {
|
||||
Maximum number of tokens for context and response.
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-2">
|
||||
Max response tokens
|
||||
</label>
|
||||
<input
|
||||
type="number"
|
||||
name="KoboldCPPMaxTokens"
|
||||
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
|
||||
placeholder="2048"
|
||||
min={1}
|
||||
value={maxTokens}
|
||||
onChange={handleMaxTokensChange}
|
||||
onScroll={(e) => e.target.blur()}
|
||||
required={true}
|
||||
autoComplete="off"
|
||||
/>
|
||||
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
|
||||
Maximum number of tokens for the response.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex justify-start mt-4">
|
||||
<button
|
||||
|
||||
@ -78,6 +78,7 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
|
||||
# KOBOLD_CPP_BASE_PATH='http://127.0.0.1:5000/v1'
|
||||
# KOBOLD_CPP_MODEL_PREF='koboldcpp/codellama-7b-instruct.Q4_K_S'
|
||||
# KOBOLD_CPP_MODEL_TOKEN_LIMIT=4096
|
||||
# KOBOLD_CPP_MAX_TOKENS=2048
|
||||
|
||||
# LLM_PROVIDER='textgenwebui'
|
||||
# TEXT_GEN_WEB_UI_BASE_PATH='http://127.0.0.1:5000/v1'
|
||||
|
||||
@ -513,6 +513,7 @@ const SystemSettings = {
|
||||
KoboldCPPModelPref: process.env.KOBOLD_CPP_MODEL_PREF,
|
||||
KoboldCPPBasePath: process.env.KOBOLD_CPP_BASE_PATH,
|
||||
KoboldCPPTokenLimit: process.env.KOBOLD_CPP_MODEL_TOKEN_LIMIT,
|
||||
KoboldCPPMaxTokens: process.env.KOBOLD_CPP_MAX_TOKENS,
|
||||
|
||||
// Text Generation Web UI Keys
|
||||
TextGenWebUIBasePath: process.env.TEXT_GEN_WEB_UI_BASE_PATH,
|
||||
|
||||
@ -32,6 +32,7 @@ class KoboldCPPLLM {
|
||||
|
||||
this.embedder = embedder ?? new NativeEmbedder();
|
||||
this.defaultTemp = 0.7;
|
||||
this.maxTokens = Number(process.env.KOBOLD_CPP_MAX_TOKENS) || 2048;
|
||||
this.log(`Inference API: ${this.basePath} Model: ${this.model}`);
|
||||
}
|
||||
|
||||
@ -132,6 +133,7 @@ class KoboldCPPLLM {
|
||||
model: this.model,
|
||||
messages,
|
||||
temperature,
|
||||
max_tokens: this.maxTokens,
|
||||
})
|
||||
.catch((e) => {
|
||||
throw new Error(e.message);
|
||||
@ -168,6 +170,7 @@ class KoboldCPPLLM {
|
||||
stream: true,
|
||||
messages,
|
||||
temperature,
|
||||
max_tokens: this.maxTokens,
|
||||
}),
|
||||
messages
|
||||
);
|
||||
|
||||
@ -163,6 +163,10 @@ const KEY_MAPPING = {
|
||||
envKey: "KOBOLD_CPP_MODEL_TOKEN_LIMIT",
|
||||
checks: [nonZero],
|
||||
},
|
||||
KoboldCPPMaxTokens: {
|
||||
envKey: "KOBOLD_CPP_MAX_TOKENS",
|
||||
checks: [nonZero],
|
||||
},
|
||||
|
||||
// Text Generation Web UI Settings
|
||||
TextGenWebUIBasePath: {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user