Add option to control KoboldCPP max response tokens (#3746)

add option to control koboldcpp max response tokens
This commit is contained in:
Sean Hatfield 2025-05-02 14:12:06 -07:00 committed by GitHub
parent b4511ac222
commit 8912d0f0fc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 36 additions and 0 deletions

View File

@@ -22,11 +22,18 @@ export default function KoboldCPPOptions({ settings }) {
const [tokenLimit, setTokenLimit] = useState(
settings?.KoboldCPPTokenLimit || 4096
);
const [maxTokens, setMaxTokens] = useState(
settings?.KoboldCPPMaxTokens || 2048
);
const handleTokenLimitChange = (e) => {
setTokenLimit(Number(e.target.value));
};
const handleMaxTokensChange = (e) => {
setMaxTokens(Number(e.target.value));
};
return (
<div className="w-full flex flex-col gap-y-7">
<div className="w-full flex items-start gap-[36px] mt-1.5">
@@ -54,6 +61,26 @@ export default function KoboldCPPOptions({ settings }) {
Maximum number of tokens for context and response.
</p>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-2">
Max response tokens
</label>
<input
type="number"
name="KoboldCPPMaxTokens"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="2048"
min={1}
value={maxTokens}
onChange={handleMaxTokensChange}
onScroll={(e) => e.target.blur()}
required={true}
autoComplete="off"
/>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Maximum number of tokens for the response.
</p>
</div>
</div>
<div className="flex justify-start mt-4">
<button

View File

@@ -78,6 +78,7 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
# KOBOLD_CPP_BASE_PATH='http://127.0.0.1:5000/v1'
# KOBOLD_CPP_MODEL_PREF='koboldcpp/codellama-7b-instruct.Q4_K_S'
# KOBOLD_CPP_MODEL_TOKEN_LIMIT=4096
# KOBOLD_CPP_MAX_TOKENS=2048
# LLM_PROVIDER='textgenwebui'
# TEXT_GEN_WEB_UI_BASE_PATH='http://127.0.0.1:5000/v1'

View File

@@ -513,6 +513,7 @@ const SystemSettings = {
KoboldCPPModelPref: process.env.KOBOLD_CPP_MODEL_PREF,
KoboldCPPBasePath: process.env.KOBOLD_CPP_BASE_PATH,
KoboldCPPTokenLimit: process.env.KOBOLD_CPP_MODEL_TOKEN_LIMIT,
KoboldCPPMaxTokens: process.env.KOBOLD_CPP_MAX_TOKENS,
// Text Generation Web UI Keys
TextGenWebUIBasePath: process.env.TEXT_GEN_WEB_UI_BASE_PATH,

View File

@@ -32,6 +32,7 @@ class KoboldCPPLLM {
this.embedder = embedder ?? new NativeEmbedder();
this.defaultTemp = 0.7;
this.maxTokens = Number(process.env.KOBOLD_CPP_MAX_TOKENS) || 2048;
this.log(`Inference API: ${this.basePath} Model: ${this.model}`);
}
@@ -132,6 +133,7 @@ class KoboldCPPLLM {
model: this.model,
messages,
temperature,
max_tokens: this.maxTokens,
})
.catch((e) => {
throw new Error(e.message);
@@ -168,6 +170,7 @@ class KoboldCPPLLM {
stream: true,
messages,
temperature,
max_tokens: this.maxTokens,
}),
messages
);

View File

@@ -163,6 +163,10 @@ const KEY_MAPPING = {
envKey: "KOBOLD_CPP_MODEL_TOKEN_LIMIT",
checks: [nonZero],
},
KoboldCPPMaxTokens: {
envKey: "KOBOLD_CPP_MAX_TOKENS",
checks: [nonZero],
},
// Text Generation Web UI Settings
TextGenWebUIBasePath: {