diff --git a/docker/.env.example b/docker/.env.example
index 35731600..4db7aeff 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -27,6 +27,7 @@ GID='1000'
# LLM_PROVIDER='anthropic'
# ANTHROPIC_API_KEY=sk-ant-xxxx
# ANTHROPIC_MODEL_PREF='claude-2'
+# ANTHROPIC_CACHE_CONTROL="5m" # Enable prompt caching (5m = 5 minute cache, 1h = 1 hour cache). Reduces costs and improves speed by caching the system prompt.
# LLM_PROVIDER='lmstudio'
# LMSTUDIO_BASE_PATH='http://your-server:1234/v1'
diff --git a/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx b/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx
index 95cda6ef..478e1398 100644
--- a/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx
+++ b/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx
@@ -1,7 +1,9 @@
import { useState, useEffect } from "react";
import System from "@/models/system";
+import { CaretDown, CaretUp } from "@phosphor-icons/react";
export default function AnthropicAiOptions({ settings }) {
+ const [showAdvancedControls, setShowAdvancedControls] = useState(false);
const [inputValue, setInputValue] = useState(settings?.AnthropicApiKey);
const [anthropicApiKey, setAnthropicApiKey] = useState(
settings?.AnthropicApiKey
@@ -27,7 +29,6 @@ export default function AnthropicAiOptions({ settings }) {
onBlur={() => setAnthropicApiKey(inputValue)}
/>
-
{!settings?.credentialsOnly && (
)}
+      <div className="flex flex-col gap-y-4 mt-4">
+        <button
+          type="button"
+          onClick={() => setShowAdvancedControls(!showAdvancedControls)}
+          className="flex items-center gap-x-1 text-white text-sm"
+        >
+          {showAdvancedControls ? "Hide" : "Show"} advanced controls
+          {showAdvancedControls ? <CaretUp size={14} /> : <CaretDown size={14} />}
+        </button>
+        {showAdvancedControls && (
+          <div className="flex flex-col w-60">
+            <label className="text-white text-sm font-semibold block mb-3">
+              Prompt Caching
+            </label>
+            <select
+              name="AnthropicCacheControl"
+              defaultValue={settings?.AnthropicCacheControl || "none"}
+              className="bg-theme-settings-input-bg text-white text-sm rounded-lg block w-full p-2.5"
+            >
+              <option value="none">Disabled</option>
+              <option value="5m">Cache for 5 minutes</option>
+              <option value="1h">Cache for 1 hour</option>
+            </select>
+          </div>
+        )}
+      </div>
);
}
diff --git a/server/.env.example b/server/.env.example
index 8b5c2ea8..aecd0e14 100644
--- a/server/.env.example
+++ b/server/.env.example
@@ -24,6 +24,7 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
# LLM_PROVIDER='anthropic'
# ANTHROPIC_API_KEY=sk-ant-xxxx
# ANTHROPIC_MODEL_PREF='claude-2'
+# ANTHROPIC_CACHE_CONTROL="5m" # Enable prompt caching (5m = 5 minute cache, 1h = 1 hour cache). Reduces costs and improves speed by caching the system prompt.
# LLM_PROVIDER='lmstudio'
# LMSTUDIO_BASE_PATH='http://your-server:1234/v1'
diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js
index 9015257c..ce3a0063 100644
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@@ -481,6 +481,7 @@ const SystemSettings = {
// Anthropic Keys
AnthropicApiKey: !!process.env.ANTHROPIC_API_KEY,
AnthropicModelPref: process.env.ANTHROPIC_MODEL_PREF || "claude-2",
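+      // Prompt caching TTL: "none" (disabled, default), "5m", or "1h"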
+ AnthropicCacheControl: process.env.ANTHROPIC_CACHE_CONTROL || "none",
// Gemini Keys
GeminiLLMApiKey: !!process.env.GEMINI_API_KEY,
diff --git a/server/utils/AiProviders/anthropic/index.js b/server/utils/AiProviders/anthropic/index.js
index 2170ba83..1dd1f290 100644
--- a/server/utils/AiProviders/anthropic/index.js
+++ b/server/utils/AiProviders/anthropic/index.js
@@ -34,7 +34,9 @@ class AnthropicLLM {
this.embedder = embedder ?? new NativeEmbedder();
this.defaultTemp = 0.7;
- this.log(`Initialized with ${this.model}`);
+ this.log(
+ `Initialized with ${this.model}. Cache ${this.cacheControl ? `enabled (${this.cacheControl.ttl})` : "disabled"}`
+ );
}
log(text, ...args) {
@@ -57,6 +59,48 @@ class AnthropicLLM {
return true;
}
+ /**
+ * Parses the cache control ENV variable
+ *
+   * If caching is enabled, we can pass fewer than 1024 tokens and Anthropic will
+   * simply ignore the cache breakpoint until the content meets the model's minimum.
+   * Since this feature is opt-in, we can safely pass the content as-is whenever
+   * caching is enabled.
+ * https://docs.claude.com/en/docs/build-with-claude/prompt-caching#cache-limitations
+ *
+   * @returns {null|{type: "ephemeral", ttl: "5m" | "1h"}} Cache control configuration, or null when caching is disabled
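+   *
+   * @example
+   * // ANTHROPIC_CACHE_CONTROL="5m"  -> { type: "ephemeral", ttl: "5m" }
+   * // ANTHROPIC_CACHE_CONTROL=" 1H" -> { type: "ephemeral", ttl: "1h" } (trimmed + lowercased)
+   * // unset or unrecognized value   -> null (caching disabled)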
+ */
+ get cacheControl() {
+    // Memoize on the instance; undefined means "not yet computed" (null is a valid, cached result)
+    if (this._cacheControl !== undefined) return this._cacheControl;
+
+ if (!process.env.ANTHROPIC_CACHE_CONTROL) this._cacheControl = null;
+ else {
+ const normalized =
+ process.env.ANTHROPIC_CACHE_CONTROL.toLowerCase().trim();
+ if (["5m", "1h"].includes(normalized))
+ this._cacheControl = { type: "ephemeral", ttl: normalized };
+ else this._cacheControl = null;
+ }
+ return this._cacheControl;
+ }
+
+ /**
+   * Builds the `system` parameter for the API call, attaching cache control when enabled.
+   * @param {string} systemContent - The system prompt content
+   * @returns {string|object[]} Plain string when caching is disabled, otherwise an array of content blocks
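+   *
+   * @example
+   * // caching disabled -> "You are a helpful assistant."
+   * // caching enabled  -> [{ type: "text", text: "You are a helpful assistant.", cache_control: { type: "ephemeral", ttl: "5m" } }]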
+ */
+ #buildSystemPrompt(systemContent) {
+ if (!systemContent || !this.cacheControl) return systemContent;
+ return [
+ {
+ type: "text",
+ text: systemContent,
+ cache_control: this.cacheControl,
+ },
+ ];
+ }
+
/**
* Generates appropriate content array for a message + attachments.
* @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}}
@@ -105,11 +149,12 @@ class AnthropicLLM {
async getChatCompletion(messages = null, { temperature = 0.7 }) {
try {
+      const systemContent = messages[0].content; // First message is always the system prompt
const result = await LLMPerformanceMonitor.measureAsyncFunction(
this.anthropic.messages.create({
model: this.model,
max_tokens: 4096,
- system: messages[0].content, // Strip out the system message
+ system: this.#buildSystemPrompt(systemContent),
messages: messages.slice(1), // Pop off the system message
temperature: Number(temperature ?? this.defaultTemp),
})
@@ -117,6 +162,7 @@ class AnthropicLLM {
const promptTokens = result.output.usage.input_tokens;
const completionTokens = result.output.usage.output_tokens;
+
return {
textResponse: result.output.content[0].text,
metrics: {
@@ -134,11 +180,12 @@ class AnthropicLLM {
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
+    const systemContent = messages[0].content; // First message is always the system prompt
const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
this.anthropic.messages.stream({
model: this.model,
max_tokens: 4096,
- system: messages[0].content, // Strip out the system message
+ system: this.#buildSystemPrompt(systemContent),
messages: messages.slice(1), // Pop off the system message
temperature: Number(temperature ?? this.defaultTemp),
}),
diff --git a/server/utils/agents/aibitat/providers/anthropic.js b/server/utils/agents/aibitat/providers/anthropic.js
index 815ac88e..968f6144 100644
--- a/server/utils/agents/aibitat/providers/anthropic.js
+++ b/server/utils/agents/aibitat/providers/anthropic.js
@@ -23,14 +23,55 @@ class AnthropicProvider extends Provider {
const client = new Anthropic(options);
super(client);
-
this.model = model;
}
+ /**
+ * Parses the cache control ENV variable
+ *
+   * If caching is enabled, we can pass fewer than 1024 tokens and Anthropic will
+   * simply ignore the cache breakpoint until the content meets the model's minimum.
+   * Since this feature is opt-in, we can safely pass the content as-is whenever
+   * caching is enabled.
+ * https://docs.claude.com/en/docs/build-with-claude/prompt-caching#cache-limitations
+ *
+   * @returns {null|{type: "ephemeral", ttl: "5m" | "1h"}} Cache control configuration, or null when caching is disabled
+ */
+ get cacheControl() {
+    // Memoize on the instance; undefined means "not yet computed" (null is a valid, cached result)
+    if (this._cacheControl !== undefined) return this._cacheControl;
+
+ if (!process.env.ANTHROPIC_CACHE_CONTROL) this._cacheControl = null;
+ else {
+ const normalized =
+ process.env.ANTHROPIC_CACHE_CONTROL.toLowerCase().trim();
+ if (["5m", "1h"].includes(normalized))
+ this._cacheControl = { type: "ephemeral", ttl: normalized };
+ else this._cacheControl = null;
+ }
+ return this._cacheControl;
+ }
+
get supportsAgentStreaming() {
return true;
}
+ /**
+   * Builds the `system` parameter for the API call, attaching cache control when enabled.
+   * @param {string} systemContent - The system prompt content
+   * @returns {string|object[]} Plain string when caching is disabled, otherwise an array of content blocks
+ */
+ #buildSystemPrompt(systemContent) {
+ if (!systemContent || !this.cacheControl) return systemContent;
+ return [
+ {
+ type: "text",
+ text: systemContent,
+ cache_control: this.cacheControl,
+ },
+ ];
+ }
+
#prepareMessages(messages = []) {
// Extract system prompt and filter out any system messages from the main chat.
let systemPrompt =
@@ -149,7 +190,7 @@ class AnthropicProvider extends Provider {
{
model: this.model,
max_tokens: 4096,
- system: systemPrompt,
+ system: this.#buildSystemPrompt(systemPrompt),
messages: chats,
stream: true,
...(Array.isArray(functions) && functions?.length > 0
@@ -276,7 +317,7 @@ class AnthropicProvider extends Provider {
{
model: this.model,
max_tokens: 4096,
- system: systemPrompt,
+ system: this.#buildSystemPrompt(systemPrompt),
messages: chats,
stream: false,
...(Array.isArray(functions) && functions?.length > 0
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index be92498a..3f34c1d7 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -58,6 +58,15 @@ const KEY_MAPPING = {
envKey: "ANTHROPIC_MODEL_PREF",
checks: [isNotEmpty],
},
+ AnthropicCacheControl: {
+ envKey: "ANTHROPIC_CACHE_CONTROL",
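+    // "none" disables caching; the providers treat any value other than 5m/1h as disabled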
+ checks: [
+ (input) =>
+ ["none", "5m", "1h"].includes(input)
+ ? null
+          : "Invalid cache control. Must be one of: none, 5m, 1h.",
+ ],
+ },
GeminiLLMApiKey: {
envKey: "GEMINI_API_KEY",