diff --git a/docker/.env.example b/docker/.env.example
index 35731600..4db7aeff 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -27,6 +27,7 @@ GID='1000'
 # LLM_PROVIDER='anthropic'
 # ANTHROPIC_API_KEY=sk-ant-xxxx
 # ANTHROPIC_MODEL_PREF='claude-2'
+# ANTHROPIC_CACHE_CONTROL="5m" # Enable prompt caching (5m=5min cache, 1h=1hour cache). Reduces costs and improves speed by caching system prompts.

 # LLM_PROVIDER='lmstudio'
 # LMSTUDIO_BASE_PATH='http://your-server:1234/v1'

diff --git a/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx b/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx
index 95cda6ef..478e1398 100644
--- a/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx
+++ b/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx
@@ -1,7 +1,9 @@
 import { useState, useEffect } from "react";
 import System from "@/models/system";
+import { CaretDown, CaretUp } from "@phosphor-icons/react";

 export default function AnthropicAiOptions({ settings }) {
+  const [showAdvancedControls, setShowAdvancedControls] = useState(false);
   const [inputValue, setInputValue] = useState(settings?.AnthropicApiKey);
   const [anthropicApiKey, setAnthropicApiKey] = useState(
     settings?.AnthropicApiKey
@@ -27,7 +29,6 @@ export default function AnthropicAiOptions({ settings }) {
           onBlur={() => setAnthropicApiKey(inputValue)}
         />
-
       {!settings?.credentialsOnly && (
         ...
       )}
+      ...
   );
 }
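The body of the new advanced-controls block does not survive in the excerpt above. For orientation only, here is a minimal sketch of how the pieces that *are* visible could fit together: the `showAdvancedControls` state, the Caret icon imports, the `AnthropicCacheControl` settings key, and the `none`/`5m`/`1h` values come from this PR; all markup, labels, and structure below are illustrative assumptions, not the PR's actual JSX.

```jsx
import { useState } from "react";
import { CaretDown, CaretUp } from "@phosphor-icons/react";

// Hypothetical sketch -- not the PR's real markup. The field name matches
// the AnthropicCacheControl key registered in systemSettings.js and
// updateENV.js further down in this diff.
function AnthropicCacheControlPicker({ settings }) {
  const [showAdvancedControls, setShowAdvancedControls] = useState(false);
  return (
    <div>
      <button
        type="button"
        onClick={() => setShowAdvancedControls(!showAdvancedControls)}
      >
        Advanced Controls{" "}
        {showAdvancedControls ? <CaretUp size={14} /> : <CaretDown size={14} />}
      </button>
      {showAdvancedControls && (
        <select
          name="AnthropicCacheControl"
          defaultValue={settings?.AnthropicCacheControl || "none"}
        >
          <option value="none">Disabled</option>
          <option value="5m">Cache for 5 minutes</option>
          <option value="1h">Cache for 1 hour</option>
        </select>
      )}
    </div>
  );
}
```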
diff --git a/server/.env.example b/server/.env.example
index 8b5c2ea8..aecd0e14 100644
--- a/server/.env.example
+++ b/server/.env.example
@@ -24,6 +24,7 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
 # LLM_PROVIDER='anthropic'
 # ANTHROPIC_API_KEY=sk-ant-xxxx
 # ANTHROPIC_MODEL_PREF='claude-2'
+# ANTHROPIC_CACHE_CONTROL="5m" # Enable prompt caching (5m=5min cache, 1h=1hour cache). Reduces costs and improves speed by caching system prompts.

 # LLM_PROVIDER='lmstudio'
 # LMSTUDIO_BASE_PATH='http://your-server:1234/v1'

diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js
index 9015257c..ce3a0063 100644
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@@ -481,6 +481,7 @@ const SystemSettings = {
     // Anthropic Keys
     AnthropicApiKey: !!process.env.ANTHROPIC_API_KEY,
     AnthropicModelPref: process.env.ANTHROPIC_MODEL_PREF || "claude-2",
+    AnthropicCacheControl: process.env.ANTHROPIC_CACHE_CONTROL || "none",

     // Gemini Keys
     GeminiLLMApiKey: !!process.env.GEMINI_API_KEY,

diff --git a/server/utils/AiProviders/anthropic/index.js b/server/utils/AiProviders/anthropic/index.js
index 2170ba83..1dd1f290 100644
--- a/server/utils/AiProviders/anthropic/index.js
+++ b/server/utils/AiProviders/anthropic/index.js
@@ -34,7 +34,9 @@ class AnthropicLLM {
     this.embedder = embedder ?? new NativeEmbedder();
     this.defaultTemp = 0.7;

-    this.log(`Initialized with ${this.model}`);
+    this.log(
+      `Initialized with ${this.model}. Cache ${this.cacheControl ? `enabled (${this.cacheControl.ttl})` : "disabled"}`
+    );
   }

   log(text, ...args) {
@@ -57,6 +59,48 @@ class AnthropicLLM {
     return true;
   }

+  /**
+   * Parses the ANTHROPIC_CACHE_CONTROL ENV variable ("5m" or "1h").
+   *
+   * If caching is enabled and the prompt is under the model's minimum cacheable
+   * length (e.g. 1024 tokens), Anthropic simply ignores the cache_control block,
+   * so we can safely pass the content as-is whenever this opt-in feature is on.
+   * https://docs.claude.com/en/docs/build-with-claude/prompt-caching#cache-limitations
+   *
+   * Any other value (including "none") disables caching.
+   * @returns {null|{type: "ephemeral", ttl: "5m" | "1h"}} Cache control configuration
+   */
+  get cacheControl() {
+    // Memoize on the instance (null is a valid, cached result).
+    if (this._cacheControl !== undefined) return this._cacheControl;
+
+    if (!process.env.ANTHROPIC_CACHE_CONTROL) this._cacheControl = null;
+    else {
+      const normalized =
+        process.env.ANTHROPIC_CACHE_CONTROL.toLowerCase().trim();
+      if (["5m", "1h"].includes(normalized))
+        this._cacheControl = { type: "ephemeral", ttl: normalized };
+      else this._cacheControl = null;
+    }
+    return this._cacheControl;
+  }
+
+  /**
+   * Builds the system parameter with cache control if applicable.
+   * @param {string} systemContent - The system prompt content
+   * @returns {string|array} System parameter for the API call
+   */
+  #buildSystemPrompt(systemContent) {
+    if (!systemContent || !this.cacheControl) return systemContent;
+    return [
+      {
+        type: "text",
+        text: systemContent,
+        cache_control: this.cacheControl,
+      },
+    ];
+  }
+
   /**
    * Generates appropriate content array for a message + attachments.
   * @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}}
@@ -105,11 +149,12 @@ class AnthropicLLM {

   async getChatCompletion(messages = null, { temperature = 0.7 }) {
     try {
+      const systemContent = messages[0].content;
       const result = await LLMPerformanceMonitor.measureAsyncFunction(
         this.anthropic.messages.create({
           model: this.model,
           max_tokens: 4096,
-          system: messages[0].content, // Strip out the system message
+          system: this.#buildSystemPrompt(systemContent),
           messages: messages.slice(1), // Pop off the system message
           temperature: Number(temperature ?? this.defaultTemp),
         })
@@ -117,6 +162,7 @@ class AnthropicLLM {

       const promptTokens = result.output.usage.input_tokens;
       const completionTokens = result.output.usage.output_tokens;
+
       return {
         textResponse: result.output.content[0].text,
         metrics: {
@@ -134,11 +180,12 @@ class AnthropicLLM {
   }

   async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
+    const systemContent = messages[0].content;
     const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
       this.anthropic.messages.stream({
         model: this.model,
         max_tokens: 4096,
-        system: messages[0].content, // Strip out the system message
+        system: this.#buildSystemPrompt(systemContent),
         messages: messages.slice(1), // Pop off the system message
         temperature: Number(temperature ?? this.defaultTemp),
       }),
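The practical effect of `#buildSystemPrompt` on the Messages API payload, sketched with a hypothetical system prompt. The structured form follows Anthropic's prompt-caching API; note that, per Anthropic's docs, the 1h TTL may additionally require an extended-cache-TTL beta flag depending on API version.

```js
// Hypothetical prompt text; the two shapes below mirror #buildSystemPrompt.
const systemContent = "You are a helpful assistant for Acme Corp...";

// ANTHROPIC_CACHE_CONTROL unset or invalid -> plain string, no caching:
const uncached = systemContent;

// ANTHROPIC_CACHE_CONTROL="1h" -> structured block; Anthropic caches the
// system prompt prefix and reuses it on subsequent calls:
const cached = [
  {
    type: "text",
    text: systemContent,
    cache_control: { type: "ephemeral", ttl: "1h" },
  },
];
```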
diff --git a/server/utils/agents/aibitat/providers/anthropic.js b/server/utils/agents/aibitat/providers/anthropic.js
index 815ac88e..968f6144 100644
--- a/server/utils/agents/aibitat/providers/anthropic.js
+++ b/server/utils/agents/aibitat/providers/anthropic.js
@@ -23,14 +23,55 @@ class AnthropicProvider extends Provider {
     const client = new Anthropic(options);

     super(client);
-
     this.model = model;
   }

+  /**
+   * Parses the ANTHROPIC_CACHE_CONTROL ENV variable ("5m" or "1h").
+   *
+   * If caching is enabled and the prompt is under the model's minimum cacheable
+   * length (e.g. 1024 tokens), Anthropic simply ignores the cache_control block,
+   * so we can safely pass the content as-is whenever this opt-in feature is on.
+   * https://docs.claude.com/en/docs/build-with-claude/prompt-caching#cache-limitations
+   *
+   * Any other value (including "none") disables caching.
+   * @returns {null|{type: "ephemeral", ttl: "5m" | "1h"}} Cache control configuration
+   */
+  get cacheControl() {
+    // Memoize on the instance (null is a valid, cached result).
+    if (this._cacheControl !== undefined) return this._cacheControl;
+
+    if (!process.env.ANTHROPIC_CACHE_CONTROL) this._cacheControl = null;
+    else {
+      const normalized =
+        process.env.ANTHROPIC_CACHE_CONTROL.toLowerCase().trim();
+      if (["5m", "1h"].includes(normalized))
+        this._cacheControl = { type: "ephemeral", ttl: normalized };
+      else this._cacheControl = null;
+    }
+    return this._cacheControl;
+  }
+
   get supportsAgentStreaming() {
     return true;
   }

+  /**
+   * Builds the system parameter with cache control if applicable.
+   * @param {string} systemContent - The system prompt content
+   * @returns {string|array} System parameter for the API call
+   */
+  #buildSystemPrompt(systemContent) {
+    if (!systemContent || !this.cacheControl) return systemContent;
+    return [
+      {
+        type: "text",
+        text: systemContent,
+        cache_control: this.cacheControl,
+      },
+    ];
+  }
+
   #prepareMessages(messages = []) {
     // Extract system prompt and filter out any system messages from the main chat.
     let systemPrompt =
@@ -149,7 +190,7 @@ class AnthropicProvider extends Provider {
       {
         model: this.model,
         max_tokens: 4096,
-        system: systemPrompt,
+        system: this.#buildSystemPrompt(systemPrompt),
         messages: chats,
         stream: true,
         ...(Array.isArray(functions) && functions?.length > 0
@@ -276,7 +317,7 @@ class AnthropicProvider extends Provider {
       {
         model: this.model,
         max_tokens: 4096,
-        system: systemPrompt,
+        system: this.#buildSystemPrompt(systemPrompt),
         messages: chats,
         stream: false,
         ...(Array.isArray(functions) && functions?.length > 0
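The `cacheControl` getter is duplicated verbatim across the two providers, so its parsing rules can be read as one standalone function; extracting a shared helper like the sketch below would be a natural follow-up. The inputs shown are hypothetical.

```js
// Standalone rendition of the cacheControl getter's parsing logic.
function parseCacheControl(raw) {
  if (!raw) return null;
  const normalized = raw.toLowerCase().trim();
  return ["5m", "1h"].includes(normalized)
    ? { type: "ephemeral", ttl: normalized }
    : null;
}

parseCacheControl(undefined); // null -> caching disabled
parseCacheControl(" 5M ");    // { type: "ephemeral", ttl: "5m" }
parseCacheControl("1h");      // { type: "ephemeral", ttl: "1h" }
parseCacheControl("none");    // null -> explicit opt-out
parseCacheControl("10m");     // null -> unsupported TTL, caching disabled
```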
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index be92498a..3f34c1d7 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -58,6 +58,15 @@ const KEY_MAPPING = {
     envKey: "ANTHROPIC_MODEL_PREF",
     checks: [isNotEmpty],
   },
+  AnthropicCacheControl: {
+    envKey: "ANTHROPIC_CACHE_CONTROL",
+    checks: [
+      (input) =>
+        ["none", "5m", "1h"].includes(input)
+          ? null
+          : "Invalid cache control. Must be one of: none, 5m, 1h.",
+    ],
+  },
   GeminiLLMApiKey: {
     envKey: "GEMINI_API_KEY",
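Finally, a quick sketch of how the new `KEY_MAPPING` entry behaves when settings are saved; the standalone `check` below copies the validator from the diff and exercises it with hypothetical inputs.

```js
// Copied from the KEY_MAPPING.AnthropicCacheControl entry above.
const check = (input) =>
  ["none", "5m", "1h"].includes(input)
    ? null
    : "Invalid cache control. Must be one of: none, 5m, 1h.";

check("5m");   // null -> accepted, 5-minute cache
check("none"); // null -> accepted, caching disabled
check("10m");  // "Invalid cache control. Must be one of: none, 5m, 1h."
```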