diff --git a/docker/.env.example b/docker/.env.example
index 35731600..4db7aeff 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -27,6 +27,7 @@ GID='1000'
 # LLM_PROVIDER='anthropic'
 # ANTHROPIC_API_KEY=sk-ant-xxxx
 # ANTHROPIC_MODEL_PREF='claude-2'
+# ANTHROPIC_CACHE_CONTROL="5m" # Enable prompt caching (5m=5min cache, 1h=1hour cache). Reduces costs and improves speed by caching system prompts.

 # LLM_PROVIDER='lmstudio'
 # LMSTUDIO_BASE_PATH='http://your-server:1234/v1'

diff --git a/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx b/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx
index 95cda6ef..478e1398 100644
--- a/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx
+++ b/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx
@@ -1,7 +1,9 @@
 import { useState, useEffect } from "react";
 import System from "@/models/system";
+import { CaretDown, CaretUp } from "@phosphor-icons/react";

 export default function AnthropicAiOptions({ settings }) {
+  const [showAdvancedControls, setShowAdvancedControls] = useState(false);
   const [inputValue, setInputValue] = useState(settings?.AnthropicApiKey);
   const [anthropicApiKey, setAnthropicApiKey] = useState(
     settings?.AnthropicApiKey
@@ -27,7 +29,6 @@ export default function AnthropicAiOptions({ settings }) {
           onBlur={() => setAnthropicApiKey(inputValue)}
         />
-
       {!settings?.credentialsOnly && (
         ...
       )}
+      ...
   );
 }
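The body of the new advanced-controls block does not survive in the excerpt above. For orientation only, here is a minimal sketch of how the pieces that *are* visible could fit together: the `showAdvancedControls` state, the Caret icon imports, the `AnthropicCacheControl` settings key, and the `none`/`5m`/`1h` values come from this PR; all markup, labels, and structure below are illustrative assumptions, not the PR's actual JSX.

```jsx
import { useState } from "react";
import { CaretDown, CaretUp } from "@phosphor-icons/react";

// Hypothetical sketch -- not the PR's real markup. The field name matches
// the AnthropicCacheControl key registered in systemSettings.js and
// updateENV.js further down in this diff.
function AnthropicCacheControlPicker({ settings }) {
  const [showAdvancedControls, setShowAdvancedControls] = useState(false);
  return (
    <div>
      <button
        type="button"
        onClick={() => setShowAdvancedControls(!showAdvancedControls)}
      >
        Advanced Controls{" "}
        {showAdvancedControls ? <CaretUp size={14} /> : <CaretDown size={14} />}
      </button>
      {showAdvancedControls && (
        <select
          name="AnthropicCacheControl"
          defaultValue={settings?.AnthropicCacheControl || "none"}
        >
          <option value="none">Disabled</option>
          <option value="5m">Cache for 5 minutes</option>
          <option value="1h">Cache for 1 hour</option>
        </select>
      )}
    </div>
  );
}
```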
diff --git a/server/.env.example b/server/.env.example
index 8b5c2ea8..aecd0e14 100644
--- a/server/.env.example
+++ b/server/.env.example
@@ -24,6 +24,7 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
 # LLM_PROVIDER='anthropic'
 # ANTHROPIC_API_KEY=sk-ant-xxxx
 # ANTHROPIC_MODEL_PREF='claude-2'
+# ANTHROPIC_CACHE_CONTROL="5m" # Enable prompt caching (5m=5min cache, 1h=1hour cache). Reduces costs and improves speed by caching system prompts.

 # LLM_PROVIDER='lmstudio'
 # LMSTUDIO_BASE_PATH='http://your-server:1234/v1'

diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js
index 9015257c..ce3a0063 100644
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@@ -481,6 +481,7 @@ const SystemSettings = {
     // Anthropic Keys
     AnthropicApiKey: !!process.env.ANTHROPIC_API_KEY,
     AnthropicModelPref: process.env.ANTHROPIC_MODEL_PREF || "claude-2",
+    AnthropicCacheControl: process.env.ANTHROPIC_CACHE_CONTROL || "none",

     // Gemini Keys
     GeminiLLMApiKey: !!process.env.GEMINI_API_KEY,

diff --git a/server/utils/AiProviders/anthropic/index.js b/server/utils/AiProviders/anthropic/index.js
index 2170ba83..1dd1f290 100644
--- a/server/utils/AiProviders/anthropic/index.js
+++ b/server/utils/AiProviders/anthropic/index.js
@@ -34,7 +34,9 @@ class AnthropicLLM {
     this.embedder = embedder ?? new NativeEmbedder();
     this.defaultTemp = 0.7;

-    this.log(`Initialized with ${this.model}`);
+    this.log(
+      `Initialized with ${this.model}. Cache ${this.cacheControl ? `enabled (${this.cacheControl.ttl})` : "disabled"}`
+    );
   }

   log(text, ...args) {
@@ -57,6 +59,48 @@ class AnthropicLLM {
     return true;
   }

+  /**
+   * Parses the ANTHROPIC_CACHE_CONTROL ENV variable ("5m" or "1h").
+   *
+   * If caching is enabled and the prompt is under the model's minimum cacheable
+   * length (e.g. 1024 tokens), Anthropic simply ignores the cache_control block,
+   * so we can safely pass the content as-is whenever this opt-in feature is on.
+   * https://docs.claude.com/en/docs/build-with-claude/prompt-caching#cache-limitations
+   *
+   * Any other value (including "none") disables caching.
+   * @returns {null|{type: "ephemeral", ttl: "5m" | "1h"}} Cache control configuration
+   */
+  get cacheControl() {
+    // Memoize on the instance (null is a valid, cached result).
+    if (this._cacheControl !== undefined) return this._cacheControl;
+
+    if (!process.env.ANTHROPIC_CACHE_CONTROL) this._cacheControl = null;
+    else {
+      const normalized =
+        process.env.ANTHROPIC_CACHE_CONTROL.toLowerCase().trim();
+      if (["5m", "1h"].includes(normalized))
+        this._cacheControl = { type: "ephemeral", ttl: normalized };
+      else this._cacheControl = null;
+    }
+    return this._cacheControl;
+  }
+
+  /**
+   * Builds the system parameter with cache control if applicable.
+   * @param {string} systemContent - The system prompt content
+   * @returns {string|array} System parameter for the API call
+   */
+  #buildSystemPrompt(systemContent) {
+    if (!systemContent || !this.cacheControl) return systemContent;
+    return [
+      {
+        type: "text",
+        text: systemContent,
+        cache_control: this.cacheControl,
+      },
+    ];
+  }
+
   /**
    * Generates appropriate content array for a message + attachments.
   * @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}}
@@ -105,11 +149,12 @@ class AnthropicLLM {

   async getChatCompletion(messages = null, { temperature = 0.7 }) {
     try {
+      const systemContent = messages[0].content;
       const result = await LLMPerformanceMonitor.measureAsyncFunction(
         this.anthropic.messages.create({
           model: this.model,
           max_tokens: 4096,
-          system: messages[0].content, // Strip out the system message
+          system: this.#buildSystemPrompt(systemContent),
           messages: messages.slice(1), // Pop off the system message
           temperature: Number(temperature ?? this.defaultTemp),
         })
@@ -117,6 +162,7 @@ class AnthropicLLM {

       const promptTokens = result.output.usage.input_tokens;
       const completionTokens = result.output.usage.output_tokens;
+
       return {
         textResponse: result.output.content[0].text,
         metrics: {
@@ -134,11 +180,12 @@ class AnthropicLLM {
   }

   async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
+    const systemContent = messages[0].content;
     const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
       this.anthropic.messages.stream({
         model: this.model,
         max_tokens: 4096,
-        system: messages[0].content, // Strip out the system message
+        system: this.#buildSystemPrompt(systemContent),
         messages: messages.slice(1), // Pop off the system message
         temperature: Number(temperature ?? this.defaultTemp),
       }),
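The practical effect of `#buildSystemPrompt` on the Messages API payload, sketched with a hypothetical system prompt. The structured form follows Anthropic's prompt-caching API; note that, per Anthropic's docs, the 1h TTL may additionally require an extended-cache-TTL beta flag depending on API version.

```js
// Hypothetical prompt text; the two shapes below mirror #buildSystemPrompt.
const systemContent = "You are a helpful assistant for Acme Corp...";

// ANTHROPIC_CACHE_CONTROL unset or invalid -> plain string, no caching:
const uncached = systemContent;

// ANTHROPIC_CACHE_CONTROL="1h" -> structured block; Anthropic caches the
// system prompt prefix and reuses it on subsequent calls:
const cached = [
  {
    type: "text",
    text: systemContent,
    cache_control: { type: "ephemeral", ttl: "1h" },
  },
];
```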
diff --git a/server/utils/agents/aibitat/providers/anthropic.js b/server/utils/agents/aibitat/providers/anthropic.js
index 815ac88e..968f6144 100644
--- a/server/utils/agents/aibitat/providers/anthropic.js
+++ b/server/utils/agents/aibitat/providers/anthropic.js
@@ -23,14 +23,55 @@ class AnthropicProvider extends Provider {
     const client = new Anthropic(options);

     super(client);
-
     this.model = model;
   }

+  /**
+   * Parses the ANTHROPIC_CACHE_CONTROL ENV variable ("5m" or "1h").
+   *
+   * If caching is enabled and the prompt is under the model's minimum cacheable
+   * length (e.g. 1024 tokens), Anthropic simply ignores the cache_control block,
+   * so we can safely pass the content as-is whenever this opt-in feature is on.
+   * https://docs.claude.com/en/docs/build-with-claude/prompt-caching#cache-limitations
+   *
+   * Any other value (including "none") disables caching.
+   * @returns {null|{type: "ephemeral", ttl: "5m" | "1h"}} Cache control configuration
+   */
+  get cacheControl() {
+    // Memoize on the instance (null is a valid, cached result).
+    if (this._cacheControl !== undefined) return this._cacheControl;
+
+    if (!process.env.ANTHROPIC_CACHE_CONTROL) this._cacheControl = null;
+    else {
+      const normalized =
+        process.env.ANTHROPIC_CACHE_CONTROL.toLowerCase().trim();
+      if (["5m", "1h"].includes(normalized))
+        this._cacheControl = { type: "ephemeral", ttl: normalized };
+      else this._cacheControl = null;
+    }
+    return this._cacheControl;
+  }
+
   get supportsAgentStreaming() {
     return true;
   }

+  /**
+   * Builds the system parameter with cache control if applicable.
+   * @param {string} systemContent - The system prompt content
+   * @returns {string|array} System parameter for the API call
+   */
+  #buildSystemPrompt(systemContent) {
+    if (!systemContent || !this.cacheControl) return systemContent;
+    return [
+      {
+        type: "text",
+        text: systemContent,
+        cache_control: this.cacheControl,
+      },
+    ];
+  }
+
   #prepareMessages(messages = []) {
     // Extract system prompt and filter out any system messages from the main chat.
     let systemPrompt =
@@ -149,7 +190,7 @@ class AnthropicProvider extends Provider {
       {
         model: this.model,
         max_tokens: 4096,
-        system: systemPrompt,
+        system: this.#buildSystemPrompt(systemPrompt),
         messages: chats,
         stream: true,
         ...(Array.isArray(functions) && functions?.length > 0
@@ -276,7 +317,7 @@ class AnthropicProvider extends Provider {
       {
         model: this.model,
         max_tokens: 4096,
-        system: systemPrompt,
+        system: this.#buildSystemPrompt(systemPrompt),
         messages: chats,
         stream: false,
         ...(Array.isArray(functions) && functions?.length > 0
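The `cacheControl` getter is duplicated verbatim across the two providers, so its parsing rules can be read as one standalone function; extracting a shared helper like the sketch below would be a natural follow-up. The inputs shown are hypothetical.

```js
// Standalone rendition of the cacheControl getter's parsing logic.
function parseCacheControl(raw) {
  if (!raw) return null;
  const normalized = raw.toLowerCase().trim();
  return ["5m", "1h"].includes(normalized)
    ? { type: "ephemeral", ttl: normalized }
    : null;
}

parseCacheControl(undefined); // null -> caching disabled
parseCacheControl(" 5M ");    // { type: "ephemeral", ttl: "5m" }
parseCacheControl("1h");      // { type: "ephemeral", ttl: "1h" }
parseCacheControl("none");    // null -> explicit opt-out
parseCacheControl("10m");     // null -> unsupported TTL, caching disabled
```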
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index be92498a..3f34c1d7 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -58,6 +58,15 @@ const KEY_MAPPING = {
     envKey: "ANTHROPIC_MODEL_PREF",
     checks: [isNotEmpty],
   },
+  AnthropicCacheControl: {
+    envKey: "ANTHROPIC_CACHE_CONTROL",
+    checks: [
+      (input) =>
+        ["none", "5m", "1h"].includes(input)
+          ? null
+          : "Invalid cache control. Must be one of: none, 5m, 1h.",
+    ],
+  },
   GeminiLLMApiKey: {
     envKey: "GEMINI_API_KEY",
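Finally, a quick sketch of how the new `KEY_MAPPING` entry behaves when settings are saved; the standalone `check` below copies the validator from the diff and exercises it with hypothetical inputs.

```js
// Copied from the KEY_MAPPING.AnthropicCacheControl entry above.
const check = (input) =>
  ["none", "5m", "1h"].includes(input)
    ? null
    : "Invalid cache control. Must be one of: none, 5m, 1h.";

check("5m");   // null -> accepted, 5-minute cache
check("none"); // null -> accepted, caching disabled
check("10m");  // "Invalid cache control. Must be one of: none, 5m, 1h."
```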