diff --git a/server/utils/AiProviders/openRouter/index.js b/server/utils/AiProviders/openRouter/index.js
index c8e12748..28d4306b 100644
--- a/server/utils/AiProviders/openRouter/index.js
+++ b/server/utils/AiProviders/openRouter/index.js
@@ -304,13 +304,9 @@ class OpenRouterLLM {
         user: user?.id ? `user_${user.id}` : "",
       }),
       messages,
-      // We have to manually count the tokens
-      // OpenRouter has a ton of providers and they all can return slightly differently
-      // some return chunk.usage on STOP, some do it after stop, its inconsistent.
-      // So it is possible reported metrics are inaccurate since we cannot reliably
-      // catch the metrics before resolving the stream - so we just pretend this functionality
-      // is not available.
-      runPromptTokenCalculation: true,
+      // OpenRouter returns the usage in the stream as the very last chunk **after** the finish reason,
+      // so we don't need to run the prompt token calculation ourselves.
+      runPromptTokenCalculation: false,
       modelTag: this.model,
       provider: this.className,
     });
@@ -320,6 +316,8 @@ class OpenRouterLLM {
 
   /**
    * Handles the default stream response for a chat.
+   * - Handles the weird OpenRouter timeout behavior where the stream never self-closes.
+   * - Handles the usage metrics being returned in the stream as the very last chunk **after** the finish reason.
    * @param {import("express").Response} response
    * @param {import('../../helpers/chat/LLMPerformanceMonitor').MonitoredStream} stream
    * @param {Object} responseProps
@@ -328,6 +326,8 @@ class OpenRouterLLM {
   handleStream(response, stream, responseProps) {
     const timeoutThresholdMs = this.timeout;
     const { uuid = uuidv4(), sources = [] } = responseProps;
+    let hasUsageMetrics = false;
+    let usage = { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 };
 
     return new Promise(async (resolve) => {
       let fullText = "";
@@ -336,14 +336,8 @@
       let pplxCitations = []; // Array of inline citations for Perplexity models (if applicable)
       let isPerplexity = this.isPerplexityModel;
 
-      // Establish listener to early-abort a streaming response
-      // in case things go sideways or the user does not like the response.
-      // We preserve the generated text but continue as if chat was completed
-      // to preserve previously generated content.
       const handleAbort = () => {
-        stream?.endMeasurement({
-          completion_tokens: LLMPerformanceMonitor.countTokens(fullText),
-        });
+        stream?.endMeasurement(usage);
         clientAbortedHandler(resolve, fullText);
       };
       response.on("close", handleAbort);
@@ -375,9 +369,7 @@
           });
           clearInterval(timeoutCheck);
           response.removeListener("close", handleAbort);
-          stream?.endMeasurement({
-            completion_tokens: LLMPerformanceMonitor.countTokens(fullText),
-          });
+          stream?.endMeasurement(usage);
           resolve(fullText);
         }
       }, 500);
@@ -389,6 +381,15 @@
           const reasoningToken = message?.delta?.reasoning;
           lastChunkTime = Number(new Date());
 
+          if (chunk.hasOwnProperty("usage") && !hasUsageMetrics) {
+            hasUsageMetrics = true;
+            usage = {
+              prompt_tokens: chunk.usage.prompt_tokens,
+              completion_tokens: chunk.usage.completion_tokens,
+              total_tokens: chunk.usage.total_tokens,
+            };
+          }
+
           // Some models will return citations (e.g. Perplexity) - we should preserve them for inline citations if applicable.
           if (
             isPerplexity &&
@@ -464,7 +465,7 @@
             });
           }
 
-          if (message.finish_reason !== null) {
+          if (message?.finish_reason) {
             writeResponseChunk(response, {
               uuid,
               sources,
@@ -473,14 +474,14 @@
               close: true,
               error: false,
             });
-            response.removeListener("close", handleAbort);
-            clearInterval(timeoutCheck);
-            stream?.endMeasurement({
-              completion_tokens: LLMPerformanceMonitor.countTokens(fullText),
-            });
-            resolve(fullText);
           }
         }
+
+        // Stream completed naturally - resolve with final metrics
+        response.removeListener("close", handleAbort);
+        clearInterval(timeoutCheck);
+        stream?.endMeasurement(usage);
+        resolve(fullText);
       } catch (e) {
         writeResponseChunk(response, {
           uuid,
@@ -492,9 +493,7 @@
         });
         response.removeListener("close", handleAbort);
         clearInterval(timeoutCheck);
-        stream?.endMeasurement({
-          completion_tokens: LLMPerformanceMonitor.countTokens(fullText),
-        });
+        stream?.endMeasurement(usage);
         resolve(fullText);
       }
     });
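
The behavior this patch relies on can be reproduced outside AnythingLLM. Below is a minimal sketch (not part of the patch), assuming the `openai` Node SDK pointed at OpenRouter's OpenAI-compatible endpoint; the model slug and the `OPENROUTER_API_KEY` environment variable are placeholders. Because the usage chunk arrives *after* the chunk carrying `finish_reason`, the consumer must drain the stream fully rather than returning at `finish_reason`, which is exactly why the patch hoists `resolve()` out of the `for await` loop:

```js
// Minimal sketch: capture OpenRouter's trailing usage chunk from a stream.
// Assumptions: `openai` Node SDK, OpenRouter's OpenAI-compatible base URL,
// placeholder model slug and OPENROUTER_API_KEY env var.
const OpenAI = require("openai");

const client = new OpenAI({
  baseURL: "https://openrouter.ai/api/v1",
  apiKey: process.env.OPENROUTER_API_KEY,
});

async function main() {
  const stream = await client.chat.completions.create({
    model: "openai/gpt-4o-mini", // placeholder model slug
    messages: [{ role: "user", content: "Say hello." }],
    stream: true,
  });

  let fullText = "";
  let usage = null;

  for await (const chunk of stream) {
    const message = chunk?.choices?.[0];
    fullText += message?.delta?.content || "";

    // Do NOT return when finish_reason appears - the usage chunk
    // is sent after it, so keep iterating until the stream ends.
    if (chunk.usage) usage = chunk.usage;
  }

  // Only here, after the stream is fully drained, are the metrics reliable.
  console.log(fullText, usage);
}

main();
```

Resolving after the loop also means the 500ms `timeoutCheck` sweep remains the only early exit, so real provider-reported usage is recorded whenever it was sent, and the zeroed defaults are reported otherwise.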