From 664f466e3f4c0c780b27ff0808bf10938113b718 Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Sun, 14 Dec 2025 14:46:55 -0800 Subject: [PATCH] 4601 log model on response (#4781) * add model tag to chatCompletion * add modelTag `model` to async streaming; keeps default arguments for prompt token calculation where applied via explicit arg * fix HF default arg * render all performance metrics as available for backward compatibility; add `timestamp` to both sync/async chat methods * extract metrics string to function --- .../Actions/RenderMetrics/index.jsx | 26 ++++++++++++++++--- server/utils/AiProviders/anthropic/index.js | 5 +++- server/utils/AiProviders/apipie/index.js | 6 ++++- server/utils/AiProviders/azureOpenAi/index.js | 6 ++++- server/utils/AiProviders/bedrock/index.js | 9 ++++--- server/utils/AiProviders/cohere/index.js | 5 +++- server/utils/AiProviders/cometapi/index.js | 6 ++++- server/utils/AiProviders/deepseek/index.js | 5 +++- .../AiProviders/dellProAiStudio/index.js | 6 ++++- server/utils/AiProviders/fireworksAi/index.js | 5 +++- server/utils/AiProviders/foundry/index.js | 6 ++++- server/utils/AiProviders/gemini/index.js | 5 +++- .../utils/AiProviders/genericOpenAi/index.js | 7 +++-- server/utils/AiProviders/giteeai/index.js | 5 +++- server/utils/AiProviders/groq/index.js | 5 +++- server/utils/AiProviders/huggingface/index.js | 6 ++++- server/utils/AiProviders/koboldCPP/index.js | 6 ++++- server/utils/AiProviders/liteLLM/index.js | 7 +++-- server/utils/AiProviders/lmStudio/index.js | 6 ++++- server/utils/AiProviders/localAi/index.js | 6 ++++- server/utils/AiProviders/mistral/index.js | 5 +++- server/utils/AiProviders/moonshotAi/index.js | 6 ++++- server/utils/AiProviders/novita/index.js | 6 ++++- server/utils/AiProviders/nvidiaNim/index.js | 6 ++++- server/utils/AiProviders/ollama/index.js | 5 +++- server/utils/AiProviders/openAi/index.js | 5 +++- server/utils/AiProviders/openRouter/index.js | 6 ++++- server/utils/AiProviders/perplexity/index.js | 6 ++++- server/utils/AiProviders/ppio/index.js | 6 ++++- .../utils/AiProviders/textGenWebUI/index.js | 6 ++++- server/utils/AiProviders/togetherAi/index.js | 5 +++- server/utils/AiProviders/xai/index.js | 5 +++- server/utils/AiProviders/zai/index.js | 5 +++- .../helpers/chat/LLMPerformanceMonitor.js | 8 ++++-- 34 files changed, 176 insertions(+), 42 deletions(-) diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/Actions/RenderMetrics/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/Actions/RenderMetrics/index.jsx index d5f99c18..c9d3af7b 100644 --- a/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/Actions/RenderMetrics/index.jsx +++ b/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/Actions/RenderMetrics/index.jsx @@ -1,3 +1,4 @@ +import { formatDateTimeAsMoment } from "@/utils/directories"; import { numberWithCommas } from "@/utils/numbers"; import React, { useEffect, useState, useContext } from "react"; const MetricsContext = React.createContext(); @@ -41,6 +42,26 @@ function getAutoShowMetrics() { return window?.localStorage?.getItem(SHOW_METRICS_KEY) === "true"; } +/** + * Build the metrics string for a given metrics object + * - Model name + * - Duration and output TPS + * - Timestamp + * @param {metrics: {duration:number, outputTps: number, model?: string, timestamp?: number}} metrics + * @returns {string} + */ +function buildMetricsString(metrics = {}) { + return [
metrics?.model ? metrics.model : "", + `${formatDuration(metrics.duration)} (${formatTps(metrics.outputTps)} tok/s)`, + metrics?.timestamp + ? formatDateTimeAsMoment(metrics.timestamp, "MMM D, h:mm A") + : "", + ] + .filter(Boolean) + .join(" · "); +} + /** * Toggle the show metrics setting in localStorage `anythingllm_show_chat_metrics` key * @returns {void} */ @@ -88,7 +109,7 @@ export function MetricsProvider({ children }) { /** * Render the metrics for a given chat, if available - * @param {metrics: {duration:number, outputTps: number}} props + * @param {metrics: {duration:number, outputTps: number, model: string, timestamp: number}} props * @returns */ export default function RenderMetrics({ metrics = {} }) { @@ -110,8 +131,7 @@ export default function RenderMetrics({ metrics = {} }) { className={`border-none flex justify-end items-center gap-x-[8px] ${showMetricsAutomatically ? "opacity-100" : "opacity-0"} md:group-hover:opacity-100 transition-all duration-300`} >

- {formatDuration(metrics.duration)} ({formatTps(metrics.outputTps)}{" "} - tok/s) + {buildMetricsString(metrics)}

); diff --git a/server/utils/AiProviders/anthropic/index.js b/server/utils/AiProviders/anthropic/index.js index 1dd1f290..34c1f238 100644 --- a/server/utils/AiProviders/anthropic/index.js +++ b/server/utils/AiProviders/anthropic/index.js @@ -171,6 +171,8 @@ class AnthropicLLM { total_tokens: promptTokens + completionTokens, outputTps: completionTokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } catch (error) { @@ -190,7 +192,8 @@ class AnthropicLLM { temperature: Number(temperature ?? this.defaultTemp), }), messages, - false + false, + this.model ); return measuredStreamRequest; diff --git a/server/utils/AiProviders/apipie/index.js b/server/utils/AiProviders/apipie/index.js index 534e7c27..e1ff3bdf 100644 --- a/server/utils/AiProviders/apipie/index.js +++ b/server/utils/AiProviders/apipie/index.js @@ -220,6 +220,8 @@ class ApiPieLLM { outputTps: (result.output.usage?.completion_tokens || 0) / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -237,7 +239,9 @@ class ApiPieLLM { messages, temperature, }), - messages + messages, + true, + this.model ); return measuredStreamRequest; } diff --git a/server/utils/AiProviders/azureOpenAi/index.js b/server/utils/AiProviders/azureOpenAi/index.js index 274bbb19..03609cc4 100644 --- a/server/utils/AiProviders/azureOpenAi/index.js +++ b/server/utils/AiProviders/azureOpenAi/index.js @@ -174,6 +174,8 @@ class AzureOpenAiLLM { total_tokens: result.output.usage.total_tokens || 0, outputTps: result.output.usage.completion_tokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -192,7 +194,9 @@ class AzureOpenAiLLM { n: 1, stream: true, }), - messages + messages, + true, + this.model ); return measuredStreamRequest; diff --git a/server/utils/AiProviders/bedrock/index.js b/server/utils/AiProviders/bedrock/index.js index 5d266474..a9686342 100644 --- a/server/utils/AiProviders/bedrock/index.js +++ b/server/utils/AiProviders/bedrock/index.js @@ -423,9 +423,7 @@ class AWSBedrockLLM { ); } throw new Error(`AWSBedrock::getChatCompletion failed. ${e.message}`); - }), - messages, - false + }) ); const response = result.output; @@ -450,6 +448,8 @@ class AWSBedrockLLM { total_tokens: response?.usage?.totalTokens ?? 
0, outputTps: outputTps, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -492,7 +492,8 @@ class AWSBedrockLLM { const measuredStreamRequest = await LLMPerformanceMonitor.measureStream( stream, messages, - false // Indicate it's not a function call measurement + false, + this.model ); return measuredStreamRequest; } catch (e) { diff --git a/server/utils/AiProviders/cohere/index.js b/server/utils/AiProviders/cohere/index.js index 98f8741a..1f23fbdd 100644 --- a/server/utils/AiProviders/cohere/index.js +++ b/server/utils/AiProviders/cohere/index.js @@ -124,6 +124,8 @@ class CohereLLM { total_tokens: promptTokens + completionTokens, outputTps: completionTokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -139,7 +141,8 @@ class CohereLLM { temperature, }), messages, - false + false, + this.model ); return measuredStreamRequest; diff --git a/server/utils/AiProviders/cometapi/index.js b/server/utils/AiProviders/cometapi/index.js index 23e069d5..8c1df6af 100644 --- a/server/utils/AiProviders/cometapi/index.js +++ b/server/utils/AiProviders/cometapi/index.js @@ -225,6 +225,8 @@ class CometApiLLM { total_tokens: result.output.usage.total_tokens || 0, outputTps: result.output.usage.completion_tokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -242,7 +244,9 @@ class CometApiLLM { messages, temperature, }), - messages + messages, + true, + this.model ); return measuredStreamRequest; } diff --git a/server/utils/AiProviders/deepseek/index.js b/server/utils/AiProviders/deepseek/index.js index 379012d0..70abf658 100644 --- a/server/utils/AiProviders/deepseek/index.js +++ b/server/utils/AiProviders/deepseek/index.js @@ -130,6 +130,8 @@ class DeepSeekLLM { total_tokens: result.output.usage.total_tokens || 0, outputTps: result.output.usage.completion_tokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -148,7 +150,8 @@ class DeepSeekLLM { temperature, }), messages, - false + false, + this.model ); return measuredStreamRequest; diff --git a/server/utils/AiProviders/dellProAiStudio/index.js b/server/utils/AiProviders/dellProAiStudio/index.js index 7c233ec2..5cac4bfc 100644 --- a/server/utils/AiProviders/dellProAiStudio/index.js +++ b/server/utils/AiProviders/dellProAiStudio/index.js @@ -165,6 +165,8 @@ class DellProAiStudioLLM { total_tokens: result.output.usage?.total_tokens || 0, outputTps: result.output.usage?.completion_tokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -182,7 +184,9 @@ class DellProAiStudioLLM { messages, temperature, }), - messages + messages, + true, + this.model ); return measuredStreamRequest; } diff --git a/server/utils/AiProviders/fireworksAi/index.js b/server/utils/AiProviders/fireworksAi/index.js index 99afa083..f3014c57 100644 --- a/server/utils/AiProviders/fireworksAi/index.js +++ b/server/utils/AiProviders/fireworksAi/index.js @@ -163,6 +163,8 @@ class FireworksAiLLM { total_tokens: result.output.usage.total_tokens || 0, outputTps: result.output.usage.completion_tokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -181,7 +183,8 @@ class FireworksAiLLM { temperature, }), messages, - false + false, + this.model ); return measuredStreamRequest; } diff --git a/server/utils/AiProviders/foundry/index.js b/server/utils/AiProviders/foundry/index.js index 
22a9e380..b54ba311 100644 --- a/server/utils/AiProviders/foundry/index.js +++ b/server/utils/AiProviders/foundry/index.js @@ -224,6 +224,8 @@ class FoundryLLM { total_tokens: result.output.usage.total_tokens || 0, outputTps: result.output.usage.completion_tokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -242,7 +244,9 @@ class FoundryLLM { temperature, max_completion_tokens: this.promptWindowLimit(), }), - messages + messages, + true, + this.model ); return measuredStreamRequest; } diff --git a/server/utils/AiProviders/gemini/index.js b/server/utils/AiProviders/gemini/index.js index 4c352557..f1b5bb9d 100644 --- a/server/utils/AiProviders/gemini/index.js +++ b/server/utils/AiProviders/gemini/index.js @@ -405,6 +405,8 @@ class GeminiLLM { total_tokens: result.output.usage.total_tokens || 0, outputTps: result.output.usage.completion_tokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -421,7 +423,8 @@ class GeminiLLM { }, }), messages, - false + false, + this.model ); return measuredStreamRequest; diff --git a/server/utils/AiProviders/genericOpenAi/index.js b/server/utils/AiProviders/genericOpenAi/index.js index 8e32ad59..67b41b40 100644 --- a/server/utils/AiProviders/genericOpenAi/index.js +++ b/server/utils/AiProviders/genericOpenAi/index.js @@ -193,6 +193,8 @@ class GenericOpenAiLLM { outputTps: (result.output?.usage?.completion_tokens || 0) / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -206,9 +208,10 @@ class GenericOpenAiLLM { temperature, max_tokens: this.maxTokens, }), - messages + messages, // runPromptTokenCalculation: true - There is no way to know if the generic provider connected is returning - // the correct usage metrics if any at all since any provider could be connected.
+ true, + this.model ); return measuredStreamRequest; } diff --git a/server/utils/AiProviders/giteeai/index.js b/server/utils/AiProviders/giteeai/index.js index e74a6d55..62abf6f3 100644 --- a/server/utils/AiProviders/giteeai/index.js +++ b/server/utils/AiProviders/giteeai/index.js @@ -170,6 +170,8 @@ class GiteeAILLM { total_tokens: result.output.usage.total_tokens || 0, outputTps: result.output.usage.completion_tokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -183,7 +185,8 @@ class GiteeAILLM { temperature, }), messages, - false + false, + this.model ); return measuredStreamRequest; diff --git a/server/utils/AiProviders/groq/index.js b/server/utils/AiProviders/groq/index.js index 8eddefab..db9b52d6 100644 --- a/server/utils/AiProviders/groq/index.js +++ b/server/utils/AiProviders/groq/index.js @@ -203,6 +203,8 @@ class GroqLLM { result.output.usage.completion_tokens / result.output.usage.completion_time, duration: result.output.usage.total_time, + model: this.model, + timestamp: new Date(), }, }; } @@ -221,7 +223,8 @@ class GroqLLM { temperature, }), messages, - false + false, + this.model ); return measuredStreamRequest; diff --git a/server/utils/AiProviders/huggingface/index.js b/server/utils/AiProviders/huggingface/index.js index f4b6100e..cc0b0633 100644 --- a/server/utils/AiProviders/huggingface/index.js +++ b/server/utils/AiProviders/huggingface/index.js @@ -117,6 +117,8 @@ class HuggingFaceLLM { outputTps: (result.output.usage?.completion_tokens || 0) / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -129,7 +131,9 @@ class HuggingFaceLLM { messages, temperature, }), - messages + messages, + true, + this.model ); return measuredStreamRequest; } diff --git a/server/utils/AiProviders/koboldCPP/index.js b/server/utils/AiProviders/koboldCPP/index.js index b795b152..d8e78efc 100644 --- a/server/utils/AiProviders/koboldCPP/index.js +++ b/server/utils/AiProviders/koboldCPP/index.js @@ -160,6 +160,8 @@ class KoboldCPPLLM { total_tokens: promptTokens + completionTokens, outputTps: completionTokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -173,7 +175,9 @@ class KoboldCPPLLM { temperature, max_tokens: this.maxTokens, }), - messages + messages, + true, + this.model ); return measuredStreamRequest; } diff --git a/server/utils/AiProviders/liteLLM/index.js b/server/utils/AiProviders/liteLLM/index.js index 91aa8517..2f9891ce 100644 --- a/server/utils/AiProviders/liteLLM/index.js +++ b/server/utils/AiProviders/liteLLM/index.js @@ -154,6 +154,8 @@ class LiteLLM { outputTps: (result.output.usage?.completion_tokens || 0) / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -167,9 +169,10 @@ class LiteLLM { temperature, max_tokens: parseInt(this.maxTokens), // LiteLLM requires int }), - messages + messages, // runPromptTokenCalculation: true - We manually count the tokens because they may or may not be provided in the stream - // responses depending on LLM connected. If they are provided, then we counted for nothing, but better than nothing. 
+ true, + this.model ); return measuredStreamRequest; diff --git a/server/utils/AiProviders/lmStudio/index.js b/server/utils/AiProviders/lmStudio/index.js index 3c9b62b1..bc0c80fb 100644 --- a/server/utils/AiProviders/lmStudio/index.js +++ b/server/utils/AiProviders/lmStudio/index.js @@ -234,6 +234,8 @@ class LMStudioLLM { total_tokens: result.output.usage?.total_tokens || 0, outputTps: result.output.usage?.completion_tokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -251,7 +253,9 @@ class LMStudioLLM { messages, temperature, }), - messages + messages, + true, + this.model ); return measuredStreamRequest; } diff --git a/server/utils/AiProviders/localAi/index.js b/server/utils/AiProviders/localAi/index.js index f62fe70d..020ebdcc 100644 --- a/server/utils/AiProviders/localAi/index.js +++ b/server/utils/AiProviders/localAi/index.js @@ -145,6 +145,8 @@ class LocalAiLLM { total_tokens: promptTokens + completionTokens, outputTps: completionTokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -162,7 +164,9 @@ class LocalAiLLM { messages, temperature, }), - messages + messages, + true, + this.model ); return measuredStreamRequest; } diff --git a/server/utils/AiProviders/mistral/index.js b/server/utils/AiProviders/mistral/index.js index 4cf547cc..e520c481 100644 --- a/server/utils/AiProviders/mistral/index.js +++ b/server/utils/AiProviders/mistral/index.js @@ -139,6 +139,8 @@ class MistralLLM { total_tokens: result.output.usage.total_tokens || 0, outputTps: result.output.usage.completion_tokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -157,7 +159,8 @@ class MistralLLM { temperature, }), messages, - false + false, + this.model ); return measuredStreamRequest; } diff --git a/server/utils/AiProviders/moonshotAi/index.js b/server/utils/AiProviders/moonshotAi/index.js index b00f7213..8d505332 100644 --- a/server/utils/AiProviders/moonshotAi/index.js +++ b/server/utils/AiProviders/moonshotAi/index.js @@ -136,6 +136,8 @@ class MoonshotAiLLM { total_tokens: result.output.usage.total_tokens || 0, outputTps: result.output.usage.completion_tokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -148,7 +150,9 @@ class MoonshotAiLLM { messages, temperature, }), - messages + messages, + true, + this.model ); return measuredStreamRequest; diff --git a/server/utils/AiProviders/novita/index.js b/server/utils/AiProviders/novita/index.js index 69c06753..d1fc3bb3 100644 --- a/server/utils/AiProviders/novita/index.js +++ b/server/utils/AiProviders/novita/index.js @@ -225,6 +225,8 @@ class NovitaLLM { total_tokens: result.output.usage.total_tokens || 0, outputTps: result.output.usage.completion_tokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -242,7 +244,9 @@ class NovitaLLM { messages, temperature, }), - messages + messages, + true, + this.model ); return measuredStreamRequest; } diff --git a/server/utils/AiProviders/nvidiaNim/index.js b/server/utils/AiProviders/nvidiaNim/index.js index f932625c..62cd1ad3 100644 --- a/server/utils/AiProviders/nvidiaNim/index.js +++ b/server/utils/AiProviders/nvidiaNim/index.js @@ -184,6 +184,8 @@ class NvidiaNimLLM { total_tokens: result.output.usage.total_tokens || 0, outputTps: result.output.usage.completion_tokens / result.duration, duration: result.duration, + model: this.model, + timestamp: 
new Date(), }, }; } @@ -201,7 +203,9 @@ class NvidiaNimLLM { messages, temperature, }), - messages + messages, + true, + this.model ); return measuredStreamRequest; } diff --git a/server/utils/AiProviders/ollama/index.js b/server/utils/AiProviders/ollama/index.js index 1c3ab9b9..1782fe60 100644 --- a/server/utils/AiProviders/ollama/index.js +++ b/server/utils/AiProviders/ollama/index.js @@ -305,6 +305,8 @@ class OllamaAILLM { outputTps: result.output.usage.completion_tokens / result.output.usage.duration, duration: result.output.usage.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -326,7 +328,8 @@ class OllamaAILLM { }, }), messages, - false + false, + this.model ).catch((e) => { throw this.#errorHandler(e); }); diff --git a/server/utils/AiProviders/openAi/index.js b/server/utils/AiProviders/openAi/index.js index 5ea984fe..620923c1 100644 --- a/server/utils/AiProviders/openAi/index.js +++ b/server/utils/AiProviders/openAi/index.js @@ -175,6 +175,8 @@ class OpenAiLLM { ? usage.output_tokens / result.duration : 0, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -194,7 +196,8 @@ class OpenAiLLM { temperature: this.#temperature(this.model, temperature), }), messages, - false + false, + this.model ); return measuredStreamRequest; diff --git a/server/utils/AiProviders/openRouter/index.js b/server/utils/AiProviders/openRouter/index.js index 4c1e477a..0b81243d 100644 --- a/server/utils/AiProviders/openRouter/index.js +++ b/server/utils/AiProviders/openRouter/index.js @@ -276,6 +276,8 @@ class OpenRouterLLM { total_tokens: result.output.usage.total_tokens || 0, outputTps: result.output.usage.completion_tokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -300,13 +302,15 @@ class OpenRouterLLM { include_reasoning: true, user: user?.id ? `user_${user.id}` : "", }), - messages + messages, // We have to manually count the tokens // OpenRouter has a ton of providers and they all can return slightly differently // some return chunk.usage on STOP, some do it after stop, it's inconsistent. // So it is possible reported metrics are inaccurate since we cannot reliably // catch the metrics before resolving the stream - so we just pretend this functionality // is not available.
+ true, + this.model ); return measuredStreamRequest; diff --git a/server/utils/AiProviders/perplexity/index.js b/server/utils/AiProviders/perplexity/index.js index 5007c4c5..0bd81690 100644 --- a/server/utils/AiProviders/perplexity/index.js +++ b/server/utils/AiProviders/perplexity/index.js @@ -117,6 +117,8 @@ class PerplexityLLM { total_tokens: result.output.usage?.total_tokens || 0, outputTps: result.output.usage?.completion_tokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -134,7 +136,9 @@ class PerplexityLLM { messages, temperature, }), - messages + messages, + true, + this.model ); return measuredStreamRequest; } diff --git a/server/utils/AiProviders/ppio/index.js b/server/utils/AiProviders/ppio/index.js index bef7fada..3532e854 100644 --- a/server/utils/AiProviders/ppio/index.js +++ b/server/utils/AiProviders/ppio/index.js @@ -176,6 +176,8 @@ class PPIOLLM { total_tokens: result.output.usage.total_tokens || 0, outputTps: result.output.usage.completion_tokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -193,7 +195,9 @@ class PPIOLLM { messages, temperature, }), - messages + messages, + true, + this.model ); return measuredStreamRequest; } diff --git a/server/utils/AiProviders/textGenWebUI/index.js b/server/utils/AiProviders/textGenWebUI/index.js index 2de1d8cd..d927edc0 100644 --- a/server/utils/AiProviders/textGenWebUI/index.js +++ b/server/utils/AiProviders/textGenWebUI/index.js @@ -150,6 +150,8 @@ class TextGenWebUILLM { total_tokens: result.output.usage?.total_tokens || 0, outputTps: result.output.usage?.completion_tokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -162,7 +164,9 @@ class TextGenWebUILLM { messages, temperature, }), - messages + messages, + true, + this.model ); return measuredStreamRequest; } diff --git a/server/utils/AiProviders/togetherAi/index.js b/server/utils/AiProviders/togetherAi/index.js index 00ad4e64..bbd58cad 100644 --- a/server/utils/AiProviders/togetherAi/index.js +++ b/server/utils/AiProviders/togetherAi/index.js @@ -209,6 +209,8 @@ class TogetherAiLLM { total_tokens: result.output.usage?.total_tokens || 0, outputTps: result.output.usage?.completion_tokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -227,7 +229,8 @@ class TogetherAiLLM { temperature, }), messages, - false + false, + this.model ); return measuredStreamRequest; } diff --git a/server/utils/AiProviders/xai/index.js b/server/utils/AiProviders/xai/index.js index df925715..7b5451fb 100644 --- a/server/utils/AiProviders/xai/index.js +++ b/server/utils/AiProviders/xai/index.js @@ -147,6 +147,8 @@ class XAiLLM { total_tokens: result.output.usage.total_tokens || 0, outputTps: result.output.usage.completion_tokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new Date(), }, }; } @@ -165,7 +167,8 @@ class XAiLLM { temperature, }), messages, - false + false, + this.model ); return measuredStreamRequest; diff --git a/server/utils/AiProviders/zai/index.js b/server/utils/AiProviders/zai/index.js index ccbbc535..ad6fe1c2 100644 --- a/server/utils/AiProviders/zai/index.js +++ b/server/utils/AiProviders/zai/index.js @@ -136,6 +136,8 @@ class ZAiLLM { total_tokens: result.output.usage?.total_tokens || 0, outputTps: result.output.usage?.completion_tokens / result.duration, duration: result.duration, + model: this.model, + timestamp: new 
Date(), }, }; } @@ -149,7 +151,8 @@ class ZAiLLM { temperature, }), messages, - false + false, + this.model ); return measuredStreamRequest; diff --git a/server/utils/helpers/chat/LLMPerformanceMonitor.js b/server/utils/helpers/chat/LLMPerformanceMonitor.js index 070df590..df7398d2 100644 --- a/server/utils/helpers/chat/LLMPerformanceMonitor.js +++ b/server/utils/helpers/chat/LLMPerformanceMonitor.js @@ -59,13 +59,15 @@ class LLMPerformanceMonitor { * Also attaches an `endMeasurement` method to the stream that will calculate the duration of the stream and metrics. * @param {Promise} func * @param {Messages} messages - the messages sent to the LLM so we can calculate the prompt tokens since most providers do not return this on stream - * @param {boolean} runPromptTokenCalculation - whether to run the prompt token calculation to estimate the `prompt_tokens` metric. This is useful for providers that do not return this on stream. + * @param {boolean} runPromptTokenCalculation - [default: true] whether to run the prompt token calculation to estimate the `prompt_tokens` metric. This is useful for providers that do not return this on stream. + * @param {string} modelTag - the tag of the model that was used to generate the stream (eg: gpt-4o, claude-3-5-sonnet, qwen3/72b-instruct, etc.) * @returns {Promise} */ static async measureStream( func, messages = [], - runPromptTokenCalculation = true + runPromptTokenCalculation = true, + modelTag = "" ) { const stream = await func; stream.start = Date.now(); @@ -76,6 +78,7 @@ class LLMPerformanceMonitor { total_tokens: 0, outputTps: 0, duration: 0, + ...(modelTag ? { model: modelTag } : {}), }; stream.endMeasurement = (reportedUsage = {}) => { @@ -88,6 +91,7 @@ class LLMPerformanceMonitor { ...stream.metrics, ...reportedUsage, duration: reportedUsage?.duration ?? estimatedDuration, + timestamp: new Date(), }; stream.metrics.total_tokens =