4601 log model on response (#4781)

* add model tag to chatCompletion

* add modelTag `model` to async streaming
keeps default arguments for prompt token calculation where applied via explicit arg

* fix HF default arg

* render all performance metrics as available for backward compatibility
add `timestamp` to both sync/async chat methods

* extract metrics string to function
This commit is contained in:
Timothy Carambat 2025-12-14 14:46:55 -08:00 committed by GitHub
parent bc3ad06de4
commit 664f466e3f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
34 changed files with 176 additions and 42 deletions

View File

@ -1,3 +1,4 @@
import { formatDateTimeAsMoment } from "@/utils/directories";
import { numberWithCommas } from "@/utils/numbers"; import { numberWithCommas } from "@/utils/numbers";
import React, { useEffect, useState, useContext } from "react"; import React, { useEffect, useState, useContext } from "react";
const MetricsContext = React.createContext(); const MetricsContext = React.createContext();
@ -41,6 +42,26 @@ function getAutoShowMetrics() {
return window?.localStorage?.getItem(SHOW_METRICS_KEY) === "true"; return window?.localStorage?.getItem(SHOW_METRICS_KEY) === "true";
} }
/**
 * Build the display string for a chat's performance metrics.
 * Joins the available parts with a " · " separator, skipping any that
 * are missing so older chats without model/timestamp still render:
 * - Model name (if recorded)
 * - Duration and output tokens-per-second (always rendered)
 * - Timestamp (if recorded)
 * @param {{duration: number, outputTps: number, model?: string, timestamp?: number}} [metrics]
 * @returns {string} e.g. "gpt-4o · 1.2s (42 tok/s) · Dec 14, 2:46 PM"
 */
function buildMetricsString(metrics = {}) {
  return [
    // ?? instead of a ternary: empty parts are dropped by filter(Boolean) below.
    metrics.model ?? "",
    `${formatDuration(metrics.duration)} (${formatTps(metrics.outputTps)} tok/s)`,
    metrics.timestamp
      ? formatDateTimeAsMoment(metrics.timestamp, "MMM D, h:mm A")
      : "",
  ]
    .filter(Boolean)
    .join(" · ");
}
/** /**
* Toggle the show metrics setting in localStorage `anythingllm_show_chat_metrics` key * Toggle the show metrics setting in localStorage `anythingllm_show_chat_metrics` key
* @returns {void} * @returns {void}
@ -88,7 +109,7 @@ export function MetricsProvider({ children }) {
/** /**
* Render the metrics for a given chat, if available * Render the metrics for a given chat, if available
* @param {metrics: {duration:number, outputTps: number}} props * @param {metrics: {duration:number, outputTps: number, model: string, timestamp: number}} props
* @returns * @returns
*/ */
export default function RenderMetrics({ metrics = {} }) { export default function RenderMetrics({ metrics = {} }) {
@ -110,8 +131,7 @@ export default function RenderMetrics({ metrics = {} }) {
className={`border-none flex justify-end items-center gap-x-[8px] ${showMetricsAutomatically ? "opacity-100" : "opacity-0"} md:group-hover:opacity-100 transition-all duration-300`} className={`border-none flex justify-end items-center gap-x-[8px] ${showMetricsAutomatically ? "opacity-100" : "opacity-0"} md:group-hover:opacity-100 transition-all duration-300`}
> >
<p className="cursor-pointer text-xs font-mono text-theme-text-secondary opacity-50"> <p className="cursor-pointer text-xs font-mono text-theme-text-secondary opacity-50">
{formatDuration(metrics.duration)} ({formatTps(metrics.outputTps)}{" "} {buildMetricsString(metrics)}
tok/s)
</p> </p>
</button> </button>
); );

View File

@ -171,6 +171,8 @@ class AnthropicLLM {
total_tokens: promptTokens + completionTokens, total_tokens: promptTokens + completionTokens,
outputTps: completionTokens / result.duration, outputTps: completionTokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} catch (error) { } catch (error) {
@ -190,7 +192,8 @@ class AnthropicLLM {
temperature: Number(temperature ?? this.defaultTemp), temperature: Number(temperature ?? this.defaultTemp),
}), }),
messages, messages,
false false,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;

View File

@ -220,6 +220,8 @@ class ApiPieLLM {
outputTps: outputTps:
(result.output.usage?.completion_tokens || 0) / result.duration, (result.output.usage?.completion_tokens || 0) / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -237,7 +239,9 @@ class ApiPieLLM {
messages, messages,
temperature, temperature,
}), }),
messages messages,
true,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;
} }

View File

@ -174,6 +174,8 @@ class AzureOpenAiLLM {
total_tokens: result.output.usage.total_tokens || 0, total_tokens: result.output.usage.total_tokens || 0,
outputTps: result.output.usage.completion_tokens / result.duration, outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -192,7 +194,9 @@ class AzureOpenAiLLM {
n: 1, n: 1,
stream: true, stream: true,
}), }),
messages messages,
true,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;

View File

@ -423,9 +423,7 @@ class AWSBedrockLLM {
); );
} }
throw new Error(`AWSBedrock::getChatCompletion failed. ${e.message}`); throw new Error(`AWSBedrock::getChatCompletion failed. ${e.message}`);
}), })
messages,
false
); );
const response = result.output; const response = result.output;
@ -450,6 +448,8 @@ class AWSBedrockLLM {
total_tokens: response?.usage?.totalTokens ?? 0, total_tokens: response?.usage?.totalTokens ?? 0,
outputTps: outputTps, outputTps: outputTps,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -492,7 +492,8 @@ class AWSBedrockLLM {
const measuredStreamRequest = await LLMPerformanceMonitor.measureStream( const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
stream, stream,
messages, messages,
false // Indicate it's not a function call measurement false,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;
} catch (e) { } catch (e) {

View File

@ -124,6 +124,8 @@ class CohereLLM {
total_tokens: promptTokens + completionTokens, total_tokens: promptTokens + completionTokens,
outputTps: completionTokens / result.duration, outputTps: completionTokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -139,7 +141,8 @@ class CohereLLM {
temperature, temperature,
}), }),
messages, messages,
false false,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;

View File

@ -225,6 +225,8 @@ class CometApiLLM {
total_tokens: result.output.usage.total_tokens || 0, total_tokens: result.output.usage.total_tokens || 0,
outputTps: result.output.usage.completion_tokens / result.duration, outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -242,7 +244,9 @@ class CometApiLLM {
messages, messages,
temperature, temperature,
}), }),
messages messages,
true,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;
} }

View File

@ -130,6 +130,8 @@ class DeepSeekLLM {
total_tokens: result.output.usage.total_tokens || 0, total_tokens: result.output.usage.total_tokens || 0,
outputTps: result.output.usage.completion_tokens / result.duration, outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -148,7 +150,8 @@ class DeepSeekLLM {
temperature, temperature,
}), }),
messages, messages,
false false,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;

View File

@ -165,6 +165,8 @@ class DellProAiStudioLLM {
total_tokens: result.output.usage?.total_tokens || 0, total_tokens: result.output.usage?.total_tokens || 0,
outputTps: result.output.usage?.completion_tokens / result.duration, outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -182,7 +184,9 @@ class DellProAiStudioLLM {
messages, messages,
temperature, temperature,
}), }),
messages messages,
true,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;
} }

View File

@ -163,6 +163,8 @@ class FireworksAiLLM {
total_tokens: result.output.usage.total_tokens || 0, total_tokens: result.output.usage.total_tokens || 0,
outputTps: result.output.usage.completion_tokens / result.duration, outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -181,7 +183,8 @@ class FireworksAiLLM {
temperature, temperature,
}), }),
messages, messages,
false false,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;
} }

View File

@ -224,6 +224,8 @@ class FoundryLLM {
total_tokens: result.output.usage.total_tokens || 0, total_tokens: result.output.usage.total_tokens || 0,
outputTps: result.output.usage.completion_tokens / result.duration, outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -242,7 +244,9 @@ class FoundryLLM {
temperature, temperature,
max_completion_tokens: this.promptWindowLimit(), max_completion_tokens: this.promptWindowLimit(),
}), }),
messages messages,
true,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;
} }

View File

@ -405,6 +405,8 @@ class GeminiLLM {
total_tokens: result.output.usage.total_tokens || 0, total_tokens: result.output.usage.total_tokens || 0,
outputTps: result.output.usage.completion_tokens / result.duration, outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -421,7 +423,8 @@ class GeminiLLM {
}, },
}), }),
messages, messages,
false false,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;

View File

@ -193,6 +193,8 @@ class GenericOpenAiLLM {
outputTps: outputTps:
(result.output?.usage?.completion_tokens || 0) / result.duration, (result.output?.usage?.completion_tokens || 0) / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -206,9 +208,10 @@ class GenericOpenAiLLM {
temperature, temperature,
max_tokens: this.maxTokens, max_tokens: this.maxTokens,
}), }),
messages messages,
// runPromptTokenCalculation: true - There is not way to know if the generic provider connected is returning // runPromptTokenCalculation: true - There is not way to know if the generic provider connected is returning
// the correct usage metrics if any at all since any provider could be connected. true,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;
} }

View File

@ -170,6 +170,8 @@ class GiteeAILLM {
total_tokens: result.output.usage.total_tokens || 0, total_tokens: result.output.usage.total_tokens || 0,
outputTps: result.output.usage.completion_tokens / result.duration, outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -183,7 +185,8 @@ class GiteeAILLM {
temperature, temperature,
}), }),
messages, messages,
false false,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;

View File

@ -203,6 +203,8 @@ class GroqLLM {
result.output.usage.completion_tokens / result.output.usage.completion_tokens /
result.output.usage.completion_time, result.output.usage.completion_time,
duration: result.output.usage.total_time, duration: result.output.usage.total_time,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -221,7 +223,8 @@ class GroqLLM {
temperature, temperature,
}), }),
messages, messages,
false false,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;

View File

@ -117,6 +117,8 @@ class HuggingFaceLLM {
outputTps: outputTps:
(result.output.usage?.completion_tokens || 0) / result.duration, (result.output.usage?.completion_tokens || 0) / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -129,7 +131,9 @@ class HuggingFaceLLM {
messages, messages,
temperature, temperature,
}), }),
messages messages,
true,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;
} }

View File

@ -160,6 +160,8 @@ class KoboldCPPLLM {
total_tokens: promptTokens + completionTokens, total_tokens: promptTokens + completionTokens,
outputTps: completionTokens / result.duration, outputTps: completionTokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -173,7 +175,9 @@ class KoboldCPPLLM {
temperature, temperature,
max_tokens: this.maxTokens, max_tokens: this.maxTokens,
}), }),
messages messages,
true,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;
} }

View File

@ -154,6 +154,8 @@ class LiteLLM {
outputTps: outputTps:
(result.output.usage?.completion_tokens || 0) / result.duration, (result.output.usage?.completion_tokens || 0) / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -167,9 +169,10 @@ class LiteLLM {
temperature, temperature,
max_tokens: parseInt(this.maxTokens), // LiteLLM requires int max_tokens: parseInt(this.maxTokens), // LiteLLM requires int
}), }),
messages messages,
// runPromptTokenCalculation: true - We manually count the tokens because they may or may not be provided in the stream // runPromptTokenCalculation: true - We manually count the tokens because they may or may not be provided in the stream
// responses depending on LLM connected. If they are provided, then we counted for nothing, but better than nothing. true,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;

View File

@ -234,6 +234,8 @@ class LMStudioLLM {
total_tokens: result.output.usage?.total_tokens || 0, total_tokens: result.output.usage?.total_tokens || 0,
outputTps: result.output.usage?.completion_tokens / result.duration, outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -251,7 +253,9 @@ class LMStudioLLM {
messages, messages,
temperature, temperature,
}), }),
messages messages,
true,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;
} }

View File

@ -145,6 +145,8 @@ class LocalAiLLM {
total_tokens: promptTokens + completionTokens, total_tokens: promptTokens + completionTokens,
outputTps: completionTokens / result.duration, outputTps: completionTokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -162,7 +164,9 @@ class LocalAiLLM {
messages, messages,
temperature, temperature,
}), }),
messages messages,
true,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;
} }

View File

@ -139,6 +139,8 @@ class MistralLLM {
total_tokens: result.output.usage.total_tokens || 0, total_tokens: result.output.usage.total_tokens || 0,
outputTps: result.output.usage.completion_tokens / result.duration, outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -157,7 +159,8 @@ class MistralLLM {
temperature, temperature,
}), }),
messages, messages,
false false,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;
} }

View File

@ -136,6 +136,8 @@ class MoonshotAiLLM {
total_tokens: result.output.usage.total_tokens || 0, total_tokens: result.output.usage.total_tokens || 0,
outputTps: result.output.usage.completion_tokens / result.duration, outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -148,7 +150,9 @@ class MoonshotAiLLM {
messages, messages,
temperature, temperature,
}), }),
messages messages,
true,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;

View File

@ -225,6 +225,8 @@ class NovitaLLM {
total_tokens: result.output.usage.total_tokens || 0, total_tokens: result.output.usage.total_tokens || 0,
outputTps: result.output.usage.completion_tokens / result.duration, outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -242,7 +244,9 @@ class NovitaLLM {
messages, messages,
temperature, temperature,
}), }),
messages messages,
true,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;
} }

View File

@ -184,6 +184,8 @@ class NvidiaNimLLM {
total_tokens: result.output.usage.total_tokens || 0, total_tokens: result.output.usage.total_tokens || 0,
outputTps: result.output.usage.completion_tokens / result.duration, outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -201,7 +203,9 @@ class NvidiaNimLLM {
messages, messages,
temperature, temperature,
}), }),
messages messages,
true,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;
} }

View File

@ -305,6 +305,8 @@ class OllamaAILLM {
outputTps: outputTps:
result.output.usage.completion_tokens / result.output.usage.duration, result.output.usage.completion_tokens / result.output.usage.duration,
duration: result.output.usage.duration, duration: result.output.usage.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -326,7 +328,8 @@ class OllamaAILLM {
}, },
}), }),
messages, messages,
false false,
this.model
).catch((e) => { ).catch((e) => {
throw this.#errorHandler(e); throw this.#errorHandler(e);
}); });

View File

@ -175,6 +175,8 @@ class OpenAiLLM {
? usage.output_tokens / result.duration ? usage.output_tokens / result.duration
: 0, : 0,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -194,7 +196,8 @@ class OpenAiLLM {
temperature: this.#temperature(this.model, temperature), temperature: this.#temperature(this.model, temperature),
}), }),
messages, messages,
false false,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;

View File

@ -276,6 +276,8 @@ class OpenRouterLLM {
total_tokens: result.output.usage.total_tokens || 0, total_tokens: result.output.usage.total_tokens || 0,
outputTps: result.output.usage.completion_tokens / result.duration, outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -300,13 +302,15 @@ class OpenRouterLLM {
include_reasoning: true, include_reasoning: true,
user: user?.id ? `user_${user.id}` : "", user: user?.id ? `user_${user.id}` : "",
}), }),
messages messages,
// We have to manually count the tokens // We have to manually count the tokens
// OpenRouter has a ton of providers and they all can return slightly differently // OpenRouter has a ton of providers and they all can return slightly differently
// some return chunk.usage on STOP, some do it after stop, its inconsistent. // some return chunk.usage on STOP, some do it after stop, its inconsistent.
// So it is possible reported metrics are inaccurate since we cannot reliably // So it is possible reported metrics are inaccurate since we cannot reliably
// catch the metrics before resolving the stream - so we just pretend this functionality // catch the metrics before resolving the stream - so we just pretend this functionality
// is not available. // is not available.
true,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;

View File

@ -117,6 +117,8 @@ class PerplexityLLM {
total_tokens: result.output.usage?.total_tokens || 0, total_tokens: result.output.usage?.total_tokens || 0,
outputTps: result.output.usage?.completion_tokens / result.duration, outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -134,7 +136,9 @@ class PerplexityLLM {
messages, messages,
temperature, temperature,
}), }),
messages messages,
true,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;
} }

View File

@ -176,6 +176,8 @@ class PPIOLLM {
total_tokens: result.output.usage.total_tokens || 0, total_tokens: result.output.usage.total_tokens || 0,
outputTps: result.output.usage.completion_tokens / result.duration, outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -193,7 +195,9 @@ class PPIOLLM {
messages, messages,
temperature, temperature,
}), }),
messages messages,
true,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;
} }

View File

@ -150,6 +150,8 @@ class TextGenWebUILLM {
total_tokens: result.output.usage?.total_tokens || 0, total_tokens: result.output.usage?.total_tokens || 0,
outputTps: result.output.usage?.completion_tokens / result.duration, outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -162,7 +164,9 @@ class TextGenWebUILLM {
messages, messages,
temperature, temperature,
}), }),
messages messages,
true,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;
} }

View File

@ -209,6 +209,8 @@ class TogetherAiLLM {
total_tokens: result.output.usage?.total_tokens || 0, total_tokens: result.output.usage?.total_tokens || 0,
outputTps: result.output.usage?.completion_tokens / result.duration, outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -227,7 +229,8 @@ class TogetherAiLLM {
temperature, temperature,
}), }),
messages, messages,
false false,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;
} }

View File

@ -147,6 +147,8 @@ class XAiLLM {
total_tokens: result.output.usage.total_tokens || 0, total_tokens: result.output.usage.total_tokens || 0,
outputTps: result.output.usage.completion_tokens / result.duration, outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -165,7 +167,8 @@ class XAiLLM {
temperature, temperature,
}), }),
messages, messages,
false false,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;

View File

@ -136,6 +136,8 @@ class ZAiLLM {
total_tokens: result.output.usage?.total_tokens || 0, total_tokens: result.output.usage?.total_tokens || 0,
outputTps: result.output.usage?.completion_tokens / result.duration, outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration, duration: result.duration,
model: this.model,
timestamp: new Date(),
}, },
}; };
} }
@ -149,7 +151,8 @@ class ZAiLLM {
temperature, temperature,
}), }),
messages, messages,
false false,
this.model
); );
return measuredStreamRequest; return measuredStreamRequest;

View File

@ -59,13 +59,15 @@ class LLMPerformanceMonitor {
* Also attaches an `endMeasurement` method to the stream that will calculate the duration of the stream and metrics. * Also attaches an `endMeasurement` method to the stream that will calculate the duration of the stream and metrics.
* @param {Promise<OpenAICompatibleStream>} func * @param {Promise<OpenAICompatibleStream>} func
* @param {Messages} messages - the messages sent to the LLM so we can calculate the prompt tokens since most providers do not return this on stream * @param {Messages} messages - the messages sent to the LLM so we can calculate the prompt tokens since most providers do not return this on stream
* @param {boolean} runPromptTokenCalculation - whether to run the prompt token calculation to estimate the `prompt_tokens` metric. This is useful for providers that do not return this on stream. * @param {boolean} runPromptTokenCalculation - [default: true] whether to run the prompt token calculation to estimate the `prompt_tokens` metric. This is useful for providers that do not return this on stream.
* @param {string} modelTag - the tag of the model that was used to generate the stream (eg: gpt-4o, claude-3-5-sonnet, qwen3/72b-instruct, etc.)
* @returns {Promise<MonitoredStream>} * @returns {Promise<MonitoredStream>}
*/ */
static async measureStream( static async measureStream(
func, func,
messages = [], messages = [],
runPromptTokenCalculation = true runPromptTokenCalculation = true,
modelTag = ""
) { ) {
const stream = await func; const stream = await func;
stream.start = Date.now(); stream.start = Date.now();
@ -76,6 +78,7 @@ class LLMPerformanceMonitor {
total_tokens: 0, total_tokens: 0,
outputTps: 0, outputTps: 0,
duration: 0, duration: 0,
...(modelTag ? { model: modelTag } : {}),
}; };
stream.endMeasurement = (reportedUsage = {}) => { stream.endMeasurement = (reportedUsage = {}) => {
@ -88,6 +91,7 @@ class LLMPerformanceMonitor {
...stream.metrics, ...stream.metrics,
...reportedUsage, ...reportedUsage,
duration: reportedUsage?.duration ?? estimatedDuration, duration: reportedUsage?.duration ?? estimatedDuration,
timestamp: new Date(),
}; };
stream.metrics.total_tokens = stream.metrics.total_tokens =