diff --git a/server/utils/AiProviders/anthropic/index.js b/server/utils/AiProviders/anthropic/index.js
index a5f31351..fd6a5504 100644
--- a/server/utils/AiProviders/anthropic/index.js
+++ b/server/utils/AiProviders/anthropic/index.js
@@ -172,6 +172,7 @@ class AnthropicLLM {
         outputTps: completionTokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -194,6 +195,7 @@ class AnthropicLLM {
       messages,
       runPromptTokenCalculation: false,
       modelTag: this.model,
+      provider: this.className,
     });
 
     return measuredStreamRequest;
diff --git a/server/utils/AiProviders/apipie/index.js b/server/utils/AiProviders/apipie/index.js
index 8a807c5a..6eb0629d 100644
--- a/server/utils/AiProviders/apipie/index.js
+++ b/server/utils/AiProviders/apipie/index.js
@@ -221,6 +221,7 @@ class ApiPieLLM {
           (result.output.usage?.completion_tokens || 0) / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -242,6 +243,7 @@ class ApiPieLLM {
       messages,
       runPromptTokenCalculation: true,
       modelTag: this.model,
+      provider: this.className,
     });
     return measuredStreamRequest;
   }
diff --git a/server/utils/AiProviders/azureOpenAi/index.js b/server/utils/AiProviders/azureOpenAi/index.js
index ba825eea..a73e3a80 100644
--- a/server/utils/AiProviders/azureOpenAi/index.js
+++ b/server/utils/AiProviders/azureOpenAi/index.js
@@ -15,6 +15,7 @@ class AzureOpenAiLLM {
     if (!process.env.AZURE_OPENAI_KEY)
       throw new Error("No Azure API key was set.");
 
+    this.className = "AzureOpenAiLLM";
     this.openai = new OpenAI({
       apiKey: process.env.AZURE_OPENAI_KEY,
       baseURL: AzureOpenAiLLM.formatBaseUrl(process.env.AZURE_OPENAI_ENDPOINT),
@@ -175,6 +176,7 @@ class AzureOpenAiLLM {
         outputTps: result.output.usage.completion_tokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -197,6 +199,7 @@ class AzureOpenAiLLM {
       messages,
       runPromptTokenCalculation: true,
       modelTag: this.model,
+      provider: this.className,
     });
 
     return measuredStreamRequest;
diff --git a/server/utils/AiProviders/bedrock/index.js b/server/utils/AiProviders/bedrock/index.js
index 119f3e88..6dc7de6e 100644
--- a/server/utils/AiProviders/bedrock/index.js
+++ b/server/utils/AiProviders/bedrock/index.js
@@ -72,6 +72,7 @@ class AWSBedrockLLM {
         throw new Error(`Required environment variable ${envVar} is not set.`);
     }
 
+    this.className = "AWSBedrockLLM";
     this.model =
       modelPreference || process.env.AWS_BEDROCK_LLM_MODEL_PREFERENCE;
 
@@ -448,6 +449,7 @@ class AWSBedrockLLM {
         outputTps: outputTps,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -493,6 +495,7 @@ class AWSBedrockLLM {
         messages,
         runPromptTokenCalculation: false,
         modelTag: this.model,
+        provider: this.className,
       });
       return measuredStreamRequest;
     } catch (e) {
diff --git a/server/utils/AiProviders/cohere/index.js b/server/utils/AiProviders/cohere/index.js
index aaf7a258..ef14d4c6 100644
--- a/server/utils/AiProviders/cohere/index.js
+++ b/server/utils/AiProviders/cohere/index.js
@@ -125,6 +125,7 @@ class CohereLLM {
         outputTps: completionTokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -143,6 +144,7 @@ class CohereLLM {
       messages,
       runPromptTokenCalculation: false,
       modelTag: this.model,
+      provider: this.className,
     });
 
     return measuredStreamRequest;
diff --git a/server/utils/AiProviders/cometapi/index.js b/server/utils/AiProviders/cometapi/index.js
index c9cb74b2..85032ac4 100644
--- a/server/utils/AiProviders/cometapi/index.js
+++ b/server/utils/AiProviders/cometapi/index.js
@@ -226,6 +226,7 @@ class CometApiLLM {
         outputTps: result.output.usage.completion_tokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -247,6 +248,7 @@ class CometApiLLM {
       messages,
       runPromptTokenCalculation: true,
       modelTag: this.model,
+      provider: this.className,
     });
     return measuredStreamRequest;
   }
diff --git a/server/utils/AiProviders/deepseek/index.js b/server/utils/AiProviders/deepseek/index.js
index 136af490..0521d198 100644
--- a/server/utils/AiProviders/deepseek/index.js
+++ b/server/utils/AiProviders/deepseek/index.js
@@ -131,6 +131,7 @@ class DeepSeekLLM {
         outputTps: result.output.usage.completion_tokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -152,6 +153,7 @@ class DeepSeekLLM {
       messages,
       runPromptTokenCalculation: false,
       modelTag: this.model,
+      provider: this.className,
     });
 
     return measuredStreamRequest;
diff --git a/server/utils/AiProviders/dellProAiStudio/index.js b/server/utils/AiProviders/dellProAiStudio/index.js
index f8dc11f8..1c887d5b 100644
--- a/server/utils/AiProviders/dellProAiStudio/index.js
+++ b/server/utils/AiProviders/dellProAiStudio/index.js
@@ -166,6 +166,7 @@ class DellProAiStudioLLM {
         outputTps: result.output.usage?.completion_tokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -187,6 +188,7 @@ class DellProAiStudioLLM {
       messages,
       runPromptTokenCalculation: true,
       modelTag: this.model,
+      provider: this.className,
     });
     return measuredStreamRequest;
   }
diff --git a/server/utils/AiProviders/dockerModelRunner/index.js b/server/utils/AiProviders/dockerModelRunner/index.js
index 27faa6a1..8febe010 100644
--- a/server/utils/AiProviders/dockerModelRunner/index.js
+++ b/server/utils/AiProviders/dockerModelRunner/index.js
@@ -26,6 +26,7 @@ class DockerModelRunnerLLM {
     if (!process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF && !modelPreference)
       throw new Error("No Docker Model Runner Model Pref was set.");
 
+    this.className = "DockerModelRunnerLLM";
     this.dmr = new OpenAIApi({
       baseURL: parseDockerModelRunnerEndpoint(
         process.env.DOCKER_MODEL_RUNNER_BASE_PATH
@@ -173,6 +174,7 @@ class DockerModelRunnerLLM {
         outputTps: result.output.usage?.completion_tokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -194,6 +196,7 @@ class DockerModelRunnerLLM {
       messages,
       runPromptTokenCalculation: true,
       modelTag: this.model,
+      provider: this.className,
     });
     return measuredStreamRequest;
   }
diff --git a/server/utils/AiProviders/fireworksAi/index.js b/server/utils/AiProviders/fireworksAi/index.js
index 6f3ec426..48cb08cf 100644
--- a/server/utils/AiProviders/fireworksAi/index.js
+++ b/server/utils/AiProviders/fireworksAi/index.js
@@ -164,6 +164,7 @@ class FireworksAiLLM {
         outputTps: result.output.usage.completion_tokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -185,6 +186,7 @@ class FireworksAiLLM {
       messages,
       runPromptTokenCalculation: false,
       modelTag: this.model,
+      provider: this.className,
     });
     return measuredStreamRequest;
   }
diff --git a/server/utils/AiProviders/foundry/index.js b/server/utils/AiProviders/foundry/index.js
index ed91c717..b503e212 100644
--- a/server/utils/AiProviders/foundry/index.js
+++ b/server/utils/AiProviders/foundry/index.js
@@ -234,6 +234,7 @@ class FoundryLLM {
         outputTps: result.output.usage.completion_tokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -256,6 +257,7 @@ class FoundryLLM {
       messages,
       runPromptTokenCalculation: true,
       modelTag: this.model,
+      provider: this.className,
     });
     return measuredStreamRequest;
   }
diff --git a/server/utils/AiProviders/gemini/index.js b/server/utils/AiProviders/gemini/index.js
index 91f25441..9ac2b9f5 100644
--- a/server/utils/AiProviders/gemini/index.js
+++ b/server/utils/AiProviders/gemini/index.js
@@ -406,6 +406,7 @@ class GeminiLLM {
         outputTps: result.output.usage.completion_tokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -425,6 +426,7 @@ class GeminiLLM {
       messages,
       runPromptTokenCalculation: false,
       modelTag: this.model,
+      provider: this.className,
     });
 
     return measuredStreamRequest;
diff --git a/server/utils/AiProviders/genericOpenAi/index.js b/server/utils/AiProviders/genericOpenAi/index.js
index ed5fba28..7572a535 100644
--- a/server/utils/AiProviders/genericOpenAi/index.js
+++ b/server/utils/AiProviders/genericOpenAi/index.js
@@ -194,6 +194,7 @@ class GenericOpenAiLLM {
           (result.output?.usage?.completion_tokens || 0) / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -209,9 +210,9 @@ class GenericOpenAiLLM {
         max_tokens: this.maxTokens,
       }),
       messages,
-      // runPromptTokenCalculation: true - There is not way to know if the generic provider connected is returning
       runPromptTokenCalculation: true,
       modelTag: this.model,
+      provider: this.className,
     });
     return measuredStreamRequest;
   }
diff --git a/server/utils/AiProviders/giteeai/index.js b/server/utils/AiProviders/giteeai/index.js
index e5849f28..8be36e29 100644
--- a/server/utils/AiProviders/giteeai/index.js
+++ b/server/utils/AiProviders/giteeai/index.js
@@ -23,6 +23,7 @@ class GiteeAILLM {
       throw new Error("No Gitee AI API key was set.");
 
     const { OpenAI: OpenAIApi } = require("openai");
+    this.className = "GiteeAILLM";
    this.openai = new OpenAIApi({
       apiKey: process.env.GITEE_AI_API_KEY,
       baseURL: "https://ai.gitee.com/v1",
@@ -48,27 +49,6 @@ class GiteeAILLM {
     console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
   }
 
-  // This checks if the .cached_at file has a timestamp that is more than 1Week (in millis)
-  // from the current date. If it is, then we will refetch the API so that all the models are up
-  // to date.
-  #cacheIsStale() {
-    const MAX_STALE = 6.048e8; // 1 Week in MS
-    if (!fs.existsSync(this.cacheAtPath)) return true;
-    const now = Number(new Date());
-    const timestampMs = Number(fs.readFileSync(this.cacheAtPath));
-    return now - timestampMs > MAX_STALE;
-  }
-
-  // This function fetches the models from the GiteeAI API and caches them locally.
-  async #syncModels() {
-    if (fs.existsSync(this.cacheModelPath) && !this.#cacheIsStale())
-      return false;
-
-    this.log("Model cache is not present or stale. Fetching from GiteeAI API.");
-    await giteeAiModels();
-    return;
-  }
-
   models() {
     if (!fs.existsSync(this.cacheModelPath)) return {};
     return safeJsonParse(
@@ -171,6 +151,7 @@ class GiteeAILLM {
         outputTps: result.output.usage.completion_tokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -187,6 +168,7 @@ class GiteeAILLM {
       messages,
       runPromptTokenCalculation: false,
       modelTag: this.model,
+      provider: this.className,
     });
 
     return measuredStreamRequest;
diff --git a/server/utils/AiProviders/groq/index.js b/server/utils/AiProviders/groq/index.js
index a05f6f5b..b697ab13 100644
--- a/server/utils/AiProviders/groq/index.js
+++ b/server/utils/AiProviders/groq/index.js
@@ -11,6 +11,7 @@ class GroqLLM {
   constructor(embedder = null, modelPreference = null) {
     const { OpenAI: OpenAIApi } = require("openai");
     if (!process.env.GROQ_API_KEY) throw new Error("No Groq API key was set.");
+    this.className = "GroqLLM";
 
     this.openai = new OpenAIApi({
       baseURL: "https://api.groq.com/openai/v1",
@@ -204,6 +205,7 @@ class GroqLLM {
           result.output.usage.completion_time,
         duration: result.output.usage.total_time,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -225,6 +227,7 @@ class GroqLLM {
       messages,
       runPromptTokenCalculation: false,
       modelTag: this.model,
+      provider: this.className,
     });
 
     return measuredStreamRequest;
diff --git a/server/utils/AiProviders/huggingface/index.js b/server/utils/AiProviders/huggingface/index.js
index f84e0273..2f5dc591 100644
--- a/server/utils/AiProviders/huggingface/index.js
+++ b/server/utils/AiProviders/huggingface/index.js
@@ -14,6 +14,7 @@ class HuggingFaceLLM {
       throw new Error("No HuggingFace Access Token was set.");
 
     const { OpenAI: OpenAIApi } = require("openai");
+    this.className = "HuggingFaceLLM";
     this.openai = new OpenAIApi({
       baseURL: `${process.env.HUGGING_FACE_LLM_ENDPOINT}/v1`,
       apiKey: process.env.HUGGING_FACE_LLM_API_KEY,
@@ -118,6 +119,7 @@ class HuggingFaceLLM {
           (result.output.usage?.completion_tokens || 0) / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -134,6 +136,7 @@ class HuggingFaceLLM {
       messages,
       runPromptTokenCalculation: true,
       modelTag: this.model,
+      provider: this.className,
     });
     return measuredStreamRequest;
   }
diff --git a/server/utils/AiProviders/koboldCPP/index.js b/server/utils/AiProviders/koboldCPP/index.js
index a973d664..fa349236 100644
--- a/server/utils/AiProviders/koboldCPP/index.js
+++ b/server/utils/AiProviders/koboldCPP/index.js
@@ -161,6 +161,7 @@ class KoboldCPPLLM {
         outputTps: completionTokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -178,6 +179,7 @@ class KoboldCPPLLM {
       messages,
       runPromptTokenCalculation: true,
       modelTag: this.model,
+      provider: this.className,
     });
     return measuredStreamRequest;
   }
diff --git a/server/utils/AiProviders/liteLLM/index.js b/server/utils/AiProviders/liteLLM/index.js
index 65e20151..5ff8bbe5 100644
--- a/server/utils/AiProviders/liteLLM/index.js
+++ b/server/utils/AiProviders/liteLLM/index.js
@@ -155,6 +155,7 @@ class LiteLLM {
           (result.output.usage?.completion_tokens || 0) / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -170,9 +171,9 @@ class LiteLLM {
         max_tokens: parseInt(this.maxTokens), // LiteLLM requires int
       }),
       messages,
-      // runPromptTokenCalculation: true - We manually count the tokens because they may or may not be provided in the stream
       runPromptTokenCalculation: true,
       modelTag: this.model,
+      provider: this.className,
     });
 
     return measuredStreamRequest;
diff --git a/server/utils/AiProviders/lmStudio/index.js b/server/utils/AiProviders/lmStudio/index.js
index 1327dc66..78a5a792 100644
--- a/server/utils/AiProviders/lmStudio/index.js
+++ b/server/utils/AiProviders/lmStudio/index.js
@@ -17,6 +17,7 @@ class LMStudioLLM {
     if (!process.env.LMSTUDIO_BASE_PATH)
       throw new Error("No LMStudio API Base Path was set.");
 
+    this.className = "LMStudioLLM";
     const apiKey = process.env.LMSTUDIO_AUTH_TOKEN ?? null;
     this.lmstudio = new OpenAIApi({
       baseURL: parseLMStudioBasePath(process.env.LMSTUDIO_BASE_PATH), // here is the URL to your LMStudio instance
@@ -240,6 +241,7 @@ class LMStudioLLM {
         outputTps: result.output.usage?.completion_tokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -261,6 +263,7 @@ class LMStudioLLM {
       messages,
       runPromptTokenCalculation: true,
       modelTag: this.model,
+      provider: this.className,
     });
     return measuredStreamRequest;
   }
diff --git a/server/utils/AiProviders/localAi/index.js b/server/utils/AiProviders/localAi/index.js
index 54f061c9..bc723e75 100644
--- a/server/utils/AiProviders/localAi/index.js
+++ b/server/utils/AiProviders/localAi/index.js
@@ -12,6 +12,7 @@ class LocalAiLLM {
     if (!process.env.LOCAL_AI_BASE_PATH)
       throw new Error("No LocalAI Base Path was set.");
 
+    this.className = "LocalAiLLM";
     const { OpenAI: OpenAIApi } = require("openai");
     this.openai = new OpenAIApi({
       baseURL: process.env.LOCAL_AI_BASE_PATH,
@@ -146,6 +147,7 @@ class LocalAiLLM {
         outputTps: completionTokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -167,6 +169,7 @@ class LocalAiLLM {
       messages,
       runPromptTokenCalculation: true,
       modelTag: this.model,
+      provider: this.className,
     });
     return measuredStreamRequest;
   }
diff --git a/server/utils/AiProviders/mistral/index.js b/server/utils/AiProviders/mistral/index.js
index 89849f04..07298ef9 100644
--- a/server/utils/AiProviders/mistral/index.js
+++ b/server/utils/AiProviders/mistral/index.js
@@ -140,6 +140,7 @@ class MistralLLM {
         outputTps: result.output.usage.completion_tokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -161,6 +162,7 @@ class MistralLLM {
       messages,
       runPromptTokenCalculation: false,
       modelTag: this.model,
+      provider: this.className,
     });
     return measuredStreamRequest;
   }
diff --git a/server/utils/AiProviders/moonshotAi/index.js b/server/utils/AiProviders/moonshotAi/index.js
index 273c7449..17e624f9 100644
--- a/server/utils/AiProviders/moonshotAi/index.js
+++ b/server/utils/AiProviders/moonshotAi/index.js
@@ -137,6 +137,7 @@ class MoonshotAiLLM {
         outputTps: result.output.usage.completion_tokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -153,6 +154,7 @@ class MoonshotAiLLM {
       messages,
       runPromptTokenCalculation: true,
       modelTag: this.model,
+      provider: this.className,
     });
 
     return measuredStreamRequest;
diff --git a/server/utils/AiProviders/novita/index.js b/server/utils/AiProviders/novita/index.js
index 84b1c783..957605a9 100644
--- a/server/utils/AiProviders/novita/index.js
+++ b/server/utils/AiProviders/novita/index.js
@@ -226,6 +226,7 @@ class NovitaLLM {
         outputTps: result.output.usage.completion_tokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -247,6 +248,7 @@ class NovitaLLM {
       messages,
       runPromptTokenCalculation: true,
       modelTag: this.model,
+      provider: this.className,
     });
     return measuredStreamRequest;
   }
diff --git a/server/utils/AiProviders/nvidiaNim/index.js b/server/utils/AiProviders/nvidiaNim/index.js
index 6ff1dd38..63e309a4 100644
--- a/server/utils/AiProviders/nvidiaNim/index.js
+++ b/server/utils/AiProviders/nvidiaNim/index.js
@@ -185,6 +185,7 @@ class NvidiaNimLLM {
         outputTps: result.output.usage.completion_tokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -206,6 +207,7 @@ class NvidiaNimLLM {
       messages,
       runPromptTokenCalculation: true,
       modelTag: this.model,
+      provider: this.className,
     });
     return measuredStreamRequest;
   }
diff --git a/server/utils/AiProviders/ollama/index.js b/server/utils/AiProviders/ollama/index.js
index cb733e02..e9a71115 100644
--- a/server/utils/AiProviders/ollama/index.js
+++ b/server/utils/AiProviders/ollama/index.js
@@ -312,6 +312,7 @@ class OllamaAILLM {
           result.output.usage.completion_tokens / result.output.usage.duration,
         duration: result.output.usage.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -333,6 +334,7 @@ class OllamaAILLM {
       messages,
       runPromptTokenCalculation: false,
       modelTag: this.model,
+      provider: this.className,
     }).catch((e) => {
       throw this.#errorHandler(e);
     });
diff --git a/server/utils/AiProviders/openAi/index.js b/server/utils/AiProviders/openAi/index.js
index 2420aa29..55fed492 100644
--- a/server/utils/AiProviders/openAi/index.js
+++ b/server/utils/AiProviders/openAi/index.js
@@ -176,6 +176,7 @@ class OpenAiLLM {
           : 0,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -198,6 +199,7 @@ class OpenAiLLM {
       messages,
       runPromptTokenCalculation: false,
       modelTag: this.model,
+      provider: this.className,
     });
 
     return measuredStreamRequest;
diff --git a/server/utils/AiProviders/openRouter/index.js b/server/utils/AiProviders/openRouter/index.js
index 23832700..c8e12748 100644
--- a/server/utils/AiProviders/openRouter/index.js
+++ b/server/utils/AiProviders/openRouter/index.js
@@ -277,6 +277,7 @@ class OpenRouterLLM {
         outputTps: result.output.usage.completion_tokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -311,6 +312,7 @@
       // is not available.
       runPromptTokenCalculation: true,
       modelTag: this.model,
+      provider: this.className,
     });
 
     return measuredStreamRequest;
diff --git a/server/utils/AiProviders/perplexity/index.js b/server/utils/AiProviders/perplexity/index.js
index 01fc566b..2a1b4f27 100644
--- a/server/utils/AiProviders/perplexity/index.js
+++ b/server/utils/AiProviders/perplexity/index.js
@@ -18,6 +18,7 @@ class PerplexityLLM {
     if (!process.env.PERPLEXITY_API_KEY)
       throw new Error("No Perplexity API key was set.");
 
+    this.className = "PerplexityLLM";
     const { OpenAI: OpenAIApi } = require("openai");
     this.openai = new OpenAIApi({
       baseURL: "https://api.perplexity.ai",
@@ -118,6 +119,7 @@ class PerplexityLLM {
         outputTps: result.output.usage?.completion_tokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -139,6 +141,7 @@ class PerplexityLLM {
       messages,
       runPromptTokenCalculation: true,
       modelTag: this.model,
+      provider: this.className,
     });
     return measuredStreamRequest;
   }
diff --git a/server/utils/AiProviders/ppio/index.js b/server/utils/AiProviders/ppio/index.js
index 81325157..f10ede2e 100644
--- a/server/utils/AiProviders/ppio/index.js
+++ b/server/utils/AiProviders/ppio/index.js
@@ -177,6 +177,7 @@ class PPIOLLM {
         outputTps: result.output.usage.completion_tokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -198,6 +199,7 @@ class PPIOLLM {
       messages,
       runPromptTokenCalculation: true,
       modelTag: this.model,
+      provider: this.className,
     });
     return measuredStreamRequest;
   }
diff --git a/server/utils/AiProviders/privatemode/index.js b/server/utils/AiProviders/privatemode/index.js
index 47e8f80e..7f71ec2b 100644
--- a/server/utils/AiProviders/privatemode/index.js
+++ b/server/utils/AiProviders/privatemode/index.js
@@ -169,6 +169,7 @@ class PrivatemodeLLM {
         outputTps: result.output.usage?.completion_tokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -190,6 +191,7 @@ class PrivatemodeLLM {
       messages,
       runPromptTokenCalculation: true,
       modelTag: this.model,
+      provider: this.className,
     });
     return measuredStreamRequest;
   }
diff --git a/server/utils/AiProviders/sambanova/index.js b/server/utils/AiProviders/sambanova/index.js
index 49ae1614..4cec536d 100644
--- a/server/utils/AiProviders/sambanova/index.js
+++ b/server/utils/AiProviders/sambanova/index.js
@@ -137,6 +137,7 @@ class SambaNovaLLM {
         outputTps: result.output.usage?.total_tokens_per_sec || 0,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -156,6 +157,7 @@ class SambaNovaLLM {
       messages,
       runPromptTokenCalculation: false,
       modelTag: this.model,
+      provider: this.className,
     });
 
     return measuredStreamRequest;
diff --git a/server/utils/AiProviders/textGenWebUI/index.js b/server/utils/AiProviders/textGenWebUI/index.js
index 253f9ef1..7d7cac22 100644
--- a/server/utils/AiProviders/textGenWebUI/index.js
+++ b/server/utils/AiProviders/textGenWebUI/index.js
@@ -151,6 +151,7 @@ class TextGenWebUILLM {
         outputTps: result.output.usage?.completion_tokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -167,6 +168,7 @@ class TextGenWebUILLM {
       messages,
       runPromptTokenCalculation: true,
       modelTag: this.model,
+      provider: this.className,
     });
     return measuredStreamRequest;
   }
diff --git a/server/utils/AiProviders/togetherAi/index.js b/server/utils/AiProviders/togetherAi/index.js
index 9ab5b827..f9571644 100644
--- a/server/utils/AiProviders/togetherAi/index.js
+++ b/server/utils/AiProviders/togetherAi/index.js
@@ -82,6 +82,7 @@ class TogetherAiLLM {
     if (!process.env.TOGETHER_AI_API_KEY)
       throw new Error("No TogetherAI API key was set.");
     const { OpenAI: OpenAIApi } = require("openai");
+    this.className = "TogetherAiLLM";
     this.openai = new OpenAIApi({
       baseURL: "https://api.together.xyz/v1",
       apiKey: process.env.TOGETHER_AI_API_KEY ?? null,
@@ -210,6 +211,7 @@ class TogetherAiLLM {
         outputTps: result.output.usage?.completion_tokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -231,6 +233,7 @@ class TogetherAiLLM {
       messages,
       runPromptTokenCalculation: false,
       modelTag: this.model,
+      provider: this.className,
     });
     return measuredStreamRequest;
   }
diff --git a/server/utils/AiProviders/xai/index.js b/server/utils/AiProviders/xai/index.js
index dbb21dab..ae5eff68 100644
--- a/server/utils/AiProviders/xai/index.js
+++ b/server/utils/AiProviders/xai/index.js
@@ -148,6 +148,7 @@ class XAiLLM {
         outputTps: result.output.usage.completion_tokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -169,6 +170,7 @@ class XAiLLM {
       messages,
       runPromptTokenCalculation: false,
       modelTag: this.model,
+      provider: this.className,
     });
 
     return measuredStreamRequest;
diff --git a/server/utils/AiProviders/zai/index.js b/server/utils/AiProviders/zai/index.js
index be550268..40480897 100644
--- a/server/utils/AiProviders/zai/index.js
+++ b/server/utils/AiProviders/zai/index.js
@@ -137,6 +137,7 @@ class ZAiLLM {
         outputTps: result.output.usage?.completion_tokens / result.duration,
         duration: result.duration,
         model: this.model,
+        provider: this.className,
         timestamp: new Date(),
       },
     };
@@ -153,6 +154,7 @@ class ZAiLLM {
       messages,
       runPromptTokenCalculation: false,
       modelTag: this.model,
+      provider: this.className,
     });
 
     return measuredStreamRequest;
diff --git a/server/utils/helpers/chat/LLMPerformanceMonitor.js b/server/utils/helpers/chat/LLMPerformanceMonitor.js
index 1423c672..e30d300a 100644
--- a/server/utils/helpers/chat/LLMPerformanceMonitor.js
+++ b/server/utils/helpers/chat/LLMPerformanceMonitor.js
@@ -62,6 +62,7 @@ class LLMPerformanceMonitor {
    * @param {Messages} [opts.messages=[]] - the messages sent to the LLM so we can calculate the prompt tokens since most providers do not return this on stream
    * @param {boolean} [opts.runPromptTokenCalculation=true] - whether to run the prompt token calculation to estimate the `prompt_tokens` metric. This is useful for providers that do not return this on stream.
    * @param {string} [opts.modelTag=""] - the tag of the model that was used to generate the stream (eg: gpt-4o, claude-3-5-sonnet, qwen3/72b-instruct, etc.)
+   * @param {string} [opts.provider=""] - the class name of the LLM that was used to generate the stream (eg: OpenAI, Anthropic, LMStudio, ApiPie, etc.)
    * @returns {Promise}
    */
   static async measureStream({
@@ -69,6 +70,7 @@
     messages = [],
     runPromptTokenCalculation = true,
     modelTag = "",
+    provider = "",
   }) {
     const stream = await func;
     stream.start = Date.now();
@@ -80,6 +82,7 @@
       outputTps: 0,
       duration: 0,
       ...(modelTag ? { model: modelTag } : {}),
+      ...(provider ? { provider: provider } : {}),
     };
 
     stream.endMeasurement = (reportedUsage = {}) => {
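Taken together, the change is mechanical: each provider class gains a `this.className` marker (added in the constructor where one did not already exist) and forwards it as `provider`, both on the non-streaming `metrics` object and into `LLMPerformanceMonitor.measureStream`, which spreads it into `stream.metrics` when non-empty. Below is a minimal sketch of the resulting call pattern; `ExampleLLM` and `fakeSdkCall` are hypothetical stand-ins for a real provider and its SDK, and the sketch assumes `measureAsyncFunction` resolves to `{ output, duration }` as the hunks above imply.

```js
// Sketch only: ExampleLLM and fakeSdkCall are illustrative, not part of the codebase.
const {
  LLMPerformanceMonitor,
} = require("./utils/helpers/chat/LLMPerformanceMonitor"); // path relative to server/

// Stand-in for a provider SDK request returning an OpenAI-style response.
async function fakeSdkCall(_messages = []) {
  return {
    choices: [{ message: { content: "Hello!" } }],
    usage: { prompt_tokens: 12, completion_tokens: 5, total_tokens: 17 },
  };
}

class ExampleLLM {
  constructor() {
    this.className = "ExampleLLM"; // the new provider identifier
    this.model = "example-model";
  }

  async getChatCompletion(messages = []) {
    const result = await LLMPerformanceMonitor.measureAsyncFunction(
      fakeSdkCall(messages)
    );
    return {
      textResponse: result.output.choices[0].message.content,
      metrics: {
        completion_tokens: result.output.usage?.completion_tokens || 0,
        outputTps:
          (result.output.usage?.completion_tokens || 0) / result.duration,
        duration: result.duration,
        model: this.model,
        provider: this.className, // new: attribution on every metrics object
        timestamp: new Date(),
      },
    };
  }

  async streamGetChatCompletion(messages = []) {
    // `provider` is forwarded so streamed measurements carry attribution too;
    // measureStream only folds it into stream.metrics when it is non-empty.
    return await LLMPerformanceMonitor.measureStream({
      func: fakeSdkCall(messages), // a real provider passes its SDK stream promise here
      messages,
      runPromptTokenCalculation: true,
      modelTag: this.model,
      provider: this.className,
    });
  }
}
```

With `provider` present on both paths, consumers of these metrics can group results by provider as well as by model tag, rather than inferring the source from the model name alone.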