add provider field to chats (#4959)

Timothy Carambat 2026-02-02 20:11:18 -08:00 committed by GitHub
parent 8e2efc63d0
commit fef5bf06ec
36 changed files with 83 additions and 23 deletions
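
Every file in this change applies the same small pattern: each provider class already stores its name in this.className, and that value is now forwarded as a provider field, both on the metrics object returned by the blocking completion path and in the options passed to LLMPerformanceMonitor.measureStream() on the streaming path. A minimal, runnable sketch of the pattern (ExampleLLM and the fake result are illustrative stand-ins; the metrics shape and the provider: this.className addition mirror the diff below):

class ExampleLLM {
  constructor() {
    // Real providers set e.g. "AnthropicLLM" or "OpenAiLLM"; this value is now
    // persisted with every chat's performance metrics.
    this.className = "ExampleLLM";
    this.model = "example-model";
  }

  // Blocking path: the metrics object each provider builds, plus `provider`.
  metricsFor(result, completionTokens) {
    return {
      outputTps: completionTokens / result.duration,
      duration: result.duration,
      model: this.model,
      provider: this.className, // new field added by this commit
      timestamp: new Date(),
    };
  }
}

// e.g. { outputTps: 25, duration: 4, model: "example-model", provider: "ExampleLLM", timestamp: ... }
console.log(new ExampleLLM().metricsFor({ duration: 4 }, 100));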


@ -172,6 +172,7 @@ class AnthropicLLM {
outputTps: completionTokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -194,6 +195,7 @@ class AnthropicLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;


@ -221,6 +221,7 @@ class ApiPieLLM {
(result.output.usage?.completion_tokens || 0) / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -242,6 +243,7 @@ class ApiPieLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}


@ -15,6 +15,7 @@ class AzureOpenAiLLM {
if (!process.env.AZURE_OPENAI_KEY)
throw new Error("No Azure API key was set.");
this.className = "AzureOpenAiLLM";
this.openai = new OpenAI({
apiKey: process.env.AZURE_OPENAI_KEY,
baseURL: AzureOpenAiLLM.formatBaseUrl(process.env.AZURE_OPENAI_ENDPOINT),
@ -175,6 +176,7 @@ class AzureOpenAiLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -197,6 +199,7 @@ class AzureOpenAiLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;


@ -72,6 +72,7 @@ class AWSBedrockLLM {
throw new Error(`Required environment variable ${envVar} is not set.`);
}
this.className = "AWSBedrockLLM";
this.model =
modelPreference || process.env.AWS_BEDROCK_LLM_MODEL_PREFERENCE;
@ -448,6 +449,7 @@ class AWSBedrockLLM {
outputTps: outputTps,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -493,6 +495,7 @@ class AWSBedrockLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
} catch (e) {


@ -125,6 +125,7 @@ class CohereLLM {
outputTps: completionTokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -143,6 +144,7 @@ class CohereLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;


@ -226,6 +226,7 @@ class CometApiLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -247,6 +248,7 @@ class CometApiLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}


@ -131,6 +131,7 @@ class DeepSeekLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -152,6 +153,7 @@ class DeepSeekLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;


@ -166,6 +166,7 @@ class DellProAiStudioLLM {
outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -187,6 +188,7 @@ class DellProAiStudioLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}


@ -26,6 +26,7 @@ class DockerModelRunnerLLM {
if (!process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF && !modelPreference)
throw new Error("No Docker Model Runner Model Pref was set.");
this.className = "DockerModelRunnerLLM";
this.dmr = new OpenAIApi({
baseURL: parseDockerModelRunnerEndpoint(
process.env.DOCKER_MODEL_RUNNER_BASE_PATH
@ -173,6 +174,7 @@ class DockerModelRunnerLLM {
outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -194,6 +196,7 @@ class DockerModelRunnerLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}


@ -164,6 +164,7 @@ class FireworksAiLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -185,6 +186,7 @@ class FireworksAiLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}


@ -234,6 +234,7 @@ class FoundryLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -256,6 +257,7 @@ class FoundryLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}


@ -406,6 +406,7 @@ class GeminiLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -425,6 +426,7 @@ class GeminiLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;


@ -194,6 +194,7 @@ class GenericOpenAiLLM {
(result.output?.usage?.completion_tokens || 0) / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -209,9 +210,9 @@ class GenericOpenAiLLM {
max_tokens: this.maxTokens,
}),
messages,
// runPromptTokenCalculation: true - There is no way to know if the generic provider connected is returning usage in the stream
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}


@ -23,6 +23,7 @@ class GiteeAILLM {
throw new Error("No Gitee AI API key was set.");
const { OpenAI: OpenAIApi } = require("openai");
this.className = "GiteeAILLM";
this.openai = new OpenAIApi({
apiKey: process.env.GITEE_AI_API_KEY,
baseURL: "https://ai.gitee.com/v1",
@ -48,27 +49,6 @@ class GiteeAILLM {
console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
}
// This checks if the .cached_at file has a timestamp that is more than 1Week (in millis)
// from the current date. If it is, then we will refetch the API so that all the models are up
// to date.
#cacheIsStale() {
const MAX_STALE = 6.048e8; // 1 Week in MS
if (!fs.existsSync(this.cacheAtPath)) return true;
const now = Number(new Date());
const timestampMs = Number(fs.readFileSync(this.cacheAtPath));
return now - timestampMs > MAX_STALE;
}
// This function fetches the models from the GiteeAI API and caches them locally.
async #syncModels() {
if (fs.existsSync(this.cacheModelPath) && !this.#cacheIsStale())
return false;
this.log("Model cache is not present or stale. Fetching from GiteeAI API.");
await giteeAiModels();
return;
}
models() {
if (!fs.existsSync(this.cacheModelPath)) return {};
return safeJsonParse(
@ -171,6 +151,7 @@ class GiteeAILLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -187,6 +168,7 @@ class GiteeAILLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;


@ -11,6 +11,7 @@ class GroqLLM {
constructor(embedder = null, modelPreference = null) {
const { OpenAI: OpenAIApi } = require("openai");
if (!process.env.GROQ_API_KEY) throw new Error("No Groq API key was set.");
this.className = "GroqLLM";
this.openai = new OpenAIApi({
baseURL: "https://api.groq.com/openai/v1",
@ -204,6 +205,7 @@ class GroqLLM {
result.output.usage.completion_time,
duration: result.output.usage.total_time,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -225,6 +227,7 @@ class GroqLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;


@ -14,6 +14,7 @@ class HuggingFaceLLM {
throw new Error("No HuggingFace Access Token was set.");
const { OpenAI: OpenAIApi } = require("openai");
this.className = "HuggingFaceLLM";
this.openai = new OpenAIApi({
baseURL: `${process.env.HUGGING_FACE_LLM_ENDPOINT}/v1`,
apiKey: process.env.HUGGING_FACE_LLM_API_KEY,
@ -118,6 +119,7 @@ class HuggingFaceLLM {
(result.output.usage?.completion_tokens || 0) / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -134,6 +136,7 @@ class HuggingFaceLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}


@ -161,6 +161,7 @@ class KoboldCPPLLM {
outputTps: completionTokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -178,6 +179,7 @@ class KoboldCPPLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}


@ -155,6 +155,7 @@ class LiteLLM {
(result.output.usage?.completion_tokens || 0) / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -170,9 +171,9 @@ class LiteLLM {
max_tokens: parseInt(this.maxTokens), // LiteLLM requires int
}),
messages,
// runPromptTokenCalculation: true - We manually count the tokens because they may or may not be provided in the stream
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;


@ -17,6 +17,7 @@ class LMStudioLLM {
if (!process.env.LMSTUDIO_BASE_PATH)
throw new Error("No LMStudio API Base Path was set.");
this.className = "LMStudioLLM";
const apiKey = process.env.LMSTUDIO_AUTH_TOKEN ?? null;
this.lmstudio = new OpenAIApi({
baseURL: parseLMStudioBasePath(process.env.LMSTUDIO_BASE_PATH), // here is the URL to your LMStudio instance
@ -240,6 +241,7 @@ class LMStudioLLM {
outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -261,6 +263,7 @@ class LMStudioLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}


@ -12,6 +12,7 @@ class LocalAiLLM {
if (!process.env.LOCAL_AI_BASE_PATH)
throw new Error("No LocalAI Base Path was set.");
this.className = "LocalAiLLM";
const { OpenAI: OpenAIApi } = require("openai");
this.openai = new OpenAIApi({
baseURL: process.env.LOCAL_AI_BASE_PATH,
@ -146,6 +147,7 @@ class LocalAiLLM {
outputTps: completionTokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -167,6 +169,7 @@ class LocalAiLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}


@ -140,6 +140,7 @@ class MistralLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -161,6 +162,7 @@ class MistralLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}


@ -137,6 +137,7 @@ class MoonshotAiLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -153,6 +154,7 @@ class MoonshotAiLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;


@ -226,6 +226,7 @@ class NovitaLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -247,6 +248,7 @@ class NovitaLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}


@ -185,6 +185,7 @@ class NvidiaNimLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -206,6 +207,7 @@ class NvidiaNimLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}


@ -312,6 +312,7 @@ class OllamaAILLM {
result.output.usage.completion_tokens / result.output.usage.duration,
duration: result.output.usage.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -333,6 +334,7 @@ class OllamaAILLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
}).catch((e) => {
throw this.#errorHandler(e);
});


@ -176,6 +176,7 @@ class OpenAiLLM {
: 0,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -198,6 +199,7 @@ class OpenAiLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;


@ -277,6 +277,7 @@ class OpenRouterLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -311,6 +312,7 @@ class OpenRouterLLM {
// is not available.
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;


@ -18,6 +18,7 @@ class PerplexityLLM {
if (!process.env.PERPLEXITY_API_KEY)
throw new Error("No Perplexity API key was set.");
this.className = "PerplexityLLM";
const { OpenAI: OpenAIApi } = require("openai");
this.openai = new OpenAIApi({
baseURL: "https://api.perplexity.ai",
@ -118,6 +119,7 @@ class PerplexityLLM {
outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -139,6 +141,7 @@ class PerplexityLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}


@ -177,6 +177,7 @@ class PPIOLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -198,6 +199,7 @@ class PPIOLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}


@ -169,6 +169,7 @@ class PrivatemodeLLM {
outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -190,6 +191,7 @@ class PrivatemodeLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}


@ -137,6 +137,7 @@ class SambaNovaLLM {
outputTps: result.output.usage?.total_tokens_per_sec || 0,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -156,6 +157,7 @@ class SambaNovaLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;


@ -151,6 +151,7 @@ class TextGenWebUILLM {
outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -167,6 +168,7 @@ class TextGenWebUILLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}


@ -82,6 +82,7 @@ class TogetherAiLLM {
if (!process.env.TOGETHER_AI_API_KEY)
throw new Error("No TogetherAI API key was set.");
const { OpenAI: OpenAIApi } = require("openai");
this.className = "TogetherAiLLM";
this.openai = new OpenAIApi({
baseURL: "https://api.together.xyz/v1",
apiKey: process.env.TOGETHER_AI_API_KEY ?? null,
@ -210,6 +211,7 @@ class TogetherAiLLM {
outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -231,6 +233,7 @@ class TogetherAiLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}


@ -148,6 +148,7 @@ class XAiLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -169,6 +170,7 @@ class XAiLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;


@ -137,6 +137,7 @@ class ZAiLLM {
outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@ -153,6 +154,7 @@ class ZAiLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;


@ -62,6 +62,7 @@ class LLMPerformanceMonitor {
* @param {Messages} [opts.messages=[]] - the messages sent to the LLM so we can calculate the prompt tokens since most providers do not return this on stream
* @param {boolean} [opts.runPromptTokenCalculation=true] - whether to run the prompt token calculation to estimate the `prompt_tokens` metric. This is useful for providers that do not return this on stream.
* @param {string} [opts.modelTag=""] - the tag of the model that was used to generate the stream (eg: gpt-4o, claude-3-5-sonnet, qwen3/72b-instruct, etc.)
* @param {string} [opts.provider=""] - the class name of the LLM that was used to generate the stream (eg: OpenAI, Anthropic, LMStudio, ApiPie, etc.)
* @returns {Promise<MonitoredStream>}
*/
static async measureStream({
@ -69,6 +70,7 @@ class LLMPerformanceMonitor {
messages = [],
runPromptTokenCalculation = true,
modelTag = "",
provider = "",
}) {
const stream = await func;
stream.start = Date.now();
@ -80,6 +82,7 @@ class LLMPerformanceMonitor {
outputTps: 0,
duration: 0,
...(modelTag ? { model: modelTag } : {}),
...(provider ? { provider: provider } : {}),
};
stream.endMeasurement = (reportedUsage = {}) => {
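
On the streaming side, the same provider value travels through measureStream() into stream.metrics, so the measurements finalized by endMeasurement() carry it as well. A hedged usage sketch from a provider's point of view (the OpenAI-style chat call, message list, and method name are illustrative assumptions; the measureStream option names and this.className mirror the diff):

// Inside a provider's streamGetChatCompletion()-style method:
const measuredStreamRequest = await LLMPerformanceMonitor.measureStream({
  func: this.openai.chat.completions.create({
    model: this.model,
    stream: true,
    messages,
  }),
  messages,
  runPromptTokenCalculation: true,
  modelTag: this.model,
  provider: this.className, // e.g. "OpenAiLLM"
});
// When the stream completes, endMeasurement(reportedUsage) finalizes
// stream.metrics, which (per the initialization above) now carries `provider`
// alongside `model` for the chat record.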