add provider field to chats (#4959)
Parent: 8e2efc63d0 · Commit: fef5bf06ec
@@ -172,6 +172,7 @@ class AnthropicLLM {
outputTps: completionTokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -194,6 +195,7 @@ class AnthropicLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});

return measuredStreamRequest;

@@ -221,6 +221,7 @@ class ApiPieLLM {
(result.output.usage?.completion_tokens || 0) / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -242,6 +243,7 @@ class ApiPieLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}

@@ -15,6 +15,7 @@ class AzureOpenAiLLM {
if (!process.env.AZURE_OPENAI_KEY)
throw new Error("No Azure API key was set.");

this.className = "AzureOpenAiLLM";
this.openai = new OpenAI({
apiKey: process.env.AZURE_OPENAI_KEY,
baseURL: AzureOpenAiLLM.formatBaseUrl(process.env.AZURE_OPENAI_ENDPOINT),
@@ -175,6 +176,7 @@ class AzureOpenAiLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -197,6 +199,7 @@ class AzureOpenAiLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});

return measuredStreamRequest;

@@ -72,6 +72,7 @@ class AWSBedrockLLM {
throw new Error(`Required environment variable ${envVar} is not set.`);
}

this.className = "AWSBedrockLLM";
this.model =
modelPreference || process.env.AWS_BEDROCK_LLM_MODEL_PREFERENCE;

@@ -448,6 +449,7 @@ class AWSBedrockLLM {
outputTps: outputTps,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -493,6 +495,7 @@ class AWSBedrockLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
} catch (e) {

@@ -125,6 +125,7 @@ class CohereLLM {
outputTps: completionTokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -143,6 +144,7 @@ class CohereLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});

return measuredStreamRequest;

@@ -226,6 +226,7 @@ class CometApiLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -247,6 +248,7 @@ class CometApiLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}

@@ -131,6 +131,7 @@ class DeepSeekLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -152,6 +153,7 @@ class DeepSeekLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});

return measuredStreamRequest;

@@ -166,6 +166,7 @@ class DellProAiStudioLLM {
outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -187,6 +188,7 @@ class DellProAiStudioLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}

@@ -26,6 +26,7 @@ class DockerModelRunnerLLM {
if (!process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF && !modelPreference)
throw new Error("No Docker Model Runner Model Pref was set.");

this.className = "DockerModelRunnerLLM";
this.dmr = new OpenAIApi({
baseURL: parseDockerModelRunnerEndpoint(
process.env.DOCKER_MODEL_RUNNER_BASE_PATH
@@ -173,6 +174,7 @@ class DockerModelRunnerLLM {
outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -194,6 +196,7 @@ class DockerModelRunnerLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}

@@ -164,6 +164,7 @@ class FireworksAiLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -185,6 +186,7 @@ class FireworksAiLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}

@@ -234,6 +234,7 @@ class FoundryLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -256,6 +257,7 @@ class FoundryLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}

@@ -406,6 +406,7 @@ class GeminiLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -425,6 +426,7 @@ class GeminiLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});

return measuredStreamRequest;

@@ -194,6 +194,7 @@ class GenericOpenAiLLM {
(result.output?.usage?.completion_tokens || 0) / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -209,9 +210,9 @@ class GenericOpenAiLLM {
max_tokens: this.maxTokens,
}),
messages,
// runPromptTokenCalculation: true - There is not way to know if the generic provider connected is returning
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}

@@ -23,6 +23,7 @@ class GiteeAILLM {
throw new Error("No Gitee AI API key was set.");
const { OpenAI: OpenAIApi } = require("openai");

this.className = "GiteeAILLM";
this.openai = new OpenAIApi({
apiKey: process.env.GITEE_AI_API_KEY,
baseURL: "https://ai.gitee.com/v1",
@@ -48,27 +49,6 @@ class GiteeAILLM {
console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
}

// This checks if the .cached_at file has a timestamp that is more than 1Week (in millis)
// from the current date. If it is, then we will refetch the API so that all the models are up
// to date.
#cacheIsStale() {
const MAX_STALE = 6.048e8; // 1 Week in MS
if (!fs.existsSync(this.cacheAtPath)) return true;
const now = Number(new Date());
const timestampMs = Number(fs.readFileSync(this.cacheAtPath));
return now - timestampMs > MAX_STALE;
}

// This function fetches the models from the GiteeAI API and caches them locally.
async #syncModels() {
if (fs.existsSync(this.cacheModelPath) && !this.#cacheIsStale())
return false;

this.log("Model cache is not present or stale. Fetching from GiteeAI API.");
await giteeAiModels();
return;
}

models() {
if (!fs.existsSync(this.cacheModelPath)) return {};
return safeJsonParse(
@@ -171,6 +151,7 @@ class GiteeAILLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -187,6 +168,7 @@ class GiteeAILLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});

return measuredStreamRequest;

@@ -11,6 +11,7 @@ class GroqLLM {
constructor(embedder = null, modelPreference = null) {
const { OpenAI: OpenAIApi } = require("openai");
if (!process.env.GROQ_API_KEY) throw new Error("No Groq API key was set.");
this.className = "GroqLLM";

this.openai = new OpenAIApi({
baseURL: "https://api.groq.com/openai/v1",
@@ -204,6 +205,7 @@ class GroqLLM {
result.output.usage.completion_time,
duration: result.output.usage.total_time,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -225,6 +227,7 @@ class GroqLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});

return measuredStreamRequest;

@@ -14,6 +14,7 @@ class HuggingFaceLLM {
throw new Error("No HuggingFace Access Token was set.");
const { OpenAI: OpenAIApi } = require("openai");

this.className = "HuggingFaceLLM";
this.openai = new OpenAIApi({
baseURL: `${process.env.HUGGING_FACE_LLM_ENDPOINT}/v1`,
apiKey: process.env.HUGGING_FACE_LLM_API_KEY,
@@ -118,6 +119,7 @@ class HuggingFaceLLM {
(result.output.usage?.completion_tokens || 0) / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -134,6 +136,7 @@ class HuggingFaceLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}

@@ -161,6 +161,7 @@ class KoboldCPPLLM {
outputTps: completionTokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -178,6 +179,7 @@ class KoboldCPPLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}

@@ -155,6 +155,7 @@ class LiteLLM {
(result.output.usage?.completion_tokens || 0) / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -170,9 +171,9 @@ class LiteLLM {
max_tokens: parseInt(this.maxTokens), // LiteLLM requires int
}),
messages,
// runPromptTokenCalculation: true - We manually count the tokens because they may or may not be provided in the stream
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});

return measuredStreamRequest;

@@ -17,6 +17,7 @@ class LMStudioLLM {
if (!process.env.LMSTUDIO_BASE_PATH)
throw new Error("No LMStudio API Base Path was set.");

this.className = "LMStudioLLM";
const apiKey = process.env.LMSTUDIO_AUTH_TOKEN ?? null;
this.lmstudio = new OpenAIApi({
baseURL: parseLMStudioBasePath(process.env.LMSTUDIO_BASE_PATH), // here is the URL to your LMStudio instance
@@ -240,6 +241,7 @@ class LMStudioLLM {
outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -261,6 +263,7 @@ class LMStudioLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}

@@ -12,6 +12,7 @@ class LocalAiLLM {
if (!process.env.LOCAL_AI_BASE_PATH)
throw new Error("No LocalAI Base Path was set.");

this.className = "LocalAiLLM";
const { OpenAI: OpenAIApi } = require("openai");
this.openai = new OpenAIApi({
baseURL: process.env.LOCAL_AI_BASE_PATH,
@@ -146,6 +147,7 @@ class LocalAiLLM {
outputTps: completionTokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -167,6 +169,7 @@ class LocalAiLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}

@@ -140,6 +140,7 @@ class MistralLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -161,6 +162,7 @@ class MistralLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}

@@ -137,6 +137,7 @@ class MoonshotAiLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -153,6 +154,7 @@ class MoonshotAiLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});

return measuredStreamRequest;

@@ -226,6 +226,7 @@ class NovitaLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -247,6 +248,7 @@ class NovitaLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}

@@ -185,6 +185,7 @@ class NvidiaNimLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -206,6 +207,7 @@ class NvidiaNimLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}

@@ -312,6 +312,7 @@ class OllamaAILLM {
result.output.usage.completion_tokens / result.output.usage.duration,
duration: result.output.usage.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -333,6 +334,7 @@ class OllamaAILLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
}).catch((e) => {
throw this.#errorHandler(e);
});

@@ -176,6 +176,7 @@ class OpenAiLLM {
: 0,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -198,6 +199,7 @@ class OpenAiLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});

return measuredStreamRequest;

@@ -277,6 +277,7 @@ class OpenRouterLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -311,6 +312,7 @@ class OpenRouterLLM {
// is not available.
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});

return measuredStreamRequest;

@@ -18,6 +18,7 @@ class PerplexityLLM {
if (!process.env.PERPLEXITY_API_KEY)
throw new Error("No Perplexity API key was set.");

this.className = "PerplexityLLM";
const { OpenAI: OpenAIApi } = require("openai");
this.openai = new OpenAIApi({
baseURL: "https://api.perplexity.ai",
@@ -118,6 +119,7 @@ class PerplexityLLM {
outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -139,6 +141,7 @@ class PerplexityLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}

@@ -177,6 +177,7 @@ class PPIOLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -198,6 +199,7 @@ class PPIOLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}

@@ -169,6 +169,7 @@ class PrivatemodeLLM {
outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -190,6 +191,7 @@ class PrivatemodeLLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}

@@ -137,6 +137,7 @@ class SambaNovaLLM {
outputTps: result.output.usage?.total_tokens_per_sec || 0,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -156,6 +157,7 @@ class SambaNovaLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});

return measuredStreamRequest;

@@ -151,6 +151,7 @@ class TextGenWebUILLM {
outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -167,6 +168,7 @@ class TextGenWebUILLM {
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}

@@ -82,6 +82,7 @@ class TogetherAiLLM {
if (!process.env.TOGETHER_AI_API_KEY)
throw new Error("No TogetherAI API key was set.");
const { OpenAI: OpenAIApi } = require("openai");
this.className = "TogetherAiLLM";
this.openai = new OpenAIApi({
baseURL: "https://api.together.xyz/v1",
apiKey: process.env.TOGETHER_AI_API_KEY ?? null,
@@ -210,6 +211,7 @@ class TogetherAiLLM {
outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -231,6 +233,7 @@ class TogetherAiLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});
return measuredStreamRequest;
}

@@ -148,6 +148,7 @@ class XAiLLM {
outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -169,6 +170,7 @@ class XAiLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});

return measuredStreamRequest;

@@ -137,6 +137,7 @@ class ZAiLLM {
outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
provider: this.className,
timestamp: new Date(),
},
};
@@ -153,6 +154,7 @@ class ZAiLLM {
messages,
runPromptTokenCalculation: false,
modelTag: this.model,
provider: this.className,
});

return measuredStreamRequest;

@@ -62,6 +62,7 @@ class LLMPerformanceMonitor {
* @param {Messages} [opts.messages=[]] - the messages sent to the LLM so we can calculate the prompt tokens since most providers do not return this on stream
* @param {boolean} [opts.runPromptTokenCalculation=true] - whether to run the prompt token calculation to estimate the `prompt_tokens` metric. This is useful for providers that do not return this on stream.
* @param {string} [opts.modelTag=""] - the tag of the model that was used to generate the stream (eg: gpt-4o, claude-3-5-sonnet, qwen3/72b-instruct, etc.)
* @param {string} [opts.provider=""] - the class name of the LLM that was used to generate the stream (eg: OpenAI, Anthropic, LMStudio, ApiPie, etc.)
* @returns {Promise<MonitoredStream>}
*/
static async measureStream({
@@ -69,6 +70,7 @@ class LLMPerformanceMonitor {
messages = [],
runPromptTokenCalculation = true,
modelTag = "",
provider = "",
}) {
const stream = await func;
stream.start = Date.now();
@@ -80,6 +82,7 @@ class LLMPerformanceMonitor {
outputTps: 0,
duration: 0,
...(modelTag ? { model: modelTag } : {}),
...(provider ? { provider: provider } : {}),
};

stream.endMeasurement = (reportedUsage = {}) => {

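For reference, every adapter touched above follows the same pattern: the class sets this.className in its constructor, forwards it as provider to LLMPerformanceMonitor.measureStream(...), and the monitor stamps it onto the metrics object attached to the chat. A minimal self-contained sketch of that flow is below; FakeMonitor and FakeProviderLLM are hypothetical stand-ins for illustration only, not the real AnythingLLM classes.

// Hypothetical sketch of the provider-field pattern introduced in this commit.
// FakeMonitor and FakeProviderLLM are stand-ins, not the real AnythingLLM classes.
class FakeMonitor {
  // Mirrors the measureStream defaults above: model/provider are only attached
  // to the metrics object when a non-empty string was passed in.
  static async measureStream({ func, modelTag = "", provider = "" }) {
    const stream = await func;
    stream.metrics = {
      completion_tokens: 0,
      prompt_tokens: 0,
      total_tokens: 0,
      outputTps: 0,
      duration: 0,
      ...(modelTag ? { model: modelTag } : {}),
      ...(provider ? { provider: provider } : {}),
    };
    return stream;
  }
}

class FakeProviderLLM {
  constructor(model = "fake-model") {
    this.className = "FakeProviderLLM"; // same convention as this.className = "OpenAiLLM", etc.
    this.model = model;
  }

  async streamGetChatCompletion() {
    // A real adapter would pass its SDK's streaming request promise as `func`.
    return FakeMonitor.measureStream({
      func: Promise.resolve({}),
      modelTag: this.model,
      provider: this.className,
    });
  }
}

(async () => {
  const stream = await new FakeProviderLLM().streamGetChatCompletion();
  console.log(stream.metrics); // includes model: "fake-model" and provider: "FakeProviderLLM"
})();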