4601 log model on response (#4781)
* add model tag to chatCompletion * add modelTag `model` to async streaming; keeps default arguments for prompt token calculation where applied via explicit arg * fix HF default arg * render all performance metrics as available for backward compatibility; add `timestamp` to both sync/async chat methods * extract metrics string to function
This commit is contained in:
parent
bc3ad06de4
commit
664f466e3f
@ -1,3 +1,4 @@
|
|||||||
|
import { formatDateTimeAsMoment } from "@/utils/directories";
|
||||||
import { numberWithCommas } from "@/utils/numbers";
|
import { numberWithCommas } from "@/utils/numbers";
|
||||||
import React, { useEffect, useState, useContext } from "react";
|
import React, { useEffect, useState, useContext } from "react";
|
||||||
const MetricsContext = React.createContext();
|
const MetricsContext = React.createContext();
|
||||||
@ -41,6 +42,26 @@ function getAutoShowMetrics() {
|
|||||||
return window?.localStorage?.getItem(SHOW_METRICS_KEY) === "true";
|
return window?.localStorage?.getItem(SHOW_METRICS_KEY) === "true";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Assemble the single-line label shown for a chat's performance metrics.
 *
 * The label is made of up to three segments joined by " · ":
 * - the model name (skipped when `metrics.model` is falsy)
 * - the duration and output tokens-per-second (always included)
 * - the timestamp rendered with the "MMM D, h:mm A" format
 *   (skipped when `metrics.timestamp` is falsy)
 *
 * @param {{duration: number, outputTps: number, model?: string, timestamp?: number}} metrics
 * @returns {string}
 */
function buildMetricsString(metrics = {}) {
  const segments = [];

  if (metrics?.model) {
    segments.push(metrics.model);
  }

  const elapsed = formatDuration(metrics.duration);
  const tokensPerSecond = formatTps(metrics.outputTps);
  segments.push(`${elapsed} (${tokensPerSecond} tok/s)`);

  if (metrics?.timestamp) {
    segments.push(formatDateTimeAsMoment(metrics.timestamp, "MMM D, h:mm A"));
  }

  // Drop any segment that rendered to an empty value before joining.
  return segments.filter(Boolean).join(" · ");
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Toggle the show metrics setting in localStorage `anythingllm_show_chat_metrics` key
|
* Toggle the show metrics setting in localStorage `anythingllm_show_chat_metrics` key
|
||||||
* @returns {void}
|
* @returns {void}
|
||||||
@ -88,7 +109,7 @@ export function MetricsProvider({ children }) {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Render the metrics for a given chat, if available
|
* Render the metrics for a given chat, if available
|
||||||
* @param {metrics: {duration:number, outputTps: number}} props
|
* @param {metrics: {duration:number, outputTps: number, model: string, timestamp: number}} props
|
||||||
* @returns
|
* @returns
|
||||||
*/
|
*/
|
||||||
export default function RenderMetrics({ metrics = {} }) {
|
export default function RenderMetrics({ metrics = {} }) {
|
||||||
@ -110,8 +131,7 @@ export default function RenderMetrics({ metrics = {} }) {
|
|||||||
className={`border-none flex justify-end items-center gap-x-[8px] ${showMetricsAutomatically ? "opacity-100" : "opacity-0"} md:group-hover:opacity-100 transition-all duration-300`}
|
className={`border-none flex justify-end items-center gap-x-[8px] ${showMetricsAutomatically ? "opacity-100" : "opacity-0"} md:group-hover:opacity-100 transition-all duration-300`}
|
||||||
>
|
>
|
||||||
<p className="cursor-pointer text-xs font-mono text-theme-text-secondary opacity-50">
|
<p className="cursor-pointer text-xs font-mono text-theme-text-secondary opacity-50">
|
||||||
{formatDuration(metrics.duration)} ({formatTps(metrics.outputTps)}{" "}
|
{buildMetricsString(metrics)}
|
||||||
tok/s)
|
|
||||||
</p>
|
</p>
|
||||||
</button>
|
</button>
|
||||||
);
|
);
|
||||||
|
|||||||
@ -171,6 +171,8 @@ class AnthropicLLM {
|
|||||||
total_tokens: promptTokens + completionTokens,
|
total_tokens: promptTokens + completionTokens,
|
||||||
outputTps: completionTokens / result.duration,
|
outputTps: completionTokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
@ -190,7 +192,8 @@ class AnthropicLLM {
|
|||||||
temperature: Number(temperature ?? this.defaultTemp),
|
temperature: Number(temperature ?? this.defaultTemp),
|
||||||
}),
|
}),
|
||||||
messages,
|
messages,
|
||||||
false
|
false,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
|
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
|
|||||||
@ -220,6 +220,8 @@ class ApiPieLLM {
|
|||||||
outputTps:
|
outputTps:
|
||||||
(result.output.usage?.completion_tokens || 0) / result.duration,
|
(result.output.usage?.completion_tokens || 0) / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -237,7 +239,9 @@ class ApiPieLLM {
|
|||||||
messages,
|
messages,
|
||||||
temperature,
|
temperature,
|
||||||
}),
|
}),
|
||||||
messages
|
messages,
|
||||||
|
true,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -174,6 +174,8 @@ class AzureOpenAiLLM {
|
|||||||
total_tokens: result.output.usage.total_tokens || 0,
|
total_tokens: result.output.usage.total_tokens || 0,
|
||||||
outputTps: result.output.usage.completion_tokens / result.duration,
|
outputTps: result.output.usage.completion_tokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -192,7 +194,9 @@ class AzureOpenAiLLM {
|
|||||||
n: 1,
|
n: 1,
|
||||||
stream: true,
|
stream: true,
|
||||||
}),
|
}),
|
||||||
messages
|
messages,
|
||||||
|
true,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
|
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
|
|||||||
@ -423,9 +423,7 @@ class AWSBedrockLLM {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
throw new Error(`AWSBedrock::getChatCompletion failed. ${e.message}`);
|
throw new Error(`AWSBedrock::getChatCompletion failed. ${e.message}`);
|
||||||
}),
|
})
|
||||||
messages,
|
|
||||||
false
|
|
||||||
);
|
);
|
||||||
|
|
||||||
const response = result.output;
|
const response = result.output;
|
||||||
@ -450,6 +448,8 @@ class AWSBedrockLLM {
|
|||||||
total_tokens: response?.usage?.totalTokens ?? 0,
|
total_tokens: response?.usage?.totalTokens ?? 0,
|
||||||
outputTps: outputTps,
|
outputTps: outputTps,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -492,7 +492,8 @@ class AWSBedrockLLM {
|
|||||||
const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
|
const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
|
||||||
stream,
|
stream,
|
||||||
messages,
|
messages,
|
||||||
false // Indicate it's not a function call measurement
|
false,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
|
|||||||
@ -124,6 +124,8 @@ class CohereLLM {
|
|||||||
total_tokens: promptTokens + completionTokens,
|
total_tokens: promptTokens + completionTokens,
|
||||||
outputTps: completionTokens / result.duration,
|
outputTps: completionTokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -139,7 +141,8 @@ class CohereLLM {
|
|||||||
temperature,
|
temperature,
|
||||||
}),
|
}),
|
||||||
messages,
|
messages,
|
||||||
false
|
false,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
|
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
|
|||||||
@ -225,6 +225,8 @@ class CometApiLLM {
|
|||||||
total_tokens: result.output.usage.total_tokens || 0,
|
total_tokens: result.output.usage.total_tokens || 0,
|
||||||
outputTps: result.output.usage.completion_tokens / result.duration,
|
outputTps: result.output.usage.completion_tokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -242,7 +244,9 @@ class CometApiLLM {
|
|||||||
messages,
|
messages,
|
||||||
temperature,
|
temperature,
|
||||||
}),
|
}),
|
||||||
messages
|
messages,
|
||||||
|
true,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -130,6 +130,8 @@ class DeepSeekLLM {
|
|||||||
total_tokens: result.output.usage.total_tokens || 0,
|
total_tokens: result.output.usage.total_tokens || 0,
|
||||||
outputTps: result.output.usage.completion_tokens / result.duration,
|
outputTps: result.output.usage.completion_tokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -148,7 +150,8 @@ class DeepSeekLLM {
|
|||||||
temperature,
|
temperature,
|
||||||
}),
|
}),
|
||||||
messages,
|
messages,
|
||||||
false
|
false,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
|
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
|
|||||||
@ -165,6 +165,8 @@ class DellProAiStudioLLM {
|
|||||||
total_tokens: result.output.usage?.total_tokens || 0,
|
total_tokens: result.output.usage?.total_tokens || 0,
|
||||||
outputTps: result.output.usage?.completion_tokens / result.duration,
|
outputTps: result.output.usage?.completion_tokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -182,7 +184,9 @@ class DellProAiStudioLLM {
|
|||||||
messages,
|
messages,
|
||||||
temperature,
|
temperature,
|
||||||
}),
|
}),
|
||||||
messages
|
messages,
|
||||||
|
true,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -163,6 +163,8 @@ class FireworksAiLLM {
|
|||||||
total_tokens: result.output.usage.total_tokens || 0,
|
total_tokens: result.output.usage.total_tokens || 0,
|
||||||
outputTps: result.output.usage.completion_tokens / result.duration,
|
outputTps: result.output.usage.completion_tokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -181,7 +183,8 @@ class FireworksAiLLM {
|
|||||||
temperature,
|
temperature,
|
||||||
}),
|
}),
|
||||||
messages,
|
messages,
|
||||||
false
|
false,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -224,6 +224,8 @@ class FoundryLLM {
|
|||||||
total_tokens: result.output.usage.total_tokens || 0,
|
total_tokens: result.output.usage.total_tokens || 0,
|
||||||
outputTps: result.output.usage.completion_tokens / result.duration,
|
outputTps: result.output.usage.completion_tokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -242,7 +244,9 @@ class FoundryLLM {
|
|||||||
temperature,
|
temperature,
|
||||||
max_completion_tokens: this.promptWindowLimit(),
|
max_completion_tokens: this.promptWindowLimit(),
|
||||||
}),
|
}),
|
||||||
messages
|
messages,
|
||||||
|
true,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -405,6 +405,8 @@ class GeminiLLM {
|
|||||||
total_tokens: result.output.usage.total_tokens || 0,
|
total_tokens: result.output.usage.total_tokens || 0,
|
||||||
outputTps: result.output.usage.completion_tokens / result.duration,
|
outputTps: result.output.usage.completion_tokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -421,7 +423,8 @@ class GeminiLLM {
|
|||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
messages,
|
messages,
|
||||||
false
|
false,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
|
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
|
|||||||
@ -193,6 +193,8 @@ class GenericOpenAiLLM {
|
|||||||
outputTps:
|
outputTps:
|
||||||
(result.output?.usage?.completion_tokens || 0) / result.duration,
|
(result.output?.usage?.completion_tokens || 0) / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -206,9 +208,10 @@ class GenericOpenAiLLM {
|
|||||||
temperature,
|
temperature,
|
||||||
max_tokens: this.maxTokens,
|
max_tokens: this.maxTokens,
|
||||||
}),
|
}),
|
||||||
messages
|
messages,
|
||||||
// runPromptTokenCalculation: true - There is not way to know if the generic provider connected is returning
|
// runPromptTokenCalculation: true - There is not way to know if the generic provider connected is returning
|
||||||
// the correct usage metrics if any at all since any provider could be connected.
|
true,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -170,6 +170,8 @@ class GiteeAILLM {
|
|||||||
total_tokens: result.output.usage.total_tokens || 0,
|
total_tokens: result.output.usage.total_tokens || 0,
|
||||||
outputTps: result.output.usage.completion_tokens / result.duration,
|
outputTps: result.output.usage.completion_tokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -183,7 +185,8 @@ class GiteeAILLM {
|
|||||||
temperature,
|
temperature,
|
||||||
}),
|
}),
|
||||||
messages,
|
messages,
|
||||||
false
|
false,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
|
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
|
|||||||
@ -203,6 +203,8 @@ class GroqLLM {
|
|||||||
result.output.usage.completion_tokens /
|
result.output.usage.completion_tokens /
|
||||||
result.output.usage.completion_time,
|
result.output.usage.completion_time,
|
||||||
duration: result.output.usage.total_time,
|
duration: result.output.usage.total_time,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -221,7 +223,8 @@ class GroqLLM {
|
|||||||
temperature,
|
temperature,
|
||||||
}),
|
}),
|
||||||
messages,
|
messages,
|
||||||
false
|
false,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
|
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
|
|||||||
@ -117,6 +117,8 @@ class HuggingFaceLLM {
|
|||||||
outputTps:
|
outputTps:
|
||||||
(result.output.usage?.completion_tokens || 0) / result.duration,
|
(result.output.usage?.completion_tokens || 0) / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -129,7 +131,9 @@ class HuggingFaceLLM {
|
|||||||
messages,
|
messages,
|
||||||
temperature,
|
temperature,
|
||||||
}),
|
}),
|
||||||
messages
|
messages,
|
||||||
|
true,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -160,6 +160,8 @@ class KoboldCPPLLM {
|
|||||||
total_tokens: promptTokens + completionTokens,
|
total_tokens: promptTokens + completionTokens,
|
||||||
outputTps: completionTokens / result.duration,
|
outputTps: completionTokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -173,7 +175,9 @@ class KoboldCPPLLM {
|
|||||||
temperature,
|
temperature,
|
||||||
max_tokens: this.maxTokens,
|
max_tokens: this.maxTokens,
|
||||||
}),
|
}),
|
||||||
messages
|
messages,
|
||||||
|
true,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -154,6 +154,8 @@ class LiteLLM {
|
|||||||
outputTps:
|
outputTps:
|
||||||
(result.output.usage?.completion_tokens || 0) / result.duration,
|
(result.output.usage?.completion_tokens || 0) / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -167,9 +169,10 @@ class LiteLLM {
|
|||||||
temperature,
|
temperature,
|
||||||
max_tokens: parseInt(this.maxTokens), // LiteLLM requires int
|
max_tokens: parseInt(this.maxTokens), // LiteLLM requires int
|
||||||
}),
|
}),
|
||||||
messages
|
messages,
|
||||||
// runPromptTokenCalculation: true - We manually count the tokens because they may or may not be provided in the stream
|
// runPromptTokenCalculation: true - We manually count the tokens because they may or may not be provided in the stream
|
||||||
// responses depending on LLM connected. If they are provided, then we counted for nothing, but better than nothing.
|
true,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
|
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
|
|||||||
@ -234,6 +234,8 @@ class LMStudioLLM {
|
|||||||
total_tokens: result.output.usage?.total_tokens || 0,
|
total_tokens: result.output.usage?.total_tokens || 0,
|
||||||
outputTps: result.output.usage?.completion_tokens / result.duration,
|
outputTps: result.output.usage?.completion_tokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -251,7 +253,9 @@ class LMStudioLLM {
|
|||||||
messages,
|
messages,
|
||||||
temperature,
|
temperature,
|
||||||
}),
|
}),
|
||||||
messages
|
messages,
|
||||||
|
true,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -145,6 +145,8 @@ class LocalAiLLM {
|
|||||||
total_tokens: promptTokens + completionTokens,
|
total_tokens: promptTokens + completionTokens,
|
||||||
outputTps: completionTokens / result.duration,
|
outputTps: completionTokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -162,7 +164,9 @@ class LocalAiLLM {
|
|||||||
messages,
|
messages,
|
||||||
temperature,
|
temperature,
|
||||||
}),
|
}),
|
||||||
messages
|
messages,
|
||||||
|
true,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -139,6 +139,8 @@ class MistralLLM {
|
|||||||
total_tokens: result.output.usage.total_tokens || 0,
|
total_tokens: result.output.usage.total_tokens || 0,
|
||||||
outputTps: result.output.usage.completion_tokens / result.duration,
|
outputTps: result.output.usage.completion_tokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -157,7 +159,8 @@ class MistralLLM {
|
|||||||
temperature,
|
temperature,
|
||||||
}),
|
}),
|
||||||
messages,
|
messages,
|
||||||
false
|
false,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -136,6 +136,8 @@ class MoonshotAiLLM {
|
|||||||
total_tokens: result.output.usage.total_tokens || 0,
|
total_tokens: result.output.usage.total_tokens || 0,
|
||||||
outputTps: result.output.usage.completion_tokens / result.duration,
|
outputTps: result.output.usage.completion_tokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -148,7 +150,9 @@ class MoonshotAiLLM {
|
|||||||
messages,
|
messages,
|
||||||
temperature,
|
temperature,
|
||||||
}),
|
}),
|
||||||
messages
|
messages,
|
||||||
|
true,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
|
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
|
|||||||
@ -225,6 +225,8 @@ class NovitaLLM {
|
|||||||
total_tokens: result.output.usage.total_tokens || 0,
|
total_tokens: result.output.usage.total_tokens || 0,
|
||||||
outputTps: result.output.usage.completion_tokens / result.duration,
|
outputTps: result.output.usage.completion_tokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -242,7 +244,9 @@ class NovitaLLM {
|
|||||||
messages,
|
messages,
|
||||||
temperature,
|
temperature,
|
||||||
}),
|
}),
|
||||||
messages
|
messages,
|
||||||
|
true,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -184,6 +184,8 @@ class NvidiaNimLLM {
|
|||||||
total_tokens: result.output.usage.total_tokens || 0,
|
total_tokens: result.output.usage.total_tokens || 0,
|
||||||
outputTps: result.output.usage.completion_tokens / result.duration,
|
outputTps: result.output.usage.completion_tokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -201,7 +203,9 @@ class NvidiaNimLLM {
|
|||||||
messages,
|
messages,
|
||||||
temperature,
|
temperature,
|
||||||
}),
|
}),
|
||||||
messages
|
messages,
|
||||||
|
true,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -305,6 +305,8 @@ class OllamaAILLM {
|
|||||||
outputTps:
|
outputTps:
|
||||||
result.output.usage.completion_tokens / result.output.usage.duration,
|
result.output.usage.completion_tokens / result.output.usage.duration,
|
||||||
duration: result.output.usage.duration,
|
duration: result.output.usage.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -326,7 +328,8 @@ class OllamaAILLM {
|
|||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
messages,
|
messages,
|
||||||
false
|
false,
|
||||||
|
this.model
|
||||||
).catch((e) => {
|
).catch((e) => {
|
||||||
throw this.#errorHandler(e);
|
throw this.#errorHandler(e);
|
||||||
});
|
});
|
||||||
|
|||||||
@ -175,6 +175,8 @@ class OpenAiLLM {
|
|||||||
? usage.output_tokens / result.duration
|
? usage.output_tokens / result.duration
|
||||||
: 0,
|
: 0,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -194,7 +196,8 @@ class OpenAiLLM {
|
|||||||
temperature: this.#temperature(this.model, temperature),
|
temperature: this.#temperature(this.model, temperature),
|
||||||
}),
|
}),
|
||||||
messages,
|
messages,
|
||||||
false
|
false,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
|
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
|
|||||||
@ -276,6 +276,8 @@ class OpenRouterLLM {
|
|||||||
total_tokens: result.output.usage.total_tokens || 0,
|
total_tokens: result.output.usage.total_tokens || 0,
|
||||||
outputTps: result.output.usage.completion_tokens / result.duration,
|
outputTps: result.output.usage.completion_tokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -300,13 +302,15 @@ class OpenRouterLLM {
|
|||||||
include_reasoning: true,
|
include_reasoning: true,
|
||||||
user: user?.id ? `user_${user.id}` : "",
|
user: user?.id ? `user_${user.id}` : "",
|
||||||
}),
|
}),
|
||||||
messages
|
messages,
|
||||||
// We have to manually count the tokens
|
// We have to manually count the tokens
|
||||||
// OpenRouter has a ton of providers and they all can return slightly differently
|
// OpenRouter has a ton of providers and they all can return slightly differently
|
||||||
// some return chunk.usage on STOP, some do it after stop, its inconsistent.
|
// some return chunk.usage on STOP, some do it after stop, its inconsistent.
|
||||||
// So it is possible reported metrics are inaccurate since we cannot reliably
|
// So it is possible reported metrics are inaccurate since we cannot reliably
|
||||||
// catch the metrics before resolving the stream - so we just pretend this functionality
|
// catch the metrics before resolving the stream - so we just pretend this functionality
|
||||||
// is not available.
|
// is not available.
|
||||||
|
true,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
|
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
|
|||||||
@ -117,6 +117,8 @@ class PerplexityLLM {
|
|||||||
total_tokens: result.output.usage?.total_tokens || 0,
|
total_tokens: result.output.usage?.total_tokens || 0,
|
||||||
outputTps: result.output.usage?.completion_tokens / result.duration,
|
outputTps: result.output.usage?.completion_tokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -134,7 +136,9 @@ class PerplexityLLM {
|
|||||||
messages,
|
messages,
|
||||||
temperature,
|
temperature,
|
||||||
}),
|
}),
|
||||||
messages
|
messages,
|
||||||
|
true,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -176,6 +176,8 @@ class PPIOLLM {
|
|||||||
total_tokens: result.output.usage.total_tokens || 0,
|
total_tokens: result.output.usage.total_tokens || 0,
|
||||||
outputTps: result.output.usage.completion_tokens / result.duration,
|
outputTps: result.output.usage.completion_tokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -193,7 +195,9 @@ class PPIOLLM {
|
|||||||
messages,
|
messages,
|
||||||
temperature,
|
temperature,
|
||||||
}),
|
}),
|
||||||
messages
|
messages,
|
||||||
|
true,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -150,6 +150,8 @@ class TextGenWebUILLM {
|
|||||||
total_tokens: result.output.usage?.total_tokens || 0,
|
total_tokens: result.output.usage?.total_tokens || 0,
|
||||||
outputTps: result.output.usage?.completion_tokens / result.duration,
|
outputTps: result.output.usage?.completion_tokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -162,7 +164,9 @@ class TextGenWebUILLM {
|
|||||||
messages,
|
messages,
|
||||||
temperature,
|
temperature,
|
||||||
}),
|
}),
|
||||||
messages
|
messages,
|
||||||
|
true,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -209,6 +209,8 @@ class TogetherAiLLM {
|
|||||||
total_tokens: result.output.usage?.total_tokens || 0,
|
total_tokens: result.output.usage?.total_tokens || 0,
|
||||||
outputTps: result.output.usage?.completion_tokens / result.duration,
|
outputTps: result.output.usage?.completion_tokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -227,7 +229,8 @@ class TogetherAiLLM {
|
|||||||
temperature,
|
temperature,
|
||||||
}),
|
}),
|
||||||
messages,
|
messages,
|
||||||
false
|
false,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -147,6 +147,8 @@ class XAiLLM {
|
|||||||
total_tokens: result.output.usage.total_tokens || 0,
|
total_tokens: result.output.usage.total_tokens || 0,
|
||||||
outputTps: result.output.usage.completion_tokens / result.duration,
|
outputTps: result.output.usage.completion_tokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -165,7 +167,8 @@ class XAiLLM {
|
|||||||
temperature,
|
temperature,
|
||||||
}),
|
}),
|
||||||
messages,
|
messages,
|
||||||
false
|
false,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
|
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
|
|||||||
@ -136,6 +136,8 @@ class ZAiLLM {
|
|||||||
total_tokens: result.output.usage?.total_tokens || 0,
|
total_tokens: result.output.usage?.total_tokens || 0,
|
||||||
outputTps: result.output.usage?.completion_tokens / result.duration,
|
outputTps: result.output.usage?.completion_tokens / result.duration,
|
||||||
duration: result.duration,
|
duration: result.duration,
|
||||||
|
model: this.model,
|
||||||
|
timestamp: new Date(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -149,7 +151,8 @@ class ZAiLLM {
|
|||||||
temperature,
|
temperature,
|
||||||
}),
|
}),
|
||||||
messages,
|
messages,
|
||||||
false
|
false,
|
||||||
|
this.model
|
||||||
);
|
);
|
||||||
|
|
||||||
return measuredStreamRequest;
|
return measuredStreamRequest;
|
||||||
|
|||||||
@ -59,13 +59,15 @@ class LLMPerformanceMonitor {
|
|||||||
* Also attaches an `endMeasurement` method to the stream that will calculate the duration of the stream and metrics.
|
* Also attaches an `endMeasurement` method to the stream that will calculate the duration of the stream and metrics.
|
||||||
* @param {Promise<OpenAICompatibleStream>} func
|
* @param {Promise<OpenAICompatibleStream>} func
|
||||||
* @param {Messages} messages - the messages sent to the LLM so we can calculate the prompt tokens since most providers do not return this on stream
|
* @param {Messages} messages - the messages sent to the LLM so we can calculate the prompt tokens since most providers do not return this on stream
|
||||||
* @param {boolean} runPromptTokenCalculation - whether to run the prompt token calculation to estimate the `prompt_tokens` metric. This is useful for providers that do not return this on stream.
|
* @param {boolean} runPromptTokenCalculation - [default: true] whether to run the prompt token calculation to estimate the `prompt_tokens` metric. This is useful for providers that do not return this on stream.
|
||||||
|
* @param {string} modelTag - the tag of the model that was used to generate the stream (eg: gpt-4o, claude-3-5-sonnet, qwen3/72b-instruct, etc.)
|
||||||
* @returns {Promise<MonitoredStream>}
|
* @returns {Promise<MonitoredStream>}
|
||||||
*/
|
*/
|
||||||
static async measureStream(
|
static async measureStream(
|
||||||
func,
|
func,
|
||||||
messages = [],
|
messages = [],
|
||||||
runPromptTokenCalculation = true
|
runPromptTokenCalculation = true,
|
||||||
|
modelTag = ""
|
||||||
) {
|
) {
|
||||||
const stream = await func;
|
const stream = await func;
|
||||||
stream.start = Date.now();
|
stream.start = Date.now();
|
||||||
@ -76,6 +78,7 @@ class LLMPerformanceMonitor {
|
|||||||
total_tokens: 0,
|
total_tokens: 0,
|
||||||
outputTps: 0,
|
outputTps: 0,
|
||||||
duration: 0,
|
duration: 0,
|
||||||
|
...(modelTag ? { model: modelTag } : {}),
|
||||||
};
|
};
|
||||||
|
|
||||||
stream.endMeasurement = (reportedUsage = {}) => {
|
stream.endMeasurement = (reportedUsage = {}) => {
|
||||||
@ -88,6 +91,7 @@ class LLMPerformanceMonitor {
|
|||||||
...stream.metrics,
|
...stream.metrics,
|
||||||
...reportedUsage,
|
...reportedUsage,
|
||||||
duration: reportedUsage?.duration ?? estimatedDuration,
|
duration: reportedUsage?.duration ?? estimatedDuration,
|
||||||
|
timestamp: new Date(),
|
||||||
};
|
};
|
||||||
|
|
||||||
stream.metrics.total_tokens =
|
stream.metrics.total_tokens =
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user