Prune telemetry events

Update telemetry for model tagging
Update readme on telemetry
timothycarambat 2025-04-29 13:54:00 -07:00
parent f9ef5840cb
commit dacd67124a
14 changed files with 129 additions and 16 deletions

View File

@@ -204,12 +204,18 @@ Set `DISABLE_TELEMETRY` in your server or docker .env settings to "true" to opt
We will only track usage details that help us make product and roadmap decisions, specifically:
- Type of your installation (Docker or Desktop)
- When a document is added or removed. No information _about_ the document. Just that the event occurred. This gives us an idea of use.
- Type of vector database in use. Lets us know which vector database provider is the most used so we can prioritize changes when updates arrive for that provider.
- Type of LLM provider & model tag in use. Lets us know the most popular choice so we can prioritize changes when updates arrive for that provider, model, or combination thereof - e.g. reasoning vs. regular models, multi-modal models, etc.
- When a chat is sent. This is the most regular "event" and gives us an idea of the daily activity of this project across all installations. Again, only the **event** is sent - we have no information on the nature or content of the chat itself.
You can verify these claims by finding all locations `Telemetry.sendTelemetry` is called. Additionally, these events are written to the output log so you can also see the specific data which was sent - if enabled. **No IP or other identifying information is collected**. The Telemetry provider is [PostHog](https://posthog.com/) - an open-source telemetry collection service.
We take privacy very seriously and hope you can understand our desire for a glimpse into how our tool is used, without asking for annoying popup surveys, so we can build something worth using. The anonymous data is _never_ shared with third parties, ever.
[View all telemetry events in source code](https://github.com/search?q=repo%3AMintplex-Labs%2Fanything-llm%20.sendTelemetry\(&type=code)
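For illustration, this is roughly what a single telemetry call carries after this commit - a minimal sketch assembled from the payloads in the hunks below (the event name and require paths are shown for illustration; see the actual call sites further down):

// Inside an async route handler (paths relative to server/):
const { Telemetry } = require("./models/telemetry");
const { getModelTag } = require("./endpoints/utils");

await Telemetry.sendTelemetry("sent_chat", {
  LLMSelection: process.env.LLM_PROVIDER || "openai", // provider name only
  Embedder: process.env.EMBEDDING_ENGINE || "inherit",
  VectorDbSelection: process.env.VECTOR_DB || "lancedb",
  TTSSelection: process.env.TTS_PROVIDER || "native",
  LLMModel: getModelTag(), // model tag only - never chat content
});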

View File

@@ -513,8 +513,6 @@ function adminEndpoints(app) {
try {
const user = await userFromSession(request, response);
const { apiKey, error } = await ApiKey.create(user.id);
await Telemetry.sendTelemetry("api_key_created");
await EventLogs.logEvent(
"api_key_created",
{ createdBy: user?.username },

View File

@@ -12,6 +12,7 @@ const { EventLogs } = require("../../../models/eventLogs");
const {
OpenAICompatibleChat,
} = require("../../../utils/chats/openaiCompatible");
const { getModelTag } = require("../../utils");
function apiOpenAICompatibleEndpoints(app) {
if (!app) return;
@@ -181,6 +182,7 @@ function apiOpenAICompatibleEndpoints(app) {
Embedder: process.env.EMBEDDING_ENGINE || "inherit",
VectorDbSelection: process.env.VECTOR_DB || "lancedb",
TTSSelection: process.env.TTS_PROVIDER || "native",
LLMModel: getModelTag(),
});
await EventLogs.logEvent("api_sent_chat", {
workspaceName: workspace?.name,

View File

@@ -14,6 +14,7 @@ const {
writeResponseChunk,
} = require("../../../utils/helpers/chat/responses");
const { ApiChatHandler } = require("../../../utils/chats/apiChatHandler");
const { getModelTag } = require("../../utils");
function apiWorkspaceEndpoints(app) {
if (!app) return;
@@ -87,6 +88,7 @@ function apiWorkspaceEndpoints(app) {
Embedder: process.env.EMBEDDING_ENGINE || "inherit",
VectorDbSelection: process.env.VECTOR_DB || "lancedb",
TTSSelection: process.env.TTS_PROVIDER || "native",
LLMModel: getModelTag(),
});
await EventLogs.logEvent("api_workspace_created", {
workspaceName: workspace?.name || "Unknown Workspace",

View File

@@ -13,6 +13,7 @@ const {
const { WorkspaceChats } = require("../../../models/workspaceChats");
const { User } = require("../../../models/user");
const { ApiChatHandler } = require("../../../utils/chats/apiChatHandler");
const { getModelTag } = require("../../utils");
function apiWorkspaceThreadEndpoints(app) {
if (!app) return;
@@ -436,6 +437,7 @@ function apiWorkspaceThreadEndpoints(app) {
Embedder: process.env.EMBEDDING_ENGINE || "inherit",
VectorDbSelection: process.env.VECTOR_DB || "lancedb",
TTSSelection: process.env.TTS_PROVIDER || "native",
LLMModel: getModelTag(),
});
await EventLogs.logEvent("api_sent_chat", {
workspaceName: workspace?.name,
@@ -606,6 +608,7 @@ function apiWorkspaceThreadEndpoints(app) {
Embedder: process.env.EMBEDDING_ENGINE || "inherit",
VectorDbSelection: process.env.VECTOR_DB || "lancedb",
TTSSelection: process.env.TTS_PROVIDER || "native",
LLMModel: getModelTag(),
});
await EventLogs.logEvent("api_sent_chat", {
workspaceName: workspace?.name,

View File

@@ -16,6 +16,7 @@ const { writeResponseChunk } = require("../utils/helpers/chat/responses");
const { WorkspaceThread } = require("../models/workspaceThread");
const { User } = require("../models/user");
const truncate = require("truncate");
const { getModelTag } = require("./utils");
function chatEndpoints(app) {
if (!app) return;
@@ -75,6 +76,7 @@ function chatEndpoints(app) {
VectorDbSelection: process.env.VECTOR_DB || "lancedb",
multiModal: Array.isArray(attachments) && attachments?.length !== 0,
TTSSelection: process.env.TTS_PROVIDER || "native",
LLMModel: getModelTag(),
});
await EventLogs.logEvent(
@@ -179,6 +181,7 @@ function chatEndpoints(app) {
VectorDbSelection: process.env.VECTOR_DB || "lancedb",
multiModal: Array.isArray(attachments) && attachments?.length !== 0,
TTSSelection: process.env.TTS_PROVIDER || "native",
LLMModel: getModelTag(),
});
await EventLogs.logEvent(

View File

@@ -908,7 +908,6 @@ function systemEndpoints(app) {
}
const { apiKey, error } = await ApiKey.create();
await Telemetry.sendTelemetry("api_key_created");
await EventLogs.logEvent(
"api_key_created",
{},

View File

@@ -55,7 +55,101 @@ async function getDiskStorage() {
}
}
/**
* Returns the model tag based on the provider set in the environment.
 * This information is used to identify the parent model in use so that we can
 * prioritize the correct models and types for future updates, as well as build
 * features in AnythingLLM directly for a specific model or capability.
*
* Disable with {@link https://github.com/Mintplex-Labs/anything-llm?tab=readme-ov-file#telemetry--privacy|Disable Telemetry}
* @returns {string} The model tag.
*/
function getModelTag() {
let model = null;
const provider = process.env.LLM_PROVIDER;
switch (provider) {
case "openai":
model = process.env.OPEN_MODEL_PREF;
break;
case "anthropic":
model = process.env.ANTHROPIC_MODEL_PREF;
break;
case "lmstudio":
model = process.env.LMSTUDIO_MODEL_PREF;
break;
case "ollama":
model = process.env.OLLAMA_MODEL_PREF;
break;
case "groq":
model = process.env.GROQ_MODEL_PREF;
break;
case "togetherai":
model = process.env.TOGETHER_AI_MODEL_PREF;
break;
case "azure":
model = process.env.OPEN_MODEL_PREF;
break;
case "koboldcpp":
model = process.env.KOBOLD_CPP_MODEL_PREF;
break;
case "localai":
model = process.env.LOCAL_AI_MODEL_PREF;
break;
case "openrouter":
model = process.env.OPENROUTER_MODEL_PREF;
break;
case "mistral":
model = process.env.MISTRAL_MODEL_PREF;
break;
case "generic-openai":
model = process.env.GENERIC_OPEN_AI_MODEL_PREF;
break;
case "perplexity":
model = process.env.PERPLEXITY_MODEL_PREF;
break;
case "textgenwebui":
model = "textgenwebui-default";
break;
case "bedrock":
model = process.env.AWS_BEDROCK_LLM_MODEL_PREFERENCE;
break;
case "fireworksai":
model = process.env.FIREWORKS_AI_LLM_MODEL_PREF;
break;
case "deepseek":
model = process.env.DEEPSEEK_MODEL_PREF;
break;
case "litellm":
model = process.env.LITE_LLM_MODEL_PREF;
break;
case "apipie":
model = process.env.APIPIE_LLM_MODEL_PREF;
break;
case "xai":
model = process.env.XAI_LLM_MODEL_PREF;
break;
case "novita":
model = process.env.NOVITA_LLM_MODEL_PREF;
break;
case "nvidia-nim":
model = process.env.NVIDIA_NIM_LLM_MODEL_PREF;
break;
case "ppio":
model = process.env.PPIO_MODEL_PREF;
break;
case "gemini":
model = process.env.GEMINI_LLM_MODEL_PREF;
break;
default:
model = "--";
break;
}
return model;
}
module.exports = {
utilEndpoints,
getGitVersion,
getModelTag,
};
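For example, under a hypothetical configuration (values below are illustrative only, not real defaults):

// e.g. running Ollama with a local model (hypothetical values)
process.env.LLM_PROVIDER = "ollama";
process.env.OLLAMA_MODEL_PREF = "llama3:8b";
getModelTag(); // => "llama3:8b"

// an unrecognized provider falls through to the default case
process.env.LLM_PROVIDER = "some-new-provider";
getModelTag(); // => "--"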

View File

@@ -18,6 +18,7 @@ const {
} = require("../utils/middleware/validWorkspace");
const { WorkspaceChats } = require("../models/workspaceChats");
const { convertToChatHistory } = require("../utils/helpers/chat/responses");
const { getModelTag } = require("./utils");
function workspaceThreadEndpoints(app) {
if (!app) return;
@@ -41,6 +42,7 @@ function workspaceThreadEndpoints(app) {
Embedder: process.env.EMBEDDING_ENGINE || "inherit",
VectorDbSelection: process.env.VECTOR_DB || "lancedb",
TTSSelection: process.env.TTS_PROVIDER || "native",
LLMModel: getModelTag(),
},
user?.id
);

View File

@@ -34,6 +34,7 @@ const { getTTSProvider } = require("../utils/TextToSpeech");
const { WorkspaceThread } = require("../models/workspaceThread");
const truncate = require("truncate");
const { purgeDocument } = require("../utils/files/purgeDocument");
const { getModelTag } = require("./utils");
function workspaceEndpoints(app) {
if (!app) return;
@@ -56,6 +57,7 @@ function workspaceEndpoints(app) {
Embedder: process.env.EMBEDDING_ENGINE || "inherit",
VectorDbSelection: process.env.VECTOR_DB || "lancedb",
TTSSelection: process.env.TTS_PROVIDER || "native",
LLMModel: getModelTag(),
},
user?.id
);

View File

@@ -4,6 +4,7 @@ const prisma = require("../utils/prisma");
const { Telemetry } = require("./telemetry");
const { EventLogs } = require("./eventLogs");
const { safeJsonParse } = require("../utils/http");
const { getModelTag } = require("../endpoints/utils");
const Document = {
writable: ["pinned", "watched", "lastUpdatedAt"],
@@ -130,6 +131,7 @@ const Document = {
Embedder: process.env.EMBEDDING_ENGINE || "inherit",
VectorDbSelection: process.env.VECTOR_DB || "lancedb",
TTSSelection: process.env.TTS_PROVIDER || "native",
LLMModel: getModelTag(),
});
await EventLogs.logEvent(
"workspace_documents_added",
@@ -169,12 +171,6 @@ const Document = {
}
}
await Telemetry.sendTelemetry("documents_removed_in_workspace", {
LLMSelection: process.env.LLM_PROVIDER || "openai",
Embedder: process.env.EMBEDDING_ENGINE || "inherit",
VectorDbSelection: process.env.VECTOR_DB || "lancedb",
TTSSelection: process.env.TTS_PROVIDER || "native",
});
await EventLogs.logEvent(
"workspace_documents_removed",
{

View File

@@ -21,6 +21,12 @@ const Telemetry = {
agent_chat_sent: 1800,
agent_chat_started: 1800,
agent_tool_call: 1800,
// Document mgmt events
document_uploaded: 30,
documents_embedded_in_workspace: 30,
link_uploaded: 30,
raw_document_uploaded: 30,
},
id: async function () {
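The numbers added above read like per-event cooldown windows, presumably in seconds (the name of the property holding this table is cut off by the hunk). Assuming that interpretation, a gate over such a table might look like the sketch below - the helper and its names are hypothetical, not AnythingLLM's actual API:

// Hypothetical cooldown gate over a table like the one above.
const lastSentAt = {}; // event name -> timestamp (ms) of last send
function shouldSend(event, cooldowns, nowMs = Date.now()) {
  const windowSec = cooldowns[event];
  if (!windowSec) return true; // no cooldown configured - always send
  if (nowMs - (lastSentAt[event] ?? 0) < windowSec * 1000) return false;
  lastSentAt[event] = nowMs;
  return true;
}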

View File

@@ -83,7 +83,7 @@ async function validateTablePragmas(force = false) {
// Telemetry is anonymized and your data is never read. It can be disabled by setting
// DISABLE_TELEMETRY=true in the `.env` of whichever setup you use. Telemetry helps us determine
// how AnythingLLM is used and how to improve this product!
// You can see all Telemetry events by ctrl+f `Telemetry.sendEvent` calls to verify this claim.
// You can see all Telemetry events by ctrl+f `Telemetry.sendTelemetry` calls to verify this claim.
async function setupTelemetry() {
if (process.env.DISABLE_TELEMETRY === "true") {
console.log(

View File

@@ -4,7 +4,7 @@ const { Telemetry } = require("../../models/telemetry");
// Telemetry is anonymized and your data is never read. It can be disabled by setting
// DISABLE_TELEMETRY=true in the `.env` of whichever setup you use. Telemetry helps us determine
// how AnythingLLM is used and how to improve this product!
// You can see all Telemetry events by ctrl+f `Telemetry.sendEvent` calls to verify this claim.
// You can see all Telemetry events by ctrl+f `Telemetry.sendTelemetry` calls to verify this claim.
async function setupTelemetry() {
if (process.env.DISABLE_TELEMETRY === "true") {
console.log(
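To opt out, as the README section above describes, set the flag in your server or docker .env - the DISABLE_TELEMETRY check in setupTelemetry() then short-circuits before any event is sent:

# server/.env or docker/.env
DISABLE_TELEMETRY="true"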