Prune telemetry events

Update telemetry for model tagging
Update readme on telemetry
timothycarambat 2025-04-29 13:54:00 -07:00
parent f9ef5840cb
commit dacd67124a
14 changed files with 129 additions and 16 deletions

View File

@@ -204,12 +204,18 @@ Set `DISABLE_TELEMETRY` in your server or docker .env settings to "true" to opt
We will only track usage details that help us make product and roadmap decisions, specifically:
- Type of your installation (Docker or Desktop)
- When a document is added or removed. No information _about_ the document. Just that the event occurred. This gives us an idea of use.
- Type of vector database in use. Lets us know which vector database provider is the most used so we can prioritize changes when updates arrive for that provider.
- Type of LLM provider & model tag in use. Lets us know the most popular choice so we can prioritize changes when updates arrive for that provider, model, or combination thereof - e.g. reasoning vs. regular models, multi-modal models, etc.
- When a chat is sent. This is the most regular "event" and gives us an idea of the daily activity of this project across all installations. Again, only the **event** is sent - we have no information on the nature or content of the chat itself.
You can verify these claims by finding all locations `Telemetry.sendTelemetry` is called. Additionally, these events are written to the output log so you can also see the specific data which was sent - if enabled. **No IP or other identifying information is collected**. The Telemetry provider is [PostHog](https://posthog.com/) - an open-source telemetry collection service.
We take privacy very seriously and hope you can understand our desire for a glimpse into how our tool is used, without asking for annoying popup surveys, so we can build something worth using. The anonymous data is _never_ shared with third parties, ever.
[View all telemetry events in source code](https://github.com/search?q=repo%3AMintplex-Labs%2Fanything-llm%20.sendTelemetry\(&type=code)
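For illustration, this is roughly what a single telemetry call carries after this commit - a minimal sketch assembled from the payloads in the hunks below (the event name and require paths are shown for illustration; see the actual call sites further down):

// Inside an async route handler (paths relative to server/):
const { Telemetry } = require("./models/telemetry");
const { getModelTag } = require("./endpoints/utils");

await Telemetry.sendTelemetry("sent_chat", {
  LLMSelection: process.env.LLM_PROVIDER || "openai", // provider name only
  Embedder: process.env.EMBEDDING_ENGINE || "inherit",
  VectorDbSelection: process.env.VECTOR_DB || "lancedb",
  TTSSelection: process.env.TTS_PROVIDER || "native",
  LLMModel: getModelTag(), // model tag only - never chat content
});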

View File

@@ -513,8 +513,6 @@ function adminEndpoints(app) {
try {
const user = await userFromSession(request, response);
const { apiKey, error } = await ApiKey.create(user.id);
await Telemetry.sendTelemetry("api_key_created");
await EventLogs.logEvent(
"api_key_created",
{ createdBy: user?.username },

View File

@@ -12,6 +12,7 @@ const { EventLogs } = require("../../../models/eventLogs");
const {
OpenAICompatibleChat,
} = require("../../../utils/chats/openaiCompatible");
const { getModelTag } = require("../../utils");
function apiOpenAICompatibleEndpoints(app) {
if (!app) return;
@@ -181,6 +182,7 @@ function apiOpenAICompatibleEndpoints(app) {
Embedder: process.env.EMBEDDING_ENGINE || "inherit",
VectorDbSelection: process.env.VECTOR_DB || "lancedb",
TTSSelection: process.env.TTS_PROVIDER || "native",
LLMModel: getModelTag(),
});
await EventLogs.logEvent("api_sent_chat", {
workspaceName: workspace?.name,

View File

@@ -14,6 +14,7 @@ const {
writeResponseChunk,
} = require("../../../utils/helpers/chat/responses");
const { ApiChatHandler } = require("../../../utils/chats/apiChatHandler");
const { getModelTag } = require("../../utils");
function apiWorkspaceEndpoints(app) {
if (!app) return;
@@ -87,6 +88,7 @@ function apiWorkspaceEndpoints(app) {
Embedder: process.env.EMBEDDING_ENGINE || "inherit",
VectorDbSelection: process.env.VECTOR_DB || "lancedb",
TTSSelection: process.env.TTS_PROVIDER || "native",
LLMModel: getModelTag(),
});
await EventLogs.logEvent("api_workspace_created", {
workspaceName: workspace?.name || "Unknown Workspace",

View File

@@ -13,6 +13,7 @@ const {
const { WorkspaceChats } = require("../../../models/workspaceChats");
const { User } = require("../../../models/user");
const { ApiChatHandler } = require("../../../utils/chats/apiChatHandler");
const { getModelTag } = require("../../utils");
function apiWorkspaceThreadEndpoints(app) {
if (!app) return;
@@ -436,6 +437,7 @@ function apiWorkspaceThreadEndpoints(app) {
Embedder: process.env.EMBEDDING_ENGINE || "inherit",
VectorDbSelection: process.env.VECTOR_DB || "lancedb",
TTSSelection: process.env.TTS_PROVIDER || "native",
LLMModel: getModelTag(),
});
await EventLogs.logEvent("api_sent_chat", {
workspaceName: workspace?.name,
@@ -606,6 +608,7 @@ function apiWorkspaceThreadEndpoints(app) {
Embedder: process.env.EMBEDDING_ENGINE || "inherit",
VectorDbSelection: process.env.VECTOR_DB || "lancedb",
TTSSelection: process.env.TTS_PROVIDER || "native",
LLMModel: getModelTag(),
});
await EventLogs.logEvent("api_sent_chat", {
workspaceName: workspace?.name,

View File

@@ -16,6 +16,7 @@ const { writeResponseChunk } = require("../utils/helpers/chat/responses");
const { WorkspaceThread } = require("../models/workspaceThread");
const { User } = require("../models/user");
const truncate = require("truncate");
const { getModelTag } = require("./utils");
function chatEndpoints(app) {
if (!app) return;
@@ -75,6 +76,7 @@ function chatEndpoints(app) {
VectorDbSelection: process.env.VECTOR_DB || "lancedb",
multiModal: Array.isArray(attachments) && attachments?.length !== 0,
TTSSelection: process.env.TTS_PROVIDER || "native",
LLMModel: getModelTag(),
});
await EventLogs.logEvent(
@@ -179,6 +181,7 @@ function chatEndpoints(app) {
VectorDbSelection: process.env.VECTOR_DB || "lancedb",
multiModal: Array.isArray(attachments) && attachments?.length !== 0,
TTSSelection: process.env.TTS_PROVIDER || "native",
LLMModel: getModelTag(),
});
await EventLogs.logEvent(

View File

@@ -908,7 +908,6 @@ function systemEndpoints(app) {
}
const { apiKey, error } = await ApiKey.create();
await Telemetry.sendTelemetry("api_key_created");
await EventLogs.logEvent(
"api_key_created",
{},

View File

@@ -55,7 +55,101 @@ async function getDiskStorage() {
}
}
/**
* Returns the model tag based on the provider set in the environment.
 * This information is used to identify the parent model in use so that we can
 * prioritize the correct models and types for future updates, as well as build
 * features in AnythingLLM directly for a specific model or capability.
*
* Disable with {@link https://github.com/Mintplex-Labs/anything-llm?tab=readme-ov-file#telemetry--privacy|Disable Telemetry}
* @returns {string} The model tag.
*/
function getModelTag() {
let model = null;
const provider = process.env.LLM_PROVIDER;
switch (provider) {
case "openai":
model = process.env.OPEN_MODEL_PREF;
break;
case "anthropic":
model = process.env.ANTHROPIC_MODEL_PREF;
break;
case "lmstudio":
model = process.env.LMSTUDIO_MODEL_PREF;
break;
case "ollama":
model = process.env.OLLAMA_MODEL_PREF;
break;
case "groq":
model = process.env.GROQ_MODEL_PREF;
break;
case "togetherai":
model = process.env.TOGETHER_AI_MODEL_PREF;
break;
case "azure":
model = process.env.OPEN_MODEL_PREF;
break;
case "koboldcpp":
model = process.env.KOBOLD_CPP_MODEL_PREF;
break;
case "localai":
model = process.env.LOCAL_AI_MODEL_PREF;
break;
case "openrouter":
model = process.env.OPENROUTER_MODEL_PREF;
break;
case "mistral":
model = process.env.MISTRAL_MODEL_PREF;
break;
case "generic-openai":
model = process.env.GENERIC_OPEN_AI_MODEL_PREF;
break;
case "perplexity":
model = process.env.PERPLEXITY_MODEL_PREF;
break;
case "textgenwebui":
model = "textgenwebui-default";
break;
case "bedrock":
model = process.env.AWS_BEDROCK_LLM_MODEL_PREFERENCE;
break;
case "fireworksai":
model = process.env.FIREWORKS_AI_LLM_MODEL_PREF;
break;
case "deepseek":
model = process.env.DEEPSEEK_MODEL_PREF;
break;
case "litellm":
model = process.env.LITE_LLM_MODEL_PREF;
break;
case "apipie":
model = process.env.APIPIE_LLM_MODEL_PREF;
break;
case "xai":
model = process.env.XAI_LLM_MODEL_PREF;
break;
case "novita":
model = process.env.NOVITA_LLM_MODEL_PREF;
break;
case "nvidia-nim":
model = process.env.NVIDIA_NIM_LLM_MODEL_PREF;
break;
case "ppio":
model = process.env.PPIO_MODEL_PREF;
break;
case "gemini":
model = process.env.GEMINI_LLM_MODEL_PREF;
break;
default:
model = "--";
break;
}
return model;
}
module.exports = {
utilEndpoints,
getGitVersion,
getModelTag,
};
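For example, under a hypothetical configuration (values below are illustrative only, not real defaults):

// e.g. running Ollama with a local model (hypothetical values)
process.env.LLM_PROVIDER = "ollama";
process.env.OLLAMA_MODEL_PREF = "llama3:8b";
getModelTag(); // => "llama3:8b"

// an unrecognized provider falls through to the default case
process.env.LLM_PROVIDER = "some-new-provider";
getModelTag(); // => "--"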

View File

@@ -18,6 +18,7 @@ const {
} = require("../utils/middleware/validWorkspace");
const { WorkspaceChats } = require("../models/workspaceChats");
const { convertToChatHistory } = require("../utils/helpers/chat/responses");
const { getModelTag } = require("./utils");
function workspaceThreadEndpoints(app) {
if (!app) return;
@@ -41,6 +42,7 @@ function workspaceThreadEndpoints(app) {
Embedder: process.env.EMBEDDING_ENGINE || "inherit",
VectorDbSelection: process.env.VECTOR_DB || "lancedb",
TTSSelection: process.env.TTS_PROVIDER || "native",
LLMModel: getModelTag(),
},
user?.id
);

View File

@@ -34,6 +34,7 @@ const { getTTSProvider } = require("../utils/TextToSpeech");
const { WorkspaceThread } = require("../models/workspaceThread");
const truncate = require("truncate");
const { purgeDocument } = require("../utils/files/purgeDocument");
const { getModelTag } = require("./utils");
function workspaceEndpoints(app) {
if (!app) return;
@@ -56,6 +57,7 @@ function workspaceEndpoints(app) {
Embedder: process.env.EMBEDDING_ENGINE || "inherit",
VectorDbSelection: process.env.VECTOR_DB || "lancedb",
TTSSelection: process.env.TTS_PROVIDER || "native",
LLMModel: getModelTag(),
},
user?.id
);

View File

@@ -4,6 +4,7 @@ const prisma = require("../utils/prisma");
const { Telemetry } = require("./telemetry");
const { EventLogs } = require("./eventLogs");
const { safeJsonParse } = require("../utils/http");
const { getModelTag } = require("../endpoints/utils");
const Document = {
writable: ["pinned", "watched", "lastUpdatedAt"],
@@ -130,6 +131,7 @@ const Document = {
Embedder: process.env.EMBEDDING_ENGINE || "inherit",
VectorDbSelection: process.env.VECTOR_DB || "lancedb",
TTSSelection: process.env.TTS_PROVIDER || "native",
LLMModel: getModelTag(),
});
await EventLogs.logEvent(
"workspace_documents_added",
@@ -169,12 +171,6 @@ const Document = {
}
}
await Telemetry.sendTelemetry("documents_removed_in_workspace", {
LLMSelection: process.env.LLM_PROVIDER || "openai",
Embedder: process.env.EMBEDDING_ENGINE || "inherit",
VectorDbSelection: process.env.VECTOR_DB || "lancedb",
TTSSelection: process.env.TTS_PROVIDER || "native",
});
await EventLogs.logEvent(
"workspace_documents_removed",
{

View File

@@ -21,6 +21,12 @@ const Telemetry = {
agent_chat_sent: 1800,
agent_chat_started: 1800,
agent_tool_call: 1800,
// Document mgmt events
document_uploaded: 30,
documents_embedded_in_workspace: 30,
link_uploaded: 30,
raw_document_uploaded: 30,
},
id: async function () {
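The numbers added above read like per-event cooldown windows, presumably in seconds (the name of the property holding this table is cut off by the hunk). Assuming that interpretation, a gate over such a table might look like the sketch below - the helper and its names are hypothetical, not AnythingLLM's actual API:

// Hypothetical cooldown gate over a table like the one above.
const lastSentAt = {}; // event name -> timestamp (ms) of last send
function shouldSend(event, cooldowns, nowMs = Date.now()) {
  const windowSec = cooldowns[event];
  if (!windowSec) return true; // no cooldown configured - always send
  if (nowMs - (lastSentAt[event] ?? 0) < windowSec * 1000) return false;
  lastSentAt[event] = nowMs;
  return true;
}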

View File

@@ -83,7 +83,7 @@ async function validateTablePragmas(force = false) {
// Telemetry is anonymized and your data is never read. It can be disabled by setting
// DISABLE_TELEMETRY=true in the `.env` of whichever setup you use. Telemetry helps us determine
// how AnythingLLM is used and how to improve this product!
// You can see all Telemetry events by ctrl+f `Telemetry.sendEvent` calls to verify this claim.
// You can see all Telemetry events by ctrl+f `Telemetry.sendTelemetry` calls to verify this claim.
async function setupTelemetry() {
if (process.env.DISABLE_TELEMETRY === "true") {
console.log(

View File

@@ -4,7 +4,7 @@ const { Telemetry } = require("../../models/telemetry");
// Telemetry is anonymized and your data is never read. It can be disabled by setting
// DISABLE_TELEMETRY=true in the `.env` of whichever setup you use. Telemetry helps us determine
// how AnythingLLM is used and how to improve this product!
// You can see all Telemetry events by ctrl+f `Telemetry.sendEvent` calls to verify this claim.
// You can see all Telemetry events by ctrl+f `Telemetry.sendTelemetry` calls to verify this claim.
async function setupTelemetry() {
if (process.env.DISABLE_TELEMETRY === "true") {
console.log(
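To opt out, as the README section above describes, set the flag in your server or docker .env - the DISABLE_TELEMETRY check in setupTelemetry() then short-circuits before any event is sent:

# server/.env or docker/.env
DISABLE_TELEMETRY="true"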