diff --git a/README.md b/README.md index 90243c35..88922e65 100644 --- a/README.md +++ b/README.md @@ -102,7 +102,7 @@ AnythingLLM divides your documents into objects called `workspaces`. A Workspace - [Novita AI (chat models)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link) - [PPIO](https://ppinfra.com?utm_source=github_anything-llm) - [Moonshot AI](https://www.moonshot.ai/) - +- [CometAPI (chat models)](https://api.cometapi.com/) **Embedder models:** - [AnythingLLM Native Embedder](/server/storage/models/README.md) (default) diff --git a/docker/.env.example b/docker/.env.example index dca22fa0..bd268053 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -96,6 +96,11 @@ GID='1000' # NOVITA_LLM_API_KEY='your-novita-api-key-here' check on https://novita.ai/settings/key-management # NOVITA_LLM_MODEL_PREF='deepseek/deepseek-r1' +# LLM_PROVIDER='cometapi' +# COMETAPI_LLM_API_KEY='your-cometapi-api-key-here' # Get one at https://api.cometapi.com/console/token +# COMETAPI_LLM_MODEL_PREF='gpt-5-mini' +# COMETAPI_LLM_TIMEOUT_MS=500 # Optional; stream idle timeout in ms (min 500ms) + # LLM_PROVIDER='cohere' # COHERE_API_KEY= # COHERE_MODEL_PREF='command-r' diff --git a/frontend/src/components/LLMSelection/CometApiLLMOptions/index.jsx b/frontend/src/components/LLMSelection/CometApiLLMOptions/index.jsx new file mode 100644 index 00000000..71fbeec6 --- /dev/null +++ b/frontend/src/components/LLMSelection/CometApiLLMOptions/index.jsx @@ -0,0 +1,155 @@ +import System from "@/models/system"; +import { CaretDown, CaretUp } from "@phosphor-icons/react"; +import { useState, useEffect } from "react"; + +export default function CometApiLLMOptions({ settings }) { + return ( +
+    <div className="w-full flex flex-col gap-y-7">
+      <div className="w-full flex items-start gap-[36px] mt-1.5">
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-3">
+            CometAPI API Key
+          </label>
+          <input
+            type="password"
+            name="CometApiLLMApiKey"
+            className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+            placeholder="CometAPI API Key"
+            defaultValue={settings?.CometApiLLMApiKey ? "*".repeat(20) : ""}
+            required={true}
+            autoComplete="off"
+            spellCheck={false}
+          />
+        </div>
+        {!settings?.credentialsOnly && (
+          <CometApiModelSelection settings={settings} />
+        )}
+      </div>
+      <AdvancedControls settings={settings} />
+    </div>
+  );
+}
+
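+/**
+ * Optional advanced controls for the CometAPI connection. For now this only
+ * exposes the stream idle timeout, which is persisted server-side as
+ * COMETAPI_LLM_TIMEOUT_MS (see the CometApiLLMTimeout mapping in
+ * server/utils/helpers/updateENV.js).
+ */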
+function AdvancedControls({ settings }) {
+  const [showAdvancedControls, setShowAdvancedControls] = useState(false);
+
+  return (
+    <div className="flex flex-col gap-y-4">
+      <button
+        type="button"
+        onClick={() => setShowAdvancedControls(!showAdvancedControls)}
+        className="border-none text-white hover:text-white/70 flex items-center text-sm"
+      >
+        {showAdvancedControls ? "Hide" : "Show"} advanced controls
+        {showAdvancedControls ? (
+          <CaretUp size={14} className="ml-1" />
+        ) : (
+          <CaretDown size={14} className="ml-1" />
+        )}
+      </button>
+      <div hidden={!showAdvancedControls}>
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-3">
+            Stream Timeout (ms)
+          </label>
+          <input
+            type="number"
+            name="CometApiLLMTimeout"
+            className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+            placeholder="500"
+            min={500}
+            step={1}
+            defaultValue={settings?.CometApiLLMTimeout ?? 500}
+            autoComplete="off"
+          />
+        </div>
+      </div>
+    </div>
+  );
+}
+
+function CometApiModelSelection({ settings }) {
+  // TODO: For now, the CometAPI models list is noisy; show a flat, deduped list
+  // without grouping. Revisit after the CometAPI model list API provides better
+  // categorization/metadata.
+  const [models, setModels] = useState([]);
+  const [loading, setLoading] = useState(true);
+
+  useEffect(() => {
+    async function findCustomModels() {
+      setLoading(true);
+      const { models: fetched = [] } = await System.customModels("cometapi");
+      if (fetched?.length > 0) {
+        // De-duplicate by id (case-insensitive) and sort by name for readability
+        const seen = new Set();
+        const unique = [];
+        for (const m of fetched) {
+          const key = String(m.id || m.name || "").toLowerCase();
+          if (!seen.has(key)) {
+            seen.add(key);
+            unique.push(m);
+          }
+        }
+        unique.sort((a, b) =>
+          String(a.name || a.id).localeCompare(String(b.name || b.id))
+        );
+        setModels(unique);
+      } else {
+        setModels([]);
+      }
+      setLoading(false);
+    }
+    findCustomModels();
+  }, []);
+
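+  // While the first fetch warms the server-side model cache we render a
+  // disabled placeholder select; afterwards we swap to the editable input below.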
+  if (loading || models.length === 0) {
+    return (
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-3">
+          Chat Model Selection
+        </label>
+        <select
+          name="CometApiLLMModelPref"
+          disabled={true}
+          className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+        >
+          <option disabled={true} selected={true}>
+            -- loading available models --
+          </option>
+        </select>
+      </div>
+    );
+  }
+
+  // Free-form input backed by a datalist so users can type any model id
+  // CometAPI serves, not only the ones returned by the models endpoint.
+  return (
+    <div className="flex flex-col w-60">
+      <label className="text-white text-sm font-semibold block mb-3">
+        Chat Model Selection
+      </label>
+      <input
+        type="text"
+        name="CometApiLLMModelPref"
+        list="cometapi-models"
+        defaultValue={settings?.CometApiLLMModelPref}
+        placeholder="gpt-5-mini"
+        className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+        required={true}
+        autoComplete="off"
+        spellCheck={false}
+      />
+      <datalist id="cometapi-models">
+        {models.map((model) => (
+          <option key={model.id} value={model.id}>
+            {model.name}
+          </option>
+        ))}
+      </datalist>
+      <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
+        You can type the model id directly or pick from suggestions.
+      </p>
+    </div>
+  );
+}
diff --git a/frontend/src/media/llmprovider/cometapi.png b/frontend/src/media/llmprovider/cometapi.png
new file mode 100644
index 00000000..40d139de
Binary files /dev/null and b/frontend/src/media/llmprovider/cometapi.png differ
diff --git a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
index 9ef12944..afadca6a 100644
--- a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
+++ b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
@@ -33,6 +33,7 @@ import NvidiaNimLogo from "@/media/llmprovider/nvidia-nim.png";
 import PPIOLogo from "@/media/llmprovider/ppio.png";
 import DellProAiStudioLogo from "@/media/llmprovider/dpais.png";
 import MoonshotAiLogo from "@/media/llmprovider/moonshotai.png";
+import CometApiLogo from "@/media/llmprovider/cometapi.png";
 import PreLoader from "@/components/Preloader";
 
 import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
@@ -44,6 +45,7 @@ import LocalAiOptions from "@/components/LLMSelection/LocalAiOptions";
 import GeminiLLMOptions from "@/components/LLMSelection/GeminiLLMOptions";
 import OllamaLLMOptions from "@/components/LLMSelection/OllamaLLMOptions";
 import NovitaLLMOptions from "@/components/LLMSelection/NovitaLLMOptions";
+import CometApiLLMOptions from "@/components/LLMSelection/CometApiLLMOptions";
 import TogetherAiOptions from "@/components/LLMSelection/TogetherAiOptions";
 import FireworksAiOptions from "@/components/LLMSelection/FireworksAiOptions";
 import MistralOptions from "@/components/LLMSelection/MistralOptions";
@@ -161,15 +163,6 @@ export const AVAILABLE_LLM_PROVIDERS = [
     description: "Run LLMs locally on your own machine.",
     requiredConfig: ["LocalAiApiKey", "LocalAiBasePath", "LocalAiTokenLimit"],
   },
-  {
-    name: "Novita AI",
-    value: "novita",
-    logo: NovitaLogo,
-    options: (settings) => <NovitaLLMOptions settings={settings} />,
-    description:
-      "Reliable, Scalable, and Cost-Effective for LLMs from Novita AI",
-    requiredConfig: ["NovitaLLMApiKey"],
-  },
   {
     name: "Together AI",
     value: "togetherai",
@@ -303,6 +296,31 @@ export const AVAILABLE_LLM_PROVIDERS = [
     description: "Run Moonshot AI's powerful LLMs.",
     requiredConfig: ["MoonshotAiApiKey"],
   },
+  {
+    name: "Novita AI",
+    value: "novita",
+    logo: NovitaLogo,
+    options: (settings) => <NovitaLLMOptions settings={settings} />,
+    description:
+      "Reliable, Scalable, and Cost-Effective for LLMs from Novita AI",
+    requiredConfig: ["NovitaLLMApiKey"],
+  },
+  {
+    name: "CometAPI",
+    value: "cometapi",
+    logo: CometApiLogo,
+    options: (settings) => <CometApiLLMOptions settings={settings} />,
+    description: "500+ AI Models all in one API.",
+    requiredConfig: ["CometApiLLMApiKey"],
+  },
+  {
+    name: "xAI",
+    value: "xai",
+    logo: XAILogo,
+    options: (settings) => <XAILLMOptions settings={settings} />,
+    description: "Run xAI's powerful LLMs like Grok-2 and more.",
+    requiredConfig: ["XAIApiKey", "XAIModelPref"],
+  },
   {
     name: "Generic OpenAI",
     value: "generic-openai",
     options: (settings) => (
       <GenericOpenAiOptions settings={settings} />
     ),
     description: "Connect to any OpenAi-compatible service via a custom configuration",
     requiredConfig: [
       "GenericOpenAiBasePath",
       "GenericOpenAiModelPref",
       "GenericOpenAiTokenLimit",
       "GenericOpenAiKey",
     ],
   },
@@ -317,14 +335,6 @@ export const AVAILABLE_LLM_PROVIDERS = [
       "GenericOpenAiKey",
     ],
   },
-  {
-    name: "xAI",
-    value: "xai",
-    logo: XAILogo,
-    options: (settings) => <XAILLMOptions settings={settings} />,
-    description: "Run xAI's powerful LLMs like Grok-2 and more.",
-    requiredConfig: ["XAIApiKey", "XAIModelPref"],
-  },
 ];
 
 export default function GeneralLLMPreference() {
diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
index a6e4ab02..442a443d 100644
--- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
@@ -39,6 +39,7 @@ import PPIOLogo from "@/media/llmprovider/ppio.png";
"@/media/llmprovider/ppio.png"; import PGVectorLogo from "@/media/vectordbs/pgvector.png"; import DPAISLogo from "@/media/llmprovider/dpais.png"; import MoonshotAiLogo from "@/media/llmprovider/moonshotai.png"; +import CometApiLogo from "@/media/llmprovider/cometapi.png"; import React, { useState, useEffect } from "react"; import paths from "@/utils/paths"; @@ -252,6 +253,14 @@ export const LLM_SELECTION_PRIVACY = { ], logo: MoonshotAiLogo, }, + cometapi: { + name: "CometAPI", + description: [ + "Your chats will not be used for training", + "Your prompts and document text used in response creation are visible to CometAPI", + ], + logo: CometApiLogo, + }, }; export const VECTOR_DB_PRIVACY = { diff --git a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx index 4ce2745d..7a16985f 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx @@ -28,6 +28,7 @@ import CohereLogo from "@/media/llmprovider/cohere.png"; import PPIOLogo from "@/media/llmprovider/ppio.png"; import DellProAiStudioLogo from "@/media/llmprovider/dpais.png"; import MoonshotAiLogo from "@/media/llmprovider/moonshotai.png"; +import CometApiLogo from "@/media/llmprovider/cometapi.png"; import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions"; import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions"; @@ -57,6 +58,7 @@ import NvidiaNimOptions from "@/components/LLMSelection/NvidiaNimOptions"; import PPIOLLMOptions from "@/components/LLMSelection/PPIOLLMOptions"; import DellProAiStudioOptions from "@/components/LLMSelection/DPAISOptions"; import MoonshotAiOptions from "@/components/LLMSelection/MoonshotAiOptions"; +import CometApiLLMOptions from "@/components/LLMSelection/CometApiLLMOptions"; import LLMItem from "@/components/LLMSelection/LLMItem"; import System from "@/models/system"; @@ -272,6 +274,13 @@ const LLMS = [ options: (settings) => , description: "Run Moonshot AI's powerful LLMs.", }, + { + name: "CometAPI", + value: "cometapi", + logo: CometApiLogo, + options: (settings) => , + description: "500+ AI Models all in one API.", + }, ]; export default function LLMPreference({ diff --git a/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx b/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx index 31b7327b..9710243d 100644 --- a/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx +++ b/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx @@ -32,20 +32,16 @@ const ENABLED_PROVIDERS = [ "nvidia-nim", "gemini", "moonshotai", + "cometapi", // TODO: More agent support. // "cohere", // Has tool calling and will need to build explicit support // "huggingface" // Can be done but already has issues with no-chat templated. Needs to be tested. 
]; const WARN_PERFORMANCE = [ "lmstudio", - "groq", - "azure", "koboldcpp", "ollama", "localai", - "openrouter", - "novita", - "generic-openai", "textgenwebui", ]; diff --git a/locales/README.ja-JP.md b/locales/README.ja-JP.md index 8920b218..d6fef0fa 100644 --- a/locales/README.ja-JP.md +++ b/locales/README.ja-JP.md @@ -91,6 +91,7 @@ AnythingLLMは、ドキュメントを`ワークスペース`と呼ばれるオ - [Cohere](https://cohere.com/) - [KoboldCPP](https://github.com/LostRuins/koboldcpp) - [PPIO](https://ppinfra.com?utm_source=github_anything-llm) +- [CometAPI (チャットモデル)](https://api.cometapi.com/) **埋め込みモデル:** diff --git a/locales/README.zh-CN.md b/locales/README.zh-CN.md index e3c63225..aa328351 100644 --- a/locales/README.zh-CN.md +++ b/locales/README.zh-CN.md @@ -100,6 +100,7 @@ AnythingLLM将您的文档划分为称为`workspaces` (工作区)的对象。工 - [xAI](https://x.ai/) - [Novita AI (聊天模型)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link) - [PPIO (聊天模型)](https://ppinfra.com?utm_source=github_anything-llm) +- [CometAPI (聊天模型)](https://api.cometapi.com/) **支持的嵌入模型:** @@ -200,7 +201,7 @@ _以下是一些与 AnythingLLM 兼容的应用程序,但并非由 Mintplex La ### 怎样关闭 -在服务器或 Docker 的 .env 设置中将 `DISABLE_TELEMETRY` 设置为 "true",即可选择不参与遥测数据收集。你也可以在应用内通过以下路径操作:侧边栏 > `Privacy` (隐私) > 关闭遥测功能。 +在服务器或 Docker 的 .env 设置中将 `DISABLE_TELEMETRY` 设置为 "true",即可选择不参与遥测数据收集。你也可以在应用内通过以下路径操作:侧边栏 > `Privacy` (隐私) > 关闭遥测功能。 ### 你们跟踪收集哪些信息? @@ -214,7 +215,7 @@ _以下是一些与 AnythingLLM 兼容的应用程序,但并非由 Mintplex La 您可以通过查找所有调用`Telemetry.sendTelemetry`的位置来验证这些声明。此外,如果启用,这些事件也会被写入输出日志,因此您也可以看到发送了哪些具体数据。**IP或其他识别信息不会被收集**。Telemetry远程信息收集的方案来自[PostHog](https://posthog.com/) - 一个开源的远程信息收集服务。 -我们非常重视隐私,且不用烦人的弹窗问卷来获取反馈,希望你能理解为什么我们想要知道该工具的使用情况,这样我们才能打造真正值得使用的产品。所有匿名数据 _绝不会_ 与任何第三方共享。 +我们非常重视隐私,且不用烦人的弹窗问卷来获取反馈,希望你能理解为什么我们想要知道该工具的使用情况,这样我们才能打造真正值得使用的产品。所有匿名数据 _绝不会_ 与任何第三方共享。 [在源代码中查看所有信息收集活动](https://github.com/search?q=repo%3AMintplex-Labs%2Fanything-llm%20.sendTelemetry\(&type=code) diff --git a/server/.env.example b/server/.env.example index 0d3d1ecd..4e5d3091 100644 --- a/server/.env.example +++ b/server/.env.example @@ -106,6 +106,12 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long. # COHERE_API_KEY= # COHERE_MODEL_PREF='command-r' +# LLM_PROVIDER='cometapi' +# COMETAPI_LLM_API_KEY='your-cometapi-key-here' # Get one at https://api.cometapi.com/console/token +# COMETAPI_LLM_MODEL_PREF='gpt-5-mini' +# COMETAPI_LLM_TIMEOUT_MS=500 # Optional; stream idle timeout in ms (min 500ms) + + # LLM_PROVIDER='bedrock' # AWS_BEDROCK_LLM_ACCESS_KEY_ID= # AWS_BEDROCK_LLM_ACCESS_KEY= @@ -354,4 +360,4 @@ TTS_PROVIDER="native" # Specify the target languages for when using OCR to parse images and PDFs. # This is a comma separated list of language codes as a string. Unsupported languages will be ignored. # Default is English. See https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html for a list of valid language codes. -# TARGET_OCR_LANG=eng,deu,ita,spa,fra,por,rus,nld,tur,hun,pol,ita,spa,fra,por,rus,nld,tur,hun,pol \ No newline at end of file +# TARGET_OCR_LANG=eng,deu,ita,spa,fra,por,rus,nld,tur,hun,pol,ita,spa,fra,por,rus,nld,tur,hun,pol diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index f0796be0..064e299c 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -610,6 +610,11 @@ const SystemSettings = { DellProAiStudioModelPref: process.env.DPAIS_LLM_MODEL_PREF, DellProAiStudioTokenLimit: process.env.DPAIS_LLM_MODEL_TOKEN_LIMIT ?? 
4096, + + // CometAPI LLM Keys + CometApiLLMApiKey: !!process.env.COMETAPI_LLM_API_KEY, + CometApiLLMModelPref: process.env.COMETAPI_LLM_MODEL_PREF, + CometApiLLMTimeout: process.env.COMETAPI_LLM_TIMEOUT_MS, }; }, diff --git a/server/storage/models/.gitignore b/server/storage/models/.gitignore index e73faa05..7f5c5f8b 100644 --- a/server/storage/models/.gitignore +++ b/server/storage/models/.gitignore @@ -10,4 +10,5 @@ togetherAi tesseract ppio context-windows/* -MintplexLabs \ No newline at end of file +MintplexLabs +cometapi \ No newline at end of file diff --git a/server/utils/AiProviders/cometapi/constants.js b/server/utils/AiProviders/cometapi/constants.js new file mode 100644 index 00000000..2d7a32da --- /dev/null +++ b/server/utils/AiProviders/cometapi/constants.js @@ -0,0 +1,39 @@ +// TODO: When CometAPI's model list is upgraded, this operation needs to be removed +// Model filtering patterns from cometapi.md that are not supported by AnythingLLM +module.exports.COMETAPI_IGNORE_PATTERNS = [ + // Image generation models + "dall-e", + "dalle", + "midjourney", + "mj_", + "stable-diffusion", + "sd-", + "flux-", + "playground-v", + "ideogram", + "recraft-", + "black-forest-labs", + "/recraft-v3", + "recraftv3", + "stability-ai/", + "sdxl", + // Audio generation models + "suno_", + "tts", + "whisper", + // Video generation models + "runway", + "luma_", + "luma-", + "veo", + "kling_", + "minimax_video", + "hunyuan-t1", + // Utility models + "embedding", + "search-gpts", + "files_retrieve", + "moderation", + // Deepl + "deepl", +]; diff --git a/server/utils/AiProviders/cometapi/index.js b/server/utils/AiProviders/cometapi/index.js new file mode 100644 index 00000000..82fb7c1b --- /dev/null +++ b/server/utils/AiProviders/cometapi/index.js @@ -0,0 +1,433 @@ +const { NativeEmbedder } = require("../../EmbeddingEngines/native"); +const { v4: uuidv4 } = require("uuid"); +const { + writeResponseChunk, + clientAbortedHandler, + formatChatHistory, +} = require("../../helpers/chat/responses"); +const fs = require("fs"); +const path = require("path"); +const { safeJsonParse } = require("../../http"); +const { + LLMPerformanceMonitor, +} = require("../../helpers/chat/LLMPerformanceMonitor"); +const { COMETAPI_IGNORE_PATTERNS } = require("./constants"); +const cacheFolder = path.resolve( + process.env.STORAGE_DIR + ? path.resolve(process.env.STORAGE_DIR, "models", "cometapi") + : path.resolve(__dirname, `../../../storage/models/cometapi`) +); + +class CometApiLLM { + constructor(embedder = null, modelPreference = null) { + if (!process.env.COMETAPI_LLM_API_KEY) + throw new Error("No CometAPI API key was set."); + + const { OpenAI: OpenAIApi } = require("openai"); + this.basePath = "https://api.cometapi.com/v1"; + this.openai = new OpenAIApi({ + baseURL: this.basePath, + apiKey: process.env.COMETAPI_LLM_API_KEY ?? null, + defaultHeaders: { + "HTTP-Referer": "https://anythingllm.com", + "X-CometAPI-Source": "anythingllm", + }, + }); + this.model = + modelPreference || process.env.COMETAPI_LLM_MODEL_PREF || "gpt-5-mini"; + this.limits = { + history: this.promptWindowLimit() * 0.15, + system: this.promptWindowLimit() * 0.15, + user: this.promptWindowLimit() * 0.7, + }; + + this.embedder = embedder ?? 
new NativeEmbedder();
+    this.defaultTemp = 0.7;
+    this.timeout = this.#parseTimeout();
+
+    if (!fs.existsSync(cacheFolder))
+      fs.mkdirSync(cacheFolder, { recursive: true });
+    this.cacheModelPath = path.resolve(cacheFolder, "models.json");
+    this.cacheAtPath = path.resolve(cacheFolder, ".cached_at");
+
+    this.log(`Loaded with model: ${this.model}`);
+  }
+
+  log(text, ...args) {
+    console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
+  }
+
+  /**
+   * CometAPI has various models that never return `finish_reason` and thus leave the stream open,
+   * which causes issues in subsequent messages. This timeout value forces us to close the stream
+   * after x milliseconds of inactivity. It is configurable via the COMETAPI_LLM_TIMEOUT_MS env variable.
+   * @returns {number} The timeout value in milliseconds (default: 500)
+   */
+  #parseTimeout() {
+    if (isNaN(Number(process.env.COMETAPI_LLM_TIMEOUT_MS))) return 500;
+    const setValue = Number(process.env.COMETAPI_LLM_TIMEOUT_MS);
+    if (setValue < 500) return 500;
+    return setValue;
+  }
+
+  // This checks if the .cached_at file has a timestamp that is more than 1 week (in ms)
+  // from the current date. If it is, then we will re-fetch from the API so that all the
+  // models are up to date.
+  #cacheIsStale() {
+    const MAX_STALE = 6.048e8; // 1 week in ms
+    if (!fs.existsSync(this.cacheAtPath)) return true;
+    const now = Number(new Date());
+    const timestampMs = Number(fs.readFileSync(this.cacheAtPath));
+    return now - timestampMs > MAX_STALE;
+  }
+
+  // The CometAPI model API has a lot of models, so we cache this locally in the directory,
+  // and if the cached JSON file is stale or does not exist we will fetch from the API and store it.
+  // This might slow down the first request, but we need the proper token context window
+  // for each model and this is a constructor property - so we can really only get it if this cache exists.
+  // We used to have this as a chore, but given there is an API to get the info - this makes little sense.
+  async #syncModels() {
+    if (fs.existsSync(this.cacheModelPath) && !this.#cacheIsStale())
+      return false;
+
+    this.log(
+      "Model cache is not present or stale. Fetching from CometAPI API."
+    );
+    await fetchCometApiModels();
+    return;
+  }
+
+  #appendContext(contextTexts = []) {
+    if (!contextTexts || !contextTexts.length) return "";
+    return (
+      "\nContext:\n" +
+      contextTexts
+        .map((text, i) => {
+          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
+        })
+        .join("")
+    );
+  }
+
+  models() {
+    if (!fs.existsSync(this.cacheModelPath)) return {};
+    return safeJsonParse(
+      fs.readFileSync(this.cacheModelPath, { encoding: "utf-8" }),
+      {}
+    );
+  }
+
+  streamingEnabled() {
+    return "streamGetChatCompletion" in this;
+  }
+
+  static promptWindowLimit(modelName) {
+    const cacheModelPath = path.resolve(cacheFolder, "models.json");
+    const availableModels = fs.existsSync(cacheModelPath)
+      ? safeJsonParse(
+          fs.readFileSync(cacheModelPath, { encoding: "utf-8" }),
+          {}
+        )
+      : {};
+    return availableModels[modelName]?.maxLength || 4096;
+  }
+
+  promptWindowLimit() {
+    const availableModels = this.models();
+    return availableModels[this.model]?.maxLength || 4096;
+  }
+
+  async isValidChatCompletionModel(model = "") {
+    await this.#syncModels();
+    const availableModels = this.models();
+    return availableModels.hasOwnProperty(model);
+  }
+
+  /**
+   * Generates appropriate content array for a message + attachments.
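+   * Attachments are passed through as OpenAI-style `image_url` content parts.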
+ * @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}} + * @returns {string|object[]} + */ + #generateContent({ userPrompt, attachments = [] }) { + if (!attachments.length) { + return userPrompt; + } + + const content = [{ type: "text", text: userPrompt }]; + for (let attachment of attachments) { + content.push({ + type: "image_url", + image_url: { + url: attachment.contentString, + detail: "auto", + }, + }); + } + return content.flat(); + } + + constructPrompt({ + systemPrompt = "", + contextTexts = [], + chatHistory = [], + userPrompt = "", + attachments = [], + }) { + const prompt = { + role: "system", + content: `${systemPrompt}${this.#appendContext(contextTexts)}`, + }; + return [ + prompt, + ...formatChatHistory(chatHistory, this.#generateContent), + { + role: "user", + content: this.#generateContent({ userPrompt, attachments }), + }, + ]; + } + + async getChatCompletion(messages = null, { temperature = 0.7 }) { + if (!(await this.isValidChatCompletionModel(this.model))) + throw new Error( + `CometAPI chat: ${this.model} is not valid for chat completion!` + ); + + const result = await LLMPerformanceMonitor.measureAsyncFunction( + this.openai.chat.completions + .create({ + model: this.model, + messages, + temperature, + }) + .catch((e) => { + throw new Error(e.message); + }) + ); + + if ( + !result.output.hasOwnProperty("choices") || + result.output.choices.length === 0 + ) + return null; + + return { + textResponse: result.output.choices[0].message.content, + metrics: { + prompt_tokens: result.output.usage.prompt_tokens || 0, + completion_tokens: result.output.usage.completion_tokens || 0, + total_tokens: result.output.usage.total_tokens || 0, + outputTps: result.output.usage.completion_tokens / result.duration, + duration: result.duration, + }, + }; + } + + async streamGetChatCompletion(messages = null, { temperature = 0.7 }) { + if (!(await this.isValidChatCompletionModel(this.model))) + throw new Error( + `CometAPI chat: ${this.model} is not valid for chat completion!` + ); + + const measuredStreamRequest = await LLMPerformanceMonitor.measureStream( + this.openai.chat.completions.create({ + model: this.model, + stream: true, + messages, + temperature, + }), + messages + ); + return measuredStreamRequest; + } + + /** + * Handles the default stream response for a chat. + * @param {import("express").Response} response + * @param {import('../../helpers/chat/LLMPerformanceMonitor').MonitoredStream} stream + * @param {Object} responseProps + * @returns {Promise} + */ + handleStream(response, stream, responseProps) { + const timeoutThresholdMs = this.timeout; + const { uuid = uuidv4(), sources = [] } = responseProps; + + return new Promise(async (resolve) => { + let fullText = ""; + let lastChunkTime = null; // null when first token is still not received. + + // Establish listener to early-abort a streaming response + // in case things go sideways or the user does not like the response. + // We preserve the generated text but continue as if chat was completed + // to preserve previously generated content. + const handleAbort = () => { + stream?.endMeasurement({ + completion_tokens: LLMPerformanceMonitor.countTokens(fullText), + }); + clientAbortedHandler(resolve, fullText); + }; + response.on("close", handleAbort); + + // NOTICE: Not all CometAPI models will return a stop reason + // which keeps the connection open and so the model never finalizes the stream + // like the traditional OpenAI response schema does. 
So in the case the response stream
+      // never reaches a formal close state we maintain an interval timer that if we go >=timeoutThresholdMs with
+      // no new chunks then we kill the stream and assume it to be complete. CometAPI is quite fast
+      // so this threshold should permit most responses, but we can adjust `timeoutThresholdMs` if
+      // we find it is too aggressive.
+      const timeoutCheck = setInterval(() => {
+        if (lastChunkTime === null) return;
+
+        const now = Number(new Date());
+        const diffMs = now - lastChunkTime;
+        if (diffMs >= timeoutThresholdMs) {
+          this.log(
+            `CometAPI stream did not self-close and has been stale for >${timeoutThresholdMs}ms. Closing response stream.`
+          );
+          writeResponseChunk(response, {
+            uuid,
+            sources,
+            type: "textResponseChunk",
+            textResponse: "",
+            close: true,
+            error: false,
+          });
+          clearInterval(timeoutCheck);
+          response.removeListener("close", handleAbort);
+          stream?.endMeasurement({
+            completion_tokens: LLMPerformanceMonitor.countTokens(fullText),
+          });
+          resolve(fullText);
+        }
+      }, 500);
+
+      try {
+        for await (const chunk of stream) {
+          const message = chunk?.choices?.[0];
+          const token = message?.delta?.content;
+          lastChunkTime = Number(new Date());
+
+          if (token) {
+            fullText += token;
+            writeResponseChunk(response, {
+              uuid,
+              sources: [],
+              type: "textResponseChunk",
+              textResponse: token,
+              close: false,
+              error: false,
+            });
+          }
+
+          // Guard with optional chaining so an empty `choices` array cannot throw,
+          // and only treat a real stop reason (e.g. "stop") as a finalized stream.
+          if (message?.finish_reason != null) {
+            writeResponseChunk(response, {
+              uuid,
+              sources,
+              type: "textResponseChunk",
+              textResponse: "",
+              close: true,
+              error: false,
+            });
+            clearInterval(timeoutCheck);
+            response.removeListener("close", handleAbort);
+            stream?.endMeasurement({
+              completion_tokens: LLMPerformanceMonitor.countTokens(fullText),
+            });
+            resolve(fullText);
+            break; // Stop consuming the stream once it is finalized.
+          }
+        }
+      } catch (e) {
+        writeResponseChunk(response, {
+          uuid,
+          sources,
+          type: "abort",
+          textResponse: null,
+          close: true,
+          error: e.message,
+        });
+        clearInterval(timeoutCheck);
+        response.removeListener("close", handleAbort);
+        stream?.endMeasurement({
+          completion_tokens: LLMPerformanceMonitor.countTokens(fullText),
+        });
+        resolve(fullText);
+      }
+    });
+  }
+
+  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
+  async embedTextInput(textInput) {
+    return await this.embedder.embedTextInput(textInput);
+  }
+  async embedChunks(textChunks = []) {
+    return await this.embedder.embedChunks(textChunks);
+  }
+
+  async compressMessages(promptArgs = {}, rawHistory = []) {
+    const { messageArrayCompressor } = require("../../helpers/chat");
+    const messageArray = this.constructPrompt(promptArgs);
+    return await messageArrayCompressor(this, messageArray, rawHistory);
+  }
+}
+
+/**
+ * Fetches available models from CometAPI and filters out non-chat models
+ * Based on cometapi.md specifications
+ */
+async function fetchCometApiModels() {
+  return await fetch(`https://api.cometapi.com/v1/models`, {
+    method: "GET",
+    headers: {
+      "Content-Type": "application/json",
+      Authorization: `Bearer ${process.env.COMETAPI_LLM_API_KEY}`,
+    },
+  })
+    .then((res) => res.json())
+    .then(({ data = [] }) => {
+      const models = {};
+
+      // Filter out non-chat models using patterns from cometapi.md
+      const chatModels = data.filter((model) => {
+        const modelId = model.id.toLowerCase();
+        return !COMETAPI_IGNORE_PATTERNS.some((pattern) =>
+          modelId.includes(pattern.toLowerCase())
+        );
+      });
+
+      chatModels.forEach((model) => {
+        models[model.id] = {
+          id: model.id,
+          name: model.id, // CometAPI has limited model info according to cometapi.md
+          organization:
+            model.id.split("/")[0] ||
model.id.split("-")[0] || "CometAPI", + maxLength: model.context_length || 4096, // Conservative default + }; + }); + + // Cache all response information + if (!fs.existsSync(cacheFolder)) + fs.mkdirSync(cacheFolder, { recursive: true }); + fs.writeFileSync( + path.resolve(cacheFolder, "models.json"), + JSON.stringify(models), + { + encoding: "utf-8", + } + ); + fs.writeFileSync( + path.resolve(cacheFolder, ".cached_at"), + String(Number(new Date())), + { + encoding: "utf-8", + } + ); + return models; + }) + .catch((e) => { + console.error("Error fetching CometAPI models:", e); + return {}; + }); +} + +module.exports = { + CometApiLLM, + fetchCometApiModels, +}; diff --git a/server/utils/agents/aibitat/index.js b/server/utils/agents/aibitat/index.js index d6b22d3a..683850df 100644 --- a/server/utils/agents/aibitat/index.js +++ b/server/utils/agents/aibitat/index.js @@ -830,6 +830,8 @@ ${this.getHistory({ to: route.to }) return new Providers.GeminiProvider({ model: config.model }); case "dpais": return new Providers.DellProAiStudioProvider({ model: config.model }); + case "cometapi": + return new Providers.CometApiProvider({ model: config.model }); default: throw new Error( `Unknown provider: ${config.provider}. Please use a valid provider.` diff --git a/server/utils/agents/aibitat/providers/ai-provider.js b/server/utils/agents/aibitat/providers/ai-provider.js index 07867e4c..c2528acd 100644 --- a/server/utils/agents/aibitat/providers/ai-provider.js +++ b/server/utils/agents/aibitat/providers/ai-provider.js @@ -251,6 +251,14 @@ class Provider { apiKey: null, ...config, }); + case "cometapi": + return new ChatOpenAI({ + configuration: { + baseURL: "https://api.cometapi.com/v1", + }, + apiKey: process.env.COMETAPI_LLM_API_KEY ?? null, + ...config, + }); default: throw new Error(`Unsupported provider ${provider} for this task.`); diff --git a/server/utils/agents/aibitat/providers/cometapi.js b/server/utils/agents/aibitat/providers/cometapi.js new file mode 100644 index 00000000..87eca7a0 --- /dev/null +++ b/server/utils/agents/aibitat/providers/cometapi.js @@ -0,0 +1,115 @@ +const OpenAI = require("openai"); +const Provider = require("./ai-provider.js"); +const InheritMultiple = require("./helpers/classes.js"); +const UnTooled = require("./helpers/untooled.js"); + +/** + * The agent provider for the CometAPI provider. + */ +class CometApiProvider extends InheritMultiple([Provider, UnTooled]) { + model; + + constructor(config = {}) { + const { model = "gpt-5-mini" } = config; + super(); + const client = new OpenAI({ + baseURL: "https://api.cometapi.com/v1", + apiKey: process.env.COMETAPI_LLM_API_KEY, + maxRetries: 3, + defaultHeaders: { + "HTTP-Referer": "https://anythingllm.com", + "X-CometAPI-Source": "anythingllm", + }, + }); + + this._client = client; + this.model = model; + this.verbose = true; + } + + get client() { + return this._client; + } + + async #handleFunctionCallChat({ messages = [] }) { + return await this.client.chat.completions + .create({ + model: this.model, + temperature: 0, + messages, + }) + .then((result) => { + if (!result.hasOwnProperty("choices")) + throw new Error("CometAPI chat: No results!"); + if (result.choices.length === 0) + throw new Error("CometAPI chat: No results length!"); + return result.choices[0].message.content; + }) + .catch((_) => { + return null; + }); + } + + /** + * Create a completion based on the received messages. + * + * @param messages A list of messages to send to the API. + * @param functions + * @returns The completion. 
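+   * Resolves to { result, functionCall?, cost }; functionCall is populated
+   * when a valid tool invocation was detected instead of a plain text answer.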
+ */ + async complete(messages, functions = []) { + let completion; + if (functions.length > 0) { + const { toolCall, text } = await this.functionCall( + messages, + functions, + this.#handleFunctionCallChat.bind(this) + ); + + if (toolCall !== null) { + this.providerLog(`Valid tool call found - running ${toolCall.name}.`); + this.deduplicator.trackRun(toolCall.name, toolCall.arguments); + return { + result: null, + functionCall: { + name: toolCall.name, + arguments: toolCall.arguments, + }, + cost: 0, + }; + } + completion = { content: text }; + } + + if (!completion?.content) { + this.providerLog("Will assume chat completion without tool call inputs."); + const response = await this.client.chat.completions.create({ + model: this.model, + messages: this.cleanMsgs(messages), + }); + completion = response.choices[0].message; + } + + // The UnTooled class inherited Deduplicator is mostly useful to prevent the agent + // from calling the exact same function over and over in a loop within a single chat exchange + // _but_ we should enable it to call previously used tools in a new chat interaction. + this.deduplicator.reset("runs"); + return { + result: completion.content, + cost: 0, + }; + } + + /** + * Get the cost of the completion. + * + * @param _usage The completion to get the cost for. + * @returns The cost of the completion. + * Stubbed since CometAPI has no cost basis. + */ + getCost() { + return 0; + } +} + +module.exports = CometApiProvider; diff --git a/server/utils/agents/aibitat/providers/index.js b/server/utils/agents/aibitat/providers/index.js index 859ad9de..2146269b 100644 --- a/server/utils/agents/aibitat/providers/index.js +++ b/server/utils/agents/aibitat/providers/index.js @@ -24,6 +24,7 @@ const PPIOProvider = require("./ppio.js"); const GeminiProvider = require("./gemini.js"); const DellProAiStudioProvider = require("./dellProAiStudio.js"); const MoonshotAiProvider = require("./moonshotAi.js"); +const CometApiProvider = require("./cometapi.js"); module.exports = { OpenAIProvider, @@ -47,6 +48,7 @@ module.exports = { ApiPieProvider, XAIProvider, NovitaProvider, + CometApiProvider, NvidiaNimProvider, PPIOProvider, GeminiProvider, diff --git a/server/utils/agents/index.js b/server/utils/agents/index.js index 4527ee78..46581d3c 100644 --- a/server/utils/agents/index.js +++ b/server/utils/agents/index.js @@ -204,6 +204,11 @@ class AgentHandler { throw new Error("Moonshot AI model must be set to use agents."); break; + case "cometapi": + if (!process.env.COMETAPI_LLM_API_KEY) + throw new Error("CometAPI API Key must be provided to use agents."); + break; + default: throw new Error( "No workspace agent provider set. Please set your agent provider in the workspace's settings" @@ -274,6 +279,8 @@ class AgentHandler { return process.env.GEMINI_LLM_MODEL_PREF ?? "gemini-2.0-flash-lite"; case "dpais": return process.env.DPAIS_LLM_MODEL_PREF; + case "cometapi": + return process.env.COMETAPI_LLM_MODEL_PREF ?? 
"gpt-5-mini"; default: return null; } diff --git a/server/utils/helpers/customModels.js b/server/utils/helpers/customModels.js index e0a1fb82..ea5e738c 100644 --- a/server/utils/helpers/customModels.js +++ b/server/utils/helpers/customModels.js @@ -8,6 +8,7 @@ const { parseLMStudioBasePath } = require("../AiProviders/lmStudio"); const { parseNvidiaNimBasePath } = require("../AiProviders/nvidiaNim"); const { fetchPPIOModels } = require("../AiProviders/ppio"); const { GeminiLLM } = require("../AiProviders/gemini"); +const { fetchCometApiModels } = require("../AiProviders/cometapi"); const SUPPORT_CUSTOM_MODELS = [ "openai", @@ -28,6 +29,7 @@ const SUPPORT_CUSTOM_MODELS = [ "deepseek", "apipie", "novita", + "cometapi", "xai", "gemini", "ppio", @@ -76,6 +78,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) { return await getAPIPieModels(apiKey); case "novita": return await getNovitaModels(); + case "cometapi": + return await getCometApiModels(); case "xai": return await getXAIModels(apiKey); case "nvidia-nim": @@ -453,6 +457,20 @@ async function getNovitaModels() { return { models, error: null }; } +async function getCometApiModels() { + const knownModels = await fetchCometApiModels(); + if (!Object.keys(knownModels).length === 0) + return { models: [], error: null }; + const models = Object.values(knownModels).map((model) => { + return { + id: model.id, + organization: model.organization, + name: model.name, + }; + }); + return { models, error: null }; +} + async function getAPIPieModels(apiKey = null) { const knownModels = await fetchApiPieModels(apiKey); if (!Object.keys(knownModels).length === 0) diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js index bff2873b..12327698 100644 --- a/server/utils/helpers/index.js +++ b/server/utils/helpers/index.js @@ -212,6 +212,9 @@ function getLLMProvider({ provider = null, model = null } = {}) { case "dpais": const { DellProAiStudioLLM } = require("../AiProviders/dellProAiStudio"); return new DellProAiStudioLLM(embedder, model); + case "cometapi": + const { CometApiLLM } = require("../AiProviders/cometapi"); + return new CometApiLLM(embedder, model); default: throw new Error( `ENV: No valid LLM_PROVIDER value found in environment! 
Using ${process.env.LLM_PROVIDER}` @@ -362,6 +365,9 @@ function getLLMProviderClass({ provider = null } = {}) { case "moonshotai": const { MoonshotAiLLM } = require("../AiProviders/moonshotAi"); return MoonshotAiLLM; + case "cometapi": + const { CometApiLLM } = require("../AiProviders/cometapi"); + return CometApiLLM; default: return null; } @@ -430,6 +436,8 @@ function getBaseLLMProviderModel({ provider = null } = {}) { return process.env.DPAIS_LLM_MODEL_PREF; case "moonshotai": return process.env.MOONSHOT_AI_MODEL_PREF; + case "cometapi": + return process.env.COMETAPI_LLM_MODEL_PREF; default: return null; } diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index 124b4b4e..6dfbe4fc 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -704,6 +704,20 @@ const KEY_MAPPING = { envKey: "MOONSHOT_AI_MODEL_PREF", checks: [isNotEmpty], }, + + // CometAPI Options + CometApiLLMApiKey: { + envKey: "COMETAPI_LLM_API_KEY", + checks: [isNotEmpty], + }, + CometApiLLMModelPref: { + envKey: "COMETAPI_LLM_MODEL_PREF", + checks: [isNotEmpty], + }, + CometApiLLMTimeout: { + envKey: "COMETAPI_LLM_TIMEOUT_MS", + checks: [], + }, }; function isNotEmpty(input = "") { @@ -813,6 +827,7 @@ function supportedLLM(input = "") { "ppio", "dpais", "moonshotai", + "cometapi", ].includes(input); return validSelection ? null : `${input} is not a valid LLM provider.`; }