diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/AgentMenu/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/AgentMenu/index.jsx new file mode 100644 index 00000000..e5036604 --- /dev/null +++ b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/AgentMenu/index.jsx @@ -0,0 +1,116 @@ +import { useEffect, useRef, useState } from "react"; +import { Tooltip } from "react-tooltip"; +import { At } from "@phosphor-icons/react"; +import { useIsAgentSessionActive } from "@/utils/chat/agent"; +import { useTranslation } from "react-i18next"; +import { useSearchParams } from "react-router-dom"; + +export default function AvailableAgentsButton({ showing, setShowAgents }) { + const { t } = useTranslation(); + const agentSessionActive = useIsAgentSessionActive(); + if (agentSessionActive) return null; + return ( +
setShowAgents(!showing)} + className={`flex justify-center items-center cursor-pointer opacity-60 hover:opacity-100 light:opacity-100 light:hover:opacity-60 ${ + showing ? "!opacity-100" : "" + }`} + > + + +
+ ); +} + +export function AvailableAgents({ + showing, + setShowing, + sendCommand, + promptRef, + centered = false, +}) { + const formRef = useRef(null); + const agentSessionActive = useIsAgentSessionActive(); + const [searchParams] = useSearchParams(); + const { t } = useTranslation(); + + /* + * @checklist-item + * If the URL has the agent param, open the agent menu for the user + * automatically when the component mounts. + */ + useEffect(() => { + if (searchParams.get("action") === "set-agent-chat" && !showing) + handleAgentClick(); + }, [promptRef.current]); + + useEffect(() => { + function listenForOutsideClick() { + if (!showing || !formRef.current) return false; + document.addEventListener("click", closeIfOutside); + } + listenForOutsideClick(); + }, [showing, formRef.current]); + + const closeIfOutside = ({ target }) => { + if (target.id === "agent-list-btn") return; + const isOutside = !formRef?.current?.contains(target); + if (!isOutside) return; + setShowing(false); + }; + + const handleAgentClick = () => { + setShowing(false); + sendCommand({ text: "@agent " }); + promptRef?.current?.focus(); + }; + + if (agentSessionActive) return null; + return ( + <> + + + ); +} + +export function useAvailableAgents() { + const [showAgents, setShowAgents] = useState(false); + return { showAgents, setShowAgents }; +} diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/ToolsMenu/Tabs/AgentSkills/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/ToolsMenu/Tabs/AgentSkills/index.jsx index bff5d17d..f6f7a388 100644 --- a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/ToolsMenu/Tabs/AgentSkills/index.jsx +++ b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/ToolsMenu/Tabs/AgentSkills/index.jsx @@ -17,8 +17,10 @@ import { useIsAgentSessionActive } from "@/utils/chat/agent"; export default function AgentSkillsTab({ highlightedIndex = -1, registerItemCount, + workspace, }) { const { t } = 
useTranslation(); + const { showAgentCommand = true } = workspace ?? {}; const agentSessionActive = useIsAgentSessionActive(); const defaultSkills = getDefaultSkills(t); const configurableSkills = getConfigurableSkills(t); @@ -27,6 +29,7 @@ export default function AgentSkillsTab({ const [importedSkills, setImportedSkills] = useState([]); const [flows, setFlows] = useState([]); const [loading, setLoading] = useState(true); + const showAgentCmdActivationAlert = showAgentCommand && !agentSessionActive; useEffect(() => { fetchSkillSettings(); @@ -147,7 +150,7 @@ export default function AgentSkillsTab({ return ( <> - {!agentSessionActive && ( + {showAgentCmdActivationAlert && (

{t("chat_window.use_agent_session_to_use_tools")}

diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/ToolsMenu/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/ToolsMenu/index.jsx index a5146702..20caef44 100644 --- a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/ToolsMenu/index.jsx +++ b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/ToolsMenu/index.jsx @@ -29,6 +29,7 @@ function getTabs(t, user) { } /** + * @param {Workspace} props.workspace - the workspace object * @param {boolean} props.showing * @param {function} props.setShowing * @param {function} props.sendCommand @@ -36,6 +37,7 @@ function getTabs(t, user) { * @param {boolean} [props.centered] - when true, popup opens below the input */ export default function ToolsMenu({ + workspace, showing, setShowing, sendCommand, @@ -147,6 +149,7 @@ export default function ToolsMenu({ promptRef={promptRef} highlightedIndex={highlightedIndex} registerItemCount={registerItemCount} + workspace={workspace} /> diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx index 018c218b..6a3fa79f 100644 --- a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx +++ b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx @@ -24,6 +24,7 @@ export const PROMPT_INPUT_EVENT = "set_prompt_input"; const MAX_EDIT_STACK_SIZE = 100; /** + * @param {Workspace} props.workspace - workspace object * @param {function} props.submit - form submit handler * @param {boolean} props.isStreaming - disables input while streaming response * @param {function} props.sendCommand - handler for slash commands and agent mentions @@ -33,6 +34,7 @@ const MAX_EDIT_STACK_SIZE = 100; * @param {string} [props.threadSlug] - thread slug for home page context */ export default function PromptInput({ + workspace = {}, submit, isStreaming, sendCommand, @@ -42,6 +44,7 @@ export default 
function PromptInput({ threadSlug = null, }) { const { t } = useTranslation(); + const { showAgentCommand = true } = workspace ?? {}; const { isDisabled } = useIsDisabled(); const agentSessionActive = useIsAgentSessionActive(); const [promptInput, setPromptInput] = useState(""); @@ -329,6 +332,7 @@ export default function PromptInput({ >
ستحتاج إلى استخدام الأمر "@agent" لاستخدام الأدوات.', }, query: { title: "استعلام", - "desc-start": "سوف تقدم الإجابات", - only: "فقط", - "desc-end": "إذا وجد المستند في السياق", + description: + 'سيوفر الإجابات فقط إذا تم العثور على سياق الوثيقة.
ستحتاج إلى استخدام الأمر "@agent" لاستخدام الأدوات.', + }, + automatic: { + title: "تلقائي", + description: + 'سيتم استخدام الأدوات تلقائيًا إذا كان النموذج ومزود الخدمة يدعمان استدعاء الأدوات الأصلية. إذا لم يتم دعم الأدوات الأصلية، فستحتاج إلى استخدام الأمر "@agent" لاستخدام الأدوات.', }, }, history: { diff --git a/frontend/src/locales/cs/common.js b/frontend/src/locales/cs/common.js index 18b6abc5..b2d1b6be 100644 --- a/frontend/src/locales/cs/common.js +++ b/frontend/src/locales/cs/common.js @@ -191,15 +191,18 @@ const TRANSLATIONS = { title: "Režim chatu", chat: { title: "Chat", - "desc-start": "bude poskytovat odpovědi s obecnými znalostmi LLM", - and: "a", - "desc-end": "kontext dokumentu, který je nalezen.", + description: + "poskytne odpovědi založené na obecných znalostech LLM a kontextu dokumentu, který je k dispozici.<br />
Pro použití nástrojů budete muset použít příkaz @agent.", }, query: { title: "Dotaz", - "desc-start": "bude poskytovat odpovědi", - only: "pouze", - "desc-end": "pokud je nalezen kontext dokumentu.", + description: + "budou poskytovat odpovědi pouze, pokud je nalezen kontext dokumentu.
Pro použití nástrojů budete muset použít příkaz @agent.", + }, + automatic: { + title: "Auto", + description: + "automaticky použije nástroje, pokud to podporují jak model, tak poskytovatel. Pokud není podporováno nativní volání nástrojů, budete muset použít příkaz `@agent` pro použití nástrojů.", }, }, history: { diff --git a/frontend/src/locales/da/common.js b/frontend/src/locales/da/common.js index ea57718c..23e43d10 100644 --- a/frontend/src/locales/da/common.js +++ b/frontend/src/locales/da/common.js @@ -177,15 +177,18 @@ const TRANSLATIONS = { title: "Chat-tilstand", chat: { title: "Chat", - "desc-start": "vil give svar baseret på LLM'ens generelle viden", - and: "og", - "desc-end": "dokumentkontekst der findes.", + description: + 'vil give svar baseret på LLM\'s generelle viden og den relevante kontekst fra dokumentet. Du skal bruge kommandoen "@agent" for at bruge værktøjerne.', }, query: { title: "Forespørgsel", - "desc-start": "vil give svar", - only: "kun", - "desc-end": "hvis dokumentkontekst findes.", + description: + "vil give svar kun, hvis dokumentets kontekst er fundet.
Du skal bruge kommandoen @agent for at bruge værktøjerne.", + }, + automatic: { + title: "Auto", + description: + 'vil automatisk bruge værktøjer, hvis modellen og leverandøren understøtter native værktøjskald.<br />
Hvis native værktøjskald ikke understøttes, skal du bruge kommandoen "@agent" for at bruge værktøjer.', }, }, history: { diff --git a/frontend/src/locales/de/common.js b/frontend/src/locales/de/common.js index 7591d425..0c5dc8ba 100644 --- a/frontend/src/locales/de/common.js +++ b/frontend/src/locales/de/common.js @@ -184,15 +184,18 @@ const TRANSLATIONS = { title: "Chat-Modus", chat: { title: "Chat", - "desc-start": "wird Antworten mit dem allgemeinen Wissen des LLM", - and: "und", - "desc-end": "gefundenem Dokumentenkontext liefern.", + description: + "wird Antworten basierend auf dem allgemeinen Wissen des LLM und dem relevanten Kontext aus den Dokumenten liefern.<br />
Sie benötigen den Befehl `@agent`, um die Tools zu nutzen.", }, query: { title: "Abfrage", - "desc-start": "wird Antworten", - only: "nur", - "desc-end": "liefern, wenn Dokumentenkontext gefunden wird.", + description: + 'wird nur Antworten bereitstellen, falls der Kontext des Dokuments gefunden wurde.<br />
Sie müssen den Befehl "@agent" verwenden, um die Tools zu nutzen.', + }, + automatic: { + title: "Auto", + description: + 'wird automatisch Werkzeuge verwenden, wenn das Modell und der Anbieter native Werkzeugaufrufe unterstützen.
Wenn native Werkzeugaufrufe nicht unterstützt werden, müssen Sie den Befehl "@agent" verwenden, um Werkzeuge zu nutzen.', }, }, history: { diff --git a/frontend/src/locales/en/common.js b/frontend/src/locales/en/common.js index 67306800..55e6f43c 100644 --- a/frontend/src/locales/en/common.js +++ b/frontend/src/locales/en/common.js @@ -188,17 +188,20 @@ const TRANSLATIONS = { }, mode: { title: "Chat mode", + automatic: { + title: "Auto", + description: + "will automatically use tools if the model and provider support native tool calling.
If native tooling is not supported, you will need to use the @agent command to use tools.", + }, chat: { title: "Chat", - "desc-start": "will provide answers with the LLM's general knowledge", - and: "and", - "desc-end": "document context that is found.", + description: + "will provide answers with the LLM's general knowledge and document context that is found.
You will need to use the @agent command to use tools.", }, query: { title: "Query", - "desc-start": "will provide answers", - only: "only", - "desc-end": "if document context is found.", + description: + "will provide answers only if document context is found.
You will need to use the @agent command to use tools.", }, }, history: { diff --git a/frontend/src/locales/es/common.js b/frontend/src/locales/es/common.js index 882054e0..2ec4fd7a 100644 --- a/frontend/src/locales/es/common.js +++ b/frontend/src/locales/es/common.js @@ -185,16 +185,18 @@ const TRANSLATIONS = { title: "Modo de chat", chat: { title: "Chat", - "desc-start": - "proporcionará respuestas con el conocimiento general del LLM", - and: "y", - "desc-end": "el contexto del documento que se encuentre.", + description: + 'proporcionará respuestas basándose en el conocimiento general del LLM y en el contexto del documento que se encuentre disponible. Para utilizar las herramientas, deberá utilizar el comando "@agent".', }, query: { title: "Consulta", - "desc-start": "proporcionará respuestas", - only: "solo", - "desc-end": "si se encuentra contexto del documento.", + description: + 'proporcionará respuestas solo si se encuentra el contexto del documento.
Deberá utilizar el comando "@agent" para utilizar las herramientas.', + }, + automatic: { + title: "Auto", + description: + 'Utilizará automáticamente las herramientas si el modelo y el proveedor admiten la llamada a herramientas nativas. Si no se admiten las herramientas nativas, deberá utilizar el comando "@agent" para utilizar las herramientas.', }, }, history: { diff --git a/frontend/src/locales/et/common.js b/frontend/src/locales/et/common.js index 2cefa614..38b99031 100644 --- a/frontend/src/locales/et/common.js +++ b/frontend/src/locales/et/common.js @@ -181,15 +181,18 @@ const TRANSLATIONS = { title: "Vestlusrežiim", chat: { title: "Vestlus", - "desc-start": "annab vastuseid LLM-i üldteadmistest", - and: "ja", - "desc-end": "leitud dokumendikontekstist.", + description: + 'annab vastuseid, kasutades LLM-i üldist teadmist ja dokumentide konteksti, mida on leitav.<br />
Selleks peate kasutama käsku "@agent".', }, query: { title: "Päring", - "desc-start": "annab vastuseid", - only: "ainult", - "desc-end": "kui leitakse dokumendikontekst.", + description: + 'annab vastuseid ainult, kui dokumendi kontekst on leitud. Peate kasutama käsku "@agent", et kasutada tööriistu.', + }, + automatic: { + title: "Automaatne", + description: + 'kasutab automaatselt tööriistu, kui mudel ja pakkuja toetavad native tööriistade kasutamist.<br />
Kui native tööriistade kasutamine pole toetatud, peate kasutama käsku "@agent", et tööriiste kasutada.', }, }, history: { diff --git a/frontend/src/locales/fa/common.js b/frontend/src/locales/fa/common.js index 3cb339ee..b11ad147 100644 --- a/frontend/src/locales/fa/common.js +++ b/frontend/src/locales/fa/common.js @@ -176,15 +176,18 @@ const TRANSLATIONS = { title: "حالت گفتگو", chat: { title: "گفتگو", - "desc-start": "پاسخ‌ها را با دانش عمومی LLM", - and: "و", - "desc-end": "محتوای اسناد یافت شده ارائه می‌دهد.", + description: + "با استفاده از دانش عمومی مدل زبانی و اطلاعات موجود در سند، پاسخ‌ها را ارائه خواهد داد. برای استفاده از ابزارها، باید از دستور @agent استفاده کنید.", }, query: { title: "پرس‌وجو", - "desc-start": "پاسخ‌ها را", - only: "فقط", - "desc-end": "در صورت یافتن محتوای اسناد ارائه می‌دهد.", + description: + "پاسخ‌ها را تنها در صورت یافتن زمینه سند ارائه می‌دهد. برای استفاده از ابزارها، باید از دستور @agent استفاده کنید.", + }, + automatic: { + title: "خودکار", + description: + "اگر مدل و ارائه‌دهنده از فراخوانی ابزار به صورت پیش‌فرض پشتیبانی کنند، ابزارها به‌طور خودکار استفاده خواهند شد.<br />
در صورتی که فراخوانی ابزار به صورت پیش‌فرض پشتیبانی نشود، شما باید از دستور @agent برای استفاده از ابزارها استفاده کنید.", }, }, history: { diff --git a/frontend/src/locales/fr/common.js b/frontend/src/locales/fr/common.js index decef6b9..68472588 100644 --- a/frontend/src/locales/fr/common.js +++ b/frontend/src/locales/fr/common.js @@ -177,16 +177,18 @@ const TRANSLATIONS = { title: "Mode de chat", chat: { title: "Chat", - "desc-start": - "fournira des réponses avec les connaissances générales du LLM", - and: "et", - "desc-end": "le contexte du document trouvé.", + description: + 'fournira des réponses en utilisant les connaissances générales du LLM et le contexte du document correspondant.
Vous devrez utiliser la commande "@agent" pour utiliser les outils.', }, query: { title: "Requête", - "desc-start": "fournira des réponses", - only: "uniquement", - "desc-end": "si un contexte de document est trouvé.", + description: + "fournira des réponses uniquement si le contexte du document est trouvé.
Vous devrez utiliser la commande @agent pour utiliser les outils.", + }, + automatic: { + title: "Auto", + description: + "utilisera automatiquement les outils si le modèle et le fournisseur prennent en charge l'appel d'outils natifs.<br />
Si l'utilisation d'outils natifs n'est pas prise en charge, vous devrez utiliser la commande \"@agent\" pour utiliser les outils.", }, }, history: { diff --git a/frontend/src/locales/he/common.js b/frontend/src/locales/he/common.js index e43c2461..ccd5075f 100644 --- a/frontend/src/locales/he/common.js +++ b/frontend/src/locales/he/common.js @@ -179,15 +179,18 @@ const TRANSLATIONS = { title: "מצב צ'אט", chat: { title: "צ'אט", - "desc-start": "יספק תשובות עם הידע הכללי של מודל השפה", - and: "וכן", - "desc-end": "מהקשר המסמכים שנמצא.", + description: + 'יוכל לספק תשובות בהתבסס על הידע הכללי של ה-LLM ועל ההקשר הרלוונטי מתוך המסמך. ו-\nתצטרכו להשתמש בפקודה "@agent" כדי להשתמש בכלי.', }, query: { title: "שאילתה", - "desc-start": "יספק תשובות", - only: "רק", - "desc-end": "אם נמצא הקשר במסמכים.", + description: + "יספק תשובות רקבמידה ויהיה ניתן למצוא הקשר של המסמך.
תצטרכו להשתמש בפקודה @agent כדי להשתמש בכלי.", + }, + automatic: { + title: "אוטומטי", + description: + 'הכלי ישתמש באופן אוטומטי בכלים אם המודל והספק תומכים בהם.<br />
אם אין תמיכה בכלים מקומיים, תצטרכו להשתמש בפקודה "@agent" כדי להשתמש בכלים.', }, }, history: { diff --git a/frontend/src/locales/it/common.js b/frontend/src/locales/it/common.js index e19a0e69..36c72b8b 100644 --- a/frontend/src/locales/it/common.js +++ b/frontend/src/locales/it/common.js @@ -178,15 +178,18 @@ const TRANSLATIONS = { title: "Modalità chat", chat: { title: "Chat", - "desc-start": "fornirà risposte con la conoscenza generale dell'LLM", - and: "e", - "desc-end": "contesto documentale associato.", + description: + "fornirà risposte basate sulla conoscenza generale del modello LLM e sul contesto del documento e che è disponibile.
Per utilizzare gli strumenti, sarà necessario utilizzare il comando @agent.", }, query: { title: "Query", - "desc-start": "fornirà risposte", - only: "solo", - "desc-end": "se sarà presente un contesto documentale", + description: + 'fornirà risposte solo se il contesto del documento viene trovato. Per utilizzare gli strumenti, sarà necessario utilizzare il comando "@agent".', + }, + automatic: { + title: "Auto", + description: + 'utilizzerà automaticamente gli strumenti se il modello e il fornitore supportano la chiamata nativa agli strumenti.
Se la chiamata nativa agli strumenti non è supportata, sarà necessario utilizzare il comando "@agent" per utilizzare gli strumenti.', }, }, history: { diff --git a/frontend/src/locales/ja/common.js b/frontend/src/locales/ja/common.js index 64f297b4..d7382458 100644 --- a/frontend/src/locales/ja/common.js +++ b/frontend/src/locales/ja/common.js @@ -175,15 +175,18 @@ const TRANSLATIONS = { title: "チャットモード", chat: { title: "チャット", - "desc-start": "LLMの一般知識で回答します", - and: "および", - "desc-end": "見つかったドキュメントコンテキストを使用します。", + description: + "LLMの一般的な知識と、関連するドキュメントの文脈に基づいて、回答を提供します。ツールを使用するには、`@agent`コマンドを使用する必要があります。", }, query: { title: "クエリ", - "desc-start": "回答を提供します", - only: "のみ", - "desc-end": "ドキュメントコンテキストが見つかった場合のみ。", + description: + "該当する情報が見つかった場合のみ、回答を提供します。ツールを使用するには、@agentコマンドを使用する必要があります。", + }, + automatic: { + title: "自動", + description: + "ネイティブなツール呼び出しをサポートしている場合、モデルとプロバイダーが自動的にツールを使用します。<br />
ネイティブなツール呼び出しがサポートされていない場合は、@agentコマンドを使用してツールを使用する必要があります。", }, }, history: { diff --git a/frontend/src/locales/ko/common.js b/frontend/src/locales/ko/common.js index bf0dfa42..a86b98da 100644 --- a/frontend/src/locales/ko/common.js +++ b/frontend/src/locales/ko/common.js @@ -180,15 +180,18 @@ const TRANSLATIONS = { title: "채팅 모드", chat: { title: "채팅", - "desc-start": "문서 내용을 찾습니다.", - and: "그리고", - "desc-end": "LLM의 일반 지식을 같이 사용하여 답변을 제공합니다", + description: + "LLM의 일반적인 지식과 관련 문맥 정보를 활용하여 답변을 제공합니다. 도구를 사용하려면 @agent 명령어를 사용해야 합니다.", }, query: { title: "쿼리", - "desc-start": "문서 컨텍스트를 찾을 ", - only: "때만", - "desc-end": "답변을 제공합니다.", + description: + "문서 맥락이 발견된 경우에만 답변을 제공합니다.<br />
도구를 사용하려면 @agent 명령을 사용해야 합니다.", + }, + automatic: { + title: "자동", + description: + "모델과 제공업체가 네이티브 도구 호출을 지원하는 경우, 자동으로 도구를 사용합니다.
네이티브 도구 호출이 지원되지 않는 경우, 도구를 사용하려면 @agent 명령을 사용해야 합니다.", }, }, history: { diff --git a/frontend/src/locales/lv/common.js b/frontend/src/locales/lv/common.js index f9ffd1d6..77c86816 100644 --- a/frontend/src/locales/lv/common.js +++ b/frontend/src/locales/lv/common.js @@ -183,15 +183,18 @@ const TRANSLATIONS = { title: "Sarunas režīms", chat: { title: "Saruna", - "desc-start": "sniegs atbildes ar LLM vispārējām zināšanām", - and: "un", - "desc-end": "dokumentu kontekstu, kas tiek atrasts.", + description: + 'sniedz atbildes, izmantojot LLM vispārīgo zināšanu un dokumenta kontekstu, kas ir pieejams.
Lai izmantotu rīkus, jums jāizmanto komanda "@agent".', }, query: { title: "Vaicājums", - "desc-start": "sniegs atbildes", - only: "tikai", - "desc-end": "ja tiek atrasts dokumentu konteksts.", + description: + 'sniedz atbildes tikai, ja dokumenta konteksts ir atrasts.<br />
Lai izmantotu rīkus, jums būs jāizmanto komanda "@agent".', + }, + automatic: { + title: "Auto", + description: + 'automātiski izmantos rīkus, ja modelis un sniedzējs atbalsta vietējo rīku izmantošanu.<br />
Ja vietējā rīku izmantošana netiek atbalstīta, jums būs jāizmantojas "@agent" komanda, lai izmantotu rīkus.', }, }, history: { diff --git a/frontend/src/locales/nl/common.js b/frontend/src/locales/nl/common.js index 2943f2b9..d350cfb9 100644 --- a/frontend/src/locales/nl/common.js +++ b/frontend/src/locales/nl/common.js @@ -176,15 +176,18 @@ const TRANSLATIONS = { title: "Chatmodus", chat: { title: "Chat", - "desc-start": "zal antwoorden geven met de algemene kennis van de LLM", - and: "en", - "desc-end": "documentcontext die wordt gevonden.", + description: + "zal antwoorden geven met de algemene kennis van het LLM en de relevante context uit het document. U moet het `@agent`-commando gebruiken om tools te gebruiken.", }, query: { title: "Query", - "desc-start": "zal antwoorden geven", - only: "alleen", - "desc-end": "als documentcontext wordt gevonden.", + description: + "zal antwoorden alleen geven, indien de context van het document wordt gevonden.
U moet het commando @agent gebruiken om tools te gebruiken.", + }, + automatic: { + title: "Auto", + description: + "zal automatisch tools gebruiken als het model en de provider native tool-aanroepen ondersteunen.
Als native tooling niet wordt ondersteund, moet u het `@agent`-commando gebruiken om tools te gebruiken.", }, }, history: { diff --git a/frontend/src/locales/pl/common.js b/frontend/src/locales/pl/common.js index ab9ba831..efea7580 100644 --- a/frontend/src/locales/pl/common.js +++ b/frontend/src/locales/pl/common.js @@ -183,16 +183,18 @@ const TRANSLATIONS = { title: "Tryb czatu", chat: { title: "Czat", - "desc-start": "dostarczy odpowiedzi na podstawie wiedzy ogólnej LLM", - and: "oraz", - "desc-end": " znalezionym kontekście (dokumenty, źródła danych)", + description: + "zapewni odpowiedzi, wykorzystując ogólną wiedzę LLM oraz kontekst dokumentu, w którym ta wiedza znajduje się.
Będziesz musiał użyć komendy `@agent` w celu korzystania z narzędzi.", }, query: { title: "Zapytanie (wyszukiwanie)", - "desc-start": "dostarczy odpowiedzi", - only: "tylko", - "desc-end": - "na podstawie znalezionego kontekstu (dokumenty, źródła danych) - w przeciwnym razie odmówi odpowiedzi.", + description: + "będzie dostarczać odpowiedzi tylko, jeśli zostanie zidentyfikowany kontekst dokumentu.
Będziesz musiał użyć komendy `@agent` w celu korzystania z narzędzi.", + }, + automatic: { + title: "Auto", + description: + "automatycznie będzie wykorzystywał narzędzia, jeśli model i dostawca obsługują natywne wywoływanie narzędzi. Jeśli natywne narzędzia nie są obsługiwane, konieczne będzie użycie polecenia `@agent` w celu wykorzystania narzędzi.", }, }, history: { diff --git a/frontend/src/locales/pt_BR/common.js b/frontend/src/locales/pt_BR/common.js index ebfc9dd7..1d5fd71a 100644 --- a/frontend/src/locales/pt_BR/common.js +++ b/frontend/src/locales/pt_BR/common.js @@ -183,15 +183,18 @@ const TRANSLATIONS = { title: "Modo de Chat", chat: { title: "Chat", - "desc-start": "fornecerá respostas com conhecimento geral do LLM", - and: "e", - "desc-end": "contexto dos documentos encontrados.", + description: + 'fornecerá respostas com base no conhecimento geral do LLM e no contexto do documento encontrado.<br />
Você precisará usar o comando "@agent" para utilizar as ferramentas.', }, query: { title: "Consulta", - "desc-start": "fornecerá respostas", - only: "apenas", - "desc-end": "se contexto for encontrado nos documentos.", + description: + 'fornecerá respostas apenas caso o contexto do documento seja encontrado.
Você precisará usar o comando "@agent" para utilizar as ferramentas.', + }, + automatic: { + title: "Automático", + description: + 'utilizará automaticamente as ferramentas, se o modelo e o provedor suportarem a chamada nativa de ferramentas. Se a chamada nativa de ferramentas não for suportada, você precisará usar o comando "@agent" para utilizar as ferramentas.', }, }, history: { diff --git a/frontend/src/locales/ro/common.js b/frontend/src/locales/ro/common.js index 49fa53ad..5e2b824d 100644 --- a/frontend/src/locales/ro/common.js +++ b/frontend/src/locales/ro/common.js @@ -185,16 +185,18 @@ const TRANSLATIONS = { title: "Mod chat", chat: { title: "Chat", - "desc-start": - "oferă răspunsuri bazate pe cunoștințele generale ale LLM-ului", - and: "și", - "desc-end": "context document care este găsit.", + description: + 'va oferi răspunsuri folosind cunoștințele generale ale modelului LLM și contextul documentului respectiv.<br />
Va trebui să utilizați comanda "@agent" pentru a utiliza instrumentele.', }, query: { title: "Interogare", - "desc-start": "oferă răspunsuri", - only: "doar", - "desc-end": "dacă contextul documentului este găsit.", + description: + 'va oferi răspunsuri doar dacă contextul documentului este identificat. Veți avea nevoie să utilizați comanda "@agent" pentru a utiliza instrumentele.', + }, + automatic: { + title: "Auto", + description: + 'va utiliza automat instrumentele, dacă modelul și furnizorul suportă apelarea nativă a instrumentelor.<br />
Dacă apelarea nativă a instrumentelor nu este suportată, veți avea nevoie să utilizați comanda "@agent" pentru a utiliza instrumentele.', }, }, history: { diff --git a/frontend/src/locales/ru/common.js b/frontend/src/locales/ru/common.js index bb9473ef..93ea56bb 100644 --- a/frontend/src/locales/ru/common.js +++ b/frontend/src/locales/ru/common.js @@ -176,15 +176,18 @@ const TRANSLATIONS = { title: "Режим чата", chat: { title: "Чат", - "desc-start": "будет предоставлять ответы с общей информацией LLM", - and: "и", - "desc-end": "найденный контекст документов.", + description: + "предоставит ответы, используя общие знания, содержащиеся в LLM, и контекст документа, который был предоставлен.
Для использования инструментов необходимо использовать команду @agent.", }, query: { title: "Запрос", - "desc-start": "будет предоставлять ответы", - only: "только", - "desc-end": "если найден контекст документов.", + description: + "предоставит ответы только в том случае, если будет найден контекст документа. Для использования инструментов необходимо использовать команду @agent.", + }, + automatic: { + title: "Авто", + description: + "автоматически будет использовать инструменты, если модель и поставщик поддерживают вызов инструментов.<br />
Если вызов инструментов не поддерживается, вам потребуется использовать команду `@agent` для использования инструментов.", }, }, history: { diff --git a/frontend/src/locales/tr/common.js b/frontend/src/locales/tr/common.js index 96e5e1d4..ca0cad0f 100644 --- a/frontend/src/locales/tr/common.js +++ b/frontend/src/locales/tr/common.js @@ -176,15 +176,18 @@ const TRANSLATIONS = { title: "Sohbet Modu", chat: { title: "Sohbet", - "desc-start": "LLM'nin genel bilgisiyle yanıtlar sunar", - and: "ve", - "desc-end": "bulunan belge bağlamını ekler.", + description: + "LLM'nin genel bilgisi ve bulunan doküman bağlamıyla cevaplar sunacaktır. Araçları kullanmak için @agent komutunu kullanmanız gerekecektir.", }, query: { title: "Sorgu", - "desc-start": "yanıtları", - only: "sadece", - "desc-end": "belge bağlamı bulunduğunda sunar.", + description: + "yalnızca doküman bağlamı bulunursa yanıtlar sağlayacaktır.İhtiyaç duyacağınız araçları kullanmak için @agent komutunu kullanmanız gerekecektir.", + }, + automatic: { + title: "Oto", + description: + "
Varsa, model ve sağlayıcı tarafından desteklenen yerel araçları otomatik olarak kullanacaktır. Yerel araç kullanımı desteklenmiyorsa, araçları kullanmak için @agent komutunu kullanmanız gerekecektir.", }, }, history: { diff --git a/frontend/src/locales/vn/common.js b/frontend/src/locales/vn/common.js index be950076..efde4c5e 100644 --- a/frontend/src/locales/vn/common.js +++ b/frontend/src/locales/vn/common.js @@ -176,15 +176,18 @@ const TRANSLATIONS = { title: "Chế độ trò chuyện", chat: { title: "Trò chuyện", - "desc-start": "sẽ cung cấp câu trả lời với kiến thức chung của LLM", - and: "và", - "desc-end": "ngữ cảnh tài liệu được tìm thấy.", + description: + "sẽ cung cấp câu trả lời dựa trên kiến thức chung của LLM và ngữ cảnh tài liệu được cung cấp.
Bạn cần sử dụng lệnh @agent để sử dụng các công cụ.", }, query: { title: "Truy vấn", - "desc-start": "sẽ cung cấp câu trả lời", - only: "chỉ", - "desc-end": "khi tìm thấy ngữ cảnh tài liệu.", + description: + "sẽ cung cấp câu trả lời chỉ khi ngữ cảnh của tài liệu được tìm thấy.
Bạn cần sử dụng lệnh @agent để sử dụng các công cụ.", + }, + automatic: { + title: "Tự động", + description: + "sẽ tự động sử dụng các công cụ nếu mô hình và nhà cung cấp hỗ trợ gọi công cụ gốc.
Nếu không hỗ trợ gọi công cụ gốc, bạn sẽ cần sử dụng lệnh `@agent` để sử dụng các công cụ.", }, }, history: { diff --git a/frontend/src/locales/zh/common.js b/frontend/src/locales/zh/common.js index 8d0505a6..cec21fd4 100644 --- a/frontend/src/locales/zh/common.js +++ b/frontend/src/locales/zh/common.js @@ -177,15 +177,18 @@ const TRANSLATIONS = { title: "聊天模式", chat: { title: "聊天", - "desc-start": "将提供 LLM 的一般知识", - and: "和", - "desc-end": "找到的文档上下文的答案。", + description: + "将提供答案,利用LLM的通用知识和相关文档的上下文信息。您需要使用 `@agent` 命令来使用工具。", }, query: { title: "查询", - "desc-start": "将会提供答案", - only: "仅当", - "desc-end": "找到文档上下文时。", + description: + "仅在找到文档上下文时提供答案。您需要使用 @agent 命令来使用工具。", + }, + automatic: { + title: "自动", + description: + "如果模型和提供商支持原生工具调用,则会自动使用这些工具。<br />
如果不支持原生工具调用,则需要使用 `@agent` 命令来使用工具。", }, }, history: { diff --git a/frontend/src/locales/zh_TW/common.js b/frontend/src/locales/zh_TW/common.js index 08a3fba6..6bde4bd9 100644 --- a/frontend/src/locales/zh_TW/common.js +++ b/frontend/src/locales/zh_TW/common.js @@ -169,15 +169,18 @@ const TRANSLATIONS = { title: "對話模式", chat: { title: "對話", - "desc-start": "會結合 LLM 的一般知識", - and: "以及", - "desc-end": "已找到的文件內容來回答。", + description: + "將提供答案,利用 LLM 的一般知識和相關文件內容。您需要使用 `@agent` 命令來使用工具。", }, query: { title: "查詢", - "desc-start": "會", - only: "只", - "desc-end": "在找到文件內容時回答。", + description: + "僅在找到文件上下文時才會提供答案。您需要使用 @agent 指令來使用工具。", + }, + automatic: { + title: "自動", + description: + "如果模型和供應商支援原生工具調用,則系統會自動使用這些工具。<br />
如果原生工具調用不受支援,您需要使用 `@agent` 命令來使用工具。", }, }, history: { diff --git a/frontend/src/models/workspace.js b/frontend/src/models/workspace.js index a2799da1..b36788cb 100644 --- a/frontend/src/models/workspace.js +++ b/frontend/src/models/workspace.js @@ -569,6 +569,27 @@ const Workspace = { return response; }, + /** + * Checks if the agent command is available for a workspace + * by checking if the workspace's agent provider supports native tool calling. + * + * This can be model specific or enabled via ENV flag. + * @param {string} slug - workspace slug + * @returns {Promise<{showAgentCommand: boolean}>} + */ + agentCommandAvailable: async function (slug = null) { + if (!slug) return { showAgentCommand: true }; + return await fetch( + `${API_BASE}/workspace/${slug}/is-agent-command-available`, + { headers: baseHeaders() } + ) + .then((res) => res.json()) + .catch((e) => { + console.error(e); + return { showAgentCommand: true }; + }); + }, + threads: WorkspaceThread, }; diff --git a/frontend/src/pages/Main/Home/index.jsx b/frontend/src/pages/Main/Home/index.jsx index 60a9d947..6ccf7545 100644 --- a/frontend/src/pages/Main/Home/index.jsx +++ b/frontend/src/pages/Main/Home/index.jsx @@ -64,11 +64,15 @@ export default function Home() { async function init() { const ws = await getTargetWorkspace(); if (ws) { - const [suggestedMessages, pfpUrl] = await Promise.all([ + const [suggestedMessages, { showAgentCommand }] = await Promise.all([ Workspace.getSuggestedMessages(ws.slug), - Workspace.fetchPfp(ws.slug), + Workspace.agentCommandAvailable(ws.slug), ]); - setWorkspace({ ...ws, suggestedMessages, pfpUrl }); + setWorkspace({ + ...ws, + suggestedMessages, + showAgentCommand, + }); } setWorkspaceLoading(false); } @@ -289,6 +293,7 @@ function HomeContent({ workspace, setWorkspace, threadSlug, setThreadSlug }) { {t("main-page.greeting")}
@@ -14,6 +16,17 @@ export default function ChatModeSelection({ workspace, setHasChanges }) {
+ @@ -32,29 +45,31 @@ export default function ChatModeSelection({ workspace, setHasChanges }) { setChatMode("query"); setHasChanges(true); }} - className="transition-bg duration-200 px-6 py-1 text-md text-white/60 disabled:text-white bg-transparent disabled:bg-[#687280] rounded-md" + className="border-none transition-bg duration-200 px-6 py-1 text-md text-white/60 disabled:text-white bg-transparent disabled:bg-[#687280] rounded-md hover:bg-white/10 light:hover:bg-black/10" > {t("chat.mode.query.title")}
-

- {chatMode === "chat" ? ( - <> - {t("chat.mode.chat.title")}{" "} - {t("chat.mode.chat.desc-start")}{" "} - {t("chat.mode.chat.and")}{" "} - {t("chat.mode.chat.desc-end")} - - ) : ( - <> - {t("chat.mode.query.title")}{" "} - {t("chat.mode.query.desc-start")}{" "} - {t("chat.mode.query.only")}{" "} - {t("chat.mode.query.desc-end")} - - )} -

+
); } + +/** + * A component that displays the explanation for a given chat mode. + * @param {'automatic' | 'chat' | 'query'} chatMode - The chat mode to display the explanation for. + * @returns {JSX.Element} The component to display the explanation for the given chat mode. + */ +function ChatModeExplanation({ chatMode = "chat" }) { + const { t } = useTranslation(); + return ( +

+ {t(`chat.mode.${chatMode}.title`)}{" "} + , br:
}} + /> +

+ ); +} diff --git a/server/endpoints/api/workspace/index.js b/server/endpoints/api/workspace/index.js index 040fe4af..c2e4ef28 100644 --- a/server/endpoints/api/workspace/index.js +++ b/server/endpoints/api/workspace/index.js @@ -598,13 +598,13 @@ function apiWorkspaceEndpoints(app) { #swagger.tags = ['Workspaces'] #swagger.description = 'Execute a chat with a workspace' #swagger.requestBody = { - description: 'Send a prompt to the workspace and the type of conversation (query or chat).
Query: Will not use LLM unless there are relevant sources from vectorDB & does not recall chat history.
Chat: Uses LLM general knowledge w/custom embeddings to produce output, uses rolling chat history.
Attachments: Can include images and documents.
     Document attachments: must have the mime type application/anythingllm-document - otherwise it will be passed to the LLM as an image and may fail to process. This uses the built-in document processor to first parse the document to text before injecting it into the context window.', + description: 'Send a prompt to the workspace and the type of conversation (automatic, query or chat).
Query: Will not use LLM unless there are relevant sources from vectorDB & does not recall chat history.
Automatic: Will use tool-calling if the provider supports native tool calling without needing to invoke @agent.
Chat: Uses LLM general knowledge w/custom embeddings to produce output, uses rolling chat history.
Attachments: Can include images and documents.
     Document attachments: must have the mime type application/anythingllm-document - otherwise it will be passed to the LLM as an image and may fail to process. This uses the built-in document processor to first parse the document to text before injecting it into the context window.', required: true, content: { "application/json": { example: { message: "What is AnythingLLM?", - mode: "query | chat", + mode: "automatic | query | chat", sessionId: "identifier-to-partition-chats-by-external-id", attachments: [ { @@ -728,13 +728,13 @@ function apiWorkspaceEndpoints(app) { #swagger.tags = ['Workspaces'] #swagger.description = 'Execute a streamable chat with a workspace' #swagger.requestBody = { - description: 'Send a prompt to the workspace and the type of conversation (query or chat).
Query: Will not use LLM unless there are relevant sources from vectorDB & does not recall chat history.
Chat: Uses LLM general knowledge w/custom embeddings to produce output, uses rolling chat history.
Attachments: Can include images and documents.
     Document attachments: must have the mime type application/anythingllm-document - otherwise it will be passed to the LLM as an image and may fail to process. This uses the built-in document processor to first parse the document to text before injecting it into the context window.', + description: 'Send a prompt to the workspace and the type of conversation (automatic, query or chat).
Query: Will not use LLM unless there are relevant sources from vectorDB & does not recall chat history.
Automatic: Will use tool-calling if the provider supports native tool calling without needing to invoke @agent.
Chat: Uses LLM general knowledge w/custom embeddings to produce output, uses rolling chat history.
Attachments: Can include images and documents.
     Document attachments: must have the mime type application/anythingllm-document - otherwise it will be passed to the LLM as an image and may fail to process. This uses the built-in document processor to first parse the document to text before injecting it into the context window.', required: true, content: { "application/json": { example: { message: "What is AnythingLLM?", - mode: "query | chat", + mode: "automatic | query | chat", sessionId: "identifier-to-partition-chats-by-external-id", attachments: [ { diff --git a/server/endpoints/workspaces.js b/server/endpoints/workspaces.js index e6d77a0e..301ea1ff 100644 --- a/server/endpoints/workspaces.js +++ b/server/endpoints/workspaces.js @@ -1059,6 +1059,23 @@ function workspaceEndpoints(app) { } ); + app.get( + "/workspace/:slug/is-agent-command-available", + [validatedRequest, flexUserRoleValid([ROLES.all]), validWorkspaceSlug], + async (_, response) => { + try { + response.status(200).json({ + showAgentCommand: await Workspace.isAgentCommandAvailable( + response.locals.workspace + ), + }); + } catch (error) { + console.error("Error checking if agent command is available:", error); + response.status(500).json({ showAgentCommand: true }); + } + } + ); + // Parsed Files in separate endpoint just to keep the workspace endpoints clean workspaceParsedFilesEndpoints(app); } diff --git a/server/models/workspace.js b/server/models/workspace.js index 219030df..a5e85d35 100644 --- a/server/models/workspace.js +++ b/server/models/workspace.js @@ -33,6 +33,7 @@ function isNullOrNaN(value) { */ const Workspace = { + VALID_CHAT_MODES: ["chat", "query", "automatic"], defaultPrompt: SystemSettings.saneDefaultSystemPrompt, // Used for generic updates so we can validate keys in request body @@ -93,7 +94,7 @@ const Workspace = { return n; }, chatMode: (value) => { - if (!value || !["chat", "query"].includes(value)) return "chat"; + if (!value || !Workspace.VALID_CHAT_MODES.includes(value)) return "chat"; return value; }, 
chatProvider: (value) => { @@ -205,6 +206,7 @@ const Workspace = { const workspace = await prisma.workspaces.create({ data: { name: this.validations.name(name), + chatMode: "chat", // default to chat mode for now ...this.validateFields(additionalFields), slug, }, @@ -609,6 +611,46 @@ const Workspace = { return false; } }, + + /** + * Checks if the workspace's chat provider/model waterfall supports native tool calling. + * @param {Workspace} workspace - The workspace object to check + * @returns {Promise} + */ + supportsNativeToolCalling: async function (workspace = {}) { + if (!workspace) return false; + const { getBaseLLMProviderModel } = require("../utils/helpers"); + const AIbitat = require("../utils/agents/aibitat"); + const provider = + workspace?.agentProvider ?? + workspace?.chatProvider ?? + process.env.LLM_PROVIDER; + const model = + workspace?.agentModel ?? + workspace?.chatModel ?? + getBaseLLMProviderModel({ provider }); + const agentConfig = { provider, model }; + const agentProvider = new AIbitat(agentConfig).getProviderForConfig( + agentConfig + ); + const nativeToolCalling = await agentProvider.supportsNativeToolCalling?.(); + return nativeToolCalling; + }, + + /** + * Checks if the agent command is available for a workspace + * by checking if the workspace's agent provider supports native tool calling. + * - If the workspaces chat provider/model supports native tool calling, then the agent command is NOT available + * as it will be assumed the model is capable of handling tool calls. + * Otherwise, the agent command is available and the user must opt-in to "@agent" to use tool calls. 
+ * @param {Workspace} workspace - The workspace object to check + * @returns {Promise} + */ + isAgentCommandAvailable: async function (workspace) { + if (workspace.chatMode !== "automatic") return true; + const nativeToolCalling = await this.supportsNativeToolCalling(workspace); + return nativeToolCalling === false; + }, }; module.exports = { Workspace }; diff --git a/server/swagger/openapi.json b/server/swagger/openapi.json index aa829ee5..ebfadd00 100644 --- a/server/swagger/openapi.json +++ b/server/swagger/openapi.json @@ -2315,13 +2315,13 @@ } }, "requestBody": { - "description": "Send a prompt to the workspace and the type of conversation (query or chat).
Query: Will not use LLM unless there are relevant sources from vectorDB & does not recall chat history.
Chat: Uses LLM general knowledge w/custom embeddings to produce output, uses rolling chat history.
Attachments: Can include images and documents.
     Document attachments: must have the mime type application/anythingllm-document - otherwise it will be passed to the LLM as an image and may fail to process. This uses the built-in document processor to first parse the document to text before injecting it into the context window.", + "description": "Send a prompt to the workspace and the type of conversation (automatic, query or chat).
Query: Will not use LLM unless there are relevant sources from vectorDB & does not recall chat history.
Automatic: Will use tool-calling if the provider supports native tool calling without needing to invoke @agent.
Chat: Uses LLM general knowledge w/custom embeddings to produce output, uses rolling chat history.
Attachments: Can include images and documents.
     Document attachments: must have the mime type application/anythingllm-document - otherwise it will be passed to the LLM as an image and may fail to process. This uses the built-in document processor to first parse the document to text before injecting it into the context window.", "required": true, "content": { "application/json": { "example": { "message": "What is AnythingLLM?", - "mode": "query | chat", + "mode": "automatic | query | chat", "sessionId": "identifier-to-partition-chats-by-external-id", "attachments": [ { @@ -2423,13 +2423,13 @@ } }, "requestBody": { - "description": "Send a prompt to the workspace and the type of conversation (query or chat).
Query: Will not use LLM unless there are relevant sources from vectorDB & does not recall chat history.
Chat: Uses LLM general knowledge w/custom embeddings to produce output, uses rolling chat history.
Attachments: Can include images and documents.
     Document attachments: must have the mime type application/anythingllm-document - otherwise it will be passed to the LLM as an image and may fail to process. This uses the built-in document processor to first parse the document to text before injecting it into the context window.", + "description": "Send a prompt to the workspace and the type of conversation (automatic, query or chat).
Query: Will not use LLM unless there are relevant sources from vectorDB & does not recall chat history.
Automatic: Will use tool-calling if the provider supports native tool calling without needing to invoke @agent.
Chat: Uses LLM general knowledge w/custom embeddings to produce output, uses rolling chat history.
Attachments: Can include images and documents.
     Document attachments: must have the mime type application/anythingllm-document - otherwise it will be passed to the LLM as an image and may fail to process. This uses the built-in document processor to first parse the document to text before injecting it into the context window.", "required": true, "content": { "application/json": { "example": { "message": "What is AnythingLLM?", - "mode": "query | chat", + "mode": "automatic | query | chat", "sessionId": "identifier-to-partition-chats-by-external-id", "attachments": [ { diff --git a/server/utils/agents/aibitat/index.js b/server/utils/agents/aibitat/index.js index 8fa28aea..5e8d2d2b 100644 --- a/server/utils/agents/aibitat/index.js +++ b/server/utils/agents/aibitat/index.js @@ -606,10 +606,22 @@ ${this.getHistory({ to: route.to }) } // This is normal chat between user<->agent - return this.getHistory(route).map((c) => ({ - content: c.content, - role: c.from === route.to ? "user" : "assistant", - })); + // Include attachments if present (for vision/multimodal support) + return this.getHistory(route).map((c) => { + const message = { + content: c.content, + role: c.from === route.to ? 
"user" : "assistant", + }; + // Pass attachments through for user messages that have them + if ( + c.attachments && + c.attachments.length > 0 && + message.role === "user" + ) { + message.attachments = c.attachments; + } + return message; + }); } /** @@ -626,6 +638,24 @@ ${this.getHistory({ to: route.to }) async reply(route) { const fromConfig = this.getAgentConfig(route.from); const chatHistory = this.getOrFormatNodeChatHistory(route); + + // Fetch fresh parsed file context and inject into the last user message + if (this.fetchParsedFileContext) { + const parsedContext = await this.fetchParsedFileContext(); + if (parsedContext) { + // Find the last user message and append context to it + for (let i = chatHistory.length - 1; i >= 0; i--) { + if (chatHistory[i].role === "user") { + chatHistory[i] = { + ...chatHistory[i], + content: chatHistory[i].content + parsedContext, + }; + break; + } + } + } + } + const messages = [ { content: fromConfig.role, @@ -674,6 +704,25 @@ ${this.getHistory({ to: route.to }) return content; } + /** + * Wrapper for provider calls that catches errors and converts them to APIError. + * This ensures provider errors are properly surfaced to the user instead of crashing. + * + * @param {Function} providerCall - Async function that calls the provider + * @returns {Promise} - The result of the provider call + * @throws {APIError} - If the provider call fails + */ + async #safeProviderCall(providerCall) { + try { + return await providerCall(); + } catch (error) { + console.error(`[AIbitat] Provider error: ${error.message}`, { + hide_meta: true, + }); + throw new APIError(`The agent model failed to respond: ${error.message}`); + } + } + /** * Handle the async (streaming) execution of the provider * with tool calls. 
@@ -696,11 +745,9 @@ ${this.getHistory({ to: route.to }) this?.socket?.send(type, data); }; - /** @type {{ functionCall: { name: string, arguments: string }, textResponse: string, uuid: string }} */ - const completionStream = await provider.stream( - messages, - functions, - eventHandler + /** @type {{ functionCall: { name: string, arguments: string }, textResponse: string }} */ + const completionStream = await this.#safeProviderCall(() => + provider.stream(messages, functions, eventHandler) ); if (completionStream.functionCall) { @@ -712,14 +759,9 @@ ${this.getHistory({ to: route.to }) `Maximum tool call limit (${this.maxToolCalls}) reached. Generating a final response from what I have so far.` ); - const finalStream = await provider.stream(messages, [], eventHandler); - const finalUuid = finalStream?.uuid || v4(); - eventHandler?.("reportStreamEvent", { - type: "usageMetrics", - uuid: finalUuid, - metrics: provider.getUsage(), - }); - this?.flushCitations?.(finalUuid); + const finalStream = await this.#safeProviderCall(() => + provider.stream(messages, [], eventHandler) + ); const finalResponse = finalStream?.textResponse || "I reached the maximum number of tool calls allowed for a single response. Here is what I have so far based on the tools I was able to run."; @@ -847,7 +889,9 @@ ${this.getHistory({ to: route.to }) }; // get the chat completion - const completion = await provider.complete(messages, functions); + const completion = await this.#safeProviderCall(() => + provider.complete(messages, functions) + ); if (completion.functionCall) { if (depth >= this.maxToolCalls) { @@ -858,7 +902,9 @@ ${this.getHistory({ to: route.to }) `Maximum tool call limit (${this.maxToolCalls}) reached. 
Generating a final response from what I have so far.` ); - const finalCompletion = await provider.complete(messages, []); + const finalCompletion = await this.#safeProviderCall(() => + provider.complete(messages, []) + ); eventHandler?.("reportStreamEvent", { type: "usageMetrics", uuid: msgUUID, @@ -959,9 +1005,10 @@ ${this.getHistory({ to: route.to }) * Provide a feedback where it was interrupted if you want to. * * @param feedback The feedback to the interruption if any. + * @param attachments Optional attachments (images) to include with the feedback. * @returns */ - async continue(feedback) { + async continue(feedback, attachments = []) { const lastChat = this._chats.at(-1); if (!lastChat || lastChat.state !== "interrupt") { throw new Error("No chat to continue"); @@ -981,6 +1028,7 @@ ${this.getHistory({ to: route.to }) from, to, content: feedback, + ...(attachments?.length > 0 ? { attachments } : {}), }; // register the message in the chat history diff --git a/server/utils/agents/aibitat/plugins/chat-history.js b/server/utils/agents/aibitat/plugins/chat-history.js index d2a05be0..9058d51d 100644 --- a/server/utils/agents/aibitat/plugins/chat-history.js +++ b/server/utils/agents/aibitat/plugins/chat-history.js @@ -22,12 +22,16 @@ const chatHistory = { // the USER and the last being from anyone other than the user. if (prev.from !== "USER" || last.from === "USER") return; + // Extract attachments from user message if present + const attachments = prev.attachments || []; + // If we have a post-reply flow we should save the chat using this special flow // so that post save cleanup and other unique properties can be run as opposed to regular chat. 
if (aibitat.hasOwnProperty("_replySpecialAttributes")) { await this._storeSpecial(aibitat, { prompt: prev.content, response: last.content, + attachments, options: aibitat._replySpecialAttributes, }); delete aibitat._replySpecialAttributes; @@ -37,11 +41,15 @@ const chatHistory = { await this._store(aibitat, { prompt: prev.content, response: last.content, + attachments, }); } catch {} }); }, - _store: async function (aibitat, { prompt, response } = {}) { + _store: async function ( + aibitat, + { prompt, response, attachments = [] } = {} + ) { const invocation = aibitat.handlerProps.invocation; const metrics = aibitat.provider?.getUsage?.() ?? {}; const citations = aibitat._pendingCitations ?? []; @@ -52,6 +60,7 @@ const chatHistory = { text: response, sources: citations, type: "chat", + attachments, metrics, }, user: { id: invocation?.user_id || null }, @@ -61,7 +70,7 @@ const chatHistory = { }, _storeSpecial: async function ( aibitat, - { prompt, response, options = {} } = {} + { prompt, response, attachments = [], options = {} } = {} ) { const invocation = aibitat.handlerProps.invocation; const metrics = aibitat.provider?.getUsage?.() ?? {}; @@ -78,6 +87,7 @@ const chatHistory = { ? options.storedResponse(response) : response, type: options?.saveAsType ?? 
"chat", + attachments, metrics, }, user: { id: invocation?.user_id || null }, diff --git a/server/utils/agents/aibitat/plugins/websocket.js b/server/utils/agents/aibitat/plugins/websocket.js index 25449186..1cdd07db 100644 --- a/server/utils/agents/aibitat/plugins/websocket.js +++ b/server/utils/agents/aibitat/plugins/websocket.js @@ -96,13 +96,16 @@ const websocket = { }); aibitat.onInterrupt(async (node) => { - const feedback = await socket.askForFeedback(socket, node); + const { feedback, attachments } = await socket.askForFeedback( + socket, + node + ); if (WEBSOCKET_BAIL_COMMANDS.includes(feedback)) { socket.close(); return; } - await aibitat.continue(feedback); + await aibitat.continue(feedback, attachments); }); /** @@ -110,7 +113,7 @@ const websocket = { * * @param socket The content to summarize. // AIbitatWebSocket & { receive: any, echo: any } * @param node The chat node // { from: string; to: string } - * @returns The summarized content. + * @returns {{ feedback: string, attachments: Array }} The feedback and any attachments. 
*/ socket.askForFeedback = (socket, node) => { socket.awaitResponse = (question = "waiting...") => { @@ -123,7 +126,10 @@ const websocket = { if (data.type !== "awaitingFeedback") return; delete socket.handleFeedback; clearTimeout(socketTimeout); - resolve(data.feedback); + resolve({ + feedback: data.feedback, + attachments: data.attachments || [], + }); return; }; @@ -133,7 +139,7 @@ const websocket = { `Client took too long to respond, chat thread is dead after ${SOCKET_TIMEOUT_MS}ms` ) ); - resolve("exit"); + resolve({ feedback: "exit", attachments: [] }); return; }, SOCKET_TIMEOUT_MS); }); diff --git a/server/utils/agents/aibitat/providers/ai-provider.js b/server/utils/agents/aibitat/providers/ai-provider.js index 57e4329a..740d7ae1 100644 --- a/server/utils/agents/aibitat/providers/ai-provider.js +++ b/server/utils/agents/aibitat/providers/ai-provider.js @@ -114,6 +114,29 @@ class Provider { return this._client; } + /** + * Whether this provider supports native tool calling via the ENV flag. + * @param {string} providerTag - The tag of the provider to check (e.g. "bedrock", "openrouter", "groq", etc.). + * @returns {boolean} + */ + supportsNativeToolCallingViaEnv(providerTag = "") { + if (!("PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING" in process.env)) return false; + if (!providerTag) return false; + return ( + process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes( + providerTag + ) || false + ); + } + + /** + * Whether this provider supports native OpenAI-compatible tool calling. + * @returns {boolean|Promise} + */ + supportsNativeToolCalling() { + return false; + } + /** * * @param {string} provider - the string key of the provider LLM being loaded. @@ -439,6 +462,37 @@ class Provider { return false; } + /** + * Format a single message with attachments (images) for multimodal content. + * Transforms a message with attachments into the OpenAI-compatible multimodal format. + * Can be overridden by provider subclasses for provider-specific formats. 
+ * @param {Object} message - The message to format + * @returns {Object} - Message formatted for the API + */ + formatMessageWithAttachments(message) { + if (!message.attachments || message.attachments.length === 0) { + return message; + } + + // Transform message with attachments into multimodal format + const content = [{ type: "text", text: message.content }]; + for (const attachment of message.attachments) { + content.push({ + type: "image_url", + image_url: { + url: attachment.contentString, + }, + }); + } + + // Return message without attachments property, with content as array + const { attachments: _, ...rest } = message; + return { + ...rest, + content, + }; + } + /** * Resets the usage metrics to zero and starts the request timer. * Call this before each completion to ensure accurate per-call metrics. @@ -457,6 +511,17 @@ class Provider { }; } + /** + * Formats an array of messages to handle attachments (images) for multimodal content. + * @param {Array<{role: string, content: string, attachments?: Array}>} messages + * @returns {Array} - Messages formatted for the API + */ + formatMessagesWithAttachments(messages = []) { + return messages.map((message) => + this.formatMessageWithAttachments(message) + ); + } + /** * Updates the stored usage metrics from a provider response. * Override in subclasses to handle provider-specific usage formats. @@ -505,10 +570,11 @@ class Provider { async stream(messages, functions = [], eventHandler = null) { this.providerLog("Provider.stream - will process this chat completion."); const msgUUID = v4(); + const formattedMessages = this.formatMessagesWithAttachments(messages); const stream = await this.client.chat.completions.create({ model: this.model, stream: true, - messages, + messages: formattedMessages, ...(Array.isArray(functions) && functions?.length > 0 ? 
{ functions } : {}), diff --git a/server/utils/agents/aibitat/providers/anthropic.js b/server/utils/agents/aibitat/providers/anthropic.js index 343d96b3..b8975e69 100644 --- a/server/utils/agents/aibitat/providers/anthropic.js +++ b/server/utils/agents/aibitat/providers/anthropic.js @@ -26,6 +26,15 @@ class AnthropicProvider extends Provider { this.model = model; } + /** + * Whether this provider supports native OpenAI-compatible tool calling. + * - Anthropic always supports tool calling. + * @returns {boolean} + */ + supportsNativeToolCalling() { + return true; + } + /** * Parses the cache control ENV variable * @@ -72,6 +81,18 @@ class AnthropicProvider extends Provider { ]; } + /** + * Parse a data URL into media type and base64 data + * @param {string} dataUrl - Data URL like "data:image/jpeg;base64,/9j/..." + * @returns {{mediaType: string, data: string}|null} + */ + #parseDataUrl(dataUrl) { + if (!dataUrl || !dataUrl.startsWith("data:")) return null; + const matches = dataUrl.match(/^data:([^;]+);base64,(.+)$/); + if (!matches) return null; + return { mediaType: matches[1], data: matches[2] }; + } + #prepareMessages(messages = []) { // Extract system prompt and filter out any system messages from the main chat. let systemPrompt = @@ -120,6 +141,23 @@ class AnthropicProvider extends Provider { item.type !== "text" || (item.text && item.text.trim().length > 0) ); + // Add image attachments if present (for vision/multimodal support) + if (message.attachments && message.attachments.length > 0) { + for (const attachment of message.attachments) { + const parsed = this.#parseDataUrl(attachment.contentString); + if (parsed) { + content.push({ + type: "image", + source: { + type: "base64", + media_type: parsed.mediaType, + data: parsed.data, + }, + }); + } + } + } + if (content.length === 0) return processedMessages; // Add a text block to assistant messages with tool use if one doesn't exist. 
@@ -139,7 +177,9 @@ class AnthropicProvider extends Provider { // Merge consecutive messages from the same role. lastMessage.content.push(...content); } else { - processedMessages.push({ ...message, content }); + // Don't pass attachments to the final message object + const { attachments: _, ...restOfMessage } = message; + processedMessages.push({ ...restOfMessage, content }); } return processedMessages; diff --git a/server/utils/agents/aibitat/providers/azure.js b/server/utils/agents/aibitat/providers/azure.js index 35f66aa6..8dde7601 100644 --- a/server/utils/agents/aibitat/providers/azure.js +++ b/server/utils/agents/aibitat/providers/azure.js @@ -28,6 +28,15 @@ class AzureOpenAiProvider extends Provider { return true; } + /** + * Whether this provider supports native OpenAI-compatible tool calling. + * - Azure OpenAI always supports tool calling. + * @returns {boolean} + */ + supportsNativeToolCalling() { + return true; + } + /** * Stream a chat completion from Azure OpenAI with tool calling. 
* diff --git a/server/utils/agents/aibitat/providers/bedrock.js b/server/utils/agents/aibitat/providers/bedrock.js index 914d2ebc..613d2ec1 100644 --- a/server/utils/agents/aibitat/providers/bedrock.js +++ b/server/utils/agents/aibitat/providers/bedrock.js @@ -57,9 +57,7 @@ class AWSBedrockProvider extends InheritMultiple([Provider, UnTooled]) { */ supportsNativeToolCalling() { if (this._supportsToolCalling !== null) return this._supportsToolCalling; - const supportsToolCalling = - process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes("bedrock"); - + const supportsToolCalling = this.supportsNativeToolCallingViaEnv("bedrock"); if (supportsToolCalling) this.providerLog("AWS Bedrock native tool calling is ENABLED via ENV."); else @@ -95,26 +93,53 @@ class AWSBedrockProvider extends InheritMultiple([Provider, UnTooled]) { // or otherwise absorb headaches that can arise from Ollama models #convertToLangchainPrototypes(chats = []) { const langchainChats = []; - const roleToMessageMap = { - system: SystemMessage, - user: HumanMessage, - assistant: AIMessage, - }; for (const chat of chats) { - if (!roleToMessageMap.hasOwnProperty(chat.role)) continue; - const MessageClass = roleToMessageMap[chat.role]; - langchainChats.push(new MessageClass({ content: chat.content })); + if (chat.role === "system") { + langchainChats.push(new SystemMessage({ content: chat.content })); + } else if (chat.role === "user") { + langchainChats.push( + new HumanMessage({ + content: this.#formatContentWithAttachments(chat), + }) + ); + } else if (chat.role === "assistant") { + langchainChats.push(new AIMessage({ content: chat.content })); + } } return langchainChats; } + /** + * Format message content with attachments for Langchain multimodal support. + * Transforms a message with attachments into the format Langchain expects. 
+ * @param {Object} chat - The chat message + * @returns {string|Array} Content as string or multimodal array + */ + #formatContentWithAttachments(chat) { + if (!chat.attachments || chat.attachments.length === 0) { + return chat.content; + } + + const content = [{ type: "text", text: chat.content }]; + for (const attachment of chat.attachments) { + content.push({ + type: "image_url", + image_url: { + url: attachment.contentString, + }, + }); + } + return content; + } + /** * Convert aibitat message history to Langchain message prototypes with * proper tool call / tool result handling for native tool calling. * role:"function" messages (from previous aibitat tool runs) are converted * to AIMessage(tool_calls) + ToolMessage pairs that Langchain expects. + * Also handles image attachments for multimodal support. * @param {Array} chats - The aibitat message history. * @returns {Array} Langchain message instances. */ @@ -176,7 +201,11 @@ class AWSBedrockProvider extends InheritMultiple([Provider, UnTooled]) { } else if (chat.role === "system") { langchainChats.push(new SystemMessage({ content: chat.content })); } else if (chat.role === "user") { - langchainChats.push(new HumanMessage({ content: chat.content })); + langchainChats.push( + new HumanMessage({ + content: this.#formatContentWithAttachments(chat), + }) + ); } else if (chat.role === "assistant") { langchainChats.push(new AIMessage({ content: chat.content })); } diff --git a/server/utils/agents/aibitat/providers/cohere.js b/server/utils/agents/aibitat/providers/cohere.js index f7707b83..bdb7dc85 100644 --- a/server/utils/agents/aibitat/providers/cohere.js +++ b/server/utils/agents/aibitat/providers/cohere.js @@ -1,10 +1,14 @@ -const { CohereClient } = require("cohere-ai"); +const { CohereClientV2 } = require("cohere-ai"); const Provider = require("./ai-provider"); const InheritMultiple = require("./helpers/classes"); const UnTooled = require("./helpers/untooled"); const { v4 } = require("uuid"); const { 
safeJsonParse } = require("../../../http"); +/** + * The agent provider for the Cohere AI provider. + * Uses the v2 API which supports OpenAI-compatible message format and vision. + */ class CohereProvider extends InheritMultiple([Provider, UnTooled]) { model; @@ -12,7 +16,7 @@ class CohereProvider extends InheritMultiple([Provider, UnTooled]) { const { model = process.env.COHERE_MODEL_PREF || "command-r-08-2024" } = config; super(); - const client = new CohereClient({ + const client = new CohereClientV2({ token: process.env.COHERE_API_KEY, }); this._client = client; @@ -37,35 +41,45 @@ class CohereProvider extends InheritMultiple([Provider, UnTooled]) { return false; } - #convertChatHistoryCohere(chatHistory = []) { - let cohereHistory = []; - chatHistory.forEach((message) => { - switch (message.role) { - case "SYSTEM": - case "system": - cohereHistory.push({ role: "SYSTEM", message: message.content }); - break; - case "USER": - case "user": - cohereHistory.push({ role: "USER", message: message.content }); - break; - case "CHATBOT": - case "assistant": - cohereHistory.push({ role: "CHATBOT", message: message.content }); - break; - } - }); + /** + * Format a message with attachments for Cohere's v2 API. + * Cohere SDK uses camelCase (imageUrl) instead of snake_case (image_url). + * @param {Object} message - Message with potential attachments + * @returns {Object} Formatted message for Cohere SDK + */ + formatMessageWithAttachments(message) { + if (!message.attachments || message.attachments.length === 0) { + return message; + } - return cohereHistory; + const content = [{ type: "text", text: message.content }]; + for (const attachment of message.attachments) { + content.push({ + type: "image_url", + imageUrl: { + url: attachment.contentString, + }, + }); + } + + const { attachments: _, ...rest } = message; + return { + ...rest, + content, + }; } + /** + * Stream a chat completion using the Cohere v2 API. 
+ * The v2 API accepts OpenAI-compatible message format directly, + * including multimodal content arrays for vision support. + * @param {Object} options - Options containing messages array + * @returns {AsyncIterable} Stream of events from Cohere + */ async #handleFunctionCallStream({ messages = [] }) { - const userPrompt = messages[messages.length - 1]?.content || ""; - const history = messages.slice(0, -1); return await this.client.chatStream({ model: this.model, - chatHistory: this.#convertChatHistoryCohere(history), - message: userPrompt, + messages: messages, }); } @@ -92,12 +106,14 @@ class CohereProvider extends InheritMultiple([Provider, UnTooled]) { }); for await (const event of stream) { - if (event.eventType !== "text-generation") continue; - textResponse += event.text; + if (event.type !== "content-delta") continue; + const text = event.delta?.message?.content?.text || ""; + if (!text) continue; + textResponse += text; eventHandler?.("reportStreamEvent", { type: "statusResponse", uuid: msgUUID, - content: event.text, + content: text, }); } @@ -223,12 +239,14 @@ class CohereProvider extends InheritMultiple([Provider, UnTooled]) { }); for await (const chunk of stream) { - if (chunk.eventType !== "text-generation") continue; - completion.content += chunk.text; + if (chunk.type !== "content-delta") continue; + const text = chunk.delta?.message?.content?.text || ""; + if (!text) continue; + completion.content += text; eventHandler?.("reportStreamEvent", { type: "textResponseChunk", uuid: msgUUID, - content: chunk.text, + content: text, }); } } diff --git a/server/utils/agents/aibitat/providers/deepseek.js b/server/utils/agents/aibitat/providers/deepseek.js index b9076388..d25ae5f7 100644 --- a/server/utils/agents/aibitat/providers/deepseek.js +++ b/server/utils/agents/aibitat/providers/deepseek.js @@ -43,6 +43,17 @@ class DeepSeekProvider extends InheritMultiple([Provider, UnTooled]) { return true; } + /** + * DeepSeek models do not support vision/image 
inputs. + * Strip attachments from messages to prevent API errors. + * @param {Object} message - Message with potential attachments + * @returns {Object} Message without attachments + */ + formatMessageWithAttachments(message) { + const { attachments: _, ...rest } = message; + return rest; + } + get #isThinkingModel() { return this.model === "deepseek-reasoner"; } @@ -81,13 +92,37 @@ class DeepSeekProvider extends InheritMultiple([Provider, UnTooled]) { }); } + /** + * Strip attachments from all messages since DeepSeek doesn't support vision. + * @param {Array} messages - Array of messages + * @returns {Array} Messages with attachments removed + */ + #stripAttachments(messages) { + let hasAttachments = false; + const stripped = messages.map((msg) => { + if (msg.attachments && msg.attachments.length > 0) { + hasAttachments = true; + const { attachments: _, ...rest } = msg; + return rest; + } + return msg; + }); + if (hasAttachments) { + this.providerLog( + "DeepSeek does not support vision - stripped image attachments from messages." 
+ ); + } + return stripped; + } + async stream(messages, functions = [], eventHandler = null) { const useNative = functions.length > 0 && this.supportsNativeToolCalling(); + const cleanedMessages = this.#stripAttachments(messages); if (!useNative) { return await UnTooled.prototype.stream.call( this, - messages, + cleanedMessages, functions, this.#handleFunctionCallStream.bind(this), eventHandler @@ -102,7 +137,7 @@ class DeepSeekProvider extends InheritMultiple([Provider, UnTooled]) { return await tooledStream( this.client, this.model, - messages, + cleanedMessages, functions, eventHandler, this.#tooledOptions @@ -123,11 +158,12 @@ class DeepSeekProvider extends InheritMultiple([Provider, UnTooled]) { async complete(messages, functions = []) { const useNative = functions.length > 0 && this.supportsNativeToolCalling(); + const cleanedMessages = this.#stripAttachments(messages); if (!useNative) { return await UnTooled.prototype.complete.call( this, - messages, + cleanedMessages, functions, this.#handleFunctionCallChat.bind(this) ); @@ -137,7 +173,7 @@ class DeepSeekProvider extends InheritMultiple([Provider, UnTooled]) { const result = await tooledComplete( this.client, this.model, - messages, + cleanedMessages, functions, this.getCost.bind(this), this.#tooledOptions diff --git a/server/utils/agents/aibitat/providers/gemini.js b/server/utils/agents/aibitat/providers/gemini.js index 5308e37a..433502e9 100644 --- a/server/utils/agents/aibitat/providers/gemini.js +++ b/server/utils/agents/aibitat/providers/gemini.js @@ -30,14 +30,13 @@ class GeminiProvider extends Provider { return this._client; } - get supportsToolCalling() { - if (!this.model.startsWith("gemini")) return false; - return true; - } - + /** + * Whether this provider supports agent streaming. 
+ * - Tool call streaming results in a 400/503 error for all non-gemini models + * using the compatible v1beta/openai/ endpoint + * @returns {boolean} + */ get supportsAgentStreaming() { - // Tool call streaming results in a 400/503 error for all non-gemini models - // using the compatible v1beta/openai/ endpoint if (!this.model.startsWith("gemini")) { this.providerLog( `Gemini: ${this.model} does not support tool call streaming.` @@ -47,6 +46,20 @@ class GeminiProvider extends Provider { return true; } + get supportsToolCalling() { + if (!this.model.startsWith("gemini")) return false; + return true; + } + + /** + * Whether this provider supports native OpenAI-compatible tool calling. + * - Gemini only supports tool calling for Gemini models. + * @returns {boolean} + */ + supportsNativeToolCalling() { + return this.supportsToolCalling; + } + /** * Gemini specifcally will throw an error if the tool call's function name * starts with a non-alpha character. So we need to prefix the function names @@ -141,6 +154,24 @@ class GeminiProvider extends Provider { return; } + // Handle messages with attachments (images) for multimodal support + if (message.attachments && message.attachments.length > 0) { + const content = [{ type: "text", text: message.content }]; + for (const attachment of message.attachments) { + content.push({ + type: "image_url", + image_url: { + url: attachment.contentString, + }, + }); + } + formattedMessages.push({ + role: message.role, + content, + }); + return; + } + formattedMessages.push({ role: message.role, content: message.content, diff --git a/server/utils/agents/aibitat/providers/genericOpenAi.js b/server/utils/agents/aibitat/providers/genericOpenAi.js index ad208a87..a4cbdb16 100644 --- a/server/utils/agents/aibitat/providers/genericOpenAi.js +++ b/server/utils/agents/aibitat/providers/genericOpenAi.js @@ -59,10 +59,7 @@ class GenericOpenAiProvider extends InheritMultiple([Provider, UnTooled]) { supportsNativeToolCalling() { if 
(this._supportsToolCalling !== null) return this._supportsToolCalling; const supportsToolCalling = - process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes( - "generic-openai" - ); - + this.supportsNativeToolCallingViaEnv("generic-openai"); if (supportsToolCalling) this.providerLog( "Generic OpenAI supports native tool calling is ENABLED via ENV." diff --git a/server/utils/agents/aibitat/providers/groq.js b/server/utils/agents/aibitat/providers/groq.js index eeae09d7..5c7368c4 100644 --- a/server/utils/agents/aibitat/providers/groq.js +++ b/server/utils/agents/aibitat/providers/groq.js @@ -44,9 +44,7 @@ class GroqProvider extends InheritMultiple([Provider, UnTooled]) { */ supportsNativeToolCalling() { if (this._supportsToolCalling !== null) return this._supportsToolCalling; - const supportsToolCalling = - process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes("groq"); - + const supportsToolCalling = this.supportsNativeToolCallingViaEnv("groq"); if (supportsToolCalling) this.providerLog("Groq supports native tool calling is ENABLED via ENV."); else diff --git a/server/utils/agents/aibitat/providers/helpers/tooled.js b/server/utils/agents/aibitat/providers/helpers/tooled.js index b4e2119a..fab68aad 100644 --- a/server/utils/agents/aibitat/providers/helpers/tooled.js +++ b/server/utils/agents/aibitat/providers/helpers/tooled.js @@ -35,10 +35,41 @@ function formatFunctionsToTools(functions) { })); } +/** + * Format message content with attachments (images) for multimodal support. + * Transforms a message with attachments into the OpenAI-compatible format. 
+ * @param {Object} message - The message to format + * @returns {Object} Message with content formatted for the API + */ +function formatMessageWithAttachments(message) { + if (!message.attachments || message.attachments.length === 0) { + return message; + } + + // Transform message with attachments into multimodal format + const content = [{ type: "text", text: message.content }]; + for (const attachment of message.attachments) { + content.push({ + type: "image_url", + image_url: { + url: attachment.contentString, + }, + }); + } + + // Return message without attachments property, with content as array + const { attachments: _, ...rest } = message; + return { + ...rest, + content, + }; +} + /** * Convert the aibitat message history (which uses role:"function" with * `originalFunctionCall` metadata) into the OpenAI tool-calling message * format (assistant `tool_calls` + role:"tool" pairs). + * Also handles image attachments for multimodal support. * @param {Array} messages * @param {{injectReasoningContent?: boolean}} options * - injectReasoningContent: when true, ensures every assistant message has @@ -112,9 +143,11 @@ function formatMessagesForTools(messages, options = {}) { message.role === "assistant" && !("reasoning_content" in message) ) { - formattedMessages.push({ ...message, reasoning_content: "" }); + formattedMessages.push( + formatMessageWithAttachments({ ...message, reasoning_content: "" }) + ); } else { - formattedMessages.push(message); + formattedMessages.push(formatMessageWithAttachments(message)); } } diff --git a/server/utils/agents/aibitat/providers/helpers/untooled.js b/server/utils/agents/aibitat/providers/helpers/untooled.js index 17134111..0ecc052c 100644 --- a/server/utils/agents/aibitat/providers/helpers/untooled.js +++ b/server/utils/agents/aibitat/providers/helpers/untooled.js @@ -18,7 +18,9 @@ class UnTooled { `${prevMsg}\n${msg.content}`; return; } - modifiedMessages.push(msg); + // Format messages with attachments for multimodal 
support + // Uses formatMessageWithAttachments inherited from Provider base class + modifiedMessages.push(this.formatMessageWithAttachments(msg)); }); return modifiedMessages; } @@ -119,6 +121,11 @@ ${JSON.stringify(def.parameters.properties, null, 4)}\n`; } buildToolCallMessages(history = [], functions = []) { + // Format history messages with attachments for multimodal support + const formattedHistory = history.map((msg) => + this.formatMessageWithAttachments(msg) + ); + return [ { content: `You are a program which picks the most optimal function and parameters to call. @@ -138,7 +145,7 @@ ${JSON.stringify(def.parameters.properties, null, 4)}\n`; Now pick a function if there is an appropriate one to use given the last user message and the given conversation so far.`, role: "system", }, - ...history, + ...formattedHistory, ]; } diff --git a/server/utils/agents/aibitat/providers/lemonade.js b/server/utils/agents/aibitat/providers/lemonade.js index 65fd6ad9..57e74feb 100644 --- a/server/utils/agents/aibitat/providers/lemonade.js +++ b/server/utils/agents/aibitat/providers/lemonade.js @@ -66,7 +66,7 @@ class LemonadeProvider extends InheritMultiple([Provider, UnTooled]) { // Labels can be missing for tool calling models, so we also check if ENV flag is set const supportsToolCallingFlag = - process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes("lemonade"); + this.supportsNativeToolCallingViaEnv("lemonade"); if (supportsToolCallingFlag) { this.providerLog( "Lemonade supports native tool calling is ENABLED via ENV." 
diff --git a/server/utils/agents/aibitat/providers/litellm.js b/server/utils/agents/aibitat/providers/litellm.js index f7e96547..5c0c9633 100644 --- a/server/utils/agents/aibitat/providers/litellm.js +++ b/server/utils/agents/aibitat/providers/litellm.js @@ -44,9 +44,7 @@ class LiteLLMProvider extends InheritMultiple([Provider, UnTooled]) { */ supportsNativeToolCalling() { if (this._supportsToolCalling !== null) return this._supportsToolCalling; - const supportsToolCalling = - process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes("litellm"); - + const supportsToolCalling = this.supportsNativeToolCallingViaEnv("litellm"); if (supportsToolCalling) this.providerLog( "LiteLLM supports native tool calling is ENABLED via ENV." diff --git a/server/utils/agents/aibitat/providers/localai.js b/server/utils/agents/aibitat/providers/localai.js index 5b9d62a8..26027361 100644 --- a/server/utils/agents/aibitat/providers/localai.js +++ b/server/utils/agents/aibitat/providers/localai.js @@ -44,9 +44,7 @@ class LocalAiProvider extends InheritMultiple([Provider, UnTooled]) { */ supportsNativeToolCalling() { if (this._supportsToolCalling !== null) return this._supportsToolCalling; - const supportsToolCalling = - process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes("localai"); - + const supportsToolCalling = this.supportsNativeToolCallingViaEnv("localai"); if (supportsToolCalling) this.providerLog( "LocalAI supports native tool calling is ENABLED via ENV." diff --git a/server/utils/agents/aibitat/providers/ollama.js b/server/utils/agents/aibitat/providers/ollama.js index edc62552..7dead084 100644 --- a/server/utils/agents/aibitat/providers/ollama.js +++ b/server/utils/agents/aibitat/providers/ollama.js @@ -91,10 +91,51 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) { }); } + /** + * Parse a data URL into base64 data for Ollama images + * @param {string} dataUrl - Data URL like "data:image/jpeg;base64,/9j/..." 
+ * @returns {string|null} Base64 encoded image data + */ + #parseImageDataUrl(dataUrl) { + if (!dataUrl || !dataUrl.startsWith("data:")) return null; + const matches = dataUrl.match(/^data:[^;]+;base64,(.+)$/); + if (!matches) return null; + return matches[1]; + } + + /** + * Override formatMessageWithAttachments for Ollama's specific format. + * Ollama expects images in a separate 'images' array with base64 data (no data URI prefix), + * not the OpenAI-style content array format. + * **This is only used for Ollama:untooled fallback mode.** + * @param {Object} message - Message with potential attachments + * @returns {Object} Formatted message for Ollama + */ + formatMessageWithAttachments(message) { + if (!message.attachments || message.attachments.length === 0) { + return message; + } + + const images = []; + for (const attachment of message.attachments) { + const imageData = this.#parseImageDataUrl(attachment.contentString); + if (imageData) { + images.push(imageData); + } + } + + const { attachments: _, ...restOfMessage } = message; + return { + ...restOfMessage, + ...(images.length > 0 ? { images } : {}), + }; + } + /** * Convert aibitat's internal message history (which uses role:"function" with * originalFunctionCall metadata) into the Ollama tool-calling message format * (assistant tool_calls + role:"tool" result pairs). + * Handles image attachments for vision/multimodal support. 
* @param {Array} messages * @returns {Array} */ @@ -128,7 +169,21 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) { : JSON.stringify(message.content), }); } else { - formatted.push(message); + // Handle messages with attachments (images) for multimodal support + if (message.attachments && message.attachments.length > 0) { + const images = []; + for (const attachment of message.attachments) { + const imageData = this.#parseImageDataUrl(attachment.contentString); + if (imageData) images.push(imageData); + } + const { attachments: _, ...restOfMessage } = message; + formatted.push({ + ...restOfMessage, + ...(images.length > 0 ? { images } : {}), + }); + } else { + formatted.push(message); + } } } return formatted; diff --git a/server/utils/agents/aibitat/providers/openai.js b/server/utils/agents/aibitat/providers/openai.js index f3d39c51..a7f402de 100644 --- a/server/utils/agents/aibitat/providers/openai.js +++ b/server/utils/agents/aibitat/providers/openai.js @@ -30,10 +30,20 @@ class OpenAIProvider extends Provider { return true; } + /** + * Whether this provider supports native OpenAI-compatible tool calling. + * - OpenAI always supports tool calling. + * @returns {Promise} + */ + supportsNativeToolCalling() { + return true; + } + /** * Format the messages to the OpenAI API Responses format. * - If the message is our internal `function` type, then we need to map it to a function call + output format * - Otherwise, map it to the input text format for user, system, and assistant messages + * - Handles attachments (images) for multimodal support * * @param {any[]} messages - The messages to format. * @returns {OpenAI.OpenAI.Responses.ResponseInput[]} The formatted messages. @@ -69,14 +79,27 @@ class OpenAIProvider extends Provider { return; } + // Build content array with text and optional image attachments + const content = [ + { + type: message.role === "assistant" ? 
"output_text" : "input_text", + text: message.content, + }, + ]; + + // Add image attachments if present (for multimodal/vision support) + if (message.attachments && message.attachments.length > 0) { + for (const attachment of message.attachments) { + content.push({ + type: "input_image", + image_url: attachment.contentString, + }); + } + } + formattedMessages.push({ role: message.role, - content: [ - { - type: message.role === "assistant" ? "output_text" : "input_text", - text: message.content, - }, - ], + content, }); }); diff --git a/server/utils/agents/aibitat/providers/openrouter.js b/server/utils/agents/aibitat/providers/openrouter.js index a6a2fa1b..f8400f55 100644 --- a/server/utils/agents/aibitat/providers/openrouter.js +++ b/server/utils/agents/aibitat/providers/openrouter.js @@ -51,8 +51,7 @@ class OpenRouterProvider extends InheritMultiple([Provider, UnTooled]) { supportsNativeToolCalling() { if (this._supportsToolCalling !== null) return this._supportsToolCalling; const supportsToolCalling = - process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes("openrouter"); - + this.supportsNativeToolCallingViaEnv("openrouter"); if (supportsToolCalling) this.providerLog( "OpenRouter supports native tool calling is ENABLED via ENV." 
diff --git a/server/utils/agents/defaults.js b/server/utils/agents/defaults.js index 7a725a49..ba54cd92 100644 --- a/server/utils/agents/defaults.js +++ b/server/utils/agents/defaults.js @@ -33,8 +33,14 @@ const WORKSPACE_AGENT = { * @returns {Promise<{ role: string, functions: object[] }>} */ getDefinition: async (provider = null, workspace = null, user = null) => { + const basePrompt = await Provider.systemPrompt({ + provider, + workspace, + user, + }); + return { - role: await Provider.systemPrompt({ provider, workspace, user }), + role: basePrompt, functions: [ ...(await agentSkillsFromSystemSettings()), ...ImportedPlugin.activeImportedPlugins(), diff --git a/server/utils/agents/ephemeral.js b/server/utils/agents/ephemeral.js index d0c794bc..cc826290 100644 --- a/server/utils/agents/ephemeral.js +++ b/server/utils/agents/ephemeral.js @@ -5,7 +5,10 @@ const MCPCompatibilityLayer = require("../MCP"); const { AgentFlows } = require("../agentFlows"); const { httpSocket } = require("./aibitat/plugins/http-socket.js"); const { User } = require("../../models/user"); +const { Workspace } = require("../../models/workspace"); const { WorkspaceChats } = require("../../models/workspaceChats"); +const { WorkspaceParsedFiles } = require("../../models/workspaceParsedFiles"); +const { DocumentManager } = require("../DocumentManager"); const { safeJsonParse } = require("../http"); const { USER_AGENT, @@ -37,6 +40,8 @@ class EphemeralAgentHandler extends AgentHandler { #prompt = null; /** @type {string[]} the functions to load into the agent (Aibitat plugins) */ #funcsToLoad = []; + /** @type {Array<{name: string, mime: string, contentString: string}>} attachments for multimodal support */ + #attachments = []; /** @type {AIbitat|null} */ aibitat = null; @@ -54,7 +59,8 @@ class EphemeralAgentHandler extends AgentHandler { * prompt: string, * userId: import("@prisma/client").users["id"]|null, * threadId: import("@prisma/client").workspace_threads["id"]|null, - * sessionId: 
string|null + * sessionId: string|null, + * attachments: Array<{name: string, mime: string, contentString: string}> * }} parameters */ constructor({ @@ -64,6 +70,7 @@ class EphemeralAgentHandler extends AgentHandler { userId = null, threadId = null, sessionId = null, + attachments = [], }) { super({ uuid }); this.#invocationUUID = uuid; @@ -76,6 +83,7 @@ class EphemeralAgentHandler extends AgentHandler { this.#userId = userId; this.#threadId = threadId; this.#sessionId = sessionId; + this.#attachments = attachments; } log(text, ...args) { @@ -353,6 +361,78 @@ class EphemeralAgentHandler extends AgentHandler { return this; } + /** + * Fetch fresh parsed files and pinned documents, format them for injection into user messages. + * Called on every chat turn to ensure context is always up-to-date. + * @returns {Promise} Formatted context string to append to user message + */ + async #fetchParsedFileContext() { + const user = this.#userId ? { id: this.#userId } : null; + const thread = this.#threadId ? 
{ id: this.#threadId } : null; + const documentManager = new DocumentManager({ + workspace: this.#workspace, + }); + + return Promise.all([ + WorkspaceParsedFiles.getContextFiles(this.#workspace, thread, user), + documentManager.pinnedDocs(), + ]) + .then(([parsedFiles, pinnedDocs]) => { + const allDocuments = [ + ...(parsedFiles || []).map((doc) => ({ + name: doc.title || "Uploaded Document", + content: doc.pageContent, + })), + ...(pinnedDocs || []).map((doc) => ({ + name: doc.title || doc.metadata?.title || "Pinned Document", + content: doc.pageContent, + })), + ]; + + if (allDocuments.length === 0) return ""; + + if (parsedFiles?.length > 0) + this.log( + `Injecting ${parsedFiles.length} parsed file(s) into user message` + ); + if (pinnedDocs?.length > 0) + this.log( + `Injecting ${pinnedDocs.length} pinned document(s) into user message` + ); + + return ( + "\n\n\n" + + allDocuments + .map((doc, i) => { + const filename = doc.name || `Document ${i + 1}`; + return `\n${doc.content}\n`; + }) + .join("\n") + + "\n" + ); + }) + .catch((e) => { + this.log("Error fetching parsed file context", e.message); + return ""; + }); + } + + /** + * Strip the @agent command from the message if it exists. + * Prevents hallucination by the agent when the @agent command is used from the model thinking + * it is an agent or something itself. + * If the user sent nothing after the @agent command - assume its a greeting. + * @param {string} message - The message to strip the @agent command from. + * @returns {string} The message with the @agent command stripped. 
+ */ + #stripAgentCommand(message = "") { + const stripped = String(message) + .replace(/^@agent\s*/, "") + .trim(); + if (!stripped) return "Hello!"; + return stripped; + } + async createAIbitat( args = { handler: null, @@ -371,6 +451,10 @@ class EphemeralAgentHandler extends AgentHandler { }, }); + // Register callback to fetch fresh parsed file context on each chat turn + // This injects parsed files into user messages instead of system prompt + this.aibitat.fetchParsedFileContext = () => this.#fetchParsedFileContext(); + // Attach HTTP response object if defined for chunk streaming. this.log(`Attached ${httpSocket.name} plugin to Agent cluster`); this.aibitat.use( @@ -392,16 +476,38 @@ class EphemeralAgentHandler extends AgentHandler { return this.aibitat.start({ from: USER_AGENT.name, to: this.channel ?? WORKSPACE_AGENT.name, - content: this.#prompt, + content: this.#stripAgentCommand(this.#prompt), + attachments: this.#attachments, }); } + /** + * Determine if the message should invoke the agent handler. + * This is true when the user explicitly invokes an agent (via @agent prefix) + * or when the workspace is in automatic mode **and** the provider supports native tool calling. + * @param {{message: string, workspace?: object, chatMode?: string}} parameters + * @returns {Promise} + */ + static async isAgentInvocation({ + message, + workspace = null, + chatMode = null, + }) { + if (this.#isAgentCommandInvocation({ message })) return true; + if (chatMode === "automatic") { + if (!workspace) return false; + if (await Workspace.supportsNativeToolCalling(workspace)) return true; + return false; + } + return false; + } + /** * Determine if the message provided is an agent invocation. 
* @param {{message:string}} parameters * @returns {boolean} */ - static isAgentInvocation({ message }) { + static #isAgentCommandInvocation({ message }) { const agentHandles = WorkspaceAgentInvocation.parseAgents(message); if (agentHandles.length > 0) return true; return false; diff --git a/server/utils/agents/index.js b/server/utils/agents/index.js index d07604f5..eaf7040d 100644 --- a/server/utils/agents/index.js +++ b/server/utils/agents/index.js @@ -3,6 +3,7 @@ const AgentPlugins = require("./aibitat/plugins"); const { WorkspaceAgentInvocation, } = require("../../models/workspaceAgentInvocation"); +const { WorkspaceParsedFiles } = require("../../models/workspaceParsedFiles"); const { User } = require("../../models/user"); const { WorkspaceChats } = require("../../models/workspaceChats"); const { safeJsonParse } = require("../http"); @@ -10,6 +11,8 @@ const { USER_AGENT, WORKSPACE_AGENT } = require("./defaults"); const ImportedPlugin = require("./imported"); const { AgentFlows } = require("../agentFlows"); const MCPCompatibilityLayer = require("../MCP"); +const { getAndClearInvocationAttachments } = require("../chats/agents"); +const { DocumentManager } = require("../DocumentManager"); class AgentHandler { #invocationUUID; @@ -19,6 +22,7 @@ class AgentHandler { channel = null; provider = null; model = null; + attachments = []; constructor({ uuid }) { this.#invocationUUID = uuid; @@ -590,9 +594,76 @@ class AgentHandler { async init() { await this.#validInvocation(); this.#providerSetupAndCheck(); + + // Retrieve cached attachments (images, etc.) from the HTTP request + this.attachments = getAndClearInvocationAttachments(this.#invocationUUID); + return this; } + /** + * Fetch fresh parsed files and pinned documents, format them for injection into user messages. + * Called on every chat turn to ensure context is always up-to-date. 
+ * @returns {Promise<string>} Formatted context string to append to user message + */ + async #fetchParsedFileContext() { + const user = this.invocation.user_id + ? { id: this.invocation.user_id } + : null; + const thread = this.invocation.thread_id + ? { id: this.invocation.thread_id } + : null; + const documentManager = new DocumentManager({ + workspace: this.invocation.workspace, + }); + + return Promise.all([ + WorkspaceParsedFiles.getContextFiles( + this.invocation.workspace, + thread, + user + ), + documentManager.pinnedDocs(), + ]) + .then(([parsedFiles, pinnedDocs]) => { + const allDocuments = [ + ...(parsedFiles || []).map((doc) => ({ + name: doc.title || "Uploaded Document", + content: doc.pageContent, + })), + ...(pinnedDocs || []).map((doc) => ({ + name: doc.title || doc.metadata?.title || "Pinned Document", + content: doc.pageContent, + })), + ]; + + if (allDocuments.length === 0) return ""; + if (parsedFiles?.length > 0) + this.log( + `Injecting ${parsedFiles.length} parsed file(s) into user message` + ); + if (pinnedDocs?.length > 0) + this.log( + `Injecting ${pinnedDocs.length} pinned document(s) into user message` + ); + + return ( + "\n\n<attached_files>\n" + + allDocuments + .map((doc, i) => { + const filename = doc.name || `Document ${i + 1}`; + return `<file name="${filename}">\n${doc.content}\n</file>`; + }) + .join("\n") + + "\n</attached_files>" + ); + }) + .catch((e) => { + this.log("Error fetching parsed file context", e.message); + return ""; + }); + } + async createAIbitat( args = { socket: null, @@ -608,6 +679,10 @@ }, }); + + // Register callback to fetch fresh parsed file context on each chat turn + // This injects parsed files into user messages instead of system prompt + this.aibitat.fetchParsedFileContext = () => this.#fetchParsedFileContext(); + + // Attach standard websocket plugin for frontend communication.
this.log(`Attached ${AgentPlugins.websocket.name} plugin to Agent cluster`); this.aibitat.use( @@ -631,11 +706,28 @@ await this.#attachPlugins(args); } + /** + * Strip the @agent command from the message if it exists. + * Prevents the model from hallucinating that it is itself an agent when + * the @agent command is left in the message. + * If the user sent nothing after the @agent command, assume it's a greeting. + * @param {string} message - The message to strip the @agent command from. + * @returns {string} The message with the @agent command stripped. + */ + #stripAgentCommand(message = "") { + const stripped = String(message) + .replace(/^@agent\s*/, "") + .trim(); + if (!stripped) return "Hello!"; + return stripped; + } + startAgentCluster() { return this.aibitat.start({ from: USER_AGENT.name, to: this.channel ?? WORKSPACE_AGENT.name, - content: this.invocation.prompt, + content: this.#stripAgentCommand(this.invocation.prompt), + attachments: this.attachments, }); } } diff --git a/server/utils/chats/agents.js b/server/utils/chats/agents.js index 26de10e8..29204397 100644 --- a/server/utils/chats/agents.js +++ b/server/utils/chats/agents.js @@ -3,6 +3,37 @@ const { WorkspaceAgentInvocation, } = require("../../models/workspaceAgentInvocation"); const { writeResponseChunk } = require("../helpers/chat/responses"); +const { Workspace } = require("../../models/workspace"); + +/** + * In-memory cache for attachments associated with agent invocations. + * Attachments are stored here when grepAgents creates an invocation, + * then retrieved by AgentHandler when the websocket connects.
+ * @type {Map<string, Array<object>>} + */ +const invocationAttachmentsCache = new Map(); + +/** + * Store attachments for an invocation UUID + * @param {string} uuid - The invocation UUID + * @param {Array} attachments - The attachments array + */ +function cacheInvocationAttachments(uuid, attachments = []) { + if (attachments.length > 0) { + invocationAttachmentsCache.set(uuid, attachments); + } +} + +/** + * Retrieve and remove attachments for an invocation UUID + * @param {string} uuid - The invocation UUID + * @returns {Array} The attachments array (empty if none cached) + */ +function getAndClearInvocationAttachments(uuid) { + const attachments = invocationAttachmentsCache.get(uuid) || []; + invocationAttachmentsCache.delete(uuid); + return attachments; +} async function grepAgents({ uuid, @@ -11,9 +42,17 @@ workspace, user = null, thread = null, + attachments = [], }) { + let nativeToolingEnabled = false; + + // If the workspace is in automatic mode, check if the workspace supports native tooling + // to determine if the agent flow should be used or not.
+ if (workspace?.chatMode === "automatic") + nativeToolingEnabled = await Workspace.supportsNativeToolCalling(workspace); + const agentHandles = WorkspaceAgentInvocation.parseAgents(message); - if (agentHandles.length > 0) { + if (agentHandles.length > 0 || nativeToolingEnabled) { const { invocation: newInvocation } = await WorkspaceAgentInvocation.new({ prompt: message, workspace: workspace, @@ -39,6 +78,9 @@ async function grepAgents({ return; } + // Cache attachments for the websocket handler to retrieve later + cacheInvocationAttachments(newInvocation.uuid, attachments); + writeResponseChunk(response, { id: uuid, type: "agentInitWebsocketConnection", @@ -70,4 +112,4 @@ async function grepAgents({ return false; } -module.exports = { grepAgents }; +module.exports = { grepAgents, getAndClearInvocationAttachments }; diff --git a/server/utils/chats/apiChatHandler.js b/server/utils/chats/apiChatHandler.js index ff9ee101..0b3f14bc 100644 --- a/server/utils/chats/apiChatHandler.js +++ b/server/utils/chats/apiChatHandler.js @@ -100,7 +100,7 @@ async function processDocumentAttachments(attachments = []) { * @param {{ * workspace: import("@prisma/client").workspaces, * message:string, - * mode: "chat"|"query", + * mode: "automatic"|"chat"|"query", * user: import("@prisma/client").users|null, * thread: import("@prisma/client").workspace_threads|null, * sessionId: string|null, @@ -150,7 +150,13 @@ async function chatSync({ const processedMessage = await grepAllSlashCommands(message); message = processedMessage; - if (EphemeralAgentHandler.isAgentInvocation({ message })) { + if ( + await EphemeralAgentHandler.isAgentInvocation({ + message, + workspace, + chatMode, + }) + ) { await Telemetry.sendTelemetry("agent_chat_started"); // Initialize the EphemeralAgentHandler to handle non-continuous @@ -162,6 +168,7 @@ async function chatSync({ userId: user?.id || null, threadId: thread?.id || null, sessionId, + attachments, }); // Establish event listener that emulates websocket 
calls @@ -439,7 +446,7 @@ async function chatSync({ * response: import("express").Response, * workspace: import("@prisma/client").workspaces, * message:string, - * mode: "chat"|"query", + * mode: "automatic"|"chat"|"query", * user: import("@prisma/client").users|null, * thread: import("@prisma/client").workspace_threads|null, * sessionId: string|null, @@ -492,7 +499,13 @@ async function streamChat({ const processedMessage = await grepAllSlashCommands(message); message = processedMessage; - if (EphemeralAgentHandler.isAgentInvocation({ message })) { + if ( + await EphemeralAgentHandler.isAgentInvocation({ + message, + workspace, + chatMode, + }) + ) { await Telemetry.sendTelemetry("agent_chat_started"); // Initialize the EphemeralAgentHandler to handle non-continuous @@ -504,6 +517,7 @@ async function streamChat({ userId: user?.id || null, threadId: thread?.id || null, sessionId, + attachments, }); // Establish event listener that emulates websocket calls diff --git a/server/utils/chats/stream.js b/server/utils/chats/stream.js index acb1e4a5..86ee9316 100644 --- a/server/utils/chats/stream.js +++ b/server/utils/chats/stream.js @@ -13,7 +13,7 @@ const { sourceIdentifier, } = require("./index"); -const VALID_CHAT_MODE = ["chat", "query"]; +const VALID_CHAT_MODE = ["automatic", "chat", "query"]; async function streamChatWithWorkspace( response, @@ -47,6 +47,7 @@ async function streamChatWithWorkspace( user, workspace, thread, + attachments, }); if (isAgentChat) return;