diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/Citation/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/Citation/index.jsx index c588b4ec..2fa65bb9 100644 --- a/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/Citation/index.jsx +++ b/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/Citation/index.jsx @@ -1,4 +1,4 @@ -import { Fragment } from "react"; +import { Fragment, useState, useEffect } from "react"; import { decode as HTMLDecode } from "he"; import truncate from "truncate"; import ModalWrapper from "@/components/ModalWrapper"; @@ -29,19 +29,51 @@ const CIRCLE_ICONS = { }; /** - * Renders a circle with a source type icon inside. + * Renders a circle with a source type icon inside, or a favicon if URL is provided. * @param {"file"|"link"|"youtube"|"github"|"gitlab"|"confluence"|"drupalwiki"|"obsidian"|"paperlessNgx"} props.type * @param {number} [props.size] - Circle diameter in px * @param {number} [props.iconSize] - Icon size in px + * @param {string} [props.url] - Optional URL to fetch favicon from */ -export function SourceTypeCircle({ type = "file", size = 22, iconSize = 12 }) { +export function SourceTypeCircle({ + type = "file", + size = 22, + iconSize = 12, + url = null, +}) { const Icon = CIRCLE_ICONS[type] || CIRCLE_ICONS.file; + const [imgError, setImgError] = useState(false); + + let faviconUrl = null; + if (type === "link" && url) { + try { + const hostname = new URL(url).hostname; + faviconUrl = `https://www.google.com/s2/favicons?domain=${hostname}&sz=64`; + } catch { + faviconUrl = null; + } + } + + useEffect(() => { + setImgError(false); + }, [url]); + return (
- + {faviconUrl && !imgError ? ( + favicon setImgError(true)} + /> + ) : ( + + )}
); } @@ -107,7 +139,12 @@ export default function Citations({ sources = [] }) { className="absolute top-0 size-[22px] rounded-full border-2 border-zinc-800 light:border-white" style={{ left: `${idx * 17}px`, zIndex: 3 - idx }} > - + ); })} diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/index.jsx index 4788baeb..c01eef85 100644 --- a/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/index.jsx +++ b/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/index.jsx @@ -196,7 +196,8 @@ export default memo( prevProps.message === nextProps.message && prevProps.isLastMessage === nextProps.isLastMessage && prevProps.chatId === nextProps.chatId && - JSON.stringify(prevProps.metrics) === JSON.stringify(nextProps.metrics) + JSON.stringify(prevProps.metrics) === JSON.stringify(nextProps.metrics) && + JSON.stringify(prevProps.sources) === JSON.stringify(nextProps.sources) ); } ); diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/SourcesSidebar/SourceItem/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/SourcesSidebar/SourceItem/index.jsx index dce49b3f..03fb619c 100644 --- a/frontend/src/components/WorkspaceChat/ChatContainer/SourcesSidebar/SourceItem/index.jsx +++ b/frontend/src/components/WorkspaceChat/ChatContainer/SourcesSidebar/SourceItem/index.jsx @@ -13,7 +13,12 @@ export default function SourceItem({ source, onClick }) { className="flex flex-col gap-[2px] items-start w-full text-left hover:opacity-75 transition-opacity" >
- +

{source.title}

diff --git a/frontend/src/utils/chat/agent.js b/frontend/src/utils/chat/agent.js index 98395e05..56bc33ae 100644 --- a/frontend/src/utils/chat/agent.js +++ b/frontend/src/utils/chat/agent.js @@ -138,6 +138,18 @@ export default function handleSocketResponse(socket, event, setChatHistory) { ); } + if (type === "citations") { + if (!data.content.citations) return prev; + return prev.map((msg) => + msg.uuid === uuid + ? { + ...msg, + sources: [...(msg.sources || []), ...data.content.citations], + } + : msg + ); + } + if (type === "textResponseChunk") { return prev .map((msg) => diff --git a/server/utils/agents/aibitat/index.js b/server/utils/agents/aibitat/index.js index 5cfb9c36..8fa28aea 100644 --- a/server/utils/agents/aibitat/index.js +++ b/server/utils/agents/aibitat/index.js @@ -37,6 +37,13 @@ class AIbitat { channels = new Map(); functions = new Map(); + /** + * Buffer for citations collected during tool execution. + * Citations are flushed to the frontend when the response is finalized. + * @type {Array<{id: string, title: string, text: string, chunkSource?: string, score?: number}>} + */ + _pendingCitations = []; + constructor(props = {}) { const { chats = [], @@ -76,6 +83,41 @@ class AIbitat { return this; } + /** + * Add citation(s) to be reported when the response is finalized. + * Citations are buffered and flushed with the correct message UUID. + * @param {{id: string, title: string, text: string, chunkSource?: string, score?: number}|Array<{id: string, title: string, text: string, chunkSource?: string, score?: number}>} citations - Citation object or array of citation objects + */ + addCitation(citations) { + if (!citations) return; + if (Array.isArray(citations)) + this._pendingCitations.push(...citations.filter(Boolean)); + else if (typeof citations === "object") + this._pendingCitations.push(citations); + } + + /** + * Flush all pending citations to the frontend with the given message UUID. + * Called automatically when the agent response is finalized. + * Note: Does not clear citations - they are cleared by chat-history plugin after persisting. + * @param {string} messageUuid - The UUID of the message to attach citations to + */ + flushCitations(messageUuid) { + if (!messageUuid || this._pendingCitations.length === 0) return; + this.socket?.send?.("reportStreamEvent", { + type: "citations", + uuid: messageUuid, + citations: this._pendingCitations, + }); + } + + /** + * Clear all pending citations. Called after citations have been persisted. + */ + clearCitations() { + this._pendingCitations = []; + } + /** * Add a new agent to the AIbitat. * @@ -671,11 +713,13 @@ ${this.getHistory({ to: route.to }) ); const finalStream = await provider.stream(messages, [], eventHandler); + const finalUuid = finalStream?.uuid || v4(); eventHandler?.("reportStreamEvent", { type: "usageMetrics", - uuid: finalStream?.uuid || v4(), + uuid: finalUuid, metrics: provider.getUsage(), }); + this?.flushCitations?.(finalUuid); const finalResponse = finalStream?.textResponse || "I reached the maximum number of tool calls allowed for a single response. Here is what I have so far based on the tools I was able to run."; @@ -744,6 +788,7 @@ ${this.getHistory({ to: route.to }) uuid: directOutputUUID, metrics: provider.getUsage(), }); + this?.flushCitations?.(directOutputUUID); return result; } @@ -764,11 +809,13 @@ ${this.getHistory({ to: route.to }) ); } + const responseUuid = completionStream?.uuid || v4(); eventHandler?.("reportStreamEvent", { type: "usageMetrics", - uuid: completionStream?.uuid || v4(), + uuid: responseUuid, metrics: provider.getUsage(), }); + this?.flushCitations?.(responseUuid); return completionStream?.textResponse; } @@ -817,6 +864,7 @@ ${this.getHistory({ to: route.to }) uuid: msgUUID, metrics: provider.getUsage(), }); + this?.flushCitations?.(msgUUID); return ( finalCompletion?.textResponse || "I reached the maximum number of tool calls allowed for a single response. Here is what I have so far based on the tools I was able to run." @@ -874,6 +922,7 @@ ${this.getHistory({ to: route.to }) uuid: msgUUID, metrics: provider.getUsage(), }); + this?.flushCitations?.(msgUUID); return result; } @@ -900,6 +949,7 @@ ${this.getHistory({ to: route.to }) uuid: msgUUID, metrics: provider.getUsage(), }); + this?.flushCitations?.(msgUUID); return completion?.textResponse; } diff --git a/server/utils/agents/aibitat/plugins/chat-history.js b/server/utils/agents/aibitat/plugins/chat-history.js index 8596a9df..d2a05be0 100644 --- a/server/utils/agents/aibitat/plugins/chat-history.js +++ b/server/utils/agents/aibitat/plugins/chat-history.js @@ -44,18 +44,20 @@ const chatHistory = { _store: async function (aibitat, { prompt, response } = {}) { const invocation = aibitat.handlerProps.invocation; const metrics = aibitat.provider?.getUsage?.() ?? {}; + const citations = aibitat._pendingCitations ?? []; await WorkspaceChats.new({ workspaceId: Number(invocation.workspace_id), prompt, response: { text: response, - sources: [], + sources: citations, type: "chat", metrics, }, user: { id: invocation?.user_id || null }, threadId: invocation?.thread_id || null, }); + aibitat.clearCitations?.(); }, _storeSpecial: async function ( aibitat, @@ -63,11 +65,13 @@ const chatHistory = { ) { const invocation = aibitat.handlerProps.invocation; const metrics = aibitat.provider?.getUsage?.() ?? {}; + const citations = aibitat._pendingCitations ?? []; + const existingSources = options?.sources ?? []; await WorkspaceChats.new({ workspaceId: Number(invocation.workspace_id), prompt, response: { - sources: options?.sources ?? [], + sources: [...existingSources, ...citations], // when we have a _storeSpecial called the options param can include a storedResponse() function // that will override the text property to store extra information in, depending on the special type of chat. text: options.hasOwnProperty("storedResponse") @@ -79,6 +83,7 @@ const chatHistory = { user: { id: invocation?.user_id || null }, threadId: invocation?.thread_id || null, }); + aibitat.clearCitations?.(); options?.postSave(); }, }; diff --git a/server/utils/agents/aibitat/plugins/summarize.js b/server/utils/agents/aibitat/plugins/summarize.js index d532a071..ebf3a28c 100644 --- a/server/utils/agents/aibitat/plugins/summarize.js +++ b/server/utils/agents/aibitat/plugins/summarize.js @@ -136,6 +136,15 @@ const docSummarizer = { ); } + // Report citation for the document being summarized + this.super.addCitation?.({ + id: docInfo.document_id, + title: document.title || filename, + text: document.content, + chunkSource: null, + score: null, + }); + const { TokenManager } = require("../../../helpers/tiktoken"); if ( new TokenManager(this.super.model).countFromString( diff --git a/server/utils/agents/aibitat/plugins/web-browsing.js b/server/utils/agents/aibitat/plugins/web-browsing.js index ed568e9d..336cc72d 100644 --- a/server/utils/agents/aibitat/plugins/web-browsing.js +++ b/server/utils/agents/aibitat/plugins/web-browsing.js @@ -111,6 +111,36 @@ const webBrowsing = { return `${str.slice(0, length)}...${str.slice(-length)}`; }, + /** + * Report citations for an array of search results. + * Uses title, link, and snippet directly from result data. + * @param {Array<{title?: string, link?: string, snippet?: string}>} results - Search results to report as citations + */ + reportSearchResultsCitations: function (results) { + if (!Array.isArray(results)) return; + const citations = []; + for (const result of results) { + const fallbackUrl = + result.link || + result.url || + result.website || + result.product_link || + result.patent_link || + result.link_clean; + + citations.push({ + id: result.link || fallbackUrl, + title: result.title || fallbackUrl, + text: result.snippet || result.description || result.text || "", + chunkSource: result.link + ? `link://${result.link}` + : `link://${fallbackUrl}`, + score: null, + }); + } + this.super.addCitation?.(citations); + }, + /** * Use SerpApi * SerpApi supports dozens of search engines across the major platforms including Google, DuckDuckGo, Bing, eBay, Amazon, Baidu, Yandex, and more. @@ -362,6 +392,7 @@ const webBrowsing = { if (data.length === 0) return `No information was found online for the search query.`; + this.reportSearchResultsCitations(data); const result = JSON.stringify(data); this.super.introspect( `${this.caller}: I found ${data.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)` @@ -436,6 +467,7 @@ const webBrowsing = { if (data.length === 0) return `No information was found online for the search query.`; + this.reportSearchResultsCitations(data); const result = JSON.stringify(data); this.super.introspect( `${this.caller}: I found ${data.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)` @@ -504,6 +536,7 @@ const webBrowsing = { if (data.length === 0) return `No information was found online for the search query.`; + this.reportSearchResultsCitations(data); const result = JSON.stringify(data); this.super.introspect( `${this.caller}: I found ${data.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)` @@ -559,6 +592,7 @@ const webBrowsing = { if (searchResponse.length === 0) return `No information was found online for the search query.`; + this.reportSearchResultsCitations(searchResponse); const result = JSON.stringify(searchResponse); this.super.introspect( `${this.caller}: I found ${searchResponse.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)` @@ -643,6 +677,7 @@ const webBrowsing = { if (data.length === 0) return `No information was found online for the search query.`; + this.reportSearchResultsCitations(data); const result = JSON.stringify(data); this.super.introspect( `${this.caller}: I found ${data.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)` @@ -715,6 +750,7 @@ const webBrowsing = { if (data.length === 0) return `No information was found online for the search query.`; + this.reportSearchResultsCitations(data); const result = JSON.stringify(data); this.super.introspect( `${this.caller}: I found ${data.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)` @@ -778,6 +814,7 @@ const webBrowsing = { if (data.length === 0) return `No information was found online for the search query.`; + this.reportSearchResultsCitations(data); const result = JSON.stringify(data); this.super.introspect( `${this.caller}: I found ${data.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)` @@ -785,6 +822,26 @@ const webBrowsing = { return result; }, _duckDuckGoEngine: async function (query) { + /** + * Extract the actual destination URL from a DuckDuckGo redirect link. + * DDG links look like: //duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com&rut=... + * @param {string} ddgLink - The DuckDuckGo redirect link + * @returns {string} The actual destination URL + */ + function extractUrl(ddgLink) { + if (!ddgLink) return ddgLink; + try { + const fullUrl = ddgLink.startsWith("//") + ? `https:${ddgLink}` + : ddgLink; + const url = new URL(fullUrl); + const actualUrl = url.searchParams.get("uddg"); + return actualUrl ? decodeURIComponent(actualUrl) : ddgLink; + } catch { + return ddgLink; + } + } + this.super.introspect( `${this.caller}: Using DuckDuckGo to search for "${ query.length > 100 ? `${query.slice(0, 100)}...` : query @@ -823,11 +880,11 @@ const webBrowsing = { ); const title = titleMatch ? titleMatch[1].trim() : ""; - // Extract URL + // Extract URL and clean DDG redirect const urlMatch = result.match( /]*class="result__a"[^>]*href="([^"]*)">/ ); - const link = urlMatch ? urlMatch[1] : ""; + const link = extractUrl(urlMatch ? urlMatch[1] : ""); // Extract snippet const snippetMatch = result.match( @@ -846,6 +903,7 @@ const webBrowsing = { return `No information was found online for the search query.`; } + this.reportSearchResultsCitations(data); const result = JSON.stringify(data); this.super.introspect( `${this.caller}: I found ${data.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)` @@ -913,6 +971,7 @@ const webBrowsing = { if (data.length === 0) return `No information was found online for the search query.`; + this.reportSearchResultsCitations(data); const result = JSON.stringify(data); this.super.introspect( `${this.caller}: I found ${data.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)` diff --git a/server/utils/agents/aibitat/plugins/web-scraping.js b/server/utils/agents/aibitat/plugins/web-scraping.js index 8d4f6c09..e02adbb0 100644 --- a/server/utils/agents/aibitat/plugins/web-scraping.js +++ b/server/utils/agents/aibitat/plugins/web-scraping.js @@ -55,6 +55,33 @@ const webScraping = { } }, + /** + * Report a URL citation to be displayed in the chat UI. + * @param {string} url - The URL that was accessed + * @param {string} content - The content retrieved from the URL + */ + reportUrlCitation: function (url, content) { + try { + const urlObj = new URL(url); + this.super.addCitation?.({ + id: url, + title: urlObj.hostname + urlObj.pathname, + text: content, + chunkSource: `link://${url}`, + score: null, + }); + } catch { + // URL parsing failed, still add citation without parsed title + this.super.addCitation?.({ + id: url, + title: url, + text: content, + chunkSource: `link://${url}`, + score: null, + }); + } + }, + /** * Scrape a website and summarize the content based on objective if the content is too large. * Objective is the original objective & task that user give to the agent, url is the url of the website to be scraped. @@ -83,6 +110,7 @@ const webScraping = { throw new Error("There was no content to be collected or read."); } + this.reportUrlCitation(url, content); const { TokenManager } = require("../../../helpers/tiktoken"); const tokenEstimate = new TokenManager( this.super.model