Workspace Chat with documents overhaul (#4261)

* Create parse endpoint in collector (#4212)

* create parse endpoint in collector

* revert cleanup temp util call

* lint

* remove unused cleanupTempDocuments function

* revert slug change
minor change for destinations

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>

* Add parsed files table and parse server endpoints (#4222)

* add workspace_parsed_files table + parse endpoints/models

* remove dev api parse endpoint

* remove unneeded imports

* iterate over all files + remove unneeded update function + update telemetry debounce

* Upload UI/UX context window check + frontend alert (#4230)

* prompt user to embed if exceeds prompt window + handle embed + handle cancel

* add tokenCountEstimate to workspace_parsed_files + optimizations

* use util for path locations + use safeJsonParse

* add modal for user decision on overflow of context window

* lint

* dynamic fetching of provider/model combo + inject parsed documents

* remove unneeded comments

* popup ui for attaching/removing files + warning to embed + wip fetching states on update

* remove prop drilling, fetch files/limits directly in attach files popup

* rework ux of FE + BE optimizations

* fix ux of FE + BE optimizations

* Implement bidirectional sync for parsed file states
linting
small changes and comments

* move parse support to another endpoint file
simplify calls and loading of records

* button borders

* enable default users to upload parsed files but NOT embed

* delete cascade on user/workspace/thread deletion to remove parsedFileRecord

* enable bgworker with "always" jobs and optional document sync jobs
orphan document job: Will find any broken reference files to prevent overpollution of the storage folder. This will run 10s after boot and every 12hr after

* change run timeout for orphan job to 1m to allow settling before spawning a worker

* linting and cleanup pr

---------

Co-authored-by: Timothy Carambat <rambat1010@gmail.com>

* dev build

* fix tooltip hiding during embedding overflow files

* prevent crash log from ERRNO on parse files

* unused import

* update docs link

* Migrate parsed-files to GET endpoint
patch logic for grabbing models names from utils
better handling for undetermined context windows (null instead of Pos_INIFI)
UI placeholder for null context windows

* patch URL

---------

Co-authored-by: Sean Hatfield <seanhatfield5@gmail.com>
This commit is contained in:
Timothy Carambat 2025-08-11 09:26:19 -07:00 committed by GitHub
parent 9451cd596f
commit 0fb33736da
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
35 changed files with 1471 additions and 101 deletions

View File

@ -6,7 +6,7 @@ concurrency:
on: on:
push: push:
branches: ['mobile-support'] # put your current branch to create a build. Core team only. branches: ['upload-ui-ux'] # put your current branch to create a build. Core team only.
paths-ignore: paths-ignore:
- '**.md' - '**.md'
- 'cloud-deployments/*' - 'cloud-deployments/*'

View File

@ -58,6 +58,39 @@ app.post(
} }
); );
app.post(
"/parse",
[verifyPayloadIntegrity],
async function (request, response) {
const { filename, options = {} } = reqBody(request);
try {
const targetFilename = path
.normalize(filename)
.replace(/^(\.\.(\/|\\|$))+/, "");
const {
success,
reason,
documents = [],
} = await processSingleFile(targetFilename, {
...options,
parseOnly: true,
});
response
.status(200)
.json({ filename: targetFilename, success, reason, documents });
} catch (e) {
console.error(e);
response.status(200).json({
filename: filename,
success: false,
reason: "A processing error occurred.",
documents: [],
});
}
return;
}
);
app.post( app.post(
"/process-link", "/process-link",
[verifyPayloadIntegrity], [verifyPayloadIntegrity],

View File

@ -62,6 +62,7 @@ async function asAudio({ fullFilePath = "", filename = "", options = {} }) {
const document = writeToServerDocuments({ const document = writeToServerDocuments({
data, data,
filename: `${slugify(filename)}-${data.id}`, filename: `${slugify(filename)}-${data.id}`,
options: { parseOnly: options.parseOnly },
}); });
trashFile(fullFilePath); trashFile(fullFilePath);
console.log( console.log(

View File

@ -8,7 +8,7 @@ const {
const { tokenizeString } = require("../../utils/tokenizer"); const { tokenizeString } = require("../../utils/tokenizer");
const { default: slugify } = require("slugify"); const { default: slugify } = require("slugify");
async function asDocX({ fullFilePath = "", filename = "" }) { async function asDocX({ fullFilePath = "", filename = "", options = {} }) {
const loader = new DocxLoader(fullFilePath); const loader = new DocxLoader(fullFilePath);
console.log(`-- Working ${filename} --`); console.log(`-- Working ${filename} --`);
@ -48,6 +48,7 @@ async function asDocX({ fullFilePath = "", filename = "" }) {
const document = writeToServerDocuments({ const document = writeToServerDocuments({
data, data,
filename: `${slugify(filename)}-${data.id}`, filename: `${slugify(filename)}-${data.id}`,
options: { parseOnly: options.parseOnly },
}); });
trashFile(fullFilePath); trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`); console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);

View File

@ -8,7 +8,7 @@ const {
} = require("../../utils/files"); } = require("../../utils/files");
const { default: slugify } = require("slugify"); const { default: slugify } = require("slugify");
async function asEPub({ fullFilePath = "", filename = "" }) { async function asEPub({ fullFilePath = "", filename = "", options = {} }) {
let content = ""; let content = "";
try { try {
const loader = new EPubLoader(fullFilePath, { splitChapters: false }); const loader = new EPubLoader(fullFilePath, { splitChapters: false });
@ -46,6 +46,7 @@ async function asEPub({ fullFilePath = "", filename = "" }) {
const document = writeToServerDocuments({ const document = writeToServerDocuments({
data, data,
filename: `${slugify(filename)}-${data.id}`, filename: `${slugify(filename)}-${data.id}`,
options: { parseOnly: options.parseOnly },
}); });
trashFile(fullFilePath); trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`); console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);

View File

@ -41,6 +41,7 @@ async function asImage({ fullFilePath = "", filename = "", options = {} }) {
const document = writeToServerDocuments({ const document = writeToServerDocuments({
data, data,
filename: `${slugify(filename)}-${data.id}`, filename: `${slugify(filename)}-${data.id}`,
options: { parseOnly: options.parseOnly },
}); });
trashFile(fullFilePath); trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`); console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);

View File

@ -60,6 +60,7 @@ async function asMbox({ fullFilePath = "", filename = "" }) {
const document = writeToServerDocuments({ const document = writeToServerDocuments({
data, data,
filename: `${slugify(filename)}-${data.id}-msg-${item}`, filename: `${slugify(filename)}-${data.id}-msg-${item}`,
options: { parseOnly: options.parseOnly },
}); });
documents.push(document); documents.push(document);
} }

View File

@ -44,6 +44,7 @@ async function asOfficeMime({ fullFilePath = "", filename = "" }) {
const document = writeToServerDocuments({ const document = writeToServerDocuments({
data, data,
filename: `${slugify(filename)}-${data.id}`, filename: `${slugify(filename)}-${data.id}`,
options: { parseOnly: options.parseOnly },
}); });
trashFile(fullFilePath); trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`); console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);

View File

@ -65,6 +65,7 @@ async function asPdf({ fullFilePath = "", filename = "", options = {} }) {
const document = writeToServerDocuments({ const document = writeToServerDocuments({
data, data,
filename: `${slugify(filename)}-${data.id}`, filename: `${slugify(filename)}-${data.id}`,
options: { parseOnly: options.parseOnly },
}); });
trashFile(fullFilePath); trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`); console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);

View File

@ -8,7 +8,7 @@ const {
} = require("../../utils/files"); } = require("../../utils/files");
const { default: slugify } = require("slugify"); const { default: slugify } = require("slugify");
async function asTxt({ fullFilePath = "", filename = "" }) { async function asTxt({ fullFilePath = "", filename = "", options = {} }) {
let content = ""; let content = "";
try { try {
content = fs.readFileSync(fullFilePath, "utf8"); content = fs.readFileSync(fullFilePath, "utf8");
@ -44,6 +44,7 @@ async function asTxt({ fullFilePath = "", filename = "" }) {
const document = writeToServerDocuments({ const document = writeToServerDocuments({
data, data,
filename: `${slugify(filename)}-${data.id}`, filename: `${slugify(filename)}-${data.id}`,
options: { parseOnly: options.parseOnly },
}); });
trashFile(fullFilePath); trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`); console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);

View File

@ -7,6 +7,7 @@ const {
trashFile, trashFile,
writeToServerDocuments, writeToServerDocuments,
documentsFolder, documentsFolder,
directUploadsFolder,
} = require("../../utils/files"); } = require("../../utils/files");
const { tokenizeString } = require("../../utils/tokenizer"); const { tokenizeString } = require("../../utils/tokenizer");
const { default: slugify } = require("slugify"); const { default: slugify } = require("slugify");
@ -26,14 +27,16 @@ function convertToCSV(data) {
.join("\n"); .join("\n");
} }
async function asXlsx({ fullFilePath = "", filename = "" }) { async function asXlsx({ fullFilePath = "", filename = "", options = {} }) {
const documents = []; const documents = [];
const folderName = slugify(`${path.basename(filename)}-${v4().slice(0, 4)}`, { const folderName = slugify(`${path.basename(filename)}-${v4().slice(0, 4)}`, {
lower: true, lower: true,
trim: true, trim: true,
}); });
const outFolderPath = options.parseOnly
? path.resolve(directUploadsFolder, folderName)
: path.resolve(documentsFolder, folderName);
const outFolderPath = path.resolve(documentsFolder, folderName);
try { try {
const workSheetsFromFile = xlsx.parse(fullFilePath); const workSheetsFromFile = xlsx.parse(fullFilePath);
if (!fs.existsSync(outFolderPath)) if (!fs.existsSync(outFolderPath))
@ -68,6 +71,7 @@ async function asXlsx({ fullFilePath = "", filename = "" }) {
data: sheetData, data: sheetData,
filename: `sheet-${slugify(name)}`, filename: `sheet-${slugify(name)}`,
destinationOverride: outFolderPath, destinationOverride: outFolderPath,
options: { parseOnly: options.parseOnly },
}); });
documents.push(document); documents.push(document);
console.log( console.log(

View File

@ -11,6 +11,16 @@ const documentsFolder =
? path.resolve(__dirname, `../../../server/storage/documents`) ? path.resolve(__dirname, `../../../server/storage/documents`)
: path.resolve(process.env.STORAGE_DIR, `documents`); : path.resolve(process.env.STORAGE_DIR, `documents`);
/**
* The folder where direct uploads are stored to be stored when
* processed by the collector. These are files that were DnD'd into UI
* and are not to be embedded or selectable from the file picker.
*/
const directUploadsFolder =
process.env.NODE_ENV === "development"
? path.resolve(__dirname, `../../../server/storage/direct-uploads`)
: path.resolve(process.env.STORAGE_DIR, `direct-uploads`);
/** /**
* Checks if a file is text by checking the mime type and then falling back to buffer inspection. * Checks if a file is text by checking the mime type and then falling back to buffer inspection.
* This way we can capture all the cases where the mime type is not known but still parseable as text * This way we can capture all the cases where the mime type is not known but still parseable as text
@ -102,17 +112,21 @@ function createdDate(filepath) {
* @param {Object} params.data - The data to write to the file. Must look like a document object. * @param {Object} params.data - The data to write to the file. Must look like a document object.
* @param {string} params.filename - The name of the file to write to. * @param {string} params.filename - The name of the file to write to.
* @param {string|null} params.destinationOverride - A forced destination to write to - will be honored if provided. * @param {string|null} params.destinationOverride - A forced destination to write to - will be honored if provided.
* @param {Object} params.options - The options for the function.
* @param {boolean} params.options.parseOnly - If true, the file will be written to the direct uploads folder instead of the documents folder. Will be ignored if destinationOverride is provided.
* @returns {Object} - The data with the location added. * @returns {Object} - The data with the location added.
*/ */
function writeToServerDocuments({ function writeToServerDocuments({
data = {}, data = {},
filename = null, filename,
destinationOverride = null, destinationOverride = null,
options = {},
}) { }) {
if (!filename) throw new Error("Filename is required!"); if (!filename) throw new Error("Filename is required!");
let destination = null; let destination = null;
if (destinationOverride) destination = path.resolve(destinationOverride); if (destinationOverride) destination = path.resolve(destinationOverride);
else if (options.parseOnly) destination = path.resolve(directUploadsFolder);
else destination = path.resolve(documentsFolder, "custom-documents"); else destination = path.resolve(documentsFolder, "custom-documents");
if (!fs.existsSync(destination)) if (!fs.existsSync(destination))
@ -129,6 +143,7 @@ function writeToServerDocuments({
// that will work since we know the location exists and since we only allow // that will work since we know the location exists and since we only allow
// 1-level deep folders this will always work. This still works for integrations like GitHub and YouTube. // 1-level deep folders this will always work. This still works for integrations like GitHub and YouTube.
location: destinationFilePath.split("/").slice(-2).join("/"), location: destinationFilePath.split("/").slice(-2).join("/"),
isDirectUpload: options.parseOnly || false,
}; };
} }
@ -207,4 +222,5 @@ module.exports = {
isWithin, isWithin,
sanitizeFileName, sanitizeFileName,
documentsFolder, documentsFolder,
directUploadsFolder,
}; };

View File

@ -78,6 +78,12 @@ export function ChatTooltips() {
delayShow={500} delayShow={500}
className="tooltip !text-xs max-w-[350px]" className="tooltip !text-xs max-w-[350px]"
/> />
<Tooltip
id="context-window-limit-exceeded"
place="top"
delayShow={500}
className="tooltip !text-xs max-w-[350px]"
/>
<DocumentLevelTooltip /> <DocumentLevelTooltip />
</> </>
); );

View File

@ -0,0 +1,106 @@
import { CircleNotch } from "@phosphor-icons/react";
import ModalWrapper from "@/components/ModalWrapper";
import pluralize from "pluralize";
import { numberWithCommas } from "@/utils/numbers";
import useUser from "@/hooks/useUser";
import { Link } from "react-router-dom";
import Paths from "@/utils/paths";
import Workspace from "@/models/workspace";
export default function FileUploadWarningModal({
show,
onClose,
onContinue,
onEmbed,
tokenCount,
maxTokens,
fileCount = 1,
isEmbedding = false,
embedProgress = 0,
}) {
const { user } = useUser();
const canEmbed = !user || user.role !== "default";
if (!show) return null;
if (isEmbedding) {
return (
<ModalWrapper isOpen={show}>
<div className="relative max-w-[600px] bg-theme-bg-primary rounded-lg shadow border border-theme-modal-border">
<div className="p-6 flex flex-col items-center justify-center">
<p className="text-white text-lg font-semibold mb-4">
Embedding {embedProgress + 1} of {fileCount}{" "}
{pluralize("file", fileCount)}
</p>
<CircleNotch size={32} className="animate-spin text-white" />
<p className="text-white/60 text-sm mt-2">
Please wait while we embed your files...
</p>
</div>
</div>
</ModalWrapper>
);
}
return (
<ModalWrapper isOpen={show}>
<div className="relative max-w-[600px] bg-theme-bg-primary rounded-lg shadow border border-theme-modal-border">
<div className="relative p-6 border-b border-theme-modal-border">
<div className="w-full flex gap-x-2 items-center">
<h3 className="text-xl font-semibold text-white overflow-hidden overflow-ellipsis whitespace-nowrap">
Context Window Warning
</h3>
</div>
</div>
<div className="py-7 px-9 space-y-4">
<p className="text-theme-text-primary text-sm">
Your workspace is using {numberWithCommas(tokenCount)} of{" "}
{numberWithCommas(maxTokens)} available tokens. We recommend keeping
usage below {(Workspace.maxContextWindowLimit * 100).toFixed(0)}% to
ensure the best chat experience. Adding {fileCount} more{" "}
{pluralize("file", fileCount)} would exceed this limit.{" "}
<Link
target="_blank"
to={Paths.documentation.contextWindows()}
className="text-theme-text-secondary text-sm underline"
>
Learn more about context windows &rarr;
</Link>
</p>
<p className="text-theme-text-primary text-sm">
Choose how you would like to proceed with these uploads.
</p>
</div>
<div className="flex w-full justify-between items-center p-6 space-x-2 border-t border-theme-modal-border rounded-b">
<button
onClick={onClose}
type="button"
className="border-none transition-all duration-300 bg-theme-modal-border text-white hover:opacity-60 px-4 py-2 rounded-lg text-sm"
>
Cancel
</button>
<div className="flex w-full justify-end items-center space-x-2">
<button
onClick={onContinue}
type="button"
className="border-none transition-all duration-300 bg-theme-modal-border text-white hover:opacity-60 px-4 py-2 rounded-lg text-sm"
>
Continue Anyway
</button>
{canEmbed && (
<button
onClick={onEmbed}
disabled={isEmbedding || !canEmbed}
type="button"
className="border-none transition-all duration-300 bg-white text-black hover:opacity-60 px-4 py-2 rounded-lg text-sm"
>
Embed {pluralize("File", fileCount)}
</button>
)}
</div>
</div>
</div>
</ModalWrapper>
);
}

View File

@ -4,7 +4,9 @@ import System from "@/models/system";
import { useDropzone } from "react-dropzone"; import { useDropzone } from "react-dropzone";
import DndIcon from "./dnd-icon.png"; import DndIcon from "./dnd-icon.png";
import Workspace from "@/models/workspace"; import Workspace from "@/models/workspace";
import useUser from "@/hooks/useUser"; import showToast from "@/utils/toast";
import FileUploadWarningModal from "./FileUploadWarningModal";
import pluralize from "pluralize";
export const DndUploaderContext = createContext(); export const DndUploaderContext = createContext();
export const REMOVE_ATTACHMENT_EVENT = "ATTACHMENT_REMOVE"; export const REMOVE_ATTACHMENT_EVENT = "ATTACHMENT_REMOVE";
@ -12,6 +14,8 @@ export const CLEAR_ATTACHMENTS_EVENT = "ATTACHMENT_CLEAR";
export const PASTE_ATTACHMENT_EVENT = "ATTACHMENT_PASTED"; export const PASTE_ATTACHMENT_EVENT = "ATTACHMENT_PASTED";
export const ATTACHMENTS_PROCESSING_EVENT = "ATTACHMENTS_PROCESSING"; export const ATTACHMENTS_PROCESSING_EVENT = "ATTACHMENTS_PROCESSING";
export const ATTACHMENTS_PROCESSED_EVENT = "ATTACHMENTS_PROCESSED"; export const ATTACHMENTS_PROCESSED_EVENT = "ATTACHMENTS_PROCESSED";
export const PARSED_FILE_ATTACHMENT_REMOVED_EVENT =
"PARSED_FILE_ATTACHMENT_REMOVED";
/** /**
* File Attachment for automatic upload on the chat container page. * File Attachment for automatic upload on the chat container page.
@ -19,30 +23,58 @@ export const ATTACHMENTS_PROCESSED_EVENT = "ATTACHMENTS_PROCESSED";
* @property {string} uid - unique file id. * @property {string} uid - unique file id.
* @property {File} file - native File object * @property {File} file - native File object
* @property {string|null} contentString - base64 encoded string of file * @property {string|null} contentString - base64 encoded string of file
* @property {('in_progress'|'failed'|'success')} status - the automatic upload status. * @property {('in_progress'|'failed'|'embedded'|'added_context')} status - the automatic upload status.
* @property {string|null} error - Error message * @property {string|null} error - Error message
* @property {{id:string, location:string}|null} document - uploaded document details * @property {{id:string, location:string}|null} document - uploaded document details
* @property {('attachment'|'upload')} type - The type of upload. Attachments are chat-specific, uploads go to the workspace. * @property {('attachment'|'upload')} type - The type of upload. Attachments are chat-specific, uploads go to the workspace.
*/ */
export function DnDFileUploaderProvider({ workspace, children }) { /**
* @typedef {Object} ParsedFile
* @property {number} id - The id of the parsed file.
* @property {string} filename - The name of the parsed file.
* @property {number} workspaceId - The id of the workspace the parsed file belongs to.
* @property {string|null} userId - The id of the user the parsed file belongs to.
* @property {string|null} threadId - The id of the thread the parsed file belongs to.
* @property {string} metadata - The metadata of the parsed file.
* @property {number} tokenCountEstimate - The estimated token count of the parsed file.
*/
export function DnDFileUploaderProvider({
workspace,
threadSlug = null,
children,
}) {
const [files, setFiles] = useState([]); const [files, setFiles] = useState([]);
const [ready, setReady] = useState(false); const [ready, setReady] = useState(false);
const [dragging, setDragging] = useState(false); const [dragging, setDragging] = useState(false);
const { user } = useUser(); const [showWarningModal, setShowWarningModal] = useState(false);
const [isEmbedding, setIsEmbedding] = useState(false);
const [embedProgress, setEmbedProgress] = useState(0);
const [pendingFiles, setPendingFiles] = useState([]);
const [tokenCount, setTokenCount] = useState(0);
const [maxTokens, setMaxTokens] = useState(Number.POSITIVE_INFINITY);
useEffect(() => { useEffect(() => {
System.checkDocumentProcessorOnline().then((status) => setReady(status)); System.checkDocumentProcessorOnline().then((status) => setReady(status));
}, [user]); }, []);
useEffect(() => { useEffect(() => {
window.addEventListener(REMOVE_ATTACHMENT_EVENT, handleRemove); window.addEventListener(REMOVE_ATTACHMENT_EVENT, handleRemove);
window.addEventListener(CLEAR_ATTACHMENTS_EVENT, resetAttachments); window.addEventListener(CLEAR_ATTACHMENTS_EVENT, resetAttachments);
window.addEventListener(PASTE_ATTACHMENT_EVENT, handlePastedAttachment); window.addEventListener(PASTE_ATTACHMENT_EVENT, handlePastedAttachment);
window.addEventListener(
PARSED_FILE_ATTACHMENT_REMOVED_EVENT,
handleRemoveParsedFile
);
return () => { return () => {
window.removeEventListener(REMOVE_ATTACHMENT_EVENT, handleRemove); window.removeEventListener(REMOVE_ATTACHMENT_EVENT, handleRemove);
window.removeEventListener(CLEAR_ATTACHMENTS_EVENT, resetAttachments); window.removeEventListener(CLEAR_ATTACHMENTS_EVENT, resetAttachments);
window.removeEventListener(
PARSED_FILE_ATTACHMENT_REMOVED_EVENT,
handleRemoveParsedFile
);
window.removeEventListener( window.removeEventListener(
PASTE_ATTACHMENT_EVENT, PASTE_ATTACHMENT_EVENT,
handlePastedAttachment handlePastedAttachment
@ -50,6 +82,18 @@ export function DnDFileUploaderProvider({ workspace, children }) {
}; };
}, []); }, []);
/**
* Handles the removal of a parsed file attachment from the uploader queue.
* Only uses the document id to remove the file from the queue
* @param {CustomEvent<{document: ParsedFile}>} event
*/
async function handleRemoveParsedFile(event) {
const { document } = event.detail;
setFiles((prev) =>
prev.filter((prevFile) => prevFile.document.id !== document.id)
);
}
/** /**
* Remove file from uploader queue. * Remove file from uploader queue.
* @param {CustomEvent<{uid: string}>} event * @param {CustomEvent<{uid: string}>} event
@ -112,8 +156,6 @@ export function DnDFileUploaderProvider({ workspace, children }) {
type: "attachment", type: "attachment",
}); });
} else { } else {
// If the user is a default user, we do not want to allow them to upload files.
if (!!user && user.role === "default") continue;
newAccepted.push({ newAccepted.push({
uid: v4(), uid: v4(),
file, file,
@ -149,8 +191,6 @@ export function DnDFileUploaderProvider({ workspace, children }) {
type: "attachment", type: "attachment",
}); });
} else { } else {
// If the user is a default user, we do not want to allow them to upload files.
if (!!user && user.role === "default") continue;
newAccepted.push({ newAccepted.push({
uid: v4(), uid: v4(),
file, file,
@ -170,36 +210,87 @@ export function DnDFileUploaderProvider({ workspace, children }) {
* Embeds attachments that are eligible for embedding - basically files that are not images. * Embeds attachments that are eligible for embedding - basically files that are not images.
* @param {Attachment[]} newAttachments * @param {Attachment[]} newAttachments
*/ */
function embedEligibleAttachments(newAttachments = []) { async function embedEligibleAttachments(newAttachments = []) {
window.dispatchEvent(new CustomEvent(ATTACHMENTS_PROCESSING_EVENT)); window.dispatchEvent(new CustomEvent(ATTACHMENTS_PROCESSING_EVENT));
const promises = []; const promises = [];
const { currentContextTokenCount, contextWindow } =
await Workspace.getParsedFiles(workspace.slug, threadSlug);
const workspaceContextWindow = contextWindow
? Math.floor(contextWindow * Workspace.maxContextWindowLimit)
: Number.POSITIVE_INFINITY;
setMaxTokens(workspaceContextWindow);
let totalTokenCount = currentContextTokenCount;
let batchPendingFiles = [];
for (const attachment of newAttachments) { for (const attachment of newAttachments) {
// Images/attachments are chat specific. // Images/attachments are chat specific.
if (attachment.type === "attachment") continue; if (attachment.type === "attachment") continue;
const formData = new FormData(); const formData = new FormData();
formData.append("file", attachment.file, attachment.file.name); formData.append("file", attachment.file, attachment.file.name);
formData.append("threadSlug", threadSlug || null);
promises.push( promises.push(
Workspace.uploadAndEmbedFile(workspace.slug, formData).then( Workspace.parseFile(workspace.slug, formData).then(
({ response, data }) => { async ({ response, data }) => {
if (!response.ok) {
const updates = {
status: "failed",
error: data?.error ?? null,
};
setFiles((prev) =>
prev.map(
(
/** @type {Attachment} */
prevFile
) =>
prevFile.uid !== attachment.uid
? prevFile
: { ...prevFile, ...updates }
)
);
return;
}
// Will always be one file in the array
/** @type {ParsedFile} */
const file = data.files[0];
// Add token count for this file
// and add it to the batch pending files
totalTokenCount += file.tokenCountEstimate;
batchPendingFiles.push({
attachment,
parsedFileId: file.id,
tokenCount: file.tokenCountEstimate,
});
if (totalTokenCount > workspaceContextWindow) {
setTokenCount(totalTokenCount);
setPendingFiles(batchPendingFiles);
setShowWarningModal(true);
return;
}
// File is within limits, keep in parsed files
const result = { success: true, document: file };
const updates = { const updates = {
status: response.ok ? "success" : "failed", status: result.success ? "added_context" : "failed",
error: data?.error ?? null, error: result.error ?? null,
document: data?.document, document: result.document,
}; };
setFiles((prev) => { setFiles((prev) =>
return prev.map( prev.map(
( (
/** @type {Attachment} */ /** @type {Attachment} */
prevFile prevFile
) => { ) =>
if (prevFile.uid !== attachment.uid) return prevFile; prevFile.uid !== attachment.uid
return { ...prevFile, ...updates }; ? prevFile
} : { ...prevFile, ...updates }
); )
}); );
} }
) )
); );
@ -211,10 +302,117 @@ export function DnDFileUploaderProvider({ workspace, children }) {
); );
} }
// Handle modal actions
const handleCloseModal = async () => {
if (!pendingFiles.length) return;
// Delete all files from this batch
await Workspace.deleteParsedFiles(
workspace.slug,
pendingFiles.map((file) => file.parsedFileId)
);
// Remove all files from this batch from the UI
setFiles((prev) =>
prev.filter(
(prevFile) =>
!pendingFiles.some((file) => file.attachment.uid === prevFile.uid)
)
);
setShowWarningModal(false);
setPendingFiles([]);
setTokenCount(0);
window.dispatchEvent(new CustomEvent(ATTACHMENTS_PROCESSED_EVENT));
};
const handleContinueAnyway = async () => {
if (!pendingFiles.length) return;
const results = pendingFiles.map((file) => ({
success: true,
document: { id: file.parsedFileId },
}));
const fileUpdates = pendingFiles.map((file, i) => ({
uid: file.attachment.uid,
updates: {
status: results[i].success ? "success" : "failed",
error: results[i].error ?? null,
document: results[i].document,
},
}));
setFiles((prev) =>
prev.map((prevFile) => {
const update = fileUpdates.find((f) => f.uid === prevFile.uid);
return update ? { ...prevFile, ...update.updates } : prevFile;
})
);
setShowWarningModal(false);
setPendingFiles([]);
setTokenCount(0);
};
const handleEmbed = async () => {
if (!pendingFiles.length) return;
setIsEmbedding(true);
setEmbedProgress(0);
// Embed all pending files
let completed = 0;
const results = await Promise.all(
pendingFiles.map((file) =>
Workspace.embedParsedFile(workspace.slug, file.parsedFileId).then(
(result) => {
completed++;
setEmbedProgress(completed);
return result;
}
)
)
);
// Update status for all files
const fileUpdates = pendingFiles.map((file, i) => ({
uid: file.attachment.uid,
updates: {
status: results[i].response.ok ? "embedded" : "failed",
error: results[i].data?.error ?? null,
document: results[i].data?.document,
},
}));
setFiles((prev) =>
prev.map((prevFile) => {
const update = fileUpdates.find((f) => f.uid === prevFile.uid);
return update ? { ...prevFile, ...update.updates } : prevFile;
})
);
setShowWarningModal(false);
setPendingFiles([]);
setTokenCount(0);
setIsEmbedding(false);
window.dispatchEvent(new CustomEvent(ATTACHMENTS_PROCESSED_EVENT));
showToast(
`${pendingFiles.length} ${pluralize("file", pendingFiles.length)} embedded successfully`,
"success"
);
};
return ( return (
<DndUploaderContext.Provider <DndUploaderContext.Provider
value={{ files, ready, dragging, setDragging, onDrop, parseAttachments }} value={{ files, ready, dragging, setDragging, onDrop, parseAttachments }}
> >
<FileUploadWarningModal
show={showWarningModal}
onClose={handleCloseModal}
onContinue={handleContinueAnyway}
onEmbed={handleEmbed}
tokenCount={tokenCount}
maxTokens={maxTokens}
fileCount={pendingFiles.length}
isEmbedding={isEmbedding}
embedProgress={embedProgress}
/>
{children} {children}
</DndUploaderContext.Provider> </DndUploaderContext.Provider>
); );
@ -231,8 +429,6 @@ export default function DnDFileUploaderWrapper({ children }) {
onDragEnter: () => setDragging(true), onDragEnter: () => setDragging(true),
onDragLeave: () => setDragging(false), onDragLeave: () => setDragging(false),
}); });
const { user } = useUser();
const canUploadAll = !user || user?.role !== "default";
return ( return (
<div <div
@ -245,22 +441,16 @@ export default function DnDFileUploaderWrapper({ children }) {
> >
<div className="w-full h-full flex justify-center items-center rounded-xl"> <div className="w-full h-full flex justify-center items-center rounded-xl">
<div className="flex flex-col gap-y-[14px] justify-center items-center"> <div className="flex flex-col gap-y-[14px] justify-center items-center">
<img src={DndIcon} width={69} height={69} /> <img
<p className="text-white text-[24px] font-semibold"> src={DndIcon}
Add {canUploadAll ? "anything" : "an image"} width={69}
</p> height={69}
alt="Drag and drop icon"
/>
<p className="text-white text-[24px] font-semibold">Add anything</p>
<p className="text-white text-[16px] text-center"> <p className="text-white text-[16px] text-center">
{canUploadAll ? ( Drop a file or image here to attach it to your <br />
<> workspace auto-magically.
Drop your file here to embed it into your <br />
workspace auto-magically.
</>
) : (
<>
Drop your image here to chat with it <br />
auto-magically.
</>
)}
</p> </p>
</div> </div>
</div> </div>

View File

@ -0,0 +1,197 @@
import { useState } from "react";
import { X, CircleNotch, Warning } from "@phosphor-icons/react";
import Workspace from "@/models/workspace";
import { useParams } from "react-router-dom";
import { nFormatter } from "@/utils/numbers";
import showToast from "@/utils/toast";
import pluralize from "pluralize";
import { PARSED_FILE_ATTACHMENT_REMOVED_EVENT } from "../../../DnDWrapper";
import useUser from "@/hooks/useUser";
export default function ParsedFilesMenu({
onEmbeddingChange,
tooltipRef,
files,
setFiles,
currentTokens,
setCurrentTokens,
contextWindow,
isLoading,
}) {
const { user } = useUser();
const canEmbed = !user || user.role !== "default";
const initialContextWindowLimitExceeded =
contextWindow &&
currentTokens >= contextWindow * Workspace.maxContextWindowLimit;
const { slug, threadSlug = null } = useParams();
const [isEmbedding, setIsEmbedding] = useState(false);
const [embedProgress, setEmbedProgress] = useState(1);
const [contextWindowLimitExceeded, setContextWindowLimitExceeded] = useState(
initialContextWindowLimitExceeded
);
async function handleRemove(e, file) {
e.preventDefault();
e.stopPropagation();
if (!file?.id) return;
const success = await Workspace.deleteParsedFiles(slug, [file.id]);
if (!success) return;
// Update the local files list and current tokens
setFiles((prev) => prev.filter((f) => f.id !== file.id));
// Dispatch an event to the DnDFileUploaderWrapper to update the files list in attachment manager if it exists
window.dispatchEvent(
new CustomEvent(PARSED_FILE_ATTACHMENT_REMOVED_EVENT, {
detail: { document: file },
})
);
const { currentContextTokenCount } = await Workspace.getParsedFiles(
slug,
threadSlug
);
const newContextWindowLimitExceeded =
contextWindow &&
currentContextTokenCount >=
contextWindow * Workspace.maxContextWindowLimit;
setCurrentTokens(currentContextTokenCount);
setContextWindowLimitExceeded(newContextWindowLimitExceeded);
}
/**
* Handles the embedding of the files when the user exceeds the context window limit
* and opts to embed the files into the workspace instead.
* @returns {Promise<void>}
*/
async function handleEmbed() {
if (!files.length) return;
setIsEmbedding(true);
onEmbeddingChange?.(true);
setEmbedProgress(1);
try {
let completed = 0;
await Promise.all(
files.map((file) =>
Workspace.embedParsedFile(slug, file.id).then(() => {
completed++;
setEmbedProgress(completed + 1);
})
)
);
setFiles([]);
const { currentContextTokenCount } = await Workspace.getParsedFiles(
slug,
threadSlug
);
setCurrentTokens(currentContextTokenCount);
setContextWindowLimitExceeded(
currentContextTokenCount >=
contextWindow * Workspace.maxContextWindowLimit
);
showToast(
`${files.length} ${pluralize("file", files.length)} embedded successfully`,
"success"
);
tooltipRef?.current?.close();
} catch (error) {
console.error("Failed to embed files:", error);
showToast("Failed to embed files", "error");
}
setIsEmbedding(false);
onEmbeddingChange?.(false);
setEmbedProgress(1);
}
return (
<div className="flex flex-col gap-2 p-2">
<div className="flex items-center justify-between">
<div className="text-sm font-medium text-theme-text-primary">
Current Context ({files.length} files)
</div>
<div
// If the user cannot see the embed CTA, show a tooltip
{...(contextWindowLimitExceeded &&
!canEmbed && {
"data-tooltip-id": "context-window-limit-exceeded",
"data-tooltip-content":
"You have exceeded the context window limit. Some files may be truncated or excluded from chat responses. Responses may hallucinate or lack relevant information.",
})}
className={`flex items-center gap-x-1 ${contextWindowLimitExceeded && !canEmbed ? "cursor-pointer" : ""}`}
>
{contextWindowLimitExceeded && (
<Warning size={14} className="text-orange-600" />
)}
<div
className={`text-xs ${contextWindowLimitExceeded ? "text-orange-600" : "text-theme-text-secondary"}`}
>
{nFormatter(currentTokens)} /{" "}
{contextWindow ? nFormatter(contextWindow) : "--"} tokens
</div>
</div>
</div>
{contextWindowLimitExceeded && canEmbed && (
<div className="flex flex-col gap-2 p-2 bg-theme-bg-secondary light:bg-theme-bg-primary rounded">
<div className="flex items-start gap-2">
<Warning
className="flex-shrink-0 mt-1 text-yellow-500 light:text-yellow-600"
size={16}
/>
<div className="text-xs text-theme-text-primary">
Your context window is getting full. Some files may be truncated
or excluded from chat responses. We recommend embedding these
files directly into your workspace for better results.
</div>
</div>
<button
onClick={handleEmbed}
disabled={isEmbedding}
className="border-none disabled:opacity-50 flex items-center justify-center gap-2 px-3 py-2 text-xs bg-primary-button hover:bg-theme-button-primary-hover text-white font-medium rounded transition-colors shadow-sm"
>
{isEmbedding ? (
<>
<CircleNotch size={14} className="animate-spin" />
Embedding {embedProgress} of {files.length} files...
</>
) : (
"Embed Files into Workspace"
)}
</button>
</div>
)}
<div className="flex flex-col gap-1 max-h-[300px] overflow-y-auto">
{files.length > 0 &&
files.map((file, i) => (
<div
key={i}
className={
"flex items-center justify-between gap-2 p-2 text-xs bg-theme-bg-secondary rounded"
}
>
<div className="truncate flex-1 text-theme-text-primary">
{file.title}
</div>
<button
onClick={(e) => handleRemove(e, file)}
className="border-none text-theme-text-secondary hover:text-theme-text-primary"
disabled={isEmbedding}
>
<X size={16} />
</button>
</div>
))}
{isLoading && (
<div className="flex items-center justify-center gap-2 text-xs text-theme-text-secondary text-center py-2">
<CircleNotch size={16} className="animate-spin" />
Loading...
</div>
)}
{!isLoading && files.length === 0 && (
<div className="text-xs text-theme-text-secondary text-center py-2">
No files found
</div>
)}
</div>
</div>
);
}

View File

@ -1,7 +1,15 @@
import useUser from "@/hooks/useUser";
import { PaperclipHorizontal } from "@phosphor-icons/react"; import { PaperclipHorizontal } from "@phosphor-icons/react";
import { Tooltip } from "react-tooltip"; import { Tooltip } from "react-tooltip";
import { useTranslation } from "react-i18next"; import { useTranslation } from "react-i18next";
import { useRef, useState, useEffect } from "react";
import { useParams } from "react-router-dom";
import Workspace from "@/models/workspace";
import {
ATTACHMENTS_PROCESSED_EVENT,
REMOVE_ATTACHMENT_EVENT,
} from "../../DnDWrapper";
import { useTheme } from "@/hooks/useTheme";
import ParsedFilesMenu from "./ParsedFilesMenu";
/** /**
* This is a simple proxy component that clicks on the DnD file uploader for the user. * This is a simple proxy component that clicks on the DnD file uploader for the user.
@ -9,35 +17,119 @@ import { useTranslation } from "react-i18next";
*/ */
export default function AttachItem() { export default function AttachItem() {
const { t } = useTranslation(); const { t } = useTranslation();
const { user } = useUser(); const { theme } = useTheme();
if (!!user && user.role === "default") return null; const { slug, threadSlug = null } = useParams();
const tooltipRef = useRef(null);
const [isEmbedding, setIsEmbedding] = useState(false);
const [files, setFiles] = useState([]);
const [currentTokens, setCurrentTokens] = useState(0);
const [contextWindow, setContextWindow] = useState(Infinity);
const [showTooltip, setShowTooltip] = useState(false);
const [isLoading, setIsLoading] = useState(true);
const fetchFiles = () => {
if (!slug) return;
if (isEmbedding) return;
setIsLoading(true);
Workspace.getParsedFiles(slug, threadSlug)
.then(({ files, contextWindow, currentContextTokenCount }) => {
setFiles(files);
setShowTooltip(files.length > 0);
setContextWindow(contextWindow);
setCurrentTokens(currentContextTokenCount);
})
.finally(() => {
setIsLoading(false);
});
};
/**
* Handles the removal of an attachment from the parsed files
* and triggers a re-fetch of the parsed files.
* This function handles when the user clicks the X on an Attachment via the AttachmentManager
* so we need to sync the state in the ParsedFilesMenu picker here.
*/
async function handleRemoveAttachment(e) {
const { document } = e.detail;
await Workspace.deleteParsedFiles(slug, [document.id]);
fetchFiles();
}
/**
* Handles the click event for the attach item button.
* @param {MouseEvent} e - The click event.
* @returns {void}
*/
function handleClick(e) {
e?.target?.blur();
document?.getElementById("dnd-chat-file-uploader")?.click();
return;
}
useEffect(() => {
fetchFiles();
window.addEventListener(ATTACHMENTS_PROCESSED_EVENT, fetchFiles);
window.addEventListener(REMOVE_ATTACHMENT_EVENT, handleRemoveAttachment);
return () => {
window.removeEventListener(ATTACHMENTS_PROCESSED_EVENT, fetchFiles);
window.removeEventListener(
REMOVE_ATTACHMENT_EVENT,
handleRemoveAttachment
);
};
}, [slug, threadSlug]);
return ( return (
<> <>
<button <button
id="attach-item-btn" id="attach-item-btn"
data-tooltip-id="attach-item-btn" data-tooltip-id="tooltip-attach-item-btn"
data-tooltip-content={t("chat_window.attach_file")}
aria-label={t("chat_window.attach_file")} aria-label={t("chat_window.attach_file")}
type="button" type="button"
onClick={(e) => { onClick={handleClick}
e?.target?.blur(); onPointerEnter={fetchFiles}
document?.getElementById("dnd-chat-file-uploader")?.click();
return;
}}
className={`border-none relative flex justify-center items-center opacity-60 hover:opacity-100 light:opacity-100 light:hover:opacity-60 cursor-pointer`} className={`border-none relative flex justify-center items-center opacity-60 hover:opacity-100 light:opacity-100 light:hover:opacity-60 cursor-pointer`}
> >
<PaperclipHorizontal <div className="relative">
color="var(--theme-sidebar-footer-icon-fill)" <PaperclipHorizontal
className="w-[22px] h-[22px] pointer-events-none text-white rotate-90 -scale-y-100" color="var(--theme-sidebar-footer-icon-fill)"
/> className="w-[22px] h-[22px] pointer-events-none text-white rotate-90 -scale-y-100"
/>
{files.length > 0 && (
<div className="absolute -top-2 right-[1%] bg-white text-black light:invert text-[8px] rounded-full px-1 flex items-center justify-center">
{files.length}
</div>
)}
</div>
</button> </button>
<Tooltip {showTooltip && (
id="attach-item-btn" <Tooltip
place="top" ref={tooltipRef}
delayShow={300} id="tooltip-attach-item-btn"
className="tooltip !text-xs z-[99]" place="top"
/> opacity={1}
clickable={!isEmbedding}
delayShow={300}
delayHide={isEmbedding ? 999999 : 800} // Prevent tooltip from hiding during embedding
arrowColor={
theme === "light"
? "var(--theme-modal-border)"
: "var(--theme-bg-primary)"
}
className="z-99 !w-[400px] !bg-theme-bg-primary !px-[5px] !rounded-lg !pointer-events-auto light:border-2 light:border-theme-modal-border"
>
<ParsedFilesMenu
onEmbeddingChange={setIsEmbedding}
tooltipRef={tooltipRef}
isLoading={isLoading}
files={files}
setFiles={setFiles}
currentTokens={currentTokens}
setCurrentTokens={setCurrentTokens}
contextWindow={contextWindow}
/>
</Tooltip>
)}
</> </>
); );
} }

View File

@ -160,7 +160,11 @@ function AttachmentItem({ attachment }) {
<> <>
<div <div
data-tooltip-id={`attachment-uid-${uid}-success`} data-tooltip-id={`attachment-uid-${uid}-success`}
data-tooltip-content={`${file.name} was uploaded and embedded into this workspace. It will be available for RAG chat now.`} data-tooltip-content={
status === "embedded"
? `${file.name} was uploaded and embedded into this workspace. It will be available for RAG chat now.`
: `${file.name} will be used as context for this chat only.`
}
className={`relative flex items-center gap-x-1 rounded-lg bg-theme-attachment-bg border-none w-[180px] group`} className={`relative flex items-center gap-x-1 rounded-lg bg-theme-attachment-bg border-none w-[180px] group`}
> >
<div className="invisible group-hover:visible absolute -top-[5px] -right-[5px] w-fit h-fit z-[10]"> <div className="invisible group-hover:visible absolute -top-[5px] -right-[5px] w-fit h-fit z-[10]">
@ -186,7 +190,7 @@ function AttachmentItem({ attachment }) {
{file.name} {file.name}
</p> </p>
<p className="text-theme-attachment-text-secondary text-[10px] leading-[14px] font-medium"> <p className="text-theme-attachment-text-secondary text-[10px] leading-[14px] font-medium">
File embedded! {status === "embedded" ? "File embedded!" : "Added as context!"}
</p> </p>
</div> </div>
</div> </div>

View File

@ -79,7 +79,7 @@ export default function WorkspaceChat({ loading, workspace }) {
setEventDelegatorForCodeSnippets(); setEventDelegatorForCodeSnippets();
return ( return (
<TTSProvider> <TTSProvider>
<DnDFileUploaderProvider workspace={workspace}> <DnDFileUploaderProvider workspace={workspace} threadSlug={threadSlug}>
<ChatContainer workspace={workspace} knownHistory={history} /> <ChatContainer workspace={workspace} knownHistory={history} />
</DnDFileUploaderProvider> </DnDFileUploaderProvider>
</TTSProvider> </TTSProvider>

View File

@ -1,4 +1,4 @@
import { API_BASE } from "@/utils/constants"; import { API_BASE, fullApiUrl } from "@/utils/constants";
import { baseHeaders, safeJsonParse } from "@/utils/request"; import { baseHeaders, safeJsonParse } from "@/utils/request";
import { fetchEventSource } from "@microsoft/fetch-event-source"; import { fetchEventSource } from "@microsoft/fetch-event-source";
import WorkspaceThread from "@/models/workspaceThread"; import WorkspaceThread from "@/models/workspaceThread";
@ -7,6 +7,8 @@ import { ABORT_STREAM_EVENT } from "@/utils/chat";
const Workspace = { const Workspace = {
workspaceOrderStorageKey: "anythingllm-workspace-order", workspaceOrderStorageKey: "anythingllm-workspace-order",
/** The maximum percentage of the context window that can be used for attachments */
maxContextWindowLimit: 0.8,
new: async function (data = {}) { new: async function (data = {}) {
const { workspace, message } = await fetch(`${API_BASE}/workspace/new`, { const { workspace, message } = await fetch(`${API_BASE}/workspace/new`, {
@ -250,6 +252,28 @@ const Workspace = {
const data = await response.json(); const data = await response.json();
return { response, data }; return { response, data };
}, },
parseFile: async function (slug, formData) {
const response = await fetch(`${API_BASE}/workspace/${slug}/parse`, {
method: "POST",
body: formData,
headers: baseHeaders(),
});
const data = await response.json();
return { response, data };
},
getParsedFiles: async function (slug, threadSlug = null) {
const basePath = new URL(`${fullApiUrl()}/workspace/${slug}/parsed-files`);
if (threadSlug) basePath.searchParams.set("threadSlug", threadSlug);
const response = await fetch(basePath, {
method: "GET",
headers: baseHeaders(),
});
const data = await response.json();
return data;
},
uploadLink: async function (slug, link) { uploadLink: async function (slug, link) {
const response = await fetch(`${API_BASE}/workspace/${slug}/upload-link`, { const response = await fetch(`${API_BASE}/workspace/${slug}/upload-link`, {
method: "POST", method: "POST",
@ -454,6 +478,31 @@ const Workspace = {
return { response, data }; return { response, data };
}, },
deleteParsedFiles: async function (slug, fileIds = []) {
const response = await fetch(
`${API_BASE}/workspace/${slug}/delete-parsed-files`,
{
method: "DELETE",
headers: baseHeaders(),
body: JSON.stringify({ fileIds }),
}
);
return response.ok;
},
embedParsedFile: async function (slug, fileId) {
const response = await fetch(
`${API_BASE}/workspace/${slug}/embed-parsed-file/${fileId}`,
{
method: "POST",
headers: baseHeaders(),
}
);
const data = await response.json();
return { response, data };
},
/** /**
* Deletes and un-embeds a single file in a single call from a workspace * Deletes and un-embeds a single file in a single call from a workspace
* @param {string} slug - workspace slug * @param {string} slug - workspace slug

View File

@ -211,6 +211,13 @@ export default {
}, },
}, },
// TODO: Migrate all docs.anythingllm.com links to the new docs.
documentation: {
contextWindows: () => {
return "https://docs.anythingllm.com/chatting-with-documents/introduction#you-exceed-the-context-window---what-now";
},
},
experimental: { experimental: {
liveDocumentSync: { liveDocumentSync: {
manage: () => `/settings/beta-features/live-document-sync/manage`, manage: () => `/settings/beta-features/live-document-sync/manage`,

1
server/.gitignore vendored
View File

@ -14,6 +14,7 @@ storage/plugins/agent-flows/*
storage/plugins/office-extensions/* storage/plugins/office-extensions/*
storage/plugins/anythingllm_mcp_servers.json storage/plugins/anythingllm_mcp_servers.json
!storage/documents/DOCUMENTS.md !storage/documents/DOCUMENTS.md
storage/direct-uploads
logs/server.log logs/server.log
*.db *.db
*.db-journal *.db-journal

View File

@ -32,14 +32,15 @@ const {
} = require("../utils/files/pfp"); } = require("../utils/files/pfp");
const { getTTSProvider } = require("../utils/TextToSpeech"); const { getTTSProvider } = require("../utils/TextToSpeech");
const { WorkspaceThread } = require("../models/workspaceThread"); const { WorkspaceThread } = require("../models/workspaceThread");
const truncate = require("truncate"); const truncate = require("truncate");
const { purgeDocument } = require("../utils/files/purgeDocument"); const { purgeDocument } = require("../utils/files/purgeDocument");
const { getModelTag } = require("./utils"); const { getModelTag } = require("./utils");
const { searchWorkspaceAndThreads } = require("../utils/helpers/search"); const { searchWorkspaceAndThreads } = require("../utils/helpers/search");
const { workspaceParsedFilesEndpoints } = require("./workspacesParsedFiles");
function workspaceEndpoints(app) { function workspaceEndpoints(app) {
if (!app) return; if (!app) return;
const responseCache = new Map(); const responseCache = new Map();
app.post( app.post(
@ -1060,6 +1061,9 @@ function workspaceEndpoints(app) {
} }
} }
); );
// Parsed Files in separate endpoint just to keep the workspace endpoints clean
workspaceParsedFilesEndpoints(app);
} }
module.exports = { workspaceEndpoints }; module.exports = { workspaceEndpoints };

View File

@ -0,0 +1,199 @@
const { reqBody, multiUserMode, userFromSession } = require("../utils/http");
const { handleFileUpload } = require("../utils/files/multer");
const { validatedRequest } = require("../utils/middleware/validatedRequest");
const { Telemetry } = require("../models/telemetry");
const {
flexUserRoleValid,
ROLES,
} = require("../utils/middleware/multiUserProtected");
const { EventLogs } = require("../models/eventLogs");
const { validWorkspaceSlug } = require("../utils/middleware/validWorkspace");
const { CollectorApi } = require("../utils/collectorApi");
const { WorkspaceThread } = require("../models/workspaceThread");
const { WorkspaceParsedFiles } = require("../models/workspaceParsedFiles");
function workspaceParsedFilesEndpoints(app) {
if (!app) return;
app.get(
"/workspace/:slug/parsed-files",
[validatedRequest, flexUserRoleValid([ROLES.all]), validWorkspaceSlug],
async (request, response) => {
try {
const threadSlug = request.query.threadSlug || null;
const user = await userFromSession(request, response);
const workspace = response.locals.workspace;
const thread = threadSlug
? await WorkspaceThread.get({ slug: String(threadSlug) })
: null;
const { files, contextWindow, currentContextTokenCount } =
await WorkspaceParsedFiles.getContextMetadataAndLimits(
workspace,
thread || null,
multiUserMode(response) ? user : null
);
return response
.status(200)
.json({ files, contextWindow, currentContextTokenCount });
} catch (e) {
console.error(e.message, e);
return response.sendStatus(500).end();
}
}
);
app.delete(
"/workspace/:slug/delete-parsed-files",
[validatedRequest, flexUserRoleValid([ROLES.all]), validWorkspaceSlug],
async function (request, response) {
try {
const { fileIds = [] } = reqBody(request);
if (!fileIds.length) return response.sendStatus(400).end();
const success = await WorkspaceParsedFiles.delete({
id: { in: fileIds.map((id) => parseInt(id)) },
});
return response.status(success ? 200 : 500).end();
} catch (e) {
console.error(e.message, e);
return response.sendStatus(500).end();
}
}
);
app.post(
"/workspace/:slug/embed-parsed-file/:fileId",
[
validatedRequest,
// Embed is still an admin/manager only feature
flexUserRoleValid([ROLES.admin, ROLES.manager]),
validWorkspaceSlug,
],
async function (request, response) {
const { fileId = null } = request.params;
try {
const user = await userFromSession(request, response);
const workspace = response.locals.workspace;
if (!fileId) return response.sendStatus(400).end();
const { success, error, document } =
await WorkspaceParsedFiles.moveToDocumentsAndEmbed(fileId, workspace);
if (!success) {
return response.status(500).json({
success: false,
error: error || "Failed to embed file",
});
}
await Telemetry.sendTelemetry("document_embedded");
await EventLogs.logEvent(
"document_embedded",
{
documentName: document?.name || "unknown",
workspaceId: workspace.id,
},
user?.id
);
return response.status(200).json({
success: true,
error: null,
document,
});
} catch (e) {
console.error(e.message, e);
return response.sendStatus(500).end();
} finally {
if (!fileId) return;
await WorkspaceParsedFiles.delete({ id: parseInt(fileId) });
}
}
);
app.post(
"/workspace/:slug/parse",
[
validatedRequest,
flexUserRoleValid([ROLES.all]),
handleFileUpload,
validWorkspaceSlug,
],
async function (request, response) {
try {
const user = await userFromSession(request, response);
const workspace = response.locals.workspace;
const Collector = new CollectorApi();
const { originalname } = request.file;
const processingOnline = await Collector.online();
if (!processingOnline) {
return response.status(500).json({
success: false,
error: `Document processing API is not online. Document ${originalname} will not be parsed.`,
});
}
const { success, reason, documents } =
await Collector.parseDocument(originalname);
if (!success || !documents?.[0]) {
return response.status(500).json({
success: false,
error: reason || "No document returned from collector",
});
}
// Get thread ID if we have a slug
const { threadSlug = null } = reqBody(request);
const thread = threadSlug
? await WorkspaceThread.get({
slug: String(threadSlug),
workspace_id: workspace.id,
user_id: user?.id || null,
})
: null;
const files = await Promise.all(
documents.map(async (doc) => {
const metadata = { ...doc };
// Strip out pageContent
delete metadata.pageContent;
const filename = `${originalname}-${doc.id}.json`;
const { file, error: dbError } = await WorkspaceParsedFiles.create({
filename,
workspaceId: workspace.id,
userId: user?.id || null,
threadId: thread?.id || null,
metadata: JSON.stringify(metadata),
tokenCountEstimate: doc.token_count_estimate || 0,
});
if (dbError) throw new Error(dbError);
return file;
})
);
Collector.log(`Document ${originalname} parsed successfully.`);
await EventLogs.logEvent(
"document_uploaded_to_chat",
{
documentName: originalname,
workspace: workspace.slug,
thread: thread?.name || null,
},
user?.id
);
return response.status(200).json({
success: true,
error: null,
files,
});
} catch (e) {
console.error(e.message, e);
return response.sendStatus(500).end();
}
}
);
}
module.exports = { workspaceParsedFilesEndpoints };

View File

@ -0,0 +1,64 @@
const fs = require('fs');
const path = require('path');
const { log, conclude } = require('./helpers/index.js');
const { WorkspaceParsedFiles } = require('../models/workspaceParsedFiles.js');
const { directUploadsPath } = require('../utils/files');
async function batchDeleteFiles(filesToDelete, batchSize = 500) {
let deletedCount = 0;
let failedCount = 0;
for (let i = 0; i < filesToDelete.length; i += batchSize) {
const batch = filesToDelete.slice(i, i + batchSize);
try {
await Promise.all(batch.map(filePath => fs.unlink(filePath)));
deletedCount += batch.length;
log(`Deleted batch ${Math.floor(i / batchSize) + 1}: ${batch.length} files`);
} catch (err) {
// If batch fails, try individual files sync
for (const filePath of batch) {
try {
fs.unlinkSync(filePath);
deletedCount++;
} catch (fileErr) {
failedCount++;
log(`Failed to delete ${filePath}: ${fileErr.message}`);
}
}
}
}
return { deletedCount, failedCount };
}
(async () => {
try {
const filesToDelete = [];
const knownFiles = await WorkspaceParsedFiles
.where({}, null, null, { filename: true })
.then(files => new Set(files.map(f => f.filename)));
if (!fs.existsSync(directUploadsPath)) return log('No direct uploads path found - exiting.');
const filesInDirectUploadsPath = fs.readdirSync(directUploadsPath);
if (filesInDirectUploadsPath.length === 0) return;
for (let i = 0; i < filesInDirectUploadsPath.length; i++) {
const file = filesInDirectUploadsPath[i];
if (knownFiles.has(file)) continue;
filesToDelete.push(path.resolve(directUploadsPath, file));
}
if (filesToDelete.length === 0) return; // No orphaned files to delete
log(`Found ${filesToDelete.length} orphaned files to delete`);
const { deletedCount, failedCount } = await batchDeleteFiles(filesToDelete);
log(`Deleted ${deletedCount} orphaned files`);
if (failedCount > 0) log(`Failed to delete ${failedCount} files`);
} catch (e) {
console.error(e)
log(`errored with ${e.message}`)
} finally {
conclude();
}
})();

View File

@ -11,7 +11,7 @@ const Telemetry = {
pubkey: "phc_9qu7QLpV8L84P3vFmEiZxL020t2EqIubP7HHHxrSsqS", pubkey: "phc_9qu7QLpV8L84P3vFmEiZxL020t2EqIubP7HHHxrSsqS",
stubDevelopmentEvents: true, // [DO NOT TOUCH] Core team only. stubDevelopmentEvents: true, // [DO NOT TOUCH] Core team only.
label: "telemetry_id", label: "telemetry_id",
/* /*
Key value pairs of events that should be debounced to prevent spamming the logs. Key value pairs of events that should be debounced to prevent spamming the logs.
This should be used for events that could be triggered in rapid succession that are not useful to atomically log. This should be used for events that could be triggered in rapid succession that are not useful to atomically log.
The value is the number of seconds to debounce the event The value is the number of seconds to debounce the event
@ -27,6 +27,7 @@ const Telemetry = {
documents_embedded_in_workspace: 30, documents_embedded_in_workspace: 30,
link_uploaded: 30, link_uploaded: 30,
raw_document_uploaded: 30, raw_document_uploaded: 30,
document_parsed: 30,
}, },
id: async function () { id: async function () {

View File

@ -283,6 +283,10 @@ const Workspace = {
return { return {
...workspace, ...workspace,
documents: await Document.forWorkspace(workspace.id), documents: await Document.forWorkspace(workspace.id),
contextWindow: this._getContextWindow(workspace),
currentContextTokenCount: await this._getCurrentContextTokenCount(
workspace.id
),
}; };
} catch (error) { } catch (error) {
console.error(error.message); console.error(error.message);
@ -290,6 +294,42 @@ const Workspace = {
} }
}, },
/**
* Get the total token count of all parsed files in a workspace/thread
* @param {number} workspaceId - The ID of the workspace
* @param {number|null} threadId - Optional thread ID to filter by
* @returns {Promise<number>} Total token count of all files
* @private
*/
async _getCurrentContextTokenCount(workspaceId, threadId = null) {
const { WorkspaceParsedFiles } = require("./workspaceParsedFiles");
return await WorkspaceParsedFiles.totalTokenCount({
workspaceId: Number(workspaceId),
threadId: threadId ? Number(threadId) : null,
});
},
/**
* Get the context window size for a workspace based on its provider and model settings.
* If the workspace has no provider/model set, falls back to system defaults.
* @param {Workspace} workspace - The workspace to get context window for
* @returns {number|null} The context window size in tokens (defaults to null if no provider/model found)
* @private
*/
_getContextWindow: function (workspace) {
const {
getLLMProviderClass,
getBaseLLMProviderModel,
} = require("../utils/helpers");
const provider = workspace.chatProvider || process.env.LLM_PROVIDER || null;
const LLMProvider = getLLMProviderClass({ provider });
const model =
workspace.chatModel || getBaseLLMProviderModel({ provider }) || null;
if (!provider || !model) return null;
return LLMProvider?.promptWindowLimit?.(model) || null;
},
get: async function (clause = {}) { get: async function (clause = {}) {
try { try {
const workspace = await prisma.workspaces.findFirst({ const workspace = await prisma.workspaces.findFirst({
@ -299,7 +339,14 @@ const Workspace = {
}, },
}); });
return workspace || null; if (!workspace) return null;
return {
...workspace,
contextWindow: this._getContextWindow(workspace),
currentContextTokenCount: await this._getCurrentContextTokenCount(
workspace.id
),
};
} catch (error) { } catch (error) {
console.error(error.message); console.error(error.message);
return null; return null;

View File

@ -0,0 +1,227 @@
const prisma = require("../utils/prisma");
const { EventLogs } = require("./eventLogs");
const { Document } = require("./documents");
const { documentsPath, directUploadsPath } = require("../utils/files");
const { safeJsonParse } = require("../utils/http");
const fs = require("fs");
const path = require("path");
const WorkspaceParsedFiles = {
create: async function ({
filename,
workspaceId,
userId = null,
threadId = null,
metadata = null,
tokenCountEstimate = 0,
}) {
try {
const file = await prisma.workspace_parsed_files.create({
data: {
filename,
workspaceId: parseInt(workspaceId),
userId: userId ? parseInt(userId) : null,
threadId: threadId ? parseInt(threadId) : null,
metadata,
tokenCountEstimate,
},
});
await EventLogs.logEvent(
"workspace_file_uploaded",
{
filename,
workspaceId,
},
userId
);
return { file, error: null };
} catch (error) {
console.error("FAILED TO CREATE PARSED FILE RECORD.", error.message);
return { file: null, error: error.message };
}
},
get: async function (clause = {}) {
try {
const file = await prisma.workspace_parsed_files.findFirst({
where: clause,
});
return file;
} catch (error) {
console.error(error.message);
return null;
}
},
where: async function (
clause = {},
limit = null,
orderBy = null,
select = null
) {
try {
const files = await prisma.workspace_parsed_files.findMany({
where: clause,
...(limit !== null ? { take: limit } : {}),
...(orderBy !== null ? { orderBy } : {}),
...(select !== null ? { select } : {}),
});
return files;
} catch (error) {
console.error(error.message);
return [];
}
},
delete: async function (clause = {}) {
try {
await prisma.workspace_parsed_files.deleteMany({
where: clause,
});
return true;
} catch (error) {
console.error(error.message);
return false;
}
},
totalTokenCount: async function (clause = {}) {
const { _sum } = await prisma.workspace_parsed_files.aggregate({
where: clause,
_sum: { tokenCountEstimate: true },
});
return _sum.tokenCountEstimate || 0;
},
moveToDocumentsAndEmbed: async function (fileId, workspace) {
try {
const parsedFile = await this.get({ id: parseInt(fileId) });
if (!parsedFile) throw new Error("File not found");
// Get file location from metadata
const metadata = safeJsonParse(parsedFile.metadata, {});
const location = metadata.location;
if (!location) throw new Error("No file location in metadata");
// Get file from metadata location
const sourceFile = path.join(directUploadsPath, location.split("/")[1]);
if (!fs.existsSync(sourceFile)) throw new Error("Source file not found");
// Move to custom-documents
const customDocsPath = path.join(documentsPath, "custom-documents");
if (!fs.existsSync(customDocsPath))
fs.mkdirSync(customDocsPath, { recursive: true });
// Copy the file to custom-documents
const targetPath = path.join(customDocsPath, location.split("/")[1]);
fs.copyFileSync(sourceFile, targetPath);
fs.unlinkSync(sourceFile);
const {
failedToEmbed = [],
errors = [],
embedded = [],
} = await Document.addDocuments(
workspace,
[`custom-documents/${location.split("/")[1]}`],
parsedFile.userId
);
if (failedToEmbed.length > 0)
throw new Error(errors[0] || "Failed to embed document");
const document = await Document.get({
workspaceId: workspace.id,
docpath: embedded[0],
});
return { success: true, error: null, document };
} catch (error) {
console.error("Failed to move and embed file:", error);
return { success: false, error: error.message, document: null };
} finally {
// Always delete the file after processing
await this.delete({ id: parseInt(fileId) });
}
},
getContextMetadataAndLimits: async function (
workspace,
thread = null,
user = null
) {
try {
if (!workspace) throw new Error("Workspace is required");
const files = await this.where({
workspaceId: workspace.id,
threadId: thread?.id || null,
...(user ? { userId: user.id } : {}),
});
const results = [];
let totalTokens = 0;
for (const file of files) {
const metadata = safeJsonParse(file.metadata, {});
totalTokens += file.tokenCountEstimate || 0;
results.push({
id: file.id,
title: metadata.title || metadata.location,
location: metadata.location,
token_count_estimate: file.tokenCountEstimate,
});
}
return {
files: results,
contextWindow: workspace.contextWindow,
currentContextTokenCount: totalTokens,
};
} catch (error) {
console.error("Failed to get context metadata:", error);
return {
files: [],
contextWindow: Infinity,
currentContextTokenCount: 0,
};
}
},
getContextFiles: async function (workspace, thread = null, user = null) {
try {
const files = await this.where({
workspaceId: workspace.id,
threadId: thread?.id || null,
...(user ? { userId: user.id } : {}),
});
const results = [];
for (const file of files) {
const metadata = safeJsonParse(file.metadata, {});
const location = metadata.location;
if (!location) continue;
const sourceFile = path.join(directUploadsPath, location.split("/")[1]);
if (!fs.existsSync(sourceFile)) continue;
const content = fs.readFileSync(sourceFile, "utf-8");
const data = safeJsonParse(content, null);
if (!data?.pageContent) continue;
results.push({
pageContent: data.pageContent,
token_count_estimate: file.tokenCountEstimate,
...metadata,
});
}
return results;
} catch (error) {
console.error("Failed to get context files:", error);
return [];
}
},
};
module.exports = { WorkspaceParsedFiles };

View File

@ -0,0 +1,23 @@
-- CreateTable
CREATE TABLE "workspace_parsed_files" (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"filename" TEXT NOT NULL,
"workspaceId" INTEGER NOT NULL,
"userId" INTEGER,
"threadId" INTEGER,
"metadata" TEXT,
"tokenCountEstimate" INTEGER DEFAULT 0,
"createdAt" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT "workspace_parsed_files_workspaceId_fkey" FOREIGN KEY ("workspaceId") REFERENCES "workspaces" ("id") ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT "workspace_parsed_files_userId_fkey" FOREIGN KEY ("userId") REFERENCES "users" ("id") ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT "workspace_parsed_files_threadId_fkey" FOREIGN KEY ("threadId") REFERENCES "workspace_threads" ("id") ON DELETE CASCADE ON UPDATE CASCADE
);
-- CreateIndex
CREATE UNIQUE INDEX "workspace_parsed_files_filename_key" ON "workspace_parsed_files"("filename");
-- CreateIndex
CREATE INDEX "workspace_parsed_files_workspaceId_idx" ON "workspace_parsed_files"("workspaceId");
-- CreateIndex
CREATE INDEX "workspace_parsed_files_userId_idx" ON "workspace_parsed_files"("userId");

View File

@ -83,6 +83,7 @@ model users {
system_prompt_variables system_prompt_variables[] system_prompt_variables system_prompt_variables[]
prompt_history prompt_history[] prompt_history prompt_history[]
desktop_mobile_devices desktop_mobile_devices[] desktop_mobile_devices desktop_mobile_devices[]
workspace_parsed_files workspace_parsed_files[]
} }
model recovery_codes { model recovery_codes {
@ -149,18 +150,20 @@ model workspaces {
threads workspace_threads[] threads workspace_threads[]
workspace_agent_invocations workspace_agent_invocations[] workspace_agent_invocations workspace_agent_invocations[]
prompt_history prompt_history[] prompt_history prompt_history[]
workspace_parsed_files workspace_parsed_files[]
} }
model workspace_threads { model workspace_threads {
id Int @id @default(autoincrement()) id Int @id @default(autoincrement())
name String name String
slug String @unique slug String @unique
workspace_id Int workspace_id Int
user_id Int? user_id Int?
createdAt DateTime @default(now()) createdAt DateTime @default(now())
lastUpdatedAt DateTime @default(now()) lastUpdatedAt DateTime @default(now())
workspace workspaces @relation(fields: [workspace_id], references: [id], onDelete: Cascade) workspace workspaces @relation(fields: [workspace_id], references: [id], onDelete: Cascade)
user users? @relation(fields: [user_id], references: [id], onDelete: Cascade) user users? @relation(fields: [user_id], references: [id], onDelete: Cascade)
workspace_parsed_files workspace_parsed_files[]
@@index([workspace_id]) @@index([workspace_id])
@@index([user_id]) @@index([user_id])
@ -371,3 +374,20 @@ model desktop_mobile_devices {
@@index([userId]) @@index([userId])
} }
model workspace_parsed_files {
id Int @id @default(autoincrement())
filename String @unique
workspaceId Int
userId Int?
threadId Int?
metadata String?
tokenCountEstimate Int? @default(0)
createdAt DateTime @default(now())
workspace workspaces @relation(fields: [workspaceId], references: [id], onDelete: Cascade)
user users? @relation(fields: [userId], references: [id], onDelete: Cascade)
thread workspace_threads? @relation(fields: [threadId], references: [id], onDelete: Cascade)
@@index([workspaceId])
@@index([userId])
}

View File

@ -6,8 +6,26 @@ const setLogger = require("../logger");
class BackgroundService { class BackgroundService {
name = "BackgroundWorkerService"; name = "BackgroundWorkerService";
static _instance = null; static _instance = null;
documentSyncEnabled = false;
#root = path.resolve(__dirname, "../../jobs"); #root = path.resolve(__dirname, "../../jobs");
#alwaysRunJobs = [
{
name: "cleanup-orphan-documents",
timeout: "1m",
interval: "12hr",
},
];
#documentSyncJobs = [
// Job for auto-sync of documents
// https://github.com/breejs/bree
{
name: "sync-watched-documents",
interval: "1hr",
},
];
constructor() { constructor() {
if (BackgroundService._instance) { if (BackgroundService._instance) {
this.#log("SINGLETON LOCK: Using existing BackgroundService."); this.#log("SINGLETON LOCK: Using existing BackgroundService.");
@ -24,16 +42,14 @@ class BackgroundService {
async boot() { async boot() {
const { DocumentSyncQueue } = require("../../models/documentSyncQueue"); const { DocumentSyncQueue } = require("../../models/documentSyncQueue");
if (!(await DocumentSyncQueue.enabled())) { this.documentSyncEnabled = await DocumentSyncQueue.enabled();
this.#log("Feature is not enabled and will not be started."); const jobsToRun = this.jobs();
return;
}
this.#log("Starting..."); this.#log("Starting...");
this.bree = new Bree({ this.bree = new Bree({
logger: this.logger, logger: this.logger,
root: this.#root, root: this.#root,
jobs: this.jobs(), jobs: jobsToRun,
errorHandler: this.onError, errorHandler: this.onError,
workerMessageHandler: this.onWorkerMessageHandler, workerMessageHandler: this.onWorkerMessageHandler,
runJobsAs: "process", runJobsAs: "process",
@ -41,7 +57,10 @@ class BackgroundService {
this.graceful = new Graceful({ brees: [this.bree], logger: this.logger }); this.graceful = new Graceful({ brees: [this.bree], logger: this.logger });
this.graceful.listen(); this.graceful.listen();
this.bree.start(); this.bree.start();
this.#log("Service started"); this.#log(
`Service started with ${jobsToRun.length} jobs`,
jobsToRun.map((j) => j.name)
);
} }
async stop() { async stop() {
@ -54,14 +73,9 @@ class BackgroundService {
/** @returns {import("@mintplex-labs/bree").Job[]} */ /** @returns {import("@mintplex-labs/bree").Job[]} */
jobs() { jobs() {
return [ const activeJobs = [...this.#alwaysRunJobs];
// Job for auto-sync of documents if (this.documentSyncEnabled) activeJobs.push(...this.#documentSyncJobs);
// https://github.com/breejs/bree return activeJobs;
{
name: "sync-watched-documents",
interval: "1hr",
},
];
} }
onError(error, _workerMetadata) { onError(error, _workerMetadata) {

View File

@ -1,6 +1,7 @@
const { v4: uuidv4 } = require("uuid"); const { v4: uuidv4 } = require("uuid");
const { DocumentManager } = require("../DocumentManager"); const { DocumentManager } = require("../DocumentManager");
const { WorkspaceChats } = require("../../models/workspaceChats"); const { WorkspaceChats } = require("../../models/workspaceChats");
const { WorkspaceParsedFiles } = require("../../models/workspaceParsedFiles");
const { getVectorDbClass, getLLMProvider } = require("../helpers"); const { getVectorDbClass, getLLMProvider } = require("../helpers");
const { writeResponseChunk } = require("../helpers/chat/responses"); const { writeResponseChunk } = require("../helpers/chat/responses");
const { grepAgents } = require("./agents"); const { grepAgents } = require("./agents");
@ -130,6 +131,22 @@ async function streamChatWithWorkspace(
}); });
}); });
// Inject any parsed files for this workspace/thread/user
const parsedFiles = await WorkspaceParsedFiles.getContextFiles(
workspace,
thread || null,
user || null
);
parsedFiles.forEach((doc) => {
const { pageContent, ...metadata } = doc;
contextTexts.push(doc.pageContent);
sources.push({
text:
pageContent.slice(0, 1_000) + "...continued on in source document...",
...metadata,
});
});
const vectorSearchResults = const vectorSearchResults =
embeddingsCount !== 0 embeddingsCount !== 0
? await VectorDb.performSimilaritySearch({ ? await VectorDb.performSimilaritySearch({

View File

@ -232,6 +232,42 @@ class CollectorApi {
return { success: false, content: null }; return { success: false, content: null };
}); });
} }
/**
* Parse a document without processing it
* - Will append the options to the request body
* @param {string} filename - The filename of the document to parse
* @returns {Promise<Object>} - The response from the collector API
*/
async parseDocument(filename = "") {
if (!filename) return false;
const data = JSON.stringify({
filename,
options: this.#attachOptions(),
});
return await fetch(`${this.endpoint}/parse`, {
method: "POST",
headers: {
"Content-Type": "application/json",
"X-Integrity": this.comkey.sign(data),
"X-Payload-Signer": this.comkey.encrypt(
new EncryptionManager().xPayload
),
},
body: data,
})
.then((res) => {
if (!res.ok) throw new Error("Response could not be completed");
return res.json();
})
.then((res) => res)
.catch((e) => {
this.log(e.message);
return { success: false, reason: e.message, documents: [] };
});
}
} }
module.exports.CollectorApi = CollectorApi; module.exports.CollectorApi = CollectorApi;

View File

@ -7,6 +7,10 @@ const documentsPath =
process.env.NODE_ENV === "development" process.env.NODE_ENV === "development"
? path.resolve(__dirname, `../../storage/documents`) ? path.resolve(__dirname, `../../storage/documents`)
: path.resolve(process.env.STORAGE_DIR, `documents`); : path.resolve(process.env.STORAGE_DIR, `documents`);
const directUploadsPath =
process.env.NODE_ENV === "development"
? path.resolve(__dirname, `../../storage/direct-uploads`)
: path.resolve(process.env.STORAGE_DIR, `direct-uploads`);
const vectorCachePath = const vectorCachePath =
process.env.NODE_ENV === "development" process.env.NODE_ENV === "development"
? path.resolve(__dirname, `../../storage/vector-cache`) ? path.resolve(__dirname, `../../storage/vector-cache`)
@ -468,6 +472,7 @@ module.exports = {
normalizePath, normalizePath,
isWithin, isWithin,
documentsPath, documentsPath,
directUploadsPath,
hasVectorCachedFiles, hasVectorCachedFiles,
purgeEntireVectorCache, purgeEntireVectorCache,
getDocumentsByFolder, getDocumentsByFolder,

View File

@ -402,19 +402,19 @@ function getBaseLLMProviderModel({ provider = null } = {}) {
case "koboldcpp": case "koboldcpp":
return process.env.KOBOLD_CPP_MODEL_PREF; return process.env.KOBOLD_CPP_MODEL_PREF;
case "textgenwebui": case "textgenwebui":
return process.env.TEXT_GEN_WEB_UI_API_KEY; return null;
case "cohere": case "cohere":
return process.env.COHERE_MODEL_PREF; return process.env.COHERE_MODEL_PREF;
case "litellm": case "litellm":
return process.env.LITE_LLM_MODEL_PREF; return process.env.LITE_LLM_MODEL_PREF;
case "generic-openai": case "generic-openai":
return process.env.GENERIC_OPEN_AI_EMBEDDING_API_KEY; return process.env.GENERIC_OPEN_AI_MODEL_PREF;
case "bedrock": case "bedrock":
return process.env.AWS_BEDROCK_LLM_MODEL_PREFERENCE; return process.env.AWS_BEDROCK_LLM_MODEL_PREFERENCE;
case "deepseek": case "deepseek":
return process.env.DEEPSEEK_MODEL_PREF; return process.env.DEEPSEEK_MODEL_PREF;
case "apipie": case "apipie":
return process.env.APIPIE_LLM_API_KEY; return process.env.APIPIE_LLM_MODEL_PREF;
case "novita": case "novita":
return process.env.NOVITA_LLM_MODEL_PREF; return process.env.NOVITA_LLM_MODEL_PREF;
case "xai": case "xai":
@ -422,7 +422,7 @@ function getBaseLLMProviderModel({ provider = null } = {}) {
case "nvidia-nim": case "nvidia-nim":
return process.env.NVIDIA_NIM_LLM_MODEL_PREF; return process.env.NVIDIA_NIM_LLM_MODEL_PREF;
case "ppio": case "ppio":
return process.env.PPIO_API_KEY; return process.env.PPIO_MODEL_PREF;
case "dpais": case "dpais":
return process.env.DPAIS_LLM_MODEL_PREF; return process.env.DPAIS_LLM_MODEL_PREF;
case "moonshotai": case "moonshotai":