Workspace Chat with documents overhaul (#4261)
* Create parse endpoint in collector (#4212)
  * create parse endpoint in collector
  * revert cleanup temp util call
  * lint
  * remove unused cleanupTempDocuments function
  * revert slug change; minor change for destinations

  Co-authored-by: timothycarambat <rambat1010@gmail.com>

* Add parsed files table and parse server endpoints (#4222)
  * add workspace_parsed_files table + parse endpoints/models
  * remove dev API parse endpoint
  * remove unneeded imports
  * iterate over all files + remove unneeded update function + update telemetry debounce

* Upload UI/UX context window check + frontend alert (#4230)
  * prompt user to embed if the upload exceeds the prompt window + handle embed + handle cancel
  * add tokenCountEstimate to workspace_parsed_files + optimizations
  * use util for path locations + use safeJsonParse
  * add modal for user decision on overflow of context window
  * lint
  * dynamic fetching of provider/model combo + inject parsed documents
  * remove unneeded comments
  * popup UI for attaching/removing files + warning to embed + WIP fetching states on update
  * remove prop drilling; fetch files/limits directly in the attach-files popup
  * rework UX of FE + BE optimizations
  * fix UX of FE + BE optimizations
  * implement bidirectional sync for parsed file states; linting, small changes, and comments
  * move parse support to another endpoint file; simplify calls and loading of records
  * button borders
  * enable default users to upload parsed files but NOT embed
  * delete cascade on user/workspace/thread deletion to remove parsed file records
  * enable bgworker with "always" jobs and optional document sync jobs. Orphan document job: finds any broken reference files to prevent over-pollution of the storage folder; runs 10s after boot and every 12 hours thereafter
  * change run timeout for orphan job to 1m to allow settling before spawning a worker
  * linting and PR cleanup

  Co-authored-by: Timothy Carambat <rambat1010@gmail.com>

* dev build
* fix tooltip hiding during embedding of overflow files
* prevent crash log from ERRNO when parsing files
* remove unused import
* update docs link
* Migrate parsed-files to GET endpoint; patch logic for grabbing model names from utils; better handling for undetermined context windows (null instead of POSITIVE_INFINITY); UI placeholder for null context windows
* patch URL

Co-authored-by: Sean Hatfield <seanhatfield5@gmail.com>
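For orientation, the end-to-end flow this overhaul introduces can be sketched with the frontend Workspace helpers added in this commit (Workspace.parseFile, Workspace.getParsedFiles, Workspace.embedParsedFile, Workspace.deleteParsedFiles, Workspace.maxContextWindowLimit). This is a minimal sketch and not part of the diff; it assumes it runs inside an async function with a native File object `file`, a `workspace` with a slug, and an optional `threadSlug`.

// Parse the dropped file into the direct-uploads store, compare the running
// token estimate against the workspace context window, then either keep it
// as chat-only context or embed it into the workspace.
const formData = new FormData();
formData.append("file", file, file.name);
formData.append("threadSlug", threadSlug || null);

const { response, data } = await Workspace.parseFile(workspace.slug, formData);
if (!response.ok) throw new Error(data?.error ?? "Failed to parse file");

// One parsed record is returned per uploaded file.
const parsed = data.files[0];

// The GET endpoint reports the current totals, including the file just parsed.
const { currentContextTokenCount, contextWindow } = await Workspace.getParsedFiles(
  workspace.slug,
  threadSlug
);
// A null contextWindow means the provider/model limit could not be determined.
const limit = contextWindow
  ? Math.floor(contextWindow * Workspace.maxContextWindowLimit)
  : Number.POSITIVE_INFINITY;

if (currentContextTokenCount > limit) {
  // Over the soft limit: embed the file into the workspace (admin/manager only)...
  await Workspace.embedParsedFile(workspace.slug, parsed.id);
  // ...or delete the parsed record so it is not injected into the prompt:
  // await Workspace.deleteParsedFiles(workspace.slug, [parsed.id]);
} else {
  // Within limits: the parsed file stays as per-chat context and is injected at chat time.
}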
This commit is contained in: parent 9451cd596f, commit 0fb33736da

.github/workflows/dev-build.yaml (vendored, 2 changes)
@ -6,7 +6,7 @@ concurrency:

on:
push:
branches: ['mobile-support'] # put your current branch to create a build. Core team only.
branches: ['upload-ui-ux'] # put your current branch to create a build. Core team only.
paths-ignore:
- '**.md'
- 'cloud-deployments/*'
@ -58,6 +58,39 @@ app.post(
}
);

app.post(
"/parse",
[verifyPayloadIntegrity],
async function (request, response) {
const { filename, options = {} } = reqBody(request);
try {
const targetFilename = path
.normalize(filename)
.replace(/^(\.\.(\/|\\|$))+/, "");
const {
success,
reason,
documents = [],
} = await processSingleFile(targetFilename, {
...options,
parseOnly: true,
});
response
.status(200)
.json({ filename: targetFilename, success, reason, documents });
} catch (e) {
console.error(e);
response.status(200).json({
filename: filename,
success: false,
reason: "A processing error occurred.",
documents: [],
});
}
return;
}
);

app.post(
"/process-link",
[verifyPayloadIntegrity],
@ -62,6 +62,7 @@ async function asAudio({ fullFilePath = "", filename = "", options = {} }) {
const document = writeToServerDocuments({
data,
filename: `${slugify(filename)}-${data.id}`,
options: { parseOnly: options.parseOnly },
});
trashFile(fullFilePath);
console.log(
@ -8,7 +8,7 @@ const {
|
||||
const { tokenizeString } = require("../../utils/tokenizer");
|
||||
const { default: slugify } = require("slugify");
|
||||
|
||||
async function asDocX({ fullFilePath = "", filename = "" }) {
|
||||
async function asDocX({ fullFilePath = "", filename = "", options = {} }) {
|
||||
const loader = new DocxLoader(fullFilePath);
|
||||
|
||||
console.log(`-- Working ${filename} --`);
|
||||
@ -48,6 +48,7 @@ async function asDocX({ fullFilePath = "", filename = "" }) {
|
||||
const document = writeToServerDocuments({
|
||||
data,
|
||||
filename: `${slugify(filename)}-${data.id}`,
|
||||
options: { parseOnly: options.parseOnly },
|
||||
});
|
||||
trashFile(fullFilePath);
|
||||
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
||||
|
||||
@ -8,7 +8,7 @@ const {
|
||||
} = require("../../utils/files");
|
||||
const { default: slugify } = require("slugify");
|
||||
|
||||
async function asEPub({ fullFilePath = "", filename = "" }) {
|
||||
async function asEPub({ fullFilePath = "", filename = "", options = {} }) {
|
||||
let content = "";
|
||||
try {
|
||||
const loader = new EPubLoader(fullFilePath, { splitChapters: false });
|
||||
@ -46,6 +46,7 @@ async function asEPub({ fullFilePath = "", filename = "" }) {
|
||||
const document = writeToServerDocuments({
|
||||
data,
|
||||
filename: `${slugify(filename)}-${data.id}`,
|
||||
options: { parseOnly: options.parseOnly },
|
||||
});
|
||||
trashFile(fullFilePath);
|
||||
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
||||
|
||||
@ -41,6 +41,7 @@ async function asImage({ fullFilePath = "", filename = "", options = {} }) {
|
||||
const document = writeToServerDocuments({
|
||||
data,
|
||||
filename: `${slugify(filename)}-${data.id}`,
|
||||
options: { parseOnly: options.parseOnly },
|
||||
});
|
||||
trashFile(fullFilePath);
|
||||
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
||||
|
||||
@ -60,6 +60,7 @@ async function asMbox({ fullFilePath = "", filename = "" }) {
|
||||
const document = writeToServerDocuments({
|
||||
data,
|
||||
filename: `${slugify(filename)}-${data.id}-msg-${item}`,
|
||||
options: { parseOnly: options.parseOnly },
|
||||
});
|
||||
documents.push(document);
|
||||
}
|
||||
|
||||
@ -44,6 +44,7 @@ async function asOfficeMime({ fullFilePath = "", filename = "" }) {
|
||||
const document = writeToServerDocuments({
|
||||
data,
|
||||
filename: `${slugify(filename)}-${data.id}`,
|
||||
options: { parseOnly: options.parseOnly },
|
||||
});
|
||||
trashFile(fullFilePath);
|
||||
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
||||
|
||||
@ -65,6 +65,7 @@ async function asPdf({ fullFilePath = "", filename = "", options = {} }) {
|
||||
const document = writeToServerDocuments({
|
||||
data,
|
||||
filename: `${slugify(filename)}-${data.id}`,
|
||||
options: { parseOnly: options.parseOnly },
|
||||
});
|
||||
trashFile(fullFilePath);
|
||||
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
||||
|
||||
@ -8,7 +8,7 @@ const {
|
||||
} = require("../../utils/files");
|
||||
const { default: slugify } = require("slugify");
|
||||
|
||||
async function asTxt({ fullFilePath = "", filename = "" }) {
|
||||
async function asTxt({ fullFilePath = "", filename = "", options = {} }) {
|
||||
let content = "";
|
||||
try {
|
||||
content = fs.readFileSync(fullFilePath, "utf8");
|
||||
@ -44,6 +44,7 @@ async function asTxt({ fullFilePath = "", filename = "" }) {
|
||||
const document = writeToServerDocuments({
|
||||
data,
|
||||
filename: `${slugify(filename)}-${data.id}`,
|
||||
options: { parseOnly: options.parseOnly },
|
||||
});
|
||||
trashFile(fullFilePath);
|
||||
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
||||
|
||||
@ -7,6 +7,7 @@ const {
|
||||
trashFile,
|
||||
writeToServerDocuments,
|
||||
documentsFolder,
|
||||
directUploadsFolder,
|
||||
} = require("../../utils/files");
|
||||
const { tokenizeString } = require("../../utils/tokenizer");
|
||||
const { default: slugify } = require("slugify");
|
||||
@ -26,14 +27,16 @@ function convertToCSV(data) {
|
||||
.join("\n");
|
||||
}
|
||||
|
||||
async function asXlsx({ fullFilePath = "", filename = "" }) {
|
||||
async function asXlsx({ fullFilePath = "", filename = "", options = {} }) {
|
||||
const documents = [];
|
||||
const folderName = slugify(`${path.basename(filename)}-${v4().slice(0, 4)}`, {
|
||||
lower: true,
|
||||
trim: true,
|
||||
});
|
||||
const outFolderPath = options.parseOnly
|
||||
? path.resolve(directUploadsFolder, folderName)
|
||||
: path.resolve(documentsFolder, folderName);
|
||||
|
||||
const outFolderPath = path.resolve(documentsFolder, folderName);
|
||||
try {
|
||||
const workSheetsFromFile = xlsx.parse(fullFilePath);
|
||||
if (!fs.existsSync(outFolderPath))
|
||||
@ -68,6 +71,7 @@ async function asXlsx({ fullFilePath = "", filename = "" }) {
|
||||
data: sheetData,
|
||||
filename: `sheet-${slugify(name)}`,
|
||||
destinationOverride: outFolderPath,
|
||||
options: { parseOnly: options.parseOnly },
|
||||
});
|
||||
documents.push(document);
|
||||
console.log(
|
||||
|
||||
@ -11,6 +11,16 @@ const documentsFolder =
? path.resolve(__dirname, `../../../server/storage/documents`)
: path.resolve(process.env.STORAGE_DIR, `documents`);

/**
* The folder where direct uploads are stored to be stored when
* processed by the collector. These are files that were DnD'd into UI
* and are not to be embedded or selectable from the file picker.
*/
const directUploadsFolder =
process.env.NODE_ENV === "development"
? path.resolve(__dirname, `../../../server/storage/direct-uploads`)
: path.resolve(process.env.STORAGE_DIR, `direct-uploads`);

/**
* Checks if a file is text by checking the mime type and then falling back to buffer inspection.
* This way we can capture all the cases where the mime type is not known but still parseable as text
@ -102,17 +112,21 @@ function createdDate(filepath) {
* @param {Object} params.data - The data to write to the file. Must look like a document object.
* @param {string} params.filename - The name of the file to write to.
* @param {string|null} params.destinationOverride - A forced destination to write to - will be honored if provided.
* @param {Object} params.options - The options for the function.
* @param {boolean} params.options.parseOnly - If true, the file will be written to the direct uploads folder instead of the documents folder. Will be ignored if destinationOverride is provided.
* @returns {Object} - The data with the location added.
*/
function writeToServerDocuments({
data = {},
filename = null,
filename,
destinationOverride = null,
options = {},
}) {
if (!filename) throw new Error("Filename is required!");

let destination = null;
if (destinationOverride) destination = path.resolve(destinationOverride);
else if (options.parseOnly) destination = path.resolve(directUploadsFolder);
else destination = path.resolve(documentsFolder, "custom-documents");

if (!fs.existsSync(destination))
@ -129,6 +143,7 @@ function writeToServerDocuments({
// that will work since we know the location exists and since we only allow
// 1-level deep folders this will always work. This still works for integrations like GitHub and YouTube.
location: destinationFilePath.split("/").slice(-2).join("/"),
isDirectUpload: options.parseOnly || false,
};
}

@ -207,4 +222,5 @@ module.exports = {
isWithin,
sanitizeFileName,
documentsFolder,
directUploadsFolder,
};
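As a quick illustration of the updated writeToServerDocuments signature above, here is a minimal, hypothetical caller (not part of the diff); it assumes `data` is a document object produced by one of the converters and `filename` is the original upload name.

// With parseOnly set, the JSON lands in storage/direct-uploads and is flagged
// isDirectUpload; without it, it goes to documents/custom-documents as before.
const document = writeToServerDocuments({
  data,                                        // document object built by a converter
  filename: `${slugify(filename)}-${data.id}`, // same naming convention used above
  options: { parseOnly: true },
});
console.log(document.location, document.isDirectUpload);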
@ -78,6 +78,12 @@ export function ChatTooltips() {
|
||||
delayShow={500}
|
||||
className="tooltip !text-xs max-w-[350px]"
|
||||
/>
|
||||
<Tooltip
|
||||
id="context-window-limit-exceeded"
|
||||
place="top"
|
||||
delayShow={500}
|
||||
className="tooltip !text-xs max-w-[350px]"
|
||||
/>
|
||||
<DocumentLevelTooltip />
|
||||
</>
|
||||
);
|
||||
|
||||
@ -0,0 +1,106 @@
|
||||
import { CircleNotch } from "@phosphor-icons/react";
|
||||
import ModalWrapper from "@/components/ModalWrapper";
|
||||
import pluralize from "pluralize";
|
||||
import { numberWithCommas } from "@/utils/numbers";
|
||||
import useUser from "@/hooks/useUser";
|
||||
import { Link } from "react-router-dom";
|
||||
import Paths from "@/utils/paths";
|
||||
import Workspace from "@/models/workspace";
|
||||
|
||||
export default function FileUploadWarningModal({
|
||||
show,
|
||||
onClose,
|
||||
onContinue,
|
||||
onEmbed,
|
||||
tokenCount,
|
||||
maxTokens,
|
||||
fileCount = 1,
|
||||
isEmbedding = false,
|
||||
embedProgress = 0,
|
||||
}) {
|
||||
const { user } = useUser();
|
||||
const canEmbed = !user || user.role !== "default";
|
||||
if (!show) return null;
|
||||
|
||||
if (isEmbedding) {
|
||||
return (
|
||||
<ModalWrapper isOpen={show}>
|
||||
<div className="relative max-w-[600px] bg-theme-bg-primary rounded-lg shadow border border-theme-modal-border">
|
||||
<div className="p-6 flex flex-col items-center justify-center">
|
||||
<p className="text-white text-lg font-semibold mb-4">
|
||||
Embedding {embedProgress + 1} of {fileCount}{" "}
|
||||
{pluralize("file", fileCount)}
|
||||
</p>
|
||||
<CircleNotch size={32} className="animate-spin text-white" />
|
||||
<p className="text-white/60 text-sm mt-2">
|
||||
Please wait while we embed your files...
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</ModalWrapper>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<ModalWrapper isOpen={show}>
|
||||
<div className="relative max-w-[600px] bg-theme-bg-primary rounded-lg shadow border border-theme-modal-border">
|
||||
<div className="relative p-6 border-b border-theme-modal-border">
|
||||
<div className="w-full flex gap-x-2 items-center">
|
||||
<h3 className="text-xl font-semibold text-white overflow-hidden overflow-ellipsis whitespace-nowrap">
|
||||
Context Window Warning
|
||||
</h3>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="py-7 px-9 space-y-4">
|
||||
<p className="text-theme-text-primary text-sm">
|
||||
Your workspace is using {numberWithCommas(tokenCount)} of{" "}
|
||||
{numberWithCommas(maxTokens)} available tokens. We recommend keeping
|
||||
usage below {(Workspace.maxContextWindowLimit * 100).toFixed(0)}% to
|
||||
ensure the best chat experience. Adding {fileCount} more{" "}
|
||||
{pluralize("file", fileCount)} would exceed this limit.{" "}
|
||||
<Link
|
||||
target="_blank"
|
||||
to={Paths.documentation.contextWindows()}
|
||||
className="text-theme-text-secondary text-sm underline"
|
||||
>
|
||||
Learn more about context windows →
|
||||
</Link>
|
||||
</p>
|
||||
<p className="text-theme-text-primary text-sm">
|
||||
Choose how you would like to proceed with these uploads.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className="flex w-full justify-between items-center p-6 space-x-2 border-t border-theme-modal-border rounded-b">
|
||||
<button
|
||||
onClick={onClose}
|
||||
type="button"
|
||||
className="border-none transition-all duration-300 bg-theme-modal-border text-white hover:opacity-60 px-4 py-2 rounded-lg text-sm"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
<div className="flex w-full justify-end items-center space-x-2">
|
||||
<button
|
||||
onClick={onContinue}
|
||||
type="button"
|
||||
className="border-none transition-all duration-300 bg-theme-modal-border text-white hover:opacity-60 px-4 py-2 rounded-lg text-sm"
|
||||
>
|
||||
Continue Anyway
|
||||
</button>
|
||||
{canEmbed && (
|
||||
<button
|
||||
onClick={onEmbed}
|
||||
disabled={isEmbedding || !canEmbed}
|
||||
type="button"
|
||||
className="border-none transition-all duration-300 bg-white text-black hover:opacity-60 px-4 py-2 rounded-lg text-sm"
|
||||
>
|
||||
Embed {pluralize("File", fileCount)}
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</ModalWrapper>
|
||||
);
|
||||
}
|
||||
@ -4,7 +4,9 @@ import System from "@/models/system";
|
||||
import { useDropzone } from "react-dropzone";
|
||||
import DndIcon from "./dnd-icon.png";
|
||||
import Workspace from "@/models/workspace";
|
||||
import useUser from "@/hooks/useUser";
|
||||
import showToast from "@/utils/toast";
|
||||
import FileUploadWarningModal from "./FileUploadWarningModal";
|
||||
import pluralize from "pluralize";
|
||||
|
||||
export const DndUploaderContext = createContext();
|
||||
export const REMOVE_ATTACHMENT_EVENT = "ATTACHMENT_REMOVE";
|
||||
@ -12,6 +14,8 @@ export const CLEAR_ATTACHMENTS_EVENT = "ATTACHMENT_CLEAR";
|
||||
export const PASTE_ATTACHMENT_EVENT = "ATTACHMENT_PASTED";
|
||||
export const ATTACHMENTS_PROCESSING_EVENT = "ATTACHMENTS_PROCESSING";
|
||||
export const ATTACHMENTS_PROCESSED_EVENT = "ATTACHMENTS_PROCESSED";
|
||||
export const PARSED_FILE_ATTACHMENT_REMOVED_EVENT =
|
||||
"PARSED_FILE_ATTACHMENT_REMOVED";
|
||||
|
||||
/**
|
||||
* File Attachment for automatic upload on the chat container page.
|
||||
@ -19,30 +23,58 @@ export const ATTACHMENTS_PROCESSED_EVENT = "ATTACHMENTS_PROCESSED";
|
||||
* @property {string} uid - unique file id.
|
||||
* @property {File} file - native File object
|
||||
* @property {string|null} contentString - base64 encoded string of file
|
||||
* @property {('in_progress'|'failed'|'success')} status - the automatic upload status.
|
||||
* @property {('in_progress'|'failed'|'embedded'|'added_context')} status - the automatic upload status.
|
||||
* @property {string|null} error - Error message
|
||||
* @property {{id:string, location:string}|null} document - uploaded document details
|
||||
* @property {('attachment'|'upload')} type - The type of upload. Attachments are chat-specific, uploads go to the workspace.
|
||||
*/
|
||||
|
||||
export function DnDFileUploaderProvider({ workspace, children }) {
|
||||
/**
|
||||
* @typedef {Object} ParsedFile
|
||||
* @property {number} id - The id of the parsed file.
|
||||
* @property {string} filename - The name of the parsed file.
|
||||
* @property {number} workspaceId - The id of the workspace the parsed file belongs to.
|
||||
* @property {string|null} userId - The id of the user the parsed file belongs to.
|
||||
* @property {string|null} threadId - The id of the thread the parsed file belongs to.
|
||||
* @property {string} metadata - The metadata of the parsed file.
|
||||
* @property {number} tokenCountEstimate - The estimated token count of the parsed file.
|
||||
*/
|
||||
|
||||
export function DnDFileUploaderProvider({
|
||||
workspace,
|
||||
threadSlug = null,
|
||||
children,
|
||||
}) {
|
||||
const [files, setFiles] = useState([]);
|
||||
const [ready, setReady] = useState(false);
|
||||
const [dragging, setDragging] = useState(false);
|
||||
const { user } = useUser();
|
||||
const [showWarningModal, setShowWarningModal] = useState(false);
|
||||
const [isEmbedding, setIsEmbedding] = useState(false);
|
||||
const [embedProgress, setEmbedProgress] = useState(0);
|
||||
const [pendingFiles, setPendingFiles] = useState([]);
|
||||
const [tokenCount, setTokenCount] = useState(0);
|
||||
const [maxTokens, setMaxTokens] = useState(Number.POSITIVE_INFINITY);
|
||||
|
||||
useEffect(() => {
|
||||
System.checkDocumentProcessorOnline().then((status) => setReady(status));
|
||||
}, [user]);
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
window.addEventListener(REMOVE_ATTACHMENT_EVENT, handleRemove);
|
||||
window.addEventListener(CLEAR_ATTACHMENTS_EVENT, resetAttachments);
|
||||
window.addEventListener(PASTE_ATTACHMENT_EVENT, handlePastedAttachment);
|
||||
window.addEventListener(
|
||||
PARSED_FILE_ATTACHMENT_REMOVED_EVENT,
|
||||
handleRemoveParsedFile
|
||||
);
|
||||
|
||||
return () => {
|
||||
window.removeEventListener(REMOVE_ATTACHMENT_EVENT, handleRemove);
|
||||
window.removeEventListener(CLEAR_ATTACHMENTS_EVENT, resetAttachments);
|
||||
window.removeEventListener(
|
||||
PARSED_FILE_ATTACHMENT_REMOVED_EVENT,
|
||||
handleRemoveParsedFile
|
||||
);
|
||||
window.removeEventListener(
|
||||
PASTE_ATTACHMENT_EVENT,
|
||||
handlePastedAttachment
|
||||
@ -50,6 +82,18 @@ export function DnDFileUploaderProvider({ workspace, children }) {
|
||||
};
|
||||
}, []);
|
||||
|
||||
/**
|
||||
* Handles the removal of a parsed file attachment from the uploader queue.
|
||||
* Only uses the document id to remove the file from the queue
|
||||
* @param {CustomEvent<{document: ParsedFile}>} event
|
||||
*/
|
||||
async function handleRemoveParsedFile(event) {
|
||||
const { document } = event.detail;
|
||||
setFiles((prev) =>
|
||||
prev.filter((prevFile) => prevFile.document.id !== document.id)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove file from uploader queue.
|
||||
* @param {CustomEvent<{uid: string}>} event
|
||||
@ -112,8 +156,6 @@ export function DnDFileUploaderProvider({ workspace, children }) {
|
||||
type: "attachment",
|
||||
});
|
||||
} else {
|
||||
// If the user is a default user, we do not want to allow them to upload files.
|
||||
if (!!user && user.role === "default") continue;
|
||||
newAccepted.push({
|
||||
uid: v4(),
|
||||
file,
|
||||
@ -149,8 +191,6 @@ export function DnDFileUploaderProvider({ workspace, children }) {
|
||||
type: "attachment",
|
||||
});
|
||||
} else {
|
||||
// If the user is a default user, we do not want to allow them to upload files.
|
||||
if (!!user && user.role === "default") continue;
|
||||
newAccepted.push({
|
||||
uid: v4(),
|
||||
file,
|
||||
@ -170,36 +210,87 @@ export function DnDFileUploaderProvider({ workspace, children }) {
|
||||
* Embeds attachments that are eligible for embedding - basically files that are not images.
|
||||
* @param {Attachment[]} newAttachments
|
||||
*/
|
||||
function embedEligibleAttachments(newAttachments = []) {
|
||||
async function embedEligibleAttachments(newAttachments = []) {
|
||||
window.dispatchEvent(new CustomEvent(ATTACHMENTS_PROCESSING_EVENT));
|
||||
const promises = [];
|
||||
|
||||
const { currentContextTokenCount, contextWindow } =
|
||||
await Workspace.getParsedFiles(workspace.slug, threadSlug);
|
||||
const workspaceContextWindow = contextWindow
|
||||
? Math.floor(contextWindow * Workspace.maxContextWindowLimit)
|
||||
: Number.POSITIVE_INFINITY;
|
||||
setMaxTokens(workspaceContextWindow);
|
||||
|
||||
let totalTokenCount = currentContextTokenCount;
|
||||
let batchPendingFiles = [];
|
||||
|
||||
for (const attachment of newAttachments) {
|
||||
// Images/attachments are chat specific.
|
||||
if (attachment.type === "attachment") continue;
|
||||
|
||||
const formData = new FormData();
|
||||
formData.append("file", attachment.file, attachment.file.name);
|
||||
formData.append("threadSlug", threadSlug || null);
|
||||
promises.push(
|
||||
Workspace.uploadAndEmbedFile(workspace.slug, formData).then(
|
||||
({ response, data }) => {
|
||||
Workspace.parseFile(workspace.slug, formData).then(
|
||||
async ({ response, data }) => {
|
||||
if (!response.ok) {
|
||||
const updates = {
|
||||
status: response.ok ? "success" : "failed",
|
||||
status: "failed",
|
||||
error: data?.error ?? null,
|
||||
document: data?.document,
|
||||
};
|
||||
|
||||
setFiles((prev) => {
|
||||
return prev.map(
|
||||
setFiles((prev) =>
|
||||
prev.map(
|
||||
(
|
||||
/** @type {Attachment} */
|
||||
prevFile
|
||||
) => {
|
||||
if (prevFile.uid !== attachment.uid) return prevFile;
|
||||
return { ...prevFile, ...updates };
|
||||
}
|
||||
) =>
|
||||
prevFile.uid !== attachment.uid
|
||||
? prevFile
|
||||
: { ...prevFile, ...updates }
|
||||
)
|
||||
);
|
||||
return;
|
||||
}
|
||||
// Will always be one file in the array
|
||||
/** @type {ParsedFile} */
|
||||
const file = data.files[0];
|
||||
|
||||
// Add token count for this file
|
||||
// and add it to the batch pending files
|
||||
totalTokenCount += file.tokenCountEstimate;
|
||||
batchPendingFiles.push({
|
||||
attachment,
|
||||
parsedFileId: file.id,
|
||||
tokenCount: file.tokenCountEstimate,
|
||||
});
|
||||
|
||||
if (totalTokenCount > workspaceContextWindow) {
|
||||
setTokenCount(totalTokenCount);
|
||||
setPendingFiles(batchPendingFiles);
|
||||
setShowWarningModal(true);
|
||||
return;
|
||||
}
|
||||
|
||||
// File is within limits, keep in parsed files
|
||||
const result = { success: true, document: file };
|
||||
const updates = {
|
||||
status: result.success ? "added_context" : "failed",
|
||||
error: result.error ?? null,
|
||||
document: result.document,
|
||||
};
|
||||
|
||||
setFiles((prev) =>
|
||||
prev.map(
|
||||
(
|
||||
/** @type {Attachment} */
|
||||
prevFile
|
||||
) =>
|
||||
prevFile.uid !== attachment.uid
|
||||
? prevFile
|
||||
: { ...prevFile, ...updates }
|
||||
)
|
||||
);
|
||||
}
|
||||
)
|
||||
);
|
||||
@ -211,10 +302,117 @@ export function DnDFileUploaderProvider({ workspace, children }) {
|
||||
);
|
||||
}
|
||||
|
||||
// Handle modal actions
|
||||
const handleCloseModal = async () => {
|
||||
if (!pendingFiles.length) return;
|
||||
|
||||
// Delete all files from this batch
|
||||
await Workspace.deleteParsedFiles(
|
||||
workspace.slug,
|
||||
pendingFiles.map((file) => file.parsedFileId)
|
||||
);
|
||||
|
||||
// Remove all files from this batch from the UI
|
||||
setFiles((prev) =>
|
||||
prev.filter(
|
||||
(prevFile) =>
|
||||
!pendingFiles.some((file) => file.attachment.uid === prevFile.uid)
|
||||
)
|
||||
);
|
||||
setShowWarningModal(false);
|
||||
setPendingFiles([]);
|
||||
setTokenCount(0);
|
||||
window.dispatchEvent(new CustomEvent(ATTACHMENTS_PROCESSED_EVENT));
|
||||
};
|
||||
|
||||
const handleContinueAnyway = async () => {
|
||||
if (!pendingFiles.length) return;
|
||||
const results = pendingFiles.map((file) => ({
|
||||
success: true,
|
||||
document: { id: file.parsedFileId },
|
||||
}));
|
||||
|
||||
const fileUpdates = pendingFiles.map((file, i) => ({
|
||||
uid: file.attachment.uid,
|
||||
updates: {
|
||||
status: results[i].success ? "success" : "failed",
|
||||
error: results[i].error ?? null,
|
||||
document: results[i].document,
|
||||
},
|
||||
}));
|
||||
|
||||
setFiles((prev) =>
|
||||
prev.map((prevFile) => {
|
||||
const update = fileUpdates.find((f) => f.uid === prevFile.uid);
|
||||
return update ? { ...prevFile, ...update.updates } : prevFile;
|
||||
})
|
||||
);
|
||||
setShowWarningModal(false);
|
||||
setPendingFiles([]);
|
||||
setTokenCount(0);
|
||||
};
|
||||
|
||||
const handleEmbed = async () => {
|
||||
if (!pendingFiles.length) return;
|
||||
setIsEmbedding(true);
|
||||
setEmbedProgress(0);
|
||||
|
||||
// Embed all pending files
|
||||
let completed = 0;
|
||||
const results = await Promise.all(
|
||||
pendingFiles.map((file) =>
|
||||
Workspace.embedParsedFile(workspace.slug, file.parsedFileId).then(
|
||||
(result) => {
|
||||
completed++;
|
||||
setEmbedProgress(completed);
|
||||
return result;
|
||||
}
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
// Update status for all files
|
||||
const fileUpdates = pendingFiles.map((file, i) => ({
|
||||
uid: file.attachment.uid,
|
||||
updates: {
|
||||
status: results[i].response.ok ? "embedded" : "failed",
|
||||
error: results[i].data?.error ?? null,
|
||||
document: results[i].data?.document,
|
||||
},
|
||||
}));
|
||||
|
||||
setFiles((prev) =>
|
||||
prev.map((prevFile) => {
|
||||
const update = fileUpdates.find((f) => f.uid === prevFile.uid);
|
||||
return update ? { ...prevFile, ...update.updates } : prevFile;
|
||||
})
|
||||
);
|
||||
setShowWarningModal(false);
|
||||
setPendingFiles([]);
|
||||
setTokenCount(0);
|
||||
setIsEmbedding(false);
|
||||
window.dispatchEvent(new CustomEvent(ATTACHMENTS_PROCESSED_EVENT));
|
||||
showToast(
|
||||
`${pendingFiles.length} ${pluralize("file", pendingFiles.length)} embedded successfully`,
|
||||
"success"
|
||||
);
|
||||
};
|
||||
|
||||
return (
|
||||
<DndUploaderContext.Provider
|
||||
value={{ files, ready, dragging, setDragging, onDrop, parseAttachments }}
|
||||
>
|
||||
<FileUploadWarningModal
|
||||
show={showWarningModal}
|
||||
onClose={handleCloseModal}
|
||||
onContinue={handleContinueAnyway}
|
||||
onEmbed={handleEmbed}
|
||||
tokenCount={tokenCount}
|
||||
maxTokens={maxTokens}
|
||||
fileCount={pendingFiles.length}
|
||||
isEmbedding={isEmbedding}
|
||||
embedProgress={embedProgress}
|
||||
/>
|
||||
{children}
|
||||
</DndUploaderContext.Provider>
|
||||
);
|
||||
@ -231,8 +429,6 @@ export default function DnDFileUploaderWrapper({ children }) {
|
||||
onDragEnter: () => setDragging(true),
|
||||
onDragLeave: () => setDragging(false),
|
||||
});
|
||||
const { user } = useUser();
|
||||
const canUploadAll = !user || user?.role !== "default";
|
||||
|
||||
return (
|
||||
<div
|
||||
@ -245,22 +441,16 @@ export default function DnDFileUploaderWrapper({ children }) {
|
||||
>
|
||||
<div className="w-full h-full flex justify-center items-center rounded-xl">
|
||||
<div className="flex flex-col gap-y-[14px] justify-center items-center">
|
||||
<img src={DndIcon} width={69} height={69} />
|
||||
<p className="text-white text-[24px] font-semibold">
|
||||
Add {canUploadAll ? "anything" : "an image"}
|
||||
</p>
|
||||
<img
|
||||
src={DndIcon}
|
||||
width={69}
|
||||
height={69}
|
||||
alt="Drag and drop icon"
|
||||
/>
|
||||
<p className="text-white text-[24px] font-semibold">Add anything</p>
|
||||
<p className="text-white text-[16px] text-center">
|
||||
{canUploadAll ? (
|
||||
<>
|
||||
Drop your file here to embed it into your <br />
|
||||
Drop a file or image here to attach it to your <br />
|
||||
workspace auto-magically.
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
Drop your image here to chat with it <br />
|
||||
auto-magically.
|
||||
</>
|
||||
)}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@ -0,0 +1,197 @@
|
||||
import { useState } from "react";
|
||||
import { X, CircleNotch, Warning } from "@phosphor-icons/react";
|
||||
import Workspace from "@/models/workspace";
|
||||
import { useParams } from "react-router-dom";
|
||||
import { nFormatter } from "@/utils/numbers";
|
||||
import showToast from "@/utils/toast";
|
||||
import pluralize from "pluralize";
|
||||
import { PARSED_FILE_ATTACHMENT_REMOVED_EVENT } from "../../../DnDWrapper";
|
||||
import useUser from "@/hooks/useUser";
|
||||
|
||||
export default function ParsedFilesMenu({
|
||||
onEmbeddingChange,
|
||||
tooltipRef,
|
||||
files,
|
||||
setFiles,
|
||||
currentTokens,
|
||||
setCurrentTokens,
|
||||
contextWindow,
|
||||
isLoading,
|
||||
}) {
|
||||
const { user } = useUser();
|
||||
const canEmbed = !user || user.role !== "default";
|
||||
const initialContextWindowLimitExceeded =
|
||||
contextWindow &&
|
||||
currentTokens >= contextWindow * Workspace.maxContextWindowLimit;
|
||||
const { slug, threadSlug = null } = useParams();
|
||||
const [isEmbedding, setIsEmbedding] = useState(false);
|
||||
const [embedProgress, setEmbedProgress] = useState(1);
|
||||
const [contextWindowLimitExceeded, setContextWindowLimitExceeded] = useState(
|
||||
initialContextWindowLimitExceeded
|
||||
);
|
||||
|
||||
async function handleRemove(e, file) {
|
||||
e.preventDefault();
|
||||
e.stopPropagation();
|
||||
if (!file?.id) return;
|
||||
|
||||
const success = await Workspace.deleteParsedFiles(slug, [file.id]);
|
||||
if (!success) return;
|
||||
|
||||
// Update the local files list and current tokens
|
||||
setFiles((prev) => prev.filter((f) => f.id !== file.id));
|
||||
|
||||
// Dispatch an event to the DnDFileUploaderWrapper to update the files list in attachment manager if it exists
|
||||
window.dispatchEvent(
|
||||
new CustomEvent(PARSED_FILE_ATTACHMENT_REMOVED_EVENT, {
|
||||
detail: { document: file },
|
||||
})
|
||||
);
|
||||
const { currentContextTokenCount } = await Workspace.getParsedFiles(
|
||||
slug,
|
||||
threadSlug
|
||||
);
|
||||
const newContextWindowLimitExceeded =
|
||||
contextWindow &&
|
||||
currentContextTokenCount >=
|
||||
contextWindow * Workspace.maxContextWindowLimit;
|
||||
setCurrentTokens(currentContextTokenCount);
|
||||
setContextWindowLimitExceeded(newContextWindowLimitExceeded);
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles the embedding of the files when the user exceeds the context window limit
|
||||
* and opts to embed the files into the workspace instead.
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async function handleEmbed() {
|
||||
if (!files.length) return;
|
||||
setIsEmbedding(true);
|
||||
onEmbeddingChange?.(true);
|
||||
setEmbedProgress(1);
|
||||
try {
|
||||
let completed = 0;
|
||||
await Promise.all(
|
||||
files.map((file) =>
|
||||
Workspace.embedParsedFile(slug, file.id).then(() => {
|
||||
completed++;
|
||||
setEmbedProgress(completed + 1);
|
||||
})
|
||||
)
|
||||
);
|
||||
setFiles([]);
|
||||
const { currentContextTokenCount } = await Workspace.getParsedFiles(
|
||||
slug,
|
||||
threadSlug
|
||||
);
|
||||
setCurrentTokens(currentContextTokenCount);
|
||||
setContextWindowLimitExceeded(
|
||||
currentContextTokenCount >=
|
||||
contextWindow * Workspace.maxContextWindowLimit
|
||||
);
|
||||
showToast(
|
||||
`${files.length} ${pluralize("file", files.length)} embedded successfully`,
|
||||
"success"
|
||||
);
|
||||
tooltipRef?.current?.close();
|
||||
} catch (error) {
|
||||
console.error("Failed to embed files:", error);
|
||||
showToast("Failed to embed files", "error");
|
||||
}
|
||||
setIsEmbedding(false);
|
||||
onEmbeddingChange?.(false);
|
||||
setEmbedProgress(1);
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="flex flex-col gap-2 p-2">
|
||||
<div className="flex items-center justify-between">
|
||||
<div className="text-sm font-medium text-theme-text-primary">
|
||||
Current Context ({files.length} files)
|
||||
</div>
|
||||
<div
|
||||
// If the user cannot see the embed CTA, show a tooltip
|
||||
{...(contextWindowLimitExceeded &&
|
||||
!canEmbed && {
|
||||
"data-tooltip-id": "context-window-limit-exceeded",
|
||||
"data-tooltip-content":
|
||||
"You have exceeded the context window limit. Some files may be truncated or excluded from chat responses. Responses may hallucinate or lack relevant information.",
|
||||
})}
|
||||
className={`flex items-center gap-x-1 ${contextWindowLimitExceeded && !canEmbed ? "cursor-pointer" : ""}`}
|
||||
>
|
||||
{contextWindowLimitExceeded && (
|
||||
<Warning size={14} className="text-orange-600" />
|
||||
)}
|
||||
<div
|
||||
className={`text-xs ${contextWindowLimitExceeded ? "text-orange-600" : "text-theme-text-secondary"}`}
|
||||
>
|
||||
{nFormatter(currentTokens)} /{" "}
|
||||
{contextWindow ? nFormatter(contextWindow) : "--"} tokens
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{contextWindowLimitExceeded && canEmbed && (
|
||||
<div className="flex flex-col gap-2 p-2 bg-theme-bg-secondary light:bg-theme-bg-primary rounded">
|
||||
<div className="flex items-start gap-2">
|
||||
<Warning
|
||||
className="flex-shrink-0 mt-1 text-yellow-500 light:text-yellow-600"
|
||||
size={16}
|
||||
/>
|
||||
<div className="text-xs text-theme-text-primary">
|
||||
Your context window is getting full. Some files may be truncated
|
||||
or excluded from chat responses. We recommend embedding these
|
||||
files directly into your workspace for better results.
|
||||
</div>
|
||||
</div>
|
||||
<button
|
||||
onClick={handleEmbed}
|
||||
disabled={isEmbedding}
|
||||
className="border-none disabled:opacity-50 flex items-center justify-center gap-2 px-3 py-2 text-xs bg-primary-button hover:bg-theme-button-primary-hover text-white font-medium rounded transition-colors shadow-sm"
|
||||
>
|
||||
{isEmbedding ? (
|
||||
<>
|
||||
<CircleNotch size={14} className="animate-spin" />
|
||||
Embedding {embedProgress} of {files.length} files...
|
||||
</>
|
||||
) : (
|
||||
"Embed Files into Workspace"
|
||||
)}
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
<div className="flex flex-col gap-1 max-h-[300px] overflow-y-auto">
|
||||
{files.length > 0 &&
|
||||
files.map((file, i) => (
|
||||
<div
|
||||
key={i}
|
||||
className={
|
||||
"flex items-center justify-between gap-2 p-2 text-xs bg-theme-bg-secondary rounded"
|
||||
}
|
||||
>
|
||||
<div className="truncate flex-1 text-theme-text-primary">
|
||||
{file.title}
|
||||
</div>
|
||||
<button
|
||||
onClick={(e) => handleRemove(e, file)}
|
||||
className="border-none text-theme-text-secondary hover:text-theme-text-primary"
|
||||
disabled={isEmbedding}
|
||||
>
|
||||
<X size={16} />
|
||||
</button>
|
||||
</div>
|
||||
))}
|
||||
{isLoading && (
|
||||
<div className="flex items-center justify-center gap-2 text-xs text-theme-text-secondary text-center py-2">
|
||||
<CircleNotch size={16} className="animate-spin" />
|
||||
Loading...
|
||||
</div>
|
||||
)}
|
||||
{!isLoading && files.length === 0 && (
|
||||
<div className="text-xs text-theme-text-secondary text-center py-2">
|
||||
No files found
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@ -1,7 +1,15 @@
|
||||
import useUser from "@/hooks/useUser";
|
||||
import { PaperclipHorizontal } from "@phosphor-icons/react";
|
||||
import { Tooltip } from "react-tooltip";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { useRef, useState, useEffect } from "react";
|
||||
import { useParams } from "react-router-dom";
|
||||
import Workspace from "@/models/workspace";
|
||||
import {
|
||||
ATTACHMENTS_PROCESSED_EVENT,
|
||||
REMOVE_ATTACHMENT_EVENT,
|
||||
} from "../../DnDWrapper";
|
||||
import { useTheme } from "@/hooks/useTheme";
|
||||
import ParsedFilesMenu from "./ParsedFilesMenu";
|
||||
|
||||
/**
|
||||
* This is a simple proxy component that clicks on the DnD file uploader for the user.
|
||||
@ -9,35 +17,119 @@ import { useTranslation } from "react-i18next";
|
||||
*/
|
||||
export default function AttachItem() {
|
||||
const { t } = useTranslation();
|
||||
const { user } = useUser();
|
||||
if (!!user && user.role === "default") return null;
|
||||
const { theme } = useTheme();
|
||||
const { slug, threadSlug = null } = useParams();
|
||||
const tooltipRef = useRef(null);
|
||||
const [isEmbedding, setIsEmbedding] = useState(false);
|
||||
const [files, setFiles] = useState([]);
|
||||
const [currentTokens, setCurrentTokens] = useState(0);
|
||||
const [contextWindow, setContextWindow] = useState(Infinity);
|
||||
const [showTooltip, setShowTooltip] = useState(false);
|
||||
const [isLoading, setIsLoading] = useState(true);
|
||||
|
||||
const fetchFiles = () => {
|
||||
if (!slug) return;
|
||||
if (isEmbedding) return;
|
||||
setIsLoading(true);
|
||||
Workspace.getParsedFiles(slug, threadSlug)
|
||||
.then(({ files, contextWindow, currentContextTokenCount }) => {
|
||||
setFiles(files);
|
||||
setShowTooltip(files.length > 0);
|
||||
setContextWindow(contextWindow);
|
||||
setCurrentTokens(currentContextTokenCount);
|
||||
})
|
||||
.finally(() => {
|
||||
setIsLoading(false);
|
||||
});
|
||||
};
|
||||
|
||||
/**
|
||||
* Handles the removal of an attachment from the parsed files
|
||||
* and triggers a re-fetch of the parsed files.
|
||||
* This function handles when the user clicks the X on an Attachment via the AttachmentManager
|
||||
* so we need to sync the state in the ParsedFilesMenu picker here.
|
||||
*/
|
||||
async function handleRemoveAttachment(e) {
|
||||
const { document } = e.detail;
|
||||
await Workspace.deleteParsedFiles(slug, [document.id]);
|
||||
fetchFiles();
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles the click event for the attach item button.
|
||||
* @param {MouseEvent} e - The click event.
|
||||
* @returns {void}
|
||||
*/
|
||||
function handleClick(e) {
|
||||
e?.target?.blur();
|
||||
document?.getElementById("dnd-chat-file-uploader")?.click();
|
||||
return;
|
||||
}
|
||||
|
||||
useEffect(() => {
|
||||
fetchFiles();
|
||||
window.addEventListener(ATTACHMENTS_PROCESSED_EVENT, fetchFiles);
|
||||
window.addEventListener(REMOVE_ATTACHMENT_EVENT, handleRemoveAttachment);
|
||||
return () => {
|
||||
window.removeEventListener(ATTACHMENTS_PROCESSED_EVENT, fetchFiles);
|
||||
window.removeEventListener(
|
||||
REMOVE_ATTACHMENT_EVENT,
|
||||
handleRemoveAttachment
|
||||
);
|
||||
};
|
||||
}, [slug, threadSlug]);
|
||||
|
||||
return (
|
||||
<>
|
||||
<button
|
||||
id="attach-item-btn"
|
||||
data-tooltip-id="attach-item-btn"
|
||||
data-tooltip-content={t("chat_window.attach_file")}
|
||||
data-tooltip-id="tooltip-attach-item-btn"
|
||||
aria-label={t("chat_window.attach_file")}
|
||||
type="button"
|
||||
onClick={(e) => {
|
||||
e?.target?.blur();
|
||||
document?.getElementById("dnd-chat-file-uploader")?.click();
|
||||
return;
|
||||
}}
|
||||
onClick={handleClick}
|
||||
onPointerEnter={fetchFiles}
|
||||
className={`border-none relative flex justify-center items-center opacity-60 hover:opacity-100 light:opacity-100 light:hover:opacity-60 cursor-pointer`}
|
||||
>
|
||||
<div className="relative">
|
||||
<PaperclipHorizontal
|
||||
color="var(--theme-sidebar-footer-icon-fill)"
|
||||
className="w-[22px] h-[22px] pointer-events-none text-white rotate-90 -scale-y-100"
|
||||
/>
|
||||
{files.length > 0 && (
|
||||
<div className="absolute -top-2 right-[1%] bg-white text-black light:invert text-[8px] rounded-full px-1 flex items-center justify-center">
|
||||
{files.length}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</button>
|
||||
{showTooltip && (
|
||||
<Tooltip
|
||||
id="attach-item-btn"
|
||||
ref={tooltipRef}
|
||||
id="tooltip-attach-item-btn"
|
||||
place="top"
|
||||
opacity={1}
|
||||
clickable={!isEmbedding}
|
||||
delayShow={300}
|
||||
className="tooltip !text-xs z-[99]"
|
||||
delayHide={isEmbedding ? 999999 : 800} // Prevent tooltip from hiding during embedding
|
||||
arrowColor={
|
||||
theme === "light"
|
||||
? "var(--theme-modal-border)"
|
||||
: "var(--theme-bg-primary)"
|
||||
}
|
||||
className="z-99 !w-[400px] !bg-theme-bg-primary !px-[5px] !rounded-lg !pointer-events-auto light:border-2 light:border-theme-modal-border"
|
||||
>
|
||||
<ParsedFilesMenu
|
||||
onEmbeddingChange={setIsEmbedding}
|
||||
tooltipRef={tooltipRef}
|
||||
isLoading={isLoading}
|
||||
files={files}
|
||||
setFiles={setFiles}
|
||||
currentTokens={currentTokens}
|
||||
setCurrentTokens={setCurrentTokens}
|
||||
contextWindow={contextWindow}
|
||||
/>
|
||||
</Tooltip>
|
||||
)}
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
||||
@ -160,7 +160,11 @@ function AttachmentItem({ attachment }) {
|
||||
<>
|
||||
<div
|
||||
data-tooltip-id={`attachment-uid-${uid}-success`}
|
||||
data-tooltip-content={`${file.name} was uploaded and embedded into this workspace. It will be available for RAG chat now.`}
|
||||
data-tooltip-content={
|
||||
status === "embedded"
|
||||
? `${file.name} was uploaded and embedded into this workspace. It will be available for RAG chat now.`
|
||||
: `${file.name} will be used as context for this chat only.`
|
||||
}
|
||||
className={`relative flex items-center gap-x-1 rounded-lg bg-theme-attachment-bg border-none w-[180px] group`}
|
||||
>
|
||||
<div className="invisible group-hover:visible absolute -top-[5px] -right-[5px] w-fit h-fit z-[10]">
|
||||
@ -186,7 +190,7 @@ function AttachmentItem({ attachment }) {
|
||||
{file.name}
|
||||
</p>
|
||||
<p className="text-theme-attachment-text-secondary text-[10px] leading-[14px] font-medium">
|
||||
File embedded!
|
||||
{status === "embedded" ? "File embedded!" : "Added as context!"}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@ -79,7 +79,7 @@ export default function WorkspaceChat({ loading, workspace }) {
|
||||
setEventDelegatorForCodeSnippets();
|
||||
return (
|
||||
<TTSProvider>
|
||||
<DnDFileUploaderProvider workspace={workspace}>
|
||||
<DnDFileUploaderProvider workspace={workspace} threadSlug={threadSlug}>
|
||||
<ChatContainer workspace={workspace} knownHistory={history} />
|
||||
</DnDFileUploaderProvider>
|
||||
</TTSProvider>
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import { API_BASE } from "@/utils/constants";
|
||||
import { API_BASE, fullApiUrl } from "@/utils/constants";
|
||||
import { baseHeaders, safeJsonParse } from "@/utils/request";
|
||||
import { fetchEventSource } from "@microsoft/fetch-event-source";
|
||||
import WorkspaceThread from "@/models/workspaceThread";
|
||||
@ -7,6 +7,8 @@ import { ABORT_STREAM_EVENT } from "@/utils/chat";
|
||||
|
||||
const Workspace = {
|
||||
workspaceOrderStorageKey: "anythingllm-workspace-order",
|
||||
/** The maximum percentage of the context window that can be used for attachments */
|
||||
maxContextWindowLimit: 0.8,
|
||||
|
||||
new: async function (data = {}) {
|
||||
const { workspace, message } = await fetch(`${API_BASE}/workspace/new`, {
|
||||
@ -250,6 +252,28 @@ const Workspace = {
|
||||
const data = await response.json();
|
||||
return { response, data };
|
||||
},
|
||||
parseFile: async function (slug, formData) {
|
||||
const response = await fetch(`${API_BASE}/workspace/${slug}/parse`, {
|
||||
method: "POST",
|
||||
body: formData,
|
||||
headers: baseHeaders(),
|
||||
});
|
||||
|
||||
const data = await response.json();
|
||||
return { response, data };
|
||||
},
|
||||
|
||||
getParsedFiles: async function (slug, threadSlug = null) {
|
||||
const basePath = new URL(`${fullApiUrl()}/workspace/${slug}/parsed-files`);
|
||||
if (threadSlug) basePath.searchParams.set("threadSlug", threadSlug);
|
||||
const response = await fetch(basePath, {
|
||||
method: "GET",
|
||||
headers: baseHeaders(),
|
||||
});
|
||||
|
||||
const data = await response.json();
|
||||
return data;
|
||||
},
|
||||
uploadLink: async function (slug, link) {
|
||||
const response = await fetch(`${API_BASE}/workspace/${slug}/upload-link`, {
|
||||
method: "POST",
|
||||
@ -454,6 +478,31 @@ const Workspace = {
|
||||
return { response, data };
|
||||
},
|
||||
|
||||
deleteParsedFiles: async function (slug, fileIds = []) {
|
||||
const response = await fetch(
|
||||
`${API_BASE}/workspace/${slug}/delete-parsed-files`,
|
||||
{
|
||||
method: "DELETE",
|
||||
headers: baseHeaders(),
|
||||
body: JSON.stringify({ fileIds }),
|
||||
}
|
||||
);
|
||||
return response.ok;
|
||||
},
|
||||
|
||||
embedParsedFile: async function (slug, fileId) {
|
||||
const response = await fetch(
|
||||
`${API_BASE}/workspace/${slug}/embed-parsed-file/${fileId}`,
|
||||
{
|
||||
method: "POST",
|
||||
headers: baseHeaders(),
|
||||
}
|
||||
);
|
||||
|
||||
const data = await response.json();
|
||||
return { response, data };
|
||||
},
|
||||
|
||||
/**
|
||||
* Deletes and un-embeds a single file in a single call from a workspace
|
||||
* @param {string} slug - workspace slug
|
||||
|
||||
@ -211,6 +211,13 @@ export default {
},
},

// TODO: Migrate all docs.anythingllm.com links to the new docs.
documentation: {
contextWindows: () => {
return "https://docs.anythingllm.com/chatting-with-documents/introduction#you-exceed-the-context-window---what-now";
},
},

experimental: {
liveDocumentSync: {
manage: () => `/settings/beta-features/live-document-sync/manage`,
server/.gitignore (vendored, 1 change)
@ -14,6 +14,7 @@ storage/plugins/agent-flows/*
storage/plugins/office-extensions/*
storage/plugins/anythingllm_mcp_servers.json
!storage/documents/DOCUMENTS.md
storage/direct-uploads
logs/server.log
*.db
*.db-journal
@ -32,14 +32,15 @@ const {
|
||||
} = require("../utils/files/pfp");
|
||||
const { getTTSProvider } = require("../utils/TextToSpeech");
|
||||
const { WorkspaceThread } = require("../models/workspaceThread");
|
||||
|
||||
const truncate = require("truncate");
|
||||
const { purgeDocument } = require("../utils/files/purgeDocument");
|
||||
const { getModelTag } = require("./utils");
|
||||
const { searchWorkspaceAndThreads } = require("../utils/helpers/search");
|
||||
const { workspaceParsedFilesEndpoints } = require("./workspacesParsedFiles");
|
||||
|
||||
function workspaceEndpoints(app) {
|
||||
if (!app) return;
|
||||
|
||||
const responseCache = new Map();
|
||||
|
||||
app.post(
|
||||
@ -1060,6 +1061,9 @@ function workspaceEndpoints(app) {
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
// Parsed Files in separate endpoint just to keep the workspace endpoints clean
|
||||
workspaceParsedFilesEndpoints(app);
|
||||
}
|
||||
|
||||
module.exports = { workspaceEndpoints };
|
||||
|
||||
server/endpoints/workspacesParsedFiles.js (new file, 199 lines)
@ -0,0 +1,199 @@
|
||||
const { reqBody, multiUserMode, userFromSession } = require("../utils/http");
|
||||
const { handleFileUpload } = require("../utils/files/multer");
|
||||
const { validatedRequest } = require("../utils/middleware/validatedRequest");
|
||||
const { Telemetry } = require("../models/telemetry");
|
||||
const {
|
||||
flexUserRoleValid,
|
||||
ROLES,
|
||||
} = require("../utils/middleware/multiUserProtected");
|
||||
const { EventLogs } = require("../models/eventLogs");
|
||||
const { validWorkspaceSlug } = require("../utils/middleware/validWorkspace");
|
||||
const { CollectorApi } = require("../utils/collectorApi");
|
||||
const { WorkspaceThread } = require("../models/workspaceThread");
|
||||
const { WorkspaceParsedFiles } = require("../models/workspaceParsedFiles");
|
||||
|
||||
function workspaceParsedFilesEndpoints(app) {
|
||||
if (!app) return;
|
||||
|
||||
app.get(
|
||||
"/workspace/:slug/parsed-files",
|
||||
[validatedRequest, flexUserRoleValid([ROLES.all]), validWorkspaceSlug],
|
||||
async (request, response) => {
|
||||
try {
|
||||
const threadSlug = request.query.threadSlug || null;
|
||||
const user = await userFromSession(request, response);
|
||||
const workspace = response.locals.workspace;
|
||||
const thread = threadSlug
|
||||
? await WorkspaceThread.get({ slug: String(threadSlug) })
|
||||
: null;
|
||||
const { files, contextWindow, currentContextTokenCount } =
|
||||
await WorkspaceParsedFiles.getContextMetadataAndLimits(
|
||||
workspace,
|
||||
thread || null,
|
||||
multiUserMode(response) ? user : null
|
||||
);
|
||||
|
||||
return response
|
||||
.status(200)
|
||||
.json({ files, contextWindow, currentContextTokenCount });
|
||||
} catch (e) {
|
||||
console.error(e.message, e);
|
||||
return response.sendStatus(500).end();
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
app.delete(
|
||||
"/workspace/:slug/delete-parsed-files",
|
||||
[validatedRequest, flexUserRoleValid([ROLES.all]), validWorkspaceSlug],
|
||||
async function (request, response) {
|
||||
try {
|
||||
const { fileIds = [] } = reqBody(request);
|
||||
if (!fileIds.length) return response.sendStatus(400).end();
|
||||
const success = await WorkspaceParsedFiles.delete({
|
||||
id: { in: fileIds.map((id) => parseInt(id)) },
|
||||
});
|
||||
return response.status(success ? 200 : 500).end();
|
||||
} catch (e) {
|
||||
console.error(e.message, e);
|
||||
return response.sendStatus(500).end();
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
app.post(
|
||||
"/workspace/:slug/embed-parsed-file/:fileId",
|
||||
[
|
||||
validatedRequest,
|
||||
// Embed is still an admin/manager only feature
|
||||
flexUserRoleValid([ROLES.admin, ROLES.manager]),
|
||||
validWorkspaceSlug,
|
||||
],
|
||||
async function (request, response) {
|
||||
const { fileId = null } = request.params;
|
||||
try {
|
||||
const user = await userFromSession(request, response);
|
||||
const workspace = response.locals.workspace;
|
||||
|
||||
if (!fileId) return response.sendStatus(400).end();
|
||||
const { success, error, document } =
|
||||
await WorkspaceParsedFiles.moveToDocumentsAndEmbed(fileId, workspace);
|
||||
|
||||
if (!success) {
|
||||
return response.status(500).json({
|
||||
success: false,
|
||||
error: error || "Failed to embed file",
|
||||
});
|
||||
}
|
||||
|
||||
await Telemetry.sendTelemetry("document_embedded");
|
||||
await EventLogs.logEvent(
|
||||
"document_embedded",
|
||||
{
|
||||
documentName: document?.name || "unknown",
|
||||
workspaceId: workspace.id,
|
||||
},
|
||||
user?.id
|
||||
);
|
||||
|
||||
return response.status(200).json({
|
||||
success: true,
|
||||
error: null,
|
||||
document,
|
||||
});
|
||||
} catch (e) {
|
||||
console.error(e.message, e);
|
||||
return response.sendStatus(500).end();
|
||||
} finally {
|
||||
if (!fileId) return;
|
||||
await WorkspaceParsedFiles.delete({ id: parseInt(fileId) });
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
app.post(
|
||||
"/workspace/:slug/parse",
|
||||
[
|
||||
validatedRequest,
|
||||
flexUserRoleValid([ROLES.all]),
|
||||
handleFileUpload,
|
||||
validWorkspaceSlug,
|
||||
],
|
||||
async function (request, response) {
|
||||
try {
|
||||
const user = await userFromSession(request, response);
|
||||
const workspace = response.locals.workspace;
|
||||
const Collector = new CollectorApi();
|
||||
const { originalname } = request.file;
|
||||
const processingOnline = await Collector.online();
|
||||
|
||||
if (!processingOnline) {
|
||||
return response.status(500).json({
|
||||
success: false,
|
||||
error: `Document processing API is not online. Document ${originalname} will not be parsed.`,
|
||||
});
|
||||
}
|
||||
|
||||
const { success, reason, documents } =
|
||||
await Collector.parseDocument(originalname);
|
||||
if (!success || !documents?.[0]) {
|
||||
return response.status(500).json({
|
||||
success: false,
|
||||
error: reason || "No document returned from collector",
|
||||
});
|
||||
}
|
||||
|
||||
// Get thread ID if we have a slug
|
||||
const { threadSlug = null } = reqBody(request);
|
||||
const thread = threadSlug
|
||||
? await WorkspaceThread.get({
|
||||
slug: String(threadSlug),
|
||||
workspace_id: workspace.id,
|
||||
user_id: user?.id || null,
|
||||
})
|
||||
: null;
|
||||
const files = await Promise.all(
|
||||
documents.map(async (doc) => {
|
||||
const metadata = { ...doc };
|
||||
// Strip out pageContent
|
||||
delete metadata.pageContent;
|
||||
const filename = `${originalname}-${doc.id}.json`;
|
||||
const { file, error: dbError } = await WorkspaceParsedFiles.create({
|
||||
filename,
|
||||
workspaceId: workspace.id,
|
||||
userId: user?.id || null,
|
||||
threadId: thread?.id || null,
|
||||
metadata: JSON.stringify(metadata),
|
||||
tokenCountEstimate: doc.token_count_estimate || 0,
|
||||
});
|
||||
|
||||
if (dbError) throw new Error(dbError);
|
||||
return file;
|
||||
})
|
||||
);
|
||||
|
||||
Collector.log(`Document ${originalname} parsed successfully.`);
|
||||
await EventLogs.logEvent(
|
||||
"document_uploaded_to_chat",
|
||||
{
|
||||
documentName: originalname,
|
||||
workspace: workspace.slug,
|
||||
thread: thread?.name || null,
|
||||
},
|
||||
user?.id
|
||||
);
|
||||
|
||||
return response.status(200).json({
|
||||
success: true,
|
||||
error: null,
|
||||
files,
|
||||
});
|
||||
} catch (e) {
|
||||
console.error(e.message, e);
|
||||
return response.sendStatus(500).end();
|
||||
}
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
module.exports = { workspaceParsedFilesEndpoints };
|
||||
server/jobs/cleanup-orphan-documents.js (new file, 64 lines)
@ -0,0 +1,64 @@
const fs = require('fs');
const path = require('path');
const { log, conclude } = require('./helpers/index.js');
const { WorkspaceParsedFiles } = require('../models/workspaceParsedFiles.js');
const { directUploadsPath } = require('../utils/files');

async function batchDeleteFiles(filesToDelete, batchSize = 500) {
  let deletedCount = 0;
  let failedCount = 0;

  for (let i = 0; i < filesToDelete.length; i += batchSize) {
    const batch = filesToDelete.slice(i, i + batchSize);

    try {
      // Use the promise-based API so Promise.all actually awaits each unlink.
      await Promise.all(batch.map((filePath) => fs.promises.unlink(filePath)));
      deletedCount += batch.length;

      log(`Deleted batch ${Math.floor(i / batchSize) + 1}: ${batch.length} files`);
    } catch (err) {
      // If batch fails, try individual files sync
      for (const filePath of batch) {
        try {
          fs.unlinkSync(filePath);
          deletedCount++;
        } catch (fileErr) {
          failedCount++;
          log(`Failed to delete ${filePath}: ${fileErr.message}`);
        }
      }
    }
  }

  return { deletedCount, failedCount };
}

(async () => {
  try {
    const filesToDelete = [];
    const knownFiles = await WorkspaceParsedFiles
      .where({}, null, null, { filename: true })
      .then((files) => new Set(files.map((f) => f.filename)));

    if (!fs.existsSync(directUploadsPath)) return log('No direct uploads path found - exiting.');
    const filesInDirectUploadsPath = fs.readdirSync(directUploadsPath);
    if (filesInDirectUploadsPath.length === 0) return;

    for (let i = 0; i < filesInDirectUploadsPath.length; i++) {
      const file = filesInDirectUploadsPath[i];
      if (knownFiles.has(file)) continue;
      filesToDelete.push(path.resolve(directUploadsPath, file));
    }

    if (filesToDelete.length === 0) return; // No orphaned files to delete
    log(`Found ${filesToDelete.length} orphaned files to delete`);
    const { deletedCount, failedCount } = await batchDeleteFiles(filesToDelete);
    log(`Deleted ${deletedCount} orphaned files`);
    if (failedCount > 0) log(`Failed to delete ${failedCount} files`);
  } catch (e) {
    console.error(e);
    log(`errored with ${e.message}`);
  } finally {
    conclude();
  }
})();
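The orphan check in this job is a set difference: any file sitting in the direct-uploads folder with no matching workspace_parsed_files row is queued for deletion. A pure-function restatement of that idea, using a hypothetical helper name that is not part of the job file, might look like:

// Hypothetical helper, for illustration only.
function findOrphans(knownFilenames, filesOnDisk) {
  const known = new Set(knownFilenames);
  return filesOnDisk.filter((file) => !known.has(file));
}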
@@ -27,6 +27,7 @@ const Telemetry = {
    documents_embedded_in_workspace: 30,
    link_uploaded: 30,
    raw_document_uploaded: 30,
    document_parsed: 30,
  },

  id: async function () {

@@ -283,6 +283,10 @@ const Workspace = {
      return {
        ...workspace,
        documents: await Document.forWorkspace(workspace.id),
        contextWindow: this._getContextWindow(workspace),
        currentContextTokenCount: await this._getCurrentContextTokenCount(
          workspace.id
        ),
      };
    } catch (error) {
      console.error(error.message);
@@ -290,6 +294,42 @@ const Workspace = {
    }
  },

  /**
   * Get the total token count of all parsed files in a workspace/thread
   * @param {number} workspaceId - The ID of the workspace
   * @param {number|null} threadId - Optional thread ID to filter by
   * @returns {Promise<number>} Total token count of all files
   * @private
   */
  async _getCurrentContextTokenCount(workspaceId, threadId = null) {
    const { WorkspaceParsedFiles } = require("./workspaceParsedFiles");
    return await WorkspaceParsedFiles.totalTokenCount({
      workspaceId: Number(workspaceId),
      threadId: threadId ? Number(threadId) : null,
    });
  },

  /**
   * Get the context window size for a workspace based on its provider and model settings.
   * If the workspace has no provider/model set, falls back to system defaults.
   * @param {Workspace} workspace - The workspace to get context window for
   * @returns {number|null} The context window size in tokens (defaults to null if no provider/model found)
   * @private
   */
  _getContextWindow: function (workspace) {
    const {
      getLLMProviderClass,
      getBaseLLMProviderModel,
    } = require("../utils/helpers");
    const provider = workspace.chatProvider || process.env.LLM_PROVIDER || null;
    const LLMProvider = getLLMProviderClass({ provider });
    const model =
      workspace.chatModel || getBaseLLMProviderModel({ provider }) || null;

    if (!provider || !model) return null;
    return LLMProvider?.promptWindowLimit?.(model) || null;
  },

  get: async function (clause = {}) {
    try {
      const workspace = await prisma.workspaces.findFirst({
@@ -299,7 +339,14 @@ const Workspace = {
        },
      });

      return workspace || null;
      if (!workspace) return null;
      return {
        ...workspace,
        contextWindow: this._getContextWindow(workspace),
        currentContextTokenCount: await this._getCurrentContextTokenCount(
          workspace.id
        ),
      };
    } catch (error) {
      console.error(error.message);
      return null;
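The two new fields returned by the Workspace model, contextWindow and currentContextTokenCount, are what the upload UI compares when warning about context-window overflow. A minimal sketch of that comparison follows; the helper name and call site are assumptions, not part of this diff.

// Sketch only: helper name is hypothetical.
function wouldOverflowContext(workspace, pendingTokenEstimate) {
  // A null contextWindow means the provider/model limit could not be
  // determined, so the UI shows a placeholder instead of a warning.
  if (workspace.contextWindow === null) return false;
  return (
    workspace.currentContextTokenCount + pendingTokenEstimate >
    workspace.contextWindow
  );
}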
227 server/models/workspaceParsedFiles.js Normal file
@@ -0,0 +1,227 @@
const prisma = require("../utils/prisma");
const { EventLogs } = require("./eventLogs");
const { Document } = require("./documents");
const { documentsPath, directUploadsPath } = require("../utils/files");
const { safeJsonParse } = require("../utils/http");
const fs = require("fs");
const path = require("path");

const WorkspaceParsedFiles = {
  create: async function ({
    filename,
    workspaceId,
    userId = null,
    threadId = null,
    metadata = null,
    tokenCountEstimate = 0,
  }) {
    try {
      const file = await prisma.workspace_parsed_files.create({
        data: {
          filename,
          workspaceId: parseInt(workspaceId),
          userId: userId ? parseInt(userId) : null,
          threadId: threadId ? parseInt(threadId) : null,
          metadata,
          tokenCountEstimate,
        },
      });

      await EventLogs.logEvent(
        "workspace_file_uploaded",
        {
          filename,
          workspaceId,
        },
        userId
      );

      return { file, error: null };
    } catch (error) {
      console.error("FAILED TO CREATE PARSED FILE RECORD.", error.message);
      return { file: null, error: error.message };
    }
  },

  get: async function (clause = {}) {
    try {
      const file = await prisma.workspace_parsed_files.findFirst({
        where: clause,
      });
      return file;
    } catch (error) {
      console.error(error.message);
      return null;
    }
  },

  where: async function (
    clause = {},
    limit = null,
    orderBy = null,
    select = null
  ) {
    try {
      const files = await prisma.workspace_parsed_files.findMany({
        where: clause,
        ...(limit !== null ? { take: limit } : {}),
        ...(orderBy !== null ? { orderBy } : {}),
        ...(select !== null ? { select } : {}),
      });
      return files;
    } catch (error) {
      console.error(error.message);
      return [];
    }
  },

  delete: async function (clause = {}) {
    try {
      await prisma.workspace_parsed_files.deleteMany({
        where: clause,
      });
      return true;
    } catch (error) {
      console.error(error.message);
      return false;
    }
  },

  totalTokenCount: async function (clause = {}) {
    const { _sum } = await prisma.workspace_parsed_files.aggregate({
      where: clause,
      _sum: { tokenCountEstimate: true },
    });
    return _sum.tokenCountEstimate || 0;
  },

  moveToDocumentsAndEmbed: async function (fileId, workspace) {
    try {
      const parsedFile = await this.get({ id: parseInt(fileId) });
      if (!parsedFile) throw new Error("File not found");

      // Get file location from metadata
      const metadata = safeJsonParse(parsedFile.metadata, {});
      const location = metadata.location;
      if (!location) throw new Error("No file location in metadata");

      // Get file from metadata location
      const sourceFile = path.join(directUploadsPath, location.split("/")[1]);
      if (!fs.existsSync(sourceFile)) throw new Error("Source file not found");

      // Move to custom-documents
      const customDocsPath = path.join(documentsPath, "custom-documents");
      if (!fs.existsSync(customDocsPath))
        fs.mkdirSync(customDocsPath, { recursive: true });

      // Copy the file to custom-documents
      const targetPath = path.join(customDocsPath, location.split("/")[1]);
      fs.copyFileSync(sourceFile, targetPath);
      fs.unlinkSync(sourceFile);

      const {
        failedToEmbed = [],
        errors = [],
        embedded = [],
      } = await Document.addDocuments(
        workspace,
        [`custom-documents/${location.split("/")[1]}`],
        parsedFile.userId
      );

      if (failedToEmbed.length > 0)
        throw new Error(errors[0] || "Failed to embed document");

      const document = await Document.get({
        workspaceId: workspace.id,
        docpath: embedded[0],
      });
      return { success: true, error: null, document };
    } catch (error) {
      console.error("Failed to move and embed file:", error);
      return { success: false, error: error.message, document: null };
    } finally {
      // Always delete the file after processing
      await this.delete({ id: parseInt(fileId) });
    }
  },

  getContextMetadataAndLimits: async function (
    workspace,
    thread = null,
    user = null
  ) {
    try {
      if (!workspace) throw new Error("Workspace is required");
      const files = await this.where({
        workspaceId: workspace.id,
        threadId: thread?.id || null,
        ...(user ? { userId: user.id } : {}),
      });

      const results = [];
      let totalTokens = 0;

      for (const file of files) {
        const metadata = safeJsonParse(file.metadata, {});
        totalTokens += file.tokenCountEstimate || 0;
        results.push({
          id: file.id,
          title: metadata.title || metadata.location,
          location: metadata.location,
          token_count_estimate: file.tokenCountEstimate,
        });
      }

      return {
        files: results,
        contextWindow: workspace.contextWindow,
        currentContextTokenCount: totalTokens,
      };
    } catch (error) {
      console.error("Failed to get context metadata:", error);
      return {
        files: [],
        contextWindow: Infinity,
        currentContextTokenCount: 0,
      };
    }
  },

  getContextFiles: async function (workspace, thread = null, user = null) {
    try {
      const files = await this.where({
        workspaceId: workspace.id,
        threadId: thread?.id || null,
        ...(user ? { userId: user.id } : {}),
      });

      const results = [];
      for (const file of files) {
        const metadata = safeJsonParse(file.metadata, {});
        const location = metadata.location;
        if (!location) continue;

        const sourceFile = path.join(directUploadsPath, location.split("/")[1]);
        if (!fs.existsSync(sourceFile)) continue;

        const content = fs.readFileSync(sourceFile, "utf-8");
        const data = safeJsonParse(content, null);
        if (!data?.pageContent) continue;

        results.push({
          pageContent: data.pageContent,
          token_count_estimate: file.tokenCountEstimate,
          ...metadata,
        });
      }

      return results;
    } catch (error) {
      console.error("Failed to get context files:", error);
      return [];
    }
  },
};

module.exports = { WorkspaceParsedFiles };
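A short usage sketch for the model above; the surrounding identifiers (the workspace record and fileId) are assumed rather than taken from this diff.

// Illustrative usage only.
async function promoteAttachment(workspace, fileId) {
  // Summarize what is currently attached for this workspace.
  const { files, contextWindow, currentContextTokenCount } =
    await WorkspaceParsedFiles.getContextMetadataAndLimits(workspace);
  console.log(`${files.length} attached files`, {
    contextWindow,
    currentContextTokenCount,
  });

  // Move one attachment into custom-documents and embed it; the
  // workspace_parsed_files record is always deleted afterwards.
  const { success, error, document } =
    await WorkspaceParsedFiles.moveToDocumentsAndEmbed(fileId, workspace);
  if (!success) throw new Error(error);
  return document;
}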
23 server/prisma/migrations/20250808171557_init/migration.sql Normal file
@@ -0,0 +1,23 @@
-- CreateTable
CREATE TABLE "workspace_parsed_files" (
    "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
    "filename" TEXT NOT NULL,
    "workspaceId" INTEGER NOT NULL,
    "userId" INTEGER,
    "threadId" INTEGER,
    "metadata" TEXT,
    "tokenCountEstimate" INTEGER DEFAULT 0,
    "createdAt" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    CONSTRAINT "workspace_parsed_files_workspaceId_fkey" FOREIGN KEY ("workspaceId") REFERENCES "workspaces" ("id") ON DELETE CASCADE ON UPDATE CASCADE,
    CONSTRAINT "workspace_parsed_files_userId_fkey" FOREIGN KEY ("userId") REFERENCES "users" ("id") ON DELETE CASCADE ON UPDATE CASCADE,
    CONSTRAINT "workspace_parsed_files_threadId_fkey" FOREIGN KEY ("threadId") REFERENCES "workspace_threads" ("id") ON DELETE CASCADE ON UPDATE CASCADE
);

-- CreateIndex
CREATE UNIQUE INDEX "workspace_parsed_files_filename_key" ON "workspace_parsed_files"("filename");

-- CreateIndex
CREATE INDEX "workspace_parsed_files_workspaceId_idx" ON "workspace_parsed_files"("workspaceId");

-- CreateIndex
CREATE INDEX "workspace_parsed_files_userId_idx" ON "workspace_parsed_files"("userId");
@@ -83,6 +83,7 @@ model users {
  system_prompt_variables  system_prompt_variables[]
  prompt_history           prompt_history[]
  desktop_mobile_devices   desktop_mobile_devices[]
  workspace_parsed_files   workspace_parsed_files[]
}

model recovery_codes {
@@ -149,6 +150,7 @@ model workspaces {
  threads                      workspace_threads[]
  workspace_agent_invocations  workspace_agent_invocations[]
  prompt_history               prompt_history[]
  workspace_parsed_files       workspace_parsed_files[]
}

model workspace_threads {
@@ -161,6 +163,7 @@ model workspace_threads {
  lastUpdatedAt          DateTime                 @default(now())
  workspace              workspaces               @relation(fields: [workspace_id], references: [id], onDelete: Cascade)
  user                   users?                   @relation(fields: [user_id], references: [id], onDelete: Cascade)
  workspace_parsed_files workspace_parsed_files[]

  @@index([workspace_id])
  @@index([user_id])
@@ -371,3 +374,20 @@ model desktop_mobile_devices {

  @@index([userId])
}

model workspace_parsed_files {
  id                 Int                @id @default(autoincrement())
  filename           String             @unique
  workspaceId        Int
  userId             Int?
  threadId           Int?
  metadata           String?
  tokenCountEstimate Int?               @default(0)
  createdAt          DateTime           @default(now())
  workspace          workspaces         @relation(fields: [workspaceId], references: [id], onDelete: Cascade)
  user               users?             @relation(fields: [userId], references: [id], onDelete: Cascade)
  thread             workspace_threads? @relation(fields: [threadId], references: [id], onDelete: Cascade)

  @@index([workspaceId])
  @@index([userId])
}

@@ -6,8 +6,26 @@ const setLogger = require("../logger");
class BackgroundService {
  name = "BackgroundWorkerService";
  static _instance = null;
  documentSyncEnabled = false;
  #root = path.resolve(__dirname, "../../jobs");

  #alwaysRunJobs = [
    {
      name: "cleanup-orphan-documents",
      timeout: "1m",
      interval: "12hr",
    },
  ];

  #documentSyncJobs = [
    // Job for auto-sync of documents
    // https://github.com/breejs/bree
    {
      name: "sync-watched-documents",
      interval: "1hr",
    },
  ];

  constructor() {
    if (BackgroundService._instance) {
      this.#log("SINGLETON LOCK: Using existing BackgroundService.");
@@ -24,16 +42,14 @@ class BackgroundService {

  async boot() {
    const { DocumentSyncQueue } = require("../../models/documentSyncQueue");
    if (!(await DocumentSyncQueue.enabled())) {
      this.#log("Feature is not enabled and will not be started.");
      return;
    }
    this.documentSyncEnabled = await DocumentSyncQueue.enabled();
    const jobsToRun = this.jobs();

    this.#log("Starting...");
    this.bree = new Bree({
      logger: this.logger,
      root: this.#root,
      jobs: this.jobs(),
      jobs: jobsToRun,
      errorHandler: this.onError,
      workerMessageHandler: this.onWorkerMessageHandler,
      runJobsAs: "process",
@@ -41,7 +57,10 @@ class BackgroundService {
    this.graceful = new Graceful({ brees: [this.bree], logger: this.logger });
    this.graceful.listen();
    this.bree.start();
    this.#log("Service started");
    this.#log(
      `Service started with ${jobsToRun.length} jobs`,
      jobsToRun.map((j) => j.name)
    );
  }

  async stop() {
@@ -54,14 +73,9 @@ class BackgroundService {

  /** @returns {import("@mintplex-labs/bree").Job[]} */
  jobs() {
    return [
      // Job for auto-sync of documents
      // https://github.com/breejs/bree
      {
        name: "sync-watched-documents",
        interval: "1hr",
      },
    ];
    const activeJobs = [...this.#alwaysRunJobs];
    if (this.documentSyncEnabled) activeJobs.push(...this.#documentSyncJobs);
    return activeJobs;
  }

  onError(error, _workerMetadata) {

@@ -1,6 +1,7 @@
const { v4: uuidv4 } = require("uuid");
const { DocumentManager } = require("../DocumentManager");
const { WorkspaceChats } = require("../../models/workspaceChats");
const { WorkspaceParsedFiles } = require("../../models/workspaceParsedFiles");
const { getVectorDbClass, getLLMProvider } = require("../helpers");
const { writeResponseChunk } = require("../helpers/chat/responses");
const { grepAgents } = require("./agents");
@@ -130,6 +131,22 @@ async function streamChatWithWorkspace(
      });
    });

  // Inject any parsed files for this workspace/thread/user
  const parsedFiles = await WorkspaceParsedFiles.getContextFiles(
    workspace,
    thread || null,
    user || null
  );
  parsedFiles.forEach((doc) => {
    const { pageContent, ...metadata } = doc;
    contextTexts.push(doc.pageContent);
    sources.push({
      text:
        pageContent.slice(0, 1_000) + "...continued on in source document...",
      ...metadata,
    });
  });

  const vectorSearchResults =
    embeddingsCount !== 0
      ? await VectorDb.performSimilaritySearch({

@@ -232,6 +232,42 @@ class CollectorApi {
        return { success: false, content: null };
      });
  }

  /**
   * Parse a document without processing it
   * - Will append the options to the request body
   * @param {string} filename - The filename of the document to parse
   * @returns {Promise<Object>} - The response from the collector API
   */
  async parseDocument(filename = "") {
    if (!filename) return false;

    const data = JSON.stringify({
      filename,
      options: this.#attachOptions(),
    });

    return await fetch(`${this.endpoint}/parse`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "X-Integrity": this.comkey.sign(data),
        "X-Payload-Signer": this.comkey.encrypt(
          new EncryptionManager().xPayload
        ),
      },
      body: data,
    })
      .then((res) => {
        if (!res.ok) throw new Error("Response could not be completed");
        return res.json();
      })
      .then((res) => res)
      .catch((e) => {
        this.log(e.message);
        return { success: false, reason: e.message, documents: [] };
      });
  }
}

module.exports.CollectorApi = CollectorApi;
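For reference, a sketch of how the new parseDocument method is consumed server-side, mirroring the upload-and-parse handler earlier in this diff; the require path is an assumption for the example.

// Sketch only: require path assumed.
const { CollectorApi } = require("../collectorApi");
const Collector = new CollectorApi();

async function parseUploadedFile(originalname) {
  const { success, reason, documents } =
    await Collector.parseDocument(originalname);
  if (!success || documents.length === 0)
    throw new Error(reason || "No document returned from collector");
  return documents; // each entry carries pageContent and token_count_estimate
}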
@@ -7,6 +7,10 @@ const documentsPath =
  process.env.NODE_ENV === "development"
    ? path.resolve(__dirname, `../../storage/documents`)
    : path.resolve(process.env.STORAGE_DIR, `documents`);
const directUploadsPath =
  process.env.NODE_ENV === "development"
    ? path.resolve(__dirname, `../../storage/direct-uploads`)
    : path.resolve(process.env.STORAGE_DIR, `direct-uploads`);
const vectorCachePath =
  process.env.NODE_ENV === "development"
    ? path.resolve(__dirname, `../../storage/vector-cache`)
@@ -468,6 +472,7 @@ module.exports = {
  normalizePath,
  isWithin,
  documentsPath,
  directUploadsPath,
  hasVectorCachedFiles,
  purgeEntireVectorCache,
  getDocumentsByFolder,

@@ -402,19 +402,19 @@ function getBaseLLMProviderModel({ provider = null } = {}) {
    case "koboldcpp":
      return process.env.KOBOLD_CPP_MODEL_PREF;
    case "textgenwebui":
      return process.env.TEXT_GEN_WEB_UI_API_KEY;
      return null;
    case "cohere":
      return process.env.COHERE_MODEL_PREF;
    case "litellm":
      return process.env.LITE_LLM_MODEL_PREF;
    case "generic-openai":
      return process.env.GENERIC_OPEN_AI_EMBEDDING_API_KEY;
      return process.env.GENERIC_OPEN_AI_MODEL_PREF;
    case "bedrock":
      return process.env.AWS_BEDROCK_LLM_MODEL_PREFERENCE;
    case "deepseek":
      return process.env.DEEPSEEK_MODEL_PREF;
    case "apipie":
      return process.env.APIPIE_LLM_API_KEY;
      return process.env.APIPIE_LLM_MODEL_PREF;
    case "novita":
      return process.env.NOVITA_LLM_MODEL_PREF;
    case "xai":
@@ -422,7 +422,7 @@ function getBaseLLMProviderModel({ provider = null } = {}) {
    case "nvidia-nim":
      return process.env.NVIDIA_NIM_LLM_MODEL_PREF;
    case "ppio":
      return process.env.PPIO_API_KEY;
      return process.env.PPIO_MODEL_PREF;
    case "dpais":
      return process.env.DPAIS_LLM_MODEL_PREF;
    case "moonshotai":