feat: Add endpoint to retrieve documents by folder name (#3258)

* feat: Add endpoint to retrieve documents by folder name

* Add isWithin check on the resolved folder path to prevent path traversal

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
Shanmukeshwar 2025-02-19 03:31:19 +05:30 committed by GitHub
parent 354b66e09e
commit eeaa6b0151
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 123 additions and 0 deletions

View File

@ -4,6 +4,7 @@ const { handleAPIFileUpload } = require("../../../utils/files/multer");
const {
viewLocalFiles,
findDocumentInDocuments,
getDocumentsByFolder,
normalizePath,
isWithin,
} = require("../../../utils/files");
@ -395,6 +396,59 @@ function apiDocumentEndpoints(app) {
}
});
app.get(
  "/v1/documents/folder/:folderName",
  [validApiKey],
  async (request, response) => {
    // NOTE(review): the #swagger block below looks like input for the API doc
    // generator, so its text is kept as-is — confirm before rewording it.
    /*
    #swagger.tags = ['Documents']
    #swagger.description = 'Get all documents stored in a specific folder.'
    #swagger.parameters['folderName'] = {
      in: 'path',
      description: 'Name of the folder to retrieve documents from',
      required: true,
      type: 'string'
    }
    #swagger.responses[200] = {
      content: {
        "application/json": {
          schema: {
            type: 'object',
            example: {
              folder: "custom-documents",
              documents: [
                {
                  name: "document1.json",
                  type: "file",
                  cached: false,
                  pinnedWorkspaces: [],
                  watched: false,
                  // ... other document metadata
                },
                // more documents
              ]
            }
          }
        }
      }
    }
    #swagger.responses[403] = {
      schema: {
        "$ref": "#/definitions/InvalidAPIKey"
      }
    }
    */
    try {
      // Respond with whatever the folder lookup returns for the requested name.
      const folderListing = await getDocumentsByFolder(
        request.params.folderName
      );
      response.status(200).json(folderListing);
    } catch (error) {
      // Every failure (including an unknown folder) is logged and reported as a 500.
      console.error(error.message, error);
      response.sendStatus(500).end();
    }
  }
);
app.get(
"/v1/document/accepted-file-types",
[validApiKey],

View File

@ -1124,6 +1124,30 @@
}
}
},
"/v1/documents/folder/{folderName}": {
"get": {
"tags": [
"Documents"
],
"description": "Get all documents stored in a specific folder.",
"parameters": [
{
"name": "folderName",
"in": "path",
"required": true,
"schema": {
"type": "string"
},
"description": "Name of the folder to retrieve documents from"
}
],
"responses": {
"403": {
"description": "Forbidden"
}
}
}
},
"/v1/document/accepted-file-types": {
"get": {
"tags": [

View File

@ -91,6 +91,50 @@ async function viewLocalFiles() {
return directory;
}
/**
 * Lists the `.json` documents stored directly inside one folder of the documents directory.
 *
 * The folder is resolved against `documentsPath`. A name that resolves outside of
 * `documentsPath`, does not exist, or is not a directory is rejected with the same
 * "does not exist" error, so callers cannot tell the cases apart.
 *
 * @param {string} folderName - Folder to list, relative to `documentsPath`.
 * @returns {Promise<{folder: string, documents: object[]}>} The folder name exactly as
 *   passed in, plus one entry per `.json` file: its parsed content without `pageContent`,
 *   and `name`, `type`, `cached`, `pinnedWorkspaces` and `watched` fields.
 * @throws {Error} When `folderName` is empty or does not resolve to a directory inside
 *   `documentsPath`. File read and JSON parse errors also propagate.
 */
async function getDocumentsByFolder(folderName = "") {
  if (!folderName) throw new Error("Folder name must be provided.");

  const folderPath = path.resolve(documentsPath, normalizePath(folderName));
  if (
    !isWithin(documentsPath, folderPath) ||
    !fs.existsSync(folderPath) ||
    !fs.lstatSync(folderPath).isDirectory()
  )
    throw new Error(`Folder "${folderName}" does not exist.`);

  // Build lookup keys from the folder that was actually resolved, not from the raw
  // input. Otherwise an equivalent spelling such as "docs/" would list the right
  // files but look them up under keys like "docs//a.json".
  const relativeFolder = path
    .relative(documentsPath, folderPath)
    .split(path.sep)
    .join("/");

  const documents = [];
  const filenames = {};
  const files = fs.readdirSync(folderPath);

  for (const file of files) {
    if (path.extname(file) !== ".json") continue;

    const filePath = path.join(folderPath, file);
    const rawData = fs.readFileSync(filePath, "utf8");
    const cachefilename = `${relativeFolder}/${file}`;
    // pageContent is dropped from the listing; the remaining keys are spread after
    // name/type, so a stored key with the same name takes precedence.
    const { pageContent, ...metadata } = JSON.parse(rawData);

    documents.push({
      name: file,
      type: "file",
      ...metadata,
      cached: await cachedVectorInformation(cachefilename, true),
    });
    filenames[cachefilename] = file;
  }

  // Get pinned and watched information for each document in the folder
  const pinnedWorkspacesByDocument =
    await getPinnedWorkspacesByDocument(filenames);
  const watchedDocumentsFilenames =
    await getWatchedDocumentFilenames(filenames);

  for (const doc of documents) {
    doc.pinnedWorkspaces = pinnedWorkspacesByDocument[doc.name] || [];
    doc.watched = Object.prototype.hasOwnProperty.call(
      watchedDocumentsFilenames,
      doc.name
    );
  }

  return { folder: folderName, documents };
}
/**
* Searches the vector-cache folder for existing information so we dont have to re-embed a
* document and can instead push directly to vector db.
@ -304,4 +348,5 @@ module.exports = {
documentsPath,
hasVectorCachedFiles,
purgeEntireVectorCache,
getDocumentsByFolder,
};