feat: Add endpoint to retrieve documents by folder name (#3258)
* feat: Add endpoint to retrieve documents by folder name * isWithin Check on path to prevent path traversal --------- Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
parent
354b66e09e
commit
eeaa6b0151
@ -4,6 +4,7 @@ const { handleAPIFileUpload } = require("../../../utils/files/multer");
|
||||
const {
|
||||
viewLocalFiles,
|
||||
findDocumentInDocuments,
|
||||
getDocumentsByFolder,
|
||||
normalizePath,
|
||||
isWithin,
|
||||
} = require("../../../utils/files");
|
||||
@ -395,6 +396,59 @@ function apiDocumentEndpoints(app) {
|
||||
}
|
||||
});
|
||||
|
||||
// Lists the documents stored in a single folder. The route runs behind the
// validApiKey middleware; any error thrown while building the listing is
// logged and answered with a bare HTTP 500.
app.get(
  "/v1/documents/folder/:folderName",
  [validApiKey],
  async (request, response) => {
    /*
    #swagger.tags = ['Documents']
    #swagger.description = 'Get all documents stored in a specific folder.'
    #swagger.parameters['folderName'] = {
      in: 'path',
      description: 'Name of the folder to retrieve documents from',
      required: true,
      type: 'string'
    }
    #swagger.responses[200] = {
      content: {
        "application/json": {
          schema: {
            type: 'object',
            example: {
              folder: "custom-documents",
              documents: [
                {
                  name: "document1.json",
                  type: "file",
                  cached: false,
                  pinnedWorkspaces: [],
                  watched: false,
                  // ... other document metadata
                },
                // more documents
              ]
            }
          }
        }
      }
    }
    #swagger.responses[403] = {
      schema: {
        "$ref": "#/definitions/InvalidAPIKey"
      }
    }
    */
    try {
      // The folder name comes straight from the URL path segment.
      const listing = await getDocumentsByFolder(request.params.folderName);
      response.status(200).json(listing);
    } catch (e) {
      console.error(e.message, e);
      response.sendStatus(500).end();
    }
  }
);
|
||||
|
||||
app.get(
|
||||
"/v1/document/accepted-file-types",
|
||||
[validApiKey],
|
||||
|
||||
@ -1124,6 +1124,30 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/documents/folder/{folderName}": {
|
||||
"get": {
|
||||
"tags": [
|
||||
"Documents"
|
||||
],
|
||||
"description": "Get all documents stored in a specific folder.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "folderName",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "Name of the folder to retrieve documents from"
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"403": {
|
||||
"description": "Forbidden"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/document/accepted-file-types": {
|
||||
"get": {
|
||||
"tags": [
|
||||
|
||||
@ -91,6 +91,50 @@ async function viewLocalFiles() {
|
||||
return directory;
|
||||
}
|
||||
|
||||
/**
 * Lists the `.json` documents stored directly inside one folder of the
 * documents directory.
 *
 * Every `.json` file in the folder is parsed; its `pageContent` field is
 * dropped and the remaining fields are returned alongside the file name,
 * its vector-cache status, and its pinned/watched information.
 *
 * @param {string} folderName - Folder to read, resolved against `documentsPath`.
 * @returns {Promise<{folder: string, documents: Object[]}>} The folder name as
 *   it was passed in, plus one entry per `.json` file found in that folder.
 * @throws {Error} When `folderName` is empty, or when the resolved path falls
 *   outside `documentsPath`, does not exist, or is not a directory.
 */
async function getDocumentsByFolder(folderName = "") {
  if (!folderName) throw new Error("Folder name must be provided.");

  const folderPath = path.resolve(documentsPath, normalizePath(folderName));
  // A path that escapes documentsPath is reported exactly like a missing
  // folder, so the error does not reveal that a traversal was attempted.
  if (
    !isWithin(documentsPath, folderPath) ||
    !fs.existsSync(folderPath) ||
    !fs.lstatSync(folderPath).isDirectory()
  )
    throw new Error(`Folder "${folderName}" does not exist.`);

  // Each entry keeps the on-disk filename next to the document object. The
  // spread of `metadata` below may overwrite `name`, so `doc.name` is not a
  // reliable key for the pinned/watched lookups further down.
  const entries = [];
  const filenames = {};
  for (const file of fs.readdirSync(folderPath)) {
    if (path.extname(file) !== ".json") continue;

    const rawData = fs.readFileSync(path.join(folderPath, file), "utf8");
    const cachefilename = `${folderName}/${file}`;
    // `pageContent` is pulled out only so that it is excluded from `metadata`.
    const { pageContent, ...metadata } = JSON.parse(rawData);
    entries.push({
      file,
      doc: {
        name: file,
        type: "file",
        ...metadata,
        cached: await cachedVectorInformation(cachefilename, true),
      },
    });
    filenames[cachefilename] = file;
  }

  // The two lookups are independent of each other, so run them together.
  const [pinnedWorkspacesByDocument, watchedDocumentsFilenames] =
    await Promise.all([
      getPinnedWorkspacesByDocument(filenames),
      getWatchedDocumentFilenames(filenames),
    ]);

  const documents = [];
  for (const { file, doc } of entries) {
    // Both lookup results are keyed by the filename values stored in
    // `filenames`, so index them by the real filename.
    doc.pinnedWorkspaces = pinnedWorkspacesByDocument[file] || [];
    doc.watched = Object.prototype.hasOwnProperty.call(
      watchedDocumentsFilenames,
      file
    );
    documents.push(doc);
  }

  return { folder: folderName, documents };
}
|
||||
|
||||
/**
|
||||
* Searches the vector-cache folder for existing information so we don't have to re-embed a
|
||||
* document and can instead push directly to vector db.
|
||||
@ -304,4 +348,5 @@ module.exports = {
|
||||
documentsPath,
|
||||
hasVectorCachedFiles,
|
||||
purgeEntireVectorCache,
|
||||
getDocumentsByFolder,
|
||||
};
|
||||
|
||||
Loading…
Reference in New Issue
Block a user