feat: Add endpoint to upload documents to a specified folder (#3276)
* feat: Add endpoint to retrieve documents by folder name * isWithin Check on path to prevent path traversal * feat: Add endpoint to upload documents to a specified folder * refactor upload to folder endpoint + update jsdoc for swagger * linting --------- Co-authored-by: timothycarambat <rambat1010@gmail.com> Co-authored-by: shatfield4 <seanhatfield5@gmail.com>
This commit is contained in:
parent
2f5ed6c705
commit
d005107e24
@ -36,6 +36,7 @@ function apiDocumentEndpoints(app) {
|
||||
"multipart/form-data": {
|
||||
schema: {
|
||||
type: 'object',
|
||||
required: ['file'],
|
||||
properties: {
|
||||
file: {
|
||||
type: 'string',
|
||||
@ -66,7 +67,7 @@ function apiDocumentEndpoints(app) {
|
||||
"description": "Unknown",
|
||||
"docSource": "a text file uploaded by the user.",
|
||||
"chunkSource": "anythingllm.txt",
|
||||
"published": "1/16/2024, 3:07:00 PM",
|
||||
"published": "1/16/2024, 3:07:00 PM",
|
||||
"wordCount": 93,
|
||||
"token_count_estimate": 115,
|
||||
}
|
||||
@ -123,6 +124,167 @@ function apiDocumentEndpoints(app) {
|
||||
}
|
||||
);
|
||||
|
||||
// POST /v1/document/upload/:folderName
// Accepts a single multipart file, hands it to the collector for processing,
// then moves every resulting document record into the requested folder under
// `documentsPath` (creating that folder when it does not exist yet).
app.post(
  "/v1/document/upload/:folderName",
  [validApiKey, handleAPIFileUpload],
  async (request, response) => {
    /*
    #swagger.tags = ['Documents']
    #swagger.description = 'Upload a new file to a specific folder in AnythingLLM to be parsed and prepared for embedding. If the folder does not exist, it will be created.'
    #swagger.parameters['folderName'] = {
      in: 'path',
      description: 'Name of the target folder inside the documents storage directory.',
      required: true,
      type: 'string',
      example: 'my-folder'
    }
    #swagger.requestBody = {
      description: 'File to be uploaded.',
      required: true,
      content: {
        "multipart/form-data": {
          schema: {
            type: 'object',
            required: ['file'],
            properties: {
              file: {
                type: 'string',
                format: 'binary',
                description: 'The file to upload'
              }
            }
          }
        }
      }
    }
    #swagger.responses[200] = {
      content: {
        "application/json": {
          schema: {
            type: 'object',
            example: {
              success: true,
              error: null,
              documents: [
                {
                  "location": "custom-documents/anythingllm.txt-6e8be64c-c162-4b43-9997-b068c0071e8b.json",
                  "name": "anythingllm.txt-6e8be64c-c162-4b43-9997-b068c0071e8b.json",
                  "url": "file:///Users/tim/Documents/anything-llm/collector/hotdir/anythingllm.txt",
                  "title": "anythingllm.txt",
                  "docAuthor": "Unknown",
                  "description": "Unknown",
                  "docSource": "a text file uploaded by the user.",
                  "chunkSource": "anythingllm.txt",
                  "published": "1/16/2024, 3:07:00 PM",
                  "wordCount": 93,
                  "token_count_estimate": 115
                }
              ]
            }
          }
        }
      }
    }
    #swagger.responses[400] = {
      description: "Bad Request",
      content: {
        "application/json": {
          schema: {
            type: 'object',
            example: {
              success: false,
              error: "No file was provided in the 'file' form field."
            }
          }
        }
      }
    }
    #swagger.responses[403] = {
      schema: {
        "$ref": "#/definitions/InvalidAPIKey"
      }
    }
    #swagger.responses[500] = {
      description: "Internal Server Error",
      content: {
        "application/json": {
          schema: {
            type: 'object',
            example: {
              success: false,
              error: "Document processing API is not online. Document will not be processed automatically."
            }
          }
        }
      }
    }
    */
    try {
      // The upload middleware is not visible from here; if it lets a request
      // through without a file part, `request.file` is undefined. Answer with
      // a clear client error instead of crashing into the catch-all 500.
      const originalname = request.file?.originalname;
      if (!originalname) {
        response
          .status(400)
          .json({
            success: false,
            error: "No file was provided in the 'file' form field.",
          })
          .end();
        return;
      }

      // The route always supplies folderName; the fallback is kept as a
      // defensive default only.
      let folder = request.params?.folderName || "custom-documents";
      folder = normalizePath(folder);
      const targetFolderPath = path.join(documentsPath, folder);

      // Path-traversal guard: the target must sit inside documentsPath.
      if (
        !isWithin(path.resolve(documentsPath), path.resolve(targetFolderPath))
      )
        throw new Error("Invalid folder name");
      if (!fs.existsSync(targetFolderPath))
        fs.mkdirSync(targetFolderPath, { recursive: true });

      const Collector = new CollectorApi();
      const processingOnline = await Collector.online();
      if (!processingOnline) {
        response
          .status(500)
          .json({
            success: false,
            error: `Document processing API is not online. Document ${originalname} will not be processed automatically.`,
          })
          .end();
        return;
      }

      // Process the uploaded document
      const { success, reason, documents } =
        await Collector.processDocument(originalname);
      if (!success) {
        response
          .status(500)
          .json({ success: false, error: reason, documents })
          .end();
        return;
      }

      // For each processed document, check if it is already in the desired folder.
      // If not, move it using similar logic as in the move-files endpoint.
      for (const doc of documents) {
        const currentFolder = path.dirname(doc.location);
        if (currentFolder !== folder) {
          const sourcePath = path.join(
            documentsPath,
            normalizePath(doc.location)
          );
          const destinationPath = path.join(
            targetFolderPath,
            path.basename(doc.location)
          );

          // Re-check both ends of the move so a crafted `location` cannot
          // read from or write to a path outside documentsPath.
          if (
            !isWithin(documentsPath, sourcePath) ||
            !isWithin(documentsPath, destinationPath)
          )
            throw new Error("Invalid file location");

          fs.renameSync(sourcePath, destinationPath);
          // Keep the returned record in sync with the file's new home.
          doc.location = path.join(folder, path.basename(doc.location));
          doc.name = path.basename(doc.location);
        }
      }

      Collector.log(
        `Document ${originalname} uploaded, processed, and moved to folder ${folder} successfully.`
      );

      await Telemetry.sendTelemetry("document_uploaded");
      await EventLogs.logEvent("api_document_uploaded", {
        documentName: originalname,
        folder,
      });
      response.status(200).json({ success: true, error: null, documents });
    } catch (e) {
      // Any thrown error (invalid folder, failed rename, ...) ends up here
      // and is reported as a bare 500.
      console.error(e.message, e);
      response.sendStatus(500).end();
    }
  }
);
|
||||
|
||||
app.post(
|
||||
"/v1/document/upload-link",
|
||||
[validApiKey],
|
||||
@ -161,7 +323,7 @@ function apiDocumentEndpoints(app) {
|
||||
"description": "No description found.",
|
||||
"docSource": "URL link uploaded by the user.",
|
||||
"chunkSource": "https:anythingllm.com.html",
|
||||
"published": "1/16/2024, 3:46:33 PM",
|
||||
"published": "1/16/2024, 3:46:33 PM",
|
||||
"wordCount": 252,
|
||||
"pageContent": "AnythingLLM is the best....",
|
||||
"token_count_estimate": 447,
|
||||
@ -264,7 +426,7 @@ function apiDocumentEndpoints(app) {
|
||||
"description": "No description found.",
|
||||
"docSource": "My custom description set during upload",
|
||||
"chunkSource": "no chunk source specified",
|
||||
"published": "1/16/2024, 3:46:33 PM",
|
||||
"published": "1/16/2024, 3:46:33 PM",
|
||||
"wordCount": 252,
|
||||
"pageContent": "AnythingLLM is the best....",
|
||||
"token_count_estimate": 447,
|
||||
|
||||
@ -865,7 +865,7 @@
|
||||
"description": "Unknown",
|
||||
"docSource": "a text file uploaded by the user.",
|
||||
"chunkSource": "anythingllm.txt",
|
||||
"published": "1/16/2024, 3:07:00 PM",
|
||||
"published": "1/16/2024, 3:07:00 PM",
|
||||
"wordCount": 93,
|
||||
"token_count_estimate": 115
|
||||
}
|
||||
@ -901,16 +901,115 @@
|
||||
"multipart/form-data": {
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"required": [
|
||||
"file"
|
||||
],
|
||||
"properties": {
|
||||
"file": {
|
||||
"type": "string",
|
||||
"format": "binary",
|
||||
"description": "The file to upload"
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/document/upload/{folderName}": {
|
||||
"post": {
|
||||
"tags": [
|
||||
"Documents"
|
||||
],
|
||||
"description": "Upload a new file to a specific folder in AnythingLLM to be parsed and prepared for embedding. If the folder does not exist, it will be created.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "folderName",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"example": {
|
||||
"success": true,
|
||||
"error": null,
|
||||
"documents": [
|
||||
{
|
||||
"location": "custom-documents/anythingllm.txt-6e8be64c-c162-4b43-9997-b068c0071e8b.json",
|
||||
"name": "anythingllm.txt-6e8be64c-c162-4b43-9997-b068c0071e8b.json",
|
||||
"url": "file://Users/tim/Documents/anything-llm/collector/hotdir/anythingllm.txt",
|
||||
"title": "anythingllm.txt",
|
||||
"docAuthor": "Unknown",
|
||||
"description": "Unknown",
|
||||
"docSource": "a text file uploaded by the user.",
|
||||
"chunkSource": "anythingllm.txt",
|
||||
"published": "1/16/2024, 3:07:00 PM",
|
||||
"wordCount": 93,
|
||||
"token_count_estimate": 115
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"403": {
|
||||
"description": "Forbidden",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/InvalidAPIKey"
|
||||
}
|
||||
},
|
||||
"application/xml": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/InvalidAPIKey"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"500": {
|
||||
"description": "Internal Server Error",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"example": {
|
||||
"success": false,
|
||||
"error": "Document processing API is not online. Document will not be processed automatically."
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"requestBody": {
|
||||
"description": "File to be uploaded.",
|
||||
"required": true,
|
||||
"content": {
|
||||
"multipart/form-data": {
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"required": [
|
||||
"file"
|
||||
]
|
||||
],
|
||||
"properties": {
|
||||
"file": {
|
||||
"type": "string",
|
||||
"format": "binary",
|
||||
"description": "The file to upload"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -943,7 +1042,7 @@
|
||||
"description": "No description found.",
|
||||
"docSource": "URL link uploaded by the user.",
|
||||
"chunkSource": "https:anythingllm.com.html",
|
||||
"published": "1/16/2024, 3:46:33 PM",
|
||||
"published": "1/16/2024, 3:46:33 PM",
|
||||
"wordCount": 252,
|
||||
"pageContent": "AnythingLLM is the best....",
|
||||
"token_count_estimate": 447,
|
||||
@ -1016,7 +1115,7 @@
|
||||
"description": "No description found.",
|
||||
"docSource": "My custom description set during upload",
|
||||
"chunkSource": "no chunk source specified",
|
||||
"published": "1/16/2024, 3:46:33 PM",
|
||||
"published": "1/16/2024, 3:46:33 PM",
|
||||
"wordCount": 252,
|
||||
"pageContent": "AnythingLLM is the best....",
|
||||
"token_count_estimate": 447,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user