Added metadata parameter to document/upload, document/upload/{folderName}, and document/upload-link (#4342)

* Added the ability to pass in metadata to the /document/upload/{folderName} endpoint

* Added the ability to pass in metadata to the /document/upload-link endpoint

* feat: added metadata to document/upload api endpoint

* simplify optional metadata in document dev api endpoints

* lint

* patch handling of metadata in dev api

* Linting, small comments

---------

Co-authored-by: jstawskigmi <jstawski@getmyinterns.org>
Co-authored-by: shatfield4 <seanhatfield5@gmail.com>
Co-authored-by: Timothy Carambat <rambat1010@gmail.com>
This commit is contained in:
Jonas Stawski 2025-09-17 14:17:29 -04:00 committed by GitHub
parent 9841deb513
commit b8d4cc3454
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 229 additions and 103 deletions

View File

@ -32,7 +32,7 @@ app.post(
"/process",
[verifyPayloadIntegrity],
async function (request, response) {
const { filename, options = {} } = reqBody(request);
const { filename, options = {}, metadata = {} } = reqBody(request);
try {
const targetFilename = path
.normalize(filename)
@ -41,7 +41,7 @@ app.post(
success,
reason,
documents = [],
} = await processSingleFile(targetFilename, options);
} = await processSingleFile(targetFilename, options, metadata);
response
.status(200)
.json({ filename: targetFilename, success, reason, documents });
@ -95,13 +95,13 @@ app.post(
"/process-link",
[verifyPayloadIntegrity],
async function (request, response) {
const { link, scraperHeaders = {} } = reqBody(request);
const { link, scraperHeaders = {}, metadata = {} } = reqBody(request);
try {
const {
success,
reason,
documents = [],
} = await processLink(link, scraperHeaders);
} = await processLink(link, scraperHeaders, metadata);
response.status(200).json({ url: link, success, reason, documents });
} catch (e) {
console.error(e);

View File

@ -13,6 +13,7 @@ const { default: slugify } = require("slugify");
* @param {('html' | 'text')} config.captureAs - The format to capture the page content as. Default is 'text'
* @param {boolean} config.processAsDocument - Whether to process the content as a document or return the content directly. Default is true
* @param {{[key: string]: string}} config.scraperHeaders - Custom headers to use when making the request
* @param {{[key: string]: string}} config.metadata - Metadata to use when creating the document
* @returns {Promise<Object>} - The content of the page
*/
async function scrapeGenericUrl({
@ -20,6 +21,7 @@ async function scrapeGenericUrl({
captureAs = "text",
processAsDocument = true,
scraperHeaders = {},
metadata = {},
}) {
console.log(`-- Working URL ${link} => (${captureAs}) --`);
const content = await getPageContent({
@ -51,10 +53,10 @@ async function scrapeGenericUrl({
const data = {
id: v4(),
url: "file://" + slugify(filename) + ".html",
title: slugify(filename) + ".html",
docAuthor: "no author found",
description: "No description found.",
docSource: "URL link uploaded by the user.",
title: metadata.title || slugify(filename) + ".html",
docAuthor: metadata.docAuthor || "no author found",
description: metadata.description || "No description found.",
docSource: metadata.docSource || "URL link uploaded by the user.",
chunkSource: `link://${link}`,
published: new Date().toLocaleString(),
wordCount: content.split(" ").length,

View File

@ -6,15 +6,17 @@ const { scrapeGenericUrl } = require("./convert/generic");
* so it can be used for embedding later.
* @param {string} link - The link to process
* @param {{[key: string]: string}} scraperHeaders - Custom headers to apply when scraping the link
* @param {Object} metadata - Optional metadata to attach to the document
* @returns {Promise<{success: boolean, reason: string, documents?: Object[]}>} - Response from collector
*/
async function processLink(link, scraperHeaders = {}) {
async function processLink(link, scraperHeaders = {}, metadata = {}) {
if (!validURL(link)) return { success: false, reason: "Not a valid URL." };
return await scrapeGenericUrl({
link,
captureAs: "text",
processAsDocument: true,
scraperHeaders,
metadata,
});
}

View File

@ -14,7 +14,12 @@ const WHISPER_PROVIDERS = {
local: LocalWhisper,
};
async function asAudio({ fullFilePath = "", filename = "", options = {} }) {
async function asAudio({
fullFilePath = "",
filename = "",
options = {},
metadata = {},
}) {
const WhisperProvider = WHISPER_PROVIDERS.hasOwnProperty(
options?.whisperProvider
)
@ -48,11 +53,11 @@ async function asAudio({ fullFilePath = "", filename = "", options = {} }) {
const data = {
id: v4(),
url: "file://" + fullFilePath,
title: filename,
docAuthor: "no author found",
description: "No description found.",
docSource: "pdf file uploaded by the user.",
chunkSource: "",
title: metadata.title || filename,
docAuthor: metadata.docAuthor || "no author found",
description: metadata.description || "No description found.",
docSource: metadata.docSource || "audio file uploaded by the user.",
chunkSource: metadata.chunkSource || "",
published: createdDate(fullFilePath),
wordCount: content.split(" ").length,
pageContent: content,

View File

@ -8,7 +8,12 @@ const {
const { tokenizeString } = require("../../utils/tokenizer");
const { default: slugify } = require("slugify");
async function asDocX({ fullFilePath = "", filename = "", options = {} }) {
async function asDocX({
fullFilePath = "",
filename = "",
options = {},
metadata = {},
}) {
const loader = new DocxLoader(fullFilePath);
console.log(`-- Working ${filename} --`);
@ -34,11 +39,11 @@ async function asDocX({ fullFilePath = "", filename = "", options = {} }) {
const data = {
id: v4(),
url: "file://" + fullFilePath,
title: filename,
docAuthor: "no author found",
description: "No description found.",
docSource: "pdf file uploaded by the user.",
chunkSource: "",
title: metadata.title || filename,
docAuthor: metadata.docAuthor || "no author found",
description: metadata.description || "No description found.",
docSource: metadata.docSource || "docx file uploaded by the user.",
chunkSource: metadata.chunkSource || "",
published: createdDate(fullFilePath),
wordCount: content.split(" ").length,
pageContent: content,

View File

@ -8,7 +8,12 @@ const {
} = require("../../utils/files");
const { default: slugify } = require("slugify");
async function asEPub({ fullFilePath = "", filename = "", options = {} }) {
async function asEPub({
fullFilePath = "",
filename = "",
options = {},
metadata = {},
}) {
let content = "";
try {
const loader = new EPubLoader(fullFilePath, { splitChapters: false });
@ -32,11 +37,11 @@ async function asEPub({ fullFilePath = "", filename = "", options = {} }) {
const data = {
id: v4(),
url: "file://" + fullFilePath,
title: filename,
docAuthor: "Unknown", // TODO: Find a better author
description: "Unknown", // TODO: Find a better description
docSource: "a epub file uploaded by the user.",
chunkSource: "",
title: metadata.title || filename,
docAuthor: metadata.docAuthor || "Unknown",
description: metadata.description || "Unknown",
docSource: metadata.docSource || "epub file uploaded by the user.",
chunkSource: metadata.chunkSource || "",
published: createdDate(fullFilePath),
wordCount: content.split(" ").length,
pageContent: content,

View File

@ -8,7 +8,12 @@ const {
const OCRLoader = require("../../utils/OCRLoader");
const { default: slugify } = require("slugify");
async function asImage({ fullFilePath = "", filename = "", options = {} }) {
async function asImage({
fullFilePath = "",
filename = "",
options = {},
metadata = {},
}) {
let content = await new OCRLoader({
targetLanguages: options?.ocr?.langList,
}).ocrImage(fullFilePath);
@ -27,11 +32,11 @@ async function asImage({ fullFilePath = "", filename = "", options = {} }) {
const data = {
id: v4(),
url: "file://" + fullFilePath,
title: filename,
docAuthor: "Unknown", // TODO: Find a better author
description: "Unknown", // TODO: Find a better description
docSource: "a text file uploaded by the user.",
chunkSource: "",
title: metadata.title || filename,
docAuthor: metadata.docAuthor || "Unknown",
description: metadata.description || "Unknown",
docSource: metadata.docSource || "image file uploaded by the user.",
chunkSource: metadata.chunkSource || "",
published: createdDate(fullFilePath),
wordCount: content.split(" ").length,
pageContent: content,

View File

@ -9,7 +9,12 @@ const {
const { tokenizeString } = require("../../utils/tokenizer");
const { default: slugify } = require("slugify");
async function asMbox({ fullFilePath = "", filename = "", options = {} }) {
async function asMbox({
fullFilePath = "",
filename = "",
options = {},
metadata = {},
}) {
console.log(`-- Working ${filename} --`);
const mails = await mboxParser(fs.createReadStream(fullFilePath))
@ -43,13 +48,16 @@ async function asMbox({ fullFilePath = "", filename = "", options = {} }) {
const data = {
id: v4(),
url: "file://" + fullFilePath,
title: mail?.subject
? slugify(mail?.subject?.replace(".", "")) + ".mbox"
: `msg_${item}-${filename}`,
docAuthor: mail?.from?.text,
description: "No description found.",
docSource: "Mbox message file uploaded by the user.",
chunkSource: "",
title:
metadata.title ||
(mail?.subject
? slugify(mail?.subject?.replace(".", "")) + ".mbox"
: `msg_${item}-${filename}`),
docAuthor: metadata.docAuthor || mail?.from?.text,
description: metadata.description || "No description found.",
docSource:
metadata.docSource || "Mbox message file uploaded by the user.",
chunkSource: metadata.chunkSource || "",
published: createdDate(fullFilePath),
wordCount: content.split(" ").length,
pageContent: content,

View File

@ -12,6 +12,7 @@ async function asOfficeMime({
fullFilePath = "",
filename = "",
options = {},
metadata = {},
}) {
console.log(`-- Working ${filename} --`);
let content = "";
@ -34,11 +35,11 @@ async function asOfficeMime({
const data = {
id: v4(),
url: "file://" + fullFilePath,
title: filename,
docAuthor: "no author found",
description: "No description found.",
docSource: "Office file uploaded by the user.",
chunkSource: "",
title: metadata.title || filename,
docAuthor: metadata.docAuthor || "no author found",
description: metadata.description || "No description found.",
docSource: metadata.docSource || "Office file uploaded by the user.",
chunkSource: metadata.chunkSource || "",
published: createdDate(fullFilePath),
wordCount: content.split(" ").length,
pageContent: content,

View File

@ -9,7 +9,12 @@ const { default: slugify } = require("slugify");
const PDFLoader = require("./PDFLoader");
const OCRLoader = require("../../../utils/OCRLoader");
async function asPdf({ fullFilePath = "", filename = "", options = {} }) {
async function asPdf({
fullFilePath = "",
filename = "",
options = {},
metadata = {},
}) {
const pdfLoader = new PDFLoader(fullFilePath, {
splitPages: true,
});
@ -51,11 +56,17 @@ async function asPdf({ fullFilePath = "", filename = "", options = {} }) {
const data = {
id: v4(),
url: "file://" + fullFilePath,
title: filename,
docAuthor: docs[0]?.metadata?.pdf?.info?.Creator || "no author found",
description: docs[0]?.metadata?.pdf?.info?.Title || "No description found.",
docSource: "pdf file uploaded by the user.",
chunkSource: "",
title: metadata.title || filename,
docAuthor:
metadata.docAuthor ||
docs[0]?.metadata?.pdf?.info?.Creator ||
"no author found",
description:
metadata.description ||
docs[0]?.metadata?.pdf?.info?.Title ||
"No description found.",
docSource: metadata.docSource || "pdf file uploaded by the user.",
chunkSource: metadata.chunkSource || "",
published: createdDate(fullFilePath),
wordCount: content.split(" ").length,
pageContent: content,

View File

@ -8,7 +8,12 @@ const {
} = require("../../utils/files");
const { default: slugify } = require("slugify");
async function asTxt({ fullFilePath = "", filename = "", options = {} }) {
async function asTxt({
fullFilePath = "",
filename = "",
options = {},
metadata = {},
}) {
let content = "";
try {
content = fs.readFileSync(fullFilePath, "utf8");
@ -30,11 +35,11 @@ async function asTxt({ fullFilePath = "", filename = "", options = {} }) {
const data = {
id: v4(),
url: "file://" + fullFilePath,
title: filename,
docAuthor: "Unknown", // TODO: Find a better author
description: "Unknown", // TODO: Find a better description
docSource: "a text file uploaded by the user.",
chunkSource: "",
title: metadata.title || filename,
docAuthor: metadata.docAuthor || "Unknown",
description: metadata.description || "Unknown",
docSource: metadata.docSource || "a text file uploaded by the user.",
chunkSource: metadata.chunkSource || "",
published: createdDate(fullFilePath),
wordCount: content.split(" ").length,
pageContent: content,

View File

@ -27,7 +27,12 @@ function convertToCSV(data) {
.join("\n");
}
async function asXlsx({ fullFilePath = "", filename = "", options = {} }) {
async function asXlsx({
fullFilePath = "",
filename = "",
options = {},
metadata = {},
}) {
const documents = [];
const folderName = slugify(`${path.basename(filename)}-${v4().slice(0, 4)}`, {
lower: true,
@ -56,11 +61,12 @@ async function asXlsx({ fullFilePath = "", filename = "", options = {} }) {
const sheetData = {
id: v4(),
url: `file://${path.join(outFolderPath, `${slugify(name)}.csv`)}`,
title: `${filename} - Sheet:${name}`,
docAuthor: "Unknown",
description: `Spreadsheet data from sheet: ${name}`,
docSource: "an xlsx file uploaded by the user.",
chunkSource: "",
title: metadata.title || `${filename} - Sheet:${name}`,
docAuthor: metadata.docAuthor || "Unknown",
description:
metadata.description || `Spreadsheet data from sheet: ${name}`,
docSource: metadata.docSource || "an xlsx file uploaded by the user.",
chunkSource: metadata.chunkSource || "",
published: createdDate(fullFilePath),
wordCount: content.split(/\s+/).length,
pageContent: content,

View File

@ -12,7 +12,14 @@ const {
} = require("../utils/files");
const RESERVED_FILES = ["__HOTDIR__.md"];
async function processSingleFile(targetFilename, options = {}) {
/**
* Process a single file and return the documents
* @param {string} targetFilename - The filename to process
* @param {Object} options - The options for the file processing
* @param {Object} metadata - The metadata for the file processing
* @returns {Promise<{success: boolean, reason: string, documents: Object[]}>} - The documents from the file processing
*/
async function processSingleFile(targetFilename, options = {}, metadata = {}) {
const fullFilePath = path.resolve(
WATCH_DIRECTORY,
normalizePath(targetFilename)
@ -70,6 +77,7 @@ async function processSingleFile(targetFilename, options = {}) {
fullFilePath,
filename: targetFilename,
options,
metadata,
});
}

View File

@ -8,7 +8,7 @@ const {
normalizePath,
isWithin,
} = require("../../../utils/files");
const { reqBody } = require("../../../utils/http");
const { reqBody, safeJsonParse } = require("../../../utils/http");
const { EventLogs } = require("../../../models/eventLogs");
const { CollectorApi } = require("../../../utils/collectorApi");
const fs = require("fs");
@ -29,7 +29,7 @@ function apiDocumentEndpoints(app) {
async (request, response) => {
/*
#swagger.tags = ['Documents']
#swagger.description = 'Upload a new file to AnythingLLM to be parsed and prepared for embedding.'
#swagger.description = 'Upload a new file to AnythingLLM to be parsed and prepared for embedding, with optional metadata.'
#swagger.requestBody = {
description: 'File to be uploaded.',
required: true,
@ -47,6 +47,11 @@ function apiDocumentEndpoints(app) {
addToWorkspaces: {
type: 'string',
description: 'comma-separated text-string of workspace slugs to embed the document into post-upload. eg: workspace1,workspace2',
},
metadata: {
type: 'object',
description: 'Key:Value pairs of metadata to attach to the document in JSON Object format. Only specific keys are allowed - see example.',
example: { 'title': 'Custom Title', 'docAuthor': 'Author Name', 'description': 'A brief description', 'docSource': 'Source of the document' }
}
},
required: ['file']
@ -91,7 +96,12 @@ function apiDocumentEndpoints(app) {
try {
const Collector = new CollectorApi();
const { originalname } = request.file;
const { addToWorkspaces = "" } = reqBody(request);
const { addToWorkspaces = "", metadata: _metadata = {} } =
reqBody(request);
const metadata =
typeof _metadata === "string"
? safeJsonParse(_metadata, {})
: _metadata;
const processingOnline = await Collector.online();
if (!processingOnline) {
@ -105,14 +115,16 @@ function apiDocumentEndpoints(app) {
return;
}
const { success, reason, documents } =
await Collector.processDocument(originalname);
const { success, reason, documents } = await Collector.processDocument(
originalname,
metadata
);
if (!success) {
response
return response
.status(500)
.json({ success: false, error: reason, documents })
.end();
return;
}
Collector.log(
@ -151,7 +163,7 @@ function apiDocumentEndpoints(app) {
example: 'my-folder'
}
#swagger.requestBody = {
description: 'File to be uploaded.',
description: 'File to be uploaded, with optional metadata.',
required: true,
content: {
"multipart/form-data": {
@ -167,6 +179,11 @@ function apiDocumentEndpoints(app) {
addToWorkspaces: {
type: 'string',
description: 'comma-separated text-string of workspace slugs to embed the document into post-upload. eg: workspace1,workspace2',
},
metadata: {
type: 'object',
description: 'Key:Value pairs of metadata to attach to the document in JSON Object format. Only specific keys are allowed - see example.',
example: { 'title': 'Custom Title', 'docAuthor': 'Author Name', 'description': 'A brief description', 'docSource': 'Source of the document' }
}
}
}
@ -221,7 +238,13 @@ function apiDocumentEndpoints(app) {
*/
try {
const { originalname } = request.file;
const { addToWorkspaces = "" } = reqBody(request);
const { addToWorkspaces = "", metadata: _metadata = {} } =
reqBody(request);
const metadata =
typeof _metadata === "string"
? safeJsonParse(_metadata, {})
: _metadata;
let folder = request.params?.folderName || "custom-documents";
folder = normalizePath(folder);
const targetFolderPath = path.join(documentsPath, folder);
@ -236,25 +259,25 @@ function apiDocumentEndpoints(app) {
const Collector = new CollectorApi();
const processingOnline = await Collector.online();
if (!processingOnline) {
response
return response
.status(500)
.json({
success: false,
error: `Document processing API is not online. Document ${originalname} will not be processed automatically.`,
})
.end();
return;
}
// Process the uploaded document
const { success, reason, documents } =
await Collector.processDocument(originalname);
// Process the uploaded document with metadata
const { success, reason, documents } = await Collector.processDocument(
originalname,
metadata
);
if (!success) {
response
return response
.status(500)
.json({ success: false, error: reason, documents })
.end();
return;
}
// For each processed document, check if it is already in the desired folder.
@ -314,7 +337,7 @@ function apiDocumentEndpoints(app) {
#swagger.tags = ['Documents']
#swagger.description = 'Upload a valid URL for AnythingLLM to scrape and prepare for embedding. Optionally, specify a comma-separated list of workspace slugs to embed the document into post-upload.'
#swagger.requestBody = {
description: 'Link of web address to be scraped and optionally a comma-separated list of workspace slugs to embed the document into post-upload.',
description: 'Link of web address to be scraped and optionally a comma-separated list of workspace slugs to embed the document into post-upload, and optional metadata.',
required: true,
content: {
"application/json": {
@ -326,6 +349,12 @@ function apiDocumentEndpoints(app) {
"scraperHeaders": {
"Authorization": "Bearer token123",
"My-Custom-Header": "value"
},
"metadata": {
"title": "Custom Title",
"docAuthor": "Author Name",
"description": "A brief description",
"docSource": "Source of the document"
}
}
}
@ -373,30 +402,34 @@ function apiDocumentEndpoints(app) {
link,
addToWorkspaces = "",
scraperHeaders = {},
metadata: _metadata = {},
} = reqBody(request);
const metadata =
typeof _metadata === "string"
? safeJsonParse(_metadata, {})
: _metadata;
const processingOnline = await Collector.online();
if (!processingOnline) {
response
return response
.status(500)
.json({
success: false,
error: `Document processing API is not online. Link ${link} will not be processed automatically.`,
})
.end();
return;
}
const { success, reason, documents } = await Collector.processLink(
link,
scraperHeaders
scraperHeaders,
metadata
);
if (!success) {
response
return response
.status(500)
.json({ success: false, error: reason, documents })
.end();
return;
}
Collector.log(
@ -488,20 +521,23 @@ function apiDocumentEndpoints(app) {
const requiredMetadata = ["title"];
const {
textContent,
metadata = {},
metadata: _metadata = {},
addToWorkspaces = "",
} = reqBody(request);
const metadata =
typeof _metadata === "string"
? safeJsonParse(_metadata, {})
: _metadata;
const processingOnline = await Collector.online();
if (!processingOnline) {
response
return response
.status(500)
.json({
success: false,
error: `Document processing API is not online. Request will not be processed.`,
})
.end();
return;
}
if (
@ -510,7 +546,7 @@ function apiDocumentEndpoints(app) {
Object.keys(metadata).includes(reqKey) && !!metadata[reqKey]
)
) {
response
return response
.status(422)
.json({
success: false,
@ -519,18 +555,16 @@ function apiDocumentEndpoints(app) {
.join(", ")}`,
})
.end();
return;
}
if (!textContent || textContent?.length === 0) {
response
return response
.status(422)
.json({
success: false,
error: `The 'textContent' key cannot have an empty value.`,
})
.end();
return;
}
const { success, reason, documents } = await Collector.processRawText(
@ -538,11 +572,10 @@ function apiDocumentEndpoints(app) {
metadata
);
if (!success) {
response
return response
.status(500)
.json({ success: false, error: reason, documents })
.end();
return;
}
Collector.log(

View File

@ -843,7 +843,7 @@
"tags": [
"Documents"
],
"description": "Upload a new file to AnythingLLM to be parsed and prepared for embedding.",
"description": "Upload a new file to AnythingLLM to be parsed and prepared for embedding, with optional metadata.",
"parameters": [],
"responses": {
"200": {
@ -913,6 +913,16 @@
"addToWorkspaces": {
"type": "string",
"description": "comma-separated text-string of workspace slugs to embed the document into post-upload. eg: workspace1,workspace2"
},
"metadata": {
"type": "object",
"description": "Key:Value pairs of metadata to attach to the document in JSON Object format. Only specific keys are allowed - see example.",
"example": {
"title": "Custom Title",
"docAuthor": "Author Name",
"description": "A brief description",
"docSource": "Source of the document"
}
}
}
}
@ -1000,7 +1010,7 @@
}
},
"requestBody": {
"description": "File to be uploaded.",
"description": "File to be uploaded, with optional metadata.",
"required": true,
"content": {
"multipart/form-data": {
@ -1018,6 +1028,16 @@
"addToWorkspaces": {
"type": "string",
"description": "comma-separated text-string of workspace slugs to embed the document into post-upload. eg: workspace1,workspace2"
},
"metadata": {
"type": "object",
"description": "Key:Value pairs of metadata to attach to the document in JSON Object format. Only specific keys are allowed - see example.",
"example": {
"title": "Custom Title",
"docAuthor": "Author Name",
"description": "A brief description",
"docSource": "Source of the document"
}
}
}
}
@ -1084,7 +1104,7 @@
}
},
"requestBody": {
"description": "Link of web address to be scraped and optionally a comma-separated list of workspace slugs to embed the document into post-upload.",
"description": "Link of web address to be scraped and optionally a comma-separated list of workspace slugs to embed the document into post-upload, and optional metadata.",
"required": true,
"content": {
"application/json": {
@ -1096,6 +1116,12 @@
"scraperHeaders": {
"Authorization": "Bearer token123",
"My-Custom-Header": "value"
},
"metadata": {
"title": "Custom Title",
"docAuthor": "Author Name",
"description": "A brief description",
"docSource": "Source of the document"
}
}
}

View File

@ -63,15 +63,17 @@ class CollectorApi {
/**
* Process a document
* - Will append the options to the request body
* - Will append the options and optional metadata to the request body
* @param {string} filename - The filename of the document to process
* @param {Object} metadata - Optional metadata key:value pairs
* @returns {Promise<Object>} - The response from the collector API
*/
async processDocument(filename = "") {
async processDocument(filename = "", metadata = {}) {
if (!filename) return false;
const data = JSON.stringify({
filename,
metadata,
options: this.#attachOptions(),
});
@ -102,15 +104,17 @@ class CollectorApi {
* - Will append the options and optional metadata to the request body
* @param {string} link - The link to process
* @param {{[key: string]: string}} scraperHeaders - Custom headers to apply to the web-scraping request URL
* @param {{[key: string]: string}} metadata - Optional metadata to attach to the document
* @returns {Promise<Object>} - The response from the collector API
*/
async processLink(link = "", scraperHeaders = {}) {
async processLink(link = "", scraperHeaders = {}, metadata = {}) {
if (!link) return false;
const data = JSON.stringify({
link,
scraperHeaders,
options: this.#attachOptions(),
metadata: metadata,
});
return await fetch(`${this.endpoint}/process-link`, {
@ -139,7 +143,7 @@ class CollectorApi {
* Process raw text as a document for the collector
* - Will append the options to the request body
* @param {string} textContent - The text to process
* @param {Object} metadata - The metadata to process
* @param {{[key: string]: string}} metadata - The metadata to process
* @returns {Promise<Object>} - The response from the collector API
*/
async processRawText(textContent = "", metadata = {}) {