* Added the ability to pass in metadata to the /document/upload/{folderName} endpoint
* Added the ability to pass in metadata to the /document/upload-link endpoint
* feat: added metadata to document/upload api endpoint
* simplify optional metadata in document dev api endpoints
* lint
* patch handling of metadata in dev api
* Linting, small comments
---------
Co-authored-by: jstawskigmi <jstawski@getmyinterns.org>
Co-authored-by: shatfield4 <seanhatfield5@gmail.com>
Co-authored-by: Timothy Carambat <rambat1010@gmail.com>
43 lines
1.5 KiB
JavaScript
43 lines
1.5 KiB
JavaScript
// URL validation helper and the generic URL scraper used by both exported functions below.
const { validURL } = require("../utils/url");
const { scrapeGenericUrl } = require("./convert/generic");

/**
 * Process a link and return the text content. This util will save the link as a document
 * so it can be used for embedding later.
 *
 * The link is checked with `validURL` first; an invalid link short-circuits and the
 * scraper is never called.
 *
 * @param {string} link - The link to process
 * @param {{[key: string]: string}} scraperHeaders - Custom headers to apply when scraping the link
 * @param {Object} metadata - Optional metadata to attach to the document
 * @returns {Promise<{success: boolean, reason?: string, content?: string}>} -
 *   `{ success: false, reason: "Not a valid URL." }` when the link fails validation,
 *   otherwise whatever `scrapeGenericUrl` resolves with (response from collector).
 */
async function processLink(link, scraperHeaders = {}, metadata = {}) {
  if (!validURL(link)) return { success: false, reason: "Not a valid URL." };

  return await scrapeGenericUrl({
    link,
    captureAs: "text",
    processAsDocument: true,
    // Default parameters only apply to `undefined`. API callers may send an explicit
    // `null` for these optional fields, so normalize to an empty object before forwarding.
    scraperHeaders: scraperHeaders || {},
    metadata: metadata || {},
  });
}

/**
 * Get the text content of a link - does not save the link as a document.
 * Mostly used in agentic flows/tools calls to get the text content of a link.
 *
 * The link is checked with `validURL` first; an invalid link short-circuits and the
 * scraper is never called.
 *
 * @param {string} link - The link to get the text content of
 * @param {('html' | 'text' | 'json')} captureAs - The format to capture the page content as
 * @returns {Promise<{success: boolean, reason?: string, content?: string}>} -
 *   `{ success: false, reason: "Not a valid URL." }` when the link fails validation,
 *   otherwise whatever `scrapeGenericUrl` resolves with (response from collector).
 */
async function getLinkText(link, captureAs = "text") {
  if (!validURL(link)) return { success: false, reason: "Not a valid URL." };

  return await scrapeGenericUrl({
    link,
    // The default parameter only covers `undefined`; treat `null`/empty the same way
    // so the scraper always receives a capture format.
    captureAs: captureAs || "text",
    processAsDocument: false,
  });
}

module.exports = {
|
|
processLink,
|
|
getLinkText,
|
|
};
|