From bdfa0328dbe89a09e7a7489404254dc443aab25e Mon Sep 17 00:00:00 2001 From: timothycarambat Date: Wed, 1 Oct 2025 20:45:52 -0700 Subject: [PATCH] update comment about parseOnly --- collector/processLink/convert/generic.js | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/collector/processLink/convert/generic.js b/collector/processLink/convert/generic.js index 780e17a6..1e1e1039 100644 --- a/collector/processLink/convert/generic.js +++ b/collector/processLink/convert/generic.js @@ -68,8 +68,16 @@ async function scrapeGenericUrl({ const fileFilePath = fileContentResult.fileLocation; const targetFilename = path.basename(fileFilePath); - // If the saveAsDocument is false, we are only interested in the text content - // and can delete the file after we have the text content via the parseOnly option + /** + * If `saveAsDocument` is false, we are only interested in the text content + * and can ignore the file as a document by using `parseOnly` in the options. + * This will send the file to the Direct Uploads folder instead of the Documents folder. + * The file will then be deleted by the cleanup-orphan-documents job that runs frequently. The trade-off + * is that since it is still in the FS we can debug its output or even potentially reuse it for other purposes. + * + * TODO: Improve this process via a new option that will instantly delete the file after processing + * if we find we don't ever need this file after processing. + */ const processSingleFileResult = await processSingleFile(targetFilename, { parseOnly: saveAsDocument === false, });