From bdfa0328dbe89a09e7a7489404254dc443aab25e Mon Sep 17 00:00:00 2001
From: timothycarambat <rambat1010@gmail.com>
Date: Wed, 1 Oct 2025 20:45:52 -0700
Subject: [PATCH] update comment about parseOnly

---
 collector/processLink/convert/generic.js | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/collector/processLink/convert/generic.js b/collector/processLink/convert/generic.js
index 780e17a6..1e1e1039 100644
--- a/collector/processLink/convert/generic.js
+++ b/collector/processLink/convert/generic.js
@@ -68,8 +68,16 @@ async function scrapeGenericUrl({
     const fileFilePath = fileContentResult.fileLocation;
     const targetFilename = path.basename(fileFilePath);
 
-    // If the saveAsDocument is false, we are only interested in the text content
-    // and can delete the file after we have the text content via the parseOnly option
+    /**
+     * If the saveAsDocument is false, we are only interested in the text content
+     * and can ignore the file as a document by using `parseOnly` in the options.
+     * This will send the file to the Direct Uploads folder instead of the Documents folder,
+     * where it will be deleted by the cleanup-orphan-documents job that runs frequently. The trade-off
+     * is that since it is still on the FS we can debug its output or even potentially reuse it for other purposes.
+     *
+     * TODO: Improve this process via a new option that will instantly delete the file after processing
+     * if we find we don't ever need this file after processing.
+     */
     const processSingleFileResult = await processSingleFile(targetFilename, {
       parseOnly: saveAsDocument === false,
     });