From 2dc625193ee4163bb7373a41af075e471e991836 Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Mon, 26 Jan 2026 14:36:21 -0800 Subject: [PATCH] 4825 patch yt file collector api (#4904) Patch YT links in API document collector closes #4825 --- .../extensions/YoutubeTranscript/index.js | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/collector/utils/extensions/YoutubeTranscript/index.js b/collector/utils/extensions/YoutubeTranscript/index.js index c5fe9974..02b2e4cb 100644 --- a/collector/utils/extensions/YoutubeTranscript/index.js +++ b/collector/utils/extensions/YoutubeTranscript/index.js @@ -6,6 +6,7 @@ const { writeToServerDocuments, sanitizeFileName, documentsFolder, + isWithin, } = require("../../files"); const { tokenizeString } = require("../../tokenizer"); const { YoutubeLoader } = require("./YoutubeLoader"); @@ -129,11 +130,31 @@ async function loadYouTubeTranscript({ url }, options = { parseOnly: false }) { slugify(`${metadata.author} YouTube transcripts`).toLowerCase() ); const outFolderPath = path.resolve(documentsFolder, outFolder); + const uuid = v4(); + const fileName = sanitizeFileName(`${slugify(metadata.title)}-${uuid}`); + + if (!isWithin(documentsFolder, path.resolve(outFolderPath, fileName))) { + console.error( + `[YouTube Loader]: Invalid file path ${path.resolve( + outFolderPath, + fileName + )} is not within the documents folder ${documentsFolder}` + ); + return { + success: false, + reason: `[YouTube Loader]: Invalid file path ${path.resolve( + outFolderPath, + fileName + )} is not within the documents folder ${documentsFolder}`, + documents: [], + data: {}, + }; + } if (!fs.existsSync(outFolderPath)) fs.mkdirSync(outFolderPath, { recursive: true }); const data = { - id: v4(), + id: uuid, url: url + ".youtube", title: metadata.title || url, docAuthor: metadata.author, @@ -147,15 +168,16 @@ async function loadYouTubeTranscript({ url }, options = { parseOnly: false }) { }; console.log(`[YouTube Loader]: Saving ${metadata.title} to ${outFolder}`); - writeToServerDocuments({ + const document = writeToServerDocuments({ data, - filename: sanitizeFileName(`${slugify(metadata.title)}-${data.id}`), + filename: fileName, destinationOverride: outFolderPath, }); return { success: true, reason: null, + documents: [document], data: { title: metadata.title, author: metadata.author,