4825 patch yt file collector api (#4904)

Patch YT links in API document collector
closes #4825
This commit is contained in:
Timothy Carambat 2026-01-26 14:36:21 -08:00 committed by GitHub
parent 39e6ccdaa3
commit 2dc625193e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -6,6 +6,7 @@ const {
writeToServerDocuments, writeToServerDocuments,
sanitizeFileName, sanitizeFileName,
documentsFolder, documentsFolder,
isWithin,
} = require("../../files"); } = require("../../files");
const { tokenizeString } = require("../../tokenizer"); const { tokenizeString } = require("../../tokenizer");
const { YoutubeLoader } = require("./YoutubeLoader"); const { YoutubeLoader } = require("./YoutubeLoader");
@@ -129,11 +130,31 @@ async function loadYouTubeTranscript({ url }, options = { parseOnly: false }) {
slugify(`${metadata.author} YouTube transcripts`).toLowerCase() slugify(`${metadata.author} YouTube transcripts`).toLowerCase()
); );
const outFolderPath = path.resolve(documentsFolder, outFolder); const outFolderPath = path.resolve(documentsFolder, outFolder);
const uuid = v4();
const fileName = sanitizeFileName(`${slugify(metadata.title)}-${uuid}`);
if (!isWithin(documentsFolder, path.resolve(outFolderPath, fileName))) {
console.error(
`[YouTube Loader]: Invalid file path ${path.resolve(
outFolderPath,
fileName
)} is not within the documents folder ${documentsFolder}`
);
return {
success: false,
reason: `[YouTube Loader]: Invalid file path ${path.resolve(
outFolderPath,
fileName
)} is not within the documents folder ${documentsFolder}`,
documents: [],
data: {},
};
}
if (!fs.existsSync(outFolderPath)) if (!fs.existsSync(outFolderPath))
fs.mkdirSync(outFolderPath, { recursive: true }); fs.mkdirSync(outFolderPath, { recursive: true });
const data = { const data = {
id: v4(), id: uuid,
url: url + ".youtube", url: url + ".youtube",
title: metadata.title || url, title: metadata.title || url,
docAuthor: metadata.author, docAuthor: metadata.author,
@@ -147,15 +168,16 @@ async function loadYouTubeTranscript({ url }, options = { parseOnly: false }) {
}; };
console.log(`[YouTube Loader]: Saving ${metadata.title} to ${outFolder}`); console.log(`[YouTube Loader]: Saving ${metadata.title} to ${outFolder}`);
writeToServerDocuments({ const document = writeToServerDocuments({
data, data,
filename: sanitizeFileName(`${slugify(metadata.title)}-${data.id}`), filename: fileName,
destinationOverride: outFolderPath, destinationOverride: outFolderPath,
}); });
return { return {
success: true, success: true,
reason: null, reason: null,
documents: [document],
data: { data: {
title: metadata.title, title: metadata.title,
author: metadata.author, author: metadata.author,