4825 patch yt file collector api (#4904)
Patch YT links in API document collector. Closes #4825.
This commit is contained in:
parent
39e6ccdaa3
commit
2dc625193e
@ -6,6 +6,7 @@ const {
|
|||||||
writeToServerDocuments,
|
writeToServerDocuments,
|
||||||
sanitizeFileName,
|
sanitizeFileName,
|
||||||
documentsFolder,
|
documentsFolder,
|
||||||
|
isWithin,
|
||||||
} = require("../../files");
|
} = require("../../files");
|
||||||
const { tokenizeString } = require("../../tokenizer");
|
const { tokenizeString } = require("../../tokenizer");
|
||||||
const { YoutubeLoader } = require("./YoutubeLoader");
|
const { YoutubeLoader } = require("./YoutubeLoader");
|
||||||
@ -129,11 +130,31 @@ async function loadYouTubeTranscript({ url }, options = { parseOnly: false }) {
|
|||||||
slugify(`${metadata.author} YouTube transcripts`).toLowerCase()
|
slugify(`${metadata.author} YouTube transcripts`).toLowerCase()
|
||||||
);
|
);
|
||||||
const outFolderPath = path.resolve(documentsFolder, outFolder);
|
const outFolderPath = path.resolve(documentsFolder, outFolder);
|
||||||
|
const uuid = v4();
|
||||||
|
const fileName = sanitizeFileName(`${slugify(metadata.title)}-${uuid}`);
|
||||||
|
|
||||||
|
if (!isWithin(documentsFolder, path.resolve(outFolderPath, fileName))) {
|
||||||
|
console.error(
|
||||||
|
`[YouTube Loader]: Invalid file path ${path.resolve(
|
||||||
|
outFolderPath,
|
||||||
|
fileName
|
||||||
|
)} is not within the documents folder ${documentsFolder}`
|
||||||
|
);
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
reason: `[YouTube Loader]: Invalid file path ${path.resolve(
|
||||||
|
outFolderPath,
|
||||||
|
fileName
|
||||||
|
)} is not within the documents folder ${documentsFolder}`,
|
||||||
|
documents: [],
|
||||||
|
data: {},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
if (!fs.existsSync(outFolderPath))
|
if (!fs.existsSync(outFolderPath))
|
||||||
fs.mkdirSync(outFolderPath, { recursive: true });
|
fs.mkdirSync(outFolderPath, { recursive: true });
|
||||||
const data = {
|
const data = {
|
||||||
id: v4(),
|
id: uuid,
|
||||||
url: url + ".youtube",
|
url: url + ".youtube",
|
||||||
title: metadata.title || url,
|
title: metadata.title || url,
|
||||||
docAuthor: metadata.author,
|
docAuthor: metadata.author,
|
||||||
@ -147,15 +168,16 @@ async function loadYouTubeTranscript({ url }, options = { parseOnly: false }) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
console.log(`[YouTube Loader]: Saving ${metadata.title} to ${outFolder}`);
|
console.log(`[YouTube Loader]: Saving ${metadata.title} to ${outFolder}`);
|
||||||
writeToServerDocuments({
|
const document = writeToServerDocuments({
|
||||||
data,
|
data,
|
||||||
filename: sanitizeFileName(`${slugify(metadata.title)}-${data.id}`),
|
filename: fileName,
|
||||||
destinationOverride: outFolderPath,
|
destinationOverride: outFolderPath,
|
||||||
});
|
});
|
||||||
|
|
||||||
return {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
reason: null,
|
reason: null,
|
||||||
|
documents: [document],
|
||||||
data: {
|
data: {
|
||||||
title: metadata.title,
|
title: metadata.title,
|
||||||
author: metadata.author,
|
author: metadata.author,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user