Sanitize YouTube transcription file paths (#4148)

Sanitize YouTube transcription file paths
This commit is contained in:
Sean Hatfield 2025-07-14 13:53:34 -07:00 committed by GitHub
parent 6111a1660f
commit 5485c58b44
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -2,7 +2,7 @@ const fs = require("fs");
const path = require("path");
const { default: slugify } = require("slugify");
const { v4 } = require("uuid");
const { writeToServerDocuments } = require("../../files");
const { writeToServerDocuments, sanitizeFileName } = require("../../files");
const { tokenizeString } = require("../../tokenizer");
const { YoutubeLoader } = require("./YoutubeLoader");
@@ -89,9 +89,9 @@ async function loadYouTubeTranscript({ url }) {
};
}
const { content, metadata } = transcriptResults;
const outFolder = slugify(
`${metadata.author} YouTube transcripts`
).toLowerCase();
const outFolder = sanitizeFileName(
slugify(`${metadata.author} YouTube transcripts`).toLowerCase()
);
const outFolderPath =
process.env.NODE_ENV === "development"
@@ -121,7 +121,7 @@ async function loadYouTubeTranscript({ url }) {
console.log(`[YouTube Loader]: Saving ${metadata.title} to ${outFolder}`);
writeToServerDocuments(
data,
`${slugify(metadata.title)}-${data.id}`,
sanitizeFileName(`${slugify(metadata.title)}-${data.id}`),
outFolderPath
);