Remove illegal chars for Windows on files (#5364)

This commit is contained in:
Timothy Carambat 2026-04-06 11:12:13 -07:00 committed by GitHub
parent 3444b9b0aa
commit 8ed1d35ab3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 44 additions and 13 deletions

View File

@ -132,8 +132,9 @@ function writeToServerDocuments({
if (!fs.existsSync(destination)) if (!fs.existsSync(destination))
fs.mkdirSync(destination, { recursive: true }); fs.mkdirSync(destination, { recursive: true });
const safeFilename = sanitizeFileName(filename);
const destinationFilePath = normalizePath( const destinationFilePath = normalizePath(
path.resolve(destination, filename) + ".json" path.resolve(destination, safeFilename) + ".json"
); );
fs.writeFileSync(destinationFilePath, JSON.stringify(data, null, 4), { fs.writeFileSync(destinationFilePath, JSON.stringify(data, null, 4), {
@ -210,10 +211,19 @@ function normalizePath(filepath = "") {
return result; return result;
} }
/**
* Strips characters that are illegal in Windows filenames, including Unicode
* quotation marks (U+201C, U+201D, etc.) that can get corrupted into ASCII
* double-quotes during charset conversion in the upload pipeline.
* @param {string} fileName - The filename to sanitize.
* @returns {string} - The sanitized filename.
*/
function sanitizeFileName(fileName) { function sanitizeFileName(fileName) {
if (!fileName) return fileName; if (!fileName) return fileName;
//eslint-disable-next-line return fileName.replace(
return fileName.replace(/[<>:"\/\\|?*]/g, ""); /[<>:"/\\|?*\u201C\u201D\u201E\u201F\u2018\u2019\u201A\u201B]/g,
""
);
} }
module.exports = { module.exports = {

View File

@ -17,7 +17,12 @@ const { Telemetry } = require("../../models/telemetry");
const { CollectorApi } = require("../collectorApi"); const { CollectorApi } = require("../collectorApi");
const fs = require("fs"); const fs = require("fs");
const path = require("path"); const path = require("path");
const { hotdirPath, normalizePath, isWithin } = require("../files"); const {
hotdirPath,
normalizePath,
isWithin,
sanitizeFileName,
} = require("../files");
/** /**
* @typedef ResponseObject * @typedef ResponseObject
* @property {string} id - uuid of response * @property {string} id - uuid of response
@ -72,8 +77,8 @@ async function processDocumentAttachments(attachments = []) {
if (dataUriMatch) base64Data = dataUriMatch[1]; if (dataUriMatch) base64Data = dataUriMatch[1];
const buffer = Buffer.from(base64Data, "base64"); const buffer = Buffer.from(base64Data, "base64");
const filename = normalizePath( const filename = sanitizeFileName(
attachment.name || `attachment-${uuidv4()}` normalizePath(attachment.name || `attachment-${uuidv4()}`)
); );
const filePath = normalizePath(path.join(hotdirPath, filename)); const filePath = normalizePath(path.join(hotdirPath, filename));
if (!isWithin(hotdirPath, filePath)) if (!isWithin(hotdirPath, filePath))

View File

@ -284,6 +284,21 @@ function normalizePath(filepath = "") {
return result; return result;
} }
/**
 * Strips characters that are illegal in Windows filenames: the reserved
 * punctuation `< > : " / \ | ? *`, ASCII control characters (U+0000-U+001F),
 * and Unicode quotation marks (U+2018-U+201F) that can get corrupted into
 * ASCII double-quotes during charset conversion in the upload pipeline.
 * Falsy input (empty string, null, undefined) is returned unchanged.
 * @param {string} fileName - The filename to sanitize.
 * @returns {string} - The sanitized filename.
 */
function sanitizeFileName(fileName) {
  if (!fileName) return fileName;
  // Control characters are matched on purpose: Windows rejects them in names.
  // eslint-disable-next-line no-control-regex
  return fileName.replace(/[<>:"/\\|?*\u0000-\u001F\u2018-\u201F]/g, "");
}
// Check if the vector-cache folder is empty or not // Check if the vector-cache folder is empty or not
// useful if the user is changing embedders as this will // useful if the user is changing embedders as this will
// break the previous cache. // break the previous cache.
@ -500,4 +515,5 @@ module.exports = {
purgeEntireVectorCache, purgeEntireVectorCache,
getDocumentsByFolder, getDocumentsByFolder,
hotdirPath, hotdirPath,
sanitizeFileName,
}; };

View File

@ -2,7 +2,7 @@ const multer = require("multer");
const path = require("path"); const path = require("path");
const fs = require("fs"); const fs = require("fs");
const { v4 } = require("uuid"); const { v4 } = require("uuid");
const { normalizePath } = require("."); const { normalizePath, sanitizeFileName } = require(".");
/** /**
* Handle File uploads for auto-uploading. * Handle File uploads for auto-uploading.
@ -17,8 +17,8 @@ const fileUploadStorage = multer.diskStorage({
cb(null, uploadOutput); cb(null, uploadOutput);
}, },
filename: function (_, file, cb) { filename: function (_, file, cb) {
file.originalname = normalizePath( file.originalname = sanitizeFileName(
Buffer.from(file.originalname, "latin1").toString("utf8") normalizePath(Buffer.from(file.originalname, "latin1").toString("utf8"))
); );
cb(null, file.originalname); cb(null, file.originalname);
}, },
@ -37,8 +37,8 @@ const fileAPIUploadStorage = multer.diskStorage({
cb(null, uploadOutput); cb(null, uploadOutput);
}, },
filename: function (_, file, cb) { filename: function (_, file, cb) {
file.originalname = normalizePath( file.originalname = sanitizeFileName(
Buffer.from(file.originalname, "latin1").toString("utf8") normalizePath(Buffer.from(file.originalname, "latin1").toString("utf8"))
); );
cb(null, file.originalname); cb(null, file.originalname);
}, },
@ -55,8 +55,8 @@ const assetUploadStorage = multer.diskStorage({
return cb(null, uploadOutput); return cb(null, uploadOutput);
}, },
filename: function (_, file, cb) { filename: function (_, file, cb) {
file.originalname = normalizePath( file.originalname = sanitizeFileName(
Buffer.from(file.originalname, "latin1").toString("utf8") normalizePath(Buffer.from(file.originalname, "latin1").toString("utf8"))
); );
cb(null, file.originalname); cb(null, file.originalname);
}, },