Update writeToServerDocuments to take config object (#4213)

This commit is contained in:
Timothy Carambat 2025-07-29 17:53:05 -07:00 committed by GitHub
parent 7692775942
commit 70a07b743b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 81 additions and 55 deletions

View File

@ -62,10 +62,10 @@ async function scrapeGenericUrl({
token_count_estimate: tokenizeString(content), token_count_estimate: tokenizeString(content),
}; };
const document = writeToServerDocuments( const document = writeToServerDocuments({
data, data,
`url-${slugify(filename)}-${data.id}` filename: `url-${slugify(filename)}-${data.id}`,
); });
console.log(`[SUCCESS]: URL ${link} converted & ready for embedding.\n`); console.log(`[SUCCESS]: URL ${link} converted & ready for embedding.\n`);
return { success: true, reason: null, documents: [document] }; return { success: true, reason: null, documents: [document] };
} }

View File

@ -58,10 +58,10 @@ async function processRawText(textContent, metadata) {
token_count_estimate: tokenizeString(textContent), token_count_estimate: tokenizeString(textContent),
}; };
const document = writeToServerDocuments( const document = writeToServerDocuments({
data, data,
`raw-${stripAndSlug(metadata.title)}-${data.id}` filename: `raw-${stripAndSlug(metadata.title)}-${data.id}`,
); });
console.log(`[SUCCESS]: Raw text and metadata saved & ready for embedding.\n`); console.log(`[SUCCESS]: Raw text and metadata saved & ready for embedding.\n`);
return { success: true, reason: null, documents: [document] }; return { success: true, reason: null, documents: [document] };
} }

View File

@ -59,10 +59,10 @@ async function asAudio({ fullFilePath = "", filename = "", options = {} }) {
token_count_estimate: tokenizeString(content), token_count_estimate: tokenizeString(content),
}; };
const document = writeToServerDocuments( const document = writeToServerDocuments({
data, data,
`${slugify(filename)}-${data.id}` filename: `${slugify(filename)}-${data.id}`,
); });
trashFile(fullFilePath); trashFile(fullFilePath);
console.log( console.log(
`[SUCCESS]: ${filename} transcribed, converted & ready for embedding.\n` `[SUCCESS]: ${filename} transcribed, converted & ready for embedding.\n`

View File

@ -45,10 +45,10 @@ async function asDocX({ fullFilePath = "", filename = "" }) {
token_count_estimate: tokenizeString(content), token_count_estimate: tokenizeString(content),
}; };
const document = writeToServerDocuments( const document = writeToServerDocuments({
data, data,
`${slugify(filename)}-${data.id}` filename: `${slugify(filename)}-${data.id}`,
); });
trashFile(fullFilePath); trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`); console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
return { success: true, reason: null, documents: [document] }; return { success: true, reason: null, documents: [document] };

View File

@ -43,10 +43,10 @@ async function asEPub({ fullFilePath = "", filename = "" }) {
token_count_estimate: tokenizeString(content), token_count_estimate: tokenizeString(content),
}; };
const document = writeToServerDocuments( const document = writeToServerDocuments({
data, data,
`${slugify(filename)}-${data.id}` filename: `${slugify(filename)}-${data.id}`,
); });
trashFile(fullFilePath); trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`); console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
return { success: true, reason: null, documents: [document] }; return { success: true, reason: null, documents: [document] };

View File

@ -38,10 +38,10 @@ async function asImage({ fullFilePath = "", filename = "", options = {} }) {
token_count_estimate: tokenizeString(content), token_count_estimate: tokenizeString(content),
}; };
const document = writeToServerDocuments( const document = writeToServerDocuments({
data, data,
`${slugify(filename)}-${data.id}` filename: `${slugify(filename)}-${data.id}`,
); });
trashFile(fullFilePath); trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`); console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
return { success: true, reason: null, documents: [document] }; return { success: true, reason: null, documents: [document] };

View File

@ -57,10 +57,10 @@ async function asMbox({ fullFilePath = "", filename = "" }) {
}; };
item++; item++;
const document = writeToServerDocuments( const document = writeToServerDocuments({
data, data,
`${slugify(filename)}-${data.id}-msg-${item}` filename: `${slugify(filename)}-${data.id}-msg-${item}`,
); });
documents.push(document); documents.push(document);
} }

View File

@ -41,10 +41,10 @@ async function asOfficeMime({ fullFilePath = "", filename = "" }) {
token_count_estimate: tokenizeString(content), token_count_estimate: tokenizeString(content),
}; };
const document = writeToServerDocuments( const document = writeToServerDocuments({
data, data,
`${slugify(filename)}-${data.id}` filename: `${slugify(filename)}-${data.id}`,
); });
trashFile(fullFilePath); trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`); console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
return { success: true, reason: null, documents: [document] }; return { success: true, reason: null, documents: [document] };

View File

@ -62,10 +62,10 @@ async function asPdf({ fullFilePath = "", filename = "", options = {} }) {
token_count_estimate: tokenizeString(content), token_count_estimate: tokenizeString(content),
}; };
const document = writeToServerDocuments( const document = writeToServerDocuments({
data, data,
`${slugify(filename)}-${data.id}` filename: `${slugify(filename)}-${data.id}`,
); });
trashFile(fullFilePath); trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`); console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
return { success: true, reason: null, documents: [document] }; return { success: true, reason: null, documents: [document] };

View File

@ -41,10 +41,10 @@ async function asTxt({ fullFilePath = "", filename = "" }) {
token_count_estimate: tokenizeString(content), token_count_estimate: tokenizeString(content),
}; };
const document = writeToServerDocuments( const document = writeToServerDocuments({
data, data,
`${slugify(filename)}-${data.id}` filename: `${slugify(filename)}-${data.id}`,
); });
trashFile(fullFilePath); trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`); console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
return { success: true, reason: null, documents: [document] }; return { success: true, reason: null, documents: [document] };

View File

@ -64,11 +64,11 @@ async function asXlsx({ fullFilePath = "", filename = "" }) {
token_count_estimate: tokenizeString(content), token_count_estimate: tokenizeString(content),
}; };
const document = writeToServerDocuments( const document = writeToServerDocuments({
sheetData, data: sheetData,
`sheet-${slugify(name)}`, filename: `sheet-${slugify(name)}`,
outFolderPath destinationOverride: outFolderPath,
); });
documents.push(document); documents.push(document);
console.log( console.log(
`[SUCCESS]: Sheet "${name}" converted & ready for embedding.` `[SUCCESS]: Sheet "${name}" converted & ready for embedding.`

View File

@ -116,7 +116,11 @@ async function loadConfluence(
const fileName = sanitizeFileName( const fileName = sanitizeFileName(
`${slugify(doc.metadata.title)}-${data.id}` `${slugify(doc.metadata.title)}-${data.id}`
); );
writeToServerDocuments(data, fileName, outFolderPath); writeToServerDocuments({
data,
filename: fileName,
destinationOverride: outFolderPath,
});
}); });
return { return {

View File

@ -202,7 +202,11 @@ class DrupalWiki {
console.log( console.log(
`[DrupalWiki Loader]: Saving page '${page.title}' (${page.id}) to '${this.storagePath}/${fileName}'` `[DrupalWiki Loader]: Saving page '${page.title}' (${page.id}) to '${this.storagePath}/${fileName}'`
); );
writeToServerDocuments(data, fileName, this.storagePath); writeToServerDocuments({
data,
filename: fileName,
destinationOverride: this.storagePath,
});
} }
/** /**

View File

@ -66,7 +66,11 @@ async function loadObsidianVault({ files = [] }) {
const targetFileName = sanitizeFileName( const targetFileName = sanitizeFileName(
`${slugify(file.name)}-${data.id}` `${slugify(file.name)}-${data.id}`
); );
writeToServerDocuments(data, targetFileName, outFolderPath); writeToServerDocuments({
data,
filename: targetFileName,
destinationOverride: outFolderPath,
});
results.push({ file: file.path, status: "success" }); results.push({ file: file.path, status: "success" });
} catch (e) { } catch (e) {
console.error(`Failed to process ${file.path}:`, e); console.error(`Failed to process ${file.path}:`, e);

View File

@ -71,11 +71,11 @@ async function loadGithubRepo(args, response) {
console.log( console.log(
`[GitHub Loader]: Saving ${doc.metadata.source} to ${outFolder}` `[GitHub Loader]: Saving ${doc.metadata.source} to ${outFolder}`
); );
writeToServerDocuments( writeToServerDocuments({
data, data,
`${slugify(doc.metadata.source)}-${data.id}`, filename: `${slugify(doc.metadata.source)}-${data.id}`,
outFolderPath destinationOverride: outFolderPath,
); });
} }
return { return {

View File

@ -98,11 +98,11 @@ async function loadGitlabRepo(args, response) {
`[GitLab Loader]: Saving ${doc.metadata.source} to ${outFolder}` `[GitLab Loader]: Saving ${doc.metadata.source} to ${outFolder}`
); );
writeToServerDocuments( writeToServerDocuments({
data, data,
sanitizeFileName(`${slugify(doc.metadata.source)}-${data.id}`), filename: sanitizeFileName(`${slugify(doc.metadata.source)}-${data.id}`),
outFolderPath destinationOverride: outFolderPath,
); });
} }
return { return {

View File

@ -125,7 +125,11 @@ async function bulkScrapePages(links, outFolderPath) {
token_count_estimate: tokenizeString(content), token_count_estimate: tokenizeString(content),
}; };
writeToServerDocuments(data, data.title, outFolderPath); writeToServerDocuments({
data,
filename: data.title,
destinationOverride: outFolderPath,
});
scrapedData.push(data); scrapedData.push(data);
console.log(`Successfully scraped ${link}.`); console.log(`Successfully scraped ${link}.`);

View File

@ -116,11 +116,11 @@ async function loadYouTubeTranscript({ url }) {
}; };
console.log(`[YouTube Loader]: Saving ${metadata.title} to ${outFolder}`); console.log(`[YouTube Loader]: Saving ${metadata.title} to ${outFolder}`);
writeToServerDocuments( writeToServerDocuments({
data, data,
sanitizeFileName(`${slugify(metadata.title)}-${data.id}`), filename: sanitizeFileName(`${slugify(metadata.title)}-${data.id}`),
outFolderPath destinationOverride: outFolderPath,
); });
return { return {
success: true, success: true,

View File

@ -96,11 +96,21 @@ function createdDate(filepath) {
} }
} }
function writeToServerDocuments( /**
* Writes a document to the server documents folder.
* @param {Object} params - The parameters for the function.
* @param {Object} params.data - The data to write to the file. Must look like a document object.
* @param {string} params.filename - The name of the file to write to. Required: an Error ("Filename is required!") is thrown when it is missing or empty.
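* @param {string|null} params.destinationOverride - Optional destination path to write to. When provided it is resolved and used as the destination; otherwise the `custom-documents` folder inside the documents folder is used.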
* @returns {Object} - The data with the location added.
*/
function writeToServerDocuments({
data = {}, data = {},
filename, filename = null,
destinationOverride = null destinationOverride = null,
) { }) {
if (!filename) throw new Error("Filename is required!");
let destination = null; let destination = null;
if (destinationOverride) destination = path.resolve(destinationOverride); if (destinationOverride) destination = path.resolve(destinationOverride);
else destination = path.resolve(documentsFolder, "custom-documents"); else destination = path.resolve(documentsFolder, "custom-documents");