Update writeToServerDocuments to take config object (#4213)
This commit is contained in:
parent
7692775942
commit
70a07b743b
@ -62,10 +62,10 @@ async function scrapeGenericUrl({
|
||||
token_count_estimate: tokenizeString(content),
|
||||
};
|
||||
|
||||
const document = writeToServerDocuments(
|
||||
const document = writeToServerDocuments({
|
||||
data,
|
||||
`url-${slugify(filename)}-${data.id}`
|
||||
);
|
||||
filename: `url-${slugify(filename)}-${data.id}`,
|
||||
});
|
||||
console.log(`[SUCCESS]: URL ${link} converted & ready for embedding.\n`);
|
||||
return { success: true, reason: null, documents: [document] };
|
||||
}
|
||||
|
||||
@ -58,10 +58,10 @@ async function processRawText(textContent, metadata) {
|
||||
token_count_estimate: tokenizeString(textContent),
|
||||
};
|
||||
|
||||
const document = writeToServerDocuments(
|
||||
const document = writeToServerDocuments({
|
||||
data,
|
||||
`raw-${stripAndSlug(metadata.title)}-${data.id}`
|
||||
);
|
||||
filename: `raw-${stripAndSlug(metadata.title)}-${data.id}`,
|
||||
});
|
||||
console.log(`[SUCCESS]: Raw text and metadata saved & ready for embedding.\n`);
|
||||
return { success: true, reason: null, documents: [document] };
|
||||
}
|
||||
|
||||
@ -59,10 +59,10 @@ async function asAudio({ fullFilePath = "", filename = "", options = {} }) {
|
||||
token_count_estimate: tokenizeString(content),
|
||||
};
|
||||
|
||||
const document = writeToServerDocuments(
|
||||
const document = writeToServerDocuments({
|
||||
data,
|
||||
`${slugify(filename)}-${data.id}`
|
||||
);
|
||||
filename: `${slugify(filename)}-${data.id}`,
|
||||
});
|
||||
trashFile(fullFilePath);
|
||||
console.log(
|
||||
`[SUCCESS]: ${filename} transcribed, converted & ready for embedding.\n`
|
||||
|
||||
@ -45,10 +45,10 @@ async function asDocX({ fullFilePath = "", filename = "" }) {
|
||||
token_count_estimate: tokenizeString(content),
|
||||
};
|
||||
|
||||
const document = writeToServerDocuments(
|
||||
const document = writeToServerDocuments({
|
||||
data,
|
||||
`${slugify(filename)}-${data.id}`
|
||||
);
|
||||
filename: `${slugify(filename)}-${data.id}`,
|
||||
});
|
||||
trashFile(fullFilePath);
|
||||
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
||||
return { success: true, reason: null, documents: [document] };
|
||||
|
||||
@ -43,10 +43,10 @@ async function asEPub({ fullFilePath = "", filename = "" }) {
|
||||
token_count_estimate: tokenizeString(content),
|
||||
};
|
||||
|
||||
const document = writeToServerDocuments(
|
||||
const document = writeToServerDocuments({
|
||||
data,
|
||||
`${slugify(filename)}-${data.id}`
|
||||
);
|
||||
filename: `${slugify(filename)}-${data.id}`,
|
||||
});
|
||||
trashFile(fullFilePath);
|
||||
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
||||
return { success: true, reason: null, documents: [document] };
|
||||
|
||||
@ -38,10 +38,10 @@ async function asImage({ fullFilePath = "", filename = "", options = {} }) {
|
||||
token_count_estimate: tokenizeString(content),
|
||||
};
|
||||
|
||||
const document = writeToServerDocuments(
|
||||
const document = writeToServerDocuments({
|
||||
data,
|
||||
`${slugify(filename)}-${data.id}`
|
||||
);
|
||||
filename: `${slugify(filename)}-${data.id}`,
|
||||
});
|
||||
trashFile(fullFilePath);
|
||||
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
||||
return { success: true, reason: null, documents: [document] };
|
||||
|
||||
@ -57,10 +57,10 @@ async function asMbox({ fullFilePath = "", filename = "" }) {
|
||||
};
|
||||
|
||||
item++;
|
||||
const document = writeToServerDocuments(
|
||||
const document = writeToServerDocuments({
|
||||
data,
|
||||
`${slugify(filename)}-${data.id}-msg-${item}`
|
||||
);
|
||||
filename: `${slugify(filename)}-${data.id}-msg-${item}`,
|
||||
});
|
||||
documents.push(document);
|
||||
}
|
||||
|
||||
|
||||
@ -41,10 +41,10 @@ async function asOfficeMime({ fullFilePath = "", filename = "" }) {
|
||||
token_count_estimate: tokenizeString(content),
|
||||
};
|
||||
|
||||
const document = writeToServerDocuments(
|
||||
const document = writeToServerDocuments({
|
||||
data,
|
||||
`${slugify(filename)}-${data.id}`
|
||||
);
|
||||
filename: `${slugify(filename)}-${data.id}`,
|
||||
});
|
||||
trashFile(fullFilePath);
|
||||
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
||||
return { success: true, reason: null, documents: [document] };
|
||||
|
||||
@ -62,10 +62,10 @@ async function asPdf({ fullFilePath = "", filename = "", options = {} }) {
|
||||
token_count_estimate: tokenizeString(content),
|
||||
};
|
||||
|
||||
const document = writeToServerDocuments(
|
||||
const document = writeToServerDocuments({
|
||||
data,
|
||||
`${slugify(filename)}-${data.id}`
|
||||
);
|
||||
filename: `${slugify(filename)}-${data.id}`,
|
||||
});
|
||||
trashFile(fullFilePath);
|
||||
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
||||
return { success: true, reason: null, documents: [document] };
|
||||
|
||||
@ -41,10 +41,10 @@ async function asTxt({ fullFilePath = "", filename = "" }) {
|
||||
token_count_estimate: tokenizeString(content),
|
||||
};
|
||||
|
||||
const document = writeToServerDocuments(
|
||||
const document = writeToServerDocuments({
|
||||
data,
|
||||
`${slugify(filename)}-${data.id}`
|
||||
);
|
||||
filename: `${slugify(filename)}-${data.id}`,
|
||||
});
|
||||
trashFile(fullFilePath);
|
||||
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
||||
return { success: true, reason: null, documents: [document] };
|
||||
|
||||
@ -64,11 +64,11 @@ async function asXlsx({ fullFilePath = "", filename = "" }) {
|
||||
token_count_estimate: tokenizeString(content),
|
||||
};
|
||||
|
||||
const document = writeToServerDocuments(
|
||||
sheetData,
|
||||
`sheet-${slugify(name)}`,
|
||||
outFolderPath
|
||||
);
|
||||
const document = writeToServerDocuments({
|
||||
data: sheetData,
|
||||
filename: `sheet-${slugify(name)}`,
|
||||
destinationOverride: outFolderPath,
|
||||
});
|
||||
documents.push(document);
|
||||
console.log(
|
||||
`[SUCCESS]: Sheet "${name}" converted & ready for embedding.`
|
||||
|
||||
@ -116,7 +116,11 @@ async function loadConfluence(
|
||||
const fileName = sanitizeFileName(
|
||||
`${slugify(doc.metadata.title)}-${data.id}`
|
||||
);
|
||||
writeToServerDocuments(data, fileName, outFolderPath);
|
||||
writeToServerDocuments({
|
||||
data,
|
||||
filename: fileName,
|
||||
destinationOverride: outFolderPath,
|
||||
});
|
||||
});
|
||||
|
||||
return {
|
||||
|
||||
@ -202,7 +202,11 @@ class DrupalWiki {
|
||||
console.log(
|
||||
`[DrupalWiki Loader]: Saving page '${page.title}' (${page.id}) to '${this.storagePath}/${fileName}'`
|
||||
);
|
||||
writeToServerDocuments(data, fileName, this.storagePath);
|
||||
writeToServerDocuments({
|
||||
data,
|
||||
filename: fileName,
|
||||
destinationOverride: this.storagePath,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@ -66,7 +66,11 @@ async function loadObsidianVault({ files = [] }) {
|
||||
const targetFileName = sanitizeFileName(
|
||||
`${slugify(file.name)}-${data.id}`
|
||||
);
|
||||
writeToServerDocuments(data, targetFileName, outFolderPath);
|
||||
writeToServerDocuments({
|
||||
data,
|
||||
filename: targetFileName,
|
||||
destinationOverride: outFolderPath,
|
||||
});
|
||||
results.push({ file: file.path, status: "success" });
|
||||
} catch (e) {
|
||||
console.error(`Failed to process ${file.path}:`, e);
|
||||
|
||||
@ -71,11 +71,11 @@ async function loadGithubRepo(args, response) {
|
||||
console.log(
|
||||
`[GitHub Loader]: Saving ${doc.metadata.source} to ${outFolder}`
|
||||
);
|
||||
writeToServerDocuments(
|
||||
writeToServerDocuments({
|
||||
data,
|
||||
`${slugify(doc.metadata.source)}-${data.id}`,
|
||||
outFolderPath
|
||||
);
|
||||
filename: `${slugify(doc.metadata.source)}-${data.id}`,
|
||||
destinationOverride: outFolderPath,
|
||||
});
|
||||
}
|
||||
|
||||
return {
|
||||
|
||||
@ -98,11 +98,11 @@ async function loadGitlabRepo(args, response) {
|
||||
`[GitLab Loader]: Saving ${doc.metadata.source} to ${outFolder}`
|
||||
);
|
||||
|
||||
writeToServerDocuments(
|
||||
writeToServerDocuments({
|
||||
data,
|
||||
sanitizeFileName(`${slugify(doc.metadata.source)}-${data.id}`),
|
||||
outFolderPath
|
||||
);
|
||||
filename: sanitizeFileName(`${slugify(doc.metadata.source)}-${data.id}`),
|
||||
destinationOverride: outFolderPath,
|
||||
});
|
||||
}
|
||||
|
||||
return {
|
||||
|
||||
@ -125,7 +125,11 @@ async function bulkScrapePages(links, outFolderPath) {
|
||||
token_count_estimate: tokenizeString(content),
|
||||
};
|
||||
|
||||
writeToServerDocuments(data, data.title, outFolderPath);
|
||||
writeToServerDocuments({
|
||||
data,
|
||||
filename: data.title,
|
||||
destinationOverride: outFolderPath,
|
||||
});
|
||||
scrapedData.push(data);
|
||||
|
||||
console.log(`Successfully scraped ${link}.`);
|
||||
|
||||
@ -116,11 +116,11 @@ async function loadYouTubeTranscript({ url }) {
|
||||
};
|
||||
|
||||
console.log(`[YouTube Loader]: Saving ${metadata.title} to ${outFolder}`);
|
||||
writeToServerDocuments(
|
||||
writeToServerDocuments({
|
||||
data,
|
||||
sanitizeFileName(`${slugify(metadata.title)}-${data.id}`),
|
||||
outFolderPath
|
||||
);
|
||||
filename: sanitizeFileName(`${slugify(metadata.title)}-${data.id}`),
|
||||
destinationOverride: outFolderPath,
|
||||
});
|
||||
|
||||
return {
|
||||
success: true,
|
||||
|
||||
@ -96,11 +96,21 @@ function createdDate(filepath) {
|
||||
}
|
||||
}
|
||||
|
||||
function writeToServerDocuments(
|
||||
/**
|
||||
* Writes a document to the server documents folder.
|
||||
* @param {Object} params - The parameters for the function.
|
||||
* @param {Object} params.data - The data to write to the file. Must look like a document object.
|
||||
* @param {string} params.filename - The name of the file to write to.
|
||||
* @param {string|null} params.destinationOverride - A forced destination to write to - will be honored if provided.
|
||||
* @returns {Object} - The data with the location added.
|
||||
*/
|
||||
function writeToServerDocuments({
|
||||
data = {},
|
||||
filename,
|
||||
destinationOverride = null
|
||||
) {
|
||||
filename = null,
|
||||
destinationOverride = null,
|
||||
}) {
|
||||
if (!filename) throw new Error("Filename is required!");
|
||||
|
||||
let destination = null;
|
||||
if (destinationOverride) destination = path.resolve(destinationOverride);
|
||||
else destination = path.resolve(documentsFolder, "custom-documents");
|
||||
|
||||
Loading…
Reference in New Issue
Block a user