Update writeToServerDocuments to take config object (#4213)
This commit is contained in:
parent
7692775942
commit
70a07b743b
@ -62,10 +62,10 @@ async function scrapeGenericUrl({
|
|||||||
token_count_estimate: tokenizeString(content),
|
token_count_estimate: tokenizeString(content),
|
||||||
};
|
};
|
||||||
|
|
||||||
const document = writeToServerDocuments(
|
const document = writeToServerDocuments({
|
||||||
data,
|
data,
|
||||||
`url-${slugify(filename)}-${data.id}`
|
filename: `url-${slugify(filename)}-${data.id}`,
|
||||||
);
|
});
|
||||||
console.log(`[SUCCESS]: URL ${link} converted & ready for embedding.\n`);
|
console.log(`[SUCCESS]: URL ${link} converted & ready for embedding.\n`);
|
||||||
return { success: true, reason: null, documents: [document] };
|
return { success: true, reason: null, documents: [document] };
|
||||||
}
|
}
|
||||||
|
|||||||
@ -58,10 +58,10 @@ async function processRawText(textContent, metadata) {
|
|||||||
token_count_estimate: tokenizeString(textContent),
|
token_count_estimate: tokenizeString(textContent),
|
||||||
};
|
};
|
||||||
|
|
||||||
const document = writeToServerDocuments(
|
const document = writeToServerDocuments({
|
||||||
data,
|
data,
|
||||||
`raw-${stripAndSlug(metadata.title)}-${data.id}`
|
filename: `raw-${stripAndSlug(metadata.title)}-${data.id}`,
|
||||||
);
|
});
|
||||||
console.log(`[SUCCESS]: Raw text and metadata saved & ready for embedding.\n`);
|
console.log(`[SUCCESS]: Raw text and metadata saved & ready for embedding.\n`);
|
||||||
return { success: true, reason: null, documents: [document] };
|
return { success: true, reason: null, documents: [document] };
|
||||||
}
|
}
|
||||||
|
|||||||
@ -59,10 +59,10 @@ async function asAudio({ fullFilePath = "", filename = "", options = {} }) {
|
|||||||
token_count_estimate: tokenizeString(content),
|
token_count_estimate: tokenizeString(content),
|
||||||
};
|
};
|
||||||
|
|
||||||
const document = writeToServerDocuments(
|
const document = writeToServerDocuments({
|
||||||
data,
|
data,
|
||||||
`${slugify(filename)}-${data.id}`
|
filename: `${slugify(filename)}-${data.id}`,
|
||||||
);
|
});
|
||||||
trashFile(fullFilePath);
|
trashFile(fullFilePath);
|
||||||
console.log(
|
console.log(
|
||||||
`[SUCCESS]: ${filename} transcribed, converted & ready for embedding.\n`
|
`[SUCCESS]: ${filename} transcribed, converted & ready for embedding.\n`
|
||||||
|
|||||||
@ -45,10 +45,10 @@ async function asDocX({ fullFilePath = "", filename = "" }) {
|
|||||||
token_count_estimate: tokenizeString(content),
|
token_count_estimate: tokenizeString(content),
|
||||||
};
|
};
|
||||||
|
|
||||||
const document = writeToServerDocuments(
|
const document = writeToServerDocuments({
|
||||||
data,
|
data,
|
||||||
`${slugify(filename)}-${data.id}`
|
filename: `${slugify(filename)}-${data.id}`,
|
||||||
);
|
});
|
||||||
trashFile(fullFilePath);
|
trashFile(fullFilePath);
|
||||||
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
||||||
return { success: true, reason: null, documents: [document] };
|
return { success: true, reason: null, documents: [document] };
|
||||||
|
|||||||
@ -43,10 +43,10 @@ async function asEPub({ fullFilePath = "", filename = "" }) {
|
|||||||
token_count_estimate: tokenizeString(content),
|
token_count_estimate: tokenizeString(content),
|
||||||
};
|
};
|
||||||
|
|
||||||
const document = writeToServerDocuments(
|
const document = writeToServerDocuments({
|
||||||
data,
|
data,
|
||||||
`${slugify(filename)}-${data.id}`
|
filename: `${slugify(filename)}-${data.id}`,
|
||||||
);
|
});
|
||||||
trashFile(fullFilePath);
|
trashFile(fullFilePath);
|
||||||
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
||||||
return { success: true, reason: null, documents: [document] };
|
return { success: true, reason: null, documents: [document] };
|
||||||
|
|||||||
@ -38,10 +38,10 @@ async function asImage({ fullFilePath = "", filename = "", options = {} }) {
|
|||||||
token_count_estimate: tokenizeString(content),
|
token_count_estimate: tokenizeString(content),
|
||||||
};
|
};
|
||||||
|
|
||||||
const document = writeToServerDocuments(
|
const document = writeToServerDocuments({
|
||||||
data,
|
data,
|
||||||
`${slugify(filename)}-${data.id}`
|
filename: `${slugify(filename)}-${data.id}`,
|
||||||
);
|
});
|
||||||
trashFile(fullFilePath);
|
trashFile(fullFilePath);
|
||||||
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
||||||
return { success: true, reason: null, documents: [document] };
|
return { success: true, reason: null, documents: [document] };
|
||||||
|
|||||||
@ -57,10 +57,10 @@ async function asMbox({ fullFilePath = "", filename = "" }) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
item++;
|
item++;
|
||||||
const document = writeToServerDocuments(
|
const document = writeToServerDocuments({
|
||||||
data,
|
data,
|
||||||
`${slugify(filename)}-${data.id}-msg-${item}`
|
filename: `${slugify(filename)}-${data.id}-msg-${item}`,
|
||||||
);
|
});
|
||||||
documents.push(document);
|
documents.push(document);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -41,10 +41,10 @@ async function asOfficeMime({ fullFilePath = "", filename = "" }) {
|
|||||||
token_count_estimate: tokenizeString(content),
|
token_count_estimate: tokenizeString(content),
|
||||||
};
|
};
|
||||||
|
|
||||||
const document = writeToServerDocuments(
|
const document = writeToServerDocuments({
|
||||||
data,
|
data,
|
||||||
`${slugify(filename)}-${data.id}`
|
filename: `${slugify(filename)}-${data.id}`,
|
||||||
);
|
});
|
||||||
trashFile(fullFilePath);
|
trashFile(fullFilePath);
|
||||||
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
||||||
return { success: true, reason: null, documents: [document] };
|
return { success: true, reason: null, documents: [document] };
|
||||||
|
|||||||
@ -62,10 +62,10 @@ async function asPdf({ fullFilePath = "", filename = "", options = {} }) {
|
|||||||
token_count_estimate: tokenizeString(content),
|
token_count_estimate: tokenizeString(content),
|
||||||
};
|
};
|
||||||
|
|
||||||
const document = writeToServerDocuments(
|
const document = writeToServerDocuments({
|
||||||
data,
|
data,
|
||||||
`${slugify(filename)}-${data.id}`
|
filename: `${slugify(filename)}-${data.id}`,
|
||||||
);
|
});
|
||||||
trashFile(fullFilePath);
|
trashFile(fullFilePath);
|
||||||
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
||||||
return { success: true, reason: null, documents: [document] };
|
return { success: true, reason: null, documents: [document] };
|
||||||
|
|||||||
@ -41,10 +41,10 @@ async function asTxt({ fullFilePath = "", filename = "" }) {
|
|||||||
token_count_estimate: tokenizeString(content),
|
token_count_estimate: tokenizeString(content),
|
||||||
};
|
};
|
||||||
|
|
||||||
const document = writeToServerDocuments(
|
const document = writeToServerDocuments({
|
||||||
data,
|
data,
|
||||||
`${slugify(filename)}-${data.id}`
|
filename: `${slugify(filename)}-${data.id}`,
|
||||||
);
|
});
|
||||||
trashFile(fullFilePath);
|
trashFile(fullFilePath);
|
||||||
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
|
||||||
return { success: true, reason: null, documents: [document] };
|
return { success: true, reason: null, documents: [document] };
|
||||||
|
|||||||
@ -64,11 +64,11 @@ async function asXlsx({ fullFilePath = "", filename = "" }) {
|
|||||||
token_count_estimate: tokenizeString(content),
|
token_count_estimate: tokenizeString(content),
|
||||||
};
|
};
|
||||||
|
|
||||||
const document = writeToServerDocuments(
|
const document = writeToServerDocuments({
|
||||||
sheetData,
|
data: sheetData,
|
||||||
`sheet-${slugify(name)}`,
|
filename: `sheet-${slugify(name)}`,
|
||||||
outFolderPath
|
destinationOverride: outFolderPath,
|
||||||
);
|
});
|
||||||
documents.push(document);
|
documents.push(document);
|
||||||
console.log(
|
console.log(
|
||||||
`[SUCCESS]: Sheet "${name}" converted & ready for embedding.`
|
`[SUCCESS]: Sheet "${name}" converted & ready for embedding.`
|
||||||
|
|||||||
@ -116,7 +116,11 @@ async function loadConfluence(
|
|||||||
const fileName = sanitizeFileName(
|
const fileName = sanitizeFileName(
|
||||||
`${slugify(doc.metadata.title)}-${data.id}`
|
`${slugify(doc.metadata.title)}-${data.id}`
|
||||||
);
|
);
|
||||||
writeToServerDocuments(data, fileName, outFolderPath);
|
writeToServerDocuments({
|
||||||
|
data,
|
||||||
|
filename: fileName,
|
||||||
|
destinationOverride: outFolderPath,
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|||||||
@ -202,7 +202,11 @@ class DrupalWiki {
|
|||||||
console.log(
|
console.log(
|
||||||
`[DrupalWiki Loader]: Saving page '${page.title}' (${page.id}) to '${this.storagePath}/${fileName}'`
|
`[DrupalWiki Loader]: Saving page '${page.title}' (${page.id}) to '${this.storagePath}/${fileName}'`
|
||||||
);
|
);
|
||||||
writeToServerDocuments(data, fileName, this.storagePath);
|
writeToServerDocuments({
|
||||||
|
data,
|
||||||
|
filename: fileName,
|
||||||
|
destinationOverride: this.storagePath,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@ -66,7 +66,11 @@ async function loadObsidianVault({ files = [] }) {
|
|||||||
const targetFileName = sanitizeFileName(
|
const targetFileName = sanitizeFileName(
|
||||||
`${slugify(file.name)}-${data.id}`
|
`${slugify(file.name)}-${data.id}`
|
||||||
);
|
);
|
||||||
writeToServerDocuments(data, targetFileName, outFolderPath);
|
writeToServerDocuments({
|
||||||
|
data,
|
||||||
|
filename: targetFileName,
|
||||||
|
destinationOverride: outFolderPath,
|
||||||
|
});
|
||||||
results.push({ file: file.path, status: "success" });
|
results.push({ file: file.path, status: "success" });
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error(`Failed to process ${file.path}:`, e);
|
console.error(`Failed to process ${file.path}:`, e);
|
||||||
|
|||||||
@ -71,11 +71,11 @@ async function loadGithubRepo(args, response) {
|
|||||||
console.log(
|
console.log(
|
||||||
`[GitHub Loader]: Saving ${doc.metadata.source} to ${outFolder}`
|
`[GitHub Loader]: Saving ${doc.metadata.source} to ${outFolder}`
|
||||||
);
|
);
|
||||||
writeToServerDocuments(
|
writeToServerDocuments({
|
||||||
data,
|
data,
|
||||||
`${slugify(doc.metadata.source)}-${data.id}`,
|
filename: `${slugify(doc.metadata.source)}-${data.id}`,
|
||||||
outFolderPath
|
destinationOverride: outFolderPath,
|
||||||
);
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|||||||
@ -98,11 +98,11 @@ async function loadGitlabRepo(args, response) {
|
|||||||
`[GitLab Loader]: Saving ${doc.metadata.source} to ${outFolder}`
|
`[GitLab Loader]: Saving ${doc.metadata.source} to ${outFolder}`
|
||||||
);
|
);
|
||||||
|
|
||||||
writeToServerDocuments(
|
writeToServerDocuments({
|
||||||
data,
|
data,
|
||||||
sanitizeFileName(`${slugify(doc.metadata.source)}-${data.id}`),
|
filename: sanitizeFileName(`${slugify(doc.metadata.source)}-${data.id}`),
|
||||||
outFolderPath
|
destinationOverride: outFolderPath,
|
||||||
);
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|||||||
@ -125,7 +125,11 @@ async function bulkScrapePages(links, outFolderPath) {
|
|||||||
token_count_estimate: tokenizeString(content),
|
token_count_estimate: tokenizeString(content),
|
||||||
};
|
};
|
||||||
|
|
||||||
writeToServerDocuments(data, data.title, outFolderPath);
|
writeToServerDocuments({
|
||||||
|
data,
|
||||||
|
filename: data.title,
|
||||||
|
destinationOverride: outFolderPath,
|
||||||
|
});
|
||||||
scrapedData.push(data);
|
scrapedData.push(data);
|
||||||
|
|
||||||
console.log(`Successfully scraped ${link}.`);
|
console.log(`Successfully scraped ${link}.`);
|
||||||
|
|||||||
@ -116,11 +116,11 @@ async function loadYouTubeTranscript({ url }) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
console.log(`[YouTube Loader]: Saving ${metadata.title} to ${outFolder}`);
|
console.log(`[YouTube Loader]: Saving ${metadata.title} to ${outFolder}`);
|
||||||
writeToServerDocuments(
|
writeToServerDocuments({
|
||||||
data,
|
data,
|
||||||
sanitizeFileName(`${slugify(metadata.title)}-${data.id}`),
|
filename: sanitizeFileName(`${slugify(metadata.title)}-${data.id}`),
|
||||||
outFolderPath
|
destinationOverride: outFolderPath,
|
||||||
);
|
});
|
||||||
|
|
||||||
return {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
|
|||||||
@ -96,11 +96,21 @@ function createdDate(filepath) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function writeToServerDocuments(
|
/**
|
||||||
|
* Writes a document to the server documents folder.
|
||||||
|
* @param {Object} params - The parameters for the function.
|
||||||
|
* @param {Object} params.data - The data to write to the file. Must look like a document object.
|
||||||
|
* @param {string} params.filename - The name of the file to write to. Required: an Error is thrown if it is missing or empty.
|
||||||
|
* @param {string|null} [params.destinationOverride=null] - Destination path to write to. When provided it is used instead of the default "custom-documents" folder inside the documents folder.
|
||||||
|
* @returns {Object} - The data with the location added.
|
||||||
|
*/
|
||||||
|
function writeToServerDocuments({
|
||||||
data = {},
|
data = {},
|
||||||
filename,
|
filename = null,
|
||||||
destinationOverride = null
|
destinationOverride = null,
|
||||||
) {
|
}) {
|
||||||
|
if (!filename) throw new Error("Filename is required!");
|
||||||
|
|
||||||
let destination = null;
|
let destination = null;
|
||||||
if (destinationOverride) destination = path.resolve(destinationOverride);
|
if (destinationOverride) destination = path.resolve(destinationOverride);
|
||||||
else destination = path.resolve(documentsFolder, "custom-documents");
|
else destination = path.resolve(documentsFolder, "custom-documents");
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user