3416 file picker load times (#3833)
* wip * implement conditional streaming * no bench
This commit is contained in:
parent
9371bcc71e
commit
3bbceb82aa
@ -26,6 +26,7 @@ async function fileData(filePath = null) {
|
|||||||
|
|
||||||
async function viewLocalFiles() {
|
async function viewLocalFiles() {
|
||||||
if (!fs.existsSync(documentsPath)) fs.mkdirSync(documentsPath);
|
if (!fs.existsSync(documentsPath)) fs.mkdirSync(documentsPath);
|
||||||
|
const filePromises = [];
|
||||||
const liveSyncAvailable = await DocumentSyncQueue.enabled();
|
const liveSyncAvailable = await DocumentSyncQueue.enabled();
|
||||||
const directory = {
|
const directory = {
|
||||||
name: "documents",
|
name: "documents",
|
||||||
@ -43,28 +44,26 @@ async function viewLocalFiles() {
|
|||||||
type: "folder",
|
type: "folder",
|
||||||
items: [],
|
items: [],
|
||||||
};
|
};
|
||||||
|
|
||||||
const subfiles = fs.readdirSync(folderPath);
|
const subfiles = fs.readdirSync(folderPath);
|
||||||
const filenames = {};
|
const filenames = {};
|
||||||
|
|
||||||
for (const subfile of subfiles) {
|
for (let i = 0; i < subfiles.length; i++) {
|
||||||
if (path.extname(subfile) !== ".json") continue;
|
const subfile = subfiles[i];
|
||||||
const filePath = path.join(folderPath, subfile);
|
|
||||||
const rawData = fs.readFileSync(filePath, "utf8");
|
|
||||||
const cachefilename = `${file}/${subfile}`;
|
const cachefilename = `${file}/${subfile}`;
|
||||||
const { pageContent, ...metadata } = JSON.parse(rawData);
|
if (path.extname(subfile) !== ".json") continue;
|
||||||
subdocs.items.push({
|
filePromises.push(
|
||||||
name: subfile,
|
fileToPickerData({
|
||||||
type: "file",
|
pathToFile: path.join(folderPath, subfile),
|
||||||
...metadata,
|
liveSyncAvailable,
|
||||||
cached: await cachedVectorInformation(cachefilename, true),
|
})
|
||||||
canWatch: liveSyncAvailable
|
);
|
||||||
? DocumentSyncQueue.canWatch(metadata)
|
|
||||||
: false,
|
|
||||||
// pinnedWorkspaces: [], // This is the list of workspaceIds that have pinned this document
|
|
||||||
// watched: false, // boolean to indicate if this document is watched in ANY workspace
|
|
||||||
});
|
|
||||||
filenames[cachefilename] = subfile;
|
filenames[cachefilename] = subfile;
|
||||||
}
|
}
|
||||||
|
const results = await Promise.all(filePromises).then((results) =>
|
||||||
|
results.filter((i) => !!i)
|
||||||
|
); // Filter out any null results
|
||||||
|
subdocs.items.push(...results);
|
||||||
|
|
||||||
// Grab the pinned workspaces and watched documents for this folder's documents
|
// Grab the pinned workspaces and watched documents for this folder's documents
|
||||||
// at the time of the query so we don't have to re-query the database for each file
|
// at the time of the query so we don't have to re-query the database for each file
|
||||||
@ -335,6 +334,100 @@ function purgeEntireVectorCache() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * File size threshold, in bytes (150 MB), for reading a document file with a
 * single synchronous read.
 *
 * Files smaller than this are read into memory in one synchronous call. Files
 * at or above it are read through a stream instead, so that loading the file
 * picker does not block on one very large synchronous read.
 *
 * @TODO - When lazy loading for folders is implemented, we should increase this threshold (512MB)
 * since it will always be faster to readSync than to stream the file.
 */
const FILE_READ_SIZE_THRESHOLD = 150 * (1024 * 1024);
|
||||||
|
|
||||||
|
/**
 * Reads a UTF-8 file through a read stream and parses the full content as JSON.
 * Stream errors and JSON syntax errors both surface as a rejected promise, and
 * the stream is always destroyed before returning.
 * @param {string} pathToFile - The path to the file to read
 * @returns {Promise<any>} - The parsed JSON value
 */
async function readJsonFileViaStream(pathToFile) {
  const stream = fs.createReadStream(pathToFile, { encoding: "utf8" });
  try {
    let fileContent = "";
    for await (const chunk of stream) fileContent += chunk;
    return JSON.parse(fileContent);
  } finally {
    stream.destroy();
  }
}

/**
 * Converts a stored document JSON file into the entry shown in the file picker.
 *
 * The file is parsed as JSON and its `pageContent` field is removed, since it
 * is large and not needed for the picker. Files smaller than
 * FILE_READ_SIZE_THRESHOLD are read synchronously; larger files are read
 * through a stream.
 *
 * @param {Object} params
 * @param {string} params.pathToFile - The path to the file to convert
 * @param {boolean} [params.liveSyncAvailable=false] - Whether live sync is available
 * @param {string|null} [params.cachefilename=null] - Key used for the vector-cache lookup.
 *   Defaults to `<parent folder name>/<file name>` derived from `pathToFile`.
 * @returns {Promise<{name: string, type: string, [string]: any, cached: boolean, canWatch: boolean}|null>}
 *   The picker data, or null when the file cannot be read or parsed into a metadata object.
 */
async function fileToPickerData({
  pathToFile,
  liveSyncAvailable = false,
  cachefilename = null,
}) {
  const filename = path.basename(pathToFile);

  // A file that vanished or is unreadable yields null, like any other
  // unparsable file, instead of rejecting the caller's whole batch.
  let fileStats;
  try {
    fileStats = fs.statSync(pathToFile);
  } catch (err) {
    console.error("Error reading file", err);
    return null;
  }

  const isLargeFile = fileStats.size >= FILE_READ_SIZE_THRESHOLD;
  let metadata;
  try {
    if (isLargeFile) {
      console.log(
        `Stream-parsing ${filename} because it exceeds the ${FILE_READ_SIZE_THRESHOLD} byte limit.`
      );
      metadata = await readJsonFileViaStream(pathToFile);
    } else {
      metadata = JSON.parse(fs.readFileSync(pathToFile, "utf8"));
    }
  } catch (err) {
    console.error("Error parsing file", err);
    if (isLargeFile) console.log(`Stream-parsing failed for ${filename}`);
    return null;
  }

  // Only a plain JSON object can be spread into a picker entry; null, arrays
  // and primitives are treated as unparsable documents.
  if (
    metadata === null ||
    typeof metadata !== "object" ||
    Array.isArray(metadata)
  ) {
    console.error("Error parsing file", `${filename} is not a JSON object`);
    return null;
  }

  // Remove the pageContent field from the metadata - it is large and not needed for the picker
  delete metadata.pageContent;

  // Streamed files that carry no metadata besides pageContent are skipped.
  if (isLargeFile && Object.keys(metadata).length === 0) {
    console.log(`Stream-parsing failed for ${filename}`);
    return null;
  }

  // NOTE(review): the vector cache looks to be keyed by "<folder>/<file>"
  // rather than by absolute path - confirm against cachedVectorInformation.
  const cacheKey =
    cachefilename ?? `${path.basename(path.dirname(pathToFile))}/${filename}`;
  const cachedStatus = await cachedVectorInformation(cacheKey, true);

  // Evaluate watchability against the parsed metadata, not an empty placeholder.
  const canWatchStatus = liveSyncAvailable
    ? DocumentSyncQueue.canWatch(metadata)
    : false;

  return {
    name: filename,
    type: "file",
    ...metadata,
    cached: cachedStatus,
    canWatch: canWatchStatus,
    // pinnedWorkspaces: [], // This is the list of workspaceIds that have pinned this document
    // watched: false, // boolean to indicate if this document is watched in ANY workspace
  };
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
findDocumentInDocuments,
|
findDocumentInDocuments,
|
||||||
cachedVectorInformation,
|
cachedVectorInformation,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user