improve native embedder handling of large files (#584)
* improve native embedder handling of large files * perf changes * ignore storage tmp
This commit is contained in:
parent
b4ab0a5e8c
commit
4f6d93159f
@ -64,8 +64,14 @@ server {
|
|||||||
listen 80;
|
listen 80;
|
||||||
server_name [insert FQDN here];
|
server_name [insert FQDN here];
|
||||||
location / {
|
location / {
|
||||||
|
# Prevent timeouts on long-running requests.
|
||||||
|
proxy_connect_timeout 605;
|
||||||
|
proxy_send_timeout 605;
|
||||||
|
proxy_read_timeout 605;
|
||||||
|
send_timeout 605;
|
||||||
|
keepalive_timeout 605;
|
||||||
proxy_pass http://0.0.0.0:3001;
|
proxy_pass http://0.0.0.0:3001;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
3. Enter ':wq' to save the changes to the anything config file
|
3. Enter ':wq' to save the changes to the anything config file
|
||||||
|
|
||||||
|
|||||||
1
server/.gitignore
vendored
1
server/.gitignore
vendored
@ -3,6 +3,7 @@
|
|||||||
storage/assets/*
|
storage/assets/*
|
||||||
!storage/assets/anything-llm.png
|
!storage/assets/anything-llm.png
|
||||||
storage/documents/*
|
storage/documents/*
|
||||||
|
storage/tmp/*
|
||||||
storage/vector-cache/*.json
|
storage/vector-cache/*.json
|
||||||
storage/exports
|
storage/exports
|
||||||
storage/imports
|
storage/imports
|
||||||
|
|||||||
@ -27,7 +27,7 @@
|
|||||||
"@pinecone-database/pinecone": "^0.1.6",
|
"@pinecone-database/pinecone": "^0.1.6",
|
||||||
"@prisma/client": "5.3.0",
|
"@prisma/client": "5.3.0",
|
||||||
"@qdrant/js-client-rest": "^1.4.0",
|
"@qdrant/js-client-rest": "^1.4.0",
|
||||||
"@xenova/transformers": "^2.10.0",
|
"@xenova/transformers": "^2.14.0",
|
||||||
"@zilliz/milvus2-sdk-node": "^2.3.5",
|
"@zilliz/milvus2-sdk-node": "^2.3.5",
|
||||||
"archiver": "^5.3.1",
|
"archiver": "^5.3.1",
|
||||||
"bcrypt": "^5.1.0",
|
"bcrypt": "^5.1.0",
|
||||||
@ -78,4 +78,4 @@
|
|||||||
"nodemon": "^2.0.22",
|
"nodemon": "^2.0.22",
|
||||||
"prettier": "^3.0.3"
|
"prettier": "^3.0.3"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1,6 +1,7 @@
|
|||||||
const path = require("path");
|
const path = require("path");
|
||||||
const fs = require("fs");
|
const fs = require("fs");
|
||||||
const { toChunks } = require("../../helpers");
|
const { toChunks } = require("../../helpers");
|
||||||
|
const { v4 } = require("uuid");
|
||||||
|
|
||||||
class NativeEmbedder {
|
class NativeEmbedder {
|
||||||
constructor() {
|
constructor() {
|
||||||
@ -15,13 +16,30 @@ class NativeEmbedder {
|
|||||||
this.dimensions = 384;
|
this.dimensions = 384;
|
||||||
|
|
||||||
// Limit of how many strings we can process in a single pass to stay within resource or network limits
|
// Limit of how many strings we can process in a single pass to stay within resource or network limits
|
||||||
this.maxConcurrentChunks = 50;
|
this.maxConcurrentChunks = 25;
|
||||||
this.embeddingMaxChunkLength = 1_000;
|
this.embeddingMaxChunkLength = 1_000;
|
||||||
|
|
||||||
// Make directory when it does not exist in existing installations
|
// Make directory when it does not exist in existing installations
|
||||||
if (!fs.existsSync(this.cacheDir)) fs.mkdirSync(this.cacheDir);
|
if (!fs.existsSync(this.cacheDir)) fs.mkdirSync(this.cacheDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#tempfilePath() {
|
||||||
|
const filename = `${v4()}.tmp`;
|
||||||
|
const tmpPath = process.env.STORAGE_DIR
|
||||||
|
? path.resolve(process.env.STORAGE_DIR, "tmp")
|
||||||
|
: path.resolve(__dirname, `../../../storage/tmp`);
|
||||||
|
if (!fs.existsSync(tmpPath)) fs.mkdirSync(tmpPath, { recursive: true });
|
||||||
|
return path.resolve(tmpPath, filename);
|
||||||
|
}
|
||||||
|
|
||||||
|
async #writeToTempfile(filePath, data) {
|
||||||
|
try {
|
||||||
|
await fs.promises.appendFile(filePath, data, { encoding: "utf8" });
|
||||||
|
} catch (e) {
|
||||||
|
console.error(`Error writing to tempfile: ${e}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async embedderClient() {
|
async embedderClient() {
|
||||||
if (!fs.existsSync(this.modelPath)) {
|
if (!fs.existsSync(this.modelPath)) {
|
||||||
console.log(
|
console.log(
|
||||||
@ -62,18 +80,51 @@ class NativeEmbedder {
|
|||||||
return result?.[0] || [];
|
return result?.[0] || [];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If you are thinking you want to edit this function - you probably don't.
|
||||||
|
// This process was benchmarked heavily on a t3.small (2GB RAM 1vCPU)
|
||||||
|
// and without careful memory management for the V8 garbage collector
|
||||||
|
// this function will likely result in an OOM on any resource-constrained deployment.
|
||||||
|
// To help manage very large documents we run a concurrent write-log each iteration
|
||||||
|
// to keep the embedding result out of memory. The `maxConcurrentChunks` is set to 25,
|
||||||
|
// as 50 seems to overflow no matter what. Given the above, memory use hovers around ~30%
|
||||||
|
// during a very large document (>100K words) but can spike up to 70% before gc.
|
||||||
|
// This seems repeatable for all document sizes.
|
||||||
|
// While this does take a while, it requires zero setup, is 100% free, and runs on-instance.
|
||||||
async embedChunks(textChunks = []) {
|
async embedChunks(textChunks = []) {
|
||||||
const Embedder = await this.embedderClient();
|
const tmpFilePath = this.#tempfilePath();
|
||||||
const embeddingResults = [];
|
const chunks = toChunks(textChunks, this.maxConcurrentChunks);
|
||||||
for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
|
const chunkLen = chunks.length;
|
||||||
const output = await Embedder(chunk, {
|
|
||||||
|
for (let [idx, chunk] of chunks.entries()) {
|
||||||
|
if (idx === 0) await this.#writeToTempfile(tmpFilePath, "[");
|
||||||
|
let data;
|
||||||
|
let pipeline = await this.embedderClient();
|
||||||
|
let output = await pipeline(chunk, {
|
||||||
pooling: "mean",
|
pooling: "mean",
|
||||||
normalize: true,
|
normalize: true,
|
||||||
});
|
});
|
||||||
if (output.length === 0) continue;
|
|
||||||
embeddingResults.push(output.tolist());
|
if (output.length === 0) {
|
||||||
|
pipeline = null;
|
||||||
|
output = null;
|
||||||
|
data = null;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
data = JSON.stringify(output.tolist());
|
||||||
|
await this.#writeToTempfile(tmpFilePath, data);
|
||||||
|
console.log(`\x1b[34m[Embedded Chunk ${idx + 1} of ${chunkLen}]\x1b[0m`);
|
||||||
|
if (chunkLen - 1 !== idx) await this.#writeToTempfile(tmpFilePath, ",");
|
||||||
|
if (chunkLen - 1 === idx) await this.#writeToTempfile(tmpFilePath, "]");
|
||||||
|
pipeline = null;
|
||||||
|
output = null;
|
||||||
|
data = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const embeddingResults = JSON.parse(
|
||||||
|
fs.readFileSync(tmpFilePath, { encoding: "utf-8" })
|
||||||
|
);
|
||||||
|
fs.rmSync(tmpFilePath, { force: true });
|
||||||
return embeddingResults.length > 0 ? embeddingResults.flat() : null;
|
return embeddingResults.length > 0 ? embeddingResults.flat() : null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -269,6 +269,11 @@
|
|||||||
dependencies:
|
dependencies:
|
||||||
"@hapi/hoek" "^9.0.0"
|
"@hapi/hoek" "^9.0.0"
|
||||||
|
|
||||||
|
"@huggingface/jinja@^0.1.0":
|
||||||
|
version "0.1.2"
|
||||||
|
resolved "https://registry.yarnpkg.com/@huggingface/jinja/-/jinja-0.1.2.tgz#073fa0a68ef481a1806b0186bbafd8013e586fbe"
|
||||||
|
integrity sha512-x5mpbfJt1nKmVep5WNP5VjNsjWApWNj8pPYI+uYMkBWH9bWUJmQmHt2lbf0VCoQd54Oq3XuFEh/UyoVh7rPxmg==
|
||||||
|
|
||||||
"@humanwhocodes/config-array@^0.11.13":
|
"@humanwhocodes/config-array@^0.11.13":
|
||||||
version "0.11.13"
|
version "0.11.13"
|
||||||
resolved "https://registry.yarnpkg.com/@humanwhocodes/config-array/-/config-array-0.11.13.tgz#075dc9684f40a531d9b26b0822153c1e832ee297"
|
resolved "https://registry.yarnpkg.com/@humanwhocodes/config-array/-/config-array-0.11.13.tgz#075dc9684f40a531d9b26b0822153c1e832ee297"
|
||||||
@ -851,11 +856,12 @@
|
|||||||
resolved "https://registry.yarnpkg.com/@ungap/structured-clone/-/structured-clone-1.2.0.tgz#756641adb587851b5ccb3e095daf27ae581c8406"
|
resolved "https://registry.yarnpkg.com/@ungap/structured-clone/-/structured-clone-1.2.0.tgz#756641adb587851b5ccb3e095daf27ae581c8406"
|
||||||
integrity sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ==
|
integrity sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ==
|
||||||
|
|
||||||
"@xenova/transformers@^2.10.0":
|
"@xenova/transformers@^2.14.0":
|
||||||
version "2.10.0"
|
version "2.14.0"
|
||||||
resolved "https://registry.yarnpkg.com/@xenova/transformers/-/transformers-2.10.0.tgz#ae97d724a3addf78de7314336a9f7b28ed96a140"
|
resolved "https://registry.yarnpkg.com/@xenova/transformers/-/transformers-2.14.0.tgz#6fe128957e64377ca4fca910e77f6092f3f3512a"
|
||||||
integrity sha512-Al9WKiOsimAC3mU9Ef434GkHF0izmeAM7mMMx5npdWsWLAYL8fmJXCrULj6uCfjomMQ7jyN9rDtKpp570hffiw==
|
integrity sha512-rQ3O7SW5EM64b6XFZGx3XQ2cfiroefxUwU9ShfSpEZyhd082GvwNJJKndxgaukse1hZP1JUDoT0DfjDiq4IZiw==
|
||||||
dependencies:
|
dependencies:
|
||||||
|
"@huggingface/jinja" "^0.1.0"
|
||||||
onnxruntime-web "1.14.0"
|
onnxruntime-web "1.14.0"
|
||||||
sharp "^0.32.0"
|
sharp "^0.32.0"
|
||||||
optionalDependencies:
|
optionalDependencies:
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user