Add fallback URL for reranking model

2025-01-07 15:09:54 -08:00 · 2025-01-07 15:09:54 -08:00 · 6134c15ca4
commit 6134c15ca4
parent 55e0949e1f
1 changed files with 113 additions and 25 deletions
--- a/server/utils/EmbeddingRerankers/native/index.js
+++ b/server/utils/EmbeddingRerankers/native/index.js
@ -6,6 +6,11 @@ class NativeEmbeddingReranker {
  static #tokenizer = null;
  static #transformers = null;
  // This is a folder that Mintplex Labs hosts for those who cannot reach the HF model download
  // endpoint for various reasons. This endpoint is not guaranteed to be active or maintained
  // and may go offline at any time at Mintplex Labs' discretion.
  #fallbackHost = "https://cdn.useanything.com/support/models/";
  constructor() {
    // An alternative model to the mixedbread-ai/mxbai-rerank-xsmall-v1 model (speed on CPU is much slower for this model @ 18docs = 6s)
    // Model Card: https://huggingface.co/Xenova/ms-marco-MiniLM-L-6-v2 (speed on CPU is much faster @ 18docs = 1.6s)
@ -18,6 +23,10 @@ class NativeEmbeddingReranker {
    this.modelPath = path.resolve(this.cacheDir, ...this.model.split("/"));
    // Make directory when it does not exist in existing installations
    if (!fs.existsSync(this.cacheDir)) fs.mkdirSync(this.cacheDir);
    this.modelDownloaded = fs.existsSync(
      path.resolve(this.cacheDir, this.model)
    );
    this.log("Initialized");
  }
@ -25,6 +34,20 @@ class NativeEmbeddingReranker {
    console.log(`\x1b[36m[NativeEmbeddingReranker]\x1b[0m ${text}`, ...args);
  }
  /**
   * This function will return the host of the current reranker suite.
   * If the reranker suite is not initialized, it will return the default HF host.
   * @returns {string} The host of the current reranker suite.
   */
  get host() {
    if (!NativeEmbeddingReranker.#transformers) return "https://huggingface.co";
    try {
      return new URL(NativeEmbeddingReranker.#transformers.env.remoteHost).host;
    } catch (e) {
      return this.#fallbackHost;
    }
  }
  /**
   * This function will preload the reranker suite and tokenizer.
   * This is useful for reducing the latency of the first rerank call and pre-downloading the models and such
@ -54,12 +77,17 @@ class NativeEmbeddingReranker {
    }
    await import("@xenova/transformers").then(
-      async ({ AutoModelForSequenceClassification, AutoTokenizer }) => {
+      async ({ AutoModelForSequenceClassification, AutoTokenizer, env }) => {
        this.log(`Loading reranker suite...`);
        NativeEmbeddingReranker.#transformers = {
          AutoModelForSequenceClassification,
          AutoTokenizer,
          env,
        };
        // Attempt to load the model and tokenizer in this order:
        // 1. From local file system cache
        // 2. Download and cache from remote host (hf.co)
        // 3. Download and cache from fallback host (cdn.useanything.com)
        await this.#getPreTrainedModel();
        await this.#getPreTrainedTokenizer();
      }
@ -67,46 +95,106 @@ class NativeEmbeddingReranker {
    return;
  }
  /**
   * Loads the reranker model and stores it on the class as a singleton.
   * - If the static singleton already holds a model, it is returned without loading anything.
   * - Otherwise `from_pretrained` is called with `cache_dir` set to `this.cacheDir`. Resolving
   *   from local cache vs. the remote host presumably happens inside the library (not visible here).
   * - If that call throws for ANY reason (not only "not found"), the error is logged, the shared
   *   transformers `env` is pointed at the fallback host, and the load is retried once.
   * Progress is only logged when `this.modelDownloaded` is false.
   * @returns {Promise<any>} The loaded model.
   * @throws Rethrows the load error when the remote host is already the fallback host.
   */
  async #getPreTrainedModel() {
    if (NativeEmbeddingReranker.#model) {
      this.log(`Loading model from singleton...`);
      return NativeEmbeddingReranker.#model;
    }
-    const model =
+    try {
-      await NativeEmbeddingReranker.#transformers.AutoModelForSequenceClassification.from_pretrained(
+      const model =
-        this.model,
+        await NativeEmbeddingReranker.#transformers.AutoModelForSequenceClassification.from_pretrained(
-        {
+          this.model,
-          progress_callback: (p) =>
+          {
-            p.status === "progress" &&
+            progress_callback: (p) => {
-            this.log(`Loading model ${this.model}... ${p?.progress}%`),
+              if (!this.modelDownloaded && p.status === "progress") {
-          cache_dir: this.cacheDir,
+                this.log(
-        }
+                  `[${this.host}] Loading model ${this.model}... ${p?.progress}%`
                );
              }
            },
            cache_dir: this.cacheDir,
          }
        );
      this.log(`Loaded model ${this.model}`);
      NativeEmbeddingReranker.#model = model;
      return model;
    } catch (e) {
      // Log which host the failed attempt was made against before deciding whether to retry.
      this.log(
        `Failed to load model ${this.model} from ${this.host}.`,
        e.message,
        e.stack
      );
-    this.log(`Loaded model ${this.model}`);
+      if (
-    NativeEmbeddingReranker.#model = model;
+        NativeEmbeddingReranker.#transformers.env.remoteHost ===
-    return model;
+        this.#fallbackHost
      ) {
        this.log(`Failed to load model ${this.model} from fallback host.`);
        throw e;
      }
      // Repoint the shared transformers env at the fallback host and retry. The guard above
      // stops the recursion after one retry, since remoteHost now equals the fallback host.
      // NOTE(review): env lives on the static #transformers object, so this change persists
      // for later loads (including the tokenizer) - confirm that is intended.
      this.log(`Falling back to fallback host. ${this.#fallbackHost}`);
      NativeEmbeddingReranker.#transformers.env.remoteHost = this.#fallbackHost;
      NativeEmbeddingReranker.#transformers.env.remotePathTemplate = "{model}/";
      return await this.#getPreTrainedModel();
    }
  }
  /**
   * Loads the reranker tokenizer and stores it on the class as a singleton.
   * - If the static singleton already holds a tokenizer, it is returned without loading anything.
   * - Otherwise `from_pretrained` is called with `cache_dir` set to `this.cacheDir`. Resolving
   *   from local cache vs. the remote host presumably happens inside the library (not visible here).
   * - If that call throws for ANY reason (not only "not found"), the error is logged, the shared
   *   transformers `env` is pointed at the fallback host, and the load is retried once.
   * Progress is only logged when `this.modelDownloaded` is false.
   * @returns {Promise<any>} The loaded tokenizer.
   * @throws Rethrows the load error when the remote host is already the fallback host.
   */
  async #getPreTrainedTokenizer() {
    if (NativeEmbeddingReranker.#tokenizer) {
      this.log(`Loading tokenizer from singleton...`);
      return NativeEmbeddingReranker.#tokenizer;
    }
-    const tokenizer =
+    try {
-      await NativeEmbeddingReranker.#transformers.AutoTokenizer.from_pretrained(
+      const tokenizer =
-        this.model,
+        await NativeEmbeddingReranker.#transformers.AutoTokenizer.from_pretrained(
-        {
+          this.model,
-          progress_callback: (p) =>
+          {
-            p.status === "progress" &&
+            progress_callback: (p) => {
-            this.log(`Loading tokenizer ${this.model}... ${p?.progress}%`),
+              if (!this.modelDownloaded && p.status === "progress") {
-          cache_dir: this.cacheDir,
+                this.log(
-        }
+                  `[${this.host}] Loading tokenizer ${this.model}... ${p?.progress}%`
                );
              }
            },
            cache_dir: this.cacheDir,
          }
        );
      this.log(`Loaded tokenizer ${this.model}`);
      NativeEmbeddingReranker.#tokenizer = tokenizer;
      return tokenizer;
    } catch (e) {
      // Log which host the failed attempt was made against before deciding whether to retry.
      this.log(
        `Failed to load tokenizer ${this.model} from ${this.host}.`,
        e.message,
        e.stack
      );
-    this.log(`Loaded tokenizer ${this.model}`);
+      if (
-    NativeEmbeddingReranker.#tokenizer = tokenizer;
+        NativeEmbeddingReranker.#transformers.env.remoteHost ===
-    return tokenizer;
+        this.#fallbackHost
      ) {
        this.log(`Failed to load tokenizer ${this.model} from fallback host.`);
        throw e;
      }
      // Repoint the shared transformers env at the fallback host and retry. The guard above
      // stops the recursion after one retry, since remoteHost now equals the fallback host.
      // NOTE(review): if an earlier load already switched env to the fallback host, a failure
      // here rethrows immediately without any retry - confirm that is intended.
      this.log(`Falling back to fallback host. ${this.#fallbackHost}`);
      NativeEmbeddingReranker.#transformers.env.remoteHost = this.#fallbackHost;
      NativeEmbeddingReranker.#transformers.env.remotePathTemplate = "{model}/";
      return await this.#getPreTrainedTokenizer();
    }
  }
  /**