Sync models from remote for FireworksAI (#4475)

resolves #4474
This commit is contained in:
Timothy Carambat 2025-10-02 12:34:05 -07:00 committed by GitHub
parent 0b18ac6577
commit 8cdadd8cb3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 140 additions and 211 deletions

View File

@ -2,6 +2,11 @@ import System from "@/models/system";
import { useState, useEffect } from "react";
export default function FireworksAiOptions({ settings }) {
const [inputValue, setInputValue] = useState(settings?.FireworksAiLLMApiKey);
const [fireworksAiApiKey, setFireworksAiApiKey] = useState(
settings?.FireworksAiLLMApiKey
);
return (
<div className="flex gap-[36px] mt-1.5">
<div className="flex flex-col w-60">
@ -17,22 +22,27 @@ export default function FireworksAiOptions({ settings }) {
required={true}
autoComplete="off"
spellCheck={false}
onChange={(e) => setInputValue(e.target.value)}
onBlur={() => setFireworksAiApiKey(inputValue)}
/>
</div>
{!settings?.credentialsOnly && (
<FireworksAiModelSelection settings={settings} />
<FireworksAiModelSelection
apiKey={fireworksAiApiKey}
settings={settings}
/>
)}
</div>
);
}
function FireworksAiModelSelection({ settings }) {
function FireworksAiModelSelection({ apiKey, settings }) {
const [groupedModels, setGroupedModels] = useState({});
const [loading, setLoading] = useState(true);
useEffect(() => {
async function findCustomModels() {
setLoading(true);
const { models } = await System.customModels("fireworksai");
const { models } = await System.customModels("fireworksai", apiKey);
if (models?.length > 0) {
const modelsByOrganization = models.reduce((acc, model) => {
@ -47,7 +57,7 @@ function FireworksAiModelSelection({ settings }) {
setLoading(false);
}
findCustomModels();
}, []);
}, [apiKey]);
if (loading || Object.keys(groupedModels).length === 0) {
return (

View File

@ -11,4 +11,5 @@ tesseract
ppio
context-windows/*
MintplexLabs
cometapi
fireworks

View File

@ -1,3 +1,6 @@
const fs = require("fs");
const path = require("path");
const { safeJsonParse } = require("../../http");
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const {
LLMPerformanceMonitor,
@ -6,13 +9,16 @@ const {
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
// Loads the static, hand-maintained FireworksAI model catalog that ships
// with this provider. Returns an empty object when the catalog is absent.
function fireworksAiModels() {
  const { MODELS } = require("./models.js");
  if (!MODELS) return {};
  return MODELS;
}
const cacheFolder = path.resolve(
process.env.STORAGE_DIR
? path.resolve(process.env.STORAGE_DIR, "models", "fireworks")
: path.resolve(__dirname, `../../../storage/models/fireworks`)
);
class FireworksAiLLM {
constructor(embedder = null, modelPreference = null) {
this.className = "FireworksAiLLM";
if (!process.env.FIREWORKS_AI_LLM_API_KEY)
throw new Error("No FireworksAI API key was set.");
const { OpenAI: OpenAIApi } = require("openai");
@ -29,6 +35,51 @@ class FireworksAiLLM {
this.embedder = !embedder ? new NativeEmbedder() : embedder;
this.defaultTemp = 0.7;
if (!fs.existsSync(cacheFolder))
fs.mkdirSync(cacheFolder, { recursive: true });
this.cacheModelPath = path.resolve(cacheFolder, "models.json");
this.cacheAtPath = path.resolve(cacheFolder, ".cached_at");
}
log(text, ...args) {
console.log(`\x1b[36m[${this.className}]\x1b[0m ${text}`, ...args);
}
// This checks if the .cached_at file has a timestamp that is more than 1Week (in millis)
// from the current date. If it is, then we will refetch the API so that all the models are up
// to date.
#cacheIsStale() {
const MAX_STALE = 6.048e8; // 1 Week in MS
if (!fs.existsSync(this.cacheAtPath)) return true;
const now = Number(new Date());
const timestampMs = Number(fs.readFileSync(this.cacheAtPath));
return now - timestampMs > MAX_STALE;
}
// This function fetches the models from the ApiPie API and caches them locally.
// We do this because the ApiPie API has a lot of models, and we need to get the proper token context window
// for each model and this is a constructor property - so we can really only get it if this cache exists.
// We used to have this as a chore, but given there is an API to get the info - this makes little sense.
// This might slow down the first request, but we need the proper token context window
// for each model and this is a constructor property - so we can really only get it if this cache exists.
async #syncModels() {
if (fs.existsSync(this.cacheModelPath) && !this.#cacheIsStale())
return false;
this.log(
"Model cache is not present or stale. Fetching from FireworksAI API."
);
await fireworksAiModels();
return;
}
models() {
if (!fs.existsSync(this.cacheModelPath)) return {};
return safeJsonParse(
fs.readFileSync(this.cacheModelPath, { encoding: "utf-8" }),
{}
);
}
#appendContext(contextTexts = []) {
@ -43,28 +94,31 @@ class FireworksAiLLM {
);
}
// Returns the provider's full model catalog keyed by model id.
// NOTE(review): fireworksAiModels is async elsewhere in this file - if that
// version is in scope this returns a Promise; verify callers await it.
allModelInformation() {
  return fireworksAiModels();
}
streamingEnabled() {
return "streamGetChatCompletion" in this;
}
static promptWindowLimit(modelName) {
const availableModels = fireworksAiModels();
const cacheModelPath = path.resolve(cacheFolder, "models.json");
const availableModels = fs.existsSync(cacheModelPath)
? safeJsonParse(
fs.readFileSync(cacheModelPath, { encoding: "utf-8" }),
{}
)
: {};
return availableModels[modelName]?.maxLength || 4096;
}
// Ensure the user set a value for the token limit
// and if undefined - assume 4096 window.
promptWindowLimit() {
const availableModels = this.allModelInformation();
const availableModels = this.models();
return availableModels[this.model]?.maxLength || 4096;
}
async isValidChatCompletionModel(model = "") {
const availableModels = this.allModelInformation();
await this.#syncModels();
const availableModels = this.models();
return availableModels.hasOwnProperty(model);
}
@ -151,6 +205,63 @@ class FireworksAiLLM {
}
}
// Fetches the live model list from the FireworksAI OpenAI-compatible
// /models endpoint, keeps only chat models (those reporting a context
// length), and caches the result plus a fetch timestamp in the local
// storage folder. Returns the model map keyed by model id, or {} when the
// request fails or no chat models are found.
async function fireworksAiModels(providedApiKey = null) {
  const apiKey = providedApiKey || process.env.FIREWORKS_AI_LLM_API_KEY || null;
  const { OpenAI: OpenAIApi } = require("openai");
  const client = new OpenAIApi({
    baseURL: "https://api.fireworks.ai/inference/v1",
    apiKey: apiKey,
  });

  try {
    const response = await client.models.list();
    const models = response.data ?? [];
    const validModels = {};
    for (const model of models) {
      // There are many models - the ones without a context length are not
      // chat models, so skip them.
      if (!model.hasOwnProperty("context_length")) continue;
      validModels[model.id] = {
        id: model.id,
        name: model.id.split("/").pop(),
        organization: model.owned_by,
        subtype: model.type,
        maxLength: model.context_length ?? 4096,
      };
    }

    if (Object.keys(validModels).length === 0) {
      console.log("fireworksAi: No models found");
      return {};
    }

    // Persist the model list and a fetch timestamp so future constructions
    // can read context windows without hitting the API again.
    if (!fs.existsSync(cacheFolder))
      fs.mkdirSync(cacheFolder, { recursive: true });
    fs.writeFileSync(
      path.resolve(cacheFolder, "models.json"),
      JSON.stringify(validModels),
      { encoding: "utf-8" }
    );
    fs.writeFileSync(
      path.resolve(cacheFolder, ".cached_at"),
      String(Number(new Date())),
      { encoding: "utf-8" }
    );
    return validModels;
  } catch (e) {
    console.error(e);
    return {};
  }
}
module.exports = {
FireworksAiLLM,
fireworksAiModels,

View File

@ -1,124 +0,0 @@
// Static catalog of FireworksAI chat models, keyed by the full model id
// string the API expects. `maxLength` is the model's context window in
// tokens. Hand-collected from https://fireworks.ai/models (see the
// accompanying parse script); only a subset of the provider's models is
// listed here.
const MODELS = {
  "accounts/fireworks/models/llama-v3p2-3b-instruct": {
    id: "accounts/fireworks/models/llama-v3p2-3b-instruct",
    organization: "Meta",
    name: "Llama 3.2 3B Instruct",
    maxLength: 131072,
  },
  "accounts/fireworks/models/llama-v3p2-1b-instruct": {
    id: "accounts/fireworks/models/llama-v3p2-1b-instruct",
    organization: "Meta",
    name: "Llama 3.2 1B Instruct",
    maxLength: 131072,
  },
  "accounts/fireworks/models/llama-v3p1-405b-instruct": {
    id: "accounts/fireworks/models/llama-v3p1-405b-instruct",
    organization: "Meta",
    name: "Llama 3.1 405B Instruct",
    maxLength: 131072,
  },
  "accounts/fireworks/models/llama-v3p1-70b-instruct": {
    id: "accounts/fireworks/models/llama-v3p1-70b-instruct",
    organization: "Meta",
    name: "Llama 3.1 70B Instruct",
    maxLength: 131072,
  },
  "accounts/fireworks/models/llama-v3p1-8b-instruct": {
    id: "accounts/fireworks/models/llama-v3p1-8b-instruct",
    organization: "Meta",
    name: "Llama 3.1 8B Instruct",
    maxLength: 131072,
  },
  "accounts/fireworks/models/llama-v3-70b-instruct": {
    id: "accounts/fireworks/models/llama-v3-70b-instruct",
    organization: "Meta",
    name: "Llama 3 70B Instruct",
    maxLength: 8192,
  },
  "accounts/fireworks/models/mixtral-8x22b-instruct": {
    id: "accounts/fireworks/models/mixtral-8x22b-instruct",
    organization: "mistralai",
    name: "Mixtral MoE 8x22B Instruct",
    maxLength: 65536,
  },
  "accounts/fireworks/models/mixtral-8x7b-instruct": {
    id: "accounts/fireworks/models/mixtral-8x7b-instruct",
    organization: "mistralai",
    name: "Mixtral MoE 8x7B Instruct",
    maxLength: 32768,
  },
  "accounts/fireworks/models/firefunction-v2": {
    id: "accounts/fireworks/models/firefunction-v2",
    organization: "Fireworks AI",
    name: "Firefunction V2",
    maxLength: 8192,
  },
  "accounts/fireworks/models/firefunction-v1": {
    id: "accounts/fireworks/models/firefunction-v1",
    organization: "Fireworks AI",
    name: "FireFunction V1",
    maxLength: 32768,
  },
  "accounts/fireworks/models/gemma2-9b-it": {
    id: "accounts/fireworks/models/gemma2-9b-it",
    organization: "Google",
    name: "Gemma 2 9B Instruct",
    maxLength: 8192,
  },
  "accounts/fireworks/models/llama-v3-70b-instruct-hf": {
    id: "accounts/fireworks/models/llama-v3-70b-instruct-hf",
    organization: "Hugging Face",
    name: "Llama 3 70B Instruct (HF version)",
    maxLength: 8192,
  },
  "accounts/fireworks/models/llama-v3-8b-instruct": {
    id: "accounts/fireworks/models/llama-v3-8b-instruct",
    organization: "Hugging Face",
    name: "Llama 3 8B Instruct",
    maxLength: 8192,
  },
  "accounts/fireworks/models/llama-v3-8b-instruct-hf": {
    id: "accounts/fireworks/models/llama-v3-8b-instruct-hf",
    organization: "Hugging Face",
    name: "Llama 3 8B Instruct (HF version)",
    maxLength: 8192,
  },
  "accounts/fireworks/models/mixtral-8x7b-instruct-hf": {
    id: "accounts/fireworks/models/mixtral-8x7b-instruct-hf",
    organization: "Hugging Face",
    name: "Mixtral MoE 8x7B Instruct (HF version)",
    maxLength: 32768,
  },
  "accounts/fireworks/models/mythomax-l2-13b": {
    id: "accounts/fireworks/models/mythomax-l2-13b",
    organization: "Gryphe",
    name: "MythoMax L2 13b",
    maxLength: 4096,
  },
  "accounts/fireworks/models/phi-3-vision-128k-instruct": {
    id: "accounts/fireworks/models/phi-3-vision-128k-instruct",
    organization: "Microsoft",
    name: "Phi 3.5 Vision Instruct",
    maxLength: 8192,
  },
  "accounts/fireworks/models/starcoder-16b": {
    id: "accounts/fireworks/models/starcoder-16b",
    organization: "BigCode",
    name: "StarCoder 15.5B",
    maxLength: 8192,
  },
  "accounts/fireworks/models/starcoder-7b": {
    id: "accounts/fireworks/models/starcoder-7b",
    organization: "BigCode",
    name: "StarCoder 7B",
    maxLength: 8192,
  },
  "accounts/fireworks/models/yi-01-ai/yi-large": {
    id: "accounts/fireworks/models/yi-01-ai/yi-large",
    organization: "01.AI",
    name: "Yi-Large",
    maxLength: 32768,
  },
};
module.exports.MODELS = MODELS;

View File

@ -1 +0,0 @@
*.json

View File

@ -1,22 +0,0 @@
| Organization | Model Name | Model String for API | Context length |
|--------------|------------|----------------------|----------------|
| Meta | Llama 3.2 3B Instruct | accounts/fireworks/models/llama-v3p2-3b-instruct | 131072 |
| Meta | Llama 3.2 1B Instruct | accounts/fireworks/models/llama-v3p2-1b-instruct | 131072 |
| Meta | Llama 3.1 405B Instruct | accounts/fireworks/models/llama-v3p1-405b-instruct | 131072 |
| Meta | Llama 3.1 70B Instruct | accounts/fireworks/models/llama-v3p1-70b-instruct | 131072 |
| Meta | Llama 3.1 8B Instruct | accounts/fireworks/models/llama-v3p1-8b-instruct | 131072 |
| Meta | Llama 3 70B Instruct | accounts/fireworks/models/llama-v3-70b-instruct | 8192 |
| mistralai | Mixtral MoE 8x22B Instruct | accounts/fireworks/models/mixtral-8x22b-instruct | 65536 |
| mistralai | Mixtral MoE 8x7B Instruct | accounts/fireworks/models/mixtral-8x7b-instruct | 32768 |
| Fireworks AI | Firefunction V2 | accounts/fireworks/models/firefunction-v2 | 8192 |
| Fireworks AI | FireFunction V1 | accounts/fireworks/models/firefunction-v1 | 32768 |
| Google | Gemma 2 9B Instruct | accounts/fireworks/models/gemma2-9b-it | 8192 |
| Hugging Face | Llama 3 70B Instruct (HF version) | accounts/fireworks/models/llama-v3-70b-instruct-hf | 8192 |
| Hugging Face | Llama 3 8B Instruct | accounts/fireworks/models/llama-v3-8b-instruct | 8192 |
| Hugging Face | Llama 3 8B Instruct (HF version) | accounts/fireworks/models/llama-v3-8b-instruct-hf | 8192 |
| Hugging Face | Mixtral MoE 8x7B Instruct (HF version) | accounts/fireworks/models/mixtral-8x7b-instruct-hf | 32768 |
| Gryphe | MythoMax L2 13b | accounts/fireworks/models/mythomax-l2-13b | 4096 |
| Microsoft | Phi 3.5 Vision Instruct | accounts/fireworks/models/phi-3-vision-128k-instruct | 8192 |
| BigCode | StarCoder 15.5B | accounts/fireworks/models/starcoder-16b | 8192 |
| BigCode | StarCoder 7B | accounts/fireworks/models/starcoder-7b | 8192 |
| 01.AI | Yi-Large | accounts/fireworks/models/yi-01-ai/yi-large | 32768 |

View File

@ -1,46 +0,0 @@
// Fireworks AI does not provide a simple REST API to get models,
// so we have a table which we copy from their documentation
// at https://fireworks.ai/models that we can
// then parse and get all models from in a format that makes sense
// Why this does not exist is so bizarre, but whatever.
// To run, cd into this directory and run `node parse.mjs`
// copy outputs into the export in ../models.js
// Update the date below if you run this again because Fireworks AI added new models.
// Last Collected: Sep 27, 2024
// NOTE: Only managed to collect 20 out of ~100 models!
// https://fireworks.ai/models lists almost 100 chat language models.
// If you want to add models, please manually add them to chat_models.txt...
// ... I tried to write a script to grab them all but gave up after a few hours...
import fs from "fs";
// Parses the markdown table in chat_models.txt into an object keyed by model
// id and writes it to chat_models.json (pretty-printed). Returns the map.
function parseChatModels() {
  const tableString = fs.readFileSync("chat_models.txt", { encoding: "utf-8" });
  const parsed = {};

  // Skip the header row and the |---| separator row.
  for (const row of tableString.split("\n").slice(2)) {
    // Drop the empty cells produced by the leading and trailing pipes.
    const [provider, name, id, maxLength] = row.split("|").slice(1, -1);
    const modelId = id.trim();
    parsed[modelId] = {
      id: modelId,
      organization: provider.trim(),
      name: name.trim(),
      maxLength: Number(maxLength.trim()),
    };
  }

  fs.writeFileSync("chat_models.json", JSON.stringify(parsed, null, 2), "utf-8");
  return parsed;
}

parseChatModels();

View File

@ -403,8 +403,8 @@ async function getTogetherAiModels(apiKey = null) {
}
}
async function getFireworksAiModels() {
const knownModels = fireworksAiModels();
async function getFireworksAiModels(apiKey = null) {
const knownModels = await fireworksAiModels(apiKey);
if (!Object.keys(knownModels).length === 0)
return { models: [], error: null };