Sync models from remote for FireworksAI (#4475)

resolves #4474
This commit is contained in:
Timothy Carambat 2025-10-02 12:34:05 -07:00 committed by GitHub
parent 0b18ac6577
commit 8cdadd8cb3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 140 additions and 211 deletions

View File

@ -2,6 +2,11 @@ import System from "@/models/system";
import { useState, useEffect } from "react";
export default function FireworksAiOptions({ settings }) {
const [inputValue, setInputValue] = useState(settings?.FireworksAiLLMApiKey);
const [fireworksAiApiKey, setFireworksAiApiKey] = useState(
settings?.FireworksAiLLMApiKey
);
return (
<div className="flex gap-[36px] mt-1.5">
<div className="flex flex-col w-60">
@ -17,22 +22,27 @@ export default function FireworksAiOptions({ settings }) {
required={true}
autoComplete="off"
spellCheck={false}
onChange={(e) => setInputValue(e.target.value)}
onBlur={() => setFireworksAiApiKey(inputValue)}
/>
</div>
{!settings?.credentialsOnly && (
<FireworksAiModelSelection settings={settings} />
<FireworksAiModelSelection
apiKey={fireworksAiApiKey}
settings={settings}
/>
)}
</div>
);
}
function FireworksAiModelSelection({ settings }) {
function FireworksAiModelSelection({ apiKey, settings }) {
const [groupedModels, setGroupedModels] = useState({});
const [loading, setLoading] = useState(true);
useEffect(() => {
async function findCustomModels() {
setLoading(true);
const { models } = await System.customModels("fireworksai");
const { models } = await System.customModels("fireworksai", apiKey);
if (models?.length > 0) {
const modelsByOrganization = models.reduce((acc, model) => {
@ -47,7 +57,7 @@ function FireworksAiModelSelection({ settings }) {
setLoading(false);
}
findCustomModels();
}, []);
}, [apiKey]);
if (loading || Object.keys(groupedModels).length === 0) {
return (

View File

@ -11,4 +11,5 @@ tesseract
ppio
context-windows/*
MintplexLabs
cometapi
fireworks

View File

@ -1,3 +1,6 @@
const fs = require("fs");
const path = require("path");
const { safeJsonParse } = require("../../http");
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const {
LLMPerformanceMonitor,
@ -6,13 +9,16 @@ const {
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
// Loads the static, hand-maintained FireworksAI model catalog that ships
// with this provider. Returns an empty object when the catalog is absent.
function fireworksAiModels() {
  const { MODELS } = require("./models.js");
  if (!MODELS) return {};
  return MODELS;
}
const cacheFolder = path.resolve(
process.env.STORAGE_DIR
? path.resolve(process.env.STORAGE_DIR, "models", "fireworks")
: path.resolve(__dirname, `../../../storage/models/fireworks`)
);
class FireworksAiLLM {
constructor(embedder = null, modelPreference = null) {
this.className = "FireworksAiLLM";
if (!process.env.FIREWORKS_AI_LLM_API_KEY)
throw new Error("No FireworksAI API key was set.");
const { OpenAI: OpenAIApi } = require("openai");
@ -29,6 +35,51 @@ class FireworksAiLLM {
this.embedder = !embedder ? new NativeEmbedder() : embedder;
this.defaultTemp = 0.7;
if (!fs.existsSync(cacheFolder))
fs.mkdirSync(cacheFolder, { recursive: true });
this.cacheModelPath = path.resolve(cacheFolder, "models.json");
this.cacheAtPath = path.resolve(cacheFolder, ".cached_at");
}
log(text, ...args) {
console.log(`\x1b[36m[${this.className}]\x1b[0m ${text}`, ...args);
}
// This checks if the .cached_at file has a timestamp that is more than 1Week (in millis)
// from the current date. If it is, then we will refetch the API so that all the models are up
// to date.
#cacheIsStale() {
const MAX_STALE = 6.048e8; // 1 Week in MS
if (!fs.existsSync(this.cacheAtPath)) return true;
const now = Number(new Date());
const timestampMs = Number(fs.readFileSync(this.cacheAtPath));
return now - timestampMs > MAX_STALE;
}
// This function fetches the models from the ApiPie API and caches them locally.
// We do this because the ApiPie API has a lot of models, and we need to get the proper token context window
// for each model and this is a constructor property - so we can really only get it if this cache exists.
// We used to have this as a chore, but given there is an API to get the info - this makes little sense.
// This might slow down the first request, but we need the proper token context window
// for each model and this is a constructor property - so we can really only get it if this cache exists.
async #syncModels() {
if (fs.existsSync(this.cacheModelPath) && !this.#cacheIsStale())
return false;
this.log(
"Model cache is not present or stale. Fetching from FireworksAI API."
);
await fireworksAiModels();
return;
}
models() {
if (!fs.existsSync(this.cacheModelPath)) return {};
return safeJsonParse(
fs.readFileSync(this.cacheModelPath, { encoding: "utf-8" }),
{}
);
}
#appendContext(contextTexts = []) {
@ -43,28 +94,31 @@ class FireworksAiLLM {
);
}
// Returns the provider's full model catalog keyed by model id.
// NOTE(review): fireworksAiModels is async elsewhere in this file - if that
// version is in scope this returns a Promise; verify callers await it.
allModelInformation() {
  return fireworksAiModels();
}
streamingEnabled() {
return "streamGetChatCompletion" in this;
}
static promptWindowLimit(modelName) {
const availableModels = fireworksAiModels();
const cacheModelPath = path.resolve(cacheFolder, "models.json");
const availableModels = fs.existsSync(cacheModelPath)
? safeJsonParse(
fs.readFileSync(cacheModelPath, { encoding: "utf-8" }),
{}
)
: {};
return availableModels[modelName]?.maxLength || 4096;
}
// Ensure the user set a value for the token limit
// and if undefined - assume 4096 window.
promptWindowLimit() {
const availableModels = this.allModelInformation();
const availableModels = this.models();
return availableModels[this.model]?.maxLength || 4096;
}
async isValidChatCompletionModel(model = "") {
const availableModels = this.allModelInformation();
await this.#syncModels();
const availableModels = this.models();
return availableModels.hasOwnProperty(model);
}
@ -151,6 +205,63 @@ class FireworksAiLLM {
}
}
// Fetches the live model list from the FireworksAI OpenAI-compatible
// /models endpoint, keeps only chat models (those reporting a context
// length), and caches the result plus a fetch timestamp in the local
// storage folder. Returns the model map keyed by model id, or {} when the
// request fails or no chat models are found.
async function fireworksAiModels(providedApiKey = null) {
  const apiKey = providedApiKey || process.env.FIREWORKS_AI_LLM_API_KEY || null;
  const { OpenAI: OpenAIApi } = require("openai");
  const client = new OpenAIApi({
    baseURL: "https://api.fireworks.ai/inference/v1",
    apiKey: apiKey,
  });

  try {
    const response = await client.models.list();
    const models = response.data ?? [];
    const validModels = {};
    for (const model of models) {
      // There are many models - the ones without a context length are not
      // chat models, so skip them.
      if (!model.hasOwnProperty("context_length")) continue;
      validModels[model.id] = {
        id: model.id,
        name: model.id.split("/").pop(),
        organization: model.owned_by,
        subtype: model.type,
        maxLength: model.context_length ?? 4096,
      };
    }

    if (Object.keys(validModels).length === 0) {
      console.log("fireworksAi: No models found");
      return {};
    }

    // Persist the model list and a fetch timestamp so future constructions
    // can read context windows without hitting the API again.
    if (!fs.existsSync(cacheFolder))
      fs.mkdirSync(cacheFolder, { recursive: true });
    fs.writeFileSync(
      path.resolve(cacheFolder, "models.json"),
      JSON.stringify(validModels),
      { encoding: "utf-8" }
    );
    fs.writeFileSync(
      path.resolve(cacheFolder, ".cached_at"),
      String(Number(new Date())),
      { encoding: "utf-8" }
    );
    return validModels;
  } catch (e) {
    console.error(e);
    return {};
  }
}
module.exports = {
FireworksAiLLM,
fireworksAiModels,

View File

@ -1,124 +0,0 @@
// Static catalog of FireworksAI chat models, keyed by the full model id
// string the API expects. `maxLength` is the model's context window in
// tokens. Hand-collected from https://fireworks.ai/models (see the
// accompanying parse script); only a subset of the provider's models is
// listed here.
const MODELS = {
  "accounts/fireworks/models/llama-v3p2-3b-instruct": {
    id: "accounts/fireworks/models/llama-v3p2-3b-instruct",
    organization: "Meta",
    name: "Llama 3.2 3B Instruct",
    maxLength: 131072,
  },
  "accounts/fireworks/models/llama-v3p2-1b-instruct": {
    id: "accounts/fireworks/models/llama-v3p2-1b-instruct",
    organization: "Meta",
    name: "Llama 3.2 1B Instruct",
    maxLength: 131072,
  },
  "accounts/fireworks/models/llama-v3p1-405b-instruct": {
    id: "accounts/fireworks/models/llama-v3p1-405b-instruct",
    organization: "Meta",
    name: "Llama 3.1 405B Instruct",
    maxLength: 131072,
  },
  "accounts/fireworks/models/llama-v3p1-70b-instruct": {
    id: "accounts/fireworks/models/llama-v3p1-70b-instruct",
    organization: "Meta",
    name: "Llama 3.1 70B Instruct",
    maxLength: 131072,
  },
  "accounts/fireworks/models/llama-v3p1-8b-instruct": {
    id: "accounts/fireworks/models/llama-v3p1-8b-instruct",
    organization: "Meta",
    name: "Llama 3.1 8B Instruct",
    maxLength: 131072,
  },
  "accounts/fireworks/models/llama-v3-70b-instruct": {
    id: "accounts/fireworks/models/llama-v3-70b-instruct",
    organization: "Meta",
    name: "Llama 3 70B Instruct",
    maxLength: 8192,
  },
  "accounts/fireworks/models/mixtral-8x22b-instruct": {
    id: "accounts/fireworks/models/mixtral-8x22b-instruct",
    organization: "mistralai",
    name: "Mixtral MoE 8x22B Instruct",
    maxLength: 65536,
  },
  "accounts/fireworks/models/mixtral-8x7b-instruct": {
    id: "accounts/fireworks/models/mixtral-8x7b-instruct",
    organization: "mistralai",
    name: "Mixtral MoE 8x7B Instruct",
    maxLength: 32768,
  },
  "accounts/fireworks/models/firefunction-v2": {
    id: "accounts/fireworks/models/firefunction-v2",
    organization: "Fireworks AI",
    name: "Firefunction V2",
    maxLength: 8192,
  },
  "accounts/fireworks/models/firefunction-v1": {
    id: "accounts/fireworks/models/firefunction-v1",
    organization: "Fireworks AI",
    name: "FireFunction V1",
    maxLength: 32768,
  },
  "accounts/fireworks/models/gemma2-9b-it": {
    id: "accounts/fireworks/models/gemma2-9b-it",
    organization: "Google",
    name: "Gemma 2 9B Instruct",
    maxLength: 8192,
  },
  "accounts/fireworks/models/llama-v3-70b-instruct-hf": {
    id: "accounts/fireworks/models/llama-v3-70b-instruct-hf",
    organization: "Hugging Face",
    name: "Llama 3 70B Instruct (HF version)",
    maxLength: 8192,
  },
  "accounts/fireworks/models/llama-v3-8b-instruct": {
    id: "accounts/fireworks/models/llama-v3-8b-instruct",
    organization: "Hugging Face",
    name: "Llama 3 8B Instruct",
    maxLength: 8192,
  },
  "accounts/fireworks/models/llama-v3-8b-instruct-hf": {
    id: "accounts/fireworks/models/llama-v3-8b-instruct-hf",
    organization: "Hugging Face",
    name: "Llama 3 8B Instruct (HF version)",
    maxLength: 8192,
  },
  "accounts/fireworks/models/mixtral-8x7b-instruct-hf": {
    id: "accounts/fireworks/models/mixtral-8x7b-instruct-hf",
    organization: "Hugging Face",
    name: "Mixtral MoE 8x7B Instruct (HF version)",
    maxLength: 32768,
  },
  "accounts/fireworks/models/mythomax-l2-13b": {
    id: "accounts/fireworks/models/mythomax-l2-13b",
    organization: "Gryphe",
    name: "MythoMax L2 13b",
    maxLength: 4096,
  },
  "accounts/fireworks/models/phi-3-vision-128k-instruct": {
    id: "accounts/fireworks/models/phi-3-vision-128k-instruct",
    organization: "Microsoft",
    name: "Phi 3.5 Vision Instruct",
    maxLength: 8192,
  },
  "accounts/fireworks/models/starcoder-16b": {
    id: "accounts/fireworks/models/starcoder-16b",
    organization: "BigCode",
    name: "StarCoder 15.5B",
    maxLength: 8192,
  },
  "accounts/fireworks/models/starcoder-7b": {
    id: "accounts/fireworks/models/starcoder-7b",
    organization: "BigCode",
    name: "StarCoder 7B",
    maxLength: 8192,
  },
  "accounts/fireworks/models/yi-01-ai/yi-large": {
    id: "accounts/fireworks/models/yi-01-ai/yi-large",
    organization: "01.AI",
    name: "Yi-Large",
    maxLength: 32768,
  },
};
module.exports.MODELS = MODELS;

View File

@ -1 +0,0 @@
*.json

View File

@ -1,22 +0,0 @@
| Organization | Model Name | Model String for API | Context length |
|--------------|------------|----------------------|----------------|
| Meta | Llama 3.2 3B Instruct | accounts/fireworks/models/llama-v3p2-3b-instruct | 131072 |
| Meta | Llama 3.2 1B Instruct | accounts/fireworks/models/llama-v3p2-1b-instruct | 131072 |
| Meta | Llama 3.1 405B Instruct | accounts/fireworks/models/llama-v3p1-405b-instruct | 131072 |
| Meta | Llama 3.1 70B Instruct | accounts/fireworks/models/llama-v3p1-70b-instruct | 131072 |
| Meta | Llama 3.1 8B Instruct | accounts/fireworks/models/llama-v3p1-8b-instruct | 131072 |
| Meta | Llama 3 70B Instruct | accounts/fireworks/models/llama-v3-70b-instruct | 8192 |
| mistralai | Mixtral MoE 8x22B Instruct | accounts/fireworks/models/mixtral-8x22b-instruct | 65536 |
| mistralai | Mixtral MoE 8x7B Instruct | accounts/fireworks/models/mixtral-8x7b-instruct | 32768 |
| Fireworks AI | Firefunction V2 | accounts/fireworks/models/firefunction-v2 | 8192 |
| Fireworks AI | FireFunction V1 | accounts/fireworks/models/firefunction-v1 | 32768 |
| Google | Gemma 2 9B Instruct | accounts/fireworks/models/gemma2-9b-it | 8192 |
| Hugging Face | Llama 3 70B Instruct (HF version) | accounts/fireworks/models/llama-v3-70b-instruct-hf | 8192 |
| Hugging Face | Llama 3 8B Instruct | accounts/fireworks/models/llama-v3-8b-instruct | 8192 |
| Hugging Face | Llama 3 8B Instruct (HF version) | accounts/fireworks/models/llama-v3-8b-instruct-hf | 8192 |
| Hugging Face | Mixtral MoE 8x7B Instruct (HF version) | accounts/fireworks/models/mixtral-8x7b-instruct-hf | 32768 |
| Gryphe | MythoMax L2 13b | accounts/fireworks/models/mythomax-l2-13b | 4096 |
| Microsoft | Phi 3.5 Vision Instruct | accounts/fireworks/models/phi-3-vision-128k-instruct | 8192 |
| BigCode | StarCoder 15.5B | accounts/fireworks/models/starcoder-16b | 8192 |
| BigCode | StarCoder 7B | accounts/fireworks/models/starcoder-7b | 8192 |
| 01.AI | Yi-Large | accounts/fireworks/models/yi-01-ai/yi-large | 32768 |

View File

@ -1,46 +0,0 @@
// Fireworks AI does not provide a simple REST API to get models,
// so we have a table which we copy from their documentation
// at https://fireworks.ai/models that we can
// then parse and get all models from in a format that makes sense
// Why this does not exist is so bizarre, but whatever.
// To run, cd into this directory and run `node parse.mjs`
// copy outputs into the export in ../models.js
// Update the date below if you run this again because Fireworks AI added new models.
// Last Collected: Sep 27, 2024
// NOTE: Only managed to collect 20 out of ~100 models!
// https://fireworks.ai/models lists almost 100 chat language models.
// If you want to add models, please manually add them to chat_models.txt...
// ... I tried to write a script to grab them all but gave up after a few hours...
import fs from "fs";
// Parses the markdown table in chat_models.txt into an object keyed by model
// id and writes it to chat_models.json (pretty-printed). Returns the map.
function parseChatModels() {
  const tableString = fs.readFileSync("chat_models.txt", { encoding: "utf-8" });
  const parsed = {};

  // Skip the header row and the |---| separator row.
  for (const row of tableString.split("\n").slice(2)) {
    // Drop the empty cells produced by the leading and trailing pipes.
    const [provider, name, id, maxLength] = row.split("|").slice(1, -1);
    const modelId = id.trim();
    parsed[modelId] = {
      id: modelId,
      organization: provider.trim(),
      name: name.trim(),
      maxLength: Number(maxLength.trim()),
    };
  }

  fs.writeFileSync("chat_models.json", JSON.stringify(parsed, null, 2), "utf-8");
  return parsed;
}

parseChatModels();

View File

@ -403,8 +403,8 @@ async function getTogetherAiModels(apiKey = null) {
}
}
async function getFireworksAiModels() {
const knownModels = fireworksAiModels();
async function getFireworksAiModels(apiKey = null) {
const knownModels = await fireworksAiModels(apiKey);
if (!Object.keys(knownModels).length === 0)
return { models: [], error: null };