Refactor Gemini to use OpenAI interface API (#3616)
* Refactor Gemini to use OpenAI interface API * add TODO * handle errors better (gemini) * remove unused code
This commit is contained in:
parent
699684e301
commit
1b59295f89
@ -15,7 +15,7 @@ GID='1000'
|
|||||||
|
|
||||||
# LLM_PROVIDER='gemini'
|
# LLM_PROVIDER='gemini'
|
||||||
# GEMINI_API_KEY=
|
# GEMINI_API_KEY=
|
||||||
# GEMINI_LLM_MODEL_PREF='gemini-pro'
|
# GEMINI_LLM_MODEL_PREF='gemini-2.0-flash-lite'
|
||||||
|
|
||||||
# LLM_PROVIDER='azure'
|
# LLM_PROVIDER='azure'
|
||||||
# AZURE_OPENAI_ENDPOINT=
|
# AZURE_OPENAI_ENDPOINT=
|
||||||
|
|||||||
@ -29,6 +29,11 @@ export default function GeminiLLMOptions({ settings }) {
|
|||||||
{!settings?.credentialsOnly && (
|
{!settings?.credentialsOnly && (
|
||||||
<>
|
<>
|
||||||
<GeminiModelSelection apiKey={geminiApiKey} settings={settings} />
|
<GeminiModelSelection apiKey={geminiApiKey} settings={settings} />
|
||||||
|
{/*
|
||||||
|
|
||||||
|
Safety setting is not supported for Gemini yet due to the openai compatible Gemini API.
|
||||||
|
We are not using the generativeAPI endpoint and therefore cannot set the safety threshold.
|
||||||
|
|
||||||
<div className="flex flex-col w-60">
|
<div className="flex flex-col w-60">
|
||||||
<label className="text-white text-sm font-semibold block mb-3">
|
<label className="text-white text-sm font-semibold block mb-3">
|
||||||
Safety Setting
|
Safety Setting
|
||||||
@ -48,7 +53,7 @@ export default function GeminiLLMOptions({ settings }) {
|
|||||||
</option>
|
</option>
|
||||||
<option value="BLOCK_LOW_AND_ABOVE">Block most</option>
|
<option value="BLOCK_LOW_AND_ABOVE">Block most</option>
|
||||||
</select>
|
</select>
|
||||||
</div>
|
</div> */}
|
||||||
</>
|
</>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@ -10,21 +10,7 @@ export const DISABLED_PROVIDERS = [
|
|||||||
];
|
];
|
||||||
const PROVIDER_DEFAULT_MODELS = {
|
const PROVIDER_DEFAULT_MODELS = {
|
||||||
openai: [],
|
openai: [],
|
||||||
gemini: [
|
gemini: [],
|
||||||
"gemini-pro",
|
|
||||||
"gemini-1.0-pro",
|
|
||||||
"gemini-1.5-pro-latest",
|
|
||||||
"gemini-1.5-flash-latest",
|
|
||||||
"gemini-1.5-pro-exp-0801",
|
|
||||||
"gemini-1.5-pro-exp-0827",
|
|
||||||
"gemini-1.5-flash-exp-0827",
|
|
||||||
"gemini-1.5-flash-8b-exp-0827",
|
|
||||||
"gemini-exp-1114",
|
|
||||||
"gemini-exp-1121",
|
|
||||||
"gemini-exp-1206",
|
|
||||||
"learnlm-1.5-pro-experimental",
|
|
||||||
"gemini-2.0-flash-exp",
|
|
||||||
],
|
|
||||||
anthropic: [],
|
anthropic: [],
|
||||||
azure: [],
|
azure: [],
|
||||||
lmstudio: [],
|
lmstudio: [],
|
||||||
|
|||||||
@ -30,10 +30,10 @@ const ENABLED_PROVIDERS = [
|
|||||||
"apipie",
|
"apipie",
|
||||||
"xai",
|
"xai",
|
||||||
"nvidia-nim",
|
"nvidia-nim",
|
||||||
|
"gemini",
|
||||||
// TODO: More agent support.
|
// TODO: More agent support.
|
||||||
// "cohere", // Has tool calling and will need to build explicit support
|
// "cohere", // Has tool calling and will need to build explicit support
|
||||||
// "huggingface" // Can be done but already has issues with no-chat templated. Needs to be tested.
|
// "huggingface" // Can be done but already has issues with no-chat templated. Needs to be tested.
|
||||||
// "gemini", // Too rate limited and broken in several ways to use for agents.
|
|
||||||
];
|
];
|
||||||
const WARN_PERFORMANCE = [
|
const WARN_PERFORMANCE = [
|
||||||
"lmstudio",
|
"lmstudio",
|
||||||
|
|||||||
@ -12,7 +12,7 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
|
|||||||
|
|
||||||
# LLM_PROVIDER='gemini'
|
# LLM_PROVIDER='gemini'
|
||||||
# GEMINI_API_KEY=
|
# GEMINI_API_KEY=
|
||||||
# GEMINI_LLM_MODEL_PREF='gemini-pro'
|
# GEMINI_LLM_MODEL_PREF='gemini-2.0-flash-lite'
|
||||||
|
|
||||||
# LLM_PROVIDER='azure'
|
# LLM_PROVIDER='azure'
|
||||||
# AZURE_OPENAI_ENDPOINT=
|
# AZURE_OPENAI_ENDPOINT=
|
||||||
|
|||||||
@ -450,7 +450,8 @@ const SystemSettings = {
|
|||||||
|
|
||||||
// Gemini Keys
|
// Gemini Keys
|
||||||
GeminiLLMApiKey: !!process.env.GEMINI_API_KEY,
|
GeminiLLMApiKey: !!process.env.GEMINI_API_KEY,
|
||||||
GeminiLLMModelPref: process.env.GEMINI_LLM_MODEL_PREF || "gemini-pro",
|
GeminiLLMModelPref:
|
||||||
|
process.env.GEMINI_LLM_MODEL_PREF || "gemini-2.0-flash-lite",
|
||||||
GeminiSafetySetting:
|
GeminiSafetySetting:
|
||||||
process.env.GEMINI_SAFETY_SETTING || "BLOCK_MEDIUM_AND_ABOVE",
|
process.env.GEMINI_SAFETY_SETTING || "BLOCK_MEDIUM_AND_ABOVE",
|
||||||
|
|
||||||
|
|||||||
@ -5,9 +5,8 @@ const {
|
|||||||
LLMPerformanceMonitor,
|
LLMPerformanceMonitor,
|
||||||
} = require("../../helpers/chat/LLMPerformanceMonitor");
|
} = require("../../helpers/chat/LLMPerformanceMonitor");
|
||||||
const {
|
const {
|
||||||
writeResponseChunk,
|
|
||||||
clientAbortedHandler,
|
|
||||||
formatChatHistory,
|
formatChatHistory,
|
||||||
|
handleDefaultStreamResponseV2,
|
||||||
} = require("../../helpers/chat/responses");
|
} = require("../../helpers/chat/responses");
|
||||||
const { MODEL_MAP } = require("../modelMap");
|
const { MODEL_MAP } = require("../modelMap");
|
||||||
const { defaultGeminiModels, v1BetaModels } = require("./defaultModels");
|
const { defaultGeminiModels, v1BetaModels } = require("./defaultModels");
|
||||||
@ -18,22 +17,31 @@ const cacheFolder = path.resolve(
|
|||||||
: path.resolve(__dirname, `../../../storage/models/gemini`)
|
: path.resolve(__dirname, `../../../storage/models/gemini`)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
const NO_SYSTEM_PROMPT_MODELS = [
|
||||||
|
"gemma-3-1b-it",
|
||||||
|
"gemma-3-4b-it",
|
||||||
|
"gemma-3-12b-it",
|
||||||
|
"gemma-3-27b-it",
|
||||||
|
];
|
||||||
|
|
||||||
class GeminiLLM {
|
class GeminiLLM {
|
||||||
constructor(embedder = null, modelPreference = null) {
|
constructor(embedder = null, modelPreference = null) {
|
||||||
if (!process.env.GEMINI_API_KEY)
|
if (!process.env.GEMINI_API_KEY)
|
||||||
throw new Error("No Gemini API key was set.");
|
throw new Error("No Gemini API key was set.");
|
||||||
|
|
||||||
// Docs: https://ai.google.dev/tutorials/node_quickstart
|
const { OpenAI: OpenAIApi } = require("openai");
|
||||||
const { GoogleGenerativeAI } = require("@google/generative-ai");
|
|
||||||
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY);
|
|
||||||
this.model =
|
this.model =
|
||||||
modelPreference || process.env.GEMINI_LLM_MODEL_PREF || "gemini-pro";
|
modelPreference ||
|
||||||
|
process.env.GEMINI_LLM_MODEL_PREF ||
|
||||||
|
"gemini-2.0-flash-lite";
|
||||||
|
|
||||||
const isExperimental = this.isExperimentalModel(this.model);
|
const isExperimental = this.isExperimentalModel(this.model);
|
||||||
this.gemini = genAI.getGenerativeModel(
|
this.openai = new OpenAIApi({
|
||||||
{ model: this.model },
|
apiKey: process.env.GEMINI_API_KEY,
|
||||||
{ apiVersion: isExperimental ? "v1beta" : "v1" }
|
// Even models that are v1 in gemini API can be used with v1beta/openai/ endpoint and nobody knows why.
|
||||||
);
|
baseURL: "https://generativelanguage.googleapis.com/v1beta/openai/",
|
||||||
|
});
|
||||||
|
|
||||||
this.limits = {
|
this.limits = {
|
||||||
history: this.promptWindowLimit() * 0.15,
|
history: this.promptWindowLimit() * 0.15,
|
||||||
system: this.promptWindowLimit() * 0.15,
|
system: this.promptWindowLimit() * 0.15,
|
||||||
@ -41,8 +49,7 @@ class GeminiLLM {
|
|||||||
};
|
};
|
||||||
|
|
||||||
this.embedder = embedder ?? new NativeEmbedder();
|
this.embedder = embedder ?? new NativeEmbedder();
|
||||||
this.defaultTemp = 0.7; // not used for Gemini
|
this.defaultTemp = 0.7;
|
||||||
this.safetyThreshold = this.#fetchSafetyThreshold();
|
|
||||||
|
|
||||||
if (!fs.existsSync(cacheFolder))
|
if (!fs.existsSync(cacheFolder))
|
||||||
fs.mkdirSync(cacheFolder, { recursive: true });
|
fs.mkdirSync(cacheFolder, { recursive: true });
|
||||||
@ -53,6 +60,16 @@ class GeminiLLM {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if the model supports system prompts
|
||||||
|
* This is a static list of models that are known to not support system prompts
|
||||||
|
* since this information is not available in the API model response.
|
||||||
|
* @returns {boolean}
|
||||||
|
*/
|
||||||
|
get supportsSystemPrompt() {
|
||||||
|
return !NO_SYSTEM_PROMPT_MODELS.includes(this.model);
|
||||||
|
}
|
||||||
|
|
||||||
#log(text, ...args) {
|
#log(text, ...args) {
|
||||||
console.log(`\x1b[32m[GeminiLLM]\x1b[0m ${text}`, ...args);
|
console.log(`\x1b[32m[GeminiLLM]\x1b[0m ${text}`, ...args);
|
||||||
}
|
}
|
||||||
@ -82,41 +99,6 @@ class GeminiLLM {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// BLOCK_NONE can be a special candidate for some fields
|
|
||||||
// https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/configure-safety-attributes#how_to_remove_automated_response_blocking_for_select_safety_attributes
|
|
||||||
// so if you are wondering why BLOCK_NONE still failed, the link above will explain why.
|
|
||||||
#fetchSafetyThreshold() {
|
|
||||||
const threshold =
|
|
||||||
process.env.GEMINI_SAFETY_SETTING ?? "BLOCK_MEDIUM_AND_ABOVE";
|
|
||||||
const safetyThresholds = [
|
|
||||||
"BLOCK_NONE",
|
|
||||||
"BLOCK_ONLY_HIGH",
|
|
||||||
"BLOCK_MEDIUM_AND_ABOVE",
|
|
||||||
"BLOCK_LOW_AND_ABOVE",
|
|
||||||
];
|
|
||||||
return safetyThresholds.includes(threshold)
|
|
||||||
? threshold
|
|
||||||
: "BLOCK_MEDIUM_AND_ABOVE";
|
|
||||||
}
|
|
||||||
|
|
||||||
#safetySettings() {
|
|
||||||
return [
|
|
||||||
{
|
|
||||||
category: "HARM_CATEGORY_HATE_SPEECH",
|
|
||||||
threshold: this.safetyThreshold,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
category: "HARM_CATEGORY_SEXUALLY_EXPLICIT",
|
|
||||||
threshold: this.safetyThreshold,
|
|
||||||
},
|
|
||||||
{ category: "HARM_CATEGORY_HARASSMENT", threshold: this.safetyThreshold },
|
|
||||||
{
|
|
||||||
category: "HARM_CATEGORY_DANGEROUS_CONTENT",
|
|
||||||
threshold: this.safetyThreshold,
|
|
||||||
},
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
streamingEnabled() {
|
streamingEnabled() {
|
||||||
return "streamGetChatCompletion" in this;
|
return "streamGetChatCompletion" in this;
|
||||||
}
|
}
|
||||||
@ -336,147 +318,114 @@ class GeminiLLM {
|
|||||||
* @returns {string|object[]}
|
* @returns {string|object[]}
|
||||||
*/
|
*/
|
||||||
#generateContent({ userPrompt, attachments = [] }) {
|
#generateContent({ userPrompt, attachments = [] }) {
|
||||||
if (!attachments.length) {
|
if (!attachments.length) return userPrompt;
|
||||||
return userPrompt;
|
|
||||||
}
|
|
||||||
|
|
||||||
const content = [{ text: userPrompt }];
|
const content = [{ type: "text", text: userPrompt }];
|
||||||
for (let attachment of attachments) {
|
for (let attachment of attachments) {
|
||||||
content.push({
|
content.push({
|
||||||
inlineData: {
|
type: "image_url",
|
||||||
data: attachment.contentString.split("base64,")[1],
|
image_url: {
|
||||||
mimeType: attachment.mime,
|
url: attachment.contentString,
|
||||||
|
detail: "high",
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
return content.flat();
|
return content.flat();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct the user prompt for this model.
|
||||||
|
* @param {{attachments: import("../../helpers").Attachment[]}} param0
|
||||||
|
* @returns
|
||||||
|
*/
|
||||||
constructPrompt({
|
constructPrompt({
|
||||||
systemPrompt = "",
|
systemPrompt = "",
|
||||||
contextTexts = [],
|
contextTexts = [],
|
||||||
chatHistory = [],
|
chatHistory = [],
|
||||||
userPrompt = "",
|
userPrompt = "",
|
||||||
attachments = [],
|
attachments = [], // This is the specific attachment for only this prompt
|
||||||
}) {
|
}) {
|
||||||
const prompt = {
|
let prompt = [];
|
||||||
role: "system",
|
if (this.supportsSystemPrompt) {
|
||||||
content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
|
prompt.push({
|
||||||
};
|
role: "system",
|
||||||
|
content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
this.#log(
|
||||||
|
`${this.model} - does not support system prompts - emulating...`
|
||||||
|
);
|
||||||
|
prompt.push(
|
||||||
|
{
|
||||||
|
role: "user",
|
||||||
|
content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: "assistant",
|
||||||
|
content: "Okay.",
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
return [
|
return [
|
||||||
prompt,
|
...prompt,
|
||||||
{ role: "assistant", content: "Okay." },
|
|
||||||
...formatChatHistory(chatHistory, this.#generateContent),
|
...formatChatHistory(chatHistory, this.#generateContent),
|
||||||
{
|
{
|
||||||
role: "USER_PROMPT",
|
role: "user",
|
||||||
content: this.#generateContent({ userPrompt, attachments }),
|
content: this.#generateContent({ userPrompt, attachments }),
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
// This will take an OpenAi format message array and only pluck valid roles from it.
|
async getChatCompletion(messages = null, { temperature = 0.7 }) {
|
||||||
formatMessages(messages = []) {
|
const result = await LLMPerformanceMonitor.measureAsyncFunction(
|
||||||
// Gemini roles are either user || model.
|
this.openai.chat.completions
|
||||||
// and all "content" is relabeled to "parts"
|
.create({
|
||||||
const allMessages = messages
|
model: this.model,
|
||||||
.map((message) => {
|
messages,
|
||||||
if (message.role === "system")
|
temperature: temperature,
|
||||||
return { role: "user", parts: [{ text: message.content }] };
|
})
|
||||||
|
.catch((e) => {
|
||||||
|
console.error(e);
|
||||||
|
throw new Error(e.message);
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
if (message.role === "user") {
|
|
||||||
// If the content is an array - then we have already formatted the context so return it directly.
|
|
||||||
if (Array.isArray(message.content))
|
|
||||||
return { role: "user", parts: message.content };
|
|
||||||
|
|
||||||
// Otherwise, this was a regular user message with no attachments
|
|
||||||
// so we need to format it for Gemini
|
|
||||||
return { role: "user", parts: [{ text: message.content }] };
|
|
||||||
}
|
|
||||||
|
|
||||||
if (message.role === "assistant")
|
|
||||||
return { role: "model", parts: [{ text: message.content }] };
|
|
||||||
return null;
|
|
||||||
})
|
|
||||||
.filter((msg) => !!msg);
|
|
||||||
|
|
||||||
// Specifically, Google cannot have the last sent message be from a user with no assistant reply
|
|
||||||
// otherwise it will crash. So if the last item is from the user, it was not completed so pop it off
|
|
||||||
// the history.
|
|
||||||
if (
|
if (
|
||||||
allMessages.length > 0 &&
|
!result.output.hasOwnProperty("choices") ||
|
||||||
allMessages[allMessages.length - 1].role === "user"
|
result.output.choices.length === 0
|
||||||
)
|
)
|
||||||
allMessages.pop();
|
return null;
|
||||||
|
|
||||||
// Validate that after every user message, there is a model message
|
|
||||||
// sometimes when using gemini we try to compress messages in order to retain as
|
|
||||||
// much context as possible but this may mess up the order of the messages that the gemini model expects
|
|
||||||
// we do this check to work around the edge case where 2 user prompts may be next to each other, in the message array
|
|
||||||
for (let i = 0; i < allMessages.length; i++) {
|
|
||||||
if (
|
|
||||||
allMessages[i].role === "user" &&
|
|
||||||
i < allMessages.length - 1 &&
|
|
||||||
allMessages[i + 1].role !== "model"
|
|
||||||
) {
|
|
||||||
allMessages.splice(i + 1, 0, {
|
|
||||||
role: "model",
|
|
||||||
parts: [{ text: "Okay." }],
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return allMessages;
|
|
||||||
}
|
|
||||||
|
|
||||||
async getChatCompletion(messages = [], _opts = {}) {
|
|
||||||
const prompt = messages.find(
|
|
||||||
(chat) => chat.role === "USER_PROMPT"
|
|
||||||
)?.content;
|
|
||||||
const chatThread = this.gemini.startChat({
|
|
||||||
history: this.formatMessages(messages),
|
|
||||||
safetySettings: this.#safetySettings(),
|
|
||||||
});
|
|
||||||
|
|
||||||
const { output: result, duration } =
|
|
||||||
await LLMPerformanceMonitor.measureAsyncFunction(
|
|
||||||
chatThread.sendMessage(prompt)
|
|
||||||
);
|
|
||||||
const responseText = result.response.text();
|
|
||||||
if (!responseText) throw new Error("Gemini: No response could be parsed.");
|
|
||||||
|
|
||||||
const promptTokens = LLMPerformanceMonitor.countTokens(messages);
|
|
||||||
const completionTokens = LLMPerformanceMonitor.countTokens([
|
|
||||||
{ content: responseText },
|
|
||||||
]);
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
textResponse: responseText,
|
textResponse: result.output.choices[0].message.content,
|
||||||
metrics: {
|
metrics: {
|
||||||
prompt_tokens: promptTokens,
|
prompt_tokens: result.output.usage.prompt_tokens || 0,
|
||||||
completion_tokens: completionTokens,
|
completion_tokens: result.output.usage.completion_tokens || 0,
|
||||||
total_tokens: promptTokens + completionTokens,
|
total_tokens: result.output.usage.total_tokens || 0,
|
||||||
outputTps: (promptTokens + completionTokens) / duration,
|
outputTps: result.output.usage.completion_tokens / result.duration,
|
||||||
duration,
|
duration: result.duration,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
async streamGetChatCompletion(messages = [], _opts = {}) {
|
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
|
||||||
const prompt = messages.find(
|
const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
|
||||||
(chat) => chat.role === "USER_PROMPT"
|
this.openai.chat.completions.create({
|
||||||
)?.content;
|
model: this.model,
|
||||||
const chatThread = this.gemini.startChat({
|
stream: true,
|
||||||
history: this.formatMessages(messages),
|
messages,
|
||||||
safetySettings: this.#safetySettings(),
|
temperature: temperature,
|
||||||
});
|
}),
|
||||||
const responseStream = await LLMPerformanceMonitor.measureStream(
|
messages,
|
||||||
(await chatThread.sendMessageStream(prompt)).stream,
|
true
|
||||||
messages
|
|
||||||
);
|
);
|
||||||
|
|
||||||
if (!responseStream)
|
return measuredStreamRequest;
|
||||||
throw new Error("Could not stream response stream from Gemini.");
|
}
|
||||||
return responseStream;
|
|
||||||
|
handleStream(response, stream, responseProps) {
|
||||||
|
return handleDefaultStreamResponseV2(response, stream, responseProps);
|
||||||
}
|
}
|
||||||
|
|
||||||
async compressMessages(promptArgs = {}, rawHistory = []) {
|
async compressMessages(promptArgs = {}, rawHistory = []) {
|
||||||
@ -485,81 +434,6 @@ class GeminiLLM {
|
|||||||
return await messageArrayCompressor(this, messageArray, rawHistory);
|
return await messageArrayCompressor(this, messageArray, rawHistory);
|
||||||
}
|
}
|
||||||
|
|
||||||
handleStream(response, stream, responseProps) {
|
|
||||||
const { uuid = uuidv4(), sources = [] } = responseProps;
|
|
||||||
// Usage is not available for Gemini streams
|
|
||||||
// so we need to calculate the completion tokens manually
|
|
||||||
// because 1 chunk != 1 token in gemini responses and it buffers
|
|
||||||
// many tokens before sending them to the client as a "chunk"
|
|
||||||
|
|
||||||
return new Promise(async (resolve) => {
|
|
||||||
let fullText = "";
|
|
||||||
|
|
||||||
// Establish listener to early-abort a streaming response
|
|
||||||
// in case things go sideways or the user does not like the response.
|
|
||||||
// We preserve the generated text but continue as if chat was completed
|
|
||||||
// to preserve previously generated content.
|
|
||||||
const handleAbort = () => {
|
|
||||||
stream?.endMeasurement({
|
|
||||||
completion_tokens: LLMPerformanceMonitor.countTokens([
|
|
||||||
{ content: fullText },
|
|
||||||
]),
|
|
||||||
});
|
|
||||||
clientAbortedHandler(resolve, fullText);
|
|
||||||
};
|
|
||||||
response.on("close", handleAbort);
|
|
||||||
|
|
||||||
for await (const chunk of stream) {
|
|
||||||
let chunkText;
|
|
||||||
try {
|
|
||||||
// Due to content sensitivity we cannot always get the function .text();
|
|
||||||
// https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/configure-safety-attributes#gemini-TASK-samples-nodejs
|
|
||||||
// and it is not possible to unblock or disable this safety protocol without being allowlisted by Google.
|
|
||||||
chunkText = chunk.text();
|
|
||||||
} catch (e) {
|
|
||||||
chunkText = e.message;
|
|
||||||
writeResponseChunk(response, {
|
|
||||||
uuid,
|
|
||||||
sources: [],
|
|
||||||
type: "abort",
|
|
||||||
textResponse: null,
|
|
||||||
close: true,
|
|
||||||
error: e.message,
|
|
||||||
});
|
|
||||||
stream?.endMeasurement({ completion_tokens: 0 });
|
|
||||||
resolve(e.message);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
fullText += chunkText;
|
|
||||||
writeResponseChunk(response, {
|
|
||||||
uuid,
|
|
||||||
sources: [],
|
|
||||||
type: "textResponseChunk",
|
|
||||||
textResponse: chunk.text(),
|
|
||||||
close: false,
|
|
||||||
error: false,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
writeResponseChunk(response, {
|
|
||||||
uuid,
|
|
||||||
sources,
|
|
||||||
type: "textResponseChunk",
|
|
||||||
textResponse: "",
|
|
||||||
close: true,
|
|
||||||
error: false,
|
|
||||||
});
|
|
||||||
response.removeListener("close", handleAbort);
|
|
||||||
stream?.endMeasurement({
|
|
||||||
completion_tokens: LLMPerformanceMonitor.countTokens([
|
|
||||||
{ content: fullText },
|
|
||||||
]),
|
|
||||||
});
|
|
||||||
resolve(fullText);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
|
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
|
||||||
async embedTextInput(textInput) {
|
async embedTextInput(textInput) {
|
||||||
return await this.embedder.embedTextInput(textInput);
|
return await this.embedder.embedTextInput(textInput);
|
||||||
@ -571,4 +445,5 @@ class GeminiLLM {
|
|||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
GeminiLLM,
|
GeminiLLM,
|
||||||
|
NO_SYSTEM_PROMPT_MODELS,
|
||||||
};
|
};
|
||||||
|
|||||||
@ -2,6 +2,8 @@ class GeminiEmbedder {
|
|||||||
constructor() {
|
constructor() {
|
||||||
if (!process.env.GEMINI_EMBEDDING_API_KEY)
|
if (!process.env.GEMINI_EMBEDDING_API_KEY)
|
||||||
throw new Error("No Gemini API key was set.");
|
throw new Error("No Gemini API key was set.");
|
||||||
|
|
||||||
|
// TODO: Deprecate this and use OpenAI interface instead - after which, remove the @google/generative-ai dependency
|
||||||
const { GoogleGenerativeAI } = require("@google/generative-ai");
|
const { GoogleGenerativeAI } = require("@google/generative-ai");
|
||||||
const genAI = new GoogleGenerativeAI(process.env.GEMINI_EMBEDDING_API_KEY);
|
const genAI = new GoogleGenerativeAI(process.env.GEMINI_EMBEDDING_API_KEY);
|
||||||
this.model = process.env.EMBEDDING_MODEL_PREF || "text-embedding-004";
|
this.model = process.env.EMBEDDING_MODEL_PREF || "text-embedding-004";
|
||||||
|
|||||||
@ -491,9 +491,7 @@ Only return the role.
|
|||||||
// and remove the @ from the response
|
// and remove the @ from the response
|
||||||
const { result } = await provider.complete(messages);
|
const { result } = await provider.complete(messages);
|
||||||
const name = result?.replace(/^@/g, "");
|
const name = result?.replace(/^@/g, "");
|
||||||
if (this.agents.get(name)) {
|
if (this.agents.get(name)) return name;
|
||||||
return name;
|
|
||||||
}
|
|
||||||
|
|
||||||
// if the name is not in the nodes, return a random node
|
// if the name is not in the nodes, return a random node
|
||||||
return availableNodes[Math.floor(Math.random() * availableNodes.length)];
|
return availableNodes[Math.floor(Math.random() * availableNodes.length)];
|
||||||
@ -797,6 +795,8 @@ ${this.getHistory({ to: route.to })
|
|||||||
return new Providers.NovitaProvider({ model: config.model });
|
return new Providers.NovitaProvider({ model: config.model });
|
||||||
case "ppio":
|
case "ppio":
|
||||||
return new Providers.PPIOProvider({ model: config.model });
|
return new Providers.PPIOProvider({ model: config.model });
|
||||||
|
case "gemini":
|
||||||
|
return new Providers.GeminiProvider({ model: config.model });
|
||||||
default:
|
default:
|
||||||
throw new Error(
|
throw new Error(
|
||||||
`Unknown provider: ${config.provider}. Please use a valid provider.`
|
`Unknown provider: ${config.provider}. Please use a valid provider.`
|
||||||
|
|||||||
@ -171,6 +171,14 @@ class Provider {
|
|||||||
apiKey: process.env.PPIO_API_KEY ?? null,
|
apiKey: process.env.PPIO_API_KEY ?? null,
|
||||||
...config,
|
...config,
|
||||||
});
|
});
|
||||||
|
case "gemini":
|
||||||
|
return new ChatOpenAI({
|
||||||
|
configuration: {
|
||||||
|
baseURL: "https://generativelanguage.googleapis.com/v1beta/openai/",
|
||||||
|
},
|
||||||
|
apiKey: process.env.GEMINI_API_KEY ?? null,
|
||||||
|
...config,
|
||||||
|
});
|
||||||
|
|
||||||
// OSS Model Runners
|
// OSS Model Runners
|
||||||
// case "anythingllm_ollama":
|
// case "anythingllm_ollama":
|
||||||
|
|||||||
154
server/utils/agents/aibitat/providers/gemini.js
Normal file
154
server/utils/agents/aibitat/providers/gemini.js
Normal file
@ -0,0 +1,154 @@
|
|||||||
|
const OpenAI = require("openai");
|
||||||
|
const Provider = require("./ai-provider.js");
|
||||||
|
const InheritMultiple = require("./helpers/classes.js");
|
||||||
|
const UnTooled = require("./helpers/untooled.js");
|
||||||
|
const {
|
||||||
|
NO_SYSTEM_PROMPT_MODELS,
|
||||||
|
} = require("../../../AiProviders/gemini/index.js");
|
||||||
|
const { APIError } = require("../error.js");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The agent provider for the Gemini provider.
|
||||||
|
* We wrap Gemini in UnTooled because its tool-calling is not supported via the dedicated OpenAI API.
|
||||||
|
*/
|
||||||
|
class GeminiProvider extends InheritMultiple([Provider, UnTooled]) {
|
||||||
|
model;
|
||||||
|
|
||||||
|
constructor(config = {}) {
|
||||||
|
const { model = "gemini-2.0-flash-lite" } = config;
|
||||||
|
super();
|
||||||
|
const client = new OpenAI({
|
||||||
|
baseURL: "https://generativelanguage.googleapis.com/v1beta/openai/",
|
||||||
|
apiKey: process.env.GEMINI_API_KEY,
|
||||||
|
maxRetries: 0,
|
||||||
|
});
|
||||||
|
|
||||||
|
this._client = client;
|
||||||
|
this.model = model;
|
||||||
|
this.verbose = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
get client() {
|
||||||
|
return this._client;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Format the messages to the format required by the Gemini API since some models do not support system prompts.
|
||||||
|
* @see {NO_SYSTEM_PROMPT_MODELS}
|
||||||
|
* @param {import("openai").OpenAI.ChatCompletionMessage[]} messages
|
||||||
|
* @returns {import("openai").OpenAI.ChatCompletionMessage[]}
|
||||||
|
*/
|
||||||
|
formatMessages(messages) {
|
||||||
|
if (!NO_SYSTEM_PROMPT_MODELS.includes(this.model)) return messages;
|
||||||
|
|
||||||
|
// Replace the system message with a user/assistant message pair
|
||||||
|
const formattedMessages = [];
|
||||||
|
for (const message of messages) {
|
||||||
|
if (message.role === "system") {
|
||||||
|
formattedMessages.push({
|
||||||
|
role: "user",
|
||||||
|
content: message.content,
|
||||||
|
});
|
||||||
|
formattedMessages.push({
|
||||||
|
role: "assistant",
|
||||||
|
content: "Okay, I'll follow your instructions.",
|
||||||
|
});
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
formattedMessages.push(message);
|
||||||
|
}
|
||||||
|
return formattedMessages;
|
||||||
|
}
|
||||||
|
|
||||||
|
async #handleFunctionCallChat({ messages = [] }) {
|
||||||
|
return await this.client.chat.completions
|
||||||
|
.create({
|
||||||
|
model: this.model,
|
||||||
|
temperature: 0,
|
||||||
|
messages: this.cleanMsgs(this.formatMessages(messages)),
|
||||||
|
})
|
||||||
|
.then((result) => {
|
||||||
|
if (!result.hasOwnProperty("choices"))
|
||||||
|
throw new Error("Gemini chat: No results!");
|
||||||
|
if (result.choices.length === 0)
|
||||||
|
throw new Error("Gemini chat: No results length!");
|
||||||
|
return result.choices[0].message.content;
|
||||||
|
})
|
||||||
|
.catch((_) => {
|
||||||
|
return null;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a completion based on the received messages.
|
||||||
|
*
|
||||||
|
* @param messages A list of messages to send to the API.
|
||||||
|
* @param functions
|
||||||
|
* @returns The completion.
|
||||||
|
*/
|
||||||
|
async complete(messages, functions = []) {
|
||||||
|
try {
|
||||||
|
let completion;
|
||||||
|
|
||||||
|
if (functions.length > 0) {
|
||||||
|
const { toolCall, text } = await this.functionCall(
|
||||||
|
this.cleanMsgs(this.formatMessages(messages)),
|
||||||
|
functions,
|
||||||
|
this.#handleFunctionCallChat.bind(this)
|
||||||
|
);
|
||||||
|
|
||||||
|
if (toolCall !== null) {
|
||||||
|
this.providerLog(`Valid tool call found - running ${toolCall.name}.`);
|
||||||
|
this.deduplicator.trackRun(toolCall.name, toolCall.arguments);
|
||||||
|
return {
|
||||||
|
result: null,
|
||||||
|
functionCall: {
|
||||||
|
name: toolCall.name,
|
||||||
|
arguments: toolCall.arguments,
|
||||||
|
},
|
||||||
|
cost: 0,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
completion = { content: text };
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!completion?.content) {
|
||||||
|
this.providerLog(
|
||||||
|
"Will assume chat completion without tool call inputs."
|
||||||
|
);
|
||||||
|
const response = await this.client.chat.completions.create({
|
||||||
|
model: this.model,
|
||||||
|
messages: this.cleanMsgs(this.formatMessages(messages)),
|
||||||
|
});
|
||||||
|
completion = response.choices[0].message;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The UnTooled class inherited Deduplicator is mostly useful to prevent the agent
|
||||||
|
// from calling the exact same function over and over in a loop within a single chat exchange
|
||||||
|
// _but_ we should enable it to call previously used tools in a new chat interaction.
|
||||||
|
this.deduplicator.reset("runs");
|
||||||
|
return {
|
||||||
|
result: completion.content,
|
||||||
|
cost: 0,
|
||||||
|
};
|
||||||
|
} catch (error) {
|
||||||
|
throw new APIError(
|
||||||
|
error?.message
|
||||||
|
? `${this.constructor.name} encountered an error while executing the request: ${error.message}`
|
||||||
|
: "There was an error with the Gemini provider executing the request"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the cost of the completion.
|
||||||
|
*
|
||||||
|
* @param _usage The completion to get the cost for.
|
||||||
|
* @returns The cost of the completion.
|
||||||
|
*/
|
||||||
|
getCost(_usage) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
module.exports = GeminiProvider;
|
||||||
@ -21,6 +21,7 @@ const XAIProvider = require("./xai.js");
|
|||||||
const NovitaProvider = require("./novita.js");
|
const NovitaProvider = require("./novita.js");
|
||||||
const NvidiaNimProvider = require("./nvidiaNim.js");
|
const NvidiaNimProvider = require("./nvidiaNim.js");
|
||||||
const PPIOProvider = require("./ppio.js");
|
const PPIOProvider = require("./ppio.js");
|
||||||
|
const GeminiProvider = require("./gemini.js");
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
OpenAIProvider,
|
OpenAIProvider,
|
||||||
@ -46,4 +47,5 @@ module.exports = {
|
|||||||
NovitaProvider,
|
NovitaProvider,
|
||||||
NvidiaNimProvider,
|
NvidiaNimProvider,
|
||||||
PPIOProvider,
|
PPIOProvider,
|
||||||
|
GeminiProvider,
|
||||||
};
|
};
|
||||||
|
|||||||
@ -115,10 +115,6 @@ class AgentHandler {
|
|||||||
"LocalAI must have a valid base path to use for the api."
|
"LocalAI must have a valid base path to use for the api."
|
||||||
);
|
);
|
||||||
break;
|
break;
|
||||||
case "gemini":
|
|
||||||
if (!process.env.GEMINI_API_KEY)
|
|
||||||
throw new Error("Gemini API key must be provided to use agents.");
|
|
||||||
break;
|
|
||||||
case "openrouter":
|
case "openrouter":
|
||||||
if (!process.env.OPENROUTER_API_KEY)
|
if (!process.env.OPENROUTER_API_KEY)
|
||||||
throw new Error("OpenRouter API key must be provided to use agents.");
|
throw new Error("OpenRouter API key must be provided to use agents.");
|
||||||
@ -189,6 +185,10 @@ class AgentHandler {
|
|||||||
if (!process.env.PPIO_API_KEY)
|
if (!process.env.PPIO_API_KEY)
|
||||||
throw new Error("PPIO API Key must be provided to use agents.");
|
throw new Error("PPIO API Key must be provided to use agents.");
|
||||||
break;
|
break;
|
||||||
|
case "gemini":
|
||||||
|
if (!process.env.GEMINI_API_KEY)
|
||||||
|
throw new Error("Gemini API key must be provided to use agents.");
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
throw new Error(
|
throw new Error(
|
||||||
@ -224,8 +224,6 @@ class AgentHandler {
|
|||||||
return null;
|
return null;
|
||||||
case "koboldcpp":
|
case "koboldcpp":
|
||||||
return process.env.KOBOLD_CPP_MODEL_PREF ?? null;
|
return process.env.KOBOLD_CPP_MODEL_PREF ?? null;
|
||||||
case "gemini":
|
|
||||||
return process.env.GEMINI_MODEL_PREF ?? "gemini-pro";
|
|
||||||
case "localai":
|
case "localai":
|
||||||
return process.env.LOCAL_AI_MODEL_PREF ?? null;
|
return process.env.LOCAL_AI_MODEL_PREF ?? null;
|
||||||
case "openrouter":
|
case "openrouter":
|
||||||
@ -256,6 +254,8 @@ class AgentHandler {
|
|||||||
return process.env.NVIDIA_NIM_LLM_MODEL_PREF ?? null;
|
return process.env.NVIDIA_NIM_LLM_MODEL_PREF ?? null;
|
||||||
case "ppio":
|
case "ppio":
|
||||||
return process.env.PPIO_MODEL_PREF ?? "qwen/qwen2.5-32b-instruct";
|
return process.env.PPIO_MODEL_PREF ?? "qwen/qwen2.5-32b-instruct";
|
||||||
|
case "gemini":
|
||||||
|
return process.env.GEMINI_LLM_MODEL_PREF ?? "gemini-2.0-flash-lite";
|
||||||
default:
|
default:
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user