Native Tool calling (#5071)

* checkpoint

* test MCP and flows

* add native tool call detection back to LMStudio

* add native tool call loops for Ollama

* Add ability detection to DMR (regex parse)

* bedrock and generic openai with ENV flag

* deepseek native tool calling

* localAI native function

* groq support

* linting, add LiteLLM and OpenRouter native tool calling via flag
Timothy Carambat 2026-02-26 13:37:56 -08:00 committed by GitHub
parent 55dc0da488
commit ac0b1d401d
42 changed files with 2002 additions and 409 deletions

View File

@ -420,4 +420,9 @@ GID='1000'
# Disable MCP cooldown timer for agent calls
# this can lead to infinite recursive calls of the same function
# for some model/provider combinations
# MCP_NO_COOLDOWN="true
# MCP_NO_COOLDOWN="true"
# Allow native tool calling for specific providers.
# This can VASTLY improve performance and speed of agent calls.
# Check code for supported providers who can be enabled here via this flag
# PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING="generic-openai,bedrock,localai,groq,litellm,openrouter"

View File

@ -423,4 +423,9 @@ TTS_PROVIDER="native"
# Disable MCP cooldown timer for agent calls
# this can lead to infinite recursive calls of the same function
# for some model/provider combinations
# MCP_NO_COOLDOWN="true
# MCP_NO_COOLDOWN="true"
# Allow native tool calling for specific providers.
# This can VASTLY improve performance and speed of agent calls.
# Check code for supported providers who can be enabled here via this flag
# PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING="generic-openai,bedrock,localai,groq,litellm,openrouter"
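
Each provider opted in below gates on this flag with a simple substring check against the comma-separated value; a minimal standalone sketch of that check (the helper name is illustrative, the pattern mirrors the provider changes in this commit):

function nativeToolCallingEnabled(providerKey) {
  // Same check the providers use: is the key present in the comma-separated flag?
  return !!process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes(providerKey);
}
// nativeToolCallingEnabled("groq") -> true when the flag above is uncommented as shown.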

View File

@ -205,6 +205,47 @@ class DockerModelRunnerLLM {
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
/**
* Returns the capabilities of the model.
* Note: This is a heuristic that infers the model's capabilities from its metadata.
* It is not perfect, because every model's metadata is different and may not contain the key values we rely on.
* There is no "capabilities" key exposed by any API endpoint, so we fall back to parsing the metadata text.
* @returns {Promise<{tools: 'unknown' | boolean, reasoning: 'unknown' | boolean, imageGeneration: 'unknown' | boolean, vision: 'unknown' | boolean}>}
*/
async getModelCapabilities() {
try {
const endpoint = new URL(
parseDockerModelRunnerEndpoint(
process.env.DOCKER_MODEL_RUNNER_BASE_PATH,
"dmr"
)
);
// eg: /models/ai/qwen3:4B-UD-Q4_K_XL
endpoint.pathname = `/models/${this.model}`;
const response = await fetch(endpoint.toString());
const data = await response.text();
const tools = /tools|tool|tool_use|tool_call/.test(data);
const reasoning = /thinking|reason|reasoning|think/.test(data);
const imageGeneration = /diffusion/.test(data);
const vision = /vision|vllm|image/.test(data);
return {
tools: tools,
reasoning: reasoning,
imageGeneration: imageGeneration,
vision: vision,
};
} catch (error) {
console.error("Error getting model capabilities:", error);
return {
tools: "unknown",
reasoning: "unknown",
imageGeneration: "unknown",
vision: "unknown",
};
}
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
async embedTextInput(textInput) {
return await this.embedder.embedTextInput(textInput);
@ -232,6 +273,7 @@ function parseDockerModelRunnerEndpoint(basePath = null, to = "openai") {
try {
const url = new URL(basePath);
if (to === "openai") url.pathname = "engines/v1";
else if (to === "ollama") url.pathname = "api";
else if (to === "dmr") url.pathname = "";
return url.toString();
} catch (e) {
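
As a worked example of the regex heuristic above, suppose the fetched metadata text looks like the following (an illustrative blob, not a real Docker Model Runner response):

const data =
  '{"id":"ai/qwen3:4B","config":{"chat_template":"... tool_call ...","modalities":["text","vision"]}}';
/tools|tool|tool_use|tool_call/.test(data); // true  -> tools: true
/thinking|reason|reasoning|think/.test(data); // false -> reasoning: false
/diffusion/.test(data); // false -> imageGeneration: false
/vision|vllm|image/.test(data); // true  -> vision: true

Because the match runs over the raw metadata text, any occurrence of these substrings flips the flag, which is why the result is treated as a hint rather than a guarantee.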

View File

@ -272,6 +272,60 @@ class LMStudioLLM {
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
/**
* Returns the capabilities of the model.
* This uses the new /api/v1 endpoint, which returns the model info in a different format.
* @returns {Promise<{tools: 'unknown' | boolean, reasoning: 'unknown' | boolean, imageGeneration: 'unknown' | boolean, vision: 'unknown' | boolean}>}
*/
async getModelCapabilities() {
try {
const endpoint = new URL(
parseLMStudioBasePath(process.env.LMSTUDIO_BASE_PATH, "v1")
);
const apiKey = process.env.LMSTUDIO_AUTH_TOKEN ?? null;
endpoint.pathname += "/models";
const modelInfo =
(await fetch(endpoint.toString(), {
headers: {
"Content-Type": "application/json",
...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}),
},
})
.then((res) => {
if (!res.ok)
throw new Error(
`LMStudio:getModelCapabilities - ${res.statusText}`
);
return res.json();
})
.then(({ models = [] }) =>
models.find((model) => model.key === this.model)
)) || {};
const capabilities = modelInfo.hasOwnProperty("capabilities")
? modelInfo.capabilities
: {
trained_for_tool_use: "unknown",
vision: "unknown",
};
return {
tools: capabilities.trained_for_tool_use,
reasoning: "unknown",
imageGeneration: "unknown", // LM Studio does not support image generation yet.
vision: capabilities.vision,
};
} catch (error) {
console.error("Error getting model capabilities:", error);
return {
tools: "unknown",
reasoning: "unknown",
imageGeneration: "unknown",
vision: "unknown",
};
}
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
async embedTextInput(textInput) {
return await this.embedder.embedTextInput(textInput);
@ -292,12 +346,15 @@ class LMStudioLLM {
* Parse the base path for the LMStudio API. Since the base path must end in /v1 and cannot have a trailing slash,
* and the user can possibly set it to anything and likely incorrectly due to pasting behaviors, we need to ensure it is in the correct format.
* @param {string} basePath
* @param {'legacy' | 'v1'} apiVersion
* @returns {string}
*/
function parseLMStudioBasePath(providedBasePath = "") {
function parseLMStudioBasePath(providedBasePath = "", apiVersion = "legacy") {
try {
const baseURL = new URL(providedBasePath);
const basePath = `${baseURL.origin}/v1`;
let basePath = `${baseURL.origin}`;
if (apiVersion === "legacy") basePath += `/v1`;
if (apiVersion === "v1") basePath += `/api/v1`;
return basePath;
} catch (e) {
return providedBasePath;
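
Worked examples of the updated parser (inputs are illustrative):

parseLMStudioBasePath("http://localhost:1234/v1/"); // "http://localhost:1234/v1" (legacy default)
parseLMStudioBasePath("http://localhost:1234", "v1"); // "http://localhost:1234/api/v1"
parseLMStudioBasePath("not-a-url"); // "not-a-url" (returned unchanged when URL parsing fails)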

View File

@ -468,6 +468,32 @@ class OllamaAILLM {
});
}
/**
* Returns the capabilities of the model.
* @returns {Promise<{tools: 'unknown' | boolean, reasoning: 'unknown' | boolean, imageGeneration: 'unknown' | boolean, vision: 'unknown' | boolean}>}
*/
async getModelCapabilities() {
try {
const { capabilities = [] } = await this.client.show({
model: this.model,
});
return {
tools: capabilities.includes("tools") ? true : false,
reasoning: capabilities.includes("thinking") ? true : false,
imageGeneration: false, // we dont have any image generation capabilities for Ollama or anywhere right now.
vision: capabilities.includes("vision") ? true : false,
};
} catch (error) {
console.error("Error getting model capabilities:", error);
return {
tools: "unknown",
reasoning: "unknown",
imageGeneration: "unknown",
vision: "unknown",
};
}
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
async embedTextInput(textInput) {
return await this.embedder.embedTextInput(textInput);
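
For illustration, a model whose client.show() response reports a capabilities array like the one below maps to the following result (sample values are illustrative):

const capabilities = ["completion", "tools", "thinking"];
// -> { tools: true, reasoning: true, imageGeneration: false, vision: false }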

View File

@ -185,6 +185,24 @@ class AgentFlows {
.map(([uuid]) => `@@flow_${uuid}`);
}
/**
* Sanitize a flow name into a valid OpenAI-compatible tool name.
* Must match ^[a-zA-Z0-9_-]{1,64}$
* @param {string} flowName - The human-readable flow name
* @returns {string|null} Sanitized tool name, or null if empty after sanitization
*/
static sanitizeToolName(flowName) {
const sanitized = flowName
.toLowerCase()
.trim()
.replace(/\s+/g, "_")
.replace(/[^a-z0-9_-]/g, "")
.replace(/_+/g, "_")
.replace(/^[-_]+|[-_]+$/g, "");
if (!sanitized) return null;
return sanitized.slice(0, 64);
}
/**
* Load a flow plugin by its UUID
* @param {string} uuid - The UUID of the flow to load
@ -196,17 +214,18 @@ class AgentFlows {
const startBlock = flow.config.steps?.find((s) => s.type === "start");
const variables = startBlock?.config?.variables || [];
const toolName = AgentFlows.sanitizeToolName(flow.name) || `flow_${uuid}`;
return {
name: `flow_${uuid}`,
name: toolName,
description: `Execute agent flow: ${flow.name}`,
plugin: (_runtimeArgs = {}) => ({
name: `flow_${uuid}`,
name: toolName,
description:
flow.config.description || `Execute agent flow: ${flow.name}`,
setup: (aibitat) => {
aibitat.function({
name: `flow_${uuid}`,
name: toolName,
description:
flow.config.description || `Execute agent flow: ${flow.name}`,
parameters: {
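
Worked examples of the sanitizer, with the fallback shown above when nothing valid remains (flow names are illustrative):

AgentFlows.sanitizeToolName("Summarize Daily  Report!"); // "summarize_daily_report"
AgentFlows.sanitizeToolName("Get Weather (v2)"); // "get_weather_v2"
AgentFlows.sanitizeToolName("重要流程"); // null -> caller falls back to `flow_${uuid}`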

View File

@ -41,6 +41,7 @@ class AIbitat {
chats = [],
interrupt = "NEVER",
maxRounds = 100,
maxToolCalls = 10,
provider = "openai",
handlerProps = {}, // Inherited props we can spread so aibitat can access.
...rest
@ -48,6 +49,7 @@ class AIbitat {
this._chats = chats;
this.defaultInterrupt = interrupt;
this.maxRounds = maxRounds;
this.maxToolCalls = maxToolCalls;
this.handlerProps = handlerProps;
this.defaultProvider = {
@ -641,7 +643,8 @@ ${this.getHistory({ to: route.to })
provider,
messages = [],
functions = [],
byAgent = null
byAgent = null,
depth = 0
) {
const eventHandler = (type, data) => {
this?.socket?.send(type, data);
@ -655,6 +658,24 @@ ${this.getHistory({ to: route.to })
);
if (completionStream.functionCall) {
if (depth >= this.maxToolCalls) {
this.handlerProps?.log?.(
`[warning]: Maximum tool call limit (${this.maxToolCalls}) reached. Stopping tool execution.`
);
this?.introspect?.(
`Maximum tool call limit (${this.maxToolCalls}) reached. Returning what I have so far.`
);
const bailoutMessage =
completionStream.textResponse ||
"I reached the maximum number of tool calls allowed for a single response. Here is what I have so far based on the tools I was able to run.";
eventHandler?.("reportStreamEvent", {
type: "fullTextResponse",
uuid: v4(),
content: bailoutMessage,
});
return bailoutMessage;
}
const { name, arguments: args } = completionStream.functionCall;
const fn = this.functions.get(name);
@ -673,7 +694,8 @@ ${this.getHistory({ to: route.to })
},
],
functions,
byAgent
byAgent,
depth + 1
);
}
@ -730,7 +752,8 @@ ${this.getHistory({ to: route.to })
},
],
functions,
byAgent
byAgent,
depth + 1
);
}
@ -752,12 +775,26 @@ ${this.getHistory({ to: route.to })
provider,
messages = [],
functions = [],
byAgent = null
byAgent = null,
depth = 0
) {
// get the chat completion
const completion = await provider.complete(messages, functions);
if (completion.functionCall) {
if (depth >= this.maxToolCalls) {
this.handlerProps?.log?.(
`[warning]: Maximum tool call limit (${this.maxToolCalls}) reached. Stopping tool execution.`
);
this?.introspect?.(
`Maximum tool call limit (${this.maxToolCalls}) reached. Returning what I have so far.`
);
return (
completion.textResponse ||
"I reached the maximum number of tool calls allowed for a single response. Here is what I have so far based on the tools I was able to run."
);
}
const { name, arguments: args } = completion.functionCall;
const fn = this.functions.get(name);
@ -776,7 +813,8 @@ ${this.getHistory({ to: route.to })
},
],
functions,
byAgent
byAgent,
depth + 1
);
}
@ -824,7 +862,8 @@ ${this.getHistory({ to: route.to })
},
],
functions,
byAgent
byAgent,
depth + 1
);
}
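
Condensed, both the streaming and non-streaming paths above now follow the same depth-limited pattern; a sketch under illustrative names (handleReply, runTool, MAX_TOOL_CALLS, and BAILOUT_MESSAGE are placeholders, not the actual identifiers):

async function handleReply(provider, messages, functions, depth = 0) {
  const completion = await provider.complete(messages, functions);
  if (!completion.functionCall) return completion.textResponse;
  // Bail out before executing another tool once the per-reply budget is spent.
  if (depth >= MAX_TOOL_CALLS) return completion.textResponse || BAILOUT_MESSAGE;
  const result = await runTool(completion.functionCall);
  return handleReply(
    provider,
    [
      ...messages,
      { role: "function", name: completion.functionCall.name, content: result },
    ],
    functions,
    depth + 1
  );
}

In the actual history, the function-result message also carries originalFunctionCall metadata, which the shared tooled helper later in this commit uses to rebuild assistant tool_calls / tool pairs.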

View File

@ -31,6 +31,15 @@ class ApiPieProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -1,14 +1,12 @@
const { OpenAI } = require("openai");
const { AzureOpenAiLLM } = require("../../../AiProviders/azureOpenAi");
const Provider = require("./ai-provider.js");
const { tooledStream, tooledComplete } = require("./helpers/tooled.js");
const { RetryError } = require("../error.js");
const { v4 } = require("uuid");
const { safeJsonParse } = require("../../../http");
/**
* The agent provider for the Azure OpenAI API.
* Uses the tool calling format (not legacy function calling) for compatibility
* with newer Azure OpenAI models.
* Uses the shared native tool calling helper for OpenAI-compatible tool calling.
*/
class AzureOpenAiProvider extends Provider {
model;
@ -28,199 +26,27 @@ class AzureOpenAiProvider extends Provider {
}
/**
* Convert legacy function definitions to the tools format.
* @param {Array} functions - Legacy function definitions
* @returns {Array} Tools in the new format
*/
#formatFunctionsToTools(functions) {
if (!Array.isArray(functions) || functions.length === 0) return [];
return functions.map((func) => ({
type: "function",
function: {
name: func.name,
description: func.description,
parameters: func.parameters,
},
}));
}
/**
* Format messages to use tool calling format instead of legacy function format.
* Converts role: "function" messages to role: "tool" messages.
* @param {Array} messages - Messages array that may contain legacy function messages
* @returns {Array} Messages formatted for tool calling
*/
#formatMessagesForTools(messages) {
const formattedMessages = [];
for (const message of messages) {
if (message.role === "function") {
// Convert legacy function result to tool result format
// We need the tool_call_id from the originalFunctionCall
if (message.originalFunctionCall?.id) {
// First, add the assistant message with the tool_call if not already present
// Check if previous message already has this tool call
const prevMsg = formattedMessages[formattedMessages.length - 1];
if (!prevMsg || prevMsg.role !== "assistant" || !prevMsg.tool_calls) {
formattedMessages.push({
role: "assistant",
content: null,
tool_calls: [
{
id: message.originalFunctionCall.id,
type: "function",
function: {
name: message.originalFunctionCall.name,
arguments:
typeof message.originalFunctionCall.arguments === "string"
? message.originalFunctionCall.arguments
: JSON.stringify(
message.originalFunctionCall.arguments
),
},
},
],
});
}
// Add the tool result
formattedMessages.push({
role: "tool",
tool_call_id: message.originalFunctionCall.id,
content:
typeof message.content === "string"
? message.content
: JSON.stringify(message.content),
});
} else {
// Fallback: generate a tool_call_id if not present
const toolCallId = `call_${v4()}`;
formattedMessages.push({
role: "assistant",
content: null,
tool_calls: [
{
id: toolCallId,
type: "function",
function: {
name: message.name,
arguments: "{}",
},
},
],
});
formattedMessages.push({
role: "tool",
tool_call_id: toolCallId,
content:
typeof message.content === "string"
? message.content
: JSON.stringify(message.content),
});
}
} else {
formattedMessages.push(message);
}
}
return formattedMessages;
}
/**
* Stream a chat completion from the LLM with tool calling.
* Uses the tool calling format instead of legacy function calling.
* Stream a chat completion from Azure OpenAI with tool calling.
*
* @param {any[]} messages - The messages to send to the LLM.
* @param {any[]} functions - The functions to use in the LLM.
* @param {function} eventHandler - The event handler to use to report stream events.
* @returns {Promise<{ functionCall: any, textResponse: string }>} - The result of the chat completion.
* @param {any[]} messages
* @param {any[]} functions
* @param {function} eventHandler
* @returns {Promise<{ functionCall: any, textResponse: string }>}
*/
async stream(messages, functions = [], eventHandler = null) {
this.providerLog("Provider.stream - will process this chat completion.");
const msgUUID = v4();
try {
const formattedMessages = this.#formatMessagesForTools(messages);
const tools = this.#formatFunctionsToTools(functions);
const stream = await this.client.chat.completions.create({
model: this.model,
stream: true,
messages: formattedMessages,
...(tools.length > 0 ? { tools } : {}),
});
const result = {
functionCall: null,
textResponse: "",
};
// For accumulating tool calls during streaming
let currentToolCall = null;
for await (const chunk of stream) {
if (!chunk?.choices?.[0]) continue;
const choice = chunk.choices[0];
if (choice.delta?.content) {
result.textResponse += choice.delta.content;
eventHandler?.("reportStreamEvent", {
type: "textResponseChunk",
uuid: msgUUID,
content: choice.delta.content,
});
}
// Handle tool calls (new format)
if (choice.delta?.tool_calls) {
for (const toolCall of choice.delta.tool_calls) {
if (toolCall.id) {
// New tool call starting
currentToolCall = {
id: toolCall.id,
name: toolCall.function?.name || "",
arguments: toolCall.function?.arguments || "",
};
} else if (currentToolCall) {
// Continuation of existing tool call
if (toolCall.function?.name) {
currentToolCall.name += toolCall.function.name;
}
if (toolCall.function?.arguments) {
currentToolCall.arguments += toolCall.function.arguments;
}
}
if (currentToolCall) {
eventHandler?.("reportStreamEvent", {
uuid: `${msgUUID}:tool_call_invocation`,
type: "toolCallInvocation",
content: `Assembling Tool Call: ${currentToolCall.name}(${currentToolCall.arguments})`,
});
}
}
}
}
// Set the function call result if we have a tool call
if (currentToolCall) {
result.functionCall = {
id: currentToolCall.id,
name: currentToolCall.name,
arguments: safeJsonParse(currentToolCall.arguments, {}),
};
}
return {
textResponse: result.textResponse,
functionCall: result.functionCall,
};
return await tooledStream(
this.client,
this.model,
messages,
functions,
eventHandler
);
} catch (error) {
console.error(error.message, error);
// If invalid Auth error we need to abort because no amount of waiting
// will make auth better.
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
@ -228,91 +54,41 @@ class AzureOpenAiProvider extends Provider {
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Create a completion based on the received messages.
* Uses the tool calling format instead of legacy function calling.
* Create a completion based on the received messages with tool calling.
*
* @param messages A list of messages to send to the OpenAI API.
* @param functions
* @param {any[]} messages
* @param {any[]} functions
* @returns The completion.
*/
async complete(messages, functions = []) {
try {
const formattedMessages = this.#formatMessagesForTools(messages);
const tools = this.#formatFunctionsToTools(functions);
const result = await tooledComplete(
this.client,
this.model,
messages,
functions,
this.getCost.bind(this)
);
const response = await this.client.chat.completions.create({
model: this.model,
stream: false,
messages: formattedMessages,
...(tools.length > 0 ? { tools } : {}),
});
// Right now, we only support one completion,
// so we just take the first one in the list
const completion = response.choices[0].message;
const cost = this.getCost(response.usage);
// Handle tool calls (new format)
if (completion.tool_calls && completion.tool_calls.length > 0) {
const toolCall = completion.tool_calls[0];
let functionArgs = {};
try {
functionArgs = JSON.parse(toolCall.function.arguments);
} catch (error) {
// Call the complete function again in case of JSON error
const toolCallId = toolCall.id;
return this.complete(
[
...messages,
{
role: "function",
name: toolCall.function.name,
content: error?.message,
originalFunctionCall: {
id: toolCallId,
name: toolCall.function.name,
arguments: toolCall.function.arguments,
},
},
],
functions
);
}
return {
textResponse: null,
functionCall: {
id: toolCall.id,
name: toolCall.function.name,
arguments: functionArgs,
},
cost,
};
if (result.retryWithError) {
return this.complete([...messages, result.retryWithError], functions);
}
return {
textResponse: completion.content,
cost,
};
return result;
} catch (error) {
// If invalid Auth error we need to abort because no amount of waiting
// will make auth better.
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError // Also will catch AuthenticationError!!!
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
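
The stream events emitted by the shared helper are the same ones the removed inline implementation produced; a minimal eventHandler that consumes them (handler body is illustrative):

const eventHandler = (type, data) => {
  // type is "reportStreamEvent"; data.type is "textResponseChunk" while text
  // streams in, or "toolCallInvocation" while a tool call is being assembled.
  if (data.type === "textResponseChunk") process.stdout.write(data.content);
  if (data.type === "toolCallInvocation") console.log(`\n${data.content}`);
};
// const { textResponse, functionCall } = await provider.stream(messages, functions, eventHandler);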

View File

@ -7,10 +7,13 @@ const { AWSBedrockLLM } = require("../../../AiProviders/bedrock/index.js");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const { safeJsonParse } = require("../../../http");
const { v4 } = require("uuid");
const {
HumanMessage,
SystemMessage,
AIMessage,
ToolMessage,
} = require("@langchain/core/messages");
/**
@ -32,6 +35,7 @@ class AWSBedrockProvider extends InheritMultiple([Provider, UnTooled]) {
this._client = client;
this.model = model;
this.verbose = true;
this._supportsToolCalling = null;
}
/**
@ -45,6 +49,27 @@ class AWSBedrockProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native tool calling via the Bedrock Converse API.
* Checks the ENV to see if the provider supports tool calling.
* If the ENV is not set, we default to false.
* @returns {boolean}
*/
supportsNativeToolCalling() {
if (this._supportsToolCalling !== null) return this._supportsToolCalling;
const supportsToolCalling =
process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes("bedrock");
if (supportsToolCalling)
this.providerLog("AWS Bedrock native tool calling is ENABLED via ENV.");
else
this.providerLog(
"AWS Bedrock native tool calling is DISABLED via ENV. Will use UnTooled instead."
);
this._supportsToolCalling = supportsToolCalling;
return supportsToolCalling;
}
/**
* Gets the credentials for the AWS Bedrock LLM based on the authentication method provided.
* @returns {object} The credentials object.
@ -85,6 +110,99 @@ class AWSBedrockProvider extends InheritMultiple([Provider, UnTooled]) {
return langchainChats;
}
/**
* Convert aibitat message history to Langchain message prototypes with
* proper tool call / tool result handling for native tool calling.
* role:"function" messages (from previous aibitat tool runs) are converted
* to AIMessage(tool_calls) + ToolMessage pairs that Langchain expects.
* @param {Array} chats - The aibitat message history.
* @returns {Array} Langchain message instances.
*/
#convertToLangchainPrototypesWithTools(chats = []) {
const langchainChats = [];
for (const chat of chats) {
if (chat.role === "function") {
if (chat.originalFunctionCall?.id) {
const prevMsg = langchainChats[langchainChats.length - 1];
if (
!prevMsg ||
!(prevMsg instanceof AIMessage) ||
!prevMsg.tool_calls?.length
) {
langchainChats.push(
new AIMessage({
content: "",
tool_calls: [
{
name: chat.originalFunctionCall.name,
args:
typeof chat.originalFunctionCall.arguments === "string"
? safeJsonParse(chat.originalFunctionCall.arguments, {})
: chat.originalFunctionCall.arguments,
id: chat.originalFunctionCall.id,
},
],
})
);
}
langchainChats.push(
new ToolMessage({
content:
typeof chat.content === "string"
? chat.content
: JSON.stringify(chat.content),
tool_call_id: chat.originalFunctionCall.id,
})
);
} else {
const toolCallId = `call_${v4()}`;
langchainChats.push(
new AIMessage({
content: "",
tool_calls: [{ name: chat.name, args: {}, id: toolCallId }],
})
);
langchainChats.push(
new ToolMessage({
content:
typeof chat.content === "string"
? chat.content
: JSON.stringify(chat.content),
tool_call_id: toolCallId,
})
);
}
} else if (chat.role === "system") {
langchainChats.push(new SystemMessage({ content: chat.content }));
} else if (chat.role === "user") {
langchainChats.push(new HumanMessage({ content: chat.content }));
} else if (chat.role === "assistant") {
langchainChats.push(new AIMessage({ content: chat.content }));
}
}
return langchainChats;
}
/**
* Convert aibitat function definitions to the format expected by
* Langchain's ChatBedrockConverse.bindTools().
* @param {Array<{name: string, description: string, parameters: object}>} functions
* @returns {Array<{type: "function", function: {name: string, description: string, parameters: object}}>}
*/
#formatFunctionsToLangchainTools(functions) {
if (!Array.isArray(functions) || functions.length === 0) return [];
return functions.map((func) => ({
type: "function",
function: {
name: func.name,
description: func.description,
parameters: func.parameters,
},
}));
}
async #handleFunctionCallChat({ messages = [] }) {
const response = await this.client
.invoke(this.#convertToLangchainPrototypes(messages))
@ -141,6 +259,7 @@ class AWSBedrockProvider extends InheritMultiple([Provider, UnTooled]) {
/**
* Stream a chat completion from the Bedrock LLM with tool calling.
* Uses native Bedrock Converse tool calling when supported, otherwise falls back to UnTooled.
*
* @param {any[]} messages - The messages to send to the LLM.
* @param {any[]} functions - The functions to use in the LLM.
@ -148,38 +267,149 @@ class AWSBedrockProvider extends InheritMultiple([Provider, UnTooled]) {
* @returns {Promise<{ functionCall: any, textResponse: string }>} - The result of the chat completion.
*/
async stream(messages, functions = [], eventHandler = null) {
return await UnTooled.prototype.stream
.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
)
.catch((e) => {
AWSBedrockLLM.errorToHumanReadable(e, {
method: "stream",
model: this.model,
const useNative = functions.length > 0 && this.supportsNativeToolCalling();
if (!useNative) {
return await UnTooled.prototype.stream
.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
)
.catch((e) => {
AWSBedrockLLM.errorToHumanReadable(e, {
method: "stream",
model: this.model,
});
});
}
this.providerLog(
"Provider.stream (tooled) - will process this chat completion."
);
try {
const langchainMessages =
this.#convertToLangchainPrototypesWithTools(messages);
const tools = this.#formatFunctionsToLangchainTools(functions);
const modelWithTools = this.client.bindTools(tools);
const stream = await modelWithTools.stream(langchainMessages);
const msgUUID = v4();
let textResponse = "";
let finalMessage = null;
for await (const chunk of stream) {
finalMessage =
finalMessage === null ? chunk : finalMessage.concat(chunk);
const content = typeof chunk.content === "string" ? chunk.content : "";
if (content) {
textResponse += content;
eventHandler?.("reportStreamEvent", {
type: "textResponseChunk",
uuid: msgUUID,
content,
});
}
if (chunk.tool_call_chunks?.length) {
for (const toolChunk of chunk.tool_call_chunks) {
if (toolChunk.name) {
eventHandler?.("reportStreamEvent", {
uuid: `${msgUUID}:tool_call_invocation`,
type: "toolCallInvocation",
content: `Assembling Tool Call: ${toolChunk.name}`,
});
}
}
}
}
if (finalMessage?.tool_calls?.length > 0) {
const toolCall = finalMessage.tool_calls[0];
return {
textResponse,
functionCall: {
id: toolCall.id || `call_${v4()}`,
name: toolCall.name,
arguments: toolCall.args || {},
},
cost: 0,
};
}
return { textResponse, functionCall: null, cost: 0 };
} catch (e) {
AWSBedrockLLM.errorToHumanReadable(e, {
method: "stream",
model: this.model,
});
}
}
/**
* Create a completion based on the received messages.
* Create a non-streaming completion with tool calling support.
* Uses native Bedrock Converse tool calling when supported, otherwise falls back to UnTooled.
*
* @param messages A list of messages to send to the API.
* @param functions
* @param {any[]} messages A list of messages to send to the API.
* @param {any[]} functions The function definitions available to the model.
* @returns The completion.
*/
async complete(messages, functions = []) {
return await UnTooled.prototype.complete
.call(this, messages, functions, this.#handleFunctionCallChat.bind(this))
.catch((e) => {
AWSBedrockLLM.errorToHumanReadable(e, {
method: "complete",
model: this.model,
const useNative = functions.length > 0 && this.supportsNativeToolCalling();
if (!useNative) {
return await UnTooled.prototype.complete
.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
)
.catch((e) => {
AWSBedrockLLM.errorToHumanReadable(e, {
method: "complete",
model: this.model,
});
});
}
try {
const langchainMessages =
this.#convertToLangchainPrototypesWithTools(messages);
const tools = this.#formatFunctionsToLangchainTools(functions);
const modelWithTools = this.client.bindTools(tools);
const response = await modelWithTools.invoke(langchainMessages);
if (response.tool_calls?.length > 0) {
const toolCall = response.tool_calls[0];
return {
textResponse: null,
functionCall: {
id: toolCall.id || `call_${v4()}`,
name: toolCall.name,
arguments: toolCall.args || {},
},
cost: 0,
};
}
return {
textResponse:
typeof response.content === "string"
? response.content
: JSON.stringify(response.content),
cost: 0,
};
} catch (e) {
AWSBedrockLLM.errorToHumanReadable(e, {
method: "complete",
model: this.model,
});
}
}
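
To illustrate the conversion above, a single aibitat role:"function" history entry becomes an AIMessage/ToolMessage pair roughly like this (values are illustrative; AIMessage and ToolMessage come from @langchain/core/messages as imported above):

// Input history entry:
// { role: "function", name: "web_search", content: '"Sunny, 21C"',
//   originalFunctionCall: { id: "call_abc", name: "web_search", arguments: { query: "weather berlin" } } }
new AIMessage({
  content: "",
  tool_calls: [{ name: "web_search", args: { query: "weather berlin" }, id: "call_abc" }],
});
new ToolMessage({ content: '"Sunny, 21C"', tool_call_id: "call_abc" });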
/**

View File

@ -28,6 +28,15 @@ class CohereProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
#convertChatHistoryCohere(chatHistory = []) {
let cohereHistory = [];
chatHistory.forEach((message) => {

View File

@ -35,6 +35,15 @@ class CometApiProvider extends InheritMultiple([Provider, UnTooled]) {
return false;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -2,6 +2,8 @@ const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const { tooledStream, tooledComplete } = require("./helpers/tooled.js");
const { RetryError } = require("../error.js");
const { toValidNumber } = require("../../../http/index.js");
class DeepSeekProvider extends InheritMultiple([Provider, UnTooled]) {
@ -32,6 +34,23 @@ class DeepSeekProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* All current DeepSeek models (deepseek-chat and deepseek-reasoner)
* support native OpenAI-compatible tool calling.
* @returns {boolean}
*/
supportsNativeToolCalling() {
return true;
}
get #isThinkingModel() {
return this.model === "deepseek-reasoner";
}
get #tooledOptions() {
return this.#isThinkingModel ? { injectReasoningContent: true } : {};
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({
@ -60,22 +79,83 @@ class DeepSeekProvider extends InheritMultiple([Provider, UnTooled]) {
}
async stream(messages, functions = [], eventHandler = null) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
const useNative = functions.length > 0 && this.supportsNativeToolCalling();
if (!useNative) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
);
}
this.providerLog(
"Provider.stream (tooled) - will process this chat completion."
);
try {
return await tooledStream(
this.client,
this.model,
messages,
functions,
eventHandler,
this.#tooledOptions
);
} catch (error) {
console.error(error.message, error);
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
async complete(messages, functions = []) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
const useNative = functions.length > 0 && this.supportsNativeToolCalling();
if (!useNative) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
}
try {
const result = await tooledComplete(
this.client,
this.model,
messages,
functions,
this.getCost.bind(this),
this.#tooledOptions
);
if (result.retryWithError) {
return this.complete([...messages, result.retryWithError], functions);
}
return result;
} catch (error) {
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
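
The only DeepSeek-specific twist is injectReasoningContent: per the shared helper's docs, deepseek-reasoner (thinking mode) requires every assistant message sent back to the API to carry a reasoning_content field, so the formatter adds an empty one where it is missing. Illustratively, using formatMessagesForTools from ./helpers/tooled.js:

formatMessagesForTools(
  [{ role: "assistant", content: "Checking the weather now." }],
  { injectReasoningContent: true }
);
// -> [{ role: "assistant", content: "Checking the weather now.", reasoning_content: "" }]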
/**

View File

@ -37,6 +37,15 @@ class DellProAiStudioProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -2,7 +2,10 @@ const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const { tooledStream, tooledComplete } = require("./helpers/tooled.js");
const { RetryError } = require("../error.js");
const {
DockerModelRunnerLLM,
parseDockerModelRunnerEndpoint,
} = require("../../../AiProviders/dockerModelRunner/index.js");
@ -31,6 +34,7 @@ class DockerModelRunnerProvider extends InheritMultiple([Provider, UnTooled]) {
this._client = client;
this.model = model;
this.verbose = true;
this._supportsToolCalling = null;
}
get client() {
@ -41,6 +45,19 @@ class DockerModelRunnerProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
async supportsNativeToolCalling() {
if (this._supportsToolCalling !== null) return this._supportsToolCalling;
const dmr = new DockerModelRunnerLLM(null, this.model);
const capabilities = await dmr.getModelCapabilities();
this._supportsToolCalling = capabilities.tools === true;
return this._supportsToolCalling;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({
@ -67,23 +84,92 @@ class DockerModelRunnerProvider extends InheritMultiple([Provider, UnTooled]) {
});
}
/**
* Stream a chat completion with tool calling support.
* Uses native tool calling when supported, otherwise falls back to UnTooled.
*/
async stream(messages, functions = [], eventHandler = null) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
);
}
this.providerLog(
"Provider.stream (tooled) - will process this chat completion."
);
try {
return await tooledStream(
this.client,
this.model,
messages,
functions,
eventHandler
);
} catch (error) {
console.error(error.message, error);
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Create a non-streaming completion with tool calling support.
* Uses native tool calling when supported, otherwise falls back to UnTooled.
*/
async complete(messages, functions = []) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
}
try {
const result = await tooledComplete(
this.client,
this.model,
messages,
functions,
this.getCost.bind(this)
);
if (result.retryWithError) {
return this.complete([...messages, result.retryWithError], functions);
}
return result;
} catch (error) {
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**

View File

@ -33,6 +33,15 @@ class FireworksAIProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -40,6 +40,15 @@ class FoundryProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
await FoundryLLM.cacheContextWindows();
return await this.client.chat.completions

View File

@ -2,6 +2,8 @@ const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const { tooledStream, tooledComplete } = require("./helpers/tooled.js");
const { RetryError } = require("../error.js");
const { toValidNumber } = require("../../../http/index.js");
const { getAnythingLLMUserAgent } = require("../../../../endpoints/utils");
const { GenericOpenAiLLM } = require("../../../AiProviders/genericOpenAi");
@ -31,6 +33,7 @@ class GenericOpenAiProvider extends InheritMultiple([Provider, UnTooled]) {
this._client = client;
this.model = model;
this.verbose = true;
this._supportsToolCalling = null;
this.maxTokens = process.env.GENERIC_OPEN_AI_MAX_TOKENS
? toValidNumber(process.env.GENERIC_OPEN_AI_MAX_TOKENS, 1024)
: 1024;
@ -46,6 +49,32 @@ class GenericOpenAiProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* - This can be any OpenAI compatible provider that supports tool calling
* - We check the ENV to see if the provider supports tool calling.
* - If the ENV is not set, we default to false.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
if (this._supportsToolCalling !== null) return this._supportsToolCalling;
const supportsToolCalling =
process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes(
"generic-openai"
);
if (supportsToolCalling)
this.providerLog(
"Generic OpenAI supports native tool calling is ENABLED via ENV."
);
else
this.providerLog(
"Generic OpenAI supports native tool calling is DISABLED via ENV. Will use UnTooled instead."
);
this._supportsToolCalling = supportsToolCalling;
return supportsToolCalling;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({
@ -74,23 +103,92 @@ class GenericOpenAiProvider extends InheritMultiple([Provider, UnTooled]) {
});
}
/**
* Stream a chat completion with tool calling support.
* Uses native tool calling when supported, otherwise falls back to UnTooled.
*/
async stream(messages, functions = [], eventHandler = null) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
);
}
this.providerLog(
"Provider.stream (tooled) - will process this chat completion."
);
try {
return await tooledStream(
this.client,
this.model,
messages,
functions,
eventHandler
);
} catch (error) {
console.error(error.message, error);
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Create a non-streaming completion with tool calling support.
* Uses native tool calling when supported, otherwise falls back to UnTooled.
*/
async complete(messages, functions = []) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
}
try {
const result = await tooledComplete(
this.client,
this.model,
messages,
functions,
this.getCost.bind(this)
);
if (result.retryWithError) {
return this.complete([...messages, result.retryWithError], functions);
}
return result;
} catch (error) {
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**

View File

@ -26,6 +26,15 @@ class GiteeAIProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -2,10 +2,13 @@ const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const { tooledStream, tooledComplete } = require("./helpers/tooled.js");
const { RetryError } = require("../error.js");
/**
* The agent provider for the GroqAI provider.
* We wrap Groq in UnTooled because its tool-calling built in is quite bad and wasteful.
* Supports true OpenAI-compatible tool calling when enabled via ENV,
* falling back to the UnTooled prompt-based approach otherwise.
*/
class GroqProvider extends InheritMultiple([Provider, UnTooled]) {
model;
@ -22,6 +25,7 @@ class GroqProvider extends InheritMultiple([Provider, UnTooled]) {
this._client = client;
this.model = model;
this.verbose = true;
this._supportsToolCalling = null;
}
get client() {
@ -32,6 +36,27 @@ class GroqProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* - Since Groq models vary in tool calling support, we check the ENV.
* - If the ENV is not set, we default to false.
* @returns {boolean}
*/
supportsNativeToolCalling() {
if (this._supportsToolCalling !== null) return this._supportsToolCalling;
const supportsToolCalling =
process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes("groq");
if (supportsToolCalling)
this.providerLog("Groq supports native tool calling is ENABLED via ENV.");
else
this.providerLog(
"Groq supports native tool calling is DISABLED via ENV. Will use UnTooled instead."
);
this._supportsToolCalling = supportsToolCalling;
return supportsToolCalling;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({
@ -58,31 +83,98 @@ class GroqProvider extends InheritMultiple([Provider, UnTooled]) {
});
}
/**
* Stream a chat completion with tool calling support.
* Uses native tool calling when enabled via ENV, otherwise falls back to UnTooled.
*/
async stream(messages, functions = [], eventHandler = null) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
);
}
this.providerLog(
"Provider.stream (tooled) - will process this chat completion."
);
try {
return await tooledStream(
this.client,
this.model,
messages,
functions,
eventHandler
);
} catch (error) {
console.error(error.message, error);
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Create a non-streaming completion with tool calling support.
* Uses native tool calling when enabled via ENV, otherwise falls back to UnTooled.
*/
async complete(messages, functions = []) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
}
try {
const result = await tooledComplete(
this.client,
this.model,
messages,
functions,
this.getCost.bind(this)
);
if (result.retryWithError) {
return this.complete([...messages, result.retryWithError], functions);
}
return result;
} catch (error) {
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Get the cost of the completion.
*
* @param _usage The completion to get the cost for.
* @returns The cost of the completion.
* Stubbed since Groq has no cost basis.
*/
getCost(_usage) {
return 0;

View File

@ -0,0 +1,298 @@
const { v4 } = require("uuid");
const { safeJsonParse } = require("../../../../http");
/**
* Shared native OpenAI-compatible tool calling utilities.
* Any provider with an OpenAI-compatible client can use these functions
* instead of the UnTooled prompt-based approach when the model supports
* native tool calling.
*
* Usage in a provider:
* const { tooledStream, tooledComplete } = require("./helpers/tooled.js");
*
* async stream(messages, functions, eventHandler) {
* if (functions.length > 0 && await this.supportsNativeToolCalling()) {
* return tooledStream(this.client, this.model, messages, functions, eventHandler);
* }
* // ... fallback to UnTooled ...
* }
*/
/**
* Convert aibitat function definitions to the OpenAI tools format.
* @param {Array<{name: string, description: string, parameters: object}>} functions
* @returns {Array<{type: "function", function: {name: string, description: string, parameters: object}}>}
*/
function formatFunctionsToTools(functions) {
if (!Array.isArray(functions) || functions.length === 0) return [];
return functions.map((func) => ({
type: "function",
function: {
name: func.name,
description: func.description,
parameters: func.parameters,
},
}));
}
/**
* Convert the aibitat message history (which uses role:"function" with
* `originalFunctionCall` metadata) into the OpenAI tool-calling message
* format (assistant `tool_calls` + role:"tool" pairs).
* @param {Array} messages
* @param {{injectReasoningContent?: boolean}} options
* - injectReasoningContent: when true, ensures every assistant message has
* a `reasoning_content` field (required by DeepSeek thinking-mode models).
* @returns {Array} Messages formatted for the OpenAI tools API
*/
function formatMessagesForTools(messages, options = {}) {
const formattedMessages = [];
const { injectReasoningContent = false } = options;
for (const message of messages) {
if (message.role === "function") {
if (message.originalFunctionCall?.id) {
const prevMsg = formattedMessages[formattedMessages.length - 1];
if (!prevMsg || prevMsg.role !== "assistant" || !prevMsg.tool_calls) {
formattedMessages.push({
role: "assistant",
content: null,
...(injectReasoningContent ? { reasoning_content: "" } : {}),
tool_calls: [
{
id: message.originalFunctionCall.id,
type: "function",
function: {
name: message.originalFunctionCall.name,
arguments:
typeof message.originalFunctionCall.arguments === "string"
? message.originalFunctionCall.arguments
: JSON.stringify(message.originalFunctionCall.arguments),
},
},
],
});
}
formattedMessages.push({
role: "tool",
tool_call_id: message.originalFunctionCall.id,
content:
typeof message.content === "string"
? message.content
: JSON.stringify(message.content),
});
} else {
const toolCallId = `call_${v4()}`;
formattedMessages.push({
role: "assistant",
content: null,
...(injectReasoningContent ? { reasoning_content: "" } : {}),
tool_calls: [
{
id: toolCallId,
type: "function",
function: {
name: message.name,
arguments: "{}",
},
},
],
});
formattedMessages.push({
role: "tool",
tool_call_id: toolCallId,
content:
typeof message.content === "string"
? message.content
: JSON.stringify(message.content),
});
}
} else if (
injectReasoningContent &&
message.role === "assistant" &&
!("reasoning_content" in message)
) {
formattedMessages.push({ ...message, reasoning_content: "" });
} else {
formattedMessages.push(message);
}
}
return formattedMessages;
}
/**
* Stream a chat completion using native OpenAI-compatible tool calling.
* Handles parallel tool calls by tracking each tool call by its streaming
* index, then returning only the first one for the agent framework to process.
*
* @param {import("openai").OpenAI} client - OpenAI-compatible client
* @param {string} model - Model identifier
* @param {Array} messages - Raw aibitat message history
* @param {Array} functions - Aibitat function definitions
* @param {function|null} eventHandler - Stream event handler
* @param {{injectReasoningContent?: boolean}} options - Provider-specific options forwarded to formatMessagesForTools
* @returns {Promise<{textResponse: string, functionCall: object|null}>}
*/
async function tooledStream(
client,
model,
messages,
functions = [],
eventHandler = null,
options = {}
) {
const msgUUID = v4();
const formattedMessages = formatMessagesForTools(messages, options);
const tools = formatFunctionsToTools(functions);
const stream = await client.chat.completions.create({
model,
stream: true,
messages: formattedMessages,
...(tools.length > 0 ? { tools } : {}),
});
const result = {
functionCall: null,
textResponse: "",
};
const toolCallsByIndex = {};
for await (const chunk of stream) {
if (!chunk?.choices?.[0]) continue;
const choice = chunk.choices[0];
if (choice.delta?.content) {
result.textResponse += choice.delta.content;
eventHandler?.("reportStreamEvent", {
type: "textResponseChunk",
uuid: msgUUID,
content: choice.delta.content,
});
}
if (choice.delta?.tool_calls) {
for (const toolCall of choice.delta.tool_calls) {
const idx = toolCall.index ?? 0;
if (toolCall.id) {
toolCallsByIndex[idx] = {
id: toolCall.id,
name: toolCall.function?.name || "",
arguments: toolCall.function?.arguments || "",
};
} else if (toolCallsByIndex[idx]) {
if (toolCall.function?.name) {
toolCallsByIndex[idx].name += toolCall.function.name;
}
if (toolCall.function?.arguments) {
toolCallsByIndex[idx].arguments += toolCall.function.arguments;
}
}
if (toolCallsByIndex[idx]) {
eventHandler?.("reportStreamEvent", {
uuid: `${msgUUID}:tool_call_invocation`,
type: "toolCallInvocation",
content: `Assembling Tool Call: ${toolCallsByIndex[idx].name}(${toolCallsByIndex[idx].arguments})`,
});
}
}
}
}
const toolCallIndices = Object.keys(toolCallsByIndex).map(Number);
if (toolCallIndices.length > 0) {
const firstToolCall = toolCallsByIndex[Math.min(...toolCallIndices)];
result.functionCall = {
id: firstToolCall.id,
name: firstToolCall.name,
arguments: safeJsonParse(firstToolCall.arguments, {}),
};
}
return {
textResponse: result.textResponse,
functionCall: result.functionCall,
};
}
/**
* Non-streaming chat completion using native OpenAI-compatible tool calling.
* Returns the first tool call if the model requests any, otherwise the text response.
*
* @param {import("openai").OpenAI} client - OpenAI-compatible client
* @param {string} model - Model identifier
* @param {Array} messages - Raw aibitat message history
* @param {Array} functions - Aibitat function definitions
* @param {function} getCostFn - Provider's getCost function
* @param {{injectReasoningContent?: boolean}} options - Provider-specific options forwarded to formatMessagesForTools
* @returns {Promise<{textResponse: string|null, functionCall: object|null, cost: number}>}
*/
async function tooledComplete(
client,
model,
messages,
functions = [],
getCostFn = () => 0,
options = {}
) {
const formattedMessages = formatMessagesForTools(messages, options);
const tools = formatFunctionsToTools(functions);
const response = await client.chat.completions.create({
model,
stream: false,
messages: formattedMessages,
...(tools.length > 0 ? { tools } : {}),
});
const completion = response.choices[0].message;
const cost = getCostFn(response.usage);
if (completion.tool_calls && completion.tool_calls.length > 0) {
const toolCall = completion.tool_calls[0];
const functionArgs = safeJsonParse(toolCall.function.arguments, null);
if (functionArgs === null) {
return {
textResponse: null,
retryWithError: {
role: "function",
name: toolCall.function.name,
content: `Failed to parse tool call arguments as JSON. Raw arguments: ${toolCall.function.arguments}`,
originalFunctionCall: {
id: toolCall.id,
name: toolCall.function.name,
arguments: toolCall.function.arguments,
},
},
cost,
};
}
return {
textResponse: null,
functionCall: {
id: toolCall.id,
name: toolCall.function.name,
arguments: functionArgs,
},
cost,
};
}
return {
textResponse: completion.content,
cost,
};
}
module.exports = {
formatFunctionsToTools,
formatMessagesForTools,
tooledStream,
tooledComplete,
};
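
To make the history conversion concrete, a prior tool run that aibitat stored as a role:"function" message expands into an assistant tool_calls / tool pair (message contents are illustrative):

const history = [
  { role: "user", content: "What is the weather in Berlin?" },
  {
    role: "function",
    name: "get_weather",
    content: '{"tempC":21}',
    originalFunctionCall: {
      id: "call_123",
      name: "get_weather",
      arguments: { city: "Berlin" },
    },
  },
];
formatMessagesForTools(history);
// -> [
//   { role: "user", content: "What is the weather in Berlin?" },
//   { role: "assistant", content: null, tool_calls: [{ id: "call_123", type: "function",
//       function: { name: "get_weather", arguments: '{"city":"Berlin"}' } }] },
//   { role: "tool", tool_call_id: "call_123", content: '{"tempC":21}' },
// ]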

View File

@ -32,6 +32,15 @@ class KoboldCPPProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -2,9 +2,13 @@ const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const { tooledStream, tooledComplete } = require("./helpers/tooled.js");
const { RetryError } = require("../error.js");
/**
* The agent provider for LiteLLM.
* Supports true OpenAI-compatible tool calling when enabled via ENV,
* falling back to the UnTooled prompt-based approach otherwise.
*/
class LiteLLMProvider extends InheritMultiple([Provider, UnTooled]) {
model;
@ -21,6 +25,7 @@ class LiteLLMProvider extends InheritMultiple([Provider, UnTooled]) {
this._client = client;
this.model = model || process.env.LITE_LLM_MODEL_PREF;
this.verbose = true;
this._supportsToolCalling = null;
}
get client() {
@ -31,6 +36,29 @@ class LiteLLMProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* - Since LiteLLM models vary in tool calling support, we check the ENV.
* - If the ENV is not set, we default to false.
* @returns {boolean}
*/
supportsNativeToolCalling() {
if (this._supportsToolCalling !== null) return this._supportsToolCalling;
const supportsToolCalling =
process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes("litellm");
if (supportsToolCalling)
this.providerLog(
"LiteLLM supports native tool calling is ENABLED via ENV."
);
else
this.providerLog(
"LiteLLM supports native tool calling is DISABLED via ENV. Will use UnTooled instead."
);
this._supportsToolCalling = supportsToolCalling;
return supportsToolCalling;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({
@ -57,25 +85,99 @@ class LiteLLMProvider extends InheritMultiple([Provider, UnTooled]) {
});
}
/**
* Stream a chat completion with tool calling support.
* Uses native tool calling when enabled via ENV, otherwise falls back to UnTooled.
*/
async stream(messages, functions = [], eventHandler = null) {
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
);
}
this.providerLog(
"Provider.stream (tooled) - will process this chat completion."
);
try {
return await tooledStream(
this.client,
this.model,
messages,
functions,
eventHandler
);
} catch (error) {
console.error(error.message, error);
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Create a non-streaming completion with tool calling support.
* Uses native tool calling when enabled via ENV, otherwise falls back to UnTooled.
*/
async complete(messages, functions = []) {
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
}
try {
const result = await tooledComplete(
this.client,
this.model,
messages,
functions,
this.getCost.bind(this)
);
if (result.retryWithError) {
return this.complete([...messages, result.retryWithError], functions);
}
return result;
} catch (error) {
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Get the cost of the completion.
* @param _usage The completion to get the cost for.
* @returns The cost of the completion.
*/
getCost(_usage) {
return 0;
}
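// Illustrative only (example value, not a recommendation): the ENV gate above is a
// plain substring check against a comma-separated list, shared by LiteLLM, LocalAI
// and OpenRouter in this commit.
//   PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING="litellm,openrouter"
//   process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes("litellm");   // true  -> native tool calling
//   process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes("localai");   // false -> falls back to UnTooled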

View File

@ -2,6 +2,8 @@ const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const { tooledStream, tooledComplete } = require("./helpers/tooled.js");
const { RetryError } = require("../error.js");
const {
LMStudioLLM,
parseLMStudioBasePath,
@ -9,12 +11,13 @@ const {
/**
* The agent provider for the LMStudio.
* Supports true OpenAI-compatible tool calling when the model supports it,
* falling back to the UnTooled prompt-based approach otherwise.
*/
class LMStudioProvider extends InheritMultiple([Provider, UnTooled]) {
model;
/**
*
* @param {{model?: string}} config
*/
constructor(config = {}) {
@ -32,6 +35,7 @@ class LMStudioProvider extends InheritMultiple([Provider, UnTooled]) {
this._client = client;
this.model = model;
this.verbose = true;
this._supportsToolCalling = null;
}
get client() {
@ -42,6 +46,21 @@ class LMStudioProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether the loaded model supports native OpenAI-compatible tool calling.
* Checks the LMStudio /api/v1/models endpoint for the model's capabilities.
* @returns {Promise<boolean>}
*/
async supportsNativeToolCalling() {
if (this._supportsToolCalling !== null) return this._supportsToolCalling;
const lmstudio = new LMStudioLLM(null, this.model);
const capabilities = await lmstudio.getModelCapabilities();
this._supportsToolCalling = capabilities.tools === true;
return this._supportsToolCalling;
}
// ---- UnTooled callbacks (used when native tool calling is not supported) ----
async #handleFunctionCallChat({ messages = [] }) {
await LMStudioLLM.cacheContextWindows();
return await this.client.chat.completions
@ -70,31 +89,101 @@ class LMStudioProvider extends InheritMultiple([Provider, UnTooled]) {
});
}
/**
* Stream a chat completion with tool calling support.
* Uses native tool calling when supported, otherwise falls back to UnTooled.
*/
async stream(messages, functions = [], eventHandler = null) {
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
);
}
this.providerLog(
"Provider.stream (tooled) - will process this chat completion."
);
try {
await LMStudioLLM.cacheContextWindows();
return await tooledStream(
this.client,
this.model,
messages,
functions,
eventHandler
);
} catch (error) {
console.error(error.message, error);
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Create a non-streaming completion with tool calling support.
* Uses native tool calling when supported, otherwise falls back to UnTooled.
*/
async complete(messages, functions = []) {
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
}
try {
await LMStudioLLM.cacheContextWindows();
const result = await tooledComplete(
this.client,
this.model,
messages,
functions,
this.getCost.bind(this)
);
if (result.retryWithError) {
return this.complete([...messages, result.retryWithError], functions);
}
return result;
} catch (error) {
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Get the cost of the completion.
*
* @param _usage The completion to get the cost for.
* @returns The cost of the completion.
* Stubbed since LMStudio has no cost basis.
*/
getCost(_usage) {
return 0;

View File

@ -2,9 +2,13 @@ const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const { tooledStream, tooledComplete } = require("./helpers/tooled.js");
const { RetryError } = require("../error.js");
/**
* The agent provider for the LocalAI provider.
* Supports native OpenAI-compatible tool calling when enabled via ENV,
* falling back to the UnTooled prompt-based approach otherwise.
*/
class LocalAiProvider extends InheritMultiple([Provider, UnTooled]) {
model;
@ -21,6 +25,7 @@ class LocalAiProvider extends InheritMultiple([Provider, UnTooled]) {
this._client = client;
this.model = model;
this.verbose = true;
this._supportsToolCalling = null;
}
get client() {
@ -31,6 +36,31 @@ class LocalAiProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Since LocalAI does not expose model capabilities via API, we check
* the PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING ENV flag for "localai".
* @returns {boolean}
*/
supportsNativeToolCalling() {
if (this._supportsToolCalling !== null) return this._supportsToolCalling;
const supportsToolCalling =
process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes("localai");
if (supportsToolCalling)
this.providerLog(
"LocalAI supports native tool calling is ENABLED via ENV."
);
else
this.providerLog(
"LocalAI supports native tool calling is DISABLED via ENV. Will use UnTooled instead."
);
this._supportsToolCalling = supportsToolCalling;
return supportsToolCalling;
}
// ---- UnTooled callbacks (used when native tool calling is not supported) ----
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({
@ -40,10 +70,8 @@ class LocalAiProvider extends InheritMultiple([Provider, UnTooled]) {
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("LocalAI chat: No results!");
if (result.choices.length === 0)
throw new Error("LocalAI chat: No results length!");
return result.choices[0].message.content;
})
.catch((_) => {
@ -59,23 +87,92 @@ class LocalAiProvider extends InheritMultiple([Provider, UnTooled]) {
});
}
/**
* Stream a chat completion with tool calling support.
* Uses native tool calling when supported, otherwise falls back to UnTooled.
*/
async stream(messages, functions = [], eventHandler = null) {
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
);
}
this.providerLog(
"Provider.stream (tooled) - will process this chat completion."
);
try {
return await tooledStream(
this.client,
this.model,
messages,
functions,
eventHandler
);
} catch (error) {
console.error(error.message, error);
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Create a non-streaming completion with tool calling support.
* Uses native tool calling when supported, otherwise falls back to UnTooled.
*/
async complete(messages, functions = []) {
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
}
try {
const result = await tooledComplete(
this.client,
this.model,
messages,
functions,
this.getCost.bind(this)
);
if (result.retryWithError) {
return this.complete([...messages, result.retryWithError], functions);
}
return result;
} catch (error) {
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**

View File

@ -35,6 +35,15 @@ class MistralProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -35,6 +35,15 @@ class MoonshotAiProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -35,6 +35,15 @@ class NovitaProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -33,6 +33,15 @@ class NvidiaNimProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -1,6 +1,7 @@
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const { formatFunctionsToTools } = require("./helpers/tooled.js");
const { OllamaAILLM } = require("../../../AiProviders/ollama");
const { Ollama } = require("ollama");
const { v4 } = require("uuid");
@ -8,6 +9,8 @@ const { safeJsonParse } = require("../../../http");
/**
* The agent provider for the Ollama provider.
* Supports true OpenAI-compatible tool calling when the model supports it,
* falling back to the UnTooled prompt-based approach otherwise.
*/
class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
model;
@ -19,16 +22,17 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
} = config;
super();
const authToken = process.env.OLLAMA_AUTH_TOKEN;
const basePath = process.env.OLLAMA_BASE_PATH;
const headers = authToken ? { Authorization: `Bearer ${authToken}` } : {};
this._client = new Ollama({
host: basePath,
headers: headers,
fetch: this.#applyFetch(),
});
this.model = model;
this.verbose = true;
this._supportsToolCalling = null;
}
get client() {
@ -39,6 +43,19 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
   * Whether the loaded model supports native OpenAI-compatible tool calling.
   * Checks the model's capabilities via the Ollama API (OllamaAILLM.getModelCapabilities).
   * @returns {Promise<boolean>}
*/
async supportsNativeToolCalling() {
if (this._supportsToolCalling !== null) return this._supportsToolCalling;
const ollama = new OllamaAILLM(null, this.model);
const capabilities = await ollama.getModelCapabilities();
this._supportsToolCalling = capabilities.tools === true;
return this._supportsToolCalling;
}
get queryOptions() {
this.providerLog(
`${this.model} is using a max context window of ${OllamaAILLM.promptWindowLimit(this.model)}/${OllamaAILLM.maxContextWindow(this.model)} tokens.`
@ -74,6 +91,49 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
});
}
/**
* Convert aibitat's internal message history (which uses role:"function" with
* originalFunctionCall metadata) into the Ollama tool-calling message format
* (assistant tool_calls + role:"tool" result pairs).
* @param {Array} messages
* @returns {Array}
*/
#formatMessagesForOllamaTools(messages) {
const formatted = [];
for (const message of messages) {
if (message.role === "function") {
const funcName =
message.originalFunctionCall?.name || message.name || "unknown";
const funcArgs = message.originalFunctionCall?.arguments || {};
formatted.push({
role: "assistant",
content: "",
tool_calls: [
{
function: {
name: funcName,
arguments:
typeof funcArgs === "string"
? safeJsonParse(funcArgs, {})
: funcArgs,
},
},
],
});
formatted.push({
role: "tool",
content:
typeof message.content === "string"
? message.content
: JSON.stringify(message.content),
});
} else {
formatted.push(message);
}
}
return formatted;
}
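  // Illustrative example only (tool name and content are hypothetical): a stored
  // aibitat history entry such as
  //   { role: "function", name: "web-search", content: "top results...",
  //     originalFunctionCall: { name: "web-search", arguments: { query: "anythingllm" } } }
  // is expanded by #formatMessagesForOllamaTools into the Ollama tool-calling pair:
  //   { role: "assistant", content: "", tool_calls: [{ function: { name: "web-search", arguments: { query: "anythingllm" } } }] }
  //   { role: "tool", content: "top results..." }
  // All other messages pass through unchanged.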
async streamingFunctionCall(
messages,
functions,
@ -168,9 +228,9 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
}
/**
* Stream a chat completion with tool calling support.
* Uses native tool calling when supported, otherwise falls back to the
* Ollama SDK + UnTooled prompt-based approach.
*
* @param messages A list of messages to send to the API.
* @param functions
@ -178,8 +238,74 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
* @returns The completion.
*/
async stream(messages, functions = [], eventHandler = null) {
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (useNative) {
this.providerLog(
"OllamaProvider.stream (tooled) - will process this chat completion."
);
await OllamaAILLM.cacheContextWindows();
const msgUUID = v4();
const formattedMessages = this.#formatMessagesForOllamaTools(messages);
const tools = formatFunctionsToTools(functions);
const stream = await this.client.chat({
model: this.model,
messages: formattedMessages,
tools,
stream: true,
options: this.queryOptions,
});
let textResponse = "";
let toolCalls = null;
for await (const chunk of stream) {
if (!chunk?.message) continue;
if (chunk.message.content) {
textResponse += chunk.message.content;
eventHandler?.("reportStreamEvent", {
type: "textResponseChunk",
uuid: msgUUID,
content: chunk.message.content,
});
}
if (chunk.message.tool_calls?.length > 0) {
toolCalls = chunk.message.tool_calls;
eventHandler?.("reportStreamEvent", {
uuid: `${msgUUID}:tool_call_invocation`,
type: "toolCallInvocation",
content: `Tool Call: ${toolCalls[0].function.name}(${JSON.stringify(toolCalls[0].function.arguments)})`,
});
}
}
if (toolCalls && toolCalls.length > 0) {
const toolCall = toolCalls[0];
const args =
typeof toolCall.function.arguments === "string"
? safeJsonParse(toolCall.function.arguments, {})
: toolCall.function.arguments || {};
return {
textResponse,
functionCall: {
id: `ollama_${v4()}`,
name: toolCall.function.name,
arguments: args,
},
};
}
return { textResponse, functionCall: null };
}
// Fallback: UnTooled prompt-based approach via the native Ollama SDK
this.providerLog(
"OllamaProvider.complete - will process this chat completion."
"OllamaProvider.stream - will process this chat completion."
);
try {
let completion = { content: "" };
@ -281,9 +407,6 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
}
}
// The UnTooled class inherited Deduplicator is mostly useful to prevent the agent
// from calling the exact same function over and over in a loop within a single chat exchange
// _but_ we should enable it to call previously used tools in a new chat interaction.
this.deduplicator.reset("runs");
return {
textResponse: completion.content,
@ -295,13 +418,54 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
}
/**
* Create a non-streaming completion with tool calling support.
* Uses native tool calling when supported, otherwise falls back to UnTooled.
*
* @param messages A list of messages to send to the API.
* @param functions
* @returns The completion.
*/
async complete(messages, functions = []) {
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (useNative) {
await OllamaAILLM.cacheContextWindows();
const formattedMessages = this.#formatMessagesForOllamaTools(messages);
const tools = formatFunctionsToTools(functions);
const response = await this.client.chat({
model: this.model,
messages: formattedMessages,
tools,
options: this.queryOptions,
});
if (response.message?.tool_calls?.length > 0) {
const toolCall = response.message.tool_calls[0];
const args =
typeof toolCall.function.arguments === "string"
? safeJsonParse(toolCall.function.arguments, {})
: toolCall.function.arguments || {};
return {
textResponse: null,
functionCall: {
id: `ollama_${v4()}`,
name: toolCall.function.name,
arguments: args,
},
cost: 0,
};
}
return {
textResponse: response.message?.content || null,
cost: 0,
};
}
// Fallback: UnTooled prompt-based approach via the native Ollama SDK
this.providerLog(
"OllamaProvider.complete - will process this chat completion."
);
@ -341,9 +505,6 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
completion.content = textResponse;
}
// The UnTooled class inherited Deduplicator is mostly useful to prevent the agent
// from calling the exact same function over and over in a loop within a single chat exchange
// _but_ we should enable it to call previously used tools in a new chat interaction.
this.deduplicator.reset("runs");
return {
textResponse: completion.content,
@ -356,10 +517,9 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
/**
* Get the cost of the completion.
*
   * @param _usage The completion to get the cost for.
   * @returns The cost of the completion.
   * Stubbed since Ollama has no cost basis.
*/
getCost(_usage) {
return 0;

View File

@ -2,9 +2,13 @@ const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const { tooledStream, tooledComplete } = require("./helpers/tooled.js");
const { RetryError } = require("../error.js");
/**
* The agent provider for the OpenRouter provider.
* Supports true OpenAI-compatible tool calling when enabled via ENV,
* falling back to the UnTooled prompt-based approach otherwise.
* @extends {Provider}
* @extends {UnTooled}
*/
@ -27,6 +31,7 @@ class OpenRouterProvider extends InheritMultiple([Provider, UnTooled]) {
this._client = client;
this.model = model;
this.verbose = true;
this._supportsToolCalling = null;
}
get client() {
@ -37,6 +42,29 @@ class OpenRouterProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* - Since OpenRouter models vary in tool calling support, we check the ENV.
* - If the ENV is not set, we default to false.
* @returns {boolean}
*/
supportsNativeToolCalling() {
if (this._supportsToolCalling !== null) return this._supportsToolCalling;
const supportsToolCalling =
process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes("openrouter");
if (supportsToolCalling)
this.providerLog(
"OpenRouter supports native tool calling is ENABLED via ENV."
);
else
this.providerLog(
"OpenRouter supports native tool calling is DISABLED via ENV. Will use UnTooled instead."
);
this._supportsToolCalling = supportsToolCalling;
return supportsToolCalling;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({
@ -65,31 +93,98 @@ class OpenRouterProvider extends InheritMultiple([Provider, UnTooled]) {
});
}
/**
* Stream a chat completion with tool calling support.
* Uses native tool calling when enabled via ENV, otherwise falls back to UnTooled.
*/
async stream(messages, functions = [], eventHandler = null) {
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
);
}
this.providerLog(
"Provider.stream (tooled) - will process this chat completion."
);
try {
return await tooledStream(
this.client,
this.model,
messages,
functions,
eventHandler
);
} catch (error) {
console.error(error.message, error);
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Create a non-streaming completion with tool calling support.
* Uses native tool calling when enabled via ENV, otherwise falls back to UnTooled.
*/
async complete(messages, functions = []) {
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
}
try {
const result = await tooledComplete(
this.client,
this.model,
messages,
functions,
this.getCost.bind(this)
);
if (result.retryWithError) {
return this.complete([...messages, result.retryWithError], functions);
}
return result;
} catch (error) {
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Get the cost of the completion.
*
* @param _usage The completion to get the cost for.
* @returns The cost of the completion.
* Stubbed since OpenRouter has no cost basis.
*/
getCost(_usage) {
return 0;

View File

@ -31,6 +31,15 @@ class PerplexityProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -35,6 +35,15 @@ class PPIOProvider extends InheritMultiple([Provider, UnTooled]) {
return false;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -36,6 +36,15 @@ class PrivatemodelProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -35,6 +35,15 @@ class SambaNovaProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -30,6 +30,15 @@ class TextWebGenUiProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -31,6 +31,15 @@ class TogetherAIProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -31,6 +31,15 @@ class XAIProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -35,6 +35,15 @@ class ZAIProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -227,6 +227,8 @@ class EphemeralAgentHandler extends AgentHandler {
}
// Load flow plugin. This is marked by `@@flow_` in the array of functions to load.
// Replace the @@flow_ placeholder in the agent's function list with the actual
// tool name so the function lookup in reply() can find it.
if (name.startsWith("@@flow_")) {
const uuid = name.replace("@@flow_", "");
const plugin = AgentFlows.loadFlowPlugin(uuid, this.aibitat);
@ -237,6 +239,11 @@ class EphemeralAgentHandler extends AgentHandler {
continue;
}
this.aibitat.agents.get("@agent").functions = this.aibitat.agents
.get("@agent")
.functions.filter((f) => f !== name);
this.aibitat.agents.get("@agent").functions.push(plugin.name);
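      // Illustrative example only (names are hypothetical): if the agent was loaded
      // with functions ["@@flow_1b2c3d", "web-browsing"], the two statements above
      // rewrite that list to ["web-browsing", "my-flow-name"] so reply() can resolve
      // the flow by its registered plugin.name instead of the @@flow_ placeholder.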
this.aibitat.use(plugin.plugin());
this.log(
`Attached flow ${plugin.name} (${plugin.flowName}) plugin to Agent cluster`

View File

@ -458,6 +458,8 @@ class AgentHandler {
}
// Load flow plugin. This is marked by `@@flow_` in the array of functions to load.
// Replace the @@flow_ placeholder in the agent's function list with the actual
// tool name so the function lookup in reply() can find it.
if (name.startsWith("@@flow_")) {
const uuid = name.replace("@@flow_", "");
const plugin = AgentFlows.loadFlowPlugin(uuid, this.aibitat);
@ -468,6 +470,11 @@ class AgentHandler {
continue;
}
this.aibitat.agents.get("@agent").functions = this.aibitat.agents
.get("@agent")
.functions.filter((f) => f !== name);
this.aibitat.agents.get("@agent").functions.push(plugin.name);
this.aibitat.use(plugin.plugin());
this.log(
`Attached flow ${plugin.name} (${plugin.flowName}) plugin to Agent cluster`

View File

@ -1303,6 +1303,9 @@ function dumpENV() {
// Allow disabling of streaming for AWS Bedrock
"AWS_BEDROCK_STREAMING_DISABLED",
// Allow native tool calling for specific providers.
"PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING",
];
// Simple sanitization of each value to prevent ENV injection via newline or quote escaping.