merlyn/server/utils/agents/aibitat/providers/helpers/tooled.js
Timothy Carambat faf2dd998e
Add capability detection and streaming usage for Generic OpenAI provider (#5477)
- Add ENV-configurable model capabilities (tools, reasoning, vision,
  imageGeneration) via PROVIDER_SUPPORTS_* environment variables
- Add optional stream usage reporting via GENERIC_OPEN_AI_REPORT_USAGE
- Fix streaming tool calls for providers that send null tool_call.id
  (e.g., mlx-server) by generating fallback UUIDs
- Refactor supportsNativeToolCalling() to use centralized capabilities API
2026-04-21 09:31:58 -07:00

385 lines
12 KiB
JavaScript

const { v4 } = require("uuid");
const { safeJsonParse } = require("../../../../http");
/**
* Shared native OpenAI-compatible tool calling utilities.
* Any provider with an OpenAI-compatible client can use these functions
* instead of the UnTooled prompt-based approach when the model supports
* native tool calling.
*
* Usage in a provider:
* const { tooledStream, tooledComplete } = require("./helpers/tooled.js");
*
* async stream(messages, functions, eventHandler) {
* if (functions.length > 0 && await this.supportsNativeToolCalling()) {
* return tooledStream(this.client, this.model, messages, functions, eventHandler);
* }
* // ... fallback to UnTooled ...
* }
*/
/**
* Convert aibitat function definitions to the OpenAI tools format.
* @param {Array<{name: string, description: string, parameters: object}>} functions
* @returns {Array<{type: "function", function: {name: string, description: string, parameters: object}}>}
*/
/**
 * Map aibitat function definitions onto the OpenAI `tools` array shape.
 * Non-array or empty input yields an empty tools list.
 * @param {Array<{name: string, description: string, parameters: object}>} functions
 * @returns {Array<{type: "function", function: {name: string, description: string, parameters: object}}>}
 */
function formatFunctionsToTools(functions) {
  if (!Array.isArray(functions) || functions.length === 0) return [];
  const tools = [];
  for (const { name, description, parameters } of functions) {
    tools.push({
      type: "function",
      function: { name, description, parameters },
    });
  }
  return tools;
}
/**
* Format message content with attachments (images) for multimodal support.
* Transforms a message with attachments into the OpenAI-compatible format.
* @param {Object} message - The message to format
* @returns {Object} Message with content formatted for the API
*/
/**
 * Expand a message carrying image attachments into the OpenAI multimodal
 * content-array format (text part first, then one image_url part per
 * attachment). Messages without attachments are returned untouched.
 * @param {Object} message - The chat message, possibly carrying `attachments`
 * @returns {Object} Message with `content` as a multimodal array when needed
 */
function formatMessageWithAttachments(message) {
  const { attachments, ...messageWithoutAttachments } = message;
  if (!attachments || attachments.length === 0) return message;

  const multimodalContent = [
    { type: "text", text: message.content },
    ...attachments.map((attachment) => ({
      type: "image_url",
      image_url: { url: attachment.contentString },
    })),
  ];
  // The `attachments` key is dropped from the outgoing API message.
  return { ...messageWithoutAttachments, content: multimodalContent };
}
/**
* Convert the aibitat message history (which uses role:"function" with
* `originalFunctionCall` metadata) into the OpenAI tool-calling message
* format (assistant `tool_calls` + role:"tool" pairs).
* Also handles image attachments for multimodal support.
* @param {Array} messages
* @param {{injectReasoningContent?: boolean}} options
* - injectReasoningContent: when true, ensures every assistant message has
* a `reasoning_content` field (required by DeepSeek thinking-mode models).
* @returns {Array} Messages formatted for the OpenAI tools API
*/
/**
 * Rewrite the aibitat history (role:"function" messages carrying
 * `originalFunctionCall` metadata) into the OpenAI tool-calling format:
 * each function result becomes an assistant `tool_calls` stub (unless the
 * previous formatted message is already an assistant tool_calls entry)
 * followed by a matching role:"tool" message. Image attachments are
 * expanded for multimodal support along the way.
 * @param {Array} messages
 * @param {{injectReasoningContent?: boolean}} options
 * - injectReasoningContent: when true, ensures every assistant message has
 *   a `reasoning_content` field (required by DeepSeek thinking-mode models).
 * @returns {Array} Messages formatted for the OpenAI tools API
 */
function formatMessagesForTools(messages, options = {}) {
  const { injectReasoningContent = false } = options;
  const output = [];

  // Tool results must be strings on the wire.
  const stringifyContent = (content) =>
    typeof content === "string" ? content : JSON.stringify(content);

  // Build the assistant message that "requests" a tool call.
  const assistantToolCallStub = (id, name, args) => ({
    role: "assistant",
    content: null,
    ...(injectReasoningContent ? { reasoning_content: "" } : {}),
    tool_calls: [{ id, type: "function", function: { name, arguments: args } }],
  });

  for (const message of messages) {
    if (message.role !== "function") {
      const needsReasoningField =
        injectReasoningContent &&
        message.role === "assistant" &&
        !("reasoning_content" in message);
      output.push(
        formatMessageWithAttachments(
          needsReasoningField ? { ...message, reasoning_content: "" } : message
        )
      );
      continue;
    }

    const call = message.originalFunctionCall;
    if (call?.id) {
      // Emit an assistant stub only when one is not already pending.
      const last = output[output.length - 1];
      const hasPendingToolCall =
        last?.role === "assistant" && Boolean(last.tool_calls);
      if (!hasPendingToolCall) {
        const args =
          typeof call.arguments === "string"
            ? call.arguments
            : JSON.stringify(call.arguments);
        output.push(assistantToolCallStub(call.id, call.name, args));
      }
      output.push({
        role: "tool",
        tool_call_id: call.id,
        content: stringifyContent(message.content),
      });
    } else {
      // No original call id recorded (legacy history) — fabricate one so
      // the assistant/tool pair is still well-formed.
      const fallbackId = `call_${v4()}`;
      output.push(assistantToolCallStub(fallbackId, message.name, "{}"));
      output.push({
        role: "tool",
        tool_call_id: fallbackId,
        content: stringifyContent(message.content),
      });
    }
  }
  return output;
}
/**
* Stream a chat completion using native OpenAI-compatible tool calling.
* Handles parallel tool calls by tracking each tool call by its streaming
* index, then returning only the first one for the agent framework to process.
*
* @param {import("openai").OpenAI} client - OpenAI-compatible client
* @param {string} model - Model identifier
* @param {Array} messages - Raw aibitat message history
* @param {Array} functions - Aibitat function definitions
* @param {function|null} eventHandler - Stream event handler
* @param {{injectReasoningContent?: boolean, provider?: object}} options - Provider-specific options
* - provider: If passed, automatically handles usage tracking via provider.resetUsage()/recordUsage()
* @returns {Promise<{textResponse: string, functionCall: object|null, uuid: string, usage: object|null}>}
*/
/**
 * Stream a chat completion using native OpenAI-compatible tool calling.
 * Handles parallel tool calls by tracking each tool call by its streaming
 * index, then returning only the first one for the agent framework to process.
 *
 * Fix: previously a fallback UUID generated for a null tool_call.id could
 * never be replaced by the real id streamed in a later chunk, because the
 * replacement check was `!id.startsWith("call_")` and fabricated ids (and
 * real OpenAI ids) all carry the "call_" prefix. We now track an explicit
 * `idGenerated` flag instead of inferring from the prefix.
 *
 * @param {import("openai").OpenAI} client - OpenAI-compatible client
 * @param {string} model - Model identifier
 * @param {Array} messages - Raw aibitat message history
 * @param {Array} functions - Aibitat function definitions
 * @param {function|null} eventHandler - Stream event handler
 * @param {{injectReasoningContent?: boolean, provider?: object}} options - Provider-specific options
 * - provider: If passed, automatically handles usage tracking via provider.resetUsage()/recordUsage()
 * @returns {Promise<{textResponse: string, functionCall: object|null, uuid: string, usage: object|null}>}
 */
async function tooledStream(
  client,
  model,
  messages,
  functions = [],
  eventHandler = null,
  options = {}
) {
  const { provider, ...formatOptions } = options;
  // Auto-reset usage if provider is passed (best-effort; never break the stream)
  if (provider?.resetUsage) {
    try {
      provider.resetUsage();
    } catch {}
  }
  const msgUUID = v4();
  const formattedMessages = formatMessagesForTools(messages, formatOptions);
  const tools = formatFunctionsToTools(functions);
  const stream = await client.chat.completions.create({
    model,
    stream: true,
    stream_options: { include_usage: true },
    messages: formattedMessages,
    ...(tools.length > 0 ? { tools } : {}),
  });
  const result = {
    functionCall: null,
    textResponse: "",
  };
  const toolCallsByIndex = {};
  let usage = null;
  for await (const chunk of stream) {
    // Capture usage from final chunk (some providers send usage after finish_reason)
    if (chunk?.usage) {
      usage = chunk.usage;
    }
    if (!chunk?.choices?.[0]) continue;
    const choice = chunk.choices[0];
    if (choice.delta?.content) {
      result.textResponse += choice.delta.content;
      eventHandler?.("reportStreamEvent", {
        type: "textResponseChunk",
        uuid: msgUUID,
        content: choice.delta.content,
      });
    }
    if (choice.delta?.tool_calls) {
      for (const toolCall of choice.delta.tool_calls) {
        const idx = toolCall.index ?? 0;
        // Initialize tool call entry if it doesn't exist yet.
        // Some providers (e.g. mlx-server) send id as null, so we generate one
        // and remember that it was fabricated.
        if (!toolCallsByIndex[idx]) {
          toolCallsByIndex[idx] = {
            id: toolCall.id || `call_${v4()}`,
            idGenerated: !toolCall.id,
            name: toolCall.function?.name || "",
            arguments: toolCall.function?.arguments || "",
          };
        } else {
          const entry = toolCallsByIndex[idx];
          // Prefer the provider's real id over a fabricated fallback.
          if (toolCall.id && entry.idGenerated) {
            entry.id = toolCall.id;
            entry.idGenerated = false;
          }
          if (toolCall.function?.name) {
            entry.name += toolCall.function.name;
          }
          if (toolCall.function?.arguments) {
            entry.arguments += toolCall.function.arguments;
          }
        }
        if (toolCallsByIndex[idx]) {
          eventHandler?.("reportStreamEvent", {
            uuid: `${msgUUID}:tool_call_invocation`,
            type: "toolCallInvocation",
            content: `Assembling Tool Call: ${toolCallsByIndex[idx].name}(${toolCallsByIndex[idx].arguments})`,
          });
        }
      }
    }
  }
  // Auto-record usage if provider is passed and usage is available
  if (provider?.recordUsage && usage) {
    try {
      provider.recordUsage(usage);
    } catch {}
  }
  const toolCallIndices = Object.keys(toolCallsByIndex).map(Number);
  if (toolCallIndices.length > 0) {
    const firstToolCall = toolCallsByIndex[Math.min(...toolCallIndices)];
    result.functionCall = {
      id: firstToolCall.id,
      name: firstToolCall.name,
      arguments: safeJsonParse(firstToolCall.arguments, {}),
    };
  }
  return {
    textResponse: result.textResponse,
    functionCall: result.functionCall,
    uuid: msgUUID,
    usage,
  };
}
/**
* Non-streaming chat completion using native OpenAI-compatible tool calling.
* Returns the first tool call if the model requests any, otherwise the text response.
*
* @param {import("openai").OpenAI} client - OpenAI-compatible client
* @param {string} model - Model identifier
* @param {Array} messages - Raw aibitat message history
* @param {Array} functions - Aibitat function definitions
* @param {function} getCostFn - Provider's getCost function
* @param {{injectReasoningContent?: boolean, provider?: object}} options - Provider-specific options
* - provider: If passed, automatically handles usage tracking via provider.resetUsage()/recordUsage()
* @returns {Promise<{textResponse: string|null, functionCall: object|null, cost: number, usage: object|null}>}
*/
/**
 * Non-streaming chat completion using native OpenAI-compatible tool calling.
 * Returns the first tool call if the model requests any, otherwise the text response.
 *
 * Robustness: providers occasionally return an error-shaped payload with no
 * `choices`; we now raise a descriptive Error instead of an opaque TypeError.
 * Non-tool-call return paths also carry `functionCall: null` for shape
 * consistency with `tooledStream`.
 *
 * @param {import("openai").OpenAI} client - OpenAI-compatible client
 * @param {string} model - Model identifier
 * @param {Array} messages - Raw aibitat message history
 * @param {Array} functions - Aibitat function definitions
 * @param {function} getCostFn - Provider's getCost function
 * @param {{injectReasoningContent?: boolean, provider?: object}} options - Provider-specific options
 * - provider: If passed, automatically handles usage tracking via provider.resetUsage()/recordUsage()
 * @returns {Promise<{textResponse: string|null, functionCall: object|null, cost: number, usage: object|null}>}
 * @throws {Error} When the provider response contains no choices/message.
 */
async function tooledComplete(
  client,
  model,
  messages,
  functions = [],
  getCostFn = () => 0,
  options = {}
) {
  const { provider, ...formatOptions } = options;
  // Auto-reset usage if provider is passed (best-effort)
  if (provider?.resetUsage) {
    try {
      provider.resetUsage();
    } catch {}
  }
  const formattedMessages = formatMessagesForTools(messages, formatOptions);
  const tools = formatFunctionsToTools(functions);
  const response = await client.chat.completions.create({
    model,
    stream: false,
    messages: formattedMessages,
    ...(tools.length > 0 ? { tools } : {}),
  });
  const completion = response?.choices?.[0]?.message;
  if (!completion)
    throw new Error(
      "tooledComplete: provider response contained no choices/message."
    );
  const cost = getCostFn(response.usage);
  const usage = response.usage || null;
  // Auto-record usage if provider is passed and usage is available
  if (provider?.recordUsage && usage) {
    try {
      provider.recordUsage(usage);
    } catch {}
  }
  if (completion.tool_calls && completion.tool_calls.length > 0) {
    const toolCall = completion.tool_calls[0];
    const functionArgs = safeJsonParse(toolCall.function.arguments, null);
    if (functionArgs === null) {
      // Arguments were not valid JSON — hand the raw text back to the agent
      // loop as a function-role message so the model can retry.
      return {
        textResponse: null,
        functionCall: null,
        retryWithError: {
          role: "function",
          name: toolCall.function.name,
          content: `Failed to parse tool call arguments as JSON. Raw arguments: ${toolCall.function.arguments}`,
          originalFunctionCall: {
            id: toolCall.id,
            name: toolCall.function.name,
            arguments: toolCall.function.arguments,
          },
        },
        cost,
        usage,
      };
    }
    return {
      textResponse: null,
      functionCall: {
        id: toolCall.id,
        name: toolCall.function.name,
        arguments: functionArgs,
      },
      cost,
      usage,
    };
  }
  return {
    textResponse: completion.content,
    functionCall: null,
    cost,
    usage,
  };
}
// Public API. formatMessageWithAttachments is intentionally unexported — it
// is applied automatically inside formatMessagesForTools.
module.exports = {
  formatFunctionsToTools,
  formatMessagesForTools,
  tooledStream,
  tooledComplete,
};