// merlyn/server/utils/agents/aibitat/providers/openai.js

const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const { RetryError } = require("../error.js");
const { v4 } = require("uuid");
const { safeJsonParse } = require("../../../http");
/**
 * The agent provider for the OpenAI API.
 * By default, the model is set to `gpt-4o`.
 * A construction sketch is included at the bottom of this file.
 */
class OpenAIProvider extends Provider {
  model;

  constructor(config = {}) {
    const {
      options = {
        apiKey: process.env.OPEN_AI_KEY,
        maxRetries: 3,
      },
      model = "gpt-4o",
    } = config;
    const client = new OpenAI(options);
    super(client);
    this.model = model;
  }

  get supportsAgentStreaming() {
    return true;
  }

  /**
   * Whether this provider supports native OpenAI-compatible tool calling.
   * - OpenAI always supports tool calling.
   * @returns {boolean}
   */
  supportsNativeToolCalling() {
    return true;
  }

  /**
   * Format the messages to the OpenAI Responses API input format.
   * - If the message is our internal `function` type, map it to a
   *   function call + function call output pair.
   * - Otherwise, map it to the text format (input_text for user/system,
   *   output_text for assistant messages).
   * - Handles attachments (images) for multimodal support.
   *
   * @param {any[]} messages - The messages to format.
   * @returns {OpenAI.OpenAI.Responses.ResponseInput[]} The formatted messages.
   */
  #formatToResponsesInput(messages) {
    let formattedMessages = [];
    messages.forEach((message) => {
      if (message.role === "function") {
        // If the message does not have an originalFunctionCall we cannot
        // map it to a function call id and OpenAI will throw an error,
        // so if it did not carry over - log and skip.
        if (!message.hasOwnProperty("originalFunctionCall")) {
          this.providerLog(
            "[OpenAI.#formatToResponsesInput]: message did not pass back the originalFunctionCall. We need this to map the function call to the correct id.",
            { message: JSON.stringify(message, null, 2) }
          );
          return;
        }

        formattedMessages.push(
          {
            type: "function_call",
            name: message.originalFunctionCall.name,
            call_id: message.originalFunctionCall.id,
            arguments: JSON.stringify(message.originalFunctionCall.arguments),
          },
          {
            type: "function_call_output",
            call_id: message.originalFunctionCall.id,
            output: message.content,
          }
        );
        return;
      }

      // Build content array with text and optional image attachments
      const content = [
        {
          type: message.role === "assistant" ? "output_text" : "input_text",
          text: message.content,
        },
      ];

      // Add image attachments if present (for multimodal/vision support)
      if (message.attachments && message.attachments.length > 0) {
        for (const attachment of message.attachments) {
          content.push({
            type: "input_image",
            image_url: attachment.contentString,
          });
        }
      }

      formattedMessages.push({
        role: message.role,
        content,
      });
    });
    return formattedMessages;
  }
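
  // Illustrative sketch of the mapping above (values are hypothetical
  // examples, not real tool names or ids from this repo):
  //
  //   { role: "user", content: "What is in this image?",
  //     attachments: [{ contentString: "data:image/png;base64,..." }] }
  //   -> { role: "user", content: [
  //        { type: "input_text", text: "What is in this image?" },
  //        { type: "input_image", image_url: "data:image/png;base64,..." } ] }
  //
  //   { role: "function", content: "72F and sunny",
  //     originalFunctionCall: { id: "call_abc", name: "get_weather", arguments: { city: "SF" } } }
  //   -> { type: "function_call", name: "get_weather", call_id: "call_abc",
  //        arguments: '{"city":"SF"}' },
  //      { type: "function_call_output", call_id: "call_abc", output: "72F and sunny" }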

  /**
   * Format the functions to the OpenAI Responses API tool format.
   *
   * @param {any[]} functions - The functions to format.
   * @returns {{
   *  type: "function",
   *  name: string,
   *  description: string,
   *  parameters: object,
   *  strict: boolean,
   * }[]} The formatted functions.
   */
  #formatFunctions(functions) {
    return functions.map((func) => ({
      type: "function",
      name: func.name,
      description: func.description,
      parameters: func.parameters,
      strict: false,
    }));
  }
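
  // Illustrative sketch: a plugin function definition such as
  //   { name: "get_weather", description: "Look up weather",
  //     parameters: { type: "object", properties: { city: { type: "string" } } } }
  // becomes the Responses API tool entry
  //   { type: "function", name: "get_weather", description: "Look up weather",
  //     parameters: { ... }, strict: false }
  // (`get_weather` is a hypothetical example, not a real tool in this repo.)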

  /**
   * Stream a chat completion from the LLM with tool calling.
   * Note: This uses the OpenAI Responses API and its implementation is specific to OpenAI models.
   * Do not re-use this code for providers that do not EXACTLY implement the OpenAI Responses API.
   *
   * @param {any[]} messages - The messages to send to the LLM.
   * @param {any[]} functions - The functions to use in the LLM.
   * @param {function} eventHandler - The event handler to use to report stream events.
   * @returns {Promise<{ functionCall: any, textResponse: string }>} - The result of the chat completion.
   */
  async stream(messages, functions = [], eventHandler = null) {
    this.providerLog("OpenAI.stream - will process this chat completion.");
    this.resetUsage();
    try {
      const msgUUID = v4();
      /** @type {OpenAI.OpenAI.Responses.Response} */
      const response = await this.client.responses.create({
        model: this.model,
        input: this.#formatToResponsesInput(messages),
        stream: true,
        store: false,
        parallel_tool_calls: false,
        ...(Array.isArray(functions) && functions?.length > 0
          ? { tools: this.#formatFunctions(functions) }
          : {}),
      });

      const completion = {
        content: "",
        /** @type {null|{name: string, call_id: string, arguments: string|object}} */
        functionCall: null,
      };

      for await (const streamEvent of response) {
        /** @type {OpenAI.OpenAI.Responses.ResponseStreamEvent} */
        const chunk = streamEvent;

        // Plain text token - append to the running response and report it.
        if (chunk.type === "response.output_text.delta") {
          completion.content += chunk.delta;
          eventHandler?.("reportStreamEvent", {
            type: "textResponseChunk",
            uuid: msgUUID,
            content: chunk.delta,
          });
          continue;
        }

        // A new function call block was opened - capture its name and call id.
        if (
          chunk.type === "response.output_item.added" &&
          chunk.item.type === "function_call"
        ) {
          completion.functionCall = {
            name: chunk.item.name,
            call_id: chunk.item.call_id,
            arguments: chunk.item.arguments,
          };
          eventHandler?.("reportStreamEvent", {
            type: "toolCallInvocation",
            uuid: `${msgUUID}:tool_call_invocation`,
            content: `Assembling Tool Call: ${completion.functionCall.name}(${completion.functionCall.arguments})`,
          });
          continue;
        }

        // Streaming chunks of the function call arguments - accumulate them.
        if (chunk.type === "response.function_call_arguments.delta") {
          // Guard: ignore argument deltas that arrive before the call item is opened.
          if (!completion.functionCall) continue;
          completion.functionCall.arguments += chunk.delta;
          eventHandler?.("reportStreamEvent", {
            type: "toolCallInvocation",
            uuid: `${msgUUID}:tool_call_invocation`,
            content: `Assembling Tool Call: ${completion.functionCall.name}(${completion.functionCall.arguments})`,
          });
          continue;
        }

        // Final event - record token usage from the completed response.
        if (chunk.type === "response.completed") {
          const completedResponse = chunk.response;
          if (!completedResponse?.usage) continue;
          this.recordUsage(completedResponse.usage);
          continue;
        }
      }

      if (completion.functionCall) {
        completion.functionCall.arguments = safeJsonParse(
          completion.functionCall.arguments,
          {}
        );
        return {
          textResponse: completion.content,
          functionCall: {
            id: completion.functionCall.call_id,
            name: completion.functionCall.name,
            arguments: completion.functionCall.arguments,
          },
          cost: this.getCost(),
          uuid: msgUUID,
        };
      }

      return {
        textResponse: completion.content,
        functionCall: null,
        cost: this.getCost(),
        uuid: msgUUID,
      };
    } catch (error) {
      // Auth errors are fatal; rate limit and server errors are retryable.
      if (error instanceof OpenAI.AuthenticationError) throw error;
      if (
        error instanceof OpenAI.RateLimitError ||
        error instanceof OpenAI.InternalServerError ||
        error instanceof OpenAI.APIError
      ) {
        throw new RetryError(error.message);
      }
      throw error;
    }
  }
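
  // Illustrative usage sketch (`socket` and its send call are hypothetical,
  // not APIs from this repo):
  //   const provider = new OpenAIProvider({ model: "gpt-4o" });
  //   const result = await provider.stream(chatHistory, tools, (event, payload) => {
  //     if (event === "reportStreamEvent") socket.send(JSON.stringify(payload));
  //   });
  //   // result.functionCall is null for plain text replies, or
  //   // { id, name, arguments } when the model chose to call a tool.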

  /**
   * Create a completion based on the received messages.
   *
   * @param {any[]} messages - A list of messages to send to the OpenAI API.
   * @param {any[]} functions - The functions to use in the LLM.
   * @returns {Promise<{ functionCall: any, textResponse: string }>} The completion.
   */
  async complete(messages, functions = []) {
    this.providerLog("OpenAI.complete - will process this chat completion.");
    this.resetUsage();
    try {
      const completion = {
        content: "",
        functionCall: null,
      };

      /** @type {OpenAI.OpenAI.Responses.Response} */
      const response = await this.client.responses.create({
        model: this.model,
        stream: false,
        store: false,
        parallel_tool_calls: false,
        input: this.#formatToResponsesInput(messages),
        ...(Array.isArray(functions) && functions?.length > 0
          ? { tools: this.#formatFunctions(functions) }
          : {}),
      });

      if (response.usage) this.recordUsage(response.usage);

      // A completed response can contain both a message block and a
      // function call block - capture whichever are present.
      for (const outputBlock of response.output) {
        if (outputBlock.type === "message") {
          if (outputBlock.content[0]?.type === "output_text") {
            completion.content = outputBlock.content[0].text;
          }
        }
        if (outputBlock.type === "function_call") {
          completion.functionCall = {
            name: outputBlock.name,
            call_id: outputBlock.call_id,
            arguments: outputBlock.arguments,
          };
        }
      }

      if (completion.functionCall) {
        completion.functionCall.arguments = safeJsonParse(
          completion.functionCall.arguments,
          {}
        );
        return {
          textResponse: completion.content,
          functionCall: {
            id: completion.functionCall.call_id,
            name: completion.functionCall.name,
            arguments: completion.functionCall.arguments,
          },
          cost: this.getCost(),
          usage: this.getUsage(),
        };
      }

      return {
        textResponse: completion.content,
        functionCall: null,
        cost: this.getCost(),
        usage: this.getUsage(),
      };
    } catch (error) {
      if (error instanceof OpenAI.AuthenticationError) throw error;
      if (
        error instanceof OpenAI.RateLimitError ||
        error instanceof OpenAI.InternalServerError ||
        error instanceof OpenAI.APIError
      ) {
        throw new RetryError(error.message);
      }
      throw error;
    }
  }
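
  // Illustrative agent round-trip sketch. `runTool` is hypothetical; the
  // pushed message shape matches what #formatToResponsesInput expects back:
  //   let res = await provider.complete(history, tools);
  //   if (res.functionCall) {
  //     const output = await runTool(res.functionCall.name, res.functionCall.arguments);
  //     history.push({
  //       role: "function",
  //       content: output,
  //       originalFunctionCall: res.functionCall, // { id, name, arguments }
  //     });
  //     res = await provider.complete(history, tools);
  //   }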

  /**
   * Get the cost of the completion.
   * @returns {number} The cost of the completion (currently returns 0).
   */
  getCost() {
    return 0;
  }
}

module.exports = OpenAIProvider;
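
// Illustrative construction sketch - config keys mirror the destructuring in
// the constructor above; "gpt-4o-mini" is just an example model name:
//   const OpenAIProvider = require("./openai.js");
//   const provider = new OpenAIProvider({
//     options: { apiKey: process.env.OPEN_AI_KEY, maxRetries: 3 },
//     model: "gpt-4o-mini",
//   });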