Add capability detection and streaming usage for Generic OpenAI provider (#5477)

- Add ENV-configurable model capabilities (tools, reasoning, vision, imageGeneration) via PROVIDER_SUPPORTS_* environment variables
- Add optional stream usage reporting via GENERIC_OPEN_AI_REPORT_USAGE
- Fix streaming tool calls for providers that send null tool_call.id (e.g., mlx-server) by generating fallback UUIDs
- Refactor supportsNativeToolCalling() to use centralized capabilities API
2026-04-21 10:31:58 -06:00 · 2026-04-21 10:31:58 -06:00 · faf2dd998e
commit faf2dd998e
parent 649c94298c
4 changed files with 84 additions and 11 deletions
--- a/server/utils/AiProviders/genericOpenAi/index.js
+++ b/server/utils/AiProviders/genericOpenAi/index.js
@ -208,6 +208,22 @@ class GenericOpenAiLLM {
    return textResponse;
  }
  /**
   * Includes the usage in the response if the ENV flag is set
   * using the stream_options: { include_usage: true } option. This is available via ENV
   * because some providers will crash with invalid options.
   * @returns {Object}
   */
  #includeStreamOptionsUsage() {
    if (!("GENERIC_OPEN_AI_REPORT_USAGE" in process.env)) return {};
    if (process.env.GENERIC_OPEN_AI_REPORT_USAGE !== "true") return {};
    return {
      stream_options: {
        include_usage: true,
      },
    };
  }
  async getChatCompletion(messages = null, { temperature = 0.7 }) {
    const result = await LLMPerformanceMonitor.measureAsyncFunction(
      this.openai.chat.completions
@ -256,6 +272,7 @@ class GenericOpenAiLLM {
        messages,
        temperature,
        max_tokens: this.maxTokens,
        ...this.#includeStreamOptionsUsage(),
      }),
      messages,
      runPromptTokenCalculation: true,
@ -404,6 +421,50 @@ class GenericOpenAiLLM {
    });
  }
  /**
   * Whether this provider supports native OpenAI-compatible tool calling.
   * - This can be any OpenAI compatible provider that supports tool calling
   * - We check the ENV to see if the provider supports tool calling.
   * - If the ENV is not set, we default to false.
   * @returns {boolean}
   */
  #supportsCapabilityFromENV(capability = "") {
    const CapabilityEnvMap = {
      tools: "PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING",
      reasoning: "PROVIDER_SUPPORTS_REASONING",
      imageGeneration: "PROVIDER_SUPPORTS_IMAGE_GENERATION",
      vision: "PROVIDER_SUPPORTS_VISION",
    };
    const envKey = CapabilityEnvMap[capability];
    if (!envKey) return false;
    if (!(envKey in process.env)) return false;
    return process.env[envKey]?.includes("generic-openai") || false;
  }
  /**
   * Returns the capabilities of the model.
   * @returns {{tools: 'unknown' | boolean, reasoning: 'unknown' | boolean, imageGeneration: 'unknown' | boolean, vision: 'unknown' | boolean}}
   */
  getModelCapabilities() {
    try {
      return {
        tools: this.#supportsCapabilityFromENV("tools"),
        reasoning: this.#supportsCapabilityFromENV("reasoning"),
        imageGeneration: this.#supportsCapabilityFromENV("imageGeneration"),
        vision: this.#supportsCapabilityFromENV("vision"),
      };
    } catch (error) {
      console.error("Error getting model capabilities:", error);
      return {
        tools: "unknown",
        reasoning: "unknown",
        imageGeneration: "unknown",
        vision: "unknown",
      };
    }
  }
  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
  async embedTextInput(textInput) {
    return await this.embedder.embedTextInput(textInput);
--- a/server/utils/agents/aibitat/providers/genericOpenAi.js
+++ b/server/utils/agents/aibitat/providers/genericOpenAi.js
@ -58,18 +58,20 @@ class GenericOpenAiProvider extends InheritMultiple([Provider, UnTooled]) {
   */
  supportsNativeToolCalling() {
    if (this._supportsToolCalling !== null) return this._supportsToolCalling;
-    const supportsToolCalling =
+    const genericOpenAi = new GenericOpenAiLLM(null, this.model);
-      this.supportsNativeToolCallingViaEnv("generic-openai");
+    const capabilities = genericOpenAi.getModelCapabilities();
-    if (supportsToolCalling)
+    this._supportsToolCalling = capabilities.tools === true;
    if (this._supportsToolCalling)
      this.providerLog(
-        "Generic OpenAI supports native tool calling is ENABLED via ENV."
+        "Generic OpenAI supports native tool calling is ENABLED."
      );
    else
      this.providerLog(
-        "Generic OpenAI supports native tool calling is DISABLED via ENV. Will use UnTooled instead."
+        "Generic OpenAI supports native tool calling is DISABLED. Will use UnTooled instead."
      );
-    this._supportsToolCalling = supportsToolCalling;
+
-    return supportsToolCalling;
+    return this._supportsToolCalling;
  }
  async #handleFunctionCallChat({ messages = [] }) {
--- a/server/utils/agents/aibitat/providers/helpers/tooled.js
+++ b/server/utils/agents/aibitat/providers/helpers/tooled.js
@ -227,13 +227,19 @@ async function tooledStream(
      for (const toolCall of choice.delta.tool_calls) {
        const idx = toolCall.index ?? 0;
-        if (toolCall.id) {
+        // Initialize tool call entry if it doesn't exist yet.
        // Some providers (e.g. mlx-server) send id as null, so we generate one.
        if (!toolCallsByIndex[idx]) {
          toolCallsByIndex[idx] = {
-            id: toolCall.id,
+            id: toolCall.id || `call_${v4()}`,
            name: toolCall.function?.name || "",
            arguments: toolCall.function?.arguments || "",
          };
-        } else if (toolCallsByIndex[idx]) {
+        } else {
          // Update existing entry with streamed data
          if (toolCall.id && !toolCallsByIndex[idx].id.startsWith("call_")) {
            toolCallsByIndex[idx].id = toolCall.id;
          }
          if (toolCall.function?.name) {
            toolCallsByIndex[idx].name += toolCall.function.name;
          }
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@ -1334,8 +1334,12 @@ function dumpENV() {
    // Allow disabling of streaming for AWS Bedrock
    "AWS_BEDROCK_STREAMING_DISABLED",
-    // Allow native tool calling for specific providers.
+    // Allow capabilities for specific providers.
    "PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING",
    "PROVIDER_SUPPORTS_REASONING",
    "PROVIDER_SUPPORTS_IMAGE_GENERATION",
    "PROVIDER_SUPPORTS_VISION",
    "GENERIC_OPEN_AI_REPORT_USAGE",
    // Allow auto-approval of skills
    "AGENT_AUTO_APPROVED_SKILLS",