Native Tool calling (#5071)

* checkpoint

* test MCP and flows

* add native tool call detection back to LMStudio

* add native tool call loops for Ollama

* Add ability detection to DMR (regex parse)

* bedrock and generic openai with ENV flag

* deepseek native tool calling

* localAI native function

* groq support

* linting, add LiteLLM and OpenRouter native tool calling via flag
Timothy Carambat 2026-02-26 13:37:56 -08:00 committed by GitHub
parent 55dc0da488
commit ac0b1d401d
42 changed files with 2002 additions and 409 deletions

View File

@ -420,4 +420,9 @@ GID='1000'
# Disable MCP cooldown timer for agent calls
# this can lead to infinite recursive calls of the same function
# for some model/provider combinations
# MCP_NO_COOLDOWN="true
# MCP_NO_COOLDOWN="true"
# Allow native tool calling for specific providers.
# This can VASTLY improve performance and speed of agent calls.
# Check code for supported providers who can be enabled here via this flag
# PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING="generic-openai,bedrock,localai,groq,litellm,openrouter"

View File

@ -423,4 +423,9 @@ TTS_PROVIDER="native"
# Disable MCP cooldown timer for agent calls
# this can lead to infinite recursive calls of the same function
# for some model/provider combinations
# MCP_NO_COOLDOWN="true
# MCP_NO_COOLDOWN="true"
# Allow native tool calling for specific providers.
# This can VASTLY improve performance and speed of agent calls.
# Check code for supported providers who can be enabled here via this flag
# PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING="generic-openai,bedrock,localai,groq,litellm,openrouter"
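
Each provider opted in below gates on this flag with a simple substring check against the comma-separated value; a minimal standalone sketch of that check (the helper name is illustrative, the pattern mirrors the provider changes in this commit):

function nativeToolCallingEnabled(providerKey) {
  // Same check the providers use: is the key present in the comma-separated flag?
  return !!process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes(providerKey);
}
// nativeToolCallingEnabled("groq") -> true when the flag above is uncommented as shown.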

View File

@ -205,6 +205,47 @@ class DockerModelRunnerLLM {
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
/**
* Returns the capabilities of the model.
* Note: This is a heuristic that infers the model's capabilities from its metadata.
* It is not perfect, because every model's metadata is different and may not contain the key values we rely on.
* There is no "capabilities" key exposed by any API endpoint, so we fall back to parsing the metadata text.
* @returns {Promise<{tools: 'unknown' | boolean, reasoning: 'unknown' | boolean, imageGeneration: 'unknown' | boolean, vision: 'unknown' | boolean}>}
*/
async getModelCapabilities() {
try {
const endpoint = new URL(
parseDockerModelRunnerEndpoint(
process.env.DOCKER_MODEL_RUNNER_BASE_PATH,
"dmr"
)
);
// eg: /models/ai/qwen3:4B-UD-Q4_K_XL
endpoint.pathname = `/models/${this.model}`;
const response = await fetch(endpoint.toString());
const data = await response.text();
const tools = /tools|tool|tool_use|tool_call/.test(data);
const reasoning = /thinking|reason|reasoning|think/.test(data);
const imageGeneration = /diffusion/.test(data);
const vision = /vision|vllm|image/.test(data);
return {
tools: tools,
reasoning: reasoning,
imageGeneration: imageGeneration,
vision: vision,
};
} catch (error) {
console.error("Error getting model capabilities:", error);
return {
tools: "unknown",
reasoning: "unknown",
imageGeneration: "unknown",
vision: "unknown",
};
}
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
async embedTextInput(textInput) {
return await this.embedder.embedTextInput(textInput);
@ -232,6 +273,7 @@ function parseDockerModelRunnerEndpoint(basePath = null, to = "openai") {
try {
const url = new URL(basePath);
if (to === "openai") url.pathname = "engines/v1";
else if (to === "ollama") url.pathname = "api";
else if (to === "dmr") url.pathname = "";
return url.toString();
} catch (e) {
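
As a worked example of the regex heuristic above, suppose the fetched metadata text looks like the following (an illustrative blob, not a real Docker Model Runner response):

const data =
  '{"id":"ai/qwen3:4B","config":{"chat_template":"... tool_call ...","modalities":["text","vision"]}}';
/tools|tool|tool_use|tool_call/.test(data); // true  -> tools: true
/thinking|reason|reasoning|think/.test(data); // false -> reasoning: false
/diffusion/.test(data); // false -> imageGeneration: false
/vision|vllm|image/.test(data); // true  -> vision: true

Because the match runs over the raw metadata text, any occurrence of these substrings flips the flag, which is why the result is treated as a hint rather than a guarantee.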

View File

@ -272,6 +272,60 @@ class LMStudioLLM {
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
/**
* Returns the capabilities of the model.
* This uses the new /api/v1 endpoint, which returns the model info in a different format.
* @returns {Promise<{tools: 'unknown' | boolean, reasoning: 'unknown' | boolean, imageGeneration: 'unknown' | boolean, vision: 'unknown' | boolean}>}
*/
async getModelCapabilities() {
try {
const endpoint = new URL(
parseLMStudioBasePath(process.env.LMSTUDIO_BASE_PATH, "v1")
);
const apiKey = process.env.LMSTUDIO_AUTH_TOKEN ?? null;
endpoint.pathname += "/models";
const modelInfo =
(await fetch(endpoint.toString(), {
headers: {
"Content-Type": "application/json",
...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}),
},
})
.then((res) => {
if (!res.ok)
throw new Error(
`LMStudio:getModelCapabilities - ${res.statusText}`
);
return res.json();
})
.then(({ models = [] }) =>
models.find((model) => model.key === this.model)
)) || {};
const capabilities = modelInfo.hasOwnProperty("capabilities")
? modelInfo.capabilities
: {
trained_for_tool_use: "unknown",
vision: "unknown",
};
return {
tools: capabilities.trained_for_tool_use,
reasoning: "unknown",
imageGeneration: "unknown", // LM Studio does not support image generation yet.
vision: capabilities.vision,
};
} catch (error) {
console.error("Error getting model capabilities:", error);
return {
tools: "unknown",
reasoning: "unknown",
imageGeneration: "unknown",
vision: "unknown",
};
}
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
async embedTextInput(textInput) {
return await this.embedder.embedTextInput(textInput);
@ -292,12 +346,15 @@ class LMStudioLLM {
* Parse the base path for the LMStudio API. Since the base path must end in /v1 and cannot have a trailing slash,
* and the user can possibly set it to anything and likely incorrectly due to pasting behaviors, we need to ensure it is in the correct format.
* @param {string} basePath
* @param {'legacy' | 'v1'} apiVersion
* @returns {string}
*/
function parseLMStudioBasePath(providedBasePath = "") {
function parseLMStudioBasePath(providedBasePath = "", apiVersion = "legacy") {
try {
const baseURL = new URL(providedBasePath);
const basePath = `${baseURL.origin}/v1`;
let basePath = `${baseURL.origin}`;
if (apiVersion === "legacy") basePath += `/v1`;
if (apiVersion === "v1") basePath += `/api/v1`;
return basePath;
} catch (e) {
return providedBasePath;
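
Worked examples of the updated parser (inputs are illustrative):

parseLMStudioBasePath("http://localhost:1234/v1/"); // "http://localhost:1234/v1" (legacy default)
parseLMStudioBasePath("http://localhost:1234", "v1"); // "http://localhost:1234/api/v1"
parseLMStudioBasePath("not-a-url"); // "not-a-url" (returned unchanged when URL parsing fails)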

View File

@ -468,6 +468,32 @@ class OllamaAILLM {
});
}
/**
* Returns the capabilities of the model.
* @returns {Promise<{tools: 'unknown' | boolean, reasoning: 'unknown' | boolean, imageGeneration: 'unknown' | boolean, vision: 'unknown' | boolean}>}
*/
async getModelCapabilities() {
try {
const { capabilities = [] } = await this.client.show({
model: this.model,
});
return {
tools: capabilities.includes("tools") ? true : false,
reasoning: capabilities.includes("thinking") ? true : false,
imageGeneration: false, // we dont have any image generation capabilities for Ollama or anywhere right now.
vision: capabilities.includes("vision") ? true : false,
};
} catch (error) {
console.error("Error getting model capabilities:", error);
return {
tools: "unknown",
reasoning: "unknown",
imageGeneration: "unknown",
vision: "unknown",
};
}
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
async embedTextInput(textInput) {
return await this.embedder.embedTextInput(textInput);
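
For illustration, a model whose client.show() response reports a capabilities array like the one below maps to the following result (sample values are illustrative):

const capabilities = ["completion", "tools", "thinking"];
// -> { tools: true, reasoning: true, imageGeneration: false, vision: false }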

View File

@ -185,6 +185,24 @@ class AgentFlows {
.map(([uuid]) => `@@flow_${uuid}`);
}
/**
* Sanitize a flow name into a valid OpenAI-compatible tool name.
* Must match ^[a-zA-Z0-9_-]{1,64}$
* @param {string} flowName - The human-readable flow name
* @returns {string|null} Sanitized tool name, or null if empty after sanitization
*/
static sanitizeToolName(flowName) {
const sanitized = flowName
.toLowerCase()
.trim()
.replace(/\s+/g, "_")
.replace(/[^a-z0-9_-]/g, "")
.replace(/_+/g, "_")
.replace(/^[-_]+|[-_]+$/g, "");
if (!sanitized) return null;
return sanitized.slice(0, 64);
}
/**
* Load a flow plugin by its UUID
* @param {string} uuid - The UUID of the flow to load
@ -196,17 +214,18 @@ class AgentFlows {
const startBlock = flow.config.steps?.find((s) => s.type === "start");
const variables = startBlock?.config?.variables || [];
const toolName = AgentFlows.sanitizeToolName(flow.name) || `flow_${uuid}`;
return {
name: `flow_${uuid}`,
name: toolName,
description: `Execute agent flow: ${flow.name}`,
plugin: (_runtimeArgs = {}) => ({
name: `flow_${uuid}`,
name: toolName,
description:
flow.config.description || `Execute agent flow: ${flow.name}`,
setup: (aibitat) => {
aibitat.function({
name: `flow_${uuid}`,
name: toolName,
description:
flow.config.description || `Execute agent flow: ${flow.name}`,
parameters: {
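
Worked examples of the sanitizer, with the fallback shown above when nothing valid remains (flow names are illustrative):

AgentFlows.sanitizeToolName("Summarize Daily  Report!"); // "summarize_daily_report"
AgentFlows.sanitizeToolName("Get Weather (v2)"); // "get_weather_v2"
AgentFlows.sanitizeToolName("重要流程"); // null -> caller falls back to `flow_${uuid}`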

View File

@ -41,6 +41,7 @@ class AIbitat {
chats = [],
interrupt = "NEVER",
maxRounds = 100,
maxToolCalls = 10,
provider = "openai",
handlerProps = {}, // Inherited props we can spread so aibitat can access.
...rest
@ -48,6 +49,7 @@ class AIbitat {
this._chats = chats;
this.defaultInterrupt = interrupt;
this.maxRounds = maxRounds;
this.maxToolCalls = maxToolCalls;
this.handlerProps = handlerProps;
this.defaultProvider = {
@ -641,7 +643,8 @@ ${this.getHistory({ to: route.to })
provider,
messages = [],
functions = [],
byAgent = null
byAgent = null,
depth = 0
) {
const eventHandler = (type, data) => {
this?.socket?.send(type, data);
@ -655,6 +658,24 @@ ${this.getHistory({ to: route.to })
);
if (completionStream.functionCall) {
if (depth >= this.maxToolCalls) {
this.handlerProps?.log?.(
`[warning]: Maximum tool call limit (${this.maxToolCalls}) reached. Stopping tool execution.`
);
this?.introspect?.(
`Maximum tool call limit (${this.maxToolCalls}) reached. Returning what I have so far.`
);
const bailoutMessage =
completionStream.textResponse ||
"I reached the maximum number of tool calls allowed for a single response. Here is what I have so far based on the tools I was able to run.";
eventHandler?.("reportStreamEvent", {
type: "fullTextResponse",
uuid: v4(),
content: bailoutMessage,
});
return bailoutMessage;
}
const { name, arguments: args } = completionStream.functionCall;
const fn = this.functions.get(name);
@ -673,7 +694,8 @@ ${this.getHistory({ to: route.to })
},
],
functions,
byAgent
byAgent,
depth + 1
);
}
@ -730,7 +752,8 @@ ${this.getHistory({ to: route.to })
},
],
functions,
byAgent
byAgent,
depth + 1
);
}
@ -752,12 +775,26 @@ ${this.getHistory({ to: route.to })
provider,
messages = [],
functions = [],
byAgent = null
byAgent = null,
depth = 0
) {
// get the chat completion
const completion = await provider.complete(messages, functions);
if (completion.functionCall) {
if (depth >= this.maxToolCalls) {
this.handlerProps?.log?.(
`[warning]: Maximum tool call limit (${this.maxToolCalls}) reached. Stopping tool execution.`
);
this?.introspect?.(
`Maximum tool call limit (${this.maxToolCalls}) reached. Returning what I have so far.`
);
return (
completion.textResponse ||
"I reached the maximum number of tool calls allowed for a single response. Here is what I have so far based on the tools I was able to run."
);
}
const { name, arguments: args } = completion.functionCall;
const fn = this.functions.get(name);
@ -776,7 +813,8 @@ ${this.getHistory({ to: route.to })
},
],
functions,
byAgent
byAgent,
depth + 1
);
}
@ -824,7 +862,8 @@ ${this.getHistory({ to: route.to })
},
],
functions,
byAgent
byAgent,
depth + 1
);
}
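
Condensed, both the streaming and non-streaming paths above now follow the same depth-limited pattern; a sketch under illustrative names (handleReply, runTool, MAX_TOOL_CALLS, and BAILOUT_MESSAGE are placeholders, not the actual identifiers):

async function handleReply(provider, messages, functions, depth = 0) {
  const completion = await provider.complete(messages, functions);
  if (!completion.functionCall) return completion.textResponse;
  // Bail out before executing another tool once the per-reply budget is spent.
  if (depth >= MAX_TOOL_CALLS) return completion.textResponse || BAILOUT_MESSAGE;
  const result = await runTool(completion.functionCall);
  return handleReply(
    provider,
    [
      ...messages,
      { role: "function", name: completion.functionCall.name, content: result },
    ],
    functions,
    depth + 1
  );
}

In the actual history, the function-result message also carries originalFunctionCall metadata, which the shared tooled helper later in this commit uses to rebuild assistant tool_calls / tool pairs.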

View File

@ -31,6 +31,15 @@ class ApiPieProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -1,14 +1,12 @@
const { OpenAI } = require("openai");
const { AzureOpenAiLLM } = require("../../../AiProviders/azureOpenAi");
const Provider = require("./ai-provider.js");
const { tooledStream, tooledComplete } = require("./helpers/tooled.js");
const { RetryError } = require("../error.js");
const { v4 } = require("uuid");
const { safeJsonParse } = require("../../../http");
/**
* The agent provider for the Azure OpenAI API.
* Uses the tool calling format (not legacy function calling) for compatibility
* with newer Azure OpenAI models.
* Uses the shared native tool calling helper for OpenAI-compatible tool calling.
*/
class AzureOpenAiProvider extends Provider {
model;
@ -28,199 +26,27 @@ class AzureOpenAiProvider extends Provider {
}
/**
* Convert legacy function definitions to the tools format.
* @param {Array} functions - Legacy function definitions
* @returns {Array} Tools in the new format
*/
#formatFunctionsToTools(functions) {
if (!Array.isArray(functions) || functions.length === 0) return [];
return functions.map((func) => ({
type: "function",
function: {
name: func.name,
description: func.description,
parameters: func.parameters,
},
}));
}
/**
* Format messages to use tool calling format instead of legacy function format.
* Converts role: "function" messages to role: "tool" messages.
* @param {Array} messages - Messages array that may contain legacy function messages
* @returns {Array} Messages formatted for tool calling
*/
#formatMessagesForTools(messages) {
const formattedMessages = [];
for (const message of messages) {
if (message.role === "function") {
// Convert legacy function result to tool result format
// We need the tool_call_id from the originalFunctionCall
if (message.originalFunctionCall?.id) {
// First, add the assistant message with the tool_call if not already present
// Check if previous message already has this tool call
const prevMsg = formattedMessages[formattedMessages.length - 1];
if (!prevMsg || prevMsg.role !== "assistant" || !prevMsg.tool_calls) {
formattedMessages.push({
role: "assistant",
content: null,
tool_calls: [
{
id: message.originalFunctionCall.id,
type: "function",
function: {
name: message.originalFunctionCall.name,
arguments:
typeof message.originalFunctionCall.arguments === "string"
? message.originalFunctionCall.arguments
: JSON.stringify(
message.originalFunctionCall.arguments
),
},
},
],
});
}
// Add the tool result
formattedMessages.push({
role: "tool",
tool_call_id: message.originalFunctionCall.id,
content:
typeof message.content === "string"
? message.content
: JSON.stringify(message.content),
});
} else {
// Fallback: generate a tool_call_id if not present
const toolCallId = `call_${v4()}`;
formattedMessages.push({
role: "assistant",
content: null,
tool_calls: [
{
id: toolCallId,
type: "function",
function: {
name: message.name,
arguments: "{}",
},
},
],
});
formattedMessages.push({
role: "tool",
tool_call_id: toolCallId,
content:
typeof message.content === "string"
? message.content
: JSON.stringify(message.content),
});
}
} else {
formattedMessages.push(message);
}
}
return formattedMessages;
}
/**
* Stream a chat completion from the LLM with tool calling.
* Uses the tool calling format instead of legacy function calling.
* Stream a chat completion from Azure OpenAI with tool calling.
*
* @param {any[]} messages - The messages to send to the LLM.
* @param {any[]} functions - The functions to use in the LLM.
* @param {function} eventHandler - The event handler to use to report stream events.
* @returns {Promise<{ functionCall: any, textResponse: string }>} - The result of the chat completion.
* @param {any[]} messages
* @param {any[]} functions
* @param {function} eventHandler
* @returns {Promise<{ functionCall: any, textResponse: string }>}
*/
async stream(messages, functions = [], eventHandler = null) {
this.providerLog("Provider.stream - will process this chat completion.");
const msgUUID = v4();
try {
const formattedMessages = this.#formatMessagesForTools(messages);
const tools = this.#formatFunctionsToTools(functions);
const stream = await this.client.chat.completions.create({
model: this.model,
stream: true,
messages: formattedMessages,
...(tools.length > 0 ? { tools } : {}),
});
const result = {
functionCall: null,
textResponse: "",
};
// For accumulating tool calls during streaming
let currentToolCall = null;
for await (const chunk of stream) {
if (!chunk?.choices?.[0]) continue;
const choice = chunk.choices[0];
if (choice.delta?.content) {
result.textResponse += choice.delta.content;
eventHandler?.("reportStreamEvent", {
type: "textResponseChunk",
uuid: msgUUID,
content: choice.delta.content,
});
}
// Handle tool calls (new format)
if (choice.delta?.tool_calls) {
for (const toolCall of choice.delta.tool_calls) {
if (toolCall.id) {
// New tool call starting
currentToolCall = {
id: toolCall.id,
name: toolCall.function?.name || "",
arguments: toolCall.function?.arguments || "",
};
} else if (currentToolCall) {
// Continuation of existing tool call
if (toolCall.function?.name) {
currentToolCall.name += toolCall.function.name;
}
if (toolCall.function?.arguments) {
currentToolCall.arguments += toolCall.function.arguments;
}
}
if (currentToolCall) {
eventHandler?.("reportStreamEvent", {
uuid: `${msgUUID}:tool_call_invocation`,
type: "toolCallInvocation",
content: `Assembling Tool Call: ${currentToolCall.name}(${currentToolCall.arguments})`,
});
}
}
}
}
// Set the function call result if we have a tool call
if (currentToolCall) {
result.functionCall = {
id: currentToolCall.id,
name: currentToolCall.name,
arguments: safeJsonParse(currentToolCall.arguments, {}),
};
}
return {
textResponse: result.textResponse,
functionCall: result.functionCall,
};
return await tooledStream(
this.client,
this.model,
messages,
functions,
eventHandler
);
} catch (error) {
console.error(error.message, error);
// If invalid Auth error we need to abort because no amount of waiting
// will make auth better.
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
@ -228,91 +54,41 @@ class AzureOpenAiProvider extends Provider {
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Create a completion based on the received messages.
* Uses the tool calling format instead of legacy function calling.
* Create a completion based on the received messages with tool calling.
*
* @param messages A list of messages to send to the OpenAI API.
* @param functions
* @param {any[]} messages
* @param {any[]} functions
* @returns The completion.
*/
async complete(messages, functions = []) {
try {
const formattedMessages = this.#formatMessagesForTools(messages);
const tools = this.#formatFunctionsToTools(functions);
const result = await tooledComplete(
this.client,
this.model,
messages,
functions,
this.getCost.bind(this)
);
const response = await this.client.chat.completions.create({
model: this.model,
stream: false,
messages: formattedMessages,
...(tools.length > 0 ? { tools } : {}),
});
// Right now, we only support one completion,
// so we just take the first one in the list
const completion = response.choices[0].message;
const cost = this.getCost(response.usage);
// Handle tool calls (new format)
if (completion.tool_calls && completion.tool_calls.length > 0) {
const toolCall = completion.tool_calls[0];
let functionArgs = {};
try {
functionArgs = JSON.parse(toolCall.function.arguments);
} catch (error) {
// Call the complete function again in case of JSON error
const toolCallId = toolCall.id;
return this.complete(
[
...messages,
{
role: "function",
name: toolCall.function.name,
content: error?.message,
originalFunctionCall: {
id: toolCallId,
name: toolCall.function.name,
arguments: toolCall.function.arguments,
},
},
],
functions
);
}
return {
textResponse: null,
functionCall: {
id: toolCall.id,
name: toolCall.function.name,
arguments: functionArgs,
},
cost,
};
if (result.retryWithError) {
return this.complete([...messages, result.retryWithError], functions);
}
return {
textResponse: completion.content,
cost,
};
return result;
} catch (error) {
// If invalid Auth error we need to abort because no amount of waiting
// will make auth better.
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError // Also will catch AuthenticationError!!!
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
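
The stream events emitted by the shared helper are the same ones the removed inline implementation produced; a minimal eventHandler that consumes them (handler body is illustrative):

const eventHandler = (type, data) => {
  // type is "reportStreamEvent"; data.type is "textResponseChunk" while text
  // streams in, or "toolCallInvocation" while a tool call is being assembled.
  if (data.type === "textResponseChunk") process.stdout.write(data.content);
  if (data.type === "toolCallInvocation") console.log(`\n${data.content}`);
};
// const { textResponse, functionCall } = await provider.stream(messages, functions, eventHandler);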

View File

@ -7,10 +7,13 @@ const { AWSBedrockLLM } = require("../../../AiProviders/bedrock/index.js");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const { safeJsonParse } = require("../../../http");
const { v4 } = require("uuid");
const {
HumanMessage,
SystemMessage,
AIMessage,
ToolMessage,
} = require("@langchain/core/messages");
/**
@ -32,6 +35,7 @@ class AWSBedrockProvider extends InheritMultiple([Provider, UnTooled]) {
this._client = client;
this.model = model;
this.verbose = true;
this._supportsToolCalling = null;
}
/**
@ -45,6 +49,27 @@ class AWSBedrockProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native tool calling via the Bedrock Converse API.
* Checks the ENV to see if the provider supports tool calling.
* If the ENV is not set, we default to false.
* @returns {boolean}
*/
supportsNativeToolCalling() {
if (this._supportsToolCalling !== null) return this._supportsToolCalling;
const supportsToolCalling =
process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes("bedrock");
if (supportsToolCalling)
this.providerLog("AWS Bedrock native tool calling is ENABLED via ENV.");
else
this.providerLog(
"AWS Bedrock native tool calling is DISABLED via ENV. Will use UnTooled instead."
);
this._supportsToolCalling = supportsToolCalling;
return supportsToolCalling;
}
/**
* Gets the credentials for the AWS Bedrock LLM based on the authentication method provided.
* @returns {object} The credentials object.
@ -85,6 +110,99 @@ class AWSBedrockProvider extends InheritMultiple([Provider, UnTooled]) {
return langchainChats;
}
/**
* Convert aibitat message history to Langchain message prototypes with
* proper tool call / tool result handling for native tool calling.
* role:"function" messages (from previous aibitat tool runs) are converted
* to AIMessage(tool_calls) + ToolMessage pairs that Langchain expects.
* @param {Array} chats - The aibitat message history.
* @returns {Array} Langchain message instances.
*/
#convertToLangchainPrototypesWithTools(chats = []) {
const langchainChats = [];
for (const chat of chats) {
if (chat.role === "function") {
if (chat.originalFunctionCall?.id) {
const prevMsg = langchainChats[langchainChats.length - 1];
if (
!prevMsg ||
!(prevMsg instanceof AIMessage) ||
!prevMsg.tool_calls?.length
) {
langchainChats.push(
new AIMessage({
content: "",
tool_calls: [
{
name: chat.originalFunctionCall.name,
args:
typeof chat.originalFunctionCall.arguments === "string"
? safeJsonParse(chat.originalFunctionCall.arguments, {})
: chat.originalFunctionCall.arguments,
id: chat.originalFunctionCall.id,
},
],
})
);
}
langchainChats.push(
new ToolMessage({
content:
typeof chat.content === "string"
? chat.content
: JSON.stringify(chat.content),
tool_call_id: chat.originalFunctionCall.id,
})
);
} else {
const toolCallId = `call_${v4()}`;
langchainChats.push(
new AIMessage({
content: "",
tool_calls: [{ name: chat.name, args: {}, id: toolCallId }],
})
);
langchainChats.push(
new ToolMessage({
content:
typeof chat.content === "string"
? chat.content
: JSON.stringify(chat.content),
tool_call_id: toolCallId,
})
);
}
} else if (chat.role === "system") {
langchainChats.push(new SystemMessage({ content: chat.content }));
} else if (chat.role === "user") {
langchainChats.push(new HumanMessage({ content: chat.content }));
} else if (chat.role === "assistant") {
langchainChats.push(new AIMessage({ content: chat.content }));
}
}
return langchainChats;
}
/**
* Convert aibitat function definitions to the format expected by
* Langchain's ChatBedrockConverse.bindTools().
* @param {Array<{name: string, description: string, parameters: object}>} functions
* @returns {Array<{type: "function", function: {name: string, description: string, parameters: object}}>}
*/
#formatFunctionsToLangchainTools(functions) {
if (!Array.isArray(functions) || functions.length === 0) return [];
return functions.map((func) => ({
type: "function",
function: {
name: func.name,
description: func.description,
parameters: func.parameters,
},
}));
}
async #handleFunctionCallChat({ messages = [] }) {
const response = await this.client
.invoke(this.#convertToLangchainPrototypes(messages))
@ -141,6 +259,7 @@ class AWSBedrockProvider extends InheritMultiple([Provider, UnTooled]) {
/**
* Stream a chat completion from the Bedrock LLM with tool calling.
* Uses native Bedrock Converse tool calling when supported, otherwise falls back to UnTooled.
*
* @param {any[]} messages - The messages to send to the LLM.
* @param {any[]} functions - The functions to use in the LLM.
@ -148,38 +267,149 @@ class AWSBedrockProvider extends InheritMultiple([Provider, UnTooled]) {
* @returns {Promise<{ functionCall: any, textResponse: string }>} - The result of the chat completion.
*/
async stream(messages, functions = [], eventHandler = null) {
return await UnTooled.prototype.stream
.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
)
.catch((e) => {
AWSBedrockLLM.errorToHumanReadable(e, {
method: "stream",
model: this.model,
const useNative = functions.length > 0 && this.supportsNativeToolCalling();
if (!useNative) {
return await UnTooled.prototype.stream
.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
)
.catch((e) => {
AWSBedrockLLM.errorToHumanReadable(e, {
method: "stream",
model: this.model,
});
});
}
this.providerLog(
"Provider.stream (tooled) - will process this chat completion."
);
try {
const langchainMessages =
this.#convertToLangchainPrototypesWithTools(messages);
const tools = this.#formatFunctionsToLangchainTools(functions);
const modelWithTools = this.client.bindTools(tools);
const stream = await modelWithTools.stream(langchainMessages);
const msgUUID = v4();
let textResponse = "";
let finalMessage = null;
for await (const chunk of stream) {
finalMessage =
finalMessage === null ? chunk : finalMessage.concat(chunk);
const content = typeof chunk.content === "string" ? chunk.content : "";
if (content) {
textResponse += content;
eventHandler?.("reportStreamEvent", {
type: "textResponseChunk",
uuid: msgUUID,
content,
});
}
if (chunk.tool_call_chunks?.length) {
for (const toolChunk of chunk.tool_call_chunks) {
if (toolChunk.name) {
eventHandler?.("reportStreamEvent", {
uuid: `${msgUUID}:tool_call_invocation`,
type: "toolCallInvocation",
content: `Assembling Tool Call: ${toolChunk.name}`,
});
}
}
}
}
if (finalMessage?.tool_calls?.length > 0) {
const toolCall = finalMessage.tool_calls[0];
return {
textResponse,
functionCall: {
id: toolCall.id || `call_${v4()}`,
name: toolCall.name,
arguments: toolCall.args || {},
},
cost: 0,
};
}
return { textResponse, functionCall: null, cost: 0 };
} catch (e) {
AWSBedrockLLM.errorToHumanReadable(e, {
method: "stream",
model: this.model,
});
}
}
/**
* Create a completion based on the received messages.
* Create a non-streaming completion with tool calling support.
* Uses native Bedrock Converse tool calling when supported, otherwise falls back to UnTooled.
*
* @param messages A list of messages to send to the API.
* @param functions
* @param {any[]} messages A list of messages to send to the API.
* @param {any[]} functions The function definitions available to the model.
* @returns The completion.
*/
async complete(messages, functions = []) {
return await UnTooled.prototype.complete
.call(this, messages, functions, this.#handleFunctionCallChat.bind(this))
.catch((e) => {
AWSBedrockLLM.errorToHumanReadable(e, {
method: "complete",
model: this.model,
const useNative = functions.length > 0 && this.supportsNativeToolCalling();
if (!useNative) {
return await UnTooled.prototype.complete
.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
)
.catch((e) => {
AWSBedrockLLM.errorToHumanReadable(e, {
method: "complete",
model: this.model,
});
});
}
try {
const langchainMessages =
this.#convertToLangchainPrototypesWithTools(messages);
const tools = this.#formatFunctionsToLangchainTools(functions);
const modelWithTools = this.client.bindTools(tools);
const response = await modelWithTools.invoke(langchainMessages);
if (response.tool_calls?.length > 0) {
const toolCall = response.tool_calls[0];
return {
textResponse: null,
functionCall: {
id: toolCall.id || `call_${v4()}`,
name: toolCall.name,
arguments: toolCall.args || {},
},
cost: 0,
};
}
return {
textResponse:
typeof response.content === "string"
? response.content
: JSON.stringify(response.content),
cost: 0,
};
} catch (e) {
AWSBedrockLLM.errorToHumanReadable(e, {
method: "complete",
model: this.model,
});
}
}
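
To illustrate the conversion above, a single aibitat role:"function" history entry becomes an AIMessage/ToolMessage pair roughly like this (values are illustrative; AIMessage and ToolMessage come from @langchain/core/messages as imported above):

// Input history entry:
// { role: "function", name: "web_search", content: '"Sunny, 21C"',
//   originalFunctionCall: { id: "call_abc", name: "web_search", arguments: { query: "weather berlin" } } }
new AIMessage({
  content: "",
  tool_calls: [{ name: "web_search", args: { query: "weather berlin" }, id: "call_abc" }],
});
new ToolMessage({ content: '"Sunny, 21C"', tool_call_id: "call_abc" });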
/**

View File

@ -28,6 +28,15 @@ class CohereProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
#convertChatHistoryCohere(chatHistory = []) {
let cohereHistory = [];
chatHistory.forEach((message) => {

View File

@ -35,6 +35,15 @@ class CometApiProvider extends InheritMultiple([Provider, UnTooled]) {
return false;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -2,6 +2,8 @@ const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const { tooledStream, tooledComplete } = require("./helpers/tooled.js");
const { RetryError } = require("../error.js");
const { toValidNumber } = require("../../../http/index.js");
class DeepSeekProvider extends InheritMultiple([Provider, UnTooled]) {
@ -32,6 +34,23 @@ class DeepSeekProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* All current DeepSeek models (deepseek-chat and deepseek-reasoner)
* support native OpenAI-compatible tool calling.
* @returns {boolean}
*/
supportsNativeToolCalling() {
return true;
}
get #isThinkingModel() {
return this.model === "deepseek-reasoner";
}
get #tooledOptions() {
return this.#isThinkingModel ? { injectReasoningContent: true } : {};
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({
@ -60,22 +79,83 @@ class DeepSeekProvider extends InheritMultiple([Provider, UnTooled]) {
}
async stream(messages, functions = [], eventHandler = null) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
const useNative = functions.length > 0 && this.supportsNativeToolCalling();
if (!useNative) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
);
}
this.providerLog(
"Provider.stream (tooled) - will process this chat completion."
);
try {
return await tooledStream(
this.client,
this.model,
messages,
functions,
eventHandler,
this.#tooledOptions
);
} catch (error) {
console.error(error.message, error);
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
async complete(messages, functions = []) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
const useNative = functions.length > 0 && this.supportsNativeToolCalling();
if (!useNative) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
}
try {
const result = await tooledComplete(
this.client,
this.model,
messages,
functions,
this.getCost.bind(this),
this.#tooledOptions
);
if (result.retryWithError) {
return this.complete([...messages, result.retryWithError], functions);
}
return result;
} catch (error) {
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
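
The only DeepSeek-specific twist is injectReasoningContent: per the shared helper's docs, deepseek-reasoner (thinking mode) requires every assistant message sent back to the API to carry a reasoning_content field, so the formatter adds an empty one where it is missing. Illustratively, using formatMessagesForTools from ./helpers/tooled.js:

formatMessagesForTools(
  [{ role: "assistant", content: "Checking the weather now." }],
  { injectReasoningContent: true }
);
// -> [{ role: "assistant", content: "Checking the weather now.", reasoning_content: "" }]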
/**

View File

@ -37,6 +37,15 @@ class DellProAiStudioProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -2,7 +2,10 @@ const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const { tooledStream, tooledComplete } = require("./helpers/tooled.js");
const { RetryError } = require("../error.js");
const {
DockerModelRunnerLLM,
parseDockerModelRunnerEndpoint,
} = require("../../../AiProviders/dockerModelRunner/index.js");
@ -31,6 +34,7 @@ class DockerModelRunnerProvider extends InheritMultiple([Provider, UnTooled]) {
this._client = client;
this.model = model;
this.verbose = true;
this._supportsToolCalling = null;
}
get client() {
@ -41,6 +45,19 @@ class DockerModelRunnerProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
async supportsNativeToolCalling() {
if (this._supportsToolCalling !== null) return this._supportsToolCalling;
const dmr = new DockerModelRunnerLLM(null, this.model);
const capabilities = await dmr.getModelCapabilities();
this._supportsToolCalling = capabilities.tools === true;
return this._supportsToolCalling;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({
@ -67,23 +84,92 @@ class DockerModelRunnerProvider extends InheritMultiple([Provider, UnTooled]) {
});
}
/**
* Stream a chat completion with tool calling support.
* Uses native tool calling when supported, otherwise falls back to UnTooled.
*/
async stream(messages, functions = [], eventHandler = null) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
);
}
this.providerLog(
"Provider.stream (tooled) - will process this chat completion."
);
try {
return await tooledStream(
this.client,
this.model,
messages,
functions,
eventHandler
);
} catch (error) {
console.error(error.message, error);
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Create a non-streaming completion with tool calling support.
* Uses native tool calling when supported, otherwise falls back to UnTooled.
*/
async complete(messages, functions = []) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
}
try {
const result = await tooledComplete(
this.client,
this.model,
messages,
functions,
this.getCost.bind(this)
);
if (result.retryWithError) {
return this.complete([...messages, result.retryWithError], functions);
}
return result;
} catch (error) {
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**

View File

@ -33,6 +33,15 @@ class FireworksAIProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -40,6 +40,15 @@ class FoundryProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
await FoundryLLM.cacheContextWindows();
return await this.client.chat.completions

View File

@ -2,6 +2,8 @@ const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const { tooledStream, tooledComplete } = require("./helpers/tooled.js");
const { RetryError } = require("../error.js");
const { toValidNumber } = require("../../../http/index.js");
const { getAnythingLLMUserAgent } = require("../../../../endpoints/utils");
const { GenericOpenAiLLM } = require("../../../AiProviders/genericOpenAi");
@ -31,6 +33,7 @@ class GenericOpenAiProvider extends InheritMultiple([Provider, UnTooled]) {
this._client = client;
this.model = model;
this.verbose = true;
this._supportsToolCalling = null;
this.maxTokens = process.env.GENERIC_OPEN_AI_MAX_TOKENS
? toValidNumber(process.env.GENERIC_OPEN_AI_MAX_TOKENS, 1024)
: 1024;
@ -46,6 +49,32 @@ class GenericOpenAiProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* - This can be any OpenAI compatible provider that supports tool calling
* - We check the ENV to see if the provider supports tool calling.
* - If the ENV is not set, we default to false.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
if (this._supportsToolCalling !== null) return this._supportsToolCalling;
const supportsToolCalling =
process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes(
"generic-openai"
);
if (supportsToolCalling)
this.providerLog(
"Generic OpenAI supports native tool calling is ENABLED via ENV."
);
else
this.providerLog(
"Generic OpenAI supports native tool calling is DISABLED via ENV. Will use UnTooled instead."
);
this._supportsToolCalling = supportsToolCalling;
return supportsToolCalling;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({
@ -74,23 +103,92 @@ class GenericOpenAiProvider extends InheritMultiple([Provider, UnTooled]) {
});
}
/**
* Stream a chat completion with tool calling support.
* Uses native tool calling when supported, otherwise falls back to UnTooled.
*/
async stream(messages, functions = [], eventHandler = null) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
);
}
this.providerLog(
"Provider.stream (tooled) - will process this chat completion."
);
try {
return await tooledStream(
this.client,
this.model,
messages,
functions,
eventHandler
);
} catch (error) {
console.error(error.message, error);
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Create a non-streaming completion with tool calling support.
* Uses native tool calling when supported, otherwise falls back to UnTooled.
*/
async complete(messages, functions = []) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
}
try {
const result = await tooledComplete(
this.client,
this.model,
messages,
functions,
this.getCost.bind(this)
);
if (result.retryWithError) {
return this.complete([...messages, result.retryWithError], functions);
}
return result;
} catch (error) {
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**

View File

@ -26,6 +26,15 @@ class GiteeAIProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -2,10 +2,13 @@ const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const { tooledStream, tooledComplete } = require("./helpers/tooled.js");
const { RetryError } = require("../error.js");
/**
* The agent provider for the GroqAI provider.
* We wrap Groq in UnTooled because its tool-calling built in is quite bad and wasteful.
* Supports true OpenAI-compatible tool calling when enabled via ENV,
* falling back to the UnTooled prompt-based approach otherwise.
*/
class GroqProvider extends InheritMultiple([Provider, UnTooled]) {
model;
@ -22,6 +25,7 @@ class GroqProvider extends InheritMultiple([Provider, UnTooled]) {
this._client = client;
this.model = model;
this.verbose = true;
this._supportsToolCalling = null;
}
get client() {
@ -32,6 +36,27 @@ class GroqProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* - Since Groq models vary in tool calling support, we check the ENV.
* - If the ENV is not set, we default to false.
* @returns {boolean}
*/
supportsNativeToolCalling() {
if (this._supportsToolCalling !== null) return this._supportsToolCalling;
const supportsToolCalling =
process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes("groq");
if (supportsToolCalling)
this.providerLog("Groq supports native tool calling is ENABLED via ENV.");
else
this.providerLog(
"Groq supports native tool calling is DISABLED via ENV. Will use UnTooled instead."
);
this._supportsToolCalling = supportsToolCalling;
return supportsToolCalling;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({
@ -58,31 +83,98 @@ class GroqProvider extends InheritMultiple([Provider, UnTooled]) {
});
}
/**
* Stream a chat completion with tool calling support.
* Uses native tool calling when enabled via ENV, otherwise falls back to UnTooled.
*/
async stream(messages, functions = [], eventHandler = null) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
);
}
this.providerLog(
"Provider.stream (tooled) - will process this chat completion."
);
try {
return await tooledStream(
this.client,
this.model,
messages,
functions,
eventHandler
);
} catch (error) {
console.error(error.message, error);
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Create a non-streaming completion with tool calling support.
* Uses native tool calling when enabled via ENV, otherwise falls back to UnTooled.
*/
async complete(messages, functions = []) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
}
try {
const result = await tooledComplete(
this.client,
this.model,
messages,
functions,
this.getCost.bind(this)
);
if (result.retryWithError) {
return this.complete([...messages, result.retryWithError], functions);
}
return result;
} catch (error) {
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Get the cost of the completion.
*
* @param _usage The completion to get the cost for.
* @returns The cost of the completion.
* Stubbed since Groq has no cost basis.
*/
getCost(_usage) {
return 0;

View File

@ -0,0 +1,298 @@
const { v4 } = require("uuid");
const { safeJsonParse } = require("../../../../http");
/**
* Shared native OpenAI-compatible tool calling utilities.
* Any provider with an OpenAI-compatible client can use these functions
* instead of the UnTooled prompt-based approach when the model supports
* native tool calling.
*
* Usage in a provider:
* const { tooledStream, tooledComplete } = require("./helpers/tooled.js");
*
* async stream(messages, functions, eventHandler) {
* if (functions.length > 0 && await this.supportsNativeToolCalling()) {
* return tooledStream(this.client, this.model, messages, functions, eventHandler);
* }
* // ... fallback to UnTooled ...
* }
*/
/**
* Convert aibitat function definitions to the OpenAI tools format.
* @param {Array<{name: string, description: string, parameters: object}>} functions
* @returns {Array<{type: "function", function: {name: string, description: string, parameters: object}}>}
*/
function formatFunctionsToTools(functions) {
if (!Array.isArray(functions) || functions.length === 0) return [];
return functions.map((func) => ({
type: "function",
function: {
name: func.name,
description: func.description,
parameters: func.parameters,
},
}));
}
/**
* Convert the aibitat message history (which uses role:"function" with
* `originalFunctionCall` metadata) into the OpenAI tool-calling message
* format (assistant `tool_calls` + role:"tool" pairs).
* @param {Array} messages
* @param {{injectReasoningContent?: boolean}} options
* - injectReasoningContent: when true, ensures every assistant message has
* a `reasoning_content` field (required by DeepSeek thinking-mode models).
* @returns {Array} Messages formatted for the OpenAI tools API
*/
function formatMessagesForTools(messages, options = {}) {
const formattedMessages = [];
const { injectReasoningContent = false } = options;
for (const message of messages) {
if (message.role === "function") {
if (message.originalFunctionCall?.id) {
const prevMsg = formattedMessages[formattedMessages.length - 1];
if (!prevMsg || prevMsg.role !== "assistant" || !prevMsg.tool_calls) {
formattedMessages.push({
role: "assistant",
content: null,
...(injectReasoningContent ? { reasoning_content: "" } : {}),
tool_calls: [
{
id: message.originalFunctionCall.id,
type: "function",
function: {
name: message.originalFunctionCall.name,
arguments:
typeof message.originalFunctionCall.arguments === "string"
? message.originalFunctionCall.arguments
: JSON.stringify(message.originalFunctionCall.arguments),
},
},
],
});
}
formattedMessages.push({
role: "tool",
tool_call_id: message.originalFunctionCall.id,
content:
typeof message.content === "string"
? message.content
: JSON.stringify(message.content),
});
} else {
const toolCallId = `call_${v4()}`;
formattedMessages.push({
role: "assistant",
content: null,
...(injectReasoningContent ? { reasoning_content: "" } : {}),
tool_calls: [
{
id: toolCallId,
type: "function",
function: {
name: message.name,
arguments: "{}",
},
},
],
});
formattedMessages.push({
role: "tool",
tool_call_id: toolCallId,
content:
typeof message.content === "string"
? message.content
: JSON.stringify(message.content),
});
}
} else if (
injectReasoningContent &&
message.role === "assistant" &&
!("reasoning_content" in message)
) {
formattedMessages.push({ ...message, reasoning_content: "" });
} else {
formattedMessages.push(message);
}
}
return formattedMessages;
}
/**
* Stream a chat completion using native OpenAI-compatible tool calling.
* Handles parallel tool calls by tracking each tool call by its streaming
* index, then returning only the first one for the agent framework to process.
*
* @param {import("openai").OpenAI} client - OpenAI-compatible client
* @param {string} model - Model identifier
* @param {Array} messages - Raw aibitat message history
* @param {Array} functions - Aibitat function definitions
* @param {function|null} eventHandler - Stream event handler
* @param {{injectReasoningContent?: boolean}} options - Provider-specific options forwarded to formatMessagesForTools
* @returns {Promise<{textResponse: string, functionCall: object|null}>}
*/
async function tooledStream(
client,
model,
messages,
functions = [],
eventHandler = null,
options = {}
) {
const msgUUID = v4();
const formattedMessages = formatMessagesForTools(messages, options);
const tools = formatFunctionsToTools(functions);
const stream = await client.chat.completions.create({
model,
stream: true,
messages: formattedMessages,
...(tools.length > 0 ? { tools } : {}),
});
const result = {
functionCall: null,
textResponse: "",
};
const toolCallsByIndex = {};
for await (const chunk of stream) {
if (!chunk?.choices?.[0]) continue;
const choice = chunk.choices[0];
if (choice.delta?.content) {
result.textResponse += choice.delta.content;
eventHandler?.("reportStreamEvent", {
type: "textResponseChunk",
uuid: msgUUID,
content: choice.delta.content,
});
}
if (choice.delta?.tool_calls) {
for (const toolCall of choice.delta.tool_calls) {
const idx = toolCall.index ?? 0;
if (toolCall.id) {
toolCallsByIndex[idx] = {
id: toolCall.id,
name: toolCall.function?.name || "",
arguments: toolCall.function?.arguments || "",
};
} else if (toolCallsByIndex[idx]) {
if (toolCall.function?.name) {
toolCallsByIndex[idx].name += toolCall.function.name;
}
if (toolCall.function?.arguments) {
toolCallsByIndex[idx].arguments += toolCall.function.arguments;
}
}
if (toolCallsByIndex[idx]) {
eventHandler?.("reportStreamEvent", {
uuid: `${msgUUID}:tool_call_invocation`,
type: "toolCallInvocation",
content: `Assembling Tool Call: ${toolCallsByIndex[idx].name}(${toolCallsByIndex[idx].arguments})`,
});
}
}
}
}
const toolCallIndices = Object.keys(toolCallsByIndex).map(Number);
if (toolCallIndices.length > 0) {
const firstToolCall = toolCallsByIndex[Math.min(...toolCallIndices)];
result.functionCall = {
id: firstToolCall.id,
name: firstToolCall.name,
arguments: safeJsonParse(firstToolCall.arguments, {}),
};
}
return {
textResponse: result.textResponse,
functionCall: result.functionCall,
};
}
/**
* Non-streaming chat completion using native OpenAI-compatible tool calling.
* Returns the first tool call if the model requests any, otherwise the text response.
*
* @param {import("openai").OpenAI} client - OpenAI-compatible client
* @param {string} model - Model identifier
* @param {Array} messages - Raw aibitat message history
* @param {Array} functions - Aibitat function definitions
* @param {function} getCostFn - Provider's getCost function
* @param {{injectReasoningContent?: boolean}} options - Provider-specific options forwarded to formatMessagesForTools
* @returns {Promise<{textResponse: string|null, functionCall: object|null, cost: number}>}
*/
async function tooledComplete(
client,
model,
messages,
functions = [],
getCostFn = () => 0,
options = {}
) {
const formattedMessages = formatMessagesForTools(messages, options);
const tools = formatFunctionsToTools(functions);
const response = await client.chat.completions.create({
model,
stream: false,
messages: formattedMessages,
...(tools.length > 0 ? { tools } : {}),
});
const completion = response.choices[0].message;
const cost = getCostFn(response.usage);
if (completion.tool_calls && completion.tool_calls.length > 0) {
const toolCall = completion.tool_calls[0];
const functionArgs = safeJsonParse(toolCall.function.arguments, null);
if (functionArgs === null) {
return {
textResponse: null,
retryWithError: {
role: "function",
name: toolCall.function.name,
content: `Failed to parse tool call arguments as JSON. Raw arguments: ${toolCall.function.arguments}`,
originalFunctionCall: {
id: toolCall.id,
name: toolCall.function.name,
arguments: toolCall.function.arguments,
},
},
cost,
};
}
return {
textResponse: null,
functionCall: {
id: toolCall.id,
name: toolCall.function.name,
arguments: functionArgs,
},
cost,
};
}
return {
textResponse: completion.content,
cost,
};
}
module.exports = {
formatFunctionsToTools,
formatMessagesForTools,
tooledStream,
tooledComplete,
};
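
To make the history conversion concrete, a prior tool run that aibitat stored as a role:"function" message expands into an assistant tool_calls / tool pair (message contents are illustrative):

const history = [
  { role: "user", content: "What is the weather in Berlin?" },
  {
    role: "function",
    name: "get_weather",
    content: '{"tempC":21}',
    originalFunctionCall: {
      id: "call_123",
      name: "get_weather",
      arguments: { city: "Berlin" },
    },
  },
];
formatMessagesForTools(history);
// -> [
//   { role: "user", content: "What is the weather in Berlin?" },
//   { role: "assistant", content: null, tool_calls: [{ id: "call_123", type: "function",
//       function: { name: "get_weather", arguments: '{"city":"Berlin"}' } }] },
//   { role: "tool", tool_call_id: "call_123", content: '{"tempC":21}' },
// ]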

View File

@ -32,6 +32,15 @@ class KoboldCPPProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -2,9 +2,13 @@ const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const { tooledStream, tooledComplete } = require("./helpers/tooled.js");
const { RetryError } = require("../error.js");
/**
* The agent provider for LiteLLM.
* Supports true OpenAI-compatible tool calling when enabled via ENV,
* falling back to the UnTooled prompt-based approach otherwise.
*/
class LiteLLMProvider extends InheritMultiple([Provider, UnTooled]) {
model;
@ -21,6 +25,7 @@ class LiteLLMProvider extends InheritMultiple([Provider, UnTooled]) {
this._client = client;
this.model = model || process.env.LITE_LLM_MODEL_PREF;
this.verbose = true;
this._supportsToolCalling = null;
}
get client() {
@ -31,6 +36,29 @@ class LiteLLMProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* - Since LiteLLM models vary in tool calling support, we check the ENV.
* - If the ENV is not set, we default to false.
* @returns {boolean}
*/
supportsNativeToolCalling() {
if (this._supportsToolCalling !== null) return this._supportsToolCalling;
const supportsToolCalling =
process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes("litellm");
if (supportsToolCalling)
this.providerLog(
"LiteLLM supports native tool calling is ENABLED via ENV."
);
else
this.providerLog(
"LiteLLM supports native tool calling is DISABLED via ENV. Will use UnTooled instead."
);
this._supportsToolCalling = supportsToolCalling;
return supportsToolCalling;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({
@ -57,25 +85,99 @@ class LiteLLMProvider extends InheritMultiple([Provider, UnTooled]) {
});
}
/**
* Stream a chat completion with tool calling support.
* Uses native tool calling when enabled via ENV, otherwise falls back to UnTooled.
*/
async stream(messages, functions = [], eventHandler = null) {
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
);
}
this.providerLog(
"Provider.stream (tooled) - will process this chat completion."
);
try {
return await tooledStream(
this.client,
this.model,
messages,
functions,
eventHandler
);
} catch (error) {
console.error(error.message, error);
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Create a non-streaming completion with tool calling support.
* Uses native tool calling when enabled via ENV, otherwise falls back to UnTooled.
*/
async complete(messages, functions = []) {
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
}
try {
const result = await tooledComplete(
this.client,
this.model,
messages,
functions,
this.getCost.bind(this)
);
if (result.retryWithError) {
return this.complete([...messages, result.retryWithError], functions);
}
return result;
} catch (error) {
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Get the cost of the completion.
* @param _usage The completion to get the cost for.
* @returns The cost of the completion.
*/
getCost(_usage) {
return 0;
}
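// Illustrative only (example value, not a recommendation): the ENV gate above is a
// plain substring check against a comma-separated list, shared by LiteLLM, LocalAI
// and OpenRouter in this commit.
//   PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING="litellm,openrouter"
//   process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes("litellm");   // true  -> native tool calling
//   process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes("localai");   // false -> falls back to UnTooled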

View File

@ -2,6 +2,8 @@ const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const { tooledStream, tooledComplete } = require("./helpers/tooled.js");
const { RetryError } = require("../error.js");
const {
LMStudioLLM,
parseLMStudioBasePath,
@ -9,12 +11,13 @@ const {
/**
* The agent provider for the LMStudio.
* Supports true OpenAI-compatible tool calling when the model supports it,
* falling back to the UnTooled prompt-based approach otherwise.
*/
class LMStudioProvider extends InheritMultiple([Provider, UnTooled]) {
model;
/**
*
* @param {{model?: string}} config
*/
constructor(config = {}) {
@ -32,6 +35,7 @@ class LMStudioProvider extends InheritMultiple([Provider, UnTooled]) {
this._client = client;
this.model = model;
this.verbose = true;
this._supportsToolCalling = null;
}
get client() {
@ -42,6 +46,21 @@ class LMStudioProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether the loaded model supports native OpenAI-compatible tool calling.
* Checks the LMStudio /api/v1/models endpoint for the model's capabilities.
* @returns {Promise<boolean>}
*/
async supportsNativeToolCalling() {
if (this._supportsToolCalling !== null) return this._supportsToolCalling;
const lmstudio = new LMStudioLLM(null, this.model);
const capabilities = await lmstudio.getModelCapabilities();
this._supportsToolCalling = capabilities.tools === true;
return this._supportsToolCalling;
}
// ---- UnTooled callbacks (used when native tool calling is not supported) ----
async #handleFunctionCallChat({ messages = [] }) {
await LMStudioLLM.cacheContextWindows();
return await this.client.chat.completions
@ -70,31 +89,101 @@ class LMStudioProvider extends InheritMultiple([Provider, UnTooled]) {
});
}
/**
* Stream a chat completion with tool calling support.
* Uses native tool calling when supported, otherwise falls back to UnTooled.
*/
async stream(messages, functions = [], eventHandler = null) {
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
);
}
this.providerLog(
"Provider.stream (tooled) - will process this chat completion."
);
try {
await LMStudioLLM.cacheContextWindows();
return await tooledStream(
this.client,
this.model,
messages,
functions,
eventHandler
);
} catch (error) {
console.error(error.message, error);
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Create a non-streaming completion with tool calling support.
* Uses native tool calling when supported, otherwise falls back to UnTooled.
*/
async complete(messages, functions = []) {
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
}
try {
await LMStudioLLM.cacheContextWindows();
const result = await tooledComplete(
this.client,
this.model,
messages,
functions,
this.getCost.bind(this)
);
if (result.retryWithError) {
return this.complete([...messages, result.retryWithError], functions);
}
return result;
} catch (error) {
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Get the cost of the completion.
*
* @param _usage The completion to get the cost for.
* @returns The cost of the completion.
* Stubbed since LMStudio has no cost basis.
*/
getCost(_usage) {
return 0;

View File

@ -2,9 +2,13 @@ const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const { tooledStream, tooledComplete } = require("./helpers/tooled.js");
const { RetryError } = require("../error.js");
/**
* The agent provider for the LocalAI provider.
* Supports native OpenAI-compatible tool calling when enabled via ENV,
* falling back to the UnTooled prompt-based approach otherwise.
*/
class LocalAiProvider extends InheritMultiple([Provider, UnTooled]) {
model;
@ -21,6 +25,7 @@ class LocalAiProvider extends InheritMultiple([Provider, UnTooled]) {
this._client = client;
this.model = model;
this.verbose = true;
this._supportsToolCalling = null;
}
get client() {
@ -31,6 +36,31 @@ class LocalAiProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Since LocalAI does not expose model capabilities via API, we check
* the PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING ENV flag for "localai".
* @returns {boolean}
*/
supportsNativeToolCalling() {
if (this._supportsToolCalling !== null) return this._supportsToolCalling;
const supportsToolCalling =
process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes("localai");
if (supportsToolCalling)
this.providerLog(
"LocalAI supports native tool calling is ENABLED via ENV."
);
else
this.providerLog(
"LocalAI supports native tool calling is DISABLED via ENV. Will use UnTooled instead."
);
this._supportsToolCalling = supportsToolCalling;
return supportsToolCalling;
}
// ---- UnTooled callbacks (used when native tool calling is not supported) ----
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({
@ -40,10 +70,8 @@ class LocalAiProvider extends InheritMultiple([Provider, UnTooled]) {
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("LocalAI chat: No results!");
if (result.choices.length === 0)
throw new Error("LocalAI chat: No results length!");
return result.choices[0].message.content;
})
.catch((_) => {
@ -59,23 +87,92 @@ class LocalAiProvider extends InheritMultiple([Provider, UnTooled]) {
});
}
/**
* Stream a chat completion with tool calling support.
* Uses native tool calling when supported, otherwise falls back to UnTooled.
*/
async stream(messages, functions = [], eventHandler = null) {
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
);
}
this.providerLog(
"Provider.stream (tooled) - will process this chat completion."
);
try {
return await tooledStream(
this.client,
this.model,
messages,
functions,
eventHandler
);
} catch (error) {
console.error(error.message, error);
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Create a non-streaming completion with tool calling support.
* Uses native tool calling when supported, otherwise falls back to UnTooled.
*/
async complete(messages, functions = []) {
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
}
try {
const result = await tooledComplete(
this.client,
this.model,
messages,
functions,
this.getCost.bind(this)
);
if (result.retryWithError) {
return this.complete([...messages, result.retryWithError], functions);
}
return result;
} catch (error) {
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**

View File

@ -35,6 +35,15 @@ class MistralProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -35,6 +35,15 @@ class MoonshotAiProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -35,6 +35,15 @@ class NovitaProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -33,6 +33,15 @@ class NvidiaNimProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -1,6 +1,7 @@
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const { formatFunctionsToTools } = require("./helpers/tooled.js");
const { OllamaAILLM } = require("../../../AiProviders/ollama");
const { Ollama } = require("ollama");
const { v4 } = require("uuid");
@ -8,6 +9,8 @@ const { safeJsonParse } = require("../../../http");
/**
* The agent provider for the Ollama provider.
* Supports true OpenAI-compatible tool calling when the model supports it,
* falling back to the UnTooled prompt-based approach otherwise.
*/
class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
model;
@ -19,16 +22,17 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
} = config;
super();
const authToken = process.env.OLLAMA_AUTH_TOKEN;
const basePath = process.env.OLLAMA_BASE_PATH;
const headers = authToken ? { Authorization: `Bearer ${authToken}` } : {};
this._client = new Ollama({
host: basePath,
headers: headers,
fetch: this.#applyFetch(),
});
this.model = model;
this.verbose = true;
this._supportsToolCalling = null;
}
get client() {
@ -39,6 +43,19 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
   * Whether the loaded model supports native OpenAI-compatible tool calling.
   * Checks the model's capabilities via the Ollama API (OllamaAILLM.getModelCapabilities).
   * @returns {Promise<boolean>}
*/
async supportsNativeToolCalling() {
if (this._supportsToolCalling !== null) return this._supportsToolCalling;
const ollama = new OllamaAILLM(null, this.model);
const capabilities = await ollama.getModelCapabilities();
this._supportsToolCalling = capabilities.tools === true;
return this._supportsToolCalling;
}
get queryOptions() {
this.providerLog(
`${this.model} is using a max context window of ${OllamaAILLM.promptWindowLimit(this.model)}/${OllamaAILLM.maxContextWindow(this.model)} tokens.`
@ -74,6 +91,49 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
});
}
/**
* Convert aibitat's internal message history (which uses role:"function" with
* originalFunctionCall metadata) into the Ollama tool-calling message format
* (assistant tool_calls + role:"tool" result pairs).
* @param {Array} messages
* @returns {Array}
*/
#formatMessagesForOllamaTools(messages) {
const formatted = [];
for (const message of messages) {
if (message.role === "function") {
const funcName =
message.originalFunctionCall?.name || message.name || "unknown";
const funcArgs = message.originalFunctionCall?.arguments || {};
formatted.push({
role: "assistant",
content: "",
tool_calls: [
{
function: {
name: funcName,
arguments:
typeof funcArgs === "string"
? safeJsonParse(funcArgs, {})
: funcArgs,
},
},
],
});
formatted.push({
role: "tool",
content:
typeof message.content === "string"
? message.content
: JSON.stringify(message.content),
});
} else {
formatted.push(message);
}
}
return formatted;
}
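  // Illustrative example only (tool name and content are hypothetical): a stored
  // aibitat history entry such as
  //   { role: "function", name: "web-search", content: "top results...",
  //     originalFunctionCall: { name: "web-search", arguments: { query: "anythingllm" } } }
  // is expanded by #formatMessagesForOllamaTools into the Ollama tool-calling pair:
  //   { role: "assistant", content: "", tool_calls: [{ function: { name: "web-search", arguments: { query: "anythingllm" } } }] }
  //   { role: "tool", content: "top results..." }
  // All other messages pass through unchanged.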
async streamingFunctionCall(
messages,
functions,
@ -168,9 +228,9 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
}
/**
* Stream a chat completion with tool calling support.
* Uses native tool calling when supported, otherwise falls back to the
* Ollama SDK + UnTooled prompt-based approach.
*
* @param messages A list of messages to send to the API.
* @param functions
@ -178,8 +238,74 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
* @returns The completion.
*/
async stream(messages, functions = [], eventHandler = null) {
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (useNative) {
this.providerLog(
"OllamaProvider.stream (tooled) - will process this chat completion."
);
await OllamaAILLM.cacheContextWindows();
const msgUUID = v4();
const formattedMessages = this.#formatMessagesForOllamaTools(messages);
const tools = formatFunctionsToTools(functions);
const stream = await this.client.chat({
model: this.model,
messages: formattedMessages,
tools,
stream: true,
options: this.queryOptions,
});
let textResponse = "";
let toolCalls = null;
for await (const chunk of stream) {
if (!chunk?.message) continue;
if (chunk.message.content) {
textResponse += chunk.message.content;
eventHandler?.("reportStreamEvent", {
type: "textResponseChunk",
uuid: msgUUID,
content: chunk.message.content,
});
}
if (chunk.message.tool_calls?.length > 0) {
toolCalls = chunk.message.tool_calls;
eventHandler?.("reportStreamEvent", {
uuid: `${msgUUID}:tool_call_invocation`,
type: "toolCallInvocation",
content: `Tool Call: ${toolCalls[0].function.name}(${JSON.stringify(toolCalls[0].function.arguments)})`,
});
}
}
if (toolCalls && toolCalls.length > 0) {
const toolCall = toolCalls[0];
const args =
typeof toolCall.function.arguments === "string"
? safeJsonParse(toolCall.function.arguments, {})
: toolCall.function.arguments || {};
return {
textResponse,
functionCall: {
id: `ollama_${v4()}`,
name: toolCall.function.name,
arguments: args,
},
};
}
return { textResponse, functionCall: null };
}
// Fallback: UnTooled prompt-based approach via the native Ollama SDK
this.providerLog(
"OllamaProvider.complete - will process this chat completion."
"OllamaProvider.stream - will process this chat completion."
);
try {
let completion = { content: "" };
@ -281,9 +407,6 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
}
}
// The UnTooled class inherited Deduplicator is mostly useful to prevent the agent
// from calling the exact same function over and over in a loop within a single chat exchange
// _but_ we should enable it to call previously used tools in a new chat interaction.
this.deduplicator.reset("runs");
return {
textResponse: completion.content,
@ -295,13 +418,54 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
}
/**
* Create a non-streaming completion with tool calling support.
* Uses native tool calling when supported, otherwise falls back to UnTooled.
*
* @param messages A list of messages to send to the API.
* @param functions
* @returns The completion.
*/
async complete(messages, functions = []) {
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (useNative) {
await OllamaAILLM.cacheContextWindows();
const formattedMessages = this.#formatMessagesForOllamaTools(messages);
const tools = formatFunctionsToTools(functions);
const response = await this.client.chat({
model: this.model,
messages: formattedMessages,
tools,
options: this.queryOptions,
});
if (response.message?.tool_calls?.length > 0) {
const toolCall = response.message.tool_calls[0];
const args =
typeof toolCall.function.arguments === "string"
? safeJsonParse(toolCall.function.arguments, {})
: toolCall.function.arguments || {};
return {
textResponse: null,
functionCall: {
id: `ollama_${v4()}`,
name: toolCall.function.name,
arguments: args,
},
cost: 0,
};
}
return {
textResponse: response.message?.content || null,
cost: 0,
};
}
// Fallback: UnTooled prompt-based approach via the native Ollama SDK
this.providerLog(
"OllamaProvider.complete - will process this chat completion."
);
@ -341,9 +505,6 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
completion.content = textResponse;
}
// The UnTooled class inherited Deduplicator is mostly useful to prevent the agent
// from calling the exact same function over and over in a loop within a single chat exchange
// _but_ we should enable it to call previously used tools in a new chat interaction.
this.deduplicator.reset("runs");
return {
textResponse: completion.content,
@ -356,10 +517,9 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
/**
* Get the cost of the completion.
*
   * @param _usage The completion to get the cost for.
   * @returns The cost of the completion.
   * Stubbed since Ollama has no cost basis.
*/
getCost(_usage) {
return 0;

View File

@ -2,9 +2,13 @@ const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const { tooledStream, tooledComplete } = require("./helpers/tooled.js");
const { RetryError } = require("../error.js");
/**
* The agent provider for the OpenRouter provider.
* Supports true OpenAI-compatible tool calling when enabled via ENV,
* falling back to the UnTooled prompt-based approach otherwise.
* @extends {Provider}
* @extends {UnTooled}
*/
@ -27,6 +31,7 @@ class OpenRouterProvider extends InheritMultiple([Provider, UnTooled]) {
this._client = client;
this.model = model;
this.verbose = true;
this._supportsToolCalling = null;
}
get client() {
@ -37,6 +42,29 @@ class OpenRouterProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* - Since OpenRouter models vary in tool calling support, we check the ENV.
* - If the ENV is not set, we default to false.
* @returns {boolean}
*/
supportsNativeToolCalling() {
if (this._supportsToolCalling !== null) return this._supportsToolCalling;
const supportsToolCalling =
process.env.PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING?.includes("openrouter");
if (supportsToolCalling)
this.providerLog(
"OpenRouter supports native tool calling is ENABLED via ENV."
);
else
this.providerLog(
"OpenRouter supports native tool calling is DISABLED via ENV. Will use UnTooled instead."
);
this._supportsToolCalling = supportsToolCalling;
return supportsToolCalling;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({
@ -65,31 +93,98 @@ class OpenRouterProvider extends InheritMultiple([Provider, UnTooled]) {
});
}
/**
* Stream a chat completion with tool calling support.
* Uses native tool calling when enabled via ENV, otherwise falls back to UnTooled.
*/
async stream(messages, functions = [], eventHandler = null) {
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
);
}
this.providerLog(
"Provider.stream (tooled) - will process this chat completion."
);
try {
return await tooledStream(
this.client,
this.model,
messages,
functions,
eventHandler
);
} catch (error) {
console.error(error.message, error);
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Create a non-streaming completion with tool calling support.
* Uses native tool calling when enabled via ENV, otherwise falls back to UnTooled.
*/
async complete(messages, functions = []) {
const useNative =
functions.length > 0 && (await this.supportsNativeToolCalling());
if (!useNative) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
}
try {
const result = await tooledComplete(
this.client,
this.model,
messages,
functions,
this.getCost.bind(this)
);
if (result.retryWithError) {
return this.complete([...messages, result.retryWithError], functions);
}
return result;
} catch (error) {
if (error instanceof OpenAI.AuthenticationError) throw error;
if (
error instanceof OpenAI.RateLimitError ||
error instanceof OpenAI.InternalServerError ||
error instanceof OpenAI.APIError
) {
throw new RetryError(error.message);
}
throw error;
}
}
/**
* Get the cost of the completion.
*
* @param _usage The completion to get the cost for.
* @returns The cost of the completion.
* Stubbed since OpenRouter has no cost basis.
*/
getCost(_usage) {
return 0;

View File

@ -31,6 +31,15 @@ class PerplexityProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -35,6 +35,15 @@ class PPIOProvider extends InheritMultiple([Provider, UnTooled]) {
return false;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -36,6 +36,15 @@ class PrivatemodelProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -35,6 +35,15 @@ class SambaNovaProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -30,6 +30,15 @@ class TextWebGenUiProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -31,6 +31,15 @@ class TogetherAIProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -31,6 +31,15 @@ class XAIProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -35,6 +35,15 @@ class ZAIProvider extends InheritMultiple([Provider, UnTooled]) {
return true;
}
/**
* Whether this provider supports native OpenAI-compatible tool calling.
* Override in subclass and return true to use native tool calling instead of UnTooled.
* @returns {boolean|Promise<boolean>}
*/
supportsNativeToolCalling() {
return false;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({

View File

@ -227,6 +227,8 @@ class EphemeralAgentHandler extends AgentHandler {
}
// Load flow plugin. This is marked by `@@flow_` in the array of functions to load.
// Replace the @@flow_ placeholder in the agent's function list with the actual
// tool name so the function lookup in reply() can find it.
if (name.startsWith("@@flow_")) {
const uuid = name.replace("@@flow_", "");
const plugin = AgentFlows.loadFlowPlugin(uuid, this.aibitat);
@ -237,6 +239,11 @@ class EphemeralAgentHandler extends AgentHandler {
continue;
}
this.aibitat.agents.get("@agent").functions = this.aibitat.agents
.get("@agent")
.functions.filter((f) => f !== name);
this.aibitat.agents.get("@agent").functions.push(plugin.name);
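      // Illustrative example only (names are hypothetical): if the agent was loaded
      // with functions ["@@flow_1b2c3d", "web-browsing"], the two statements above
      // rewrite that list to ["web-browsing", "my-flow-name"] so reply() can resolve
      // the flow by its registered plugin.name instead of the @@flow_ placeholder.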
this.aibitat.use(plugin.plugin());
this.log(
`Attached flow ${plugin.name} (${plugin.flowName}) plugin to Agent cluster`

View File

@ -458,6 +458,8 @@ class AgentHandler {
}
// Load flow plugin. This is marked by `@@flow_` in the array of functions to load.
// Replace the @@flow_ placeholder in the agent's function list with the actual
// tool name so the function lookup in reply() can find it.
if (name.startsWith("@@flow_")) {
const uuid = name.replace("@@flow_", "");
const plugin = AgentFlows.loadFlowPlugin(uuid, this.aibitat);
@ -468,6 +470,11 @@ class AgentHandler {
continue;
}
this.aibitat.agents.get("@agent").functions = this.aibitat.agents
.get("@agent")
.functions.filter((f) => f !== name);
this.aibitat.agents.get("@agent").functions.push(plugin.name);
this.aibitat.use(plugin.plugin());
this.log(
`Attached flow ${plugin.name} (${plugin.flowName}) plugin to Agent cluster`

View File

@ -1303,6 +1303,9 @@ function dumpENV() {
// Allow disabling of streaming for AWS Bedrock
"AWS_BEDROCK_STREAMING_DISABLED",
// Allow native tool calling for specific providers.
"PROVIDER_SUPPORTS_NATIVE_TOOL_CALLING",
];
// Simple sanitization of each value to prevent ENV injection via newline or quote escaping.