Improved DMR support (#4863)

* Improve DMR support
- Autodetect installed models
- Fetch all available models from Docker Hub to show what can be installed
- UI to render, search, install, and manage models
- Support chat, streaming, and agentic calls

* Add missing files

* Fix loader circle being too large
Fix tooltip width
Adjust where the Docker installer is opened for the web platform

* Adjust imports
Timothy Carambat 2026-01-14 15:55:26 -08:00, committed by GitHub
parent 34a96ad30f
commit ff7cb17e34
37 changed files with 1463 additions and 10 deletions

@@ -6,7 +6,7 @@ concurrency:
 on:
   push:
-    branches: ['4822-feat-remove-workspace-creation-onboarding-page'] # put your current branch to create a build. Core team only.
+    branches: ['4391-dmr-support'] # put your current branch to create a build. Core team only.
     paths-ignore:
       - '**.md'
       - 'cloud-deployments/*'

@@ -106,6 +106,7 @@ AnythingLLM divides your documents into objects called `workspaces`. A Workspace
 - [Moonshot AI](https://www.moonshot.ai/)
 - [Microsoft Foundry Local](https://github.com/microsoft/Foundry-Local)
 - [CometAPI (chat models)](https://api.cometapi.com/)
+- [Docker Model Runner](https://docs.docker.com/ai/model-runner/)

 **Embedder models:**

@@ -162,6 +162,11 @@ GID='1000'
 # GITEE_AI_MODEL_PREF=
 # GITEE_AI_MODEL_TOKEN_LIMIT=

+# LLM_PROVIDER='docker-model-runner'
+# DOCKER_MODEL_RUNNER_BASE_PATH='http://127.0.0.1:12434'
+# DOCKER_MODEL_RUNNER_LLM_MODEL_PREF='phi-3.5-mini'
+# DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT=4096
+
 ###########################################
 ######## Embedding API SElECTION ##########
 ###########################################
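These defaults assume Docker Model Runner's TCP host support is enabled on its default port 12434. A quick sanity check before pointing AnythingLLM at it, as a sketch (Node 18+ global fetch; the /engines/llama.cpp/v1 path is DMR's documented OpenAI-compatible surface):

// Sketch: verify the Docker Model Runner endpoint from the env values above.
const base = "http://127.0.0.1:12434";
fetch(`${base}/engines/llama.cpp/v1/models`)
  .then((res) => res.json())
  // Expect an OpenAI-style list: { object: "list", data: [{ id: "ai/..." }, ...] }
  .then((data) => console.log(data.data?.map((m) => m.id)))
  .catch(() => console.error("DMR unreachable: enable TCP support in Docker Desktop"));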

@@ -33,7 +33,7 @@ export default function DellProAIStudioOptions({
       />
       <div className="flex flex-col w-60">
         <label className="text-white text-sm font-semibold block mb-2">
-          Token context window
+          Model context window
         </label>
         <input
           type="number"

@@ -0,0 +1,314 @@
import { useRef, useState, useEffect } from "react";
import {
CaretDown,
CaretRight,
Cpu,
DownloadSimple,
CircleNotch,
CheckCircle,
Dot,
Circle,
DotsThreeCircleVertical,
DotsThreeVertical,
CloudArrowDown,
} from "@phosphor-icons/react";
import pluralize from "pluralize";
import { titleCase } from "text-case";
import { humanFileSize } from "@/utils/numbers";
/**
* @typedef {Object} ModelDefinition
* @property {string} id - The ID of the model.
* @property {'CPU' | 'GPU' | 'NPU'} deviceType - The device type of the model.
* @property {number} modelSize - The size of the model in megabytes.
* @property {boolean} downloaded - Whether the model is downloaded.
*/
/**
* @param {object} props - The props of the component.
* @param {string} props.alias - The alias of the model.
* @param {Array<ModelDefinition>} props.models - The models to display.
* @param {(model: string, fileSize: string, progressCallback: (percentage: number) => void) => void} props.downloadModel - The function to download the model.
* @param {(model: string) => void} props.uninstallModel - The function to uninstall the model.
* @param {(model: string) => void} props.setActiveModel - The function to set the active model.
* @param {string} props.selectedModelId - The ID of the selected model.
* @param {object} props.ui - The UI configuration.
* @param {boolean} props.ui.showRuntime - Whether to show the runtime.
* @returns {React.ReactNode}
*/
export default function ModelTable({
alias = "",
models = [],
downloadModel = null,
uninstallModel = null,
setActiveModel = () => {},
selectedModelId = "",
ui = {
showRuntime: true,
},
}) {
const [showAll, setShowAll] = useState(
models.some((model) => model.downloaded)
);
const totalModels = models.length;
return (
<div className="flex flex-col w-full border-b border-theme-modal-border py-[18px]">
<button
type="button"
onClick={() => setShowAll(!showAll)}
className="border-none text-theme-text-secondary text-sm font-medium hover:underline flex items-center gap-x-[8px]"
>
{showAll ? (
<CaretDown
size={16}
weight="bold"
className="text-theme-text-secondary"
/>
) : (
<CaretRight
size={16}
weight="bold"
className="text-theme-text-secondary"
/>
)}
<h3 className="flex items-center gap-x-1 text-theme-text-primary text-base font-bold">
{titleCase(alias)}
<span className="text-theme-text-secondary font-normal text-sm">
({totalModels} {pluralize("Model", totalModels)})
</span>
</h3>
</button>
<div hidden={!showAll} className="mt-[16px]">
<div className="w-full flex flex-col gap-y-[8px]">
{models.map((model) => (
<ModelRow
key={model.id}
model={model}
downloadModel={downloadModel}
uninstallModel={uninstallModel}
setActiveModel={setActiveModel}
selectedModelId={selectedModelId}
ui={ui}
/>
))}
</div>
</div>
</div>
);
}
/**
* @param {{deviceType: ModelDefinition["deviceType"]}} deviceType
* @returns {React.ReactNode}
*/
function DeviceTypeTag({ deviceType }) {
const Wrapper = ({ text, bgClass, textClass }) => {
return (
<div
className={
bgClass + " px-1.5 py-1 rounded-full flex items-center gap-x-1 w-fit"
}
>
<Cpu size={16} weight="bold" className={textClass} />
<p className={textClass + " text-xs"}>{text}</p>
</div>
);
};
switch (deviceType?.toLowerCase()) {
case "cpu":
return (
<Wrapper
text="CPU"
bgClass="bg-blue-600/20"
textClass="text-blue-300"
/>
);
case "gpu":
return (
<Wrapper
text="GPU"
bgClass="bg-green-600/20"
textClass="text-green-300"
/>
);
case "npu":
return (
<Wrapper
text="NPU"
bgClass="bg-indigo-600/20"
textClass="text-indigo-300"
/>
);
default:
return (
<Wrapper
text="CPU"
bgClass="bg-blue-600/20"
textClass="text-blue-300"
/>
);
}
}
/**
* @param {object} props - The props of the component.
* @param {ModelDefinition} props.model - The model to display.
* @param {(model: string, fileSize: string, progressCallback: (percentage: number) => void) => Promise<void>} props.downloadModel - The function to download the model.
* @param {(model: string) => Promise<void>} props.uninstallModel - The function to uninstall the model.
* @param {(model: string) => void} props.setActiveModel - The function to set the active model.
* @param {string} props.selectedModelId - The ID of the selected model.
* @param {object} props.ui - The UI configuration.
* @param {boolean} props.ui.showRuntime - Whether to show the runtime.
* @returns {React.ReactNode}
*/
function ModelRow({
model,
downloadModel = null,
uninstallModel = null,
setActiveModel,
selectedModelId,
ui = {
showRuntime: true,
},
}) {
const modelRowRef = useRef(null);
const [showOptions, setShowOptions] = useState(false);
const [processing, setProcessing] = useState(false);
const [_downloadPercentage, setDownloadPercentage] = useState(0);
const fileSize =
typeof model.size === "number"
? humanFileSize(model.size * 1e6, true, 2)
: (model.size ?? "Unknown size");
const [isActiveModel, setIsActiveModel] = useState(
selectedModelId === model.id
);
async function handleSetActiveModel() {
setDownloadPercentage(0);
if (model.downloaded) setActiveModel(model.id);
else {
try {
if (!downloadModel) return;
setProcessing(true);
await downloadModel(model.id, fileSize, (percentage) =>
setDownloadPercentage(percentage)
);
} catch {
} finally {
setProcessing(false);
}
}
}
async function handleUninstallModel() {
if (!uninstallModel) return;
try {
setProcessing(true);
await uninstallModel(model.id);
} catch {
} finally {
setProcessing(false);
}
}
useEffect(() => {
if (selectedModelId === model.id) {
setIsActiveModel(true);
modelRowRef.current.classList.add("!bg-gray-200/10");
setTimeout(
() => modelRowRef.current.classList.remove("!bg-gray-200/10"),
800
);
} else {
setIsActiveModel(false);
}
}, [selectedModelId]);
return (
<div
ref={modelRowRef}
className="w-full grid grid-cols-[1fr_auto_1fr] items-center gap-x-4 transition-all duration-300 rounded-lg"
>
<button
type="button"
className="border-none flex items-center gap-x-[8px] whitespace-nowrap py-[8px]"
disabled={processing}
onClick={handleSetActiveModel}
>
{ui.showRuntime && <DeviceTypeTag deviceType={model.deviceType} />}
<p className="text-theme-text-primary text-base px-2">{model.name}</p>
<p className="text-theme-text-secondary opacity-70 text-base">
{fileSize}
</p>
</button>
<div className="justify-self-start">
{isActiveModel && (
<div className="flex items-center justify-center gap-x-[10px] whitespace-nowrap">
<Circle size={8} weight="fill" className="text-green-500" />
<p className="text-theme-text-primary text-sm">Active</p>
</div>
)}
{!isActiveModel && model.downloaded && !uninstallModel && (
<p className="text-theme-text-secondary text-sm italic whitespace-nowrap">
Installed
</p>
)}
{!model.downloaded && (
<p className="text-theme-text-secondary text-sm italic whitespace-nowrap">
Not Installed
</p>
)}
</div>
<div className="relative justify-self-end">
{uninstallModel && model.downloaded ? (
<>
<button
type="button"
className="border-none hover:bg-white/20 rounded-lg p-1"
onClick={() => setShowOptions(!showOptions)}
>
<DotsThreeVertical
size={22}
weight="bold"
className="text-theme-text-primary cursor-pointer"
/>
</button>
{showOptions && (
<div className="absolute top-[20px] right-[20px] bg-theme-action-menu-bg border border-theme-modal-border rounded-lg py-2 px-4 shadow-lg">
<button
type="button"
className="border-none font-medium group"
onClick={handleUninstallModel}
>
<p className="text-sm text-theme-text-primary group-hover:underline group-hover:text-theme-text-secondary">
Uninstall
</p>
</button>
</div>
)}
</>
) : null}
{!model.downloaded ? (
<button
type="button"
className="border-none hover:bg-white/20 rounded-lg p-1 flex items-center gap-x-1"
onClick={handleSetActiveModel}
>
<CloudArrowDown
size={16}
weight="bold"
className="text-blue-300 cursor-pointer"
/>
<p className="text-sm text-blue-300">Install</p>
</button>
) : null}
</div>
</div>
);
}
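For orientation, a minimal sketch of how a parent view might drive this table. Prop names follow the JSDoc above; the handlers and the model entry are illustrative stand-ins, not code from this PR:

<ModelTable
  alias="qwen3"
  models={[
    {
      id: "ai/qwen3:latest",
      name: "qwen3:latest",
      size: "2.37 GiB",
      downloaded: true,
    },
  ]}
  setActiveModel={(id) => console.log("selected", id)}
  downloadModel={async (id, fileSize, onProgress) => onProgress(100)}
  selectedModelId="ai/qwen3:latest"
  ui={{ showRuntime: false }}
/>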

@@ -0,0 +1,426 @@
import { useState, useEffect } from "react";
import System from "@/models/system";
import useProviderEndpointAutoDiscovery from "@/hooks/useProviderEndpointAutoDiscovery";
import {
ArrowClockwise,
CircleNotch,
MagnifyingGlass,
Info,
} from "@phosphor-icons/react";
import strDistance from "js-levenshtein";
import { LLM_PREFERENCE_CHANGED_EVENT } from "@/pages/GeneralSettings/LLMPreference";
import { DOCKER_MODEL_RUNNER_COMMON_URLS } from "@/utils/constants";
import { Tooltip } from "react-tooltip";
import { Link } from "react-router-dom";
import ModelTable from "./ModelTable";
import * as Skeleton from "react-loading-skeleton";
import "react-loading-skeleton/dist/skeleton.css";
export default function DockerModelRunnerOptions({ settings }) {
const {
autoDetecting: loading,
basePath,
basePathValue,
handleAutoDetectClick,
} = useProviderEndpointAutoDiscovery({
provider: "docker-model-runner",
initialBasePath: settings?.DockerModelRunnerBasePath,
ENDPOINTS: DOCKER_MODEL_RUNNER_COMMON_URLS,
});
const [maxTokens, setMaxTokens] = useState(
settings?.DockerModelRunnerModelTokenLimit || 4096
);
return (
<div className="w-full flex flex-col gap-y-7">
<div className="flex gap-[36px] mt-1.5 flex-wrap">
<div className="flex flex-col w-60">
<div className="flex items-center gap-1 mb-3">
<div className="flex justify-between items-center gap-x-2">
<label className="text-white text-sm font-semibold">
Base URL
</label>
{loading ? (
<CircleNotch className="w-4 h-4 text-theme-text-secondary animate-spin" />
) : (
<>
{!basePathValue.value && (
<button
onClick={handleAutoDetectClick}
className="bg-primary-button text-xs font-medium px-2 py-1 rounded-lg hover:bg-secondary hover:text-white shadow-[0_4px_14px_rgba(0,0,0,0.25)]"
>
Auto-Detect
</button>
)}
</>
)}
</div>
<Tooltip
id="docker-model-runner-base-url"
place="top"
delayShow={300}
delayHide={800}
clickable={true}
className="tooltip !text-xs !opacity-100 z-99"
style={{
maxWidth: "250px",
whiteSpace: "normal",
wordWrap: "break-word",
}}
>
Enter the URL where the Docker Model Runner is running.
<br />
<br />
You <b>must</b> have enabled the Docker Model Runner TCP support
for this to work.
<br />
<br />
<Link
to="https://docs.docker.com/ai/model-runner/get-started/#docker-desktop"
target="_blank"
className="text-blue-500 hover:underline"
>
Learn more &rarr;
</Link>
</Tooltip>
<div
className="text-theme-text-secondary cursor-pointer hover:bg-theme-bg-primary flex items-center justify-center rounded-full"
data-tooltip-id="docker-model-runner-base-url"
data-tooltip-place="top"
data-tooltip-delay-hide={800}
>
<Info size={18} className="text-theme-text-secondary" />
</div>
</div>
<input
type="url"
name="DockerModelRunnerBasePath"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="http://localhost:12434/engines/llama.cpp/v1"
value={basePathValue.value}
required={true}
autoComplete="off"
spellCheck={false}
onChange={basePath.onChange}
onBlur={basePath.onBlur}
/>
</div>
<div className="flex flex-col w-60">
<div className="flex items-center gap-1 mb-3">
<label className="text-white text-sm font-semibold block">
Model context window
</label>
<Tooltip
id="docker-model-runner-model-context-window"
place="top"
delayShow={300}
delayHide={800}
clickable={true}
className="tooltip !text-xs !opacity-100 z-99"
style={{
maxWidth: "350px",
whiteSpace: "normal",
wordWrap: "break-word",
}}
>
The maximum number of tokens that can be used for a model context
window.
<br />
<br />
To set the context window limit for a model, you can use the{" "}
<code>docker model configure</code> command with the{" "}
<code>--context-size</code> parameter.
<br />
<br />
<code>
docker model configure --context-size 8192 ai/qwen3:latest
</code>
<br />
<br />
<Link
to="https://docs.docker.com/ai/model-runner/#context-size"
target="_blank"
className="text-blue-500 hover:underline"
>
Learn more &rarr;
</Link>
</Tooltip>
<div
className="text-theme-text-secondary cursor-pointer hover:bg-theme-bg-primary flex items-center justify-center rounded-full"
data-tooltip-id="docker-model-runner-model-context-window"
data-tooltip-place="top"
data-tooltip-delay-hide={800}
>
<Info size={18} className="text-theme-text-secondary" />
</div>
</div>
<input
type="number"
name="DockerModelRunnerModelTokenLimit"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="4096"
min={1}
value={maxTokens}
onChange={(e) => setMaxTokens(Number(e.target.value))}
onScroll={(e) => e.target.blur()}
required={true}
autoComplete="off"
/>
</div>
<DockerModelRunnerModelSelection
settings={settings}
basePath={basePathValue.value}
/>
</div>
</div>
);
}
function DockerModelRunnerModelSelection({ settings, basePath = null }) {
const [selectedModelId, setSelectedModelId] = useState(
settings?.DockerModelRunnerModelPref
);
const [customModels, setCustomModels] = useState([]);
const [filteredModels, setFilteredModels] = useState([]);
const [loading, setLoading] = useState(true);
const [searchQuery, setSearchQuery] = useState("");
async function fetchModels() {
if (!basePath) {
setCustomModels([]);
setFilteredModels([]);
setLoading(false);
setSearchQuery("");
return;
}
setLoading(true);
const { models } = await System.customModels(
"docker-model-runner",
null,
basePath
);
setCustomModels(models || []);
setFilteredModels(models || []);
setSearchQuery("");
setLoading(false);
}
useEffect(() => {
fetchModels();
}, [basePath]);
useEffect(() => {
if (!searchQuery || !customModels.length) {
setFilteredModels(customModels || []);
return;
}
const normalizedSearchQuery = searchQuery.toLowerCase().trim();
const filteredModels = new Map();
customModels.forEach((model) => {
const modelNameNormalized = model.name.toLowerCase();
const modelOrganizationNormalized = model.organization.toLowerCase();
if (modelNameNormalized.startsWith(normalizedSearchQuery))
filteredModels.set(model.id, model);
if (modelOrganizationNormalized.startsWith(normalizedSearchQuery))
filteredModels.set(model.id, model);
if (strDistance(modelNameNormalized, normalizedSearchQuery) <= 2)
filteredModels.set(model.id, model);
if (strDistance(modelOrganizationNormalized, normalizedSearchQuery) <= 2)
filteredModels.set(model.id, model);
});
setFilteredModels(Array.from(filteredModels.values()));
}, [searchQuery]);
function downloadModel(modelId, _fileSize, progressCallback) {
const [name, tag] = modelId.split(":");
// Open the model page on Docker Hub in the browser, since the model may not be installed locally
window.open(`https://hub.docker.com/layers/${name}/${tag}`, "_blank");
progressCallback(100);
}
function groupModelsByAlias(models) {
const mapping = new Map();
mapping.set("installed", new Map());
mapping.set("not installed", new Map());
const groupedModels = models.reduce((acc, model) => {
acc[model.organization] = acc[model.organization] || [];
acc[model.organization].push(model);
return acc;
}, {});
Object.entries(groupedModels).forEach(([organization, models]) => {
const hasInstalled = models.some((model) => model.downloaded);
if (hasInstalled) {
const installedModels = models.filter((model) => model.downloaded);
mapping
.get("installed")
.set("Downloaded Models", [
...(mapping.get("installed").get("Downloaded Models") || []),
...installedModels,
]);
}
const tags = models.map((model) => ({
...model,
name: model.name.split(":")[1],
}));
mapping.get("not installed").set(organization, tags);
});
const orderedMap = new Map();
mapping
.get("installed")
.entries()
.forEach(([organization, models]) =>
orderedMap.set(organization, models)
);
mapping
.get("not installed")
.entries()
.forEach(([organization, models]) =>
orderedMap.set(organization, models)
);
return Object.fromEntries(orderedMap);
}
function handleSetActiveModel(modelId) {
if (modelId === selectedModelId) return;
setSelectedModelId(modelId);
window.dispatchEvent(new Event(LLM_PREFERENCE_CHANGED_EVENT));
}
const groupedModels = groupModelsByAlias(filteredModels);
return (
<Layout
fetchModels={fetchModels}
searchQuery={searchQuery}
setSearchQuery={setSearchQuery}
loading={loading}
>
<input
type="hidden"
name="DockerModelRunnerModelPref"
id="DockerModelRunnerModelPref"
value={selectedModelId}
/>
{loading ? (
<LoadingSkeleton />
) : filteredModels.length === 0 ? (
<div className="flex flex-col w-full gap-y-2 mt-4">
<p className="text-theme-text-secondary text-sm">No models found!</p>
</div>
) : (
Object.entries(groupedModels).map(([alias, models]) => (
<ModelTable
key={alias}
alias={alias}
models={models}
setActiveModel={handleSetActiveModel}
downloadModel={downloadModel}
selectedModelId={selectedModelId}
ui={{
showRuntime: false,
}}
/>
))
)}
</Layout>
);
}
function Layout({
children,
fetchModels = null,
searchQuery = "",
setSearchQuery = () => {},
loading = false,
}) {
const [isRefreshing, setIsRefreshing] = useState(false);
async function refreshModels() {
setIsRefreshing(true);
try {
await fetchModels?.();
} catch {
} finally {
setIsRefreshing(false);
}
}
return (
<div className="flex flex-col w-full">
<div className="flex gap-x-2 items-center pb-[8px]">
<label className="text-theme-text-primary text-base font-semibold">
Available Models
</label>
</div>
<div className="flex w-full items-center gap-x-[16px]">
<div className="relative flex-1 max-w-[640px]">
<MagnifyingGlass
size={14}
weight="bold"
color="var(--theme-text-primary)"
className="absolute left-[9px] top-[10px] text-theme-settings-input-placeholder peer-focus:invisible"
/>
<input
type="search"
placeholder="Search models"
value={searchQuery}
disabled={loading}
className="min-h-[32px] border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5 pl-[30px] py-2 search-input disabled:opacity-50 disabled:cursor-not-allowed"
onChange={(e) => {
e.preventDefault();
e.stopPropagation();
setSearchQuery(e.target.value);
}}
/>
</div>
{!!fetchModels && (
<button
type="button"
onClick={refreshModels}
disabled={isRefreshing || loading}
className="border-none text-theme-text-secondary text-sm font-medium hover:underline flex items-center gap-x-1 disabled:opacity-50 disabled:cursor-not-allowed"
>
{isRefreshing ? (
<CircleNotch className="w-4 h-4 text-theme-text-secondary animate-spin" />
) : (
<ArrowClockwise
weight="bold"
className="w-4 h-4 text-theme-text-secondary"
/>
)}
<span
className={`text-sm font-medium ${isRefreshing ? "hidden" : "text-theme-text-secondary"}`}
>
Refresh Models
</span>
</button>
)}
</div>
{children}
</div>
);
}
function LoadingSkeleton() {
return (
<div className="flex flex-col w-full gap-y-4">
<Skeleton.default
height={100}
width="100%"
count={7}
highlightColor="var(--theme-settings-input-active)"
baseColor="var(--theme-settings-input-bg)"
enableAnimation={true}
containerClassName="w-fill flex gap-[8px] flex-col p-0"
/>
</div>
);
}
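A note on the search behavior above: a model is kept when its name or organization starts with the query, or when its Levenshtein distance to the query is at most 2. Worked values (js-levenshtein, checkable by hand):

import strDistance from "js-levenshtein";

strDistance("qwen3", "qwen");  // 1 -> within the <= 2 threshold, so "qwen" matches qwen3 models
strDistance("qwen3", "llama"); // 5 -> excluded unless a prefix rule matches first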

@@ -92,7 +92,7 @@ export default function FoundryOptions({ settings }) {
       </div>
       <div className="flex flex-col w-60">
         <label className="text-white text-sm font-semibold block mb-3">
-          Token Context Window
+          Model context window
         </label>
         <input
           type="number"

@@ -50,7 +50,7 @@ export default function GenericOpenAiOptions({ settings }) {
     <div className="flex gap-[36px] flex-wrap">
       <div className="flex flex-col w-60">
         <label className="text-white text-sm font-semibold block mb-3">
-          Token context window
+          Model context window
         </label>
         <input
           type="number"

@@ -24,7 +24,7 @@ export default function GiteeAIOptions({ settings }) {
       <GiteeAIModelSelection settings={settings} />
       <div className="flex flex-col w-60">
         <label className="text-white text-sm font-semibold block mb-2">
-          Token context window
+          Model context window
         </label>
         <input
           type="number"

@@ -43,7 +43,7 @@ export default function KoboldCPPOptions({ settings }) {
       />
       <div className="flex flex-col w-60">
         <label className="text-white text-sm font-semibold block mb-2">
-          Token context window
+          Model context window
         </label>
         <input
           type="number"

@@ -34,7 +34,7 @@ export default function LiteLLMOptions({ settings }) {
       />
       <div className="flex flex-col w-60">
         <label className="text-white text-sm font-semibold block mb-3">
-          Token context window
+          Model context window
         </label>
         <input
           type="number"

@@ -51,7 +51,7 @@ export default function LocalAiOptions({ settings, showAlert = false }) {
       />
       <div className="flex flex-col w-60">
         <label className="text-white text-sm font-semibold block mb-2">
-          Token context window
+          Model context window
         </label>
         <input
           type="number"

@@ -18,7 +18,7 @@ export default function TextGenWebUIOptions({ settings }) {
       </div>
       <div className="flex flex-col w-60">
         <label className="text-white text-sm font-semibold block mb-3">
-          Token context window
+          Model context window
         </label>
         <input
           type="number"

@@ -41,6 +41,7 @@ import MoonshotAiLogo from "@/media/llmprovider/moonshotai.png";
 import CometApiLogo from "@/media/llmprovider/cometapi.png";
 import FoundryLogo from "@/media/llmprovider/foundry-local.png";
 import GiteeAILogo from "@/media/llmprovider/giteeai.png";
+import DockerModelRunnerLogo from "@/media/llmprovider/docker-model-runner.png";

 const LLM_PROVIDER_PRIVACY_MAP = {
   openai: {
@@ -224,6 +225,13 @@ const LLM_PROVIDER_PRIVACY_MAP = {
     policyUrl: "https://ai.gitee.com/docs/appendix/privacy",
     logo: GiteeAILogo,
   },
+  "docker-model-runner": {
+    name: "Docker Model Runner",
+    description: [
+      "Your model and chats are only accessible on the machine running Docker Model Runner.",
+    ],
+    logo: DockerModelRunnerLogo,
+  },
 };

 const VECTOR_DB_PROVIDER_PRIVACY_MAP = {

@@ -52,6 +52,7 @@ const groupedProviders = [
   "novita",
   "openrouter",
   "ppio",
+  "docker-model-runner",
 ];

 export default function useGetProviderModels(provider = null) {
   const [defaultModels, setDefaultModels] = useState([]);

Binary file added (docker-model-runner.png logo, 8.5 KiB), not shown.

@@ -37,6 +37,7 @@ import MoonshotAiLogo from "@/media/llmprovider/moonshotai.png";
 import CometApiLogo from "@/media/llmprovider/cometapi.png";
 import FoundryLogo from "@/media/llmprovider/foundry-local.png";
 import GiteeAILogo from "@/media/llmprovider/giteeai.png";
+import DockerModelRunnerLogo from "@/media/llmprovider/docker-model-runner.png";
 import PreLoader from "@/components/Preloader";

 import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
@@ -71,6 +72,7 @@ import DellProAiStudioOptions from "@/components/LLMSelection/DPAISOptions";
 import MoonshotAiOptions from "@/components/LLMSelection/MoonshotAiOptions";
 import FoundryOptions from "@/components/LLMSelection/FoundryOptions";
 import GiteeAIOptions from "@/components/LLMSelection/GiteeAIOptions/index.jsx";
+import DockerModelRunnerOptions from "@/components/LLMSelection/DockerModelRunnerOptions";
 import LLMItem from "@/components/LLMSelection/LLMItem";
 import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
@@ -161,6 +163,18 @@ export const AVAILABLE_LLM_PROVIDERS = [
       "Discover, download, and run thousands of cutting edge LLMs in a few clicks.",
     requiredConfig: ["LMStudioBasePath"],
   },
+  {
+    name: "Docker Model Runner",
+    value: "docker-model-runner",
+    logo: DockerModelRunnerLogo,
+    options: (settings) => <DockerModelRunnerOptions settings={settings} />,
+    description: "Run LLMs using Docker Model Runner.",
+    requiredConfig: [
+      "DockerModelRunnerBasePath",
+      "DockerModelRunnerModelPref",
+      "DockerModelRunnerModelTokenLimit",
+    ],
+  },
   {
     name: "Local AI",
     value: "localai",
@@ -371,6 +385,7 @@ export const AVAILABLE_LLM_PROVIDERS = [
   },
 ];

+export const LLM_PREFERENCE_CHANGED_EVENT = "llm-preference-changed";

 export default function GeneralLLMPreference() {
   const [saving, setSaving] = useState(false);
   const [hasChanges, setHasChanges] = useState(false);
@@ -428,6 +443,21 @@ export default function GeneralLLMPreference() {
     fetchKeys();
   }, []);

+  // Some more complex LLM options do not bubble up the change event, so we need to listen to the custom event
+  // we can emit from the LLM options component using window.dispatchEvent(new Event(LLM_PREFERENCE_CHANGED_EVENT));
+  useEffect(() => {
+    function updateHasChanges() {
+      setHasChanges(true);
+    }
+    window.addEventListener(LLM_PREFERENCE_CHANGED_EVENT, updateHasChanges);
+    return () => {
+      window.removeEventListener(
+        LLM_PREFERENCE_CHANGED_EVENT,
+        updateHasChanges
+      );
+    };
+  }, []);
+
   useEffect(() => {
     const filtered = AVAILABLE_LLM_PROVIDERS.filter((llm) =>
       llm.name.toLowerCase().includes(searchQuery.toLowerCase())

@@ -31,6 +31,7 @@ import DellProAiStudioLogo from "@/media/llmprovider/dpais.png";
 import MoonshotAiLogo from "@/media/llmprovider/moonshotai.png";
 import CometApiLogo from "@/media/llmprovider/cometapi.png";
 import GiteeAILogo from "@/media/llmprovider/giteeai.png";
+import DockerModelRunnerLogo from "@/media/llmprovider/docker-model-runner.png";

 import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
 import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions";
@@ -63,6 +64,7 @@ import DellProAiStudioOptions from "@/components/LLMSelection/DPAISOptions";
 import MoonshotAiOptions from "@/components/LLMSelection/MoonshotAiOptions";
 import CometApiLLMOptions from "@/components/LLMSelection/CometApiLLMOptions";
 import GiteeAiOptions from "@/components/LLMSelection/GiteeAIOptions";
+import DockerModelRunnerOptions from "@/components/LLMSelection/DockerModelRunnerOptions";
 import LLMItem from "@/components/LLMSelection/LLMItem";
 import System from "@/models/system";
@@ -139,6 +141,13 @@ const LLMS = [
     description:
       "Discover, download, and run thousands of cutting edge LLMs in a few clicks.",
   },
+  {
+    name: "Docker Model Runner",
+    value: "docker-model-runner",
+    logo: DockerModelRunnerLogo,
+    options: (settings) => <DockerModelRunnerOptions settings={settings} />,
+    description: "Run LLMs using Docker Model Runner.",
+  },
   {
     name: "Local AI",
     value: "localai",

@@ -37,6 +37,7 @@ const ENABLED_PROVIDERS = [
   "zai",
   "giteeai",
   "cohere",
+  "docker-model-runner",
   // TODO: More agent support.
   // "huggingface" // Can be done but already has issues with no-chat templated. Needs to be tested.
 ];
@@ -46,6 +47,7 @@ const WARN_PERFORMANCE = [
   "ollama",
   "localai",
   "textgenwebui",
+  "docker-model-runner",
 ];

 const LLM_DEFAULT = {

@@ -53,6 +53,14 @@ export const NVIDIA_NIM_COMMON_URLS = [
   "http://172.17.0.1:8000/v1/version",
 ];

+export const DOCKER_MODEL_RUNNER_COMMON_URLS = [
+  "http://localhost:12434/engines/llama.cpp/v1",
+  "http://127.0.0.1:12434/engines/llama.cpp/v1",
+  "http://model-runner.docker.internal/engines/llama.cpp/v1",
+  "http://host.docker.internal:12434/engines/llama.cpp/v1",
+  "http://172.17.0.1:12434/engines/llama.cpp/v1",
+];
+
 export function fullApiUrl() {
   if (API_BASE !== "/api") return API_BASE;
   return `${window.location.origin}/api`;
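These candidates cover DMR reached from the host (localhost/127.0.0.1), from inside a container (host.docker.internal or the 172.17.0.1 bridge gateway), and via Docker's internal DNS name. A hedged sketch of what an auto-discovery pass over this list can look like; the actual logic lives in useProviderEndpointAutoDiscovery and may differ:

import { DOCKER_MODEL_RUNNER_COMMON_URLS } from "@/utils/constants";

// Probe each candidate's OpenAI-compatible /models route; first responder wins.
async function probeDockerModelRunner() {
  for (const base of DOCKER_MODEL_RUNNER_COMMON_URLS) {
    try {
      const res = await fetch(`${base}/models`, {
        signal: AbortSignal.timeout(2000), // do not hang on dead candidates
      });
      if (res.ok) return base;
    } catch {}
  }
  return null;
}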

@@ -105,6 +105,7 @@ AnythingLLM اسناد شما را به اشیایی به نام `workspaces` ت
 - [Z.AI (chat models)](https://z.ai/model-api)
 - [Novita AI (chat models)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link)
 - [PPIO](https://ppinfra.com?utm_source=github_anything-llm)
+- [Docker Model Runner](https://docs.docker.com/ai/model-runner/)

 <div dir="rtl">

@@ -94,6 +94,7 @@ AnythingLLMは、ドキュメントを`ワークスペース`と呼ばれるオ
 - [Z.AI (チャットモデル)](https://z.ai/model-api)
 - [PPIO](https://ppinfra.com?utm_source=github_anything-llm)
 - [CometAPI (チャットモデル)](https://api.cometapi.com/)
+- [Docker Model Runner](https://docs.docker.com/ai/model-runner/)

 **埋め込みモデル:**

@@ -102,6 +102,7 @@ AnythingLLM, belgelerinizi **"çalışma alanları" (workspaces)** adı verilen
 - [Z.AI (chat models)](https://z.ai/model-api)
 - [Novita AI (chat models)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link)
 - [PPIO](https://ppinfra.com?utm_source=github_anything-llm)
+- [Docker Model Runner](https://docs.docker.com/ai/model-runner/)

 **Embedder modelleri:**

@@ -102,6 +102,7 @@ AnythingLLM将您的文档划分为称为`workspaces` (工作区)的对象。工
 - [Novita AI (聊天模型)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link)
 - [PPIO (聊天模型)](https://ppinfra.com?utm_source=github_anything-llm)
 - [CometAPI (聊天模型)](https://api.cometapi.com/)
+- [Docker Model Runner](https://docs.docker.com/ai/model-runner/)

 **支持的嵌入模型:**

@@ -161,6 +161,11 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
 # GITEE_AI_MODEL_PREF=
 # GITEE_AI_MODEL_TOKEN_LIMIT=

+# LLM_PROVIDER='docker-model-runner'
+# DOCKER_MODEL_RUNNER_BASE_PATH='http://127.0.0.1:12434'
+# DOCKER_MODEL_RUNNER_LLM_MODEL_PREF='phi-3.5-mini'
+# DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT=4096
+
 ###########################################
 ######## Embedding API SElECTION ##########
 ###########################################

@@ -154,6 +154,9 @@ function getModelTag() {
     case "cohere":
       model = process.env.COHERE_MODEL_PREF;
       break;
+    case "docker-model-runner":
+      model = process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF;
+      break;
     default:
       model = "--";
       break;

@@ -646,6 +646,13 @@ const SystemSettings = {
       GiteeAIApiKey: !!process.env.GITEE_AI_API_KEY,
       GiteeAIModelPref: process.env.GITEE_AI_MODEL_PREF,
       GiteeAITokenLimit: process.env.GITEE_AI_MODEL_TOKEN_LIMIT || 8192,
+
+      // Docker Model Runner Keys
+      DockerModelRunnerBasePath: process.env.DOCKER_MODEL_RUNNER_BASE_PATH,
+      DockerModelRunnerModelPref:
+        process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF,
+      DockerModelRunnerModelTokenLimit:
+        process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT || 8192,
     };
   },
}, },

@@ -14,3 +14,4 @@ MintplexLabs
 cometapi
 fireworks
 giteeai
+docker-model-runner

@@ -0,0 +1,434 @@
const fs = require("fs");
const path = require("path");
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const {
handleDefaultStreamResponseV2,
formatChatHistory,
} = require("../../helpers/chat/responses");
const {
LLMPerformanceMonitor,
} = require("../../helpers/chat/LLMPerformanceMonitor");
const { OpenAI: OpenAIApi } = require("openai");
const { humanFileSize } = require("../../helpers");
const { safeJsonParse } = require("../../http");
class DockerModelRunnerLLM {
static cacheTime = 1000 * 60 * 60 * 24; // 24 hours
static cacheFolder = path.resolve(
process.env.STORAGE_DIR
? path.resolve(process.env.STORAGE_DIR, "models", "docker-model-runner")
: path.resolve(__dirname, `../../../storage/models/docker-model-runner`)
);
constructor(embedder = null, modelPreference = null) {
if (!process.env.DOCKER_MODEL_RUNNER_BASE_PATH)
throw new Error("No Docker Model Runner API Base Path was set.");
if (!process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF)
throw new Error("No Docker Model Runner Model Pref was set.");
this.dmr = new OpenAIApi({
baseURL: parseDockerModelRunnerEndpoint(
process.env.DOCKER_MODEL_RUNNER_BASE_PATH
),
apiKey: null,
});
this.model =
modelPreference || process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF;
this.embedder = embedder ?? new NativeEmbedder();
this.defaultTemp = 0.7;
this.limits = {
history: this.promptWindowLimit() * 0.15,
system: this.promptWindowLimit() * 0.15,
user: this.promptWindowLimit() * 0.7,
};
this.#log(`initialized with model: ${this.model}`);
}
#log(text, ...args) {
console.log(`\x1b[32m[Docker Model Runner]\x1b[0m ${text}`, ...args);
}
static slog(text, ...args) {
console.log(`\x1b[32m[Docker Model Runner]\x1b[0m ${text}`, ...args);
}
async assertModelContextLimits() {
if (this.limits !== null) return;
this.limits = {
history: this.promptWindowLimit() * 0.15,
system: this.promptWindowLimit() * 0.15,
user: this.promptWindowLimit() * 0.7,
};
}
#appendContext(contextTexts = []) {
if (!contextTexts || !contextTexts.length) return "";
return (
"\nContext:\n" +
contextTexts
.map((text, i) => {
return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
})
.join("")
);
}
streamingEnabled() {
return "streamGetChatCompletion" in this;
}
/** DMR does not support curling the context window limit from the API, so we return the system defined limit. */
static promptWindowLimit(_) {
const systemDefinedLimit =
Number(process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT) || 8192;
return systemDefinedLimit;
}
promptWindowLimit() {
return this.constructor.promptWindowLimit(this.model);
}
async isValidChatCompletionModel(_ = "") {
return true;
}
/**
* Generates appropriate content array for a message + attachments.
* @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}}
* @returns {string|object[]}
*/
#generateContent({ userPrompt, attachments = [] }) {
if (!attachments.length) {
return userPrompt;
}
const content = [{ type: "text", text: userPrompt }];
for (let attachment of attachments) {
content.push({
type: "image_url",
image_url: {
url: attachment.contentString,
detail: "auto",
},
});
}
return content.flat();
}
/**
* Construct the user prompt for this model.
* @param {{attachments: import("../../helpers").Attachment[]}} param0
* @returns
*/
constructPrompt({
systemPrompt = "",
contextTexts = [],
chatHistory = [],
userPrompt = "",
attachments = [],
}) {
const prompt = {
role: "system",
content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
};
return [
prompt,
...formatChatHistory(chatHistory, this.#generateContent),
{
role: "user",
content: this.#generateContent({ userPrompt, attachments }),
},
];
}
async getChatCompletion(messages = null, { temperature = 0.7 }) {
if (!this.model)
throw new Error(
`Docker Model Runner chat: ${this.model} is not a valid or defined model for chat completion!`
);
const result = await LLMPerformanceMonitor.measureAsyncFunction(
this.dmr.chat.completions.create({
model: this.model,
messages,
temperature,
})
);
if (
!result.output.hasOwnProperty("choices") ||
result.output.choices.length === 0
)
return null;
return {
textResponse: result.output.choices[0].message.content,
metrics: {
prompt_tokens: result.output.usage?.prompt_tokens || 0,
completion_tokens: result.output.usage?.completion_tokens || 0,
total_tokens: result.output.usage?.total_tokens || 0,
outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
timestamp: new Date(),
},
};
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
if (!this.model)
throw new Error(
`Docker Model Runner chat: ${this.model} is not a valid or defined model for chat completion!`
);
const measuredStreamRequest = await LLMPerformanceMonitor.measureStream({
func: this.dmr.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
}),
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
});
return measuredStreamRequest;
}
handleStream(response, stream, responseProps) {
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
async embedTextInput(textInput) {
return await this.embedder.embedTextInput(textInput);
}
async embedChunks(textChunks = []) {
return await this.embedder.embedChunks(textChunks);
}
async compressMessages(promptArgs = {}, rawHistory = []) {
await this.assertModelContextLimits();
const { messageArrayCompressor } = require("../../helpers/chat");
const messageArray = this.constructPrompt(promptArgs);
return await messageArrayCompressor(this, messageArray, rawHistory);
}
}
/**
* Parse the base path of the Docker Model Runner endpoint and return the host and port.
* @param {string} basePath - The base path of the Docker Model Runner endpoint.
* @param {'openai' | 'dmr'} to - The provider to parse the endpoint for (internal DMR or openai-compatible)
* @returns {string | null}
*/
function parseDockerModelRunnerEndpoint(basePath = null, to = "openai") {
if (!basePath) return null;
try {
const url = new URL(basePath);
if (to === "openai") url.pathname = "engines/v1";
else if (to === "dmr") url.pathname = "";
return url.toString();
} catch (e) {
return basePath;
}
}
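// Worked example of the rewrite above: given basePath "http://localhost:12434",
// the "openai" target yields "http://localhost:12434/engines/v1" (the
// OpenAI-compatible surface), while the "dmr" target clears the pathname and
// yields "http://localhost:12434/", onto which getDockerModels() below grafts
// the native /models management route.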
/**
* @typedef {Object} DockerRunnerInstalledModel
* @property {string} id - The SHA256 identifier of the model layer/blob.
* @property {string[]} tags - List of tags or aliases associated with this model (e.g., "ai/qwen3:4B-UD-Q4_K_XL").
* @property {number} created - The Unix timestamp (seconds) when the model was created.
* @property {string} config - The configuration of the model.
* @property {string} config.format - The file format (e.g., "gguf").
* @property {string} config.quantization - The quantization level (e.g., "MOSTLY_Q4_K_M", "Q4_0").
* @property {string} config.parameters - The parameter count formatted as a string (e.g., "4.02 B").
* @property {string} config.architecture - The base architecture of the model (e.g., "qwen3", "llama").
* @property {string} config.size - The physical file size formatted as a string (e.g., "2.37 GiB").
* @property {string} config?.gguf - Raw GGUF metadata headers containing tokenizer, architecture details, and licensing.
* @property {string} config?.gguf['general.base_model.0.organization'] - The tokenizer of the model.
* @property {string} config?.gguf['general.basename'] - The base name of the model (the real name of the model, not the tag)
* @property {string} config?.gguf['*.context_length'] - The context length of the model. will be something like qwen3.context_length
*/
/**
* Fetch the remote models from the Docker Hub and cache the results.
* @returns {Promise<Record<string, {id: string, name: string, size: string, organization: string}[]>>}
*/
async function fetchRemoteModels() {
const cachePath = path.resolve(
DockerModelRunnerLLM.cacheFolder,
"models.json"
);
const cachedAtPath = path.resolve(
DockerModelRunnerLLM.cacheFolder,
".cached_at"
);
let cacheTime = 0;
if (fs.existsSync(cachePath) && fs.existsSync(cachedAtPath)) {
cacheTime = Number(fs.readFileSync(cachedAtPath, "utf8"));
if (Date.now() - cacheTime < DockerModelRunnerLLM.cacheTime)
return safeJsonParse(fs.readFileSync(cachePath, "utf8"));
}
DockerModelRunnerLLM.slog(`Refreshing remote models from Docker Hub`);
// Now hit the Docker Hub API to get the remote model namespace and root tags
const availableNamespaces = []; // array of strings like ai/mistral, ai/qwen3, etc
let nextPage =
"https://hub.docker.com/v2/namespaces/ai/repositories?page_size=100&page=1";
while (nextPage) {
const response = await fetch(nextPage)
.then((res) => res.json())
.then((data) => {
const namespaces = data.results
.filter(
(result) =>
result.namespace &&
result.name &&
result.content_types.includes("model") &&
result.namespace === "ai"
)
.map((result) => result.namespace + "/" + result.name);
availableNamespaces.push(...namespaces);
// Return the parsed payload so pagination below can follow `next` links.
return data;
})
.catch((e) => {
DockerModelRunnerLLM.slog(
`Error fetching remote models from Docker Hub`,
e
);
return null;
});
if (!response || !response.next) break;
nextPage = response.next;
}
const availableRemoteModels = {};
const BATCH_SIZE = 10;
// Run batch requests to avoid rate limiting but also
// improve the speed of the total request time.
for (let i = 0; i < availableNamespaces.length; i += BATCH_SIZE) {
const batch = availableNamespaces.slice(i, i + BATCH_SIZE);
DockerModelRunnerLLM.slog(
`Fetching tags for batch ${Math.floor(i / BATCH_SIZE) + 1} of ${Math.ceil(availableNamespaces.length / BATCH_SIZE)}`
);
await Promise.all(
batch.map(async (namespace) => {
const [organization, model] = namespace.split("/");
const namespaceUrl = new URL(
"https://hub.docker.com/v2/namespaces/ai/repositories/" +
model +
"/tags"
);
DockerModelRunnerLLM.slog(
`Fetching tags for ${namespaceUrl.toString()}`
);
await fetch(namespaceUrl.toString())
.then((res) => res.json())
.then((data) => {
const tags = data.results.map((result) => {
return {
id: `${organization}/${model}:${result.name}`,
name: `${model}:${result.name}`,
size: humanFileSize(result.full_size),
organization: model,
};
});
availableRemoteModels[model] = tags;
})
.catch((e) => {
DockerModelRunnerLLM.slog(
`Error fetching tags for ${namespaceUrl.toString()}`,
e
);
});
})
);
}
if (Object.keys(availableRemoteModels).length === 0) {
DockerModelRunnerLLM.slog(
`No remote models found - API may be down or not available`
);
return {};
}
if (!fs.existsSync(DockerModelRunnerLLM.cacheFolder))
fs.mkdirSync(DockerModelRunnerLLM.cacheFolder, { recursive: true });
fs.writeFileSync(cachePath, JSON.stringify(availableRemoteModels), {
encoding: "utf8",
});
fs.writeFileSync(cachedAtPath, String(Number(new Date())), {
encoding: "utf8",
});
return availableRemoteModels;
}
/**
* This function will fetch the remote models from the Docker Hub as well
* as the local models installed on the system.
* @param {string} basePath - The base path of the Docker Model Runner endpoint.
*/
async function getDockerModels(basePath = null) {
let availableModels = {};
/** @type {Record<string, {id: string, name: string, size: string, organization: string}>} */
let installedModels = {};
try {
// Grab the locally installed models from the Docker Model Runner API
const dmrUrl = new URL(
parseDockerModelRunnerEndpoint(
basePath ?? process.env.DOCKER_MODEL_RUNNER_BASE_PATH,
"dmr"
)
);
dmrUrl.pathname = "/models";
await fetch(dmrUrl.toString())
.then((res) => res.json())
.then((data) => {
data?.map((model) => {
const id = model.tags.at(0);
// eg: "ai/qwen3:latest" -> tag "latest", organization "qwen3"
const tag =
id?.split("/").pop()?.split(":")?.at(1) ??
id?.split(":").at(1) ??
"latest";
const organization = id?.split("/").pop()?.split(":")?.at(0) ?? id;
installedModels[id] = {
id: id,
name: `${organization}:${tag}`,
size: model.config?.size ?? "Unknown size",
organization: organization,
};
});
});
// Now hit the Docker Hub API to get the remote model namespace and root tags
const remoteModels = await fetchRemoteModels();
for (const [modelName, tags] of Object.entries(remoteModels)) {
availableModels[modelName] = { tags: [] };
for (const tag of tags) {
if (!installedModels[tag.id])
availableModels[modelName].tags.push({ ...tag, downloaded: false });
else availableModels[modelName].tags.push({ ...tag, downloaded: true });
}
}
} catch (e) {
DockerModelRunnerLLM.slog(`Error getting Docker models`, e);
} finally {
return Object.values(availableModels).flatMap((m) => m.tags);
}
}
module.exports = {
DockerModelRunnerLLM,
parseDockerModelRunnerEndpoint,
getDockerModels,
};
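Taken together, a hedged sketch of how these exports compose. The env values and require path are illustrative only:

// Illustrative usage, assuming the env vars from the .env examples above.
process.env.DOCKER_MODEL_RUNNER_BASE_PATH = "http://127.0.0.1:12434";
process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF = "ai/qwen3:latest";

const { getDockerModels } = require("./dockerModelRunner"); // hypothetical path

(async () => {
  // Flat array of { id, name, size, organization, downloaded } entries merged
  // from the local /models endpoint and the cached Docker Hub catalog.
  const models = await getDockerModels();
  console.log(models.filter((m) => m.downloaded).map((m) => m.id));
})();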

@@ -992,6 +992,8 @@ ${this.getHistory({ to: route.to })
       case "giteeai":
         return new Providers.GiteeAIProvider({ model: config.model });
       case "cohere":
         return new Providers.CohereProvider({ model: config.model });
+      case "docker-model-runner":
+        return new Providers.DockerModelRunnerProvider({ model: config.model });
       default:
         throw new Error(
           `Unknown provider: ${config.provider}. Please use a valid provider.`

@@ -18,6 +18,9 @@ const { ChatOllama } = require("@langchain/community/chat_models/ollama");
 const { toValidNumber, safeJsonParse } = require("../../../http");
 const { getLLMProviderClass } = require("../../../helpers");
 const { parseLMStudioBasePath } = require("../../../AiProviders/lmStudio");
+const {
+  parseDockerModelRunnerEndpoint,
+} = require("../../../AiProviders/dockerModelRunner");
 const { parseFoundryBasePath } = require("../../../AiProviders/foundry");
 const {
   SystemPromptVariables,
@@ -313,6 +316,16 @@ class Provider {
           ...config,
         });
       }
+      case "docker-model-runner":
+        return new ChatOpenAI({
+          configuration: {
+            baseURL: parseDockerModelRunnerEndpoint(
+              process.env.DOCKER_MODEL_RUNNER_BASE_PATH
+            ),
+          },
+          apiKey: null,
+          ...config,
+        });
       default:
         throw new Error(`Unsupported provider ${provider} for this task.`);
     }

@@ -0,0 +1,101 @@
const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const {
parseDockerModelRunnerEndpoint,
} = require("../../../AiProviders/dockerModelRunner/index.js");
/**
* The agent provider for the Docker Model Runner.
*/
class DockerModelRunnerProvider extends InheritMultiple([Provider, UnTooled]) {
model;
/**
*
* @param {{model?: string}} config
*/
constructor(config = {}) {
super();
const model =
config?.model || process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF || null;
const client = new OpenAI({
baseURL: parseDockerModelRunnerEndpoint(
process.env.DOCKER_MODEL_RUNNER_BASE_PATH
),
apiKey: null,
maxRetries: 3,
});
this._client = client;
this.model = model;
this.verbose = true;
}
get client() {
return this._client;
}
get supportsAgentStreaming() {
return true;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({
model: this.model,
messages,
})
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("Docker Model Runner chat: No results!");
if (result.choices.length === 0)
throw new Error("Docker Model Runner chat: No results length!");
return result.choices[0].message.content;
})
.catch((_) => {
return null;
});
}
async #handleFunctionCallStream({ messages = [] }) {
return await this.client.chat.completions.create({
model: this.model,
stream: true,
messages,
});
}
async stream(messages, functions = [], eventHandler = null) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
);
}
async complete(messages, functions = []) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
}
/**
* Get the cost of the completion.
*
* @param _usage The completion to get the cost for.
* @returns The cost of the completion.
* Stubbed since Docker Model Runner has no cost basis.
*/
getCost(_usage) {
return 0;
}
}
module.exports = DockerModelRunnerProvider;
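For reference, a minimal sketch of driving this provider directly. The model id and message are placeholders, and the exact return shape of complete comes from the UnTooled helper, so treat it as an assumption:

const DockerModelRunnerProvider = require("./dockerModelRunner.js");

const provider = new DockerModelRunnerProvider({ model: "ai/qwen3:latest" });
// `complete` is inherited from UnTooled, which emulates tool calling for
// models without native function-call support.
provider
  .complete([{ role: "user", content: "Hello!" }], [])
  .then((res) => console.log(res));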

@@ -29,6 +29,7 @@ const CometApiProvider = require("./cometapi.js");
 const FoundryProvider = require("./foundry.js");
 const GiteeAIProvider = require("./giteeai.js");
 const CohereProvider = require("./cohere.js");
+const DockerModelRunnerProvider = require("./dockerModelRunner.js");

 module.exports = {
   OpenAIProvider,
@@ -62,4 +63,5 @@ module.exports = {
   FoundryProvider,
   GiteeAIProvider,
   CohereProvider,
+  DockerModelRunnerProvider,
 };

@@ -217,6 +217,12 @@ class AgentHandler {
         if (!process.env.COHERE_API_KEY)
           throw new Error("Cohere API key must be provided to use agents.");
         break;
+      case "docker-model-runner":
+        if (!process.env.DOCKER_MODEL_RUNNER_BASE_PATH)
+          throw new Error(
+            "Docker Model Runner base path must be provided to use agents."
+          );
+        break;
       default:
         throw new Error(
           "No workspace agent provider set. Please set your agent provider in the workspace's settings"
@@ -297,6 +303,8 @@
         return process.env.GITEE_AI_MODEL_PREF ?? null;
       case "cohere":
         return process.env.COHERE_MODEL_PREF ?? "command-r-08-2024";
+      case "docker-model-runner":
+        return process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF ?? null;
       default:
         return null;
     }

@@ -13,6 +13,7 @@ const { fetchPPIOModels } = require("../AiProviders/ppio");
 const { GeminiLLM } = require("../AiProviders/gemini");
 const { fetchCometApiModels } = require("../AiProviders/cometapi");
 const { parseFoundryBasePath } = require("../AiProviders/foundry");
+const { getDockerModels } = require("../AiProviders/dockerModelRunner");

 const SUPPORT_CUSTOM_MODELS = [
   "openai",
@@ -43,6 +44,7 @@ const SUPPORT_CUSTOM_MODELS = [
   "cohere",
   "zai",
   "giteeai",
+  "docker-model-runner",
   // Embedding Engines
   "native-embedder",
   "cohere-embedder",
@@ -116,6 +118,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
       return await getOpenRouterEmbeddingModels();
     case "giteeai":
       return await getGiteeAIModels(apiKey);
+    case "docker-model-runner":
+      return await getDockerModelRunnerModels(basePath);
     default:
       return { models: [], error: "Invalid provider for custom models" };
   }
@@ -864,6 +868,19 @@ async function getOpenRouterEmbeddingModels() {
   return { models, error: null };
 }

+async function getDockerModelRunnerModels(basePath = null) {
+  try {
+    const models = await getDockerModels(basePath);
+    return { models, error: null };
+  } catch (e) {
+    console.error(`DockerModelRunner:getDockerModelRunnerModels`, e.message);
+    return {
+      models: [],
+      error: "Could not fetch Docker Model Runner Models",
+    };
+  }
+}
+
 module.exports = {
   getCustomModels,
   SUPPORT_CUSTOM_MODELS,

@@ -229,6 +229,11 @@ function getLLMProvider({ provider = null, model = null } = {}) {
     case "giteeai":
       const { GiteeAILLM } = require("../AiProviders/giteeai");
       return new GiteeAILLM(embedder, model);
+    case "docker-model-runner":
+      const {
+        DockerModelRunnerLLM,
+      } = require("../AiProviders/dockerModelRunner");
+      return new DockerModelRunnerLLM(embedder, model);
     default:
       throw new Error(
         `ENV: No valid LLM_PROVIDER value found in environment! Using ${process.env.LLM_PROVIDER}`
@@ -394,6 +399,11 @@ function getLLMProviderClass({ provider = null } = {}) {
     case "giteeai":
       const { GiteeAILLM } = require("../AiProviders/giteeai");
       return GiteeAILLM;
+    case "docker-model-runner":
+      const {
+        DockerModelRunnerLLM,
+      } = require("../AiProviders/dockerModelRunner");
+      return DockerModelRunnerLLM;
     default:
       return null;
   }
@@ -470,6 +480,8 @@ function getBaseLLMProviderModel({ provider = null } = {}) {
       return process.env.ZAI_MODEL_PREF;
     case "giteeai":
       return process.env.GITEE_AI_MODEL_PREF;
+    case "docker-model-runner":
+      return process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF;
     default:
       return null;
   }
@@ -495,6 +507,30 @@ function toChunks(arr, size) {
   );
 }

+function humanFileSize(bytes, si = false, dp = 1) {
+  const thresh = si ? 1000 : 1024;
+  if (Math.abs(bytes) < thresh) {
+    return bytes + " B";
+  }
+  const units = si
+    ? ["kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]
+    : ["KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"];
+  let u = -1;
+  const r = 10 ** dp;
+  do {
+    bytes /= thresh;
+    ++u;
+  } while (
+    Math.round(Math.abs(bytes) * r) / r >= thresh &&
+    u < units.length - 1
+  );
+  return bytes.toFixed(dp) + " " + units[u];
+}
+
 module.exports = {
   getEmbeddingEngineSelection,
   maximumChunkLength,
@@ -503,4 +539,5 @@ module.exports = {
   getBaseLLMProviderModel,
   getLLMProvider,
   toChunks,
+  humanFileSize,
 };
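The humanFileSize helper ported in above follows the familiar SI/binary pattern; a few worked values, checkable by hand:

humanFileSize(2370000000, true, 2); // "2.37 GB" (SI units, as used for Docker Hub tag sizes)
humanFileSize(4096);                // "4.0 KiB" (binary units by default)
humanFileSize(512);                 // "512 B"   (below the threshold, returned unchanged)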

@@ -789,6 +789,20 @@ const KEY_MAPPING = {
     envKey: "GITEE_AI_MODEL_TOKEN_LIMIT",
     checks: [nonZero],
   },
+
+  // Docker Model Runner Options
+  DockerModelRunnerBasePath: {
+    envKey: "DOCKER_MODEL_RUNNER_BASE_PATH",
+    checks: [isValidURL],
+  },
+  DockerModelRunnerModelPref: {
+    envKey: "DOCKER_MODEL_RUNNER_LLM_MODEL_PREF",
+    checks: [isNotEmpty],
+  },
+  DockerModelRunnerModelTokenLimit: {
+    envKey: "DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT",
+    checks: [nonZero],
+  },
 };

 function isNotEmpty(input = "") {
@@ -902,6 +916,7 @@ function supportedLLM(input = "") {
     "foundry",
     "zai",
     "giteeai",
+    "docker-model-runner",
   ].includes(input);
   return validSelection ? null : `${input} is not a valid LLM provider.`;
 }