Improved DMR support (#4863)

* Improve DMR support
- Autodetect installed models
- Fetch all available models from Docker Hub to show what can be installed
- UI to render, search, install, and manage models
- Support chat, streaming, and agentic calls

* Add missing files

* Fix loader circle being too large
Fix tooltip width
Adjust where the Docker installer is opened for the web platform

* Adjust imports
Timothy Carambat 2026-01-14 15:55:26 -08:00, committed by GitHub
parent 34a96ad30f
commit ff7cb17e34
37 changed files with 1463 additions and 10 deletions

@@ -6,7 +6,7 @@ concurrency:
 on:
   push:
-    branches: ['4822-feat-remove-workspace-creation-onboarding-page'] # put your current branch to create a build. Core team only.
+    branches: ['4391-dmr-support'] # put your current branch to create a build. Core team only.
     paths-ignore:
       - '**.md'
       - 'cloud-deployments/*'

@@ -106,6 +106,7 @@ AnythingLLM divides your documents into objects called `workspaces`. A Workspace
 - [Moonshot AI](https://www.moonshot.ai/)
 - [Microsoft Foundry Local](https://github.com/microsoft/Foundry-Local)
 - [CometAPI (chat models)](https://api.cometapi.com/)
+- [Docker Model Runner](https://docs.docker.com/ai/model-runner/)

 **Embedder models:**

@@ -162,6 +162,11 @@ GID='1000'
 # GITEE_AI_MODEL_PREF=
 # GITEE_AI_MODEL_TOKEN_LIMIT=

+# LLM_PROVIDER='docker-model-runner'
+# DOCKER_MODEL_RUNNER_BASE_PATH='http://127.0.0.1:12434'
+# DOCKER_MODEL_RUNNER_LLM_MODEL_PREF='phi-3.5-mini'
+# DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT=4096
+
 ###########################################
 ######## Embedding API SElECTION ##########
 ###########################################
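These defaults assume Docker Model Runner's TCP host support is enabled on its default port 12434. A quick sanity check before pointing AnythingLLM at it, as a sketch (Node 18+ global fetch; the /engines/llama.cpp/v1 path is DMR's documented OpenAI-compatible surface):

// Sketch: verify the Docker Model Runner endpoint from the env values above.
const base = "http://127.0.0.1:12434";
fetch(`${base}/engines/llama.cpp/v1/models`)
  .then((res) => res.json())
  // Expect an OpenAI-style list: { object: "list", data: [{ id: "ai/..." }, ...] }
  .then((data) => console.log(data.data?.map((m) => m.id)))
  .catch(() => console.error("DMR unreachable: enable TCP support in Docker Desktop"));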

@@ -33,7 +33,7 @@ export default function DellProAIStudioOptions({
       />
       <div className="flex flex-col w-60">
         <label className="text-white text-sm font-semibold block mb-2">
-          Token context window
+          Model context window
         </label>
         <input
           type="number"

@@ -0,0 +1,314 @@
import { useRef, useState, useEffect } from "react";
import {
CaretDown,
CaretRight,
Cpu,
DownloadSimple,
CircleNotch,
CheckCircle,
Dot,
Circle,
DotsThreeCircleVertical,
DotsThreeVertical,
CloudArrowDown,
} from "@phosphor-icons/react";
import pluralize from "pluralize";
import { titleCase } from "text-case";
import { humanFileSize } from "@/utils/numbers";
/**
* @typedef {Object} ModelDefinition
* @property {string} id - The ID of the model.
* @property {'CPU' | 'GPU' | 'NPU'} deviceType - The device type of the model.
* @property {number} modelSize - The size of the model in megabytes.
* @property {boolean} downloaded - Whether the model is downloaded.
*/
/**
* @param {object} props - The props of the component.
* @param {string} props.alias - The alias of the model.
* @param {Array<ModelDefinition>} props.models - The models to display.
* @param {(model: string, fileSize: string, progressCallback: (percentage: number) => void) => void} props.downloadModel - The function to download the model.
* @param {(model: string) => void} props.uninstallModel - The function to uninstall the model.
* @param {(model: string) => void} props.setActiveModel - The function to set the active model.
* @param {string} props.selectedModelId - The ID of the selected model.
* @param {object} props.ui - The UI configuration.
* @param {boolean} props.ui.showRuntime - Whether to show the runtime.
* @returns {React.ReactNode}
*/
export default function ModelTable({
alias = "",
models = [],
downloadModel = null,
uninstallModel = null,
setActiveModel = () => {},
selectedModelId = "",
ui = {
showRuntime: true,
},
}) {
const [showAll, setShowAll] = useState(
models.some((model) => model.downloaded)
);
const totalModels = models.length;
return (
<div className="flex flex-col w-full border-b border-theme-modal-border py-[18px]">
<button
type="button"
onClick={() => setShowAll(!showAll)}
className="border-none text-theme-text-secondary text-sm font-medium hover:underline flex items-center gap-x-[8px]"
>
{showAll ? (
<CaretDown
size={16}
weight="bold"
className="text-theme-text-secondary"
/>
) : (
<CaretRight
size={16}
weight="bold"
className="text-theme-text-secondary"
/>
)}
<h3 className="flex items-center gap-x-1 text-theme-text-primary text-base font-bold">
{titleCase(alias)}
<span className="text-theme-text-secondary font-normal text-sm">
({totalModels} {pluralize("Model", totalModels)})
</span>
</h3>
</button>
<div hidden={!showAll} className="mt-[16px]">
<div className="w-full flex flex-col gap-y-[8px]">
{models.map((model) => (
<ModelRow
key={model.id}
model={model}
downloadModel={downloadModel}
uninstallModel={uninstallModel}
setActiveModel={setActiveModel}
selectedModelId={selectedModelId}
ui={ui}
/>
))}
</div>
</div>
</div>
);
}
/**
* @param {{deviceType: ModelDefinition["deviceType"]}} deviceType
* @returns {React.ReactNode}
*/
function DeviceTypeTag({ deviceType }) {
const Wrapper = ({ text, bgClass, textClass }) => {
return (
<div
className={
bgClass + " px-1.5 py-1 rounded-full flex items-center gap-x-1 w-fit"
}
>
<Cpu size={16} weight="bold" className={textClass} />
<p className={textClass + " text-xs"}>{text}</p>
</div>
);
};
switch (deviceType?.toLowerCase()) {
case "cpu":
return (
<Wrapper
text="CPU"
bgClass="bg-blue-600/20"
textClass="text-blue-300"
/>
);
case "gpu":
return (
<Wrapper
text="GPU"
bgClass="bg-green-600/20"
textClass="text-green-300"
/>
);
case "npu":
return (
<Wrapper
text="NPU"
bgClass="bg-indigo-600/20"
textClass="text-indigo-300"
/>
);
default:
return (
<Wrapper
text="CPU"
bgClass="bg-blue-600/20"
textClass="text-blue-300"
/>
);
}
}
/**
* @param {object} props - The props of the component.
* @param {ModelDefinition} props.model - The model to display.
* @param {(model: string, fileSize: string, progressCallback: (percentage: number) => void) => Promise<void>} props.downloadModel - The function to download the model.
* @param {(model: string) => Promise<void>} props.uninstallModel - The function to uninstall the model.
* @param {(model: string) => void} props.setActiveModel - The function to set the active model.
* @param {string} props.selectedModelId - The ID of the selected model.
* @param {object} props.ui - The UI configuration.
* @param {boolean} props.ui.showRuntime - Whether to show the runtime.
* @returns {React.ReactNode}
*/
function ModelRow({
model,
downloadModel = null,
uninstallModel = null,
setActiveModel,
selectedModelId,
ui = {
showRuntime: true,
},
}) {
const modelRowRef = useRef(null);
const [showOptions, setShowOptions] = useState(false);
const [processing, setProcessing] = useState(false);
const [_downloadPercentage, setDownloadPercentage] = useState(0);
const fileSize =
typeof model.size === "number"
? humanFileSize(model.size * 1e6, true, 2)
: (model.size ?? "Unknown size");
const [isActiveModel, setIsActiveModel] = useState(
selectedModelId === model.id
);
async function handleSetActiveModel() {
setDownloadPercentage(0);
if (model.downloaded) setActiveModel(model.id);
else {
try {
if (!downloadModel) return;
setProcessing(true);
await downloadModel(model.id, fileSize, (percentage) =>
setDownloadPercentage(percentage)
);
} catch {
} finally {
setProcessing(false);
}
}
}
async function handleUninstallModel() {
if (!uninstallModel) return;
try {
setProcessing(true);
await uninstallModel(model.id);
} catch {
} finally {
setProcessing(false);
}
}
useEffect(() => {
if (selectedModelId === model.id) {
setIsActiveModel(true);
modelRowRef.current.classList.add("!bg-gray-200/10");
setTimeout(
() => modelRowRef.current.classList.remove("!bg-gray-200/10"),
800
);
} else {
setIsActiveModel(false);
}
}, [selectedModelId]);
return (
<div
ref={modelRowRef}
className="w-full grid grid-cols-[1fr_auto_1fr] items-center gap-x-4 transition-all duration-300 rounded-lg"
>
<button
type="button"
className="border-none flex items-center gap-x-[8px] whitespace-nowrap py-[8px]"
disabled={processing}
onClick={handleSetActiveModel}
>
{ui.showRuntime && <DeviceTypeTag deviceType={model.deviceType} />}
<p className="text-theme-text-primary text-base px-2">{model.name}</p>
<p className="text-theme-text-secondary opacity-70 text-base">
{fileSize}
</p>
</button>
<div className="justify-self-start">
{isActiveModel && (
<div className="flex items-center justify-center gap-x-[10px] whitespace-nowrap">
<Circle size={8} weight="fill" className="text-green-500" />
<p className="text-theme-text-primary text-sm">Active</p>
</div>
)}
{!isActiveModel && model.downloaded && !uninstallModel && (
<p className="text-theme-text-secondary text-sm italic whitespace-nowrap">
Installed
</p>
)}
{!model.downloaded && (
<p className="text-theme-text-secondary text-sm italic whitespace-nowrap">
Not Installed
</p>
)}
</div>
<div className="relative justify-self-end">
{uninstallModel && model.downloaded ? (
<>
<button
type="button"
className="border-none hover:bg-white/20 rounded-lg p-1"
onClick={() => setShowOptions(!showOptions)}
>
<DotsThreeVertical
size={22}
weight="bold"
className="text-theme-text-primary cursor-pointer"
/>
</button>
{showOptions && (
<div className="absolute top-[20px] right-[20px] bg-theme-action-menu-bg border border-theme-modal-border rounded-lg py-2 px-4 shadow-lg">
<button
type="button"
className="border-none font-medium group"
onClick={handleUninstallModel}
>
<p className="text-sm text-theme-text-primary group-hover:underline group-hover:text-theme-text-secondary">
Uninstall
</p>
</button>
</div>
)}
</>
) : null}
{!model.downloaded ? (
<button
type="button"
className="border-none hover:bg-white/20 rounded-lg p-1 flex items-center gap-x-1"
onClick={handleSetActiveModel}
>
<CloudArrowDown
size={16}
weight="bold"
className="text-blue-300 cursor-pointer"
/>
<p className="text-sm text-blue-300">Install</p>
</button>
) : null}
</div>
</div>
);
}
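For orientation, a minimal sketch of how a parent view might drive this table. Prop names follow the JSDoc above; the handlers and the model entry are illustrative stand-ins, not code from this PR:

<ModelTable
  alias="qwen3"
  models={[
    {
      id: "ai/qwen3:latest",
      name: "qwen3:latest",
      size: "2.37 GiB",
      downloaded: true,
    },
  ]}
  setActiveModel={(id) => console.log("selected", id)}
  downloadModel={async (id, fileSize, onProgress) => onProgress(100)}
  selectedModelId="ai/qwen3:latest"
  ui={{ showRuntime: false }}
/>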

@@ -0,0 +1,426 @@
import { useState, useEffect } from "react";
import System from "@/models/system";
import useProviderEndpointAutoDiscovery from "@/hooks/useProviderEndpointAutoDiscovery";
import {
ArrowClockwise,
CircleNotch,
MagnifyingGlass,
Info,
} from "@phosphor-icons/react";
import strDistance from "js-levenshtein";
import { LLM_PREFERENCE_CHANGED_EVENT } from "@/pages/GeneralSettings/LLMPreference";
import { DOCKER_MODEL_RUNNER_COMMON_URLS } from "@/utils/constants";
import { Tooltip } from "react-tooltip";
import { Link } from "react-router-dom";
import ModelTable from "./ModelTable";
import * as Skeleton from "react-loading-skeleton";
import "react-loading-skeleton/dist/skeleton.css";
export default function DockerModelRunnerOptions({ settings }) {
const {
autoDetecting: loading,
basePath,
basePathValue,
handleAutoDetectClick,
} = useProviderEndpointAutoDiscovery({
provider: "docker-model-runner",
initialBasePath: settings?.DockerModelRunnerBasePath,
ENDPOINTS: DOCKER_MODEL_RUNNER_COMMON_URLS,
});
const [maxTokens, setMaxTokens] = useState(
settings?.DockerModelRunnerModelTokenLimit || 4096
);
return (
<div className="w-full flex flex-col gap-y-7">
<div className="flex gap-[36px] mt-1.5 flex-wrap">
<div className="flex flex-col w-60">
<div className="flex items-center gap-1 mb-3">
<div className="flex justify-between items-center gap-x-2">
<label className="text-white text-sm font-semibold">
Base URL
</label>
{loading ? (
<CircleNotch className="w-4 h-4 text-theme-text-secondary animate-spin" />
) : (
<>
{!basePathValue.value && (
<button
onClick={handleAutoDetectClick}
className="bg-primary-button text-xs font-medium px-2 py-1 rounded-lg hover:bg-secondary hover:text-white shadow-[0_4px_14px_rgba(0,0,0,0.25)]"
>
Auto-Detect
</button>
)}
</>
)}
</div>
<Tooltip
id="docker-model-runner-base-url"
place="top"
delayShow={300}
delayHide={800}
clickable={true}
className="tooltip !text-xs !opacity-100 z-99"
style={{
maxWidth: "250px",
whiteSpace: "normal",
wordWrap: "break-word",
}}
>
Enter the URL where the Docker Model Runner is running.
<br />
<br />
You <b>must</b> have enabled the Docker Model Runner TCP support
for this to work.
<br />
<br />
<Link
to="https://docs.docker.com/ai/model-runner/get-started/#docker-desktop"
target="_blank"
className="text-blue-500 hover:underline"
>
Learn more &rarr;
</Link>
</Tooltip>
<div
className="text-theme-text-secondary cursor-pointer hover:bg-theme-bg-primary flex items-center justify-center rounded-full"
data-tooltip-id="docker-model-runner-base-url"
data-tooltip-place="top"
data-tooltip-delay-hide={800}
>
<Info size={18} className="text-theme-text-secondary" />
</div>
</div>
<input
type="url"
name="DockerModelRunnerBasePath"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="http://localhost:12434/engines/llama.cpp/v1"
value={basePathValue.value}
required={true}
autoComplete="off"
spellCheck={false}
onChange={basePath.onChange}
onBlur={basePath.onBlur}
/>
</div>
<div className="flex flex-col w-60">
<div className="flex items-center gap-1 mb-3">
<label className="text-white text-sm font-semibold block">
Model context window
</label>
<Tooltip
id="docker-model-runner-model-context-window"
place="top"
delayShow={300}
delayHide={800}
clickable={true}
className="tooltip !text-xs !opacity-100 z-99"
style={{
maxWidth: "350px",
whiteSpace: "normal",
wordWrap: "break-word",
}}
>
The maximum number of tokens that can be used for a model context
window.
<br />
<br />
To set the context window limit for a model, you can use the{" "}
<code>docker model configure</code> command with the{" "}
<code>--context-size</code> parameter.
<br />
<br />
<code>
docker model configure --context-size 8192 ai/qwen3:latest
</code>
<br />
<br />
<Link
to="https://docs.docker.com/ai/model-runner/#context-size"
target="_blank"
className="text-blue-500 hover:underline"
>
Learn more &rarr;
</Link>
</Tooltip>
<div
className="text-theme-text-secondary cursor-pointer hover:bg-theme-bg-primary flex items-center justify-center rounded-full"
data-tooltip-id="docker-model-runner-model-context-window"
data-tooltip-place="top"
data-tooltip-delay-hide={800}
>
<Info size={18} className="text-theme-text-secondary" />
</div>
</div>
<input
type="number"
name="DockerModelRunnerModelTokenLimit"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="4096"
min={1}
value={maxTokens}
onChange={(e) => setMaxTokens(Number(e.target.value))}
onScroll={(e) => e.target.blur()}
required={true}
autoComplete="off"
/>
</div>
<DockerModelRunnerModelSelection
settings={settings}
basePath={basePathValue.value}
/>
</div>
</div>
);
}
function DockerModelRunnerModelSelection({ settings, basePath = null }) {
const [selectedModelId, setSelectedModelId] = useState(
settings?.DockerModelRunnerModelPref
);
const [customModels, setCustomModels] = useState([]);
const [filteredModels, setFilteredModels] = useState([]);
const [loading, setLoading] = useState(true);
const [searchQuery, setSearchQuery] = useState("");
async function fetchModels() {
if (!basePath) {
setCustomModels([]);
setFilteredModels([]);
setLoading(false);
setSearchQuery("");
return;
}
setLoading(true);
const { models } = await System.customModels(
"docker-model-runner",
null,
basePath
);
setCustomModels(models || []);
setFilteredModels(models || []);
setSearchQuery("");
setLoading(false);
}
useEffect(() => {
fetchModels();
}, [basePath]);
useEffect(() => {
if (!searchQuery || !customModels.length) {
setFilteredModels(customModels || []);
return;
}
const normalizedSearchQuery = searchQuery.toLowerCase().trim();
const filteredModels = new Map();
customModels.forEach((model) => {
const modelNameNormalized = model.name.toLowerCase();
const modelOrganizationNormalized = model.organization.toLowerCase();
if (modelNameNormalized.startsWith(normalizedSearchQuery))
filteredModels.set(model.id, model);
if (modelOrganizationNormalized.startsWith(normalizedSearchQuery))
filteredModels.set(model.id, model);
if (strDistance(modelNameNormalized, normalizedSearchQuery) <= 2)
filteredModels.set(model.id, model);
if (strDistance(modelOrganizationNormalized, normalizedSearchQuery) <= 2)
filteredModels.set(model.id, model);
});
setFilteredModels(Array.from(filteredModels.values()));
}, [searchQuery]);
function downloadModel(modelId, _fileSize, progressCallback) {
const [name, tag] = modelId.split(":");
// Open the model page on Docker Hub in the browser, since the model may not be installed locally
window.open(`https://hub.docker.com/layers/${name}/${tag}`, "_blank");
progressCallback(100);
}
function groupModelsByAlias(models) {
const mapping = new Map();
mapping.set("installed", new Map());
mapping.set("not installed", new Map());
const groupedModels = models.reduce((acc, model) => {
acc[model.organization] = acc[model.organization] || [];
acc[model.organization].push(model);
return acc;
}, {});
Object.entries(groupedModels).forEach(([organization, models]) => {
const hasInstalled = models.some((model) => model.downloaded);
if (hasInstalled) {
const installedModels = models.filter((model) => model.downloaded);
mapping
.get("installed")
.set("Downloaded Models", [
...(mapping.get("installed").get("Downloaded Models") || []),
...installedModels,
]);
}
const tags = models.map((model) => ({
...model,
name: model.name.split(":")[1],
}));
mapping.get("not installed").set(organization, tags);
});
const orderedMap = new Map();
mapping
.get("installed")
.entries()
.forEach(([organization, models]) =>
orderedMap.set(organization, models)
);
mapping
.get("not installed")
.entries()
.forEach(([organization, models]) =>
orderedMap.set(organization, models)
);
return Object.fromEntries(orderedMap);
}
function handleSetActiveModel(modelId) {
if (modelId === selectedModelId) return;
setSelectedModelId(modelId);
window.dispatchEvent(new Event(LLM_PREFERENCE_CHANGED_EVENT));
}
const groupedModels = groupModelsByAlias(filteredModels);
return (
<Layout
fetchModels={fetchModels}
searchQuery={searchQuery}
setSearchQuery={setSearchQuery}
loading={loading}
>
<input
type="hidden"
name="DockerModelRunnerModelPref"
id="DockerModelRunnerModelPref"
value={selectedModelId}
/>
{loading ? (
<LoadingSkeleton />
) : filteredModels.length === 0 ? (
<div className="flex flex-col w-full gap-y-2 mt-4">
<p className="text-theme-text-secondary text-sm">No models found!</p>
</div>
) : (
Object.entries(groupedModels).map(([alias, models]) => (
<ModelTable
key={alias}
alias={alias}
models={models}
setActiveModel={handleSetActiveModel}
downloadModel={downloadModel}
selectedModelId={selectedModelId}
ui={{
showRuntime: false,
}}
/>
))
)}
</Layout>
);
}
function Layout({
children,
fetchModels = null,
searchQuery = "",
setSearchQuery = () => {},
loading = false,
}) {
const [isRefreshing, setIsRefreshing] = useState(false);
async function refreshModels() {
setIsRefreshing(true);
try {
await fetchModels?.();
} catch {
} finally {
setIsRefreshing(false);
}
}
return (
<div className="flex flex-col w-full">
<div className="flex gap-x-2 items-center pb-[8px]">
<label className="text-theme-text-primary text-base font-semibold">
Available Models
</label>
</div>
<div className="flex w-full items-center gap-x-[16px]">
<div className="relative flex-1 max-w-[640px]">
<MagnifyingGlass
size={14}
weight="bold"
color="var(--theme-text-primary)"
className="absolute left-[9px] top-[10px] text-theme-settings-input-placeholder peer-focus:invisible"
/>
<input
type="search"
placeholder="Search models"
value={searchQuery}
disabled={loading}
className="min-h-[32px] border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5 pl-[30px] py-2 search-input disabled:opacity-50 disabled:cursor-not-allowed"
onChange={(e) => {
e.preventDefault();
e.stopPropagation();
setSearchQuery(e.target.value);
}}
/>
</div>
{!!fetchModels && (
<button
type="button"
onClick={refreshModels}
disabled={isRefreshing || loading}
className="border-none text-theme-text-secondary text-sm font-medium hover:underline flex items-center gap-x-1 disabled:opacity-50 disabled:cursor-not-allowed"
>
{isRefreshing ? (
<CircleNotch className="w-4 h-4 text-theme-text-secondary animate-spin" />
) : (
<ArrowClockwise
weight="bold"
className="w-4 h-4 text-theme-text-secondary"
/>
)}
<span
className={`text-sm font-medium ${isRefreshing ? "hidden" : "text-theme-text-secondary"}`}
>
Refresh Models
</span>
</button>
)}
</div>
{children}
</div>
);
}
function LoadingSkeleton() {
return (
<div className="flex flex-col w-full gap-y-4">
<Skeleton.default
height={100}
width="100%"
count={7}
highlightColor="var(--theme-settings-input-active)"
baseColor="var(--theme-settings-input-bg)"
enableAnimation={true}
containerClassName="w-fill flex gap-[8px] flex-col p-0"
/>
</div>
);
}
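A note on the search behavior above: a model is kept when its name or organization starts with the query, or when its Levenshtein distance to the query is at most 2. Worked values (js-levenshtein, checkable by hand):

import strDistance from "js-levenshtein";

strDistance("qwen3", "qwen");  // 1 -> within the <= 2 threshold, so "qwen" matches qwen3 models
strDistance("qwen3", "llama"); // 5 -> excluded unless a prefix rule matches first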

@@ -92,7 +92,7 @@ export default function FoundryOptions({ settings }) {
       </div>
       <div className="flex flex-col w-60">
         <label className="text-white text-sm font-semibold block mb-3">
-          Token Context Window
+          Model context window
         </label>
         <input
           type="number"

@@ -50,7 +50,7 @@ export default function GenericOpenAiOptions({ settings }) {
     <div className="flex gap-[36px] flex-wrap">
       <div className="flex flex-col w-60">
         <label className="text-white text-sm font-semibold block mb-3">
-          Token context window
+          Model context window
         </label>
         <input
           type="number"

@@ -24,7 +24,7 @@ export default function GiteeAIOptions({ settings }) {
       <GiteeAIModelSelection settings={settings} />
       <div className="flex flex-col w-60">
         <label className="text-white text-sm font-semibold block mb-2">
-          Token context window
+          Model context window
         </label>
         <input
           type="number"

@@ -43,7 +43,7 @@ export default function KoboldCPPOptions({ settings }) {
       />
       <div className="flex flex-col w-60">
         <label className="text-white text-sm font-semibold block mb-2">
-          Token context window
+          Model context window
         </label>
         <input
           type="number"

@@ -34,7 +34,7 @@ export default function LiteLLMOptions({ settings }) {
       />
       <div className="flex flex-col w-60">
         <label className="text-white text-sm font-semibold block mb-3">
-          Token context window
+          Model context window
         </label>
         <input
           type="number"

@@ -51,7 +51,7 @@ export default function LocalAiOptions({ settings, showAlert = false }) {
       />
       <div className="flex flex-col w-60">
         <label className="text-white text-sm font-semibold block mb-2">
-          Token context window
+          Model context window
         </label>
         <input
           type="number"

@@ -18,7 +18,7 @@ export default function TextGenWebUIOptions({ settings }) {
       </div>
       <div className="flex flex-col w-60">
         <label className="text-white text-sm font-semibold block mb-3">
-          Token context window
+          Model context window
         </label>
         <input
           type="number"

@@ -41,6 +41,7 @@ import MoonshotAiLogo from "@/media/llmprovider/moonshotai.png";
 import CometApiLogo from "@/media/llmprovider/cometapi.png";
 import FoundryLogo from "@/media/llmprovider/foundry-local.png";
 import GiteeAILogo from "@/media/llmprovider/giteeai.png";
+import DockerModelRunnerLogo from "@/media/llmprovider/docker-model-runner.png";

 const LLM_PROVIDER_PRIVACY_MAP = {
   openai: {
@@ -224,6 +225,13 @@ const LLM_PROVIDER_PRIVACY_MAP = {
     policyUrl: "https://ai.gitee.com/docs/appendix/privacy",
     logo: GiteeAILogo,
   },
+  "docker-model-runner": {
+    name: "Docker Model Runner",
+    description: [
+      "Your model and chats are only accessible on the machine running Docker Model Runner.",
+    ],
+    logo: DockerModelRunnerLogo,
+  },
 };

 const VECTOR_DB_PROVIDER_PRIVACY_MAP = {

@@ -52,6 +52,7 @@ const groupedProviders = [
   "novita",
   "openrouter",
   "ppio",
+  "docker-model-runner",
 ];

 export default function useGetProviderModels(provider = null) {
   const [defaultModels, setDefaultModels] = useState([]);

Binary file added (docker-model-runner.png logo, 8.5 KiB), not shown.

@@ -37,6 +37,7 @@ import MoonshotAiLogo from "@/media/llmprovider/moonshotai.png";
 import CometApiLogo from "@/media/llmprovider/cometapi.png";
 import FoundryLogo from "@/media/llmprovider/foundry-local.png";
 import GiteeAILogo from "@/media/llmprovider/giteeai.png";
+import DockerModelRunnerLogo from "@/media/llmprovider/docker-model-runner.png";
 import PreLoader from "@/components/Preloader";

 import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
@@ -71,6 +72,7 @@ import DellProAiStudioOptions from "@/components/LLMSelection/DPAISOptions";
 import MoonshotAiOptions from "@/components/LLMSelection/MoonshotAiOptions";
 import FoundryOptions from "@/components/LLMSelection/FoundryOptions";
 import GiteeAIOptions from "@/components/LLMSelection/GiteeAIOptions/index.jsx";
+import DockerModelRunnerOptions from "@/components/LLMSelection/DockerModelRunnerOptions";
 import LLMItem from "@/components/LLMSelection/LLMItem";
 import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
@@ -161,6 +163,18 @@ export const AVAILABLE_LLM_PROVIDERS = [
       "Discover, download, and run thousands of cutting edge LLMs in a few clicks.",
     requiredConfig: ["LMStudioBasePath"],
   },
+  {
+    name: "Docker Model Runner",
+    value: "docker-model-runner",
+    logo: DockerModelRunnerLogo,
+    options: (settings) => <DockerModelRunnerOptions settings={settings} />,
+    description: "Run LLMs using Docker Model Runner.",
+    requiredConfig: [
+      "DockerModelRunnerBasePath",
+      "DockerModelRunnerModelPref",
+      "DockerModelRunnerModelTokenLimit",
+    ],
+  },
   {
     name: "Local AI",
     value: "localai",
@@ -371,6 +385,7 @@ export const AVAILABLE_LLM_PROVIDERS = [
   },
 ];

+export const LLM_PREFERENCE_CHANGED_EVENT = "llm-preference-changed";

 export default function GeneralLLMPreference() {
   const [saving, setSaving] = useState(false);
   const [hasChanges, setHasChanges] = useState(false);
@@ -428,6 +443,21 @@ export default function GeneralLLMPreference() {
     fetchKeys();
   }, []);

+  // Some more complex LLM options do not bubble up the change event, so we need to listen to the custom event
+  // we can emit from the LLM options component using window.dispatchEvent(new Event(LLM_PREFERENCE_CHANGED_EVENT));
+  useEffect(() => {
+    function updateHasChanges() {
+      setHasChanges(true);
+    }
+    window.addEventListener(LLM_PREFERENCE_CHANGED_EVENT, updateHasChanges);
+    return () => {
+      window.removeEventListener(
+        LLM_PREFERENCE_CHANGED_EVENT,
+        updateHasChanges
+      );
+    };
+  }, []);
+
   useEffect(() => {
     const filtered = AVAILABLE_LLM_PROVIDERS.filter((llm) =>
       llm.name.toLowerCase().includes(searchQuery.toLowerCase())

@@ -31,6 +31,7 @@ import DellProAiStudioLogo from "@/media/llmprovider/dpais.png";
 import MoonshotAiLogo from "@/media/llmprovider/moonshotai.png";
 import CometApiLogo from "@/media/llmprovider/cometapi.png";
 import GiteeAILogo from "@/media/llmprovider/giteeai.png";
+import DockerModelRunnerLogo from "@/media/llmprovider/docker-model-runner.png";

 import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
 import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions";
@@ -63,6 +64,7 @@ import DellProAiStudioOptions from "@/components/LLMSelection/DPAISOptions";
 import MoonshotAiOptions from "@/components/LLMSelection/MoonshotAiOptions";
 import CometApiLLMOptions from "@/components/LLMSelection/CometApiLLMOptions";
 import GiteeAiOptions from "@/components/LLMSelection/GiteeAIOptions";
+import DockerModelRunnerOptions from "@/components/LLMSelection/DockerModelRunnerOptions";
 import LLMItem from "@/components/LLMSelection/LLMItem";
 import System from "@/models/system";
@@ -139,6 +141,13 @@ const LLMS = [
     description:
       "Discover, download, and run thousands of cutting edge LLMs in a few clicks.",
   },
+  {
+    name: "Docker Model Runner",
+    value: "docker-model-runner",
+    logo: DockerModelRunnerLogo,
+    options: (settings) => <DockerModelRunnerOptions settings={settings} />,
+    description: "Run LLMs using Docker Model Runner.",
+  },
   {
     name: "Local AI",
     value: "localai",

@@ -37,6 +37,7 @@ const ENABLED_PROVIDERS = [
   "zai",
   "giteeai",
   "cohere",
+  "docker-model-runner",
   // TODO: More agent support.
   // "huggingface" // Can be done but already has issues with no-chat templated. Needs to be tested.
 ];
@@ -46,6 +47,7 @@ const WARN_PERFORMANCE = [
   "ollama",
   "localai",
   "textgenwebui",
+  "docker-model-runner",
 ];

 const LLM_DEFAULT = {

@@ -53,6 +53,14 @@ export const NVIDIA_NIM_COMMON_URLS = [
   "http://172.17.0.1:8000/v1/version",
 ];

+export const DOCKER_MODEL_RUNNER_COMMON_URLS = [
+  "http://localhost:12434/engines/llama.cpp/v1",
+  "http://127.0.0.1:12434/engines/llama.cpp/v1",
+  "http://model-runner.docker.internal/engines/llama.cpp/v1",
+  "http://host.docker.internal:12434/engines/llama.cpp/v1",
+  "http://172.17.0.1:12434/engines/llama.cpp/v1",
+];
+
 export function fullApiUrl() {
   if (API_BASE !== "/api") return API_BASE;
   return `${window.location.origin}/api`;
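These candidates cover DMR reached from the host (localhost/127.0.0.1), from inside a container (host.docker.internal or the 172.17.0.1 bridge gateway), and via Docker's internal DNS name. A hedged sketch of what an auto-discovery pass over this list can look like; the actual logic lives in useProviderEndpointAutoDiscovery and may differ:

import { DOCKER_MODEL_RUNNER_COMMON_URLS } from "@/utils/constants";

// Probe each candidate's OpenAI-compatible /models route; first responder wins.
async function probeDockerModelRunner() {
  for (const base of DOCKER_MODEL_RUNNER_COMMON_URLS) {
    try {
      const res = await fetch(`${base}/models`, {
        signal: AbortSignal.timeout(2000), // do not hang on dead candidates
      });
      if (res.ok) return base;
    } catch {}
  }
  return null;
}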

@@ -105,6 +105,7 @@ AnythingLLM اسناد شما را به اشیایی به نام `workspaces` ت
 - [Z.AI (chat models)](https://z.ai/model-api)
 - [Novita AI (chat models)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link)
 - [PPIO](https://ppinfra.com?utm_source=github_anything-llm)
+- [Docker Model Runner](https://docs.docker.com/ai/model-runner/)

 <div dir="rtl">

@@ -94,6 +94,7 @@ AnythingLLMは、ドキュメントを`ワークスペース`と呼ばれるオ
 - [Z.AI (チャットモデル)](https://z.ai/model-api)
 - [PPIO](https://ppinfra.com?utm_source=github_anything-llm)
 - [CometAPI (チャットモデル)](https://api.cometapi.com/)
+- [Docker Model Runner](https://docs.docker.com/ai/model-runner/)

 **埋め込みモデル:**

@@ -102,6 +102,7 @@ AnythingLLM, belgelerinizi **"çalışma alanları" (workspaces)** adı verilen
 - [Z.AI (chat models)](https://z.ai/model-api)
 - [Novita AI (chat models)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link)
 - [PPIO](https://ppinfra.com?utm_source=github_anything-llm)
+- [Docker Model Runner](https://docs.docker.com/ai/model-runner/)

 **Embedder modelleri:**

@@ -102,6 +102,7 @@ AnythingLLM将您的文档划分为称为`workspaces` (工作区)的对象。工
 - [Novita AI (聊天模型)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link)
 - [PPIO (聊天模型)](https://ppinfra.com?utm_source=github_anything-llm)
 - [CometAPI (聊天模型)](https://api.cometapi.com/)
+- [Docker Model Runner](https://docs.docker.com/ai/model-runner/)

 **支持的嵌入模型:**

@@ -161,6 +161,11 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
 # GITEE_AI_MODEL_PREF=
 # GITEE_AI_MODEL_TOKEN_LIMIT=

+# LLM_PROVIDER='docker-model-runner'
+# DOCKER_MODEL_RUNNER_BASE_PATH='http://127.0.0.1:12434'
+# DOCKER_MODEL_RUNNER_LLM_MODEL_PREF='phi-3.5-mini'
+# DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT=4096
+
 ###########################################
 ######## Embedding API SElECTION ##########
 ###########################################

@@ -154,6 +154,9 @@ function getModelTag() {
     case "cohere":
       model = process.env.COHERE_MODEL_PREF;
       break;
+    case "docker-model-runner":
+      model = process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF;
+      break;
     default:
       model = "--";
       break;

@@ -646,6 +646,13 @@ const SystemSettings = {
       GiteeAIApiKey: !!process.env.GITEE_AI_API_KEY,
       GiteeAIModelPref: process.env.GITEE_AI_MODEL_PREF,
       GiteeAITokenLimit: process.env.GITEE_AI_MODEL_TOKEN_LIMIT || 8192,
+
+      // Docker Model Runner Keys
+      DockerModelRunnerBasePath: process.env.DOCKER_MODEL_RUNNER_BASE_PATH,
+      DockerModelRunnerModelPref:
+        process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF,
+      DockerModelRunnerModelTokenLimit:
+        process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT || 8192,
     };
   },
}, },

@@ -14,3 +14,4 @@ MintplexLabs
 cometapi
 fireworks
 giteeai
+docker-model-runner

@@ -0,0 +1,434 @@
const fs = require("fs");
const path = require("path");
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const {
handleDefaultStreamResponseV2,
formatChatHistory,
} = require("../../helpers/chat/responses");
const {
LLMPerformanceMonitor,
} = require("../../helpers/chat/LLMPerformanceMonitor");
const { OpenAI: OpenAIApi } = require("openai");
const { humanFileSize } = require("../../helpers");
const { safeJsonParse } = require("../../http");
class DockerModelRunnerLLM {
static cacheTime = 1000 * 60 * 60 * 24; // 24 hours
static cacheFolder = path.resolve(
process.env.STORAGE_DIR
? path.resolve(process.env.STORAGE_DIR, "models", "docker-model-runner")
: path.resolve(__dirname, `../../../storage/models/docker-model-runner`)
);
constructor(embedder = null, modelPreference = null) {
if (!process.env.DOCKER_MODEL_RUNNER_BASE_PATH)
throw new Error("No Docker Model Runner API Base Path was set.");
if (!process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF)
throw new Error("No Docker Model Runner Model Pref was set.");
this.dmr = new OpenAIApi({
baseURL: parseDockerModelRunnerEndpoint(
process.env.DOCKER_MODEL_RUNNER_BASE_PATH
),
apiKey: null,
});
this.model =
modelPreference || process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF;
this.embedder = embedder ?? new NativeEmbedder();
this.defaultTemp = 0.7;
this.limits = {
history: this.promptWindowLimit() * 0.15,
system: this.promptWindowLimit() * 0.15,
user: this.promptWindowLimit() * 0.7,
};
this.#log(`initialized with model: ${this.model}`);
}
#log(text, ...args) {
console.log(`\x1b[32m[Docker Model Runner]\x1b[0m ${text}`, ...args);
}
static slog(text, ...args) {
console.log(`\x1b[32m[Docker Model Runner]\x1b[0m ${text}`, ...args);
}
async assertModelContextLimits() {
if (this.limits !== null) return;
this.limits = {
history: this.promptWindowLimit() * 0.15,
system: this.promptWindowLimit() * 0.15,
user: this.promptWindowLimit() * 0.7,
};
}
#appendContext(contextTexts = []) {
if (!contextTexts || !contextTexts.length) return "";
return (
"\nContext:\n" +
contextTexts
.map((text, i) => {
return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
})
.join("")
);
}
streamingEnabled() {
return "streamGetChatCompletion" in this;
}
/** DMR does not support curling the context window limit from the API, so we return the system defined limit. */
static promptWindowLimit(_) {
const systemDefinedLimit =
Number(process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT) || 8192;
return systemDefinedLimit;
}
promptWindowLimit() {
return this.constructor.promptWindowLimit(this.model);
}
async isValidChatCompletionModel(_ = "") {
return true;
}
/**
* Generates appropriate content array for a message + attachments.
* @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}}
* @returns {string|object[]}
*/
#generateContent({ userPrompt, attachments = [] }) {
if (!attachments.length) {
return userPrompt;
}
const content = [{ type: "text", text: userPrompt }];
for (let attachment of attachments) {
content.push({
type: "image_url",
image_url: {
url: attachment.contentString,
detail: "auto",
},
});
}
return content.flat();
}
/**
* Construct the user prompt for this model.
* @param {{attachments: import("../../helpers").Attachment[]}} param0
* @returns
*/
constructPrompt({
systemPrompt = "",
contextTexts = [],
chatHistory = [],
userPrompt = "",
attachments = [],
}) {
const prompt = {
role: "system",
content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
};
return [
prompt,
...formatChatHistory(chatHistory, this.#generateContent),
{
role: "user",
content: this.#generateContent({ userPrompt, attachments }),
},
];
}
async getChatCompletion(messages = null, { temperature = 0.7 }) {
if (!this.model)
throw new Error(
`Docker Model Runner chat: ${this.model} is not a valid or defined model for chat completion!`
);
const result = await LLMPerformanceMonitor.measureAsyncFunction(
this.dmr.chat.completions.create({
model: this.model,
messages,
temperature,
})
);
if (
!result.output.hasOwnProperty("choices") ||
result.output.choices.length === 0
)
return null;
return {
textResponse: result.output.choices[0].message.content,
metrics: {
prompt_tokens: result.output.usage?.prompt_tokens || 0,
completion_tokens: result.output.usage?.completion_tokens || 0,
total_tokens: result.output.usage?.total_tokens || 0,
outputTps: result.output.usage?.completion_tokens / result.duration,
duration: result.duration,
model: this.model,
timestamp: new Date(),
},
};
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
if (!this.model)
throw new Error(
`Docker Model Runner chat: ${this.model} is not a valid or defined model for chat completion!`
);
const measuredStreamRequest = await LLMPerformanceMonitor.measureStream({
func: this.dmr.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
}),
messages,
runPromptTokenCalculation: true,
modelTag: this.model,
});
return measuredStreamRequest;
}
handleStream(response, stream, responseProps) {
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
async embedTextInput(textInput) {
return await this.embedder.embedTextInput(textInput);
}
async embedChunks(textChunks = []) {
return await this.embedder.embedChunks(textChunks);
}
async compressMessages(promptArgs = {}, rawHistory = []) {
await this.assertModelContextLimits();
const { messageArrayCompressor } = require("../../helpers/chat");
const messageArray = this.constructPrompt(promptArgs);
return await messageArrayCompressor(this, messageArray, rawHistory);
}
}
/**
* Parse the base path of the Docker Model Runner endpoint and return the host and port.
* @param {string} basePath - The base path of the Docker Model Runner endpoint.
* @param {'openai' | 'dmr'} to - The provider to parse the endpoint for (internal DMR or openai-compatible)
* @returns {string | null}
*/
function parseDockerModelRunnerEndpoint(basePath = null, to = "openai") {
if (!basePath) return null;
try {
const url = new URL(basePath);
if (to === "openai") url.pathname = "engines/v1";
else if (to === "dmr") url.pathname = "";
return url.toString();
} catch (e) {
return basePath;
}
}
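// Worked example of the rewrite above: given basePath "http://localhost:12434",
// the "openai" target yields "http://localhost:12434/engines/v1" (the
// OpenAI-compatible surface), while the "dmr" target clears the pathname and
// yields "http://localhost:12434/", onto which getDockerModels() below grafts
// the native /models management route.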
/**
* @typedef {Object} DockerRunnerInstalledModel
* @property {string} id - The SHA256 identifier of the model layer/blob.
* @property {string[]} tags - List of tags or aliases associated with this model (e.g., "ai/qwen3:4B-UD-Q4_K_XL").
* @property {number} created - The Unix timestamp (seconds) when the model was created.
* @property {string} config - The configuration of the model.
* @property {string} config.format - The file format (e.g., "gguf").
* @property {string} config.quantization - The quantization level (e.g., "MOSTLY_Q4_K_M", "Q4_0").
* @property {string} config.parameters - The parameter count formatted as a string (e.g., "4.02 B").
* @property {string} config.architecture - The base architecture of the model (e.g., "qwen3", "llama").
* @property {string} config.size - The physical file size formatted as a string (e.g., "2.37 GiB").
* @property {string} config?.gguf - Raw GGUF metadata headers containing tokenizer, architecture details, and licensing.
* @property {string} config?.gguf['general.base_model.0.organization'] - The tokenizer of the model.
* @property {string} config?.gguf['general.basename'] - The base name of the model (the real name of the model, not the tag)
* @property {string} config?.gguf['*.context_length'] - The context length of the model. will be something like qwen3.context_length
*/
/**
* Fetch the remote models from the Docker Hub and cache the results.
* @returns {Promise<Record<string, {id: string, name: string, size: string, organization: string}[]>>}
*/
async function fetchRemoteModels() {
const cachePath = path.resolve(
DockerModelRunnerLLM.cacheFolder,
"models.json"
);
const cachedAtPath = path.resolve(
DockerModelRunnerLLM.cacheFolder,
".cached_at"
);
let cacheTime = 0;
if (fs.existsSync(cachePath) && fs.existsSync(cachedAtPath)) {
cacheTime = Number(fs.readFileSync(cachedAtPath, "utf8"));
if (Date.now() - cacheTime < DockerModelRunnerLLM.cacheTime)
return safeJsonParse(fs.readFileSync(cachePath, "utf8"));
}
DockerModelRunnerLLM.slog(`Refreshing remote models from Docker Hub`);
// Now hit the Docker Hub API to get the remote model namespace and root tags
const availableNamespaces = []; // array of strings like ai/mistral, ai/qwen3, etc
let nextPage =
"https://hub.docker.com/v2/namespaces/ai/repositories?page_size=100&page=1";
while (nextPage) {
const response = await fetch(nextPage)
.then((res) => res.json())
.then((data) => {
const namespaces = data.results
.filter(
(result) =>
result.namespace &&
result.name &&
result.content_types.includes("model") &&
result.namespace === "ai"
)
.map((result) => result.namespace + "/" + result.name);
availableNamespaces.push(...namespaces);
// Return the parsed payload so pagination below can follow `next` links.
return data;
})
.catch((e) => {
DockerModelRunnerLLM.slog(
`Error fetching remote models from Docker Hub`,
e
);
return null;
});
if (!response || !response.next) break;
nextPage = response.next;
}
const availableRemoteModels = {};
const BATCH_SIZE = 10;
// Run batch requests to avoid rate limiting but also
// improve the speed of the total request time.
for (let i = 0; i < availableNamespaces.length; i += BATCH_SIZE) {
const batch = availableNamespaces.slice(i, i + BATCH_SIZE);
DockerModelRunnerLLM.slog(
`Fetching tags for batch ${Math.floor(i / BATCH_SIZE) + 1} of ${Math.ceil(availableNamespaces.length / BATCH_SIZE)}`
);
await Promise.all(
batch.map(async (namespace) => {
const [organization, model] = namespace.split("/");
const namespaceUrl = new URL(
"https://hub.docker.com/v2/namespaces/ai/repositories/" +
model +
"/tags"
);
DockerModelRunnerLLM.slog(
`Fetching tags for ${namespaceUrl.toString()}`
);
await fetch(namespaceUrl.toString())
.then((res) => res.json())
.then((data) => {
const tags = data.results.map((result) => {
return {
id: `${organization}/${model}:${result.name}`,
name: `${model}:${result.name}`,
size: humanFileSize(result.full_size),
organization: model,
};
});
availableRemoteModels[model] = tags;
})
.catch((e) => {
DockerModelRunnerLLM.slog(
`Error fetching tags for ${namespaceUrl.toString()}`,
e
);
});
})
);
}
if (Object.keys(availableRemoteModels).length === 0) {
DockerModelRunnerLLM.slog(
`No remote models found - API may be down or not available`
);
return {};
}
if (!fs.existsSync(DockerModelRunnerLLM.cacheFolder))
fs.mkdirSync(DockerModelRunnerLLM.cacheFolder, { recursive: true });
fs.writeFileSync(cachePath, JSON.stringify(availableRemoteModels), {
encoding: "utf8",
});
fs.writeFileSync(cachedAtPath, String(Number(new Date())), {
encoding: "utf8",
});
return availableRemoteModels;
}
/**
* This function will fetch the remote models from the Docker Hub as well
* as the local models installed on the system.
* @param {string} basePath - The base path of the Docker Model Runner endpoint.
*/
async function getDockerModels(basePath = null) {
let availableModels = {};
/** @type {Record<string, {id: string, name: string, size: string, organization: string}>} */
let installedModels = {};
try {
// Grab the locally installed models from the Docker Model Runner API
const dmrUrl = new URL(
parseDockerModelRunnerEndpoint(
basePath ?? process.env.DOCKER_MODEL_RUNNER_BASE_PATH,
"dmr"
)
);
dmrUrl.pathname = "/models";
await fetch(dmrUrl.toString())
.then((res) => res.json())
.then((data) => {
data?.map((model) => {
const id = model.tags.at(0);
// eg: "ai/qwen3:latest" -> tag "latest", organization "qwen3"
const tag =
id?.split("/").pop()?.split(":")?.at(1) ??
id?.split(":").at(1) ??
"latest";
const organization = id?.split("/").pop()?.split(":")?.at(0) ?? id;
installedModels[id] = {
id: id,
name: `${organization}:${tag}`,
size: model.config?.size ?? "Unknown size",
organization: organization,
};
});
});
// Now hit the Docker Hub API to get the remote model namespace and root tags
const remoteModels = await fetchRemoteModels();
for (const [modelName, tags] of Object.entries(remoteModels)) {
availableModels[modelName] = { tags: [] };
for (const tag of tags) {
if (!installedModels[tag.id])
availableModels[modelName].tags.push({ ...tag, downloaded: false });
else availableModels[modelName].tags.push({ ...tag, downloaded: true });
}
}
} catch (e) {
DockerModelRunnerLLM.slog(`Error getting Docker models`, e);
} finally {
return Object.values(availableModels).flatMap((m) => m.tags);
}
}
module.exports = {
DockerModelRunnerLLM,
parseDockerModelRunnerEndpoint,
getDockerModels,
};
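Taken together, a hedged sketch of how these exports compose. The env values and require path are illustrative only:

// Illustrative usage, assuming the env vars from the .env examples above.
process.env.DOCKER_MODEL_RUNNER_BASE_PATH = "http://127.0.0.1:12434";
process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF = "ai/qwen3:latest";

const { getDockerModels } = require("./dockerModelRunner"); // hypothetical path

(async () => {
  // Flat array of { id, name, size, organization, downloaded } entries merged
  // from the local /models endpoint and the cached Docker Hub catalog.
  const models = await getDockerModels();
  console.log(models.filter((m) => m.downloaded).map((m) => m.id));
})();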

@@ -992,6 +992,8 @@ ${this.getHistory({ to: route.to })
       case "giteeai":
         return new Providers.GiteeAIProvider({ model: config.model });
       case "cohere":
         return new Providers.CohereProvider({ model: config.model });
+      case "docker-model-runner":
+        return new Providers.DockerModelRunnerProvider({ model: config.model });
       default:
         throw new Error(
           `Unknown provider: ${config.provider}. Please use a valid provider.`

@@ -18,6 +18,9 @@ const { ChatOllama } = require("@langchain/community/chat_models/ollama");
 const { toValidNumber, safeJsonParse } = require("../../../http");
 const { getLLMProviderClass } = require("../../../helpers");
 const { parseLMStudioBasePath } = require("../../../AiProviders/lmStudio");
+const {
+  parseDockerModelRunnerEndpoint,
+} = require("../../../AiProviders/dockerModelRunner");
 const { parseFoundryBasePath } = require("../../../AiProviders/foundry");
 const {
   SystemPromptVariables,
@@ -313,6 +316,16 @@ class Provider {
           ...config,
         });
       }
+      case "docker-model-runner":
+        return new ChatOpenAI({
+          configuration: {
+            baseURL: parseDockerModelRunnerEndpoint(
+              process.env.DOCKER_MODEL_RUNNER_BASE_PATH
+            ),
+          },
+          apiKey: null,
+          ...config,
+        });
       default:
         throw new Error(`Unsupported provider ${provider} for this task.`);
     }

@@ -0,0 +1,101 @@
const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const {
parseDockerModelRunnerEndpoint,
} = require("../../../AiProviders/dockerModelRunner/index.js");
/**
* The agent provider for the Docker Model Runner.
*/
class DockerModelRunnerProvider extends InheritMultiple([Provider, UnTooled]) {
model;
/**
*
* @param {{model?: string}} config
*/
constructor(config = {}) {
super();
const model =
config?.model || process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF || null;
const client = new OpenAI({
baseURL: parseDockerModelRunnerEndpoint(
process.env.DOCKER_MODEL_RUNNER_BASE_PATH
),
apiKey: null,
maxRetries: 3,
});
this._client = client;
this.model = model;
this.verbose = true;
}
get client() {
return this._client;
}
get supportsAgentStreaming() {
return true;
}
async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({
model: this.model,
messages,
})
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("Docker Model Runner chat: No results!");
if (result.choices.length === 0)
throw new Error("Docker Model Runner chat: No results length!");
return result.choices[0].message.content;
})
.catch((_) => {
return null;
});
}
async #handleFunctionCallStream({ messages = [] }) {
return await this.client.chat.completions.create({
model: this.model,
stream: true,
messages,
});
}
async stream(messages, functions = [], eventHandler = null) {
return await UnTooled.prototype.stream.call(
this,
messages,
functions,
this.#handleFunctionCallStream.bind(this),
eventHandler
);
}
async complete(messages, functions = []) {
return await UnTooled.prototype.complete.call(
this,
messages,
functions,
this.#handleFunctionCallChat.bind(this)
);
}
/**
* Get the cost of the completion.
*
* @param _usage The completion to get the cost for.
* @returns The cost of the completion.
* Stubbed since Docker Model Runner has no cost basis.
*/
getCost(_usage) {
return 0;
}
}
module.exports = DockerModelRunnerProvider;
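For reference, a minimal sketch of driving this provider directly. The model id and message are placeholders, and the exact return shape of complete comes from the UnTooled helper, so treat it as an assumption:

const DockerModelRunnerProvider = require("./dockerModelRunner.js");

const provider = new DockerModelRunnerProvider({ model: "ai/qwen3:latest" });
// `complete` is inherited from UnTooled, which emulates tool calling for
// models without native function-call support.
provider
  .complete([{ role: "user", content: "Hello!" }], [])
  .then((res) => console.log(res));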

@@ -29,6 +29,7 @@ const CometApiProvider = require("./cometapi.js");
 const FoundryProvider = require("./foundry.js");
 const GiteeAIProvider = require("./giteeai.js");
 const CohereProvider = require("./cohere.js");
+const DockerModelRunnerProvider = require("./dockerModelRunner.js");

 module.exports = {
   OpenAIProvider,
@@ -62,4 +63,5 @@ module.exports = {
   FoundryProvider,
   GiteeAIProvider,
   CohereProvider,
+  DockerModelRunnerProvider,
 };

@@ -217,6 +217,12 @@ class AgentHandler {
         if (!process.env.COHERE_API_KEY)
           throw new Error("Cohere API key must be provided to use agents.");
         break;
+      case "docker-model-runner":
+        if (!process.env.DOCKER_MODEL_RUNNER_BASE_PATH)
+          throw new Error(
+            "Docker Model Runner base path must be provided to use agents."
+          );
+        break;
       default:
         throw new Error(
           "No workspace agent provider set. Please set your agent provider in the workspace's settings"
@@ -297,6 +303,8 @@
         return process.env.GITEE_AI_MODEL_PREF ?? null;
       case "cohere":
         return process.env.COHERE_MODEL_PREF ?? "command-r-08-2024";
+      case "docker-model-runner":
+        return process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF ?? null;
       default:
         return null;
     }

@@ -13,6 +13,7 @@ const { fetchPPIOModels } = require("../AiProviders/ppio");
 const { GeminiLLM } = require("../AiProviders/gemini");
 const { fetchCometApiModels } = require("../AiProviders/cometapi");
 const { parseFoundryBasePath } = require("../AiProviders/foundry");
+const { getDockerModels } = require("../AiProviders/dockerModelRunner");

 const SUPPORT_CUSTOM_MODELS = [
   "openai",
@@ -43,6 +44,7 @@ const SUPPORT_CUSTOM_MODELS = [
   "cohere",
   "zai",
   "giteeai",
+  "docker-model-runner",
   // Embedding Engines
   "native-embedder",
   "cohere-embedder",
@@ -116,6 +118,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
       return await getOpenRouterEmbeddingModels();
     case "giteeai":
       return await getGiteeAIModels(apiKey);
+    case "docker-model-runner":
+      return await getDockerModelRunnerModels(basePath);
     default:
       return { models: [], error: "Invalid provider for custom models" };
   }
@@ -864,6 +868,19 @@ async function getOpenRouterEmbeddingModels() {
   return { models, error: null };
 }

+async function getDockerModelRunnerModels(basePath = null) {
+  try {
+    const models = await getDockerModels(basePath);
+    return { models, error: null };
+  } catch (e) {
+    console.error(`DockerModelRunner:getDockerModelRunnerModels`, e.message);
+    return {
+      models: [],
+      error: "Could not fetch Docker Model Runner Models",
+    };
+  }
+}
+
 module.exports = {
   getCustomModels,
   SUPPORT_CUSTOM_MODELS,

@@ -229,6 +229,11 @@ function getLLMProvider({ provider = null, model = null } = {}) {
     case "giteeai":
       const { GiteeAILLM } = require("../AiProviders/giteeai");
       return new GiteeAILLM(embedder, model);
+    case "docker-model-runner":
+      const {
+        DockerModelRunnerLLM,
+      } = require("../AiProviders/dockerModelRunner");
+      return new DockerModelRunnerLLM(embedder, model);
     default:
       throw new Error(
         `ENV: No valid LLM_PROVIDER value found in environment! Using ${process.env.LLM_PROVIDER}`
@@ -394,6 +399,11 @@ function getLLMProviderClass({ provider = null } = {}) {
     case "giteeai":
       const { GiteeAILLM } = require("../AiProviders/giteeai");
       return GiteeAILLM;
+    case "docker-model-runner":
+      const {
+        DockerModelRunnerLLM,
+      } = require("../AiProviders/dockerModelRunner");
+      return DockerModelRunnerLLM;
     default:
       return null;
   }
@@ -470,6 +480,8 @@ function getBaseLLMProviderModel({ provider = null } = {}) {
       return process.env.ZAI_MODEL_PREF;
     case "giteeai":
       return process.env.GITEE_AI_MODEL_PREF;
+    case "docker-model-runner":
+      return process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF;
     default:
       return null;
   }
@@ -495,6 +507,30 @@ function toChunks(arr, size) {
   );
 }

+function humanFileSize(bytes, si = false, dp = 1) {
+  const thresh = si ? 1000 : 1024;
+  if (Math.abs(bytes) < thresh) {
+    return bytes + " B";
+  }
+  const units = si
+    ? ["kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]
+    : ["KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"];
+  let u = -1;
+  const r = 10 ** dp;
+  do {
+    bytes /= thresh;
+    ++u;
+  } while (
+    Math.round(Math.abs(bytes) * r) / r >= thresh &&
+    u < units.length - 1
+  );
+  return bytes.toFixed(dp) + " " + units[u];
+}
+
 module.exports = {
   getEmbeddingEngineSelection,
   maximumChunkLength,
@@ -503,4 +539,5 @@ module.exports = {
   getBaseLLMProviderModel,
   getLLMProvider,
   toChunks,
+  humanFileSize,
 };
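The humanFileSize helper ported in above follows the familiar SI/binary pattern; a few worked values, checkable by hand:

humanFileSize(2370000000, true, 2); // "2.37 GB" (SI units, as used for Docker Hub tag sizes)
humanFileSize(4096);                // "4.0 KiB" (binary units by default)
humanFileSize(512);                 // "512 B"   (below the threshold, returned unchanged)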

@@ -789,6 +789,20 @@ const KEY_MAPPING = {
     envKey: "GITEE_AI_MODEL_TOKEN_LIMIT",
     checks: [nonZero],
   },
+
+  // Docker Model Runner Options
+  DockerModelRunnerBasePath: {
+    envKey: "DOCKER_MODEL_RUNNER_BASE_PATH",
+    checks: [isValidURL],
+  },
+  DockerModelRunnerModelPref: {
+    envKey: "DOCKER_MODEL_RUNNER_LLM_MODEL_PREF",
+    checks: [isNotEmpty],
+  },
+  DockerModelRunnerModelTokenLimit: {
+    envKey: "DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT",
+    checks: [nonZero],
+  },
 };

 function isNotEmpty(input = "") {
@@ -902,6 +916,7 @@ function supportedLLM(input = "") {
     "foundry",
     "zai",
     "giteeai",
+    "docker-model-runner",
   ].includes(input);
   return validSelection ? null : `${input} is not a valid LLM provider.`;
 }