From ff7cb17e34cedc97fb465303baec9f0bdee176ce Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Wed, 14 Jan 2026 15:55:26 -0800 Subject: [PATCH] Improved DMR support (#4863) * Improve DMR support - Autodetect models installed - Grab all models from hub.docker to show available - UI to handle render,search, install, and management of models - Support functionality for chat, stream, and agentic calls * forgot files * fix loader circle being too large fix tooltip width command adjust location of docker installer open for web platform * adjust imports --- .github/workflows/dev-build.yaml | 2 +- README.md | 1 + docker/.env.example | 5 + .../LLMSelection/DPAISOptions/index.jsx | 2 +- .../ModelTable/index.jsx | 314 +++++++++++++ .../DockerModelRunnerOptions/index.jsx | 426 +++++++++++++++++ .../LLMSelection/FoundryOptions/index.jsx | 2 +- .../GenericOpenAiOptions/index.jsx | 2 +- .../LLMSelection/GiteeAIOptions/index.jsx | 2 +- .../LLMSelection/KoboldCPPOptions/index.jsx | 2 +- .../LLMSelection/LiteLLMOptions/index.jsx | 2 +- .../LLMSelection/LocalAiOptions/index.jsx | 2 +- .../TextGenWebUIOptions/index.jsx | 2 +- .../components/ProviderPrivacy/constants.js | 8 + frontend/src/hooks/useGetProvidersModels.js | 1 + .../media/llmprovider/docker-model-runner.png | Bin 0 -> 8676 bytes .../GeneralSettings/LLMPreference/index.jsx | 30 ++ .../Steps/LLMPreference/index.jsx | 9 + .../AgentConfig/AgentLLMSelection/index.jsx | 2 + frontend/src/utils/constants.js | 8 + locales/README.fa-IR.md | 1 + locales/README.ja-JP.md | 1 + locales/README.tr-TR.md | 1 + locales/README.zh-CN.md | 1 + server/.env.example | 5 + server/endpoints/utils.js | 3 + server/models/systemSettings.js | 7 + server/storage/models/.gitignore | 3 +- .../AiProviders/dockerModelRunner/index.js | 434 ++++++++++++++++++ server/utils/agents/aibitat/index.js | 2 + .../agents/aibitat/providers/ai-provider.js | 13 + .../aibitat/providers/dockerModelRunner.js | 101 ++++ .../utils/agents/aibitat/providers/index.js | 2 + 
server/utils/agents/index.js | 8 + server/utils/helpers/customModels.js | 17 + server/utils/helpers/index.js | 37 ++ server/utils/helpers/updateENV.js | 15 + 37 files changed, 1463 insertions(+), 10 deletions(-) create mode 100644 frontend/src/components/LLMSelection/DockerModelRunnerOptions/ModelTable/index.jsx create mode 100644 frontend/src/components/LLMSelection/DockerModelRunnerOptions/index.jsx create mode 100644 frontend/src/media/llmprovider/docker-model-runner.png create mode 100644 server/utils/AiProviders/dockerModelRunner/index.js create mode 100644 server/utils/agents/aibitat/providers/dockerModelRunner.js diff --git a/.github/workflows/dev-build.yaml b/.github/workflows/dev-build.yaml index 5094c941..62f2dd98 100644 --- a/.github/workflows/dev-build.yaml +++ b/.github/workflows/dev-build.yaml @@ -6,7 +6,7 @@ concurrency: on: push: - branches: ['4822-feat-remove-workspace-creation-onboarding-page'] # put your current branch to create a build. Core team only. + branches: ['4391-dmr-support'] # put your current branch to create a build. Core team only. paths-ignore: - '**.md' - 'cloud-deployments/*' diff --git a/README.md b/README.md index 4aabec7e..7b98c252 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,7 @@ AnythingLLM divides your documents into objects called `workspaces`. 
A Workspace - [Moonshot AI](https://www.moonshot.ai/) - [Microsoft Foundry Local](https://github.com/microsoft/Foundry-Local) - [CometAPI (chat models)](https://api.cometapi.com/) +- [Docker Model Runner](https://docs.docker.com/ai/model-runner/) **Embedder models:** diff --git a/docker/.env.example b/docker/.env.example index c3fa5544..b17517e5 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -162,6 +162,11 @@ GID='1000' # GITEE_AI_MODEL_PREF= # GITEE_AI_MODEL_TOKEN_LIMIT= +# LLM_PROVIDER='docker-model-runner' +# DOCKER_MODEL_RUNNER_BASE_PATH='http://127.0.0.1:12434' +# DOCKER_MODEL_RUNNER_LLM_MODEL_PREF='phi-3.5-mini' +# DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT=4096 + ########################################### ######## Embedding API SElECTION ########## ########################################### diff --git a/frontend/src/components/LLMSelection/DPAISOptions/index.jsx b/frontend/src/components/LLMSelection/DPAISOptions/index.jsx index 00995a28..78f09532 100644 --- a/frontend/src/components/LLMSelection/DPAISOptions/index.jsx +++ b/frontend/src/components/LLMSelection/DPAISOptions/index.jsx @@ -33,7 +33,7 @@ export default function DellProAIStudioOptions({ />
} props.models - The models to display. + * @param {(model: string, progressCallback: (percentage: number) => void) => void} props.downloadModel - The function to download the model. + * @param {(model: string) => void} props.uninstallModel - The function to uninstall the model. + * @param {(model: string) => void} props.setActiveModel - The function to set the active model. + * @param {string} props.selectedModelId - The ID of the selected model. + * @param {object} props.ui - The UI configuration. + * @param {boolean} props.ui.showRuntime - Whether to show the runtime. + * @returns {React.ReactNode} + */ +export default function ModelTable({ + alias = "", + models = [], + downloadModel = null, + uninstallModel = null, + setActiveModel = () => {}, + selectedModelId = "", + ui = { + showRuntime: true, + }, +}) { + const [showAll, setShowAll] = useState( + models.some((model) => model.downloaded) + ); + const totalModels = models.length; + + return ( +
+ + +
+ ); +} + +/** + * @param {{deviceType: ModelDefinition["deviceType"]}} deviceType + * @returns {React.ReactNode} + */ +function DeviceTypeTag({ deviceType }) { + const Wrapper = ({ text, bgClass, textClass }) => { + return ( +
+ +

{text}

+
+ ); + }; + + switch (deviceType?.toLowerCase()) { + case "cpu": + return ( + + ); + case "gpu": + return ( + + ); + case "npu": + return ( + + ); + default: + return ( + + ); + } +} + +/** + * @param {object} props - The props of the component. + * @param {ModelDefinition} props.model - The model to display. + * @param {(model: string, progressCallback: (percentage: number) => void) => Promise} props.downloadModel - The function to download the model. + * @param {(model: string) => Promise} props.uninstallModel - The function to uninstall the model. + * @param {(model: string) => void} props.setActiveModel - The function to set the active model. + * @param {string} props.selectedModelId - The ID of the selected model. + * @param {object} props.ui - The UI configuration. + * @param {boolean} props.ui.showRuntime - Whether to show the runtime. + * @returns {React.ReactNode} + */ +function ModelRow({ + model, + downloadModel = null, + uninstallModel = null, + setActiveModel, + selectedModelId, + ui = { + showRuntime: true, + }, +}) { + const modelRowRef = useRef(null); + const [showOptions, setShowOptions] = useState(false); + const [processing, setProcessing] = useState(false); + const [_downloadPercentage, setDownloadPercentage] = useState(0); + const fileSize = + typeof model.size === "number" + ? humanFileSize(model.size * 1e6, true, 2) + : (model.size ?? 
"Unknown size"); + const [isActiveModel, setIsActiveModel] = useState( + selectedModelId === model.id + ); + + async function handleSetActiveModel() { + setDownloadPercentage(0); + if (model.downloaded) setActiveModel(model.id); + else { + try { + if (!downloadModel) return; + setProcessing(true); + await downloadModel(model.id, fileSize, (percentage) => + setDownloadPercentage(percentage) + ); + } catch { + } finally { + setProcessing(false); + } + } + } + + async function handleUninstallModel() { + if (!uninstallModel) return; + try { + setProcessing(true); + await uninstallModel(model.id); + } catch { + } finally { + setProcessing(false); + } + } + + useEffect(() => { + if (selectedModelId === model.id) { + setIsActiveModel(true); + modelRowRef.current.classList.add("!bg-gray-200/10"); + setTimeout( + () => modelRowRef.current.classList.remove("!bg-gray-200/10"), + 800 + ); + } else { + setIsActiveModel(false); + } + }, [selectedModelId]); + + return ( +
+ + +
+ {isActiveModel && ( +
+ +

Active

+
+ )} + + {!isActiveModel && model.downloaded && !uninstallModel && ( +

+ Installed +

+ )} + + {!model.downloaded && ( +

+ Not Installed +

+ )} +
+ +
+ {uninstallModel && model.downloaded ? ( + <> + + {showOptions && ( +
+ +
+ )} + + ) : null} + {!model.downloaded ? ( + + ) : null} +
+
+ ); +} diff --git a/frontend/src/components/LLMSelection/DockerModelRunnerOptions/index.jsx b/frontend/src/components/LLMSelection/DockerModelRunnerOptions/index.jsx new file mode 100644 index 00000000..cd38cc0f --- /dev/null +++ b/frontend/src/components/LLMSelection/DockerModelRunnerOptions/index.jsx @@ -0,0 +1,426 @@ +import { useState, useEffect } from "react"; +import System from "@/models/system"; +import useProviderEndpointAutoDiscovery from "@/hooks/useProviderEndpointAutoDiscovery"; +import { + ArrowClockwise, + CircleNotch, + MagnifyingGlass, + Info, +} from "@phosphor-icons/react"; +import strDistance from "js-levenshtein"; +import { LLM_PREFERENCE_CHANGED_EVENT } from "@/pages/GeneralSettings/LLMPreference"; +import { DOCKER_MODEL_RUNNER_COMMON_URLS } from "@/utils/constants"; +import { Tooltip } from "react-tooltip"; +import { Link } from "react-router-dom"; +import ModelTable from "./ModelTable"; +import * as Skeleton from "react-loading-skeleton"; +import "react-loading-skeleton/dist/skeleton.css"; + +export default function DockerModelRunnerOptions({ settings }) { + const { + autoDetecting: loading, + basePath, + basePathValue, + handleAutoDetectClick, + } = useProviderEndpointAutoDiscovery({ + provider: "docker-model-runner", + initialBasePath: settings?.DockerModelRunnerBasePath, + ENDPOINTS: DOCKER_MODEL_RUNNER_COMMON_URLS, + }); + + const [maxTokens, setMaxTokens] = useState( + settings?.DockerModelRunnerModelTokenLimit || 4096 + ); + + return ( +
+
+
+
+
+ + {loading ? ( + + ) : ( + <> + {!basePathValue.value && ( + + )} + + )} +
+ + Enter the URL where the Docker Model Runner is running. +
+
+ You must have enabled the Docker Model Runner TCP support + for this to work. +
+
+ + Learn more → + +
+
+ +
+
+ + +
+
+
+ + + The maximum number of tokens that can be used for a model context + window. +
+
+ To set the context window limit for a model, you can use the{" "} + docker run command with the{" "} + --context-window parameter. +
+
+ + docker model configure --context-size 8192 ai/qwen3:latest + +
+
+ + Learn more → + +
+
+ +
+
+ setMaxTokens(Number(e.target.value))} + onScroll={(e) => e.target.blur()} + required={true} + autoComplete="off" + /> +
+ +
+
+ ); +} + +function DockerModelRunnerModelSelection({ settings, basePath = null }) { + const [selectedModelId, setSelectedModelId] = useState( + settings?.DockerModelRunnerModelPref + ); + const [customModels, setCustomModels] = useState([]); + const [filteredModels, setFilteredModels] = useState([]); + const [loading, setLoading] = useState(true); + const [searchQuery, setSearchQuery] = useState(""); + + async function fetchModels() { + if (!basePath) { + setCustomModels([]); + setFilteredModels([]); + setLoading(false); + setSearchQuery(""); + return; + } + setLoading(true); + const { models } = await System.customModels( + "docker-model-runner", + null, + basePath + ); + setCustomModels(models || []); + setFilteredModels(models || []); + setSearchQuery(""); + setLoading(false); + } + + useEffect(() => { + fetchModels(); + }, [basePath]); + + useEffect(() => { + if (!searchQuery || !customModels.length) { + setFilteredModels(customModels || []); + return; + } + + const normalizedSearchQuery = searchQuery.toLowerCase().trim(); + const filteredModels = new Map(); + + customModels.forEach((model) => { + const modelNameNormalized = model.name.toLowerCase(); + const modelOrganizationNormalized = model.organization.toLowerCase(); + + if (modelNameNormalized.startsWith(normalizedSearchQuery)) + filteredModels.set(model.id, model); + if (modelOrganizationNormalized.startsWith(normalizedSearchQuery)) + filteredModels.set(model.id, model); + if (strDistance(modelNameNormalized, normalizedSearchQuery) <= 2) + filteredModels.set(model.id, model); + if (strDistance(modelOrganizationNormalized, normalizedSearchQuery) <= 2) + filteredModels.set(model.id, model); + }); + + setFilteredModels(Array.from(filteredModels.values())); + }, [searchQuery]); + + function downloadModel(modelId, _fileSize, progressCallback) { + const [name, tag] = modelId.split(":"); + + // Open the model in the Docker Hub (via browser since they may not be installed locally) + 
window.open(`https://hub.docker.com/layers/${name}/${tag}`, "_blank"); + progressCallback(100); + } + + function groupModelsByAlias(models) { + const mapping = new Map(); + mapping.set("installed", new Map()); + mapping.set("not installed", new Map()); + + const groupedModels = models.reduce((acc, model) => { + acc[model.organization] = acc[model.organization] || []; + acc[model.organization].push(model); + return acc; + }, {}); + + Object.entries(groupedModels).forEach(([organization, models]) => { + const hasInstalled = models.some((model) => model.downloaded); + if (hasInstalled) { + const installedModels = models.filter((model) => model.downloaded); + mapping + .get("installed") + .set("Downloaded Models", [ + ...(mapping.get("installed").get("Downloaded Models") || []), + ...installedModels, + ]); + } + const tags = models.map((model) => ({ + ...model, + name: model.name.split(":")[1], + })); + mapping.get("not installed").set(organization, tags); + }); + + const orderedMap = new Map(); + mapping + .get("installed") + .entries() + .forEach(([organization, models]) => + orderedMap.set(organization, models) + ); + mapping + .get("not installed") + .entries() + .forEach(([organization, models]) => + orderedMap.set(organization, models) + ); + return Object.fromEntries(orderedMap); + } + + function handleSetActiveModel(modelId) { + if (modelId === selectedModelId) return; + setSelectedModelId(modelId); + window.dispatchEvent(new Event(LLM_PREFERENCE_CHANGED_EVENT)); + } + + const groupedModels = groupModelsByAlias(filteredModels); + return ( + + + {loading ? ( + + ) : filteredModels.length === 0 ? ( +
+

No models found!

+
+ ) : ( + Object.entries(groupedModels).map(([alias, models]) => ( + + )) + )} +
+ ); +} + +function Layout({ + children, + fetchModels = null, + searchQuery = "", + setSearchQuery = () => {}, + loading = false, +}) { + const [isRefreshing, setIsRefreshing] = useState(false); + async function refreshModels() { + setIsRefreshing(true); + try { + await fetchModels?.(); + } catch { + } finally { + setIsRefreshing(false); + } + } + + return ( +
+
+ +
+
+
+ + { + e.preventDefault(); + e.stopPropagation(); + setSearchQuery(e.target.value); + }} + /> +
+ {!!fetchModels && ( + + )} +
+ {children} +
+ ); +} + +function LoadingSkeleton() { + return ( +
+ +
+ ); +} diff --git a/frontend/src/components/LLMSelection/FoundryOptions/index.jsx b/frontend/src/components/LLMSelection/FoundryOptions/index.jsx index e03c62d6..7d5d380c 100644 --- a/frontend/src/components/LLMSelection/FoundryOptions/index.jsx +++ b/frontend/src/components/LLMSelection/FoundryOptions/index.jsx @@ -92,7 +92,7 @@ export default function FoundryOptions({ settings }) {
&zW;NsbDj8hKFl?<)?Vwka_@WZea{O`4P_ESIzl`=JQA3Sf;JxBHJ+=_ z?dv$s0TV4N?hD|iV&s8`NBZUJb1jLR^d27G%?f*6L$u)|b#ZG~XFfQ>)e6by118@o|G&J0j7{R!CcW7b%X-`eqJhdxR8+f$$^#M{e>+JA0KUD5TC44PEOej@A$a zhm16#q>nfbz!`~#Gy6C@xp;{CNOAnZ703O)VuLwwLMVidxVD1Q-y%3lio*_#b`uAK zy}iBpyoLB&QMOlYEA@d&$1*C^H%H9oa@9M&Q zg$cKE^+Zc?a1b*84K0U8qwKBzL3i=sga6@pUa9cmf(FA89$*1J{wuebnI->{7nehL zT#kpWpf+uYkxyLEeYRht|9hQ3!+=&MzX&FDm-b zM#M(oAE3Wf{u4&Y3Wp*rC@d%<3=w!JAPNx{$-Yc+g*hs{Vxaq#rq>G z3BJm%ll@;w`9u7jm4AT$O4(n6|5ld&{Zb*U|0$E3C(7wh3m~k)NGGH-(gl50B!Rz+ zgs>Jz+oPS3|0$&_uK4`I3eTXSEv-=8^l)p)o8TuefL$AqKA-)SujKyw2(Ih@`1^mAU);3(C&B;Wh!5QD-(~<#O#Dyhx}tPlU7e)m;VxcqW`wJ? zl!BbHv#_R?D%xF++0z4wlG2k?veL3ug{sQ)Gh3nHE(j^;6~{&Jp+3x&8I5#ybHYsk zDM|2u#r&Vf@mFZLRROm?fd5%VaKu0B5E5sB1BF{JhD?U?ov_gTvUxt-R zDZI1FnhRn|(ctwXFM5dzr}d)Jd5PnsjI z~vzu0oEJ9xmxlrZBQuk7U&^cY%u_y;l->%z)SlnSc zsgAwpHziXNKMCLwKF&r@LNB#-v_ox`25Omz7vPw?vj2VJM`CdNpG>ei1_odpW2)p* z1!ggdI@_-Y7fKwqE-ow;TC^SC-vp+nB9tJrDdKlkGuAn!JLF;gvc`LvPXV|bKeujq zCOA-?_%gI|gQjj9Rs$|75lasWvT45n(iy+nYW(pDQXv`GIDN~G|?#Fz{4;xtLRqiUm2iBYI~ zHOG&Z`+%hZ-TVNv@5MG2O`+2qZq^(5kf2bVgz}MZ zK;vajLZh1Y0Ms=g6gtGRbdGEwwXeShV9ESsS${a3*_nWiL)<^5#CWelp(=K)<9RjR z_q<1+(0j}Mo<^Zfx$jl`US&4G4v_UL@to{v3wMf>g#_9oHbwg@xH zP@FpddO{&YFm^45b0Op-{KaOX**AHW+W^JZ10*x&Y+Y6YMaV5ir&|n-Td_>;Mg6eU z)$j;BTx4g+*4%1dZp zFW)L|Mnl|0&Iz@@KPRnX2r9ll!O(45+o`MywXIO@kWQI0@Op0LFt51za=L_3)MP093Qy(NK>g#Gm!Pqs~~ z!$*JS546u!ZMJ%H%OzK0uvPs&7M0vLp9(O|e<@uJgm?!%N?1r4; zjauU0A1RYQLZxuhPpRhHG2s`4_*?5NcdvyUmXZZ1DmT@!e4KOMpQurjnYSfX-tdKu zwRiPj?5ed8)b5)uGZ~RYoyQBiDtc~+9J`~|51k3In-8@FIuESGcxNxBDV`H>a1)P& z@ovnjkL5h zdkS2_tV7&%;f7iq9|)em9A^)D1%+zrFRULyO+;r$qKbwg4`nbTRx>%l>xI!$_R>#j5UkZE`f0n5Nn1C3c>nBb( z7Wxpwh9aW6s~+(ZhmANozY(`?Uv4>BX(t)&pO-Q0*T6hvpOqzKP07^ z9g=On15$lwX!Tc`8;@CuPR0`&03umHX_I-5A%N-oac<{R0w+_S{FbvxYc%Sz1~KLs zvb5rh*P~F^cdHiTFTYF}j8m@|99b-;5CEy@RK&C*65L2)jItB_w88mrbq9N)M&}dX z#Q=c3gF$F(1*b-94UtxpS+?)#JUi33GP1pdSz!44n6P+>+@sa1L0nFSl?gJOV$N1& zFRd}Bj1#mM+Z$WpgF~4?#sUb?2#s=Uo2qE`BJ)-hdZ9McOCvdQ?T_JYMT^MLHxgD) 
zGA!%XyUyO^Uc}Vm|EL%cZG`#9ph`Nl=I@`vq`|45j8T`M%eAcQ7DMZe&U#CtqK9Xh zQai@BtQj1jvQOmMPeY&K12qzVfu`rx%46DZlT9Sa68MZuw0)r5hjspl*Nr~rz{>WW zTkiDXpO*__V*+o%B*_f3mV*-g&6~*r5W~&o@Y$-G_G@wSkHgN*mF>FWK|( zVQX*a4Mj|7L~9$qMyVa|sS5&_CZ&-84~gBj^g8n%-AaUn;#zKqsev9v^8k4PTgAkN zhR9sw_yChPEQzmwkXcGjJw8FI%YJnX;?xk`moUzYEVHgTm#J2!4_wc8*jk+sgDFbQ z9F=sRt;0lWwsBiQbP|WZN#BnUhpUZX{>ZfS3>|YyfMVkh>`+@^-F!+JRaG(3)a#+f z0>{*0-<(Ak+RCx`izk6^Tlb9!b$EP_=4Tog*85*=Hf9wZ_1Nv`i}(a)W-uEO&54%n zCzSOMR3PiNkI|fY%3jZv4@_t30w4H~t(Fq{OY|Q&FJ1>@Xe18@=#F=%OfVnQXn%EW zj=+&MNUv*gPfjI9I-g*_Fq_A-cxCu6ivKeGSESGFk8!s?*66`_Og^nW7S8)EAwTtGwOynth#&V9ed*^ z*n_3U>U=Ou0##yPGQQPd+A55Zh`mg@vS-CwTm#_^AVm}~1?T}T@-6`QL53{S#yg&GId%I3u< z(^X#9(>?a0;0nZ2k#`g{o*W8+Z7}{M{w22v07mCR6y1_rD6x9=B(==}a;xL=)*LyL z&YrzrzoCbtis1P$uh49VGVo!>E3M_ESgg4PtHB;efNJO-!Tk{LD|pN^vbn%9PIe*iUrd!jHmN=M<_Zd>{I2+C6^zZ7z*^jWIIBRzv)= zls@gNilrsXlz+2&ti)E+JFYQl?1>I-JXLH|bv0e&`2FLTP6+jh4&=8WezUhdYEP9$ ztxFWVwCnUBn1X5lMd4Y8T&eNT^pSNo>MN5M8#P-(VQh~5%2V~v+1gMS!f7Hc^k9{6 zLy2|Y<<~?F^^Gs88VkrLzc6SI+_GND%#o}KXB*A!-4n+&0^5gnc)2wh6hohth;Dx# zWkmks>y11}cA9(k>Q+PIdJ_+Q7#Ok3r8uQ7WO zVnz@8lJSbnja3@__Ec3OjwDP36oSG5Z*0skXxYJY=9Oy{>9BxJ{40CNn#V&EK~CA)@`OJeTTH$IQQbKshQ>X{=h<>nCncXtbdh6F$ECCQyHN9A!Z>NCjPEW!+K*MSh}t@|DLDxMV1&>RH3 zdk`!skZzC#VnZ<+bG|r5#*uRX#^z zOq}Y#=ry=}FH4ght^D0U#Axvx6`tD`(Q;U}jM$8=k`u^xqvfDi|F&to-Xe}^dbq{LroD_QYwr>MvQVHOe>o-A^UkG1V6(V+OeXY8e!10F^6zwifC*&?As9fNPOkt9r%6>4SH$ML3jj>4 zjQ@UAK1TM=6<>h0SZD7*FI}GVO(e-;_Nf`g>hTuo`?9ncEy6GP`HsW&DH+;%o#X0q z=HO764FwM>h$eBGj;c-Hyv+(I)<75K85`hwkI@cF48X=0Y!^#W>p6|lA#nch_v$jy ze!lVO_F?%Fe)CTJ$@II6mm`U|k(o3vpI@I0xh`eK4ZLL)|b&e#`w zna^6Ev+Fm(EKHo|tu*J@StOL#_`YnR!L97otPY14uju_ih~Cz|%spWueniYNNn=dC z_ z?QF5lUi(=}5fpbt9?Bd`csm0E0aJY)^t=3;LF&BgS}$;iJbdfXfPSO9TZyshwViH@ z&yfLjM|6@3ghW`m{Dk)-YGp^{st`Q|T1j@@>Y+n6ZMLu0k5pQ;n3A@7AWlWkh>1Gx z0BX@+R)%Q@nRu;UlncBlb0y|P>J#<07(u;A=k1Trm)R5vZ#xn;xxTj0w(vF6KNo;P!eBv~I2gG;d2}eGg=%X{hc^!#_ zEwTWQXLBVFh`XhSKl^4(sGU1~jS$=v6!0;fWGFUev(!b15YxY`z$w8jyc`C!NC4e@Ne;9={#BcrqQq%; 
z+Sq&25BjZHY6OYikPBfxUEU^6qiDm`CZOEE>E;g_9ceh*&RfHo-+D|jPlFOP2?3P7 zZA?XEr&(d`2bsv9riWu9p(o_Dx3cf1WGes{e-!%91DT%KIni?nlxNT#wV5@-^rUHg zj8s9HS$Zm?88P1{ad!PTtojxqB3sj3*9#`7Q^@WeoQLiAg3* zi>+-{Xa1I#Zix_&$!PiCF66wy(l0%KgbLB6=jc?^L zfePO}xpSZ89t2bp{o0OtQxi_a`=udHnfuH@eDNyiL*vw#$cg-_~UCD*^Nc}AL%vy*x{^&>8 z)TTUd(5()F(^mmBcs!IgiJDz&fhHb&C*~hiiY@1Lv%p4Ep~7d&!E7Upd1@!?lP;*j z`N?jUflDJ*b*{{K1L1wL_kra?lIf=6!Y=xbz<^sO;u9XrStQNebd8q>DH-X&DzcLo zEe$|H)R+Rs@(6cpLrQI?Ds_tYX1Qp>m=v>%!u`FAAMk#yTu?k%6Df*UXuL!KDjXPiLkw@GD zAr?xLbd@G>JJ&;ADFg6UyoC6@F4-2b(MGPtj3;fVQCQ$%-e*^fH5o?LMf`i;X?*wz zlH+1q@QcTjcD({uyACd<^gX{5ItcU9z^#!R#QcY*-wECl;hq|^+)ZD~MMn9o$7c|R z>QJQs5{CzzWdYnLy}g-hRL6!2kJU)c!Oc4DUn z3`Cz7#t_r5Xx!g@@AH0eRY;U7nZG0dp5LY0BA7uyo8a_R*vz2sP@B>NTFA@Z?YV>~ zv967;bJ9x~6or;=A#!U_64_!ax31R^C2&`7JU4pRf$yI|L2M7LE zAGi=DTnnZnu#`k@4(>-nXPxZ>+(s+C4zEg`I_`6CbZSix0)5;}J}!2CHg`?yb?K6G zB%meUGa@#Dy<+2y*}!yavJ5%)E54Pc@U_Dn={VMvTC5j2&?|SFb|oZK1<#)8<%z(= zZ6{UqK4~z>mhCTai$C+UQRm&VUI4ik0p8+k-pE`W<|r8Dj( zstYgU=6c@DE%3Rfy_)cG`srP!%s4Z2TEZ)JIXrrfyIc?jA6nFQdOKIS>Q`;j2@6sC zlr=pR+C}vSuh;f!%Li , + description: "Run LLMs using Docker Model Runner.", + requiredConfig: [ + "DockerModelRunnerBasePath", + "DockerModelRunnerModelPref", + "DockerModelRunnerModelTokenLimit", + ], + }, { name: "Local AI", value: "localai", @@ -371,6 +385,7 @@ export const AVAILABLE_LLM_PROVIDERS = [ }, ]; +export const LLM_PREFERENCE_CHANGED_EVENT = "llm-preference-changed"; export default function GeneralLLMPreference() { const [saving, setSaving] = useState(false); const [hasChanges, setHasChanges] = useState(false); @@ -428,6 +443,21 @@ export default function GeneralLLMPreference() { fetchKeys(); }, []); + // Some more complex LLM options do not bubble up the change event, so we need to listen to the custom event + // we can emit from the LLM options component using window.dispatchEvent(new Event(LLM_PREFERENCE_CHANGED_EVENT)); + useEffect(() => { + function updateHasChanges() { + setHasChanges(true); + } + 
window.addEventListener(LLM_PREFERENCE_CHANGED_EVENT, updateHasChanges); + return () => { + window.removeEventListener( + LLM_PREFERENCE_CHANGED_EVENT, + updateHasChanges + ); + }; + }, []); + useEffect(() => { const filtered = AVAILABLE_LLM_PROVIDERS.filter((llm) => llm.name.toLowerCase().includes(searchQuery.toLowerCase()) diff --git a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx index a0cf2ae8..3f32dcbd 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx @@ -31,6 +31,7 @@ import DellProAiStudioLogo from "@/media/llmprovider/dpais.png"; import MoonshotAiLogo from "@/media/llmprovider/moonshotai.png"; import CometApiLogo from "@/media/llmprovider/cometapi.png"; import GiteeAILogo from "@/media/llmprovider/giteeai.png"; +import DockerModelRunnerLogo from "@/media/llmprovider/docker-model-runner.png"; import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions"; import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions"; @@ -63,6 +64,7 @@ import DellProAiStudioOptions from "@/components/LLMSelection/DPAISOptions"; import MoonshotAiOptions from "@/components/LLMSelection/MoonshotAiOptions"; import CometApiLLMOptions from "@/components/LLMSelection/CometApiLLMOptions"; import GiteeAiOptions from "@/components/LLMSelection/GiteeAIOptions"; +import DockerModelRunnerOptions from "@/components/LLMSelection/DockerModelRunnerOptions"; import LLMItem from "@/components/LLMSelection/LLMItem"; import System from "@/models/system"; @@ -139,6 +141,13 @@ const LLMS = [ description: "Discover, download, and run thousands of cutting edge LLMs in a few clicks.", }, + { + name: "Docker Model Runner", + value: "docker-model-runner", + logo: DockerModelRunnerLogo, + options: (settings) => , + description: "Run LLMs using Docker Model Runner.", + }, { name: "Local AI", value: 
"localai", diff --git a/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx b/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx index c96531e2..b2a5945b 100644 --- a/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx +++ b/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx @@ -37,6 +37,7 @@ const ENABLED_PROVIDERS = [ "zai", "giteeai", "cohere", + "docker-model-runner", // TODO: More agent support. // "huggingface" // Can be done but already has issues with no-chat templated. Needs to be tested. ]; @@ -46,6 +47,7 @@ const WARN_PERFORMANCE = [ "ollama", "localai", "textgenwebui", + "docker-model-runner", ]; const LLM_DEFAULT = { diff --git a/frontend/src/utils/constants.js b/frontend/src/utils/constants.js index c1fae8fc..52f71083 100644 --- a/frontend/src/utils/constants.js +++ b/frontend/src/utils/constants.js @@ -53,6 +53,14 @@ export const NVIDIA_NIM_COMMON_URLS = [ "http://172.17.0.1:8000/v1/version", ]; +export const DOCKER_MODEL_RUNNER_COMMON_URLS = [ + "http://localhost:12434/engines/llama.cpp/v1", + "http://127.0.0.1:12434/engines/llama.cpp/v1", + "http://model-runner.docker.internal/engines/llama.cpp/v1", + "http://host.docker.internal:12434/engines/llama.cpp/v1", + "http://172.17.0.1:12434/engines/llama.cpp/v1", +]; + export function fullApiUrl() { if (API_BASE !== "/api") return API_BASE; return `${window.location.origin}/api`; diff --git a/locales/README.fa-IR.md b/locales/README.fa-IR.md index 342aed2d..70fbf885 100644 --- a/locales/README.fa-IR.md +++ b/locales/README.fa-IR.md @@ -105,6 +105,7 @@ AnythingLLM اسناد شما را به اشیایی به نام `workspaces` ت - [Z.AI (chat models)](https://z.ai/model-api) - [Novita AI (chat models)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link) - [PPIO](https://ppinfra.com?utm_source=github_anything-llm) +- [Docker Model 
Runner](https://docs.docker.com/ai/model-runner/)
diff --git a/locales/README.ja-JP.md b/locales/README.ja-JP.md index afb4e5af..cd1798bd 100644 --- a/locales/README.ja-JP.md +++ b/locales/README.ja-JP.md @@ -94,6 +94,7 @@ AnythingLLMは、ドキュメントを`ワークスペース`と呼ばれるオ - [Z.AI (チャットモデル)](https://z.ai/model-api) - [PPIO](https://ppinfra.com?utm_source=github_anything-llm) - [CometAPI (チャットモデル)](https://api.cometapi.com/) +- [Docker Model Runner](https://docs.docker.com/ai/model-runner/) **埋め込みモデル:** diff --git a/locales/README.tr-TR.md b/locales/README.tr-TR.md index 1743db31..37f30018 100644 --- a/locales/README.tr-TR.md +++ b/locales/README.tr-TR.md @@ -102,6 +102,7 @@ AnythingLLM, belgelerinizi **"çalışma alanları" (workspaces)** adı verilen - [Z.AI (chat models)](https://z.ai/model-api) - [Novita AI (chat models)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link) - [PPIO](https://ppinfra.com?utm_source=github_anything-llm) +- [Docker Model Runner](https://docs.docker.com/ai/model-runner/) **Embedder modelleri:** diff --git a/locales/README.zh-CN.md b/locales/README.zh-CN.md index c5408dc6..4c978a9a 100644 --- a/locales/README.zh-CN.md +++ b/locales/README.zh-CN.md @@ -102,6 +102,7 @@ AnythingLLM将您的文档划分为称为`workspaces` (工作区)的对象。工 - [Novita AI (聊天模型)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link) - [PPIO (聊天模型)](https://ppinfra.com?utm_source=github_anything-llm) - [CometAPI (聊天模型)](https://api.cometapi.com/) +- [Docker Model Runner](https://docs.docker.com/ai/model-runner/) **支持的嵌入模型:** diff --git a/server/.env.example b/server/.env.example index 2f7b96db..b408b6fa 100644 --- a/server/.env.example +++ b/server/.env.example @@ -161,6 +161,11 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long. 
# GITEE_AI_MODEL_PREF= # GITEE_AI_MODEL_TOKEN_LIMIT= +# LLM_PROVIDER='docker-model-runner' +# DOCKER_MODEL_RUNNER_BASE_PATH='http://127.0.0.1:12434' +# DOCKER_MODEL_RUNNER_LLM_MODEL_PREF='phi-3.5-mini' +# DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT=4096 + ########################################### ######## Embedding API SElECTION ########## ########################################### diff --git a/server/endpoints/utils.js b/server/endpoints/utils.js index 327b58f8..30d2e9b7 100644 --- a/server/endpoints/utils.js +++ b/server/endpoints/utils.js @@ -154,6 +154,9 @@ function getModelTag() { case "cohere": model = process.env.COHERE_MODEL_PREF; break; + case "docker-model-runner": + model = process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF; + break; default: model = "--"; break; diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index 28e44ca6..53f7dd76 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -646,6 +646,13 @@ const SystemSettings = { GiteeAIApiKey: !!process.env.GITEE_AI_API_KEY, GiteeAIModelPref: process.env.GITEE_AI_MODEL_PREF, GiteeAITokenLimit: process.env.GITEE_AI_MODEL_TOKEN_LIMIT || 8192, + + // Docker Model Runner Keys + DockerModelRunnerBasePath: process.env.DOCKER_MODEL_RUNNER_BASE_PATH, + DockerModelRunnerModelPref: + process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF, + DockerModelRunnerModelTokenLimit: + process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT || 8192, }; }, diff --git a/server/storage/models/.gitignore b/server/storage/models/.gitignore index 6bda7b7a..2e6b5c3e 100644 --- a/server/storage/models/.gitignore +++ b/server/storage/models/.gitignore @@ -13,4 +13,5 @@ context-windows/* MintplexLabs cometapi fireworks -giteeai \ No newline at end of file +giteeai +docker-model-runner \ No newline at end of file diff --git a/server/utils/AiProviders/dockerModelRunner/index.js b/server/utils/AiProviders/dockerModelRunner/index.js new file mode 100644 index 00000000..81f1f36a --- 
/dev/null +++ b/server/utils/AiProviders/dockerModelRunner/index.js @@ -0,0 +1,434 @@ +const fs = require("fs"); +const path = require("path"); +const { NativeEmbedder } = require("../../EmbeddingEngines/native"); +const { + handleDefaultStreamResponseV2, + formatChatHistory, +} = require("../../helpers/chat/responses"); +const { + LLMPerformanceMonitor, +} = require("../../helpers/chat/LLMPerformanceMonitor"); +const { OpenAI: OpenAIApi } = require("openai"); +const { humanFileSize } = require("../../helpers"); +const { safeJsonParse } = require("../../http"); + +class DockerModelRunnerLLM { + static cacheTime = 1000 * 60 * 60 * 24; // 24 hours + static cacheFolder = path.resolve( + process.env.STORAGE_DIR + ? path.resolve(process.env.STORAGE_DIR, "models", "docker-model-runner") + : path.resolve(__dirname, `../../../storage/models/docker-model-runner`) + ); + + constructor(embedder = null, modelPreference = null) { + if (!process.env.DOCKER_MODEL_RUNNER_BASE_PATH) + throw new Error("No Docker Model Runner API Base Path was set."); + if (!process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF) + throw new Error("No Docker Model Runner Model Pref was set."); + + this.dmr = new OpenAIApi({ + baseURL: parseDockerModelRunnerEndpoint( + process.env.DOCKER_MODEL_RUNNER_BASE_PATH + ), + apiKey: null, + }); + + this.model = + modelPreference || process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF; + this.embedder = embedder ?? 
new NativeEmbedder(); + this.defaultTemp = 0.7; + + this.limits = { + history: this.promptWindowLimit() * 0.15, + system: this.promptWindowLimit() * 0.15, + user: this.promptWindowLimit() * 0.7, + }; + + this.#log(`initialized with model: ${this.model}`); + } + + #log(text, ...args) { + console.log(`\x1b[32m[Docker Model Runner]\x1b[0m ${text}`, ...args); + } + + static slog(text, ...args) { + console.log(`\x1b[32m[Docker Model Runner]\x1b[0m ${text}`, ...args); + } + + async assertModelContextLimits() { + if (this.limits !== null) return; + this.limits = { + history: this.promptWindowLimit() * 0.15, + system: this.promptWindowLimit() * 0.15, + user: this.promptWindowLimit() * 0.7, + }; + } + + #appendContext(contextTexts = []) { + if (!contextTexts || !contextTexts.length) return ""; + return ( + "\nContext:\n" + + contextTexts + .map((text, i) => { + return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; + }) + .join("") + ); + } + + streamingEnabled() { + return "streamGetChatCompletion" in this; + } + + /** DMR does not support curling the context window limit from the API, so we return the system defined limit. */ + static promptWindowLimit(_) { + const systemDefinedLimit = + Number(process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT) || 8192; + return systemDefinedLimit; + } + + promptWindowLimit() { + return this.constructor.promptWindowLimit(this.model); + } + + async isValidChatCompletionModel(_ = "") { + return true; + } + + /** + * Generates appropriate content array for a message + attachments. 
+ * @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}} + * @returns {string|object[]} + */ + #generateContent({ userPrompt, attachments = [] }) { + if (!attachments.length) { + return userPrompt; + } + + const content = [{ type: "text", text: userPrompt }]; + for (let attachment of attachments) { + content.push({ + type: "image_url", + image_url: { + url: attachment.contentString, + detail: "auto", + }, + }); + } + return content.flat(); + } + + /** + * Construct the user prompt for this model. + * @param {{attachments: import("../../helpers").Attachment[]}} param0 + * @returns + */ + constructPrompt({ + systemPrompt = "", + contextTexts = [], + chatHistory = [], + userPrompt = "", + attachments = [], + }) { + const prompt = { + role: "system", + content: `${systemPrompt}${this.#appendContext(contextTexts)}`, + }; + return [ + prompt, + ...formatChatHistory(chatHistory, this.#generateContent), + { + role: "user", + content: this.#generateContent({ userPrompt, attachments }), + }, + ]; + } + + async getChatCompletion(messages = null, { temperature = 0.7 }) { + if (!this.model) + throw new Error( + `Docker Model Runner chat: ${this.model} is not valid or defined model for chat completion!` + ); + + const result = await LLMPerformanceMonitor.measureAsyncFunction( + this.dmr.chat.completions.create({ + model: this.model, + messages, + temperature, + }) + ); + + if ( + !result.output.hasOwnProperty("choices") || + result.output.choices.length === 0 + ) + return null; + + return { + textResponse: result.output.choices[0].message.content, + metrics: { + prompt_tokens: result.output.usage?.prompt_tokens || 0, + completion_tokens: result.output.usage?.completion_tokens || 0, + total_tokens: result.output.usage?.total_tokens || 0, + outputTps: result.output.usage?.completion_tokens / result.duration, + duration: result.duration, + model: this.model, + timestamp: new Date(), + }, + }; + } + + async streamGetChatCompletion(messages = null, { 
temperature = 0.7 }) { + if (!this.model) + throw new Error( + `Docker Model Runner chat: ${this.model} is not valid or defined model for chat completion!` + ); + + const measuredStreamRequest = await LLMPerformanceMonitor.measureStream({ + func: this.dmr.chat.completions.create({ + model: this.model, + stream: true, + messages, + temperature, + }), + messages, + runPromptTokenCalculation: true, + modelTag: this.model, + }); + return measuredStreamRequest; + } + + handleStream(response, stream, responseProps) { + return handleDefaultStreamResponseV2(response, stream, responseProps); + } + + // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations + async embedTextInput(textInput) { + return await this.embedder.embedTextInput(textInput); + } + async embedChunks(textChunks = []) { + return await this.embedder.embedChunks(textChunks); + } + + async compressMessages(promptArgs = {}, rawHistory = []) { + await this.assertModelContextLimits(); + const { messageArrayCompressor } = require("../../helpers/chat"); + const messageArray = this.constructPrompt(promptArgs); + return await messageArrayCompressor(this, messageArray, rawHistory); + } +} + +/** + * Parse the base path of the Docker Model Runner endpoint and return the host and port. + * @param {string} basePath - The base path of the Docker Model Runner endpoint. + * @param {'openai' | 'dmr'} to - The provider to parse the endpoint for (internal DMR or openai-compatible) + * @returns {string | null} + */ +function parseDockerModelRunnerEndpoint(basePath = null, to = "openai") { + if (!basePath) return null; + try { + const url = new URL(basePath); + if (to === "openai") url.pathname = "engines/v1"; + else if (to === "dmr") url.pathname = ""; + return url.toString(); + } catch (e) { + return basePath; + } +} + +/** + * @typedef {Object} DockerRunnerInstalledModel + * @property {string} id - The SHA256 identifier of the model layer/blob. 
+ * @property {string[]} tags - List of tags or aliases associated with this model (e.g., "ai/qwen3:4B-UD-Q4_K_XL"). + * @property {number} created - The Unix timestamp (seconds) when the model was created. + * @property {Object} config - The configuration of the model. + * @property {string} config.format - The file format (e.g., "gguf"). + * @property {string} config.quantization - The quantization level (e.g., "MOSTLY_Q4_K_M", "Q4_0"). + * @property {string} config.parameters - The parameter count formatted as a string (e.g., "4.02 B"). + * @property {string} config.architecture - The base architecture of the model (e.g., "qwen3", "llama"). + * @property {Object} [config.gguf] - Raw GGUF metadata headers containing tokenizer, architecture details, and licensing. + * @property {string} [config.gguf['general.base_model.0.organization']] - The organization of the base model. + * @property {string} [config.gguf['general.basename']] - The base name of the model (the real name of the model, not the tag). + * @property {string} [config.gguf['*.context_length']] - The context length of the model. Keyed by architecture, e.g. `qwen3.context_length`. + */ + +/** + * Fetch the remote models from the Docker Hub and cache the results.
+ * @returns {Promise<Record<string, {id: string, name: string, size: string, organization: string}[]>>} + */ +async function fetchRemoteModels() { + const cachePath = path.resolve( + DockerModelRunnerLLM.cacheFolder, + "models.json" + ); + const cachedAtPath = path.resolve( + DockerModelRunnerLLM.cacheFolder, + ".cached_at" + ); + let cacheTime = 0; + + if (fs.existsSync(cachePath) && fs.existsSync(cachedAtPath)) { + cacheTime = Number(fs.readFileSync(cachedAtPath, "utf8")); + if (Date.now() - cacheTime < DockerModelRunnerLLM.cacheTime) + return safeJsonParse(fs.readFileSync(cachePath, "utf8")); + } + + DockerModelRunnerLLM.slog(`Refreshing remote models from Docker Hub`); + // Now hit the Docker Hub API to get the remote model namespace and root tags + const availableNamespaces = []; // array of strings like ai/mistral, ai/qwen3, etc + let nextPage = + "https://hub.docker.com/v2/namespaces/ai/repositories?page_size=100&page=1"; + while (nextPage) { + const response = await fetch(nextPage) + .then((res) => res.json()) + .then((data) => { + const namespaces = data.results + .filter( + (result) => + result.namespace && + result.name && + result.content_types.includes("model") && + result.namespace === "ai" + ) + .map((result) => result.namespace + "/" + result.name); + availableNamespaces.push(...namespaces); + }) + .catch((e) => { + DockerModelRunnerLLM.slog( + `Error fetching remote models from Docker Hub`, + e + ); + return []; + }); + if (!response) break; + if (!response || !response.next) break; + nextPage = response.next; + } + + const availableRemoteModels = {}; + const BATCH_SIZE = 10; + + // Run batch requests to avoid rate limiting but also + // improve the speed of the total request time.
+ for (let i = 0; i < availableNamespaces.length; i += BATCH_SIZE) { + const batch = availableNamespaces.slice(i, i + BATCH_SIZE); + DockerModelRunnerLLM.slog( + `Fetching tags for batch ${Math.floor(i / BATCH_SIZE) + 1} of ${Math.ceil(availableNamespaces.length / BATCH_SIZE)}` + ); + + await Promise.all( + batch.map(async (namespace) => { + const [organization, model] = namespace.split("/"); + const namespaceUrl = new URL( + "https://hub.docker.com/v2/namespaces/ai/repositories/" + + model + + "/tags" + ); + + DockerModelRunnerLLM.slog( + `Fetching tags for ${namespaceUrl.toString()}` + ); + await fetch(namespaceUrl.toString()) + .then((res) => res.json()) + .then((data) => { + const tags = data.results.map((result) => { + return { + id: `${organization}/${model}:${result.name}`, + name: `${model}:${result.name}`, + size: humanFileSize(result.full_size), + organization: model, + }; + }); + availableRemoteModels[model] = tags; + }) + .catch((e) => { + DockerModelRunnerLLM.slog( + `Error fetching tags for ${namespaceUrl.toString()}`, + e + ); + }); + }) + ); + } + + if (Object.keys(availableRemoteModels).length === 0) { + DockerModelRunnerLLM.slog( + `No remote models found - API may be down or not available` + ); + return {}; + } + + if (!fs.existsSync(DockerModelRunnerLLM.cacheFolder)) + fs.mkdirSync(DockerModelRunnerLLM.cacheFolder, { recursive: true }); + fs.writeFileSync(cachePath, JSON.stringify(availableRemoteModels), { + encoding: "utf8", + }); + fs.writeFileSync(cachedAtPath, String(Number(new Date())), { + encoding: "utf8", + }); + return availableRemoteModels; +} + +/** + * This function will fetch the remote models from the Docker Hub as well + * as the local models installed on the system. + * @param {string} basePath - The base path of the Docker Model Runner endpoint. 
+ */ +async function getDockerModels(basePath = null) { + let availableModels = {}; + /** @type {Record<string, {id: string, name: string, size: string, organization: string}>} */ + let installedModels = {}; + + try { + // Grab the locally installed models from the Docker Model Runner API + const dmrUrl = new URL( + parseDockerModelRunnerEndpoint( + basePath ?? process.env.DOCKER_MODEL_RUNNER_BASE_PATH, + "dmr" + ) + ); + dmrUrl.pathname = "/models"; + + await fetch(dmrUrl.toString()) + .then((res) => res.json()) + .then((data) => { + data?.map((model) => { + const id = model.tags.at(0); + // eg: ai/qwen3:latest -> qwen3 + const tag = + id?.split("/").pop()?.split(":")?.at(1) ?? + id?.split(":").at(1) ?? + "latest"; + const organization = id?.split("/").pop()?.split(":")?.at(0) ?? id; + installedModels[id] = { + id: id, + name: `${organization}:${tag}`, + size: model.config?.size ?? "Unknown size", + organization: organization, + }; + }); + }); + + // Now hit the Docker Hub API to get the remote model namespace and root tags + const remoteModels = await fetchRemoteModels(); + for (const [modelName, tags] of Object.entries(remoteModels)) { + availableModels[modelName] = { tags: [] }; + for (const tag of tags) { + if (!installedModels[tag.id]) + availableModels[modelName].tags.push({ ...tag, downloaded: false }); + else availableModels[modelName].tags.push({ ...tag, downloaded: true }); + } + } + } catch (e) { + DockerModelRunnerLLM.slog(`Error getting Docker models`, e); + } finally { + return Object.values(availableModels).flatMap((m) => m.tags); + } +} + +module.exports = { + DockerModelRunnerLLM, + parseDockerModelRunnerEndpoint, + getDockerModels, +}; diff --git a/server/utils/agents/aibitat/index.js b/server/utils/agents/aibitat/index.js index cc15c123..0a2f6f45 100644 --- a/server/utils/agents/aibitat/index.js +++ b/server/utils/agents/aibitat/index.js @@ -992,6 +992,8 @@ ${this.getHistory({ to: route.to }) return new Providers.GiteeAIProvider({ model: config.model }); case "cohere": return new Providers.CohereProvider({ model:
config.model }); + case "docker-model-runner": + return new Providers.DockerModelRunnerProvider({ model: config.model }); default: throw new Error( `Unknown provider: ${config.provider}. Please use a valid provider.` diff --git a/server/utils/agents/aibitat/providers/ai-provider.js b/server/utils/agents/aibitat/providers/ai-provider.js index 3752b161..9d00c9b3 100644 --- a/server/utils/agents/aibitat/providers/ai-provider.js +++ b/server/utils/agents/aibitat/providers/ai-provider.js @@ -18,6 +18,9 @@ const { ChatOllama } = require("@langchain/community/chat_models/ollama"); const { toValidNumber, safeJsonParse } = require("../../../http"); const { getLLMProviderClass } = require("../../../helpers"); const { parseLMStudioBasePath } = require("../../../AiProviders/lmStudio"); +const { + parseDockerModelRunnerEndpoint, +} = require("../../../AiProviders/dockerModelRunner"); const { parseFoundryBasePath } = require("../../../AiProviders/foundry"); const { SystemPromptVariables, @@ -313,6 +316,16 @@ class Provider { ...config, }); } + case "docker-model-runner": + return new ChatOpenAI({ + configuration: { + baseURL: parseDockerModelRunnerEndpoint( + process.env.DOCKER_MODEL_RUNNER_BASE_PATH + ), + }, + apiKey: null, + ...config, + }); default: throw new Error(`Unsupported provider ${provider} for this task.`); } diff --git a/server/utils/agents/aibitat/providers/dockerModelRunner.js b/server/utils/agents/aibitat/providers/dockerModelRunner.js new file mode 100644 index 00000000..e00f8bc5 --- /dev/null +++ b/server/utils/agents/aibitat/providers/dockerModelRunner.js @@ -0,0 +1,101 @@ +const OpenAI = require("openai"); +const Provider = require("./ai-provider.js"); +const InheritMultiple = require("./helpers/classes.js"); +const UnTooled = require("./helpers/untooled.js"); +const { + parseDockerModelRunnerEndpoint, +} = require("../../../AiProviders/dockerModelRunner/index.js"); + +/** + * The agent provider for the Docker Model Runner. 
+ */ +class DockerModelRunnerProvider extends InheritMultiple([Provider, UnTooled]) { + model; + + /** + * + * @param {{model?: string}} config + */ + constructor(config = {}) { + super(); + const model = + config?.model || process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF || null; + const client = new OpenAI({ + baseURL: parseDockerModelRunnerEndpoint( + process.env.DOCKER_MODEL_RUNNER_BASE_PATH + ), + apiKey: null, + maxRetries: 3, + }); + + this._client = client; + this.model = model; + this.verbose = true; + } + + get client() { + return this._client; + } + + get supportsAgentStreaming() { + return true; + } + + async #handleFunctionCallChat({ messages = [] }) { + return await this.client.chat.completions + .create({ + model: this.model, + messages, + }) + .then((result) => { + if (!result.hasOwnProperty("choices")) + throw new Error("Docker Model Runner chat: No results!"); + if (result.choices.length === 0) + throw new Error("Docker Model Runner chat: No results length!"); + return result.choices[0].message.content; + }) + .catch((_) => { + return null; + }); + } + + async #handleFunctionCallStream({ messages = [] }) { + return await this.client.chat.completions.create({ + model: this.model, + stream: true, + messages, + }); + } + + async stream(messages, functions = [], eventHandler = null) { + return await UnTooled.prototype.stream.call( + this, + messages, + functions, + this.#handleFunctionCallStream.bind(this), + eventHandler + ); + } + + async complete(messages, functions = []) { + return await UnTooled.prototype.complete.call( + this, + messages, + functions, + this.#handleFunctionCallChat.bind(this) + ); + } + + /** + * Get the cost of the completion. + * + * @param _usage The completion to get the cost for. + * @returns The cost of the completion. + * Stubbed since Docker Model Runner has no cost basis. 
+ */ + getCost(_usage) { + return 0; + } +} + +module.exports = DockerModelRunnerProvider; diff --git a/server/utils/agents/aibitat/providers/index.js b/server/utils/agents/aibitat/providers/index.js index e4a11995..c53c01c3 100644 --- a/server/utils/agents/aibitat/providers/index.js +++ b/server/utils/agents/aibitat/providers/index.js @@ -29,6 +29,7 @@ const CometApiProvider = require("./cometapi.js"); const FoundryProvider = require("./foundry.js"); const GiteeAIProvider = require("./giteeai.js"); const CohereProvider = require("./cohere.js"); +const DockerModelRunnerProvider = require("./dockerModelRunner.js"); module.exports = { OpenAIProvider, @@ -62,4 +63,5 @@ module.exports = { FoundryProvider, GiteeAIProvider, CohereProvider, + DockerModelRunnerProvider, }; diff --git a/server/utils/agents/index.js b/server/utils/agents/index.js index 038496cc..24c496b6 100644 --- a/server/utils/agents/index.js +++ b/server/utils/agents/index.js @@ -217,6 +217,12 @@ class AgentHandler { if (!process.env.COHERE_API_KEY) throw new Error("Cohere API key must be provided to use agents."); break; + case "docker-model-runner": + if (!process.env.DOCKER_MODEL_RUNNER_BASE_PATH) + throw new Error( + "Docker Model Runner base path must be provided to use agents." + ); + break; default: throw new Error( "No workspace agent provider set. Please set your agent provider in the workspace's settings" @@ -297,6 +303,8 @@ class AgentHandler { return process.env.GITEE_AI_MODEL_PREF ?? null; case "cohere": return process.env.COHERE_MODEL_PREF ?? "command-r-08-2024"; + case "docker-model-runner": + return process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF ?? 
null; default: return null; } diff --git a/server/utils/helpers/customModels.js b/server/utils/helpers/customModels.js index e7e094a1..649d2e56 100644 --- a/server/utils/helpers/customModels.js +++ b/server/utils/helpers/customModels.js @@ -13,6 +13,7 @@ const { fetchPPIOModels } = require("../AiProviders/ppio"); const { GeminiLLM } = require("../AiProviders/gemini"); const { fetchCometApiModels } = require("../AiProviders/cometapi"); const { parseFoundryBasePath } = require("../AiProviders/foundry"); +const { getDockerModels } = require("../AiProviders/dockerModelRunner"); const SUPPORT_CUSTOM_MODELS = [ "openai", @@ -43,6 +44,7 @@ const SUPPORT_CUSTOM_MODELS = [ "cohere", "zai", "giteeai", + "docker-model-runner", // Embedding Engines "native-embedder", "cohere-embedder", @@ -116,6 +118,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) { return await getOpenRouterEmbeddingModels(); case "giteeai": return await getGiteeAIModels(apiKey); + case "docker-model-runner": + return await getDockerModelRunnerModels(basePath); default: return { models: [], error: "Invalid provider for custom models" }; } @@ -864,6 +868,19 @@ async function getOpenRouterEmbeddingModels() { return { models, error: null }; } +async function getDockerModelRunnerModels(basePath = null) { + try { + const models = await getDockerModels(basePath); + return { models, error: null }; + } catch (e) { + console.error(`DockerModelRunner:getDockerModelRunnerModels`, e.message); + return { + models: [], + error: "Could not fetch Docker Model Runner Models", + }; + } +} + module.exports = { getCustomModels, SUPPORT_CUSTOM_MODELS, diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js index 9b352015..d508f7ee 100644 --- a/server/utils/helpers/index.js +++ b/server/utils/helpers/index.js @@ -229,6 +229,11 @@ function getLLMProvider({ provider = null, model = null } = {}) { case "giteeai": const { GiteeAILLM } = require("../AiProviders/giteeai"); return new 
GiteeAILLM(embedder, model); + case "docker-model-runner": + const { + DockerModelRunnerLLM, + } = require("../AiProviders/dockerModelRunner"); + return new DockerModelRunnerLLM(embedder, model); default: throw new Error( `ENV: No valid LLM_PROVIDER value found in environment! Using ${process.env.LLM_PROVIDER}` @@ -394,6 +399,11 @@ function getLLMProviderClass({ provider = null } = {}) { case "giteeai": const { GiteeAILLM } = require("../AiProviders/giteeai"); return GiteeAILLM; + case "docker-model-runner": + const { + DockerModelRunnerLLM, + } = require("../AiProviders/dockerModelRunner"); + return DockerModelRunnerLLM; default: return null; } @@ -470,6 +480,8 @@ function getBaseLLMProviderModel({ provider = null } = {}) { return process.env.ZAI_MODEL_PREF; case "giteeai": return process.env.GITEE_AI_MODEL_PREF; + case "docker-model-runner": + return process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF; default: return null; } @@ -495,6 +507,30 @@ function toChunks(arr, size) { ); } +function humanFileSize(bytes, si = false, dp = 1) { + const thresh = si ? 1000 : 1024; + + if (Math.abs(bytes) < thresh) { + return bytes + " B"; + } + + const units = si + ? 
["kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"] + : ["KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"]; + let u = -1; + const r = 10 ** dp; + + do { + bytes /= thresh; + ++u; + } while ( + Math.round(Math.abs(bytes) * r) / r >= thresh && + u < units.length - 1 + ); + + return bytes.toFixed(dp) + " " + units[u]; +} + module.exports = { getEmbeddingEngineSelection, maximumChunkLength, @@ -503,4 +539,5 @@ module.exports = { getBaseLLMProviderModel, getLLMProvider, toChunks, + humanFileSize, }; diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index 6603d9a7..256bba37 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -789,6 +789,20 @@ const KEY_MAPPING = { envKey: "GITEE_AI_MODEL_TOKEN_LIMIT", checks: [nonZero], }, + + // Docker Model Runner Options + DockerModelRunnerBasePath: { + envKey: "DOCKER_MODEL_RUNNER_BASE_PATH", + checks: [isValidURL], + }, + DockerModelRunnerModelPref: { + envKey: "DOCKER_MODEL_RUNNER_LLM_MODEL_PREF", + checks: [isNotEmpty], + }, + DockerModelRunnerModelTokenLimit: { + envKey: "DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT", + checks: [nonZero], + }, }; function isNotEmpty(input = "") { @@ -902,6 +916,7 @@ function supportedLLM(input = "") { "foundry", "zai", "giteeai", + "docker-model-runner", ].includes(input); return validSelection ? null : `${input} is not a valid LLM provider.`; }