Merlyn: initial containerization setup

- Modified Dockerfile: amd64 only, merlyn user (UID 1117), python3/pip, simplified build
- Added _startup.sh: root-level startup, hands off to startup.sh as merlyn user
- Added startup.sh: yarn installs, frontend build, Prisma migrations, start server+collector
- Added server/requirements.txt: Python dependencies for merlyn-server
- Fixed collector/yarn.lock: epub2 git URL SSH->HTTPS with commit hash
- Fixed server/yarn.lock: zod version bumped to 3.25.76 to resolve zod-to-json-schema compatibility
This commit is contained in:
PQ32 Developer 2026-05-10 13:49:11 -07:00
parent 21ce030871
commit c9c1fea95d
5 changed files with 84 additions and 139 deletions

View File

@ -2,80 +2,8 @@
FROM ubuntu:noble-20251013 AS base
# Build arguments
ARG ARG_UID=1000
ARG ARG_GID=1000
FROM base AS build-arm64
RUN echo "Preparing build of AnythingLLM image for arm64 architecture"
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# Install system dependencies
# hadolint ignore=DL3008,DL3013
RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -yq --no-install-recommends \
unzip curl gnupg libgfortran5 libgbm1 tzdata netcat-openbsd \
libasound2t64 libatk1.0-0 libc6 libcairo2 libcups2 libdbus-1-3 libexpat1 libfontconfig1 \
libgcc1 libglib2.0-0 libgtk-3-0 libnspr4 libpango-1.0-0 libx11-6 libx11-xcb1 libxcb1 \
libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 \
libxss1 libxtst6 ca-certificates fonts-liberation libappindicator3-1 libnss3 lsb-release \
xdg-utils git build-essential ffmpeg && \
mkdir -p /etc/apt/keyrings && \
curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \
echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_18.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \
apt-get update && \
# Install node and yarn
apt-get install -yq --no-install-recommends nodejs && \
curl -LO https://github.com/yarnpkg/yarn/releases/download/v1.22.19/yarn_1.22.19_all.deb \
&& dpkg -i yarn_1.22.19_all.deb \
&& rm yarn_1.22.19_all.deb && \
# Install uvx (pinned to 0.6.10) for MCP support
curl -LsSf https://astral.sh/uv/0.6.10/install.sh | sh && \
mv /root/.local/bin/uv /usr/local/bin/uv && \
mv /root/.local/bin/uvx /usr/local/bin/uvx && \
echo "Installed uvx! $(uv --version)" && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Create a group and user with specific UID and GID
# First, remove any existing user/group with the target UID/GID to avoid conflicts
RUN (getent passwd "$ARG_UID" && userdel -f "$(getent passwd "$ARG_UID" | cut -d: -f1)") || true && \
(getent group "$ARG_GID" && groupdel "$(getent group "$ARG_GID" | cut -d: -f1)") || true && \
groupadd -g "$ARG_GID" anythingllm && \
useradd -l -u "$ARG_UID" -m -d /app -s /bin/bash -g anythingllm anythingllm && \
mkdir -p /app/frontend/ /app/server/ /app/collector/ && chown -R anythingllm:anythingllm /app
# Copy docker helper scripts
COPY ./docker/docker-entrypoint.sh /usr/local/bin/
COPY ./docker/docker-healthcheck.sh /usr/local/bin/
COPY --chown=anythingllm:anythingllm ./docker/.env.example /app/server/.env
# Ensure the scripts are executable
RUN chmod +x /usr/local/bin/docker-entrypoint.sh && \
chmod +x /usr/local/bin/docker-healthcheck.sh
USER anythingllm
WORKDIR /app
# Puppeteer does not ship with an ARM86 compatible build for Chromium
# so web-scraping would be broken in arm docker containers unless we patch it
# by manually installing a compatible chromedriver.
RUN echo "Need to patch Puppeteer x Chromium support for ARM86 - installing dep!" && \
curl -fSL https://webassets.anythingllm.com/chromium-1088-linux-arm64.zip -o chrome-linux.zip && \
unzip chrome-linux.zip && \
rm -rf chrome-linux.zip
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
ENV CHROME_PATH=/app/chrome-linux/chrome
ENV PUPPETEER_EXECUTABLE_PATH=/app/chrome-linux/chrome
RUN echo "Done running arm64 specific installation steps"
#############################################
# amd64-specific stage
FROM base AS build-amd64
RUN echo "Preparing build of AnythingLLM image for non-ARM architecture"
ARG ARG_UID=1117
ARG ARG_GID=1117
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
@ -88,7 +16,8 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
libgcc1 libglib2.0-0 libgtk-3-0 libnspr4 libpango-1.0-0 libx11-6 libx11-xcb1 libxcb1 \
libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 \
libxss1 libxtst6 ca-certificates fonts-liberation libappindicator3-1 libnss3 lsb-release \
xdg-utils git build-essential ffmpeg && \
xdg-utils git build-essential ffmpeg \
python3 python3-pip && \
mkdir -p /etc/apt/keyrings && \
curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \
echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_18.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \
@ -106,77 +35,34 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Create a group and user with specific UID and GID
# First, remove any existing user/group with the target UID/GID to avoid conflicts
# Create merlyn group and user
RUN (getent passwd "$ARG_UID" && userdel -f "$(getent passwd "$ARG_UID" | cut -d: -f1)") || true && \
(getent group "$ARG_GID" && groupdel "$(getent group "$ARG_GID" | cut -d: -f1)") || true && \
groupadd -g "$ARG_GID" anythingllm && \
useradd -l -u "$ARG_UID" -m -d /app -s /bin/bash -g anythingllm anythingllm && \
mkdir -p /app/frontend/ /app/server/ /app/collector/ && chown -R anythingllm:anythingllm /app
groupadd -g "$ARG_GID" merlyn && \
useradd -l -u "$ARG_UID" -m -d /app -s /bin/bash -g merlyn merlyn && \
mkdir -p /app/frontend/ /app/server/ /app/collector/ && chown -R merlyn:merlyn /app
# Copy docker helper scripts
COPY ./docker/docker-entrypoint.sh /usr/local/bin/
# Copy helper scripts
COPY ./docker/_startup.sh /usr/local/bin/
COPY ./docker/startup.sh /usr/local/bin/
COPY ./docker/docker-healthcheck.sh /usr/local/bin/
COPY --chown=anythingllm:anythingllm ./docker/.env.example /app/server/.env
COPY --chown=merlyn:merlyn ./docker/.env.example /app/server/.env
# Ensure the scripts are executable
RUN chmod +x /usr/local/bin/docker-entrypoint.sh && \
# Ensure scripts are executable
RUN chmod +x /usr/local/bin/_startup.sh && \
chmod +x /usr/local/bin/startup.sh && \
chmod +x /usr/local/bin/docker-healthcheck.sh
#############################################
# COMMON BUILD FLOW FOR ALL ARCHS
#############################################
# hadolint ignore=DL3006
FROM build-${TARGETARCH} AS build
RUN echo "Running common build flow of AnythingLLM image for all architectures"
USER anythingllm
WORKDIR /app
# Install & Build frontend layer
# Use BUILDPLATFORM to run on the native host architecture (not emulated).
# This avoids esbuild crashing under QEMU when cross-compiling.
# The output (static HTML/CSS/JS) is platform-independent.
FROM --platform=$BUILDPLATFORM node:18-slim AS frontend-build
WORKDIR /app/frontend
COPY ./frontend/package.json ./frontend/yarn.lock ./
RUN yarn install --network-timeout 100000 && yarn cache clean
COPY ./frontend/ ./
RUN yarn build
WORKDIR /app
# Install server layer
# Also pull and build collector deps (chromium issues prevent bad bindings)
FROM build AS backend-build
COPY --chown=anythingllm:anythingllm ./server /app/server/
WORKDIR /app/server
RUN yarn install --production --network-timeout 100000 && yarn cache clean
WORKDIR /app
# Install collector dependencies
COPY --chown=anythingllm:anythingllm ./collector/ ./collector/
WORKDIR /app/collector
ENV PUPPETEER_DOWNLOAD_BASE_URL=https://storage.googleapis.com/chrome-for-testing-public
RUN yarn install --production --network-timeout 100000 && yarn cache clean
WORKDIR /app
USER anythingllm
# Since we are building from backend-build we just need to move built frontend into server/public
FROM backend-build AS production-build
WORKDIR /app
COPY --chown=anythingllm:anythingllm --from=frontend-build /app/frontend/dist /app/server/public
# Setup the environment
# Environment
ENV NODE_ENV=production
ENV ANYTHING_LLM_RUNTIME=docker
ENV DEPLOYMENT_VERSION=1.12.1
ENV DEPLOYMENT_VERSION=merlyn-1.12.1
# Setup the healthcheck
# Healthcheck
HEALTHCHECK --interval=1m --timeout=10s --start-period=1m \
CMD /bin/bash /usr/local/bin/docker-healthcheck.sh || exit 1
# Run the server
# CMD ["sh", "-c", "tail -f /dev/null"] # For development: keep container open
ENTRYPOINT ["/bin/bash", "/usr/local/bin/docker-entrypoint.sh"]
USER root
WORKDIR /app
ENTRYPOINT ["/bin/bash", "/usr/local/bin/_startup.sh"]

11
docker/_startup.sh Normal file
View File

@ -0,0 +1,11 @@
#!/bin/bash
# _startup.sh - container entrypoint, runs as root
#
# 1. Installs the Python packages listed in /app/server/requirements.txt
#    (skipped when that file is missing or empty).
# 2. Replaces this process with startup.sh running as the merlyn user.

# Install Python dependencies if requirements.txt exists and is non-empty
REQUIREMENTS=/app/server/requirements.txt
if [ -s "$REQUIREMENTS" ]; then
    # --break-system-packages lets pip install into the system interpreter
    # even when the distribution marks it as externally managed.
    if ! pip install --break-system-packages -r "$REQUIREMENTS"; then
        # Best effort: report the failure clearly, but still start the app.
        echo "WARNING: pip install -r $REQUIREMENTS failed; continuing startup" >&2
    fi
fi

# Hand off to startup.sh as merlyn user.
# Prefer the copy under /app/docker (the path this script has always used);
# fall back to the copy the image installs in /usr/local/bin so the container
# still starts when /app/docker/startup.sh is not present.
STARTUP_SCRIPT=/app/docker/startup.sh
if [ ! -f "$STARTUP_SCRIPT" ]; then
    STARTUP_SCRIPT=/usr/local/bin/startup.sh
fi

# exec so that no extra root shell stays behind as the parent process.
exec runuser -u merlyn -- /bin/bash "$STARTUP_SCRIPT"

48
docker/startup.sh Normal file
View File

@ -0,0 +1,48 @@
#!/bin/bash
# startup.sh - runs as merlyn user
#
# Installs the server and collector dependencies, builds the frontend when
# /app/server/public does not exist, applies the Prisma migrations and then
# runs the server and the collector side by side. The script exits with the
# status of whichever of the two background jobs finishes first.

# Print an error message to stderr and abort startup with status 1.
die() {
    echo "ERROR: $*" >&2
    exit 1
}

if [ -z "$STORAGE_DIR" ]; then
    echo "================================================================"
    echo "⚠️ WARNING: STORAGE_DIR environment variable is not set!"
    echo "================================================================"
fi

# Files created from here on keep group write permission.
umask 002

# Force git to use HTTPS instead of SSH
git config --global url."https://".insteadOf ssh://
git config --global url."https://github.com/".insteadOf git@github.com:

# Install server dependencies.
# Abort on failure: continuing would only run Prisma and the server against
# a missing or partial node_modules tree.
echo "Installing server dependencies..."
cd /app/server || die "cannot enter /app/server"
yarn install --production --frozen-lockfile --no-cache --network-timeout 100000 \
    || die "server dependency install failed"

# Install collector dependencies
echo "Installing collector dependencies..."
cd /app/collector || die "cannot enter /app/collector"
yarn install --production --frozen-lockfile --no-cache --network-timeout 100000 \
    || die "collector dependency install failed"

# NODE_ENV must be set to development for frontend install
# otherwise yarn skips devDependencies (including vite)
# NODE_ENV=production is set in the Dockerfile for the server

# Build frontend if public folder doesn't exist
if [ ! -d "/app/server/public" ]; then
    echo "Building frontend..."
    # Every step is checked: without the cd guard a missing /app/frontend
    # would run the development install inside /app/collector, and without
    # the build guard a stale dist directory could be published.
    cd /app/frontend || die "cannot enter /app/frontend"
    NODE_ENV=development yarn install --frozen-lockfile --no-cache --network-timeout 100000 \
        || die "frontend dependency install failed"
    yarn build || die "frontend build failed"
    if ! cp -r /app/frontend/dist /app/server/public; then
        # A partial copy would make the directory test above skip the build
        # on every later start, so remove it before giving up.
        rm -rf /app/server/public
        die "copying the frontend build to /app/server/public failed"
    fi
fi

# Run Prisma migrations and start AnythingLLM
{
    cd /app/server/ &&
        export CHECKPOINT_DISABLE=1 &&
        npx prisma generate --schema=./prisma/schema.prisma &&
        npx prisma migrate deploy --schema=./prisma/schema.prisma &&
        node /app/server/index.js
} &

# Start the collector alongside the server
{ node /app/collector/index.js; } &

# Block until the first background job ends and propagate its exit status.
wait -n
exit $?

0
server/requirements.txt Normal file
View File

View File

@ -10692,9 +10692,9 @@ zod-to-json-schema@^3.25.0:
integrity sha512-HvWtU2UG41LALjajJrML6uQejQhNJx+JBO9IflpSja4R03iNWfKXrj6W2h7ljuLyc1nKS+9yDyL/9tD1U/yBnQ==
zod@^3.22.3, zod@^3.22.4:
version "3.23.5"
resolved "https://registry.npmjs.org/zod/-/zod-3.23.5.tgz"
integrity sha512-fkwiq0VIQTksNNA131rDOsVJcns0pfVUjHzLrNBiF/O/Xxb5lQyEXkhZWcJ7npWsYlvs+h0jFWXXy4X46Em1JA==
version "3.25.76"
resolved "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz"
integrity sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==
"zod@^3.25 || ^4.0":
version "4.1.13"