Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .github/actions/load-go-version/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
name: Load Go version
description: Read GO_VERSION from .versions and expose it as an output

outputs:
  go-version:
    description: Go toolchain version declared by the GO_VERSION entry in .versions
    value: ${{ steps.load.outputs.GO_VERSION }}

runs:
  using: composite
  steps:
    - name: Load GO version
      id: load
      shell: bash
      # .versions is resolved relative to the step's working directory, so the
      # repository must already be checked out when this action runs.
      run: |
        # Take the first GO_VERSION= line only, then strip a trailing
        # "# comment" and trailing whitespace from its value.
        version=$(sed -n '/^GO_VERSION=/{s/^GO_VERSION=//;s/[[:space:]]*#.*//;s/[[:space:]]*$//;p;q;}' .versions)

        # Fail loudly instead of handing an empty version to later steps.
        if [ -z "$version" ]; then
          echo "::error::GO_VERSION is missing or empty in .versions"
          exit 1
        fi

        echo "GO_VERSION=$version" >> "$GITHUB_OUTPUT"
15 changes: 13 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,14 @@ jobs:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd

- name: Load GO version
id: versions
uses: ./.github/actions/load-go-version

- name: Set up Go
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c
with:
go-version: 1.25.8
go-version: ${{ steps.versions.outputs.go-version }}
cache: true

- name: Install golangci-lint
Expand All @@ -42,10 +46,17 @@ jobs:
- name: Verify vendor/ is not present
run: stat vendor && exit 1 || exit 0

- name: Load GO version
id: versions
uses: ./.github/actions/load-go-version

- name: Validate .versions against Dockerfile ARGs
run: make validate-versions

- name: Set up Go
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c
with:
go-version: 1.25.8
go-version: ${{ steps.versions.outputs.go-version }}
cache: true

- name: Check go mod tidy
Expand Down
6 changes: 5 additions & 1 deletion .github/workflows/e2e-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,14 @@ jobs:
with:
submodules: recursive

- name: Load GO version
id: versions
uses: ./.github/actions/load-go-version

- name: Set up Go
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c
with:
go-version: 1.25.8
go-version: ${{ steps.versions.outputs.go-version }}
cache: true

- name: Set up Docker
Expand Down
6 changes: 5 additions & 1 deletion .github/workflows/integration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,14 @@ jobs:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd

- name: Load GO version
id: versions
uses: ./.github/actions/load-go-version

- name: Set up Go
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c
with:
go-version: 1.25.8
go-version: ${{ steps.versions.outputs.go-version }}
cache: true

- name: Set up Docker Buildx
Expand Down
31 changes: 24 additions & 7 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,14 @@ on:
description: "llama-server version"
required: false
type: string
default: "latest"
vllmVersion:
description: "vLLM version"
required: false
type: string
default: "0.17.0"
sglangVersion:
description: "SGLang version"
required: false
type: string
default: "0.4.0"
# This can be removed once we have llama.cpp built for MUSA and CANN.
buildMusaCann:
description: "Build MUSA and CANN images"
Expand Down Expand Up @@ -204,10 +201,14 @@ jobs:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd

- name: Load GO version
id: versions
uses: ./.github/actions/load-go-version

- name: Set up Go
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c
with:
go-version: 1.25.8
go-version: ${{ steps.versions.outputs.go-version }}
cache: true

- name: Run tests
Expand All @@ -223,9 +224,9 @@ jobs:
contents: read
env:
RELEASE_TAG: ${{ needs.prepare.outputs.release_tag }}
LLAMA_SERVER_VERSION: ${{ inputs.llamaServerVersion || 'latest' }}
LLAMA_SERVER_VERSION: ${{ inputs.llamaServerVersion }}
VLLM_VERSION: ${{ inputs.vllmVersion }}
SGLANG_VERSION: ${{ inputs.sglangVersion || '0.4.0' }}
SGLANG_VERSION: ${{ inputs.sglangVersion }}
BUILD_MUSA_CANN: ${{ inputs.buildMusaCann || 'false' }}
steps:
- name: Checkout repo
Expand Down Expand Up @@ -263,6 +264,18 @@ jobs:
echo "docker/model-runner:$RELEASE_TAG-cann" >> "$GITHUB_OUTPUT"
echo "docker/model-runner:latest-cann" >> "$GITHUB_OUTPUT"
echo 'EOF' >> "$GITHUB_OUTPUT"

- name: Load versions
  shell: bash
  run: |
    # Print the value stored for key $1 in .versions, with any trailing
    # "# comment" and trailing whitespace removed.
    version_from_file() {
      grep "^$1=" .versions | cut -d= -f2- | sed 's/[[:space:]]*#.*//;s/[[:space:]]*$//'
    }

    file_llama=$(version_from_file LLAMA_SERVER_VERSION)
    file_vllm=$(version_from_file VLLM_VERSION)
    file_sglang=$(version_from_file SGLANG_VERSION)

    # A non-empty value already present in the environment (e.g. supplied
    # through the workflow inputs) wins; otherwise the .versions entry is used.
    {
      echo "LLAMA_SERVER_VERSION=${LLAMA_SERVER_VERSION:-$file_llama}"
      echo "VLLM_VERSION=${VLLM_VERSION:-$file_vllm}"
      echo "SGLANG_VERSION=${SGLANG_VERSION:-$file_sglang}"
    } >> "$GITHUB_ENV"

- name: Log in to DockerHub
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2
Expand Down Expand Up @@ -535,11 +548,15 @@ jobs:
token: ${{ secrets.CLI_RELEASE_PAT }}
fetch-depth: 0

- name: Load GO version
id: versions
uses: ./.github/actions/load-go-version

- name: Set up Go
if: steps.check-docs.outputs.changed == 'true'
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c
with:
go-version: 1.25.8
go-version: ${{ steps.versions.outputs.go-version }}
cache: true

- name: Vendor model-runner CLI docs
Expand Down
8 changes: 8 additions & 0 deletions .versions
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Pinned versions shared by the Makefile (`include .versions`), the CI
# workflows, and the build scripts, which look entries up with `grep '^KEY='`.
# Format: one KEY=VALUE per line with no spaces around '='; lines starting
# with '#' are comments.
# NOTE(review): confirm every consumer of this file tolerates comment lines.

# Go toolchain version used by CI (setup-go) and the Docker build.
GO_VERSION=1.25.8
# vLLM version passed to the Docker image build (--build-arg VLLM_VERSION).
VLLM_VERSION=0.17.0
# Upstream vLLM release used by the vllm-metal build
# (Makefile vllm-metal-dev, scripts/build-vllm-metal-tarball.sh).
VLLM_UPSTREAM_VERSION=0.17.1
# Release tag of the prebuilt vllm-metal tarball.
VLLM_METAL_RELEASE=v0.1.0-20260320-122309
# Release tag of the prebuilt diffusers tarball.
DIFFUSERS_RELEASE=v0.1.0-20260216-000000
SGLANG_VERSION=0.5.6
LLAMA_SERVER_VERSION=latest
BASE_IMAGE=ubuntu:26.04
34 changes: 26 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
# Project variables
include .versions

APP_NAME := model-runner
GO_VERSION := 1.25.8
LLAMA_SERVER_VERSION := latest
LLAMA_SERVER_VARIANT := cpu
BASE_IMAGE := ubuntu:26.04
VLLM_BASE_IMAGE := nvidia/cuda:13.0.2-runtime-ubuntu24.04
VLLM_VERSION ?= 0.17.0
DOCKER_IMAGE := docker/model-runner:latest
DOCKER_IMAGE_VLLM := docker/model-runner:latest-vllm-cuda
DOCKER_IMAGE_SGLANG := docker/model-runner:latest-sglang
Expand All @@ -15,16 +13,18 @@ LLAMA_ARGS ?=
DOCKER_BUILD_ARGS := \
--load \
--platform linux/$(shell docker version --format '{{.Server.Arch}}') \
--build-arg GO_VERSION=$(GO_VERSION) \
--build-arg LLAMA_SERVER_VERSION=$(LLAMA_SERVER_VERSION) \
--build-arg LLAMA_SERVER_VARIANT=$(LLAMA_SERVER_VARIANT) \
--build-arg SGLANG_VERSION=$(SGLANG_VERSION) \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg VLLM_VERSION='$(VLLM_VERSION)' \
--target $(DOCKER_TARGET) \
-t $(DOCKER_IMAGE)

# Phony targets grouped by category
.PHONY: build build-cli build-dmr build-llamacpp install-cli run clean test integration-tests e2e
.PHONY: validate validate-all lint help
.PHONY: validate validate-versions validate-all lint help
.PHONY: docker-build docker-build-multiplatform docker-run docker-run-impl
.PHONY: docker-build-vllm docker-run-vllm docker-build-sglang docker-run-sglang
.PHONY: test-docker-ce-installation
Expand Down Expand Up @@ -107,6 +107,23 @@ validate:
find . -type f -name "*.sh" | grep -v "pkg/go-containerregistry\|llamacpp/native/vendor" | xargs shellcheck
@echo "✓ Shellcheck validation passed!"

validate-versions:
@errors=0; \
while IFS='=' read -r key value || [ -n "$$key" ]; do \
case "$$key" in ''|\#*) continue ;; esac; \
value=$$(echo "$$value" | sed 's/[[:space:]]*#.*//;s/[[:space:]]*$$//'); \
dockerfile_val=$$(grep -m1 "^ARG $${key}=" Dockerfile | cut -d= -f2- | sed 's/[[:space:]]*#.*//;s/[[:space:]]*$$//'); \
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it expected for the script to skip keys where no ARG X= exists in Dockerfile?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. The current behavior checks that every key in .versions that also appears as an ARG in the Dockerfile has the same value, while keys that live only in .versions (e.g. VLLM_METAL_RELEASE) are silently skipped.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it worth having the values provided in the Dockerfile as well, rather than relying on the Makefile to always supply them?

ARG BASE_IMAGE

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I think it is worth it, and you already mentioned that in this comment:

One concern: Dockerfile ARG defaults can't be removed entirely. The Dockerfile needs to work standalone (docker build . without Make), for example in the inference-engine repo's CI which builds the image directly. Removing ARG defaults would break that. Instead, keep the ARG defaults in the Dockerfile but always pass --build-arg from Make/CI so the .versions values take precedence. The Dockerfile defaults become the fallback, not the source of truth.

[ -z "$$dockerfile_val" ] && continue; \
if [ "$$value" != "$$dockerfile_val" ]; then \
echo "MISMATCH: $$key — .versions=$$value Dockerfile=$$dockerfile_val"; \
errors=$$((errors + 1)); \
else \
echo "OK: $$key=$$value"; \
fi; \
done < .versions; \
[ $$errors -eq 0 ] || exit 1
@echo "✓ .versions is in sync with Dockerfile ARGs"

lint:
@echo "Running golangci-lint..."
golangci-lint run ./...
Expand All @@ -129,6 +146,9 @@ validate-all:
@echo "==> Running shellcheck validation..."
@$(MAKE) validate
@echo ""
@echo "==> Validating .versions against Dockerfile ARGs..."
@$(MAKE) validate-versions
@echo ""
@echo "==> All validations passed! ✅"

# Build Docker image
Expand Down Expand Up @@ -184,7 +204,6 @@ docker-run-impl:

# vllm-metal (macOS ARM64 only)
# The tarball is self-contained: includes a standalone Python 3.12 + all packages.
VLLM_METAL_RELEASE ?= v0.1.0-20260320-122309
VLLM_METAL_INSTALL_DIR := $(HOME)/.docker/model-runner/vllm-metal
VLLM_METAL_TARBALL := vllm-metal-macos-arm64-$(VLLM_METAL_RELEASE).tar.gz

Expand Down Expand Up @@ -237,7 +256,7 @@ vllm-metal-dev:
rm -rf "$(VLLM_METAL_INSTALL_DIR)"; \
$$PYTHON_BIN -m venv "$(VLLM_METAL_INSTALL_DIR)"; \
. "$(VLLM_METAL_INSTALL_DIR)/bin/activate" && \
VLLM_UPSTREAM_VERSION="0.17.1" && \
VLLM_UPSTREAM_VERSION=$(VLLM_UPSTREAM_VERSION) && \
WORK_DIR=$$(mktemp -d) && \
curl -fsSL -o "$$WORK_DIR/vllm.tar.gz" "https://github.com/vllm-project/vllm/releases/download/v$$VLLM_UPSTREAM_VERSION/vllm-$$VLLM_UPSTREAM_VERSION.tar.gz" && \
tar -xzf "$$WORK_DIR/vllm.tar.gz" -C "$$WORK_DIR" && \
Expand All @@ -257,7 +276,6 @@ vllm-metal-clean:

# diffusers (macOS ARM64 and Linux)
# The tarball is self-contained: includes a standalone Python 3.12 + all packages.
DIFFUSERS_RELEASE ?= v0.1.0-20260216-000000
DIFFUSERS_INSTALL_DIR := $(HOME)/.docker/model-runner/diffusers
DIFFUSERS_OS := $(shell uname -s | tr '[:upper:]' '[:lower:]')
DIFFUSERS_ARCH := $(shell uname -m | sed 's/x86_64/amd64/' | sed 's/aarch64/arm64/')
Expand Down
13 changes: 8 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ MODEL_RUNNER_HOST=http://localhost:13434 ./model-cli list
## Using the Makefile

This project includes a Makefile to simplify common development tasks. Docker targets require Docker Desktop >= 4.41.0.

Run `make help` for a full list, but the key targets are:

- `build` - Build the Go application
Expand Down Expand Up @@ -194,6 +195,8 @@ This will:
- Start the service on port 8080 (or the specified port)
- All models downloaded will be stored in the host's `models` directory and will persist between container runs

> NOTE: The [`.versions`](.versions) file is the single source of truth for all version variables (Go, vLLM, SGLang, llama-server, etc.).

### llama.cpp integration

The Docker image includes the llama.cpp server binary from the `docker/docker-model-backend-llamacpp` image. You can specify the version of the image to use by setting the `LLAMA_SERVER_VERSION` variable. Additionally, you can configure the target OS, architecture, and acceleration type:
Expand Down Expand Up @@ -228,7 +231,7 @@ The Docker image also supports vLLM as an alternative inference backend.
To build a Docker image with vLLM support:

```sh
# Build with default settings (vLLM 0.12.0)
# Build with default settings (vLLM 0.17.0)
make docker-build DOCKER_TARGET=final-vllm BASE_IMAGE=nvidia/cuda:13.0.2-runtime-ubuntu24.04 LLAMA_SERVER_VARIANT=cuda

# Build for specific architecture
Expand All @@ -237,15 +240,15 @@ docker buildx build \
--target final-vllm \
--build-arg BASE_IMAGE=nvidia/cuda:13.0.2-runtime-ubuntu24.04 \
--build-arg LLAMA_SERVER_VARIANT=cuda \
--build-arg VLLM_VERSION=0.12.0 \
--build-arg VLLM_VERSION=0.17.0 \
-t docker/model-runner:vllm .
```

#### Build Arguments

The vLLM variant supports the following build arguments:

- **VLLM_VERSION**: The vLLM version to install (default: `0.12.0`)
- **VLLM_VERSION**: The vLLM version to install (default: `0.17.0`)
- **VLLM_CUDA_VERSION**: The CUDA version suffix for the wheel (default: `cu130`)
- **VLLM_PYTHON_TAG**: The Python compatibility tag (default: `cp38-abi3`, compatible with Python 3.8+)

Expand Down Expand Up @@ -274,8 +277,8 @@ To update to a new vLLM version:
```sh
docker buildx build \
--target final-vllm \
--build-arg VLLM_VERSION=0.11.1 \
-t docker/model-runner:vllm-0.11.1 .
--build-arg VLLM_VERSION=0.17.0 \
-t docker/model-runner:vllm-0.17.0 .
```

The vLLM wheels are sourced from the official vLLM GitHub Releases at `https://github.com/vllm-project/vllm/releases`, which provides prebuilt wheels for each release version.
Expand Down
3 changes: 2 additions & 1 deletion scripts/build-vllm-metal-tarball.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ WORK_DIR=$(mktemp -d)
# Convert tarball path to absolute before we cd elsewhere
TARBALL="$(cd "$(dirname "$TARBALL_ARG")" && pwd)/$(basename "$TARBALL_ARG")"

VLLM_VERSION="0.17.1"
# Read the upstream vLLM version from the .versions file one directory above
# this script. `cut -f2-` keeps everything after the first '=' so a value that
# itself contains '=' is not truncated; sed then drops a trailing "# comment"
# and trailing whitespace.
VLLM_VERSION=$(grep '^VLLM_UPSTREAM_VERSION=' "$(cd "$(dirname "$0")/.." && pwd)/.versions" | cut -d= -f2- | sed 's/[[:space:]]*#.*//;s/[[:space:]]*$//')

# Extract wheel version from release tag (e.g., v0.1.0-20260126-121650 -> 0.1.0)
VLLM_METAL_WHEEL_VERSION=$(echo "$VLLM_METAL_RELEASE" | sed 's/^v//' | cut -d'-' -f1)
VLLM_METAL_WHEEL_URL="https://github.com/vllm-project/vllm-metal/releases/download/${VLLM_METAL_RELEASE}/vllm_metal-${VLLM_METAL_WHEEL_VERSION}-cp312-cp312-macosx_11_0_arm64.whl"
Expand Down