From 8743df14c68b6fdddafcbb1fd44347c8aa57df3c Mon Sep 17 00:00:00 2001
From: "https://github.com/alimx07" <149194464+alimx07@users.noreply.github.com>
Date: Wed, 1 Apr 2026 21:19:20 +0200
Subject: [PATCH 1/3] centralize version variables into .versions

Add a .versions file as the single source of truth for version variables
(Go, vLLM, vLLM upstream, SGLang, llama-server, vllm-metal, diffusers,
base image), replacing values scattered across the Makefile, Dockerfile,
CI workflows, and scripts.
---
Review notes (text between "---" and the first "diff --git" is ignored by
git am and is not part of the commit message):

 * Makefile, validate-versions: a Dockerfile ARG default that is a
   dot-delimited prefix of the .versions value (e.g. 1.25 for 1.25.8) is
   now reported as OK. With the strict equality check, PATCH 3/3 (which
   sets ARG GO_VERSION=1.25) makes `make validate-versions`, and therefore
   the CI step that calls it, exit 1.
 * Makefile, vllm-metal-dev: the expanded VLLM_UPSTREAM_VERSION is quoted
   again, as the literal it replaces was.
 * scripts/build-vllm-metal-tarball.sh: `cut -d= -f2-` instead of `-f2`,
   matching every other .versions reader in this series and keeping
   values that contain "=" intact.
 * Hunk line counts and the diffstat are unchanged by the above. The blob
   ids on the `index` lines are carried over as-is and do not describe the
   edited Makefile / script content.
 * Line breaks and indentation of this series were reconstructed from the
   hunk line counts; verify context lines against the tree before
   applying (or apply with --ignore-whitespace).

 .github/actions/load-go-version/action.yml | 14 +++++++++
 .github/workflows/ci.yml                   | 15 ++++++++--
 .github/workflows/e2e-test.yml             |  6 +++-
 .github/workflows/integration-test.yml     |  6 +++-
 .github/workflows/release.yml              | 31 +++++++++++++++-----
 .versions                                  |  8 +++++
 Dockerfile                                 |  2 +-
 Makefile                                   | 34 +++++++++++++++++-----
 README.md                                  | 13 +++++----
 scripts/build-vllm-metal-tarball.sh        |  3 +-
 10 files changed, 106 insertions(+), 26 deletions(-)
 create mode 100644 .github/actions/load-go-version/action.yml
 create mode 100644 .versions

diff --git a/.github/actions/load-go-version/action.yml b/.github/actions/load-go-version/action.yml
new file mode 100644
index 00000000..af6646a9
--- /dev/null
+++ b/.github/actions/load-go-version/action.yml
@@ -0,0 +1,14 @@
+name: Load Go version
+description: Read GO_VERSION from .versions and expose it as an output
+
+outputs:
+  go-version:
+    value: ${{ steps.load.outputs.GO_VERSION }}
+
+runs:
+  using: composite
+  steps:
+    - name: Load GO version
+      id: load
+      shell: bash
+      run: grep '^GO_VERSION=' .versions | sed 's/[[:space:]]*#.*//;s/[[:space:]]*$//' >> "$GITHUB_OUTPUT"
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d5f59c76..26548494 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -16,10 +16,14 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
 
+      - name: Load GO version
+        id: versions
+        uses: ./.github/actions/load-go-version
+
       - name: Set up Go
         uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c
         with:
-          go-version: 1.25.8
+          go-version: ${{ steps.versions.outputs.go-version }}
           cache: true
 
       - name: Install golangci-lint
@@ -42,10 +46,17 @@ jobs:
       - name: Verify vendor/ is not present
         run: stat vendor && exit 1 || exit 0
 
+      - name: Load GO version
+        id: versions
+        uses: ./.github/actions/load-go-version
+
+      - name: Validate .versions against Dockerfile ARGs
+        run: make validate-versions
+
       - name: Set up Go
         uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c
         with:
-          go-version: 1.25.8
+          go-version: ${{ steps.versions.outputs.go-version }}
           cache: true
 
       - name: Check go mod tidy
diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml
index 777462c1..c6349b78 100644
--- a/.github/workflows/e2e-test.yml
+++ b/.github/workflows/e2e-test.yml
@@ -24,10 +24,14 @@ jobs:
         with:
           submodules: recursive
 
+      - name: Load GO version
+        id: versions
+        uses: ./.github/actions/load-go-version
+
       - name: Set up Go
         uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c
         with:
-          go-version: 1.25.8
+          go-version: ${{ steps.versions.outputs.go-version }}
           cache: true
 
       - name: Set up Docker
diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index 106bff7b..da70b588 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -15,10 +15,14 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
 
+      - name: Load GO version
+        id: versions
+        uses: ./.github/actions/load-go-version
+
       - name: Set up Go
         uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c
         with:
-          go-version: 1.25.8
+          go-version: ${{ steps.versions.outputs.go-version }}
           cache: true
 
       - name: Set up Docker Buildx
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 1a4f3593..d404847f 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -23,17 +23,14 @@ on:
         description: "llama-server version"
         required: false
         type: string
-        default: "latest"
       vllmVersion:
         description: "vLLM version"
         required: false
         type: string
-        default: "0.17.0"
       sglangVersion:
         description: "SGLang version"
         required: false
         type: string
-        default: "0.4.0"
       # This can be removed once we have llama.cpp built for MUSA and CANN.
       buildMusaCann:
         description: "Build MUSA and CANN images"
@@ -204,10 +201,14 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
 
+      - name: Load GO version
+        id: versions
+        uses: ./.github/actions/load-go-version
+
       - name: Set up Go
         uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c
         with:
-          go-version: 1.25.8
+          go-version: ${{ steps.versions.outputs.go-version }}
           cache: true
 
       - name: Run tests
@@ -223,9 +224,9 @@ jobs:
       contents: read
     env:
       RELEASE_TAG: ${{ needs.prepare.outputs.release_tag }}
-      LLAMA_SERVER_VERSION: ${{ inputs.llamaServerVersion || 'latest' }}
+      LLAMA_SERVER_VERSION: ${{ inputs.llamaServerVersion }}
       VLLM_VERSION: ${{ inputs.vllmVersion }}
-      SGLANG_VERSION: ${{ inputs.sglangVersion || '0.4.0' }}
+      SGLANG_VERSION: ${{ inputs.sglangVersion }}
       BUILD_MUSA_CANN: ${{ inputs.buildMusaCann || 'false' }}
     steps:
       - name: Checkout repo
@@ -263,6 +264,18 @@ jobs:
           echo "docker/model-runner:$RELEASE_TAG-cann" >> "$GITHUB_OUTPUT"
           echo "docker/model-runner:latest-cann" >> "$GITHUB_OUTPUT"
           echo 'EOF' >> "$GITHUB_OUTPUT"
+
+      - name: Load versions
+        shell: bash
+        run: |
+          VERSIONS_LLAMA=$(grep '^LLAMA_SERVER_VERSION=' .versions | cut -d= -f2- | sed 's/[[:space:]]*#.*//;s/[[:space:]]*$//')
+          VERSIONS_VLLM=$(grep '^VLLM_VERSION=' .versions | cut -d= -f2- | sed 's/[[:space:]]*#.*//;s/[[:space:]]*$//')
+          VERSIONS_SGLANG=$(grep '^SGLANG_VERSION=' .versions | cut -d= -f2- | sed 's/[[:space:]]*#.*//;s/[[:space:]]*$//')
+
+          # Use input value if set, otherwise fallback to .versions
+          echo "LLAMA_SERVER_VERSION=${LLAMA_SERVER_VERSION:-$VERSIONS_LLAMA}" >> "$GITHUB_ENV"
+          echo "VLLM_VERSION=${VLLM_VERSION:-$VERSIONS_VLLM}" >> "$GITHUB_ENV"
+          echo "SGLANG_VERSION=${SGLANG_VERSION:-$VERSIONS_SGLANG}" >> "$GITHUB_ENV"
 
       - name: Log in to DockerHub
         uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2
@@ -535,11 +548,15 @@ jobs:
           token: ${{ secrets.CLI_RELEASE_PAT }}
           fetch-depth: 0
 
+      - name: Load GO version
+        id: versions
+        uses: ./.github/actions/load-go-version
+
       - name: Set up Go
         if: steps.check-docs.outputs.changed == 'true'
         uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c
         with:
-          go-version: 1.25.8
+          go-version: ${{ steps.versions.outputs.go-version }}
           cache: true
 
       - name: Vendor model-runner CLI docs
diff --git a/.versions b/.versions
new file mode 100644
index 00000000..68512bd4
--- /dev/null
+++ b/.versions
@@ -0,0 +1,8 @@
+GO_VERSION=1.25.8
+VLLM_VERSION=0.17.0
+VLLM_UPSTREAM_VERSION=0.17.1
+VLLM_METAL_RELEASE=v0.1.0-20260320-122309
+DIFFUSERS_RELEASE=v0.1.0-20260216-000000
+SGLANG_VERSION=0.5.6
+LLAMA_SERVER_VERSION=latest
+BASE_IMAGE=ubuntu:26.04
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index c4f35c57..3ab2151d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,6 @@
 # syntax=docker/dockerfile:1
 
-ARG GO_VERSION=1.25
+ARG GO_VERSION=1.25.8
 ARG LLAMA_SERVER_VERSION=latest
 ARG LLAMA_SERVER_VARIANT=cpu
 ARG LLAMA_BINARY_PATH=/com.docker.llama-server.native.linux.${LLAMA_SERVER_VARIANT}.${TARGETARCH}
diff --git a/Makefile b/Makefile
index 6bd8cf0f..5063ced0 100644
--- a/Makefile
+++ b/Makefile
@@ -1,11 +1,9 @@
 # Project variables
+include .versions
+
 APP_NAME := model-runner
-GO_VERSION := 1.25.8
-LLAMA_SERVER_VERSION := latest
 LLAMA_SERVER_VARIANT := cpu
-BASE_IMAGE := ubuntu:26.04
 VLLM_BASE_IMAGE := nvidia/cuda:13.0.2-runtime-ubuntu24.04
-VLLM_VERSION ?= 0.17.0
 DOCKER_IMAGE := docker/model-runner:latest
 DOCKER_IMAGE_VLLM := docker/model-runner:latest-vllm-cuda
 DOCKER_IMAGE_SGLANG := docker/model-runner:latest-sglang
@@ -15,8 +13,10 @@ LLAMA_ARGS ?=
 DOCKER_BUILD_ARGS := \
 	--load \
 	--platform linux/$(shell docker version --format '{{.Server.Arch}}') \
+	--build-arg GO_VERSION=$(GO_VERSION) \
 	--build-arg LLAMA_SERVER_VERSION=$(LLAMA_SERVER_VERSION) \
 	--build-arg LLAMA_SERVER_VARIANT=$(LLAMA_SERVER_VARIANT) \
+	--build-arg SGLANG_VERSION=$(SGLANG_VERSION) \
 	--build-arg BASE_IMAGE=$(BASE_IMAGE) \
 	--build-arg VLLM_VERSION='$(VLLM_VERSION)' \
 	--target $(DOCKER_TARGET) \
@@ -24,7 +24,7 @@ DOCKER_BUILD_ARGS := \
 
 # Phony targets grouped by category
 .PHONY: build build-cli build-dmr build-llamacpp install-cli run clean test integration-tests e2e
-.PHONY: validate validate-all lint help
+.PHONY: validate validate-versions validate-all lint help
 .PHONY: docker-build docker-build-multiplatform docker-run docker-run-impl
 .PHONY: docker-build-vllm docker-run-vllm docker-build-sglang docker-run-sglang
 .PHONY: test-docker-ce-installation
@@ -107,6 +107,23 @@ validate:
 	find . -type f -name "*.sh" | grep -v "pkg/go-containerregistry\|llamacpp/native/vendor" | xargs shellcheck
 	@echo "✓ Shellcheck validation passed!"
 
+validate-versions:
+	@errors=0; \
+	while IFS='=' read -r key value || [ -n "$$key" ]; do \
+		case "$$key" in ''|\#*) continue ;; esac; \
+		value=$$(echo "$$value" | sed 's/[[:space:]]*#.*//;s/[[:space:]]*$$//'); \
+		dockerfile_val=$$(grep -m1 "^ARG $${key}=" Dockerfile | cut -d= -f2- | sed 's/[[:space:]]*#.*//;s/[[:space:]]*$$//'); \
+		[ -z "$$dockerfile_val" ] && continue; \
+		if [ "$$value" != "$$dockerfile_val" ] && [ "$${value#"$$dockerfile_val".}" = "$$value" ]; then \
+			echo "MISMATCH: $$key — .versions=$$value Dockerfile=$$dockerfile_val"; \
+			errors=$$((errors + 1)); \
+		else \
+			echo "OK: $$key=$$value"; \
+		fi; \
+	done < .versions; \
+	[ $$errors -eq 0 ] || exit 1
+	@echo "✓ .versions is in sync with Dockerfile ARGs"
+
 lint:
 	@echo "Running golangci-lint..."
 	golangci-lint run ./...
@@ -129,6 +146,9 @@ validate-all:
 	@echo "==> Running shellcheck validation..."
 	@$(MAKE) validate
 	@echo ""
+	@echo "==> Validating .versions against Dockerfile ARGs..."
+	@$(MAKE) validate-versions
+	@echo ""
 	@echo "==> All validations passed! ✅"
 
 # Build Docker image
@@ -184,7 +204,6 @@ docker-run-impl:
 
 # vllm-metal (macOS ARM64 only)
 # The tarball is self-contained: includes a standalone Python 3.12 + all packages.
-VLLM_METAL_RELEASE ?= v0.1.0-20260320-122309
 VLLM_METAL_INSTALL_DIR := $(HOME)/.docker/model-runner/vllm-metal
 VLLM_METAL_TARBALL := vllm-metal-macos-arm64-$(VLLM_METAL_RELEASE).tar.gz
 
@@ -237,7 +256,7 @@ vllm-metal-dev:
 	rm -rf "$(VLLM_METAL_INSTALL_DIR)"; \
 	$$PYTHON_BIN -m venv "$(VLLM_METAL_INSTALL_DIR)"; \
 	. "$(VLLM_METAL_INSTALL_DIR)/bin/activate" && \
-	VLLM_UPSTREAM_VERSION="0.17.1" && \
+	VLLM_UPSTREAM_VERSION="$(VLLM_UPSTREAM_VERSION)" && \
 	WORK_DIR=$$(mktemp -d) && \
 	curl -fsSL -o "$$WORK_DIR/vllm.tar.gz" "https://github.com/vllm-project/vllm/releases/download/v$$VLLM_UPSTREAM_VERSION/vllm-$$VLLM_UPSTREAM_VERSION.tar.gz" && \
 	tar -xzf "$$WORK_DIR/vllm.tar.gz" -C "$$WORK_DIR" && \
@@ -257,7 +276,6 @@ vllm-metal-clean:
 
 # diffusers (macOS ARM64 and Linux)
 # The tarball is self-contained: includes a standalone Python 3.12 + all packages.
-DIFFUSERS_RELEASE ?= v0.1.0-20260216-000000
 DIFFUSERS_INSTALL_DIR := $(HOME)/.docker/model-runner/diffusers
 DIFFUSERS_OS := $(shell uname -s | tr '[:upper:]' '[:lower:]')
 DIFFUSERS_ARCH := $(shell uname -m | sed 's/x86_64/amd64/' | sed 's/aarch64/arm64/')
diff --git a/README.md b/README.md
index 183dfb0f..3e27e0d2 100644
--- a/README.md
+++ b/README.md
@@ -157,6 +157,7 @@ MODEL_RUNNER_HOST=http://localhost:13434 ./model-cli list
 ## Using the Makefile
 
 This project includes a Makefile to simplify common development tasks. Docker targets require Docker Desktop >= 4.41.0.
+
 Run `make help` for a full list, but the key targets are:
 
 - `build` - Build the Go application
@@ -194,6 +195,8 @@ This will:
 - Start the service on port 8080 (or the specified port)
 - All models downloaded will be stored in the host's `models` directory and will persist between container runs
 
+> NOTE: The [`.versions`](.versions) file is the single source of truth for all version variables (Go, vLLM, SGLang, llama-server, etc.).
+
 ### llama.cpp integration
 
 The Docker image includes the llama.cpp server binary from the `docker/docker-model-backend-llamacpp` image. You can specify the version of the image to use by setting the `LLAMA_SERVER_VERSION` variable. Additionally, you can configure the target OS, architecture, and acceleration type:
@@ -228,7 +231,7 @@ The Docker image also supports vLLM as an alternative inference backend.
 To build a Docker image with vLLM support:
 
 ```sh
-# Build with default settings (vLLM 0.12.0)
+# Build with default settings (vLLM 0.17.0)
 make docker-build DOCKER_TARGET=final-vllm BASE_IMAGE=nvidia/cuda:13.0.2-runtime-ubuntu24.04 LLAMA_SERVER_VARIANT=cuda
 
 # Build for specific architecture
@@ -237,7 +240,7 @@ docker buildx build \
   --target final-vllm \
   --build-arg BASE_IMAGE=nvidia/cuda:13.0.2-runtime-ubuntu24.04 \
   --build-arg LLAMA_SERVER_VARIANT=cuda \
-  --build-arg VLLM_VERSION=0.12.0 \
+  --build-arg VLLM_VERSION=0.17.0 \
   -t docker/model-runner:vllm .
 ```
 
@@ -245,7 +248,7 @@ docker buildx build \
 
 The vLLM variant supports the following build arguments:
 
-- **VLLM_VERSION**: The vLLM version to install (default: `0.12.0`)
+- **VLLM_VERSION**: The vLLM version to install (default: `0.17.0`)
 - **VLLM_CUDA_VERSION**: The CUDA version suffix for the wheel (default: `cu130`)
 - **VLLM_PYTHON_TAG**: The Python compatibility tag (default: `cp38-abi3`, compatible with Python 3.8+)
 
@@ -274,8 +277,8 @@ To update to a new vLLM version:
 ```sh
 docker buildx build \
   --target final-vllm \
-  --build-arg VLLM_VERSION=0.11.1 \
-  -t docker/model-runner:vllm-0.11.1 .
+  --build-arg VLLM_VERSION=0.17.0 \
+  -t docker/model-runner:vllm-0.17.0 .
 ```
 
 The vLLM wheels are sourced from the official vLLM GitHub Releases at `https://github.com/vllm-project/vllm/releases`, which provides prebuilt wheels for each release version.
diff --git a/scripts/build-vllm-metal-tarball.sh b/scripts/build-vllm-metal-tarball.sh
index 6573bbe5..08d27eb1 100755
--- a/scripts/build-vllm-metal-tarball.sh
+++ b/scripts/build-vllm-metal-tarball.sh
@@ -20,7 +20,8 @@ WORK_DIR=$(mktemp -d)
 # Convert tarball path to absolute before we cd elsewhere
 TARBALL="$(cd "$(dirname "$TARBALL_ARG")" && pwd)/$(basename "$TARBALL_ARG")"
 
-VLLM_VERSION="0.17.1"
+VLLM_VERSION=$(grep '^VLLM_UPSTREAM_VERSION=' "$(cd "$(dirname "$0")/.." && pwd)/.versions" | cut -d= -f2- | sed 's/[[:space:]]*#.*//;s/[[:space:]]*$//')
+
 # Extract wheel version from release tag (e.g., v0.1.0-20260126-121650 -> 0.1.0)
 VLLM_METAL_WHEEL_VERSION=$(echo "$VLLM_METAL_RELEASE" | sed 's/^v//' | cut -d'-' -f1)
 VLLM_METAL_WHEEL_URL="https://github.com/vllm-project/vllm-metal/releases/download/${VLLM_METAL_RELEASE}/vllm_metal-${VLLM_METAL_WHEEL_VERSION}-cp312-cp312-macosx_11_0_arm64.whl"

From 32ea7c66e9c7f6db9e1baf5e36549f45594e1df4 Mon Sep 17 00:00:00 2001
From: Dorin-Andrei Geman
Date: Fri, 3 Apr 2026 11:35:14 +0300
Subject: [PATCH 2/3] fix: add trailing newline to .versions

---
 .versions | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.versions b/.versions
index 68512bd4..412dbceb 100644
--- a/.versions
+++ b/.versions
@@ -5,4 +5,4 @@ VLLM_METAL_RELEASE=v0.1.0-20260320-122309
 DIFFUSERS_RELEASE=v0.1.0-20260216-000000
 SGLANG_VERSION=0.5.6
 LLAMA_SERVER_VERSION=latest
-BASE_IMAGE=ubuntu:26.04
\ No newline at end of file
+BASE_IMAGE=ubuntu:26.04

From 12b677b22e5a8bab58f9a123843beb34b43f29a9 Mon Sep 17 00:00:00 2001
From: "https://github.com/alimx07" <149194464+alimx07@users.noreply.github.com>
Date: Fri, 3 Apr 2026 20:07:24 +0200
Subject: [PATCH 3/3] Drop go patch version in Dockerfile

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 3ab2151d..c4f35c57 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,6 @@
 # syntax=docker/dockerfile:1
 
-ARG GO_VERSION=1.25.8
+ARG GO_VERSION=1.25
 ARG LLAMA_SERVER_VERSION=latest
 ARG LLAMA_SERVER_VARIANT=cpu
 ARG LLAMA_BINARY_PATH=/com.docker.llama-server.native.linux.${LLAMA_SERVER_VARIANT}.${TARGETARCH}