From 8743df14c68b6fdddafcbb1fd44347c8aa57df3c Mon Sep 17 00:00:00 2001
From: "https://github.com/alimx07"
 <149194464+alimx07@users.noreply.github.com>
Date: Wed, 1 Apr 2026 21:19:20 +0200
Subject: [PATCH 1/3] centralize version variables into .versions

Add a .versions file as the single source of truth for version
variables (Go, vLLM, vLLM upstream, SGLang, llama-server, vllm-metal,
diffusers, base image), replacing values scattered across
the Makefile, Dockerfile, CI workflows, and scripts.
---
 .github/actions/load-go-version/action.yml | 14 +++++++++
 .github/workflows/ci.yml                   | 15 ++++++++--
 .github/workflows/e2e-test.yml             |  6 +++-
 .github/workflows/integration-test.yml     |  6 +++-
 .github/workflows/release.yml              | 31 +++++++++++++++-----
 .versions                                  |  8 +++++
 Dockerfile                                 |  2 +-
 Makefile                                   | 34 +++++++++++++++++-----
 README.md                                  | 13 +++++----
 scripts/build-vllm-metal-tarball.sh        |  3 +-
 10 files changed, 106 insertions(+), 26 deletions(-)
 create mode 100644 .github/actions/load-go-version/action.yml
 create mode 100644 .versions

diff --git a/.github/actions/load-go-version/action.yml b/.github/actions/load-go-version/action.yml
new file mode 100644
index 00000000..af6646a9
--- /dev/null
+++ b/.github/actions/load-go-version/action.yml
@@ -0,0 +1,14 @@
+name: Load Go version
+description: Read GO_VERSION from .versions and expose it as an output
+outputs:
+  go-version:
+    description: Value of the GO_VERSION entry in .versions, with any trailing comment removed
+    value: ${{ steps.load.outputs.GO_VERSION }}
+
+runs:
+  using: composite
+  steps:
+    - name: Load GO version
+      id: load
+      shell: bash
+      run: grep '^GO_VERSION=' .versions | sed 's/[[:space:]]*#.*//;s/[[:space:]]*$//' >> "$GITHUB_OUTPUT"
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d5f59c76..26548494 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -16,10 +16,14 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
 
+      - name: Load GO version
+        id: versions
+        uses: ./.github/actions/load-go-version
+
       - name: Set up Go
         uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c
         with:
-          go-version: 1.25.8
+          go-version: ${{ steps.versions.outputs.go-version }}
           cache: true
 
       - name: Install golangci-lint
@@ -42,10 +46,17 @@ jobs:
       - name: Verify vendor/ is not present
         run: stat vendor && exit 1 || exit 0
 
+      - name: Load GO version
+        id: versions
+        uses: ./.github/actions/load-go-version
+
+      - name: Validate .versions against Dockerfile ARGs
+        run: make validate-versions
+
       - name: Set up Go
         uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c
         with:
-          go-version: 1.25.8
+          go-version: ${{ steps.versions.outputs.go-version }}
           cache: true
 
       - name: Check go mod tidy
diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml
index 777462c1..c6349b78 100644
--- a/.github/workflows/e2e-test.yml
+++ b/.github/workflows/e2e-test.yml
@@ -24,10 +24,14 @@ jobs:
         with:
           submodules: recursive
 
+      - name: Load GO version
+        id: versions
+        uses: ./.github/actions/load-go-version
+
       - name: Set up Go
         uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c
         with:
-          go-version: 1.25.8
+          go-version: ${{ steps.versions.outputs.go-version }}
           cache: true
 
       - name: Set up Docker
diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index 106bff7b..da70b588 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -15,10 +15,14 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
 
+      - name: Load GO version
+        id: versions
+        uses: ./.github/actions/load-go-version
+
       - name: Set up Go
         uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c
         with:
-          go-version: 1.25.8
+          go-version: ${{ steps.versions.outputs.go-version }}
           cache: true
 
       - name: Set up Docker Buildx
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 1a4f3593..d404847f 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -23,17 +23,14 @@ on:
         description: "llama-server version"
         required: false
         type: string
-        default: "latest"
       vllmVersion:
         description: "vLLM version"
         required: false
         type: string
-        default: "0.17.0"
       sglangVersion:
         description: "SGLang version"
         required: false
         type: string
-        default: "0.4.0"
       # This can be removed once we have llama.cpp built for MUSA and CANN.
       buildMusaCann:
         description: "Build MUSA and CANN images"
@@ -204,10 +201,14 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
 
+      - name: Load GO version
+        id: versions
+        uses: ./.github/actions/load-go-version
+
       - name: Set up Go
         uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c
         with:
-          go-version: 1.25.8
+          go-version: ${{ steps.versions.outputs.go-version }}
           cache: true
 
       - name: Run tests
@@ -223,9 +224,9 @@ jobs:
       contents: read
     env:
       RELEASE_TAG: ${{ needs.prepare.outputs.release_tag }}
-      LLAMA_SERVER_VERSION: ${{ inputs.llamaServerVersion || 'latest' }}
+      LLAMA_SERVER_VERSION: ${{ inputs.llamaServerVersion }}
       VLLM_VERSION: ${{ inputs.vllmVersion }}
-      SGLANG_VERSION: ${{ inputs.sglangVersion || '0.4.0' }}
+      SGLANG_VERSION: ${{ inputs.sglangVersion }}
       BUILD_MUSA_CANN: ${{ inputs.buildMusaCann || 'false' }}
     steps:
       - name: Checkout repo
@@ -263,6 +264,18 @@ jobs:
           echo "docker/model-runner:$RELEASE_TAG-cann" >> "$GITHUB_OUTPUT"
           echo "docker/model-runner:latest-cann" >> "$GITHUB_OUTPUT"
           echo 'EOF' >> "$GITHUB_OUTPUT"
+
+      - name: Load versions
+        shell: bash
+        run: |
+          # Resolve each version: a non-empty workflow input (exposed through the
+          # job-level env) wins, otherwise fall back to the value in .versions.
+          for key in LLAMA_SERVER_VERSION VLLM_VERSION SGLANG_VERSION; do
+            fallback=$(grep "^${key}=" .versions | cut -d= -f2- | sed 's/[[:space:]]*#.*//;s/[[:space:]]*$//')
+            resolved="${!key:-$fallback}"
+            [ -n "$resolved" ] || { echo "::error::${key} is empty in both the workflow inputs and .versions"; exit 1; }
+            echo "${key}=${resolved}" >> "$GITHUB_ENV"
+          done
 
       - name: Log in to DockerHub
         uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2
@@ -535,11 +548,15 @@ jobs:
           token: ${{ secrets.CLI_RELEASE_PAT }}
           fetch-depth: 0
 
+      - name: Load GO version
+        id: versions
+        uses: ./.github/actions/load-go-version
+
       - name: Set up Go
         if: steps.check-docs.outputs.changed == 'true'
         uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c
         with:
-          go-version: 1.25.8
+          go-version: ${{ steps.versions.outputs.go-version }}
           cache: true
 
       - name: Vendor model-runner CLI docs
diff --git a/.versions b/.versions
new file mode 100644
index 00000000..68512bd4
--- /dev/null
+++ b/.versions
@@ -0,0 +1,8 @@
+GO_VERSION=1.25.8
+VLLM_VERSION=0.17.0
+VLLM_UPSTREAM_VERSION=0.17.1
+VLLM_METAL_RELEASE=v0.1.0-20260320-122309
+DIFFUSERS_RELEASE=v0.1.0-20260216-000000
+SGLANG_VERSION=0.5.6
+LLAMA_SERVER_VERSION=latest
+BASE_IMAGE=ubuntu:26.04
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index c4f35c57..3ab2151d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,6 @@
 # syntax=docker/dockerfile:1
 
-ARG GO_VERSION=1.25
+ARG GO_VERSION=1.25.8
 ARG LLAMA_SERVER_VERSION=latest
 ARG LLAMA_SERVER_VARIANT=cpu
 ARG LLAMA_BINARY_PATH=/com.docker.llama-server.native.linux.${LLAMA_SERVER_VARIANT}.${TARGETARCH}
diff --git a/Makefile b/Makefile
index 6bd8cf0f..5063ced0 100644
--- a/Makefile
+++ b/Makefile
@@ -1,11 +1,9 @@
 # Project variables
+include .versions
+
 APP_NAME := model-runner
-GO_VERSION := 1.25.8
-LLAMA_SERVER_VERSION := latest
 LLAMA_SERVER_VARIANT := cpu
-BASE_IMAGE := ubuntu:26.04
 VLLM_BASE_IMAGE := nvidia/cuda:13.0.2-runtime-ubuntu24.04
-VLLM_VERSION ?= 0.17.0
 DOCKER_IMAGE := docker/model-runner:latest
 DOCKER_IMAGE_VLLM := docker/model-runner:latest-vllm-cuda
 DOCKER_IMAGE_SGLANG := docker/model-runner:latest-sglang
@@ -15,8 +13,10 @@ LLAMA_ARGS ?=
 DOCKER_BUILD_ARGS := \
 	--load \
 	--platform linux/$(shell docker version --format '{{.Server.Arch}}') \
+	--build-arg GO_VERSION=$(GO_VERSION) \
 	--build-arg LLAMA_SERVER_VERSION=$(LLAMA_SERVER_VERSION) \
 	--build-arg LLAMA_SERVER_VARIANT=$(LLAMA_SERVER_VARIANT) \
+	--build-arg SGLANG_VERSION=$(SGLANG_VERSION) \
 	--build-arg BASE_IMAGE=$(BASE_IMAGE) \
 	--build-arg VLLM_VERSION='$(VLLM_VERSION)' \
 	--target $(DOCKER_TARGET) \
@@ -24,7 +24,7 @@ DOCKER_BUILD_ARGS := \
 
 # Phony targets grouped by category
 .PHONY: build build-cli build-dmr build-llamacpp install-cli run clean test integration-tests e2e
-.PHONY: validate validate-all lint help
+.PHONY: validate validate-versions validate-all lint help
 .PHONY: docker-build docker-build-multiplatform docker-run docker-run-impl
 .PHONY: docker-build-vllm docker-run-vllm docker-build-sglang docker-run-sglang
 .PHONY: test-docker-ce-installation
@@ -107,6 +107,23 @@ validate:
 	find . -type f -name "*.sh" | grep -v "pkg/go-containerregistry\|llamacpp/native/vendor" | xargs shellcheck
 	@echo "✓ Shellcheck validation passed!"
 
+# Check each .versions key that has an `ARG <key>=` default in the Dockerfile against it.
+# A less specific Dockerfile default (e.g. 1.25 for 1.25.8) counts as in sync.
+validate-versions:
+	@errors=0; \
+	while IFS='=' read -r key value || [ -n "$$key" ]; do \
+		case "$$key" in ''|\#*) continue ;; esac; \
+		value=$$(echo "$$value" | sed 's/[[:space:]]*#.*//;s/[[:space:]]*$$//'); \
+		dockerfile_val=$$(grep -m1 "^ARG $${key}=" Dockerfile | cut -d= -f2- | sed 's/[[:space:]]*#.*//;s/[[:space:]]*$$//'); \
+		[ -z "$$dockerfile_val" ] && continue; \
+		case "$$value" in \
+			"$$dockerfile_val"|"$$dockerfile_val".*) echo "OK: $$key=$$value" ;; \
+			*) echo "MISMATCH: $$key — .versions=$$value  Dockerfile=$$dockerfile_val"; errors=$$((errors + 1)) ;; \
+		esac; \
+	done < .versions; \
+	[ $$errors -eq 0 ] || exit 1
+	@echo "✓ .versions is in sync with Dockerfile ARGs"
+
 lint:
 	@echo "Running golangci-lint..."
 	golangci-lint run ./...
@@ -129,6 +146,9 @@ validate-all:
 	@echo "==> Running shellcheck validation..."
 	@$(MAKE) validate
 	@echo ""
+	@echo "==> Validating .versions against Dockerfile ARGs..."
+	@$(MAKE) validate-versions
+	@echo ""
 	@echo "==> All validations passed! ✅"
 
 # Build Docker image
@@ -184,7 +204,6 @@ docker-run-impl:
 
 # vllm-metal (macOS ARM64 only)
 # The tarball is self-contained: includes a standalone Python 3.12 + all packages.
-VLLM_METAL_RELEASE ?= v0.1.0-20260320-122309
 VLLM_METAL_INSTALL_DIR := $(HOME)/.docker/model-runner/vllm-metal
 VLLM_METAL_TARBALL := vllm-metal-macos-arm64-$(VLLM_METAL_RELEASE).tar.gz
 
@@ -237,7 +256,7 @@ vllm-metal-dev:
 	rm -rf "$(VLLM_METAL_INSTALL_DIR)"; \
 	$$PYTHON_BIN -m venv "$(VLLM_METAL_INSTALL_DIR)"; \
 	. "$(VLLM_METAL_INSTALL_DIR)/bin/activate" && \
-		VLLM_UPSTREAM_VERSION="0.17.1" && \
+		VLLM_UPSTREAM_VERSION=$(VLLM_UPSTREAM_VERSION) && \
 		WORK_DIR=$$(mktemp -d) && \
 		curl -fsSL -o "$$WORK_DIR/vllm.tar.gz" "https://github.com/vllm-project/vllm/releases/download/v$$VLLM_UPSTREAM_VERSION/vllm-$$VLLM_UPSTREAM_VERSION.tar.gz" && \
 		tar -xzf "$$WORK_DIR/vllm.tar.gz" -C "$$WORK_DIR" && \
@@ -257,7 +276,6 @@ vllm-metal-clean:
 
 # diffusers (macOS ARM64 and Linux)
 # The tarball is self-contained: includes a standalone Python 3.12 + all packages.
-DIFFUSERS_RELEASE ?= v0.1.0-20260216-000000
 DIFFUSERS_INSTALL_DIR := $(HOME)/.docker/model-runner/diffusers
 DIFFUSERS_OS := $(shell uname -s | tr '[:upper:]' '[:lower:]')
 DIFFUSERS_ARCH := $(shell uname -m | sed 's/x86_64/amd64/' | sed 's/aarch64/arm64/')
diff --git a/README.md b/README.md
index 183dfb0f..3e27e0d2 100644
--- a/README.md
+++ b/README.md
@@ -157,6 +157,7 @@ MODEL_RUNNER_HOST=http://localhost:13434 ./model-cli list
 ## Using the Makefile
 
 This project includes a Makefile to simplify common development tasks. Docker targets require Docker Desktop >= 4.41.0.
+
 Run `make help` for a full list, but the key targets are:
 
 - `build` - Build the Go application
@@ -194,6 +195,8 @@ This will:
 - Start the service on port 8080 (or the specified port)
 - All models downloaded will be stored in the host's `models` directory and will persist between container runs
 
+> NOTE: The [`.versions`](.versions) file is the single source of truth for all version variables (Go, vLLM, SGLang, llama-server, etc.).
+
 ### llama.cpp integration
 
 The Docker image includes the llama.cpp server binary from the `docker/docker-model-backend-llamacpp` image. You can specify the version of the image to use by setting the `LLAMA_SERVER_VERSION` variable. Additionally, you can configure the target OS, architecture, and acceleration type:
@@ -228,7 +231,7 @@ The Docker image also supports vLLM as an alternative inference backend.
 To build a Docker image with vLLM support:
 
 ```sh
-# Build with default settings (vLLM 0.12.0)
+# Build with default settings (vLLM 0.17.0)
 make docker-build DOCKER_TARGET=final-vllm BASE_IMAGE=nvidia/cuda:13.0.2-runtime-ubuntu24.04 LLAMA_SERVER_VARIANT=cuda
 
 # Build for specific architecture
@@ -237,7 +240,7 @@ docker buildx build \
   --target final-vllm \
   --build-arg BASE_IMAGE=nvidia/cuda:13.0.2-runtime-ubuntu24.04 \
   --build-arg LLAMA_SERVER_VARIANT=cuda \
-  --build-arg VLLM_VERSION=0.12.0 \
+  --build-arg VLLM_VERSION=0.17.0 \
   -t docker/model-runner:vllm .
 ```
 
@@ -245,7 +248,7 @@ docker buildx build \
 
 The vLLM variant supports the following build arguments:
 
-- **VLLM_VERSION**: The vLLM version to install (default: `0.12.0`)
+- **VLLM_VERSION**: The vLLM version to install (default: `0.17.0`)
 - **VLLM_CUDA_VERSION**: The CUDA version suffix for the wheel (default: `cu130`)
 - **VLLM_PYTHON_TAG**: The Python compatibility tag (default: `cp38-abi3`, compatible with Python 3.8+)
 
@@ -274,8 +277,8 @@ To update to a new vLLM version:
 ```sh
 docker buildx build \
   --target final-vllm \
-  --build-arg VLLM_VERSION=0.11.1 \
-  -t docker/model-runner:vllm-0.11.1 .
+  --build-arg VLLM_VERSION=0.17.0 \
+  -t docker/model-runner:vllm-0.17.0 .
 ```
 
 The vLLM wheels are sourced from the official vLLM GitHub Releases at `https://github.com/vllm-project/vllm/releases`, which provides prebuilt wheels for each release version.
diff --git a/scripts/build-vllm-metal-tarball.sh b/scripts/build-vllm-metal-tarball.sh
index 6573bbe5..08d27eb1 100755
--- a/scripts/build-vllm-metal-tarball.sh
+++ b/scripts/build-vllm-metal-tarball.sh
@@ -20,7 +20,8 @@ WORK_DIR=$(mktemp -d)
 # Convert tarball path to absolute before we cd elsewhere
 TARBALL="$(cd "$(dirname "$TARBALL_ARG")" && pwd)/$(basename "$TARBALL_ARG")"
 
-VLLM_VERSION="0.17.1"
+VLLM_VERSION=$(grep '^VLLM_UPSTREAM_VERSION=' "$(cd "$(dirname "$0")/.." && pwd)/.versions" | cut -d= -f2- | sed 's/[[:space:]]*#.*//;s/[[:space:]]*$//')
+: "${VLLM_VERSION:?VLLM_UPSTREAM_VERSION not found in .versions}"  # abort early instead of building broken download URLs
 # Extract wheel version from release tag (e.g., v0.1.0-20260126-121650 -> 0.1.0)
 VLLM_METAL_WHEEL_VERSION=$(echo "$VLLM_METAL_RELEASE" | sed 's/^v//' | cut -d'-' -f1)
 VLLM_METAL_WHEEL_URL="https://github.com/vllm-project/vllm-metal/releases/download/${VLLM_METAL_RELEASE}/vllm_metal-${VLLM_METAL_WHEEL_VERSION}-cp312-cp312-macosx_11_0_arm64.whl"

From 32ea7c66e9c7f6db9e1baf5e36549f45594e1df4 Mon Sep 17 00:00:00 2001
From: Dorin-Andrei Geman <doringeman@gmail.com>
Date: Fri, 3 Apr 2026 11:35:14 +0300
Subject: [PATCH 2/3] fix: add trailing newline to .versions

---
 .versions | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.versions b/.versions
index 68512bd4..412dbceb 100644
--- a/.versions
+++ b/.versions
@@ -5,4 +5,4 @@ VLLM_METAL_RELEASE=v0.1.0-20260320-122309
 DIFFUSERS_RELEASE=v0.1.0-20260216-000000
 SGLANG_VERSION=0.5.6
 LLAMA_SERVER_VERSION=latest
-BASE_IMAGE=ubuntu:26.04
\ No newline at end of file
+BASE_IMAGE=ubuntu:26.04

From 12b677b22e5a8bab58f9a123843beb34b43f29a9 Mon Sep 17 00:00:00 2001
From: "https://github.com/alimx07"
 <149194464+alimx07@users.noreply.github.com>
Date: Fri, 3 Apr 2026 20:07:24 +0200
Subject: [PATCH 3/3] Drop go patch version in Dockerfile

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 3ab2151d..c4f35c57 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,6 @@
 # syntax=docker/dockerfile:1
 
-ARG GO_VERSION=1.25.8
+ARG GO_VERSION=1.25
 ARG LLAMA_SERVER_VERSION=latest
 ARG LLAMA_SERVER_VARIANT=cpu
 ARG LLAMA_BINARY_PATH=/com.docker.llama-server.native.linux.${LLAMA_SERVER_VARIANT}.${TARGETARCH}