From e8c78d6a20b90f7994c5b32ef7d08ac5434a1711 Mon Sep 17 00:00:00 2001 From: Molecule AI Core-DevOps Date: Mon, 11 May 2026 23:12:22 +0000 Subject: [PATCH] fix(ci): pin docker-capable runner label in both publish workflows (closes #576) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Coin-flip failure: publish-workspace-server-image / build-and-push lands on runners without /var/run/docker.sock (molecule-runner-1 vs molecule-runner-4), failing the Docker daemon health check. Fix: - runs-on: ubuntu-latest → runs-on: [ubuntu-latest, docker] infra-sre registers a `docker` label on every act-runner that mounts /var/run/docker.sock (group=docker, perms 660+). Jobs that request the `docker` label are never queued on socket-less runners. - Health check step now echoes the runner hostname in both the success path and the error path so failures are traceable to a specific host. Applied to: .gitea/workflows/publish-workspace-server-image.yml .gitea/workflows/publish-canvas-image.yml Co-Authored-By: Claude Opus 4.7 --- .gitea/workflows/publish-canvas-image.yml | 8 +++++++- .gitea/workflows/publish-workspace-server-image.yml | 9 ++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/.gitea/workflows/publish-canvas-image.yml b/.gitea/workflows/publish-canvas-image.yml index 51ee0270..f21f0817 100644 --- a/.gitea/workflows/publish-canvas-image.yml +++ b/.gitea/workflows/publish-canvas-image.yml @@ -54,7 +54,11 @@ env: jobs: build-and-push: name: Build & push canvas image - runs-on: ubuntu-latest + # NOTE: infra-sre must register a `docker` label on every act-runner that + # mounts /var/run/docker.sock (group=docker, socket perms 660+). Jobs without + # the `docker` label land on runners that lack the socket and fail here. + # See issue #576. + runs-on: [ubuntu-latest, docker] # Phase 3 (RFC #219 §1): surface broken workflows without blocking. 
continue-on-error: true steps: @@ -79,8 +83,10 @@ jobs: run: | set -euo pipefail echo "::group::Docker daemon health check" + echo "Runner: ${HOSTNAME:-unknown}" docker info 2>&1 | head -5 || { echo "::error::Docker daemon is not accessible at /var/run/docker.sock" + echo "::error::Runner: ${HOSTNAME:-unknown}" echo "::error::Check: (1) daemon running, (2) runner user in docker group, (3) sock perms 660+" exit 1 } diff --git a/.gitea/workflows/publish-workspace-server-image.yml b/.gitea/workflows/publish-workspace-server-image.yml index db84492b..4bdfef86 100644 --- a/.gitea/workflows/publish-workspace-server-image.yml +++ b/.gitea/workflows/publish-workspace-server-image.yml @@ -52,7 +52,12 @@ env: jobs: build-and-push: - runs-on: ubuntu-latest + # NOTE: infra-sre must register a `docker` label on every act-runner that + # mounts /var/run/docker.sock (group=docker, socket perms 660+). Jobs without + # the `docker` label land on runners that lack the socket and fail here. + # molecule-runner-1 (no socket) vs molecule-runner-4 (socket) — coin-flip + # without this label gate. See issue #576. + runs-on: [ubuntu-latest, docker] steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -68,8 +73,10 @@ jobs: run: | set -euo pipefail echo "::group::Docker daemon health check" + echo "Runner: ${HOSTNAME:-unknown}" docker info 2>&1 | head -5 || { echo "::error::Docker daemon is not accessible at /var/run/docker.sock" + echo "::error::Runner: ${HOSTNAME:-unknown}" echo "::error::Check: (1) daemon is running, (2) runner user is in docker group, (3) sock permissions are 660+" exit 1 }