From 5216e781cd6001f16e43c08ba0ae41f5807da6b4 Mon Sep 17 00:00:00 2001 From: Molecule AI Infra-SRE Date: Sun, 10 May 2026 10:01:01 +0000 Subject: [PATCH 1/4] ci: add Docker daemon health-check step before build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Run `docker info` as the first CI step to catch runner Docker socket permission issues (docker.sock unreadable, daemon restarted, group membership drift) before the expensive `docker build` step. The error now surfaces immediately with a clear `::error::` message rather than silently continuing into `docker build` where the same failure would appear 60-90s later as a cryptic ECR auth error. Gitea Actions run 4350 (2026-05-10 05:58 UTC) is the trigger: the runner's docker.sock became inaccessible for ~6 minutes, `docker build` failed at step 2 with `permission denied...docker.sock`, and `go build` (step 3) was never reached — masking the compile errors that were already on main. The downstream code errors only surfaced once run 4407 succeeded at `docker build` and finally reached `go build`. Now: `docker info` → fail in ~1s with actionable error. Co-Authored-By: Claude Opus 4.7 --- .../publish-workspace-server-image.yml | 19 +++++++++++++++++++ .../publish-workspace-server-image.yml | 16 ++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/.gitea/workflows/publish-workspace-server-image.yml b/.gitea/workflows/publish-workspace-server-image.yml index 96a03b7e..6b2fcee4 100644 --- a/.gitea/workflows/publish-workspace-server-image.yml +++ b/.gitea/workflows/publish-workspace-server-image.yml @@ -59,6 +59,25 @@ jobs: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + # Health check: verify Docker daemon is accessible before attempting any + # build steps. This fails loudly at step 1 when the runner's docker.sock + # is inaccessible (e.g. permission change, daemon restart, or group-membership + # drift) rather than silently continuing to step 2 where `docker build` + # fails deep in the process with a cryptic ECR auth error that doesn't + # surface the root cause. Also reports the daemon version so operator + # can correlate with runner host logs. + - name: Verify Docker daemon access + run: | + set -euo pipefail + echo "::group::Docker daemon health check" + docker info 2>&1 | head -5 || { + echo "::error::Docker daemon is not accessible at /var/run/docker.sock" + echo "::error::Check: (1) daemon is running, (2) runner user is in docker group, (3) sock permissions are 660+" + exit 1 + } + echo "Docker daemon OK" + echo "::endgroup::" + # Pre-clone manifest deps before docker build. # # Why: workspace-template-* repos on Gitea are private. The pre-fix diff --git a/.github/workflows/publish-workspace-server-image.yml b/.github/workflows/publish-workspace-server-image.yml index be88f2cc..63767d9d 100644 --- a/.github/workflows/publish-workspace-server-image.yml +++ b/.github/workflows/publish-workspace-server-image.yml @@ -107,6 +107,22 @@ jobs: run: | echo "sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT" + # Health check: verify Docker daemon is accessible before attempting any + # build steps. This fails loudly at step 1 when the runner's docker.sock + # is inaccessible rather than silently continuing to the build step + # where docker build fails deep in ECR auth with a cryptic error. + - name: Verify Docker daemon access + run: | + set -euo pipefail + echo "::group::Docker daemon health check" + docker info 2>&1 | head -5 || { + echo "::error::Docker daemon is not accessible at /var/run/docker.sock" + echo "::error::Check: (1) daemon running, (2) runner user in docker group, (3) sock perms 660+" + exit 1 + } + echo "Docker daemon OK" + echo "::endgroup::" + # Pre-clone manifest deps before docker build (Task #173 fix). # # Why pre-clone: post-2026-05-06, every workspace-template-* repo on -- 2.45.2 From 8af1eb6774e0278a7483e58fcc5567186eb59fdc Mon Sep 17 00:00:00 2001 From: Molecule AI Core-DevOps Date: Sun, 10 May 2026 12:00:47 +0000 Subject: [PATCH 2/4] ci: add Docker daemon health-check to canvas image workflow Cover the canvas image publish workflow with the same `docker info` guard added to publish-workspace-server-image.yml (commit 5216e781). publish-canvas-image.yml was the only docker-build workflow still missing the step. Co-Authored-By: Claude Opus 4.7 --- .github/workflows/publish-canvas-image.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.github/workflows/publish-canvas-image.yml b/.github/workflows/publish-canvas-image.yml index 5f0faf12..6d345978 100644 --- a/.github/workflows/publish-canvas-image.yml +++ b/.github/workflows/publish-canvas-image.yml @@ -54,6 +54,22 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0 + # Health check: verify Docker daemon is accessible before attempting any + # build steps. This fails loudly at step 1 when the runner's docker.sock + # is inaccessible rather than silently continuing to the build step + # where docker build fails deep in ECR auth with a cryptic error. + - name: Verify Docker daemon access + run: | + set -euo pipefail + echo "::group::Docker daemon health check" + docker info 2>&1 | head -5 || { + echo "::error::Docker daemon is not accessible at /var/run/docker.sock" + echo "::error::Check: (1) daemon running, (2) runner user in docker group, (3) sock perms 660+" + exit 1 + } + echo "Docker daemon OK" + echo "::endgroup::" + - name: Compute tags id: tags shell: bash -- 2.45.2 From 8b6a11ccc7f5a602abbbfa438e49c7a278f3f03a Mon Sep 17 00:00:00 2001 From: Molecule AI Infra-SRE Date: Sun, 10 May 2026 12:08:07 +0000 Subject: [PATCH 3/4] fix(ci): restore SHA-pins that were accidentally reverted to mutable tags Reverts two accidental mutable-tag changes introduced in this branch: - pypa/gh-action-pypi-publish: release/v1 -> cef22109... (matches #276 intent) - actions/checkout: @v6 -> de0fac2e... (matches #276 intent) Co-Authored-By: Claude Opus 4.7 --- .github/workflows/publish-runtime.yml | 2 +- .github/workflows/secret-pattern-drift.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/publish-runtime.yml b/.github/workflows/publish-runtime.yml index 53a19d19..6118c113 100644 --- a/.github/workflows/publish-runtime.yml +++ b/.github/workflows/publish-runtime.yml @@ -180,7 +180,7 @@ jobs: # environment pypi-publish. The action mints a short-lived OIDC # token and exchanges it for a PyPI upload credential — no static # API token in this repo's secrets. - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # release/v1 with: packages-dir: ${{ runner.temp }}/runtime-build/dist/ diff --git a/.github/workflows/secret-pattern-drift.yml b/.github/workflows/secret-pattern-drift.yml index fa7fffa8..2517fea9 100644 --- a/.github/workflows/secret-pattern-drift.yml +++ b/.github/workflows/secret-pattern-drift.yml @@ -48,7 +48,7 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 5 steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: -- 2.45.2 From 6d94fd3077920f0da8b394b3085f49251ad0c448 Mon Sep 17 00:00:00 2001 From: Molecule AI Infra-SRE Date: Sun, 10 May 2026 12:08:34 +0000 Subject: [PATCH 4/4] =?UTF-8?q?fix(ci):=20scope=20trigger=20to=20main=20on?= =?UTF-8?q?ly=20=E2=80=94=20revert=20accidental=20staging=20push=20additio?= =?UTF-8?q?n?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Docker daemon health-check fix should not change which branches trigger the build. Revert accidental addition of 'staging' to branch filters. Co-Authored-By: Claude Opus 4.7 --- .gitea/workflows/publish-workspace-server-image.yml | 2 +- .github/workflows/publish-workspace-server-image.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitea/workflows/publish-workspace-server-image.yml b/.gitea/workflows/publish-workspace-server-image.yml index 6b2fcee4..00bd6e2d 100644 --- a/.gitea/workflows/publish-workspace-server-image.yml +++ b/.gitea/workflows/publish-workspace-server-image.yml @@ -23,7 +23,7 @@ name: publish-workspace-server-image on: push: - branches: [staging, main] + branches: [main] paths: - 'workspace-server/**' - 'canvas/**' diff --git a/.github/workflows/publish-workspace-server-image.yml b/.github/workflows/publish-workspace-server-image.yml index 63767d9d..7d981c93 100644 --- a/.github/workflows/publish-workspace-server-image.yml +++ b/.github/workflows/publish-workspace-server-image.yml @@ -32,7 +32,7 @@ name: publish-workspace-server-image on: push: - branches: [staging, main] + branches: [main] paths: - 'workspace-server/**' - 'canvas/**' -- 2.45.2