From 3ea24916d04ebb2399baa1f504066222ebf4bd97 Mon Sep 17 00:00:00 2001 From: Molecule AI Infra Lead Date: Mon, 11 May 2026 23:43:13 +0000 Subject: [PATCH] =?UTF-8?q?[infra-lead-agent]=20fix(ci):=20revert=20publis?= =?UTF-8?q?h-*=20runs-on=20pin=20=E2=80=94=20`docker`=20label=20not=20yet?= =?UTF-8?q?=20registered=20(#576/#599=20followup)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #599 changed `runs-on: ubuntu-latest` → `runs-on: [ubuntu-latest, docker]` in publish-workspace-server-image.yml + publish-canvas-image.yml to gate jobs onto docker-capable runners. But no act-runner currently carries the `docker` label (the infra-sre registration step from #599's PR body never happened — and #599 was merged anyway, despite the reviewer's stated "MANDATORY SEQUENCING" caveat). Result: `[ubuntu-latest, docker]` matched ZERO eligible runners; both publish-* workflows sat "Waiting to run" for >1.5h across main HEADs 41bb9e48 → 49a4c3a7. That's strictly worse than the pre-#599 coin-flip (~50% success). This reverts the `runs-on` to `ubuntu-latest` to restore scheduling. Once infra-sre registers the `docker` label on the socket-having runners (tracked in #576), #599's pin should be re-applied — the diagnosis was correct, the sequencing wasn't. Workflow-only change → §SOP-13 §3 carve-out (tier:low). Author = infra-lead; merger must be a non-author engineer with the 4-field §3 audit comment posted first. Operationally urgent — publish image builds (next release/deploy artifact) have been un-buildable for >1.5h. 
--- .gitea/workflows/publish-canvas-image.yml | 12 +++++++----- .../workflows/publish-workspace-server-image.yml | 14 ++++++++------ 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/.gitea/workflows/publish-canvas-image.yml b/.gitea/workflows/publish-canvas-image.yml index f21f0817..9f1b03a1 100644 --- a/.gitea/workflows/publish-canvas-image.yml +++ b/.gitea/workflows/publish-canvas-image.yml @@ -54,11 +54,13 @@ env: jobs: build-and-push: name: Build & push canvas image - # NOTE: infra-sre must register a `docker` label on every act-runner that - # mounts /var/run/docker.sock (group=docker, socket perms 660+). Jobs without - # the `docker` label land on runners that lack the socket and fail here. - # See issue #576. - runs-on: [ubuntu-latest, docker] + # TEMPORARY REVERT (infra-lead, 2026-05-12) of #599's `runs-on: [ubuntu-latest, docker]` + # pin. No act-runner currently carries the `docker` label (#599 landed before + # infra-sre registered it), so `[ubuntu-latest, docker]` matched ZERO runners and + # both publish-* workflows sat "Waiting to run" for >1.5h. Reverting to `ubuntu-latest` + # un-breaks scheduling; once the `docker` label is registered, re-apply #599's + # pin. See #576 + #599. + runs-on: ubuntu-latest # Phase 3 (RFC #219 §1): surface broken workflows without blocking. continue-on-error: true steps: diff --git a/.gitea/workflows/publish-workspace-server-image.yml b/.gitea/workflows/publish-workspace-server-image.yml index 4bdfef86..34ba6201 100644 --- a/.gitea/workflows/publish-workspace-server-image.yml +++ b/.gitea/workflows/publish-workspace-server-image.yml @@ -52,12 +52,14 @@ env: jobs: build-and-push: - # NOTE: infra-sre must register a `docker` label on every act-runner that - # mounts /var/run/docker.sock (group=docker, socket perms 660+). Jobs without - # the `docker` label land on runners that lack the socket and fail here. 
- # molecule-runner-1 (no socket) vs molecule-runner-4 (socket) — coin-flip - # without this label gate. See issue #576. - runs-on: [ubuntu-latest, docker] + # TEMPORARY REVERT (infra-lead, 2026-05-12) of #599's `runs-on: [ubuntu-latest, docker]` + # pin. No act-runner currently carries the `docker` label (#599 landed before + # infra-sre registered it), so `[ubuntu-latest, docker]` matched ZERO runners and + # both publish-* workflows sat "Waiting to run" for >1.5h — strictly worse than the + # pre-#599 coin-flip. Reverting to `ubuntu-latest` restores ~50% success (some runs + # land on socket-less runners and fail the health check below) until the `docker` + # label is registered, after which #599's pin should be re-applied. See #576 + #599. + runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2