From e22a56d35172566f46d9f4d50f72ca6dea850c91 Mon Sep 17 00:00:00 2001
From: Hongming Wang <hongmingwangalt@gmail.com>
Date: Wed, 29 Apr 2026 16:01:38 -0700
Subject: [PATCH 1/2] ci: collapse Canvas (Next.js) to single job with
 conditional steps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Supersedes PR #2321's two-jobs-sharing-a-name approach, which didn't
actually clear branch-protection's required-check evaluation. Live
test on PR #2314: GraphQL `isRequired` confirmed BOTH check runs
under "Canvas (Next.js)" name (one SUCCESS via no-op, one SKIPPED via
real job) registered, and the SKIPPED one kept mergeStateStatus =
BLOCKED despite the SUCCESS sibling. Branch protection's "set of
matching contexts" semantic is stricter than the durable feedback
memory documented — at least one passing isn't enough; SKIPPED
counts as not-passed regardless.

Real fix: ONE job that always runs (no job-level `if:`), with all
real work gated on the path filter via per-step `if:`. Produces
exactly one "Canvas (Next.js)" check run per commit, always SUCCEEDS,
regardless of which paths changed. Costs ~10s of always-on CI runtime
per PR — negligible vs. the manual-merge cost when the BLOCKED state
catches us.

This same anti-pattern probably affects Platform (Go) (`platform`
filter), Python Lint & Test (`python` filter), and Shellcheck (E2E
scripts) (`scripts` filter) — all required, all path-gated. PR-A and
PR-2321 merged because they happened to trigger every filter; PR-B
only missed canvas. File a follow-up issue to apply the same
single-job-conditional-steps pattern across those required jobs to
remove the latent merge-blocker.

Updates feedback memory: branch_protection_check_name_parity is wrong
about "two jobs sharing name + at-least-one-success works." Need to
correct the note.
---
 .github/workflows/ci.yml | 54 +++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 26 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f13c16ec..96d3b668 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -178,42 +178,44 @@ jobs:
             exit 1
           fi
 
-  # Path-filter no-op shadow for Canvas (Next.js).
+  # Canvas (Next.js) is a required check on staging branch protection.
+  # The job ALWAYS runs (no `if:` gate at job level) so branch protection
+  # always sees a SUCCESS conclusion. Inside, every meaningful step is
+  # gated on `needs.changes.outputs.canvas == 'true'` — when the PR
+  # doesn't touch canvas/**, the job spins up, skips all real work, and
+  # exits clean in ~10s.
   #
-  # Branch protection on staging requires a "Canvas (Next.js)" check.
-  # When a PR doesn't touch canvas/** paths, the real canvas-build job
-  # below is skipped via `if:`, and GitHub reports its conclusion as
-  # SKIPPED — which branch protection treats as not-passed → merge
-  # BLOCKED on every workspace-server-only or migration-only PR.
-  #
-  # Pattern (per durable feedback memory: branch_protection_check_name_parity):
-  # split into a real job + a no-op shadow that share the same `name:`.
-  # Exactly one runs per PR; both report the same check context, and at
-  # least one always reports SUCCESS, satisfying the required check.
-  canvas-build-noop:
-    name: Canvas (Next.js)
-    needs: changes
-    if: needs.changes.outputs.canvas != 'true'
-    runs-on: ubuntu-latest
-    steps:
-      - run: echo "No canvas/** changes in this PR — Canvas (Next.js) skip is intentional, satisfying required-check via this no-op."
-
+  # Why not a job-level `if:` + a no-op shadow:
+  # An earlier version (PR #2321) tried two jobs sharing the same `name:`
+  # — one with `if: == 'true'`, the other with `if: != 'true'`. Branch
+  # protection sees BOTH check runs and treats the SKIPPED one as
+  # not-passed even when the other reports SUCCESS — so the merge stays
+  # BLOCKED. Single-job-with-conditional-steps is the only shape that
+  # produces exactly one Canvas (Next.js) check run per commit AND
+  # always SUCCEEDS, regardless of which paths changed.
   canvas-build:
     name: Canvas (Next.js)
     needs: changes
-    if: needs.changes.outputs.canvas == 'true'
     runs-on: ubuntu-latest
     defaults:
       run:
         working-directory: canvas
     steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
-      - uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
+      - if: needs.changes.outputs.canvas != 'true'
+        working-directory: .
+        run: echo "No canvas/** changes — skipping real build steps; this job always runs to satisfy the required-check name on branch protection."
+      - if: needs.changes.outputs.canvas == 'true'
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - if: needs.changes.outputs.canvas == 'true'
+        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
         with:
           node-version: '22'
-      - run: rm -f package-lock.json && npm install
-      - run: npm run build
-      - name: Run tests with coverage
+      - if: needs.changes.outputs.canvas == 'true'
+        run: rm -f package-lock.json && npm install
+      - if: needs.changes.outputs.canvas == 'true'
+        run: npm run build
+      - if: needs.changes.outputs.canvas == 'true'
+        name: Run tests with coverage
         # Coverage instrumentation is configured in canvas/vitest.config.ts
         # (provider: v8, reporters: text + html + json-summary). Step 2 of
         # #1815 — wires coverage into CI so we get a baseline visible on
@@ -224,7 +226,7 @@ jobs:
         # thresholds + a hard gate" — this PR ships the observability half.
         run: npx vitest run --coverage
       - name: Upload coverage summary as artifact
-        if: always()
+        if: needs.changes.outputs.canvas == 'true' && always()
         uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
         with:
           name: canvas-coverage-${{ github.run_id }}

From 142b8e9d5b8579b4199d1ba66fea6d32f556f8ac Mon Sep 17 00:00:00 2001
From: Hongming Wang <hongmingwangalt@gmail.com>
Date: Wed, 29 Apr 2026 16:08:25 -0700
Subject: [PATCH 2/2] ci: collapse all 4 path-filtered required checks to
 single-job-with-conditional-steps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Supersedes #2321 + #2322. Applies the same shape uniformly across every
required check that uses a path filter: Canvas (Next.js), Platform (Go),
Python Lint & Test, Shellcheck (E2E scripts).

The bug + fix in one paragraph:

GitHub registers a check run for every job whose `name:` matches the
required-check context, regardless of whether the job actually executed.
A job-level `if:` that evaluates false produces a SKIPPED check run.
Branch protection's "required check" rule looks at the SET of check
runs with the matching context name on the latest commit and treats
any conclusion other than SUCCESS as not-passed — including SKIPPED.
Adding a sibling no-op job under the same `name:` (PR #2321 / #2322
attempt) doesn't help: branch protection still sees the SKIPPED
sibling and stays BLOCKED.

The shape that works: ONE job per required check name, no job-level
`if:`, all real work gated per-step. The job always runs and reports
SUCCESS regardless of which paths changed.

This patch:
  * Canvas (Next.js): drops the `canvas-build-noop` shadow added in
    #2321 (which didn't actually clear merge state — verified live on
    PR #2314). Refactors `canvas-build` to always run; gates checkout/
    setup-node/install/build/test on `if: needs.changes.outputs.canvas
    == 'true'`. Coverage upload step also gated.
  * Platform (Go): drops job-level `if:`. Gates checkout/setup-go/
    download/build/vet/lint/test/coverage-report/threshold-check on
    per-step `if:`.
  * Python Lint & Test: drops job-level `if:`. Gates checkout/setup-
    python/install/pytest on per-step `if:`.
  * Shellcheck (E2E scripts): drops job-level `if:`. Gates checkout/
    shellcheck-run on per-step `if:`.

Each refactored job adds a leading no-op echo step with `working-directory: .`
override so the always-running spin-up doesn't fail when the path-
filter-true working-directory (workspace, workspace-server, canvas)
doesn't exist after no-op checkout.

Why all four in one PR: the bug shape is identical across all four,
and a future PR that only touches workspace-server (passing platform
filter, missing canvas/python/scripts) would hit the same BLOCKED state
on whichever filter it missed. PR-A and PR-2321 merged because their
diffs happened to trigger every filter; PR-B (#2314) only missed
canvas. Fixing one at a time means re-living this debugging cycle three
more times.

Cost: ~10s of always-on CI runtime per PR per job (the ubuntu-latest
spin-up + the no-op echo). 40s aggregate, negligible vs. the manual-
merge cost when BLOCKED catches us.

Memory `feedback_branch_protection_check_name_parity` already updated
(2026-04-29) to mark the original two-jobs-sharing-name pattern as
DO NOT FOLLOW and document the working shape this PR uses.

Refs PR #2321 (the misguided fix-attempt that this supersedes).
---
 .github/workflows/ci.yml | 85 +++++++++++++++++++++++++---------------
 1 file changed, 53 insertions(+), 32 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 96d3b668..72337316 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -63,29 +63,42 @@ jobs:
           echo "python=$(echo "$DIFF" | grep -qE '^workspace/|^\.github/workflows/ci\.yml$' && echo true || echo false)" >> "$GITHUB_OUTPUT"
           echo "scripts=$(echo "$DIFF" | grep -qE '^tests/e2e/|^scripts/|^infra/scripts/|^\.github/workflows/ci\.yml$' && echo true || echo false)" >> "$GITHUB_OUTPUT"
 
+  # Platform (Go) is a required check on staging. Always-run + per-step
+  # gating (see Canvas (Next.js) for the rationale and the failure mode
+  # this avoids).
   platform-build:
     name: Platform (Go)
     needs: changes
-    if: needs.changes.outputs.platform == 'true'
     runs-on: ubuntu-latest
     defaults:
       run:
         working-directory: workspace-server
     steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
-      - uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
+      - if: needs.changes.outputs.platform != 'true'
+        working-directory: .
+        run: echo "No platform/** changes — skipping real build steps; this job always runs to satisfy the required-check name on branch protection."
+      - if: needs.changes.outputs.platform == 'true'
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - if: needs.changes.outputs.platform == 'true'
+        uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
         with:
           go-version: 'stable'
-      - run: go mod download
-      - run: go build ./cmd/server
+      - if: needs.changes.outputs.platform == 'true'
+        run: go mod download
+      - if: needs.changes.outputs.platform == 'true'
+        run: go build ./cmd/server
       # CLI (molecli) moved to standalone repo: github.com/Molecule-AI/molecule-cli
-      - run: go vet ./... || true
-      - name: Run golangci-lint
+      - if: needs.changes.outputs.platform == 'true'
+        run: go vet ./... || true
+      - if: needs.changes.outputs.platform == 'true'
+        name: Run golangci-lint
         run: golangci-lint run --timeout 3m ./... || true
-      - name: Run tests with race detection and coverage
+      - if: needs.changes.outputs.platform == 'true'
+        name: Run tests with race detection and coverage
         run: go test -race -coverprofile=coverage.out ./...
 
-      - name: Per-file coverage report
+      - if: needs.changes.outputs.platform == 'true'
+        name: Per-file coverage report
         # Advisory — lists every source file with its coverage so reviewers
         # can see at-a-glance where gaps are. Sorted ascending so the worst
         # offenders float to the top. Does NOT fail the build; the hard
@@ -98,7 +111,8 @@ jobs:
                    END {for (f in s) printf "%6.1f%%  %s\n", s[f]/c[f], f}' \
             | sort -n
 
-      - name: Check coverage thresholds
+      - if: needs.changes.outputs.platform == 'true'
+        name: Check coverage thresholds
         # Enforces two gates from #1823 Layer 1:
         #   1. Total floor (25% — ratchet plan in COVERAGE_FLOOR.md).
         #   2. Per-file floor — non-test .go files in security-critical
@@ -178,21 +192,15 @@ jobs:
             exit 1
           fi
 
-  # Canvas (Next.js) is a required check on staging branch protection.
-  # The job ALWAYS runs (no `if:` gate at job level) so branch protection
-  # always sees a SUCCESS conclusion. Inside, every meaningful step is
-  # gated on `needs.changes.outputs.canvas == 'true'` — when the PR
-  # doesn't touch canvas/**, the job spins up, skips all real work, and
-  # exits clean in ~10s.
+  # Canvas (Next.js) — required check, always runs. See platform-build
+  # comment above for the rationale.
   #
-  # Why not a job-level `if:` + a no-op shadow:
-  # An earlier version (PR #2321) tried two jobs sharing the same `name:`
-  # — one with `if: == 'true'`, the other with `if: != 'true'`. Branch
-  # protection sees BOTH check runs and treats the SKIPPED one as
-  # not-passed even when the other reports SUCCESS — so the merge stays
-  # BLOCKED. Single-job-with-conditional-steps is the only shape that
-  # produces exactly one Canvas (Next.js) check run per commit AND
-  # always SUCCEEDS, regardless of which paths changed.
+  # Supersedes the canvas-build-noop pattern attempted in PR #2321: two
+  # jobs sharing `name:` doesn't actually satisfy branch protection
+  # because the SKIPPED check run sibling is treated as not-passed
+  # regardless of how many SUCCESS siblings it has. Verified empirically
+  # on PR #2314 — mergeStateStatus stayed BLOCKED until I collapsed to
+  # a single-job-with-conditional-steps shape.
   canvas-build:
     name: Canvas (Next.js)
     needs: changes
@@ -242,14 +250,19 @@ jobs:
   # It now has workflow-level concurrency (cancel-in-progress: false) so
   # new pushes queue the E2E run rather than cancelling it at the run level.
 
+  # Shellcheck (E2E scripts) — required check, always runs. See
+  # platform-build for the rationale.
   shellcheck:
     name: Shellcheck (E2E scripts)
     needs: changes
-    if: needs.changes.outputs.scripts == 'true'
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
-      - name: Run shellcheck on tests/e2e/*.sh and infra/scripts/*.sh
+      - if: needs.changes.outputs.scripts != 'true'
+        run: echo "No tests/e2e/ or infra/scripts/ changes — skipping real shellcheck; this job always runs to satisfy the required-check name on branch protection."
+      - if: needs.changes.outputs.scripts == 'true'
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - if: needs.changes.outputs.scripts == 'true'
+        name: Run shellcheck on tests/e2e/*.sh and infra/scripts/*.sh
         # shellcheck is pre-installed on ubuntu-latest runners (via apt).
         # infra/scripts/ is included because setup.sh + nuke.sh gate the
         # README quickstart — a shellcheck regression there silently breaks
@@ -303,10 +316,11 @@ jobs:
             "repos/${{ github.repository }}/commits/${{ github.sha }}/comments" \
             --field "body=@/tmp/deploy-reminder.md"
 
+  # Python Lint & Test — required check, always runs. See platform-build
+  # for the rationale.
   python-lint:
     name: Python Lint & Test
     needs: changes
-    if: needs.changes.outputs.python == 'true'
     runs-on: ubuntu-latest
     env:
       WORKSPACE_ID: test
@@ -314,16 +328,23 @@ jobs:
       run:
         working-directory: workspace
     steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+      - if: needs.changes.outputs.python != 'true'
+        working-directory: .
+        run: echo "No workspace/** changes — skipping real lint+test; this job always runs to satisfy the required-check name on branch protection."
+      - if: needs.changes.outputs.python == 'true'
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - if: needs.changes.outputs.python == 'true'
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
         with:
           python-version: '3.11'
           cache: pip
           cache-dependency-path: workspace/requirements.txt
-      - run: pip install -r requirements.txt pytest pytest-asyncio pytest-cov
+      - if: needs.changes.outputs.python == 'true'
+        run: pip install -r requirements.txt pytest pytest-asyncio pytest-cov
       # Coverage flags + fail-under floor moved into workspace/pytest.ini
       # (issue #1817) so local `pytest` and CI use identical config.
-      - run: python -m pytest --tb=short
+      - if: needs.changes.outputs.python == 'true'
+        run: python -m pytest --tb=short
 
       # SDK + plugin validation moved to standalone repo:
       # github.com/Molecule-AI/molecule-sdk-python