From e22a56d35172566f46d9f4d50f72ca6dea850c91 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Wed, 29 Apr 2026 16:01:38 -0700 Subject: [PATCH 1/2] ci: collapse Canvas (Next.js) to single job with conditional steps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Supersedes PR #2321's two-jobs-sharing-a-name approach, which didn't actually clear branch-protection's required-check evaluation. Live test on PR #2314: GraphQL `isRequired` confirmed BOTH check runs under "Canvas (Next.js)" name (one SUCCESS via no-op, one SKIPPED via real job) registered, and the SKIPPED one kept mergeStateStatus = BLOCKED despite the SUCCESS sibling. Branch protection's "set of matching contexts" semantic is stricter than the durable feedback memory documented — at least one passing isn't enough; SKIPPED counts as not-passed regardless. Real fix: ONE job that always runs (no job-level `if:`), with all real work gated on the path filter via per-step `if:`. Produces exactly one "Canvas (Next.js)" check run per commit, always SUCCEEDS, regardless of which paths changed. Costs ~10s of always-on CI runtime per PR — negligible vs. the manual-merge cost when the BLOCKED state catches us. This same anti-pattern probably affects Platform (Go) (`platform` filter), Python Lint & Test (`python` filter), and Shellcheck (E2E scripts) (`scripts` filter) — all required, all path-gated. PR-A and PR-2321 merged because they happened to trigger every filter; PR-B only missed canvas. File a follow-up issue to apply the same single-job-conditional-steps pattern across those required jobs to remove the latent merge-blocker. Updates feedback memory: branch_protection_check_name_parity is wrong about "two jobs sharing name + at-least-one-success works." Need to correct the note. --- .github/workflows/ci.yml | 54 +++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f13c16ec..96d3b668 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -178,42 +178,44 @@ jobs: exit 1 fi - # Path-filter no-op shadow for Canvas (Next.js). + # Canvas (Next.js) is a required check on staging branch protection. + # The job ALWAYS runs (no `if:` gate at job level) so branch protection + # always sees a SUCCESS conclusion. Inside, every meaningful step is + # gated on `needs.changes.outputs.canvas == 'true'` — when the PR + # doesn't touch canvas/**, the job spins up, skips all real work, and + # exits clean in ~10s. # - # Branch protection on staging requires a "Canvas (Next.js)" check. - # When a PR doesn't touch canvas/** paths, the real canvas-build job - # below is skipped via `if:`, and GitHub reports its conclusion as - # SKIPPED — which branch protection treats as not-passed → merge - # BLOCKED on every workspace-server-only or migration-only PR. - # - # Pattern (per durable feedback memory: branch_protection_check_name_parity): - # split into a real job + a no-op shadow that share the same `name:`. - # Exactly one runs per PR; both report the same check context, and at - # least one always reports SUCCESS, satisfying the required check. - canvas-build-noop: - name: Canvas (Next.js) - needs: changes - if: needs.changes.outputs.canvas != 'true' - runs-on: ubuntu-latest - steps: - - run: echo "No canvas/** changes in this PR — Canvas (Next.js) skip is intentional, satisfying required-check via this no-op." - + # Why not a job-level `if:` + a no-op shadow: + # An earlier version (PR #2321) tried two jobs sharing the same `name:` + # — one with `if: == 'true'`, the other with `if: != 'true'`. Branch + # protection sees BOTH check runs and treats the SKIPPED one as + # not-passed even when the other reports SUCCESS — so the merge stays + # BLOCKED. Single-job-with-conditional-steps is the only shape that + # produces exactly one Canvas (Next.js) check run per commit AND + # always SUCCEEDS, regardless of which paths changed. canvas-build: name: Canvas (Next.js) needs: changes - if: needs.changes.outputs.canvas == 'true' runs-on: ubuntu-latest defaults: run: working-directory: canvas steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - - uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + - if: needs.changes.outputs.canvas != 'true' + working-directory: . + run: echo "No canvas/** changes — skipping real build steps; this job always runs to satisfy the required-check name on branch protection." + - if: needs.changes.outputs.canvas == 'true' + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + - if: needs.changes.outputs.canvas == 'true' + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: node-version: '22' - - run: rm -f package-lock.json && npm install - - run: npm run build - - name: Run tests with coverage + - if: needs.changes.outputs.canvas == 'true' + run: rm -f package-lock.json && npm install + - if: needs.changes.outputs.canvas == 'true' + run: npm run build + - if: needs.changes.outputs.canvas == 'true' + name: Run tests with coverage # Coverage instrumentation is configured in canvas/vitest.config.ts # (provider: v8, reporters: text + html + json-summary). Step 2 of # #1815 — wires coverage into CI so we get a baseline visible on @@ -224,7 +226,7 @@ jobs: # thresholds + a hard gate" — this PR ships the observability half. run: npx vitest run --coverage - name: Upload coverage summary as artifact - if: always() + if: needs.changes.outputs.canvas == 'true' && always() uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: canvas-coverage-${{ github.run_id }} From 142b8e9d5b8579b4199d1ba66fea6d32f556f8ac Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Wed, 29 Apr 2026 16:08:25 -0700 Subject: [PATCH 2/2] ci: collapse all 4 path-filtered required checks to single-job-with-conditional-steps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Supersedes #2321 + #2322. Applies the same shape uniformly across every required check that uses a path filter: Canvas (Next.js), Platform (Go), Python Lint & Test, Shellcheck (E2E scripts). The bug + fix in one paragraph: GitHub registers a check run for every job whose `name:` matches the required-check context, regardless of whether the job actually executed. A job-level `if:` that evaluates false produces a SKIPPED check run. Branch protection's "required check" rule looks at the SET of check runs with the matching context name on the latest commit and treats any conclusion other than SUCCESS as not-passed — including SKIPPED. Adding a sibling no-op job under the same `name:` (PR #2321 / #2322 attempt) doesn't help: branch protection still sees the SKIPPED sibling and stays BLOCKED. The shape that works: ONE job per required check name, no job-level `if:`, all real work gated per-step. The job always runs and reports SUCCESS regardless of which paths changed. This patch: * Canvas (Next.js): drops the `canvas-build-noop` shadow added in #2321 (which didn't actually clear merge state — verified live on PR #2314). Refactors `canvas-build` to always run; gates checkout/ setup-node/install/build/test on `if: needs.changes.outputs.canvas == 'true'`. Coverage upload step also gated. * Platform (Go): drops job-level `if:`. Gates checkout/setup-go/ download/build/vet/lint/test/coverage-report/threshold-check on per-step `if:`. * Python Lint & Test: drops job-level `if:`. Gates checkout/setup- python/install/pytest on per-step `if:`. * Shellcheck (E2E scripts): drops job-level `if:`. Gates checkout/ shellcheck-run on per-step `if:`. Each refactored job adds a leading no-op echo step with `working-directory: .` override so the always-running spin-up doesn't fail when the path- filter-true working-directory (workspace, workspace-server, canvas) doesn't exist after no-op checkout. Why all four in one PR: the bug shape is identical across all four, and a future PR that only touches workspace-server (passing platform filter, missing canvas/python/scripts) would hit the same BLOCKED state on whichever filter it missed. PR-A and PR-2321 merged because their diffs happened to trigger every filter; PR-B (#2314) only missed canvas. Fixing one at a time means re-living this debugging cycle three more times. Cost: ~10s of always-on CI runtime per PR per job (the ubuntu-latest spin-up + the no-op echo). 40s aggregate, negligible vs. the manual- merge cost when BLOCKED catches us. Memory `feedback_branch_protection_check_name_parity` already updated (2026-04-29) to mark the original two-jobs-sharing-name pattern as DO NOT FOLLOW and document the working shape this PR uses. Refs PR #2321 (the misguided fix-attempt that this supersedes). --- .github/workflows/ci.yml | 85 +++++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 32 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 96d3b668..72337316 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -63,29 +63,42 @@ jobs: echo "python=$(echo "$DIFF" | grep -qE '^workspace/|^\.github/workflows/ci\.yml$' && echo true || echo false)" >> "$GITHUB_OUTPUT" echo "scripts=$(echo "$DIFF" | grep -qE '^tests/e2e/|^scripts/|^infra/scripts/|^\.github/workflows/ci\.yml$' && echo true || echo false)" >> "$GITHUB_OUTPUT" + # Platform (Go) is a required check on staging. Always-run + per-step + # gating (see Canvas (Next.js) for the rationale and the failure mode + # this avoids). platform-build: name: Platform (Go) needs: changes - if: needs.changes.outputs.platform == 'true' runs-on: ubuntu-latest defaults: run: working-directory: workspace-server steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - - uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5 + - if: needs.changes.outputs.platform != 'true' + working-directory: . + run: echo "No platform/** changes — skipping real build steps; this job always runs to satisfy the required-check name on branch protection." + - if: needs.changes.outputs.platform == 'true' + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + - if: needs.changes.outputs.platform == 'true' + uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5 with: go-version: 'stable' - - run: go mod download - - run: go build ./cmd/server + - if: needs.changes.outputs.platform == 'true' + run: go mod download + - if: needs.changes.outputs.platform == 'true' + run: go build ./cmd/server # CLI (molecli) moved to standalone repo: github.com/Molecule-AI/molecule-cli - - run: go vet ./... || true - - name: Run golangci-lint + - if: needs.changes.outputs.platform == 'true' + run: go vet ./... || true + - if: needs.changes.outputs.platform == 'true' + name: Run golangci-lint run: golangci-lint run --timeout 3m ./... || true - - name: Run tests with race detection and coverage + - if: needs.changes.outputs.platform == 'true' + name: Run tests with race detection and coverage run: go test -race -coverprofile=coverage.out ./... - - name: Per-file coverage report + - if: needs.changes.outputs.platform == 'true' + name: Per-file coverage report # Advisory — lists every source file with its coverage so reviewers # can see at-a-glance where gaps are. Sorted ascending so the worst # offenders float to the top. Does NOT fail the build; the hard @@ -98,7 +111,8 @@ jobs: END {for (f in s) printf "%6.1f%% %s\n", s[f]/c[f], f}' \ | sort -n - - name: Check coverage thresholds + - if: needs.changes.outputs.platform == 'true' + name: Check coverage thresholds # Enforces two gates from #1823 Layer 1: # 1. Total floor (25% — ratchet plan in COVERAGE_FLOOR.md). # 2. Per-file floor — non-test .go files in security-critical @@ -178,21 +192,15 @@ jobs: exit 1 fi - # Canvas (Next.js) is a required check on staging branch protection. - # The job ALWAYS runs (no `if:` gate at job level) so branch protection - # always sees a SUCCESS conclusion. Inside, every meaningful step is - # gated on `needs.changes.outputs.canvas == 'true'` — when the PR - # doesn't touch canvas/**, the job spins up, skips all real work, and - # exits clean in ~10s. + # Canvas (Next.js) — required check, always runs. See platform-build + # comment above for the rationale. # - # Why not a job-level `if:` + a no-op shadow: - # An earlier version (PR #2321) tried two jobs sharing the same `name:` - # — one with `if: == 'true'`, the other with `if: != 'true'`. Branch - # protection sees BOTH check runs and treats the SKIPPED one as - # not-passed even when the other reports SUCCESS — so the merge stays - # BLOCKED. Single-job-with-conditional-steps is the only shape that - # produces exactly one Canvas (Next.js) check run per commit AND - # always SUCCEEDS, regardless of which paths changed. + # Supersedes the canvas-build-noop pattern attempted in PR #2321: two + # jobs sharing `name:` doesn't actually satisfy branch protection + # because the SKIPPED check run sibling is treated as not-passed + # regardless of how many SUCCESS siblings it has. Verified empirically + # on PR #2314 — mergeStateStatus stayed BLOCKED until I collapsed to + # a single-job-with-conditional-steps shape. canvas-build: name: Canvas (Next.js) needs: changes @@ -242,14 +250,19 @@ jobs: # It now has workflow-level concurrency (cancel-in-progress: false) so # new pushes queue the E2E run rather than cancelling it at the run level. + # Shellcheck (E2E scripts) — required check, always runs. See + # platform-build for the rationale. shellcheck: name: Shellcheck (E2E scripts) needs: changes - if: needs.changes.outputs.scripts == 'true' runs-on: ubuntu-latest steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - - name: Run shellcheck on tests/e2e/*.sh and infra/scripts/*.sh + - if: needs.changes.outputs.scripts != 'true' + run: echo "No tests/e2e/ or infra/scripts/ changes — skipping real shellcheck; this job always runs to satisfy the required-check name on branch protection." + - if: needs.changes.outputs.scripts == 'true' + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + - if: needs.changes.outputs.scripts == 'true' + name: Run shellcheck on tests/e2e/*.sh and infra/scripts/*.sh # shellcheck is pre-installed on ubuntu-latest runners (via apt). # infra/scripts/ is included because setup.sh + nuke.sh gate the # README quickstart — a shellcheck regression there silently breaks @@ -303,10 +316,11 @@ jobs: "repos/${{ github.repository }}/commits/${{ github.sha }}/comments" \ --field "body=@/tmp/deploy-reminder.md" + # Python Lint & Test — required check, always runs. See platform-build + # for the rationale. python-lint: name: Python Lint & Test needs: changes - if: needs.changes.outputs.python == 'true' runs-on: ubuntu-latest env: WORKSPACE_ID: test @@ -314,16 +328,23 @@ jobs: run: working-directory: workspace steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + - if: needs.changes.outputs.python != 'true' + working-directory: . + run: echo "No workspace/** changes — skipping real lint+test; this job always runs to satisfy the required-check name on branch protection." + - if: needs.changes.outputs.python == 'true' + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + - if: needs.changes.outputs.python == 'true' + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: '3.11' cache: pip cache-dependency-path: workspace/requirements.txt - - run: pip install -r requirements.txt pytest pytest-asyncio pytest-cov + - if: needs.changes.outputs.python == 'true' + run: pip install -r requirements.txt pytest pytest-asyncio pytest-cov # Coverage flags + fail-under floor moved into workspace/pytest.ini # (issue #1817) so local `pytest` and CI use identical config. - - run: python -m pytest --tb=short + - if: needs.changes.outputs.python == 'true' + run: python -m pytest --tb=short # SDK + plugin validation moved to standalone repo: # github.com/Molecule-AI/molecule-sdk-python