From 31d25b5a745a8a46b052212e91255445e66595f9 Mon Sep 17 00:00:00 2001
From: Hongming Wang
Date: Tue, 28 Apr 2026 12:43:26 -0700
Subject: [PATCH] fix(ci): e2e gates always emit a result so auto-promote can
 read it
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The auto-promote-staging.yml gate-check (line 99) treats "workflow
didn't run" as failure. Path-filtered triggers on E2E API Smoke Test
and E2E Staging Canvas meant a platform-only or test-only push to
staging — say, the prior PR #2201 which only touched
tests/e2e/test_staging_full_saas.sh — never triggered the canvas
workflow, and auto-promote saw `missing/none`, marked all_green=false,
and aborted.

Same class for any push that doesn't touch the gate's watched paths.
Dead-lock by design, never noticed because the gate was new.

Fix per Design B (always-run + fast-skip):

- Drop `paths:` from the push/pull_request triggers on both gate
  workflows. The workflow now always fires on every staging+main
  push/PR.
- Add a `detect-changes` job using `dorny/paths-filter@v3` that
  decides whether to do real work, scoped to the same paths the
  trigger filter used to watch.
- Real work job (e2e-api / playwright) gates on
  `needs: detect-changes; if: needs.detect-changes.outputs.X == 'true'`.
- Add a sibling `no-op` job that runs when the filter output is false,
  emitting `::notice::… no-op pass`. The workflow run's conclusion is
  `success` either way — auto-promote sees green and proceeds.

Manual `workflow_dispatch` (newly added to e2e-api.yml) and the weekly
canvas `schedule` short-circuit detect-changes to always-run — those
triggers exist precisely to exercise the suite and shouldn't be
silently no-op'd. The paths-filter step itself is skipped on those
events: its output is ignored there, and there is no push/PR diff for
it to evaluate, so running it would only add a way for the always-run
path to fail.

Why this approach over making auto-promote-staging smarter:

The alternative (Design A, considered + rejected) was to teach
auto-promote-staging to read each gate's `paths:` filter and treat
"no run because filter excluded the commit" as conditional pass.
That couples auto-promote to other workflows' YAML schema and breaks
silently if a gate is renamed or its filter changes. Design B keeps
the auto-promote contract simple ("each gate emits success") and makes
each gate self-describing — adding a new gate doesn't require touching
auto-promote.

Cost: ~10-30s of runner overhead per gate per push for the no-op when
paths don't match. Negligible vs the alternative of dead-locked
auto-promote chains.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 .github/workflows/e2e-api.yml            | 64 +++++++++++++++++++++---
 .github/workflows/e2e-staging-canvas.yml | 55 +++++++++++++++++---
 2 files changed, 105 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/e2e-api.yml b/.github/workflows/e2e-api.yml
index 89c69b88..d7d6ea09 100644
--- a/.github/workflows/e2e-api.yml
+++ b/.github/workflows/e2e-api.yml
@@ -1,27 +1,75 @@
 name: E2E API Smoke Test
 # Extracted from ci.yml so workflow-level concurrency can protect this job
 # from run-level cancellation (issue #458).
+#
+# Trigger model (changed 2026-04-28 — see auto-promote gap below):
+#
+# This workflow always FIRES on push/pull_request to staging+main, but
+# only does real work when paths under `workspace-server/`,
+# `tests/e2e/`, or this workflow file changed. The detect-changes job
+# uses dorny/paths-filter to decide; the e2e-api job runs only if
+# changes match. Otherwise the no-op job emits success so the workflow
+# always produces a `completed/success` run record.
+#
+# Why: auto-promote-staging.yml's gate-check (line 99) treats "workflow
+# didn't run" as failure, which dead-locked any platform-only or
+# test-only push to staging that didn't touch workspace-server paths.
+# Dropping the path filter on the trigger and gating real work
+# internally guarantees the workflow always emits a result that the
+# auto-promote chain can read. Same pattern applied to
+# e2e-staging-canvas.yml in the same PR.
 
 on:
   push:
     branches: [main, staging]
-    paths:
-      - 'workspace-server/**'
-      - 'tests/e2e/**'
-      - '.github/workflows/e2e-api.yml'
   pull_request:
     branches: [main, staging]
-    paths:
-      - 'workspace-server/**'
-      - 'tests/e2e/**'
-      - '.github/workflows/e2e-api.yml'
+  workflow_dispatch:
 
 concurrency:
   group: e2e-api-${{ github.ref }}
   cancel-in-progress: false
 
 jobs:
+  detect-changes:
+    runs-on: ubuntu-latest
+    outputs:
+      api: ${{ steps.decide.outputs.api }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dorny/paths-filter@v3
+        id: filter
+        # Skipped on manual dispatch: decide ignores the filter output there.
+        if: github.event_name != 'workflow_dispatch'
+        with:
+          filters: |
+            api:
+              - 'workspace-server/**'
+              - 'tests/e2e/**'
+              - '.github/workflows/e2e-api.yml'
+      - id: decide
+        # Always run real work for manual dispatch — no diff context to
+        # filter against and ops dispatching this expects the suite to
+        # actually exercise the platform.
+        run: |
+          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
+            echo "api=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "api=${{ steps.filter.outputs.api }}" >> "$GITHUB_OUTPUT"
+          fi
+
+  no-op:
+    needs: detect-changes
+    if: needs.detect-changes.outputs.api != 'true'
+    runs-on: ubuntu-latest
+    steps:
+      - run: |
+          echo "No workspace-server / tests/e2e / workflow changes — E2E API gate satisfied without running tests."
+          echo "::notice::E2E API Smoke Test no-op pass (paths filter excluded this commit)."
+
   e2e-api:
+    needs: detect-changes
+    if: needs.detect-changes.outputs.api == 'true'
     name: E2E API Smoke Test
     runs-on: ubuntu-latest
     timeout-minutes: 15
diff --git a/.github/workflows/e2e-staging-canvas.yml b/.github/workflows/e2e-staging-canvas.yml
index 143d9469..310e16f3 100644
--- a/.github/workflows/e2e-staging-canvas.yml
+++ b/.github/workflows/e2e-staging-canvas.yml
@@ -13,16 +13,23 @@ name: E2E Staging Canvas (Playwright)
 # workflow — mirrors what PR #1891 does for e2e-api.yml.
 
 on:
+  # Trigger model (changed 2026-04-28 — see auto-promote gap below):
+  #
+  # Always fires on push/pull_request; only does real work when canvas/
+  # or this workflow file changed. The detect-changes job uses
+  # dorny/paths-filter to decide; the playwright job runs only if
+  # changes match. Otherwise no-op emits success so the workflow always
+  # produces a `completed/success` run record.
+  #
+  # Why: auto-promote-staging.yml's gate-check (line 99) treats
+  # "workflow didn't run" as failure, which dead-locked platform-only
+  # pushes to staging. Dropping the trigger path filter and gating real
+  # work internally guarantees a result the auto-promote chain can
+  # read. Same pattern applied to e2e-api.yml in the same PR.
   push:
     branches: [main, staging]
-    paths:
-      - 'canvas/**'
-      - '.github/workflows/e2e-staging-canvas.yml'
   pull_request:
     branches: [main, staging]
-    paths:
-      - 'canvas/**'
-      - '.github/workflows/e2e-staging-canvas.yml'
   workflow_dispatch:
   schedule:
     # Weekly on Sunday 08:00 UTC — catches Chrome / Playwright / Next.js
@@ -34,7 +41,43 @@ concurrency:
   cancel-in-progress: false
 
 jobs:
+  detect-changes:
+    runs-on: ubuntu-latest
+    outputs:
+      canvas: ${{ steps.decide.outputs.canvas }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dorny/paths-filter@v3
+        id: filter
+        # Skipped on dispatch/cron: decide ignores the filter output there.
+        if: github.event_name != 'workflow_dispatch' && github.event_name != 'schedule'
+        with:
+          filters: |
+            canvas:
+              - 'canvas/**'
+              - '.github/workflows/e2e-staging-canvas.yml'
+      - id: decide
+        # Always run real tests for manual dispatch and the weekly cron —
+        # both exist precisely to exercise the suite, regardless of diff.
+        run: |
+          if [ "${{ github.event_name }}" = "workflow_dispatch" ] || [ "${{ github.event_name }}" = "schedule" ]; then
+            echo "canvas=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "canvas=${{ steps.filter.outputs.canvas }}" >> "$GITHUB_OUTPUT"
+          fi
+
+  no-op:
+    needs: detect-changes
+    if: needs.detect-changes.outputs.canvas != 'true'
+    runs-on: ubuntu-latest
+    steps:
+      - run: |
+          echo "No canvas / workflow changes — E2E Staging Canvas gate satisfied without running tests."
+          echo "::notice::E2E Staging Canvas no-op pass (paths filter excluded this commit)."
+
   playwright:
+    needs: detect-changes
+    if: needs.detect-changes.outputs.canvas == 'true'
     name: Canvas tabs E2E
     runs-on: ubuntu-latest
     timeout-minutes: 40