From 31d25b5a745a8a46b052212e91255445e66595f9 Mon Sep 17 00:00:00 2001
From: Hongming Wang <hongmingwangalt@gmail.com>
Date: Tue, 28 Apr 2026 12:43:26 -0700
Subject: [PATCH] fix(ci): e2e gates always emit a result so auto-promote can
 read it
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The auto-promote-staging.yml gate-check (line 99) treats "workflow
didn't run" as failure. Path-filtered triggers on E2E API Smoke Test
and E2E Staging Canvas meant a platform-only or test-only push to
staging — say, the prior PR #2201 which only touched
tests/e2e/test_staging_full_saas.sh — never triggered the canvas
workflow, and auto-promote saw `missing/none`, marked all_green=false,
and aborted. The same failure hits any push that doesn't touch a
gate's watched paths: a deadlock by design, unnoticed only because
the gate was new.

Fix per Design B (always-run + fast-skip):

- Drop `paths:` from the push/pull_request triggers on both gate
  workflows. The workflow now always fires on every staging+main
  push/PR.
- Add a `detect-changes` job using `dorny/paths-filter@v3` that
  decides whether to do real work, scoped to the same paths the
  trigger filter used to watch.
- Real work job (e2e-api / playwright) gates on
  `needs: detect-changes; if: needs.detect-changes.outputs.X == 'true'`.
- Add a sibling `no-op` job that runs when the filter output is
  false, emitting `::notice::… no-op pass`. The workflow run's
  conclusion is `success` either way — auto-promote sees green and
  proceeds.

Manual `workflow_dispatch` and the weekly canvas `schedule` bypass the
filter in detect-changes and always run — those triggers exist precisely
to exercise the suite and shouldn't be silently no-op'd.

Why this approach over making auto-promote-staging smarter:

The alternative (Design A, considered + rejected) was to teach
auto-promote-staging to read each gate's `paths:` filter and treat
"no run because filter excluded the commit" as conditional pass.
That couples auto-promote to other workflows' YAML schema and breaks
silently if a gate is renamed or its filter changes. Design B keeps
the auto-promote contract simple ("each gate emits success") and
makes each gate self-describing — adding a new gate doesn't require
touching auto-promote.

Cost: ~10-30s of runner overhead per gate per push for the no-op when
paths don't match. Negligible vs the alternative of dead-locked
auto-promote chains.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/e2e-api.yml            | 62 +++++++++++++++++++++---
 .github/workflows/e2e-staging-canvas.yml | 53 +++++++++++++++++---
 2 files changed, 101 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/e2e-api.yml b/.github/workflows/e2e-api.yml
index 89c69b88..d7d6ea09 100644
--- a/.github/workflows/e2e-api.yml
+++ b/.github/workflows/e2e-api.yml
@@ -1,27 +1,73 @@
 name: E2E API Smoke Test
 # Extracted from ci.yml so workflow-level concurrency can protect this job
 # from run-level cancellation (issue #458).
+#
+# Trigger model (changed 2026-04-28 — see auto-promote gap below):
+#
+# This workflow always FIRES on push/pull_request to staging+main, but
+# only does real work when paths under `workspace-server/`,
+# `tests/e2e/`, or this workflow file changed. The detect-changes job
+# uses dorny/paths-filter to decide; the e2e-api job runs only if
+# changes match. Otherwise the no-op job emits success so the workflow
+# always produces a `completed/success` run record.
+#
+# Why: auto-promote-staging.yml's gate-check (line 99) treats "workflow
+# didn't run" as failure, which dead-locked any push to staging that
+# touched none of the paths above (e.g. a canvas-only change).
+# Dropping the path filter on the trigger and gating real work
+# internally guarantees the workflow always emits a result that the
+# auto-promote chain can read. Same pattern applied to
+# e2e-staging-canvas.yml in the same PR.
 
 on:
   push:
     branches: [main, staging]
-    paths:
-      - 'workspace-server/**'
-      - 'tests/e2e/**'
-      - '.github/workflows/e2e-api.yml'
   pull_request:
     branches: [main, staging]
-    paths:
-      - 'workspace-server/**'
-      - 'tests/e2e/**'
-      - '.github/workflows/e2e-api.yml'
+  workflow_dispatch:  # manual runs always execute the suite (detect-changes forces api=true)
 
 concurrency:
   group: e2e-api-${{ github.ref }}
   cancel-in-progress: false
 
 jobs:
+  detect-changes:
+    runs-on: ubuntu-latest
+    outputs:
+      api: ${{ steps.decide.outputs.api }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dorny/paths-filter@v3
+        id: filter
+        # Skipped on manual dispatch: `decide` never reads its output there.
+        if: github.event_name != 'workflow_dispatch'
+        with:
+          filters: |
+            api:
+              - 'workspace-server/**'
+              - 'tests/e2e/**'
+              - '.github/workflows/e2e-api.yml'
+      - id: decide
+        # Manual dispatch has no diff to filter against and is expected to
+        # exercise the platform, so force api=true; else relay the filter.
+        run: |
+          case "${{ github.event_name }}" in
+            workflow_dispatch) echo "api=true" >> "$GITHUB_OUTPUT" ;;
+            *) echo "api=${{ steps.filter.outputs.api }}" >> "$GITHUB_OUTPUT" ;;
+          esac
+
+  no-op:
+    runs-on: ubuntu-latest
+    needs: [detect-changes]
+    if: ${{ needs.detect-changes.outputs.api != 'true' }}  # exact inverse of the e2e-api job's gate
+    steps:
+      - run: |
+          echo "No workspace-server / tests/e2e / workflow changes — E2E API gate satisfied without running tests."
+          echo "::notice::E2E API Smoke Test no-op pass (paths filter excluded this commit)."
+
   e2e-api:
+    needs: detect-changes
+    if: needs.detect-changes.outputs.api == 'true'
     name: E2E API Smoke Test
     runs-on: ubuntu-latest
     timeout-minutes: 15
diff --git a/.github/workflows/e2e-staging-canvas.yml b/.github/workflows/e2e-staging-canvas.yml
index 143d9469..310e16f3 100644
--- a/.github/workflows/e2e-staging-canvas.yml
+++ b/.github/workflows/e2e-staging-canvas.yml
@@ -13,16 +13,23 @@ name: E2E Staging Canvas (Playwright)
 # workflow — mirrors what PR #1891 does for e2e-api.yml.
 
 on:
+  # Trigger model (changed 2026-04-28 — see auto-promote gap below):
+  #
+  # Always fires on push/pull_request; only does real work when canvas/
+  # or this workflow file changed. The detect-changes job uses
+  # dorny/paths-filter to decide; the playwright job runs only if
+  # changes match. Otherwise no-op emits success so the workflow always
+  # produces a `completed/success` run record.
+  #
+  # Why: auto-promote-staging.yml's gate-check (line 99) treats
+  # "workflow didn't run" as failure, which dead-locked platform-only
+  # pushes to staging. Dropping the trigger path filter and gating real
+  # work internally guarantees a result the auto-promote chain can
+  # read. Same pattern applied to e2e-api.yml in the same PR.
   push:
     branches: [main, staging]
-    paths:
-      - 'canvas/**'
-      - '.github/workflows/e2e-staging-canvas.yml'
   pull_request:
     branches: [main, staging]
-    paths:
-      - 'canvas/**'
-      - '.github/workflows/e2e-staging-canvas.yml'
   workflow_dispatch:
   schedule:
     # Weekly on Sunday 08:00 UTC — catches Chrome / Playwright / Next.js
@@ -34,7 +41,41 @@ concurrency:
   cancel-in-progress: false
 
 jobs:
+  detect-changes:
+    runs-on: ubuntu-latest
+    outputs:
+      canvas: ${{ steps.decide.outputs.canvas }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dorny/paths-filter@v3
+        id: filter
+        # Skipped on dispatch/cron: `decide` never reads its output there.
+        if: github.event_name != 'workflow_dispatch' && github.event_name != 'schedule'
+        with:
+          filters: |
+            canvas:
+              - 'canvas/**'
+              - '.github/workflows/e2e-staging-canvas.yml'
+      - id: decide
+        # Manual dispatch and the weekly cron exist to exercise the suite: force true.
+        run: |
+          case "${{ github.event_name }}" in
+            workflow_dispatch|schedule) echo "canvas=true" >> "$GITHUB_OUTPUT" ;;
+            *) echo "canvas=${{ steps.filter.outputs.canvas }}" >> "$GITHUB_OUTPUT" ;;
+          esac
+
+  no-op:
+    runs-on: ubuntu-latest
+    needs: [detect-changes]
+    if: ${{ needs.detect-changes.outputs.canvas != 'true' }}  # exact inverse of the playwright job's gate
+    steps:
+      - run: |
+          echo "No canvas / workflow changes — E2E Staging Canvas gate satisfied without running tests."
+          echo "::notice::E2E Staging Canvas no-op pass (paths filter excluded this commit)."
+
   playwright:
+    needs: detect-changes
+    if: needs.detect-changes.outputs.canvas == 'true'
     name: Canvas tabs E2E
     runs-on: ubuntu-latest
     timeout-minutes: 40