From 7cc1c39c49c97d504b62ebaaff982d75c6925bcd Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Mon, 4 May 2026 20:21:59 -0700 Subject: [PATCH] ci: e2e coverage matrix + branch-protection-as-code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #9. Three pieces, all small: 1. **docs/e2e-coverage.md** — source of truth for which E2E suites guard which surfaces. Today three were running but informational only on staging; that's how the org-import silent-drop bug shipped without a test catching it pre-merge. Now the matrix shows what's required where + a follow-up note for the two suites that need an always-emit refactor before they can be required. 2. **tools/branch-protection/apply.sh** — branch protection as code. Lets `staging` and `main` required-checks live in a reviewable shell script instead of UI clicks that get lost between admins. This PR's net change: add `E2E API Smoke Test` and `Canvas tabs E2E` as required on staging. Both already use the always-emit path-filter pattern (no-op step emits SUCCESS when the workflow's paths weren't touched), so making them required can't deadlock unrelated PRs. 3. **branch-protection-drift.yml** — daily cron + drift_check.sh that compares live protection against apply.sh's desired state. Catches out-of-band UI edits before they drift further. Fails the workflow on mismatch; ops re-runs apply.sh or updates the script. Out of scope (filed as follow-ups): - e2e-staging-saas + e2e-staging-external use plain `paths:` filters and never trigger when paths are unchanged. They need refactoring to the always-emit shape (same as e2e-api / e2e-staging-canvas) before they can be required. - main branch protection mirrors staging here; if main wants the E2E SaaS / External added later, do it in apply.sh and rerun. Operator must apply once after merge: bash tools/branch-protection/apply.sh The drift check picks it up from there. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/branch-protection-drift.yml | 42 ++++++ docs/e2e-coverage.md | 58 ++++++++ tools/branch-protection/apply.sh | 133 ++++++++++++++++++ tools/branch-protection/drift_check.sh | 44 ++++++ 4 files changed, 277 insertions(+) create mode 100644 .github/workflows/branch-protection-drift.yml create mode 100644 docs/e2e-coverage.md create mode 100755 tools/branch-protection/apply.sh create mode 100755 tools/branch-protection/drift_check.sh diff --git a/.github/workflows/branch-protection-drift.yml b/.github/workflows/branch-protection-drift.yml new file mode 100644 index 00000000..7b05f09f --- /dev/null +++ b/.github/workflows/branch-protection-drift.yml @@ -0,0 +1,42 @@ +name: branch-protection drift check + +# Catches out-of-band edits to branch protection (UI clicks, manual gh +# api PATCH from a one-off ops session) by comparing live state against +# tools/branch-protection/apply.sh's desired state every day. Fails the +# workflow when they drift; the failure is the signal. +# +# When it fails: re-run apply.sh to put the live state back to the +# script's intent, OR update apply.sh to encode the new intent and +# commit. Either way the script is the source of truth. + +on: + schedule: + # 14:00 UTC daily. Off-hours for most teams; gives a fresh signal + # at the start of every working day. + - cron: '0 14 * * *' + workflow_dispatch: + pull_request: + branches: [staging, main] + paths: + - 'tools/branch-protection/**' + - '.github/workflows/branch-protection-drift.yml' + +permissions: + contents: read + +jobs: + drift: + name: Branch protection drift + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Run drift check + env: + # GH_TOKEN_FOR_ADMIN_API — repo-admin scope, needed for the + # /branches/:b/protection endpoint. Falls back to GITHUB_TOKEN + # for read-only mode (gh api on the protection endpoint + # returns 200 to maintainers, 404 to read-only). Configure at + # Settings → Secrets and variables → Actions. + GH_TOKEN: ${{ secrets.GH_TOKEN_FOR_ADMIN_API || secrets.GITHUB_TOKEN }} + run: bash tools/branch-protection/drift_check.sh diff --git a/docs/e2e-coverage.md b/docs/e2e-coverage.md new file mode 100644 index 00000000..48ca0e19 --- /dev/null +++ b/docs/e2e-coverage.md @@ -0,0 +1,58 @@ +# E2E coverage matrix + +This document is the source of truth for which E2E suites guard which surfaces and which gates are wired up where. Read this before adding a new E2E or moving a check between branches. + +## Suites + +| Workflow file | Job (= required-check name) | What it covers | Cron | +|---|---|---|---| +| `e2e-api.yml` | `E2E API Smoke Test` | A2A handshake, registry/register, /workspaces/:id/a2a forward, structured-event emission. Lightweight enough to run on every PR. | — | +| `e2e-staging-canvas.yml` | `Canvas tabs E2E` | Canvas-tab Playwright UX checks against staging — config tab, secrets tab, agent-card tab, Activity hydration. | weekly Sun 08:00 UTC | +| `e2e-staging-saas.yml` | `E2E Staging SaaS` | Full lifecycle: org creation → workspace provision (CP path) → A2A delegation → status/heartbeat → workspace delete → EC2 termination. The integration test that catches the silent-drop bug class (#2486 / #2811 / #2813 / #2814). | daily 07:00 UTC | +| `e2e-staging-external.yml` | `E2E Staging External Runtime` | External-runtime registration + heartbeat staleness sweep + `/registry/peers` resolution. Validates the OSS-templated workspace path. | daily 07:30 UTC | +| `e2e-staging-sanity.yml` | `Intentional-failure teardown sanity` | Inverted assertion — the run MUST fail. Validates the leak-detection self-check itself; not for general gating. | weekly Mon 06:00 UTC | +| `continuous-synth-e2e.yml` | `Synthetic E2E against staging` | Standing background coverage between PR runs. Catches drift in production-like staging that PR-time E2Es miss. | every 15 min | + +## Required-check status (branch protection) + +| Suite | staging required | main required | +|---|---|---| +| `E2E API Smoke Test` | ✅ this PR | ✅ | +| `Canvas tabs E2E` | ✅ this PR | (see follow-up) | +| `E2E Staging SaaS` | ❌ — needs always-emit refactor | ❌ | +| `E2E Staging External Runtime` | ❌ — needs always-emit refactor | ❌ | +| `Intentional-failure teardown sanity` | ❌ inverted assertion, never required | ❌ | +| `Synthetic E2E against staging` | ❌ cron-only, not a per-PR gate | ❌ | + +## Why the always-emit pattern matters + +Branch protection requires a *check name* to land at SUCCESS for every PR. Workflows with `paths:` filters that exclude a PR never run, so the check name never appears, and the PR sits BLOCKED forever. + +The pattern that supports being required is: + +1. Workflow always triggers on push/PR to the protected branch. +2. A `detect-changes` job uses `dorny/paths-filter` to decide if real work runs. +3. The protected job runs unconditionally and either (a) does real work when paths matched, or (b) emits a no-op SUCCESS step when paths skipped. + +`e2e-api.yml` and `e2e-staging-canvas.yml` already have this shape. `e2e-staging-saas.yml` and `e2e-staging-external.yml` use plain `paths:` filters and need the refactor before they can be required (filed as follow-up). + +## Adding a new E2E suite + +1. Pick a verb: smoke test, full lifecycle, fault-injection, drift detection. Pre-existing suites split along these lines. +2. Use the always-emit shape so the check name can be made required. +3. Add a row to the matrix above. +4. Decide cron cadence based on cost + how fast drift would otherwise be caught. +5. If you want it required, add to the relevant branch protection via `tools/branch-protection/apply.sh` (this PR adds the script). + +## When to break glass — temporarily skip a required E2E + +Don't. If an E2E is intermittently flaky, fix the test or move it out of required. The point of a required check is that it's load-bearing; bypassing one with admin override teaches the next operator the gate is optional. + +If a Production incident requires bypassing, document the override in the incident postmortem with a same-week followup to either fix the test or rip the check out of required. + +## Related issues / PRs + +- #2486 — silent-drop bug class that the SaaS E2E now catches +- PR #2811 — `provisionWorkspaceAuto` consolidation (org-import SaaS gate) +- PR #2824 — `StopWorkspaceAuto` mirror (closes #2813 + #2814) +- Follow-up: refactor `e2e-staging-saas` + `e2e-staging-external` to always-emit (so they can be required) diff --git a/tools/branch-protection/apply.sh b/tools/branch-protection/apply.sh new file mode 100755 index 00000000..928d2829 --- /dev/null +++ b/tools/branch-protection/apply.sh @@ -0,0 +1,133 @@ +#!/usr/bin/env bash +# tools/branch-protection/apply.sh — idempotently apply branch +# protection to molecule-core's `staging` and `main` branches. +# +# Why a script: GitHub's branch protection lives in repo settings, so +# changes are usually clicked through the UI and lost between admins. +# This script makes the config reproducible — diff it against the live +# state, change the file, run it, done. Single source of truth that +# shows up in code review. +# +# Usage: +# tools/branch-protection/apply.sh # apply both branches +# tools/branch-protection/apply.sh --dry-run # show payload only +# tools/branch-protection/apply.sh --branch staging +# +# Requires: gh CLI authenticated as a repo admin. The script uses gh's +# token (no separate PAT needed). + +set -euo pipefail + +REPO="Molecule-AI/molecule-core" +DRY_RUN=0 +ONLY_BRANCH="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --dry-run) DRY_RUN=1; shift ;; + --branch) ONLY_BRANCH="$2"; shift 2 ;; + -h|--help) + echo "Usage: $0 [--dry-run] [--branch ]" + exit 0 + ;; + *) echo "Unknown arg: $1" >&2; exit 1 ;; + esac +done + +# Required-check matrices. Each branch's set is the canonical list of +# check NAMES (from each workflow's job-name). Adding/removing a check +# here is the place to do it. Match docs/e2e-coverage.md. +# +# Why staging gets E2E API + Canvas E2E (this PR's addition): both +# already use the always-emit pattern (path-filter no-ops emit SUCCESS), +# so making them required can't deadlock a PR that doesn't touch their +# paths. The other E2Es (SaaS, External) need a refactor to that +# pattern before they can be required — tracked as follow-up. + +read -r -d '' STAGING_CHECKS <<'EOF' || true +Analyze (go) +Analyze (javascript-typescript) +Analyze (python) +Canvas (Next.js) +Canvas tabs E2E +Detect changes +E2E API Smoke Test +Platform (Go) +Python Lint & Test +Scan diff for credential-shaped strings +Shellcheck (E2E scripts) +EOF + +read -r -d '' MAIN_CHECKS <<'EOF' || true +Analyze (go) +Analyze (javascript-typescript) +Analyze (python) +Canvas (Next.js) +Canvas tabs E2E +Detect changes +E2E API Smoke Test +PR-built wheel + import smoke +Platform (Go) +Python Lint & Test +Scan diff for credential-shaped strings +Shellcheck (E2E scripts) +EOF + +build_payload() { + local checks="$1" + local require_reviews="$2" # true / false + local checks_json + checks_json=$(printf '%s\n' "$checks" | jq -Rs ' + split("\n") + | map(select(length > 0)) + | map({context: ., app_id: -1}) + ') + jq -n \ + --argjson checks "$checks_json" \ + --argjson reviews "$require_reviews" \ + '{ + required_status_checks: { + strict: false, + checks: $checks + }, + enforce_admins: false, + required_pull_request_reviews: ( + if $reviews then + { required_approving_review_count: 1, dismiss_stale_reviews: true } + else null end + ), + restrictions: null, + allow_deletions: false, + allow_force_pushes: false, + block_creations: false, + required_conversation_resolution: true, + required_linear_history: false, + lock_branch: false, + allow_fork_syncing: true + }' +} + +apply_branch() { + local branch="$1" + local checks="$2" + local require_reviews="$3" + local payload + payload=$(build_payload "$checks" "$require_reviews") + if [[ "$DRY_RUN" -eq 1 ]]; then + echo "=== branch: $branch ===" + echo "$payload" | jq . + return + fi + echo "Applying branch protection on $branch..." + printf '%s' "$payload" | gh api -X PUT \ + "repos/$REPO/branches/$branch/protection" \ + --input - + echo "Applied: $branch" +} + +if [[ -z "$ONLY_BRANCH" || "$ONLY_BRANCH" == "staging" ]]; then + apply_branch staging "$STAGING_CHECKS" true +fi +if [[ -z "$ONLY_BRANCH" || "$ONLY_BRANCH" == "main" ]]; then + apply_branch main "$MAIN_CHECKS" true +fi diff --git a/tools/branch-protection/drift_check.sh b/tools/branch-protection/drift_check.sh new file mode 100755 index 00000000..b25e23f0 --- /dev/null +++ b/tools/branch-protection/drift_check.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# tools/branch-protection/drift_check.sh — compare the live branch +# protection on staging + main against what apply.sh would set. Used +# by branch-protection-drift.yml (cron) to catch out-of-band UI edits. +# +# Exit codes: +# 0 — live state matches the script +# 1 — drift detected (output shows the diff) +# 2 — gh API call failed + +set -euo pipefail + +REPO="Molecule-AI/molecule-core" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +EXIT_CODE=0 + +check_branch() { + local branch="$1" + local want + want=$(bash "$SCRIPT_DIR/apply.sh" --dry-run --branch "$branch" 2>&1 | + sed -n '/^{$/,/^}$/p' | + jq -S '.required_status_checks.checks | map(.context) | sort') + local have + if ! have=$(gh api "repos/$REPO/branches/$branch/protection/required_status_checks" 2>/dev/null | + jq -S '.checks | map(.context) | sort'); then + echo "drift_check: FAIL to fetch $branch protection (gh API error)" + return 2 + fi + if [[ "$want" != "$have" ]]; then + echo "=== DRIFT on $branch ===" + echo "want:"; echo "$want" + echo "have:"; echo "$have" + diff <(echo "$want") <(echo "$have") || true + return 1 + fi + echo "OK: $branch matches desired state" +} + +for b in staging main; do + if ! check_branch "$b"; then + EXIT_CODE=1 + fi +done +exit "$EXIT_CODE"