name: E2E API Smoke Test

# Extracted from ci.yml so workflow-level concurrency can protect this job
# from run-level cancellation (issue #458).
#
# Trigger model (changed 2026-04-28 — see auto-promote gap below):
#
# This workflow always FIRES on push/pull_request to staging+main, but
# only does real work when paths under `workspace-server/`,
# `tests/e2e/`, or this workflow file changed. The detect-changes job
# uses dorny/paths-filter to decide; the e2e-api job runs only if
# changes match. Otherwise the no-op job emits success so the workflow
# always produces a `completed/success` run record.
#
# Why: auto-promote-staging.yml's gate-check (line 99) treats "workflow
# didn't run" as failure, which dead-locked any platform-only or
# test-only push to staging that didn't touch workspace-server paths.
# Dropping the path filter on the trigger and gating real work
# internally guarantees the workflow always emits a result that the
# auto-promote chain can read. Same pattern applied to
# e2e-staging-canvas.yml in the same PR.

on:
  push:
    branches: [main, staging]
  pull_request:
    branches: [main, staging]
  workflow_dispatch:

# Least privilege: checkout needs to read the repository and
# dorny/paths-filter lists changed files on pull_request events through
# the API. Nothing in this workflow writes back to GitHub.
permissions:
  contents: read
  pull-requests: read

concurrency:
  # Per-SHA grouping (changed 2026-04-28 from per-ref). Per-ref had the
  # same auto-promote-staging brittleness as e2e-staging-canvas — back-
  # to-back staging pushes share refs/heads/staging, so the older push's
  # queued run gets cancelled when a newer push lands. Auto-promote-
  # staging then sees `completed/cancelled` for the older SHA and stays
  # put; the newer SHA's gates may eventually save the day, but if the
  # newer push gets cancelled too, we deadlock.
  #
  # See e2e-staging-canvas.yml's identical concurrency block for the full
  # rationale and the 2026-04-28 incident reference.
  group: e2e-api-${{ github.event.pull_request.head.sha || github.sha }}
  cancel-in-progress: false

jobs:
  # Decides whether this commit touches anything the API suite covers.
  # Exposes a single output, `api`, which the two jobs below gate on.
  detect-changes:
    runs-on: ubuntu-latest
    outputs:
      api: ${{ steps.decide.outputs.api }}
    steps:
      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
      - uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1
        id: filter
        # The filter result is ignored for manual dispatch (see `decide`),
        # so skip it there rather than let a diff-resolution failure block
        # a run that ops explicitly asked for.
        if: github.event_name != 'workflow_dispatch'
        with:
          filters: |
            api:
              - 'workspace-server/**'
              - 'tests/e2e/**'
              - '.github/workflows/e2e-api.yml'
      - id: decide
        # Always run real work for manual dispatch — no diff context to
        # filter against and ops dispatching this expects the suite to
        # actually exercise the platform.
        #
        # Expression values are handed over through env rather than
        # spliced into the script text, so the shell only ever sees them
        # as quoted variable contents.
        env:
          EVENT_NAME: ${{ github.event_name }}
          FILTER_API: ${{ steps.filter.outputs.api }}
        run: |
          if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
            echo "api=true" >> "$GITHUB_OUTPUT"
          else
            echo "api=${FILTER_API}" >> "$GITHUB_OUTPUT"
          fi

  # Same `name:` as the real job below so the check-run produced by the
  # no-op path is indistinguishable from the real one for branch
  # protection purposes. Without this, the real job was always skipped on
  # paths-filtered commits → branch protection on `main` saw "E2E API
  # Smoke Test" as a missing required check → auto-promote-staging's
  # `git push origin main` got rejected with GH006. Observed 2026-04-28
  # 00:22 UTC blocking the staging→main promote despite all gates
  # actually passing at the workflow level.
  no-op:
    needs: detect-changes
    if: needs.detect-changes.outputs.api != 'true'
    name: E2E API Smoke Test
    runs-on: ubuntu-latest
    steps:
      - run: |
          echo "No workspace-server / tests/e2e / workflow changes — E2E API gate satisfied without running tests."
          echo "::notice::E2E API Smoke Test no-op pass (paths filter excluded this commit)."

  # The real suite: boots Postgres and Redis in docker on non-default host
  # ports, builds and starts the platform server in the background, waits
  # for it to report healthy, then runs the E2E shell scripts against it.
  e2e-api:
    needs: detect-changes
    if: needs.detect-changes.outputs.api == 'true'
    name: E2E API Smoke Test
    runs-on: ubuntu-latest
    timeout-minutes: 15
    env:
      DATABASE_URL: postgres://dev:dev@localhost:15432/molecule?sslmode=disable
      REDIS_URL: redis://localhost:16379
      PORT: "8080"
      PG_CONTAINER: molecule-ci-postgres
      REDIS_CONTAINER: molecule-ci-redis
    steps:
      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
      - uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
        with:
          go-version: 'stable'
          cache: true
          cache-dependency-path: workspace-server/go.sum
      - name: Start Postgres (docker)
        run: |
          # Remove any leftover container with the same name first.
          docker rm -f "$PG_CONTAINER" 2>/dev/null || true
          docker run -d --name "$PG_CONTAINER" \
            -e POSTGRES_USER=dev \
            -e POSTGRES_PASSWORD=dev \
            -e POSTGRES_DB=molecule \
            -p 15432:5432 \
            postgres:16
          for i in $(seq 1 30); do
            # Probe over TCP, not the unix socket: the image's init-time
            # temporary server only listens on the socket and is restarted
            # afterwards, so a socket probe can report ready too early.
            if docker exec "$PG_CONTAINER" pg_isready -h 127.0.0.1 -U dev -d molecule >/dev/null 2>&1; then
              echo "Postgres ready after ${i}s"
              exit 0
            fi
            sleep 1
          done
          echo "::error::Postgres did not become ready in 30s"
          docker logs "$PG_CONTAINER" || true
          exit 1
      - name: Start Redis (docker)
        run: |
          # Remove any leftover container with the same name first.
          docker rm -f "$REDIS_CONTAINER" 2>/dev/null || true
          docker run -d --name "$REDIS_CONTAINER" -p 16379:6379 redis:7
          for i in $(seq 1 15); do
            if docker exec "$REDIS_CONTAINER" redis-cli ping 2>/dev/null | grep -q PONG; then
              echo "Redis ready after ${i}s"
              exit 0
            fi
            sleep 1
          done
          echo "::error::Redis did not become ready in 15s"
          docker logs "$REDIS_CONTAINER" || true
          exit 1
      - name: Build platform
        working-directory: workspace-server
        run: go build -o platform-server ./cmd/server
      - name: Start platform (background)
        working-directory: workspace-server
        run: |
          # Log and pid file land in workspace-server/; later steps read
          # them from the repository root.
          ./platform-server > platform.log 2>&1 &
          echo $! > platform.pid
      - name: Wait for /health
        run: |
          pid="$(cat workspace-server/platform.pid)"
          for i in $(seq 1 30); do
            if curl -sf "http://localhost:${PORT}/health" > /dev/null; then
              echo "Platform up after ${i}s"
              exit 0
            fi
            # Fail fast when the server has already died instead of
            # polling a dead port for the rest of the window.
            if ! kill -0 "$pid" 2>/dev/null; then
              echo "::error::Platform process exited before becoming healthy"
              cat workspace-server/platform.log || true
              exit 1
            fi
            sleep 1
          done
          echo "::error::Platform did not become healthy in 30s"
          cat workspace-server/platform.log || true
          exit 1
      - name: Assert migrations applied
        run: |
          # The presence of the `workspaces` table is used as the signal
          # that migrations ran.
          tables=$(docker exec "$PG_CONTAINER" psql -U dev -d molecule -tAc "SELECT count(*) FROM information_schema.tables WHERE table_schema='public' AND table_name='workspaces'")
          if [ "$tables" != "1" ]; then
            echo "::error::Migrations did not apply"
            cat workspace-server/platform.log || true
            exit 1
          fi
          echo "Migrations OK"
      - name: Run E2E API tests
        run: bash tests/e2e/test_api.sh
      - name: Run notify-with-attachments E2E
        run: bash tests/e2e/test_notify_attachments_e2e.sh
      - name: Run priority-runtimes E2E (claude-code + hermes — skips when keys absent)
        # Validates the test script itself runs cleanly even with no LLM
        # keys (both phases skip gracefully). The wire-real coverage with
        # actual keys runs in canary-staging.yml + e2e-staging-saas.yml.
        run: bash tests/e2e/test_priority_runtimes_e2e.sh
      - name: Dump platform log on failure
        if: failure()
        run: cat workspace-server/platform.log || true
      - name: Stop platform
        if: always()
        run: |
          if [ -f workspace-server/platform.pid ]; then
            kill "$(cat workspace-server/platform.pid)" 2>/dev/null || true
          fi
      - name: Stop service containers
        if: always()
        run: |
          docker rm -f "$PG_CONTAINER" 2>/dev/null || true
          docker rm -f "$REDIS_CONTAINER" 2>/dev/null || true