# molecule-core/.gitea/workflows/ci.yml

# Ported from .github/workflows/ci.yml on 2026-05-11 per RFC internal#219 §1.
# The initial port set `continue-on-error: true` on every job so that broken
# workflows would surface without blocking (per RFC §1 — "surface broken
# workflows without blocking"). The gating jobs below have since been flipped
# to `continue-on-error: false`; see the notes on each job. The four-surface
# migration audit (feedback_gitea_actions_migration_audit_pattern) was
# performed against this port:
#
#   1. YAML — dropped `merge_group` trigger (no Gitea merge queue); no
#      `workflow_dispatch.inputs` to drop (Gitea 1.22.6 rejects those —
#      feedback_gitea_workflow_dispatch_inputs_unsupported); no `environment:`
#      blocks; kept `runs-on: ubuntu-latest` (Gitea runner pool advertises
#      this label per agent_labels in action_runner table). Workflow-level
#      env.GITHUB_SERVER_URL set as belt-and-suspenders against runner
#      defaults (feedback_act_runner_github_server_url).
#
#   2. Cache — `actions/upload-artifact@v3.2.2` was already pinned to v3 for
#      Gitea act_runner v0.6 compatibility (a comment in the original called
#      this out). v4+ is incompatible with Gitea 1.22.x. No `actions/cache`
#      usage to audit. `actions/setup-python@v6` `cache: pip` is left in
#      place — works against Gitea's built-in cache server when runner.cache
#      is configured (currently is, /opt/molecule/runners/config.yaml).
#
#   3. Token — workflow uses no custom dispatch tokens. The auto-injected
#      `GITHUB_TOKEN` (which Gitea aliases to a runner-scoped token) is
#      sufficient for `actions/checkout` against this same repo.
#
#   4. Docs — no docs/scripts reference github.com URLs that need swapping.
#      The canvas-deploy-reminder step writes a `ghcr.io/...` image
#      reference into the step summary text — that's documentation prose
#      pointing at the ECR-mirrored canvas image and stays unchanged for
#      this port (a separate cleanup if ghcr→ECR sweep is in scope).
#
# Cross-links:
#   - RFC: internal#219 (CI/CD hard-gate hardening)
#   - Reference port style: molecule-controlplane/.gitea/workflows/ci.yml
#   - Bugs that may surface immediately and are tracked separately:
#     internal#214 (Go-side vanity-import / go.sum drift, if any)
#   - Phase 4 (this PR's follow-up): flip `continue-on-error: false` once
#     surfaced defects are fixed, then add `all-required` aggregator
#     sentinel (RFC §2) and PATCH branch protection (Phase 4 scope).

name: CI

# Triggers: pushes to, and pull requests targeting, the two long-lived
# branches. The .github/ original also lists `merge_group` (GitHub's
# merge-queue trigger); this Gitea-side copy omits it because Gitea has no
# merge queue.
on:
  push:
    branches:
      - main
      - staging
  pull_request:
    branches:
      - main
      - staging

# One concurrency group per ref. A new commit on a ref cancels the CI run
# already in flight for that ref, so stale runs do not pile up in the queue.
# PR refs and main/staging refs land in separate groups because github.ref
# differs between them.
concurrency:
  group: ci-${{ github.ref }}
  cancel-in-progress: true

env:
  # Pinned at workflow level as a second line of defence against the
  # runner-default trap (feedback_act_runner_github_server_url). Runners
  # already receive this value through runner.envs in
  # /opt/molecule/runners/config.yaml; repeating it here covers a runner that
  # was regenerated without that config file
  # (feedback_act_runner_needs_config_file_env).
  GITHUB_SERVER_URL: https://git.moleculesai.app

jobs:
  # Path filter. Works out which areas of the repository a change touched so
  # the downstream jobs can skip their expensive steps when only unrelated
  # files (docs/markdown) were modified.
  changes:
    name: Detect changes
    runs-on: ubuntu-latest
    # Hard gate since Phase 4 (RFC #219 §1), whose bar is "all required jobs
    # >=98% green on main". The flip was confirmed on 2026-05-12 through a
    # combined-status check of the latest main commit (all CI jobs green).
    # The `all-required` sentinel hard-fails when this job fails, so no
    # Phase 3 suppression is needed.
    # To revert: restore `continue-on-error: true` if regressions appear.
    continue-on-error: false
    # One output per path group, each forwarded from the `check` step.
    outputs:
      canvas: ${{ steps.check.outputs.canvas }}
      platform: ${{ steps.check.outputs.platform }}
      python: ${{ steps.check.outputs.python }}
      scripts: ${{ steps.check.outputs.scripts }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          # Full history; presumably so the base commit is available to
          # diff against — confirm against detect-changes.py.
          fetch-depth: 0
      - id: check
        # Event-payload values are handed over through env vars; the script
        # receives them as command-line arguments. GITHUB_EVENT_BEFORE, when
        # set in the environment, takes precedence over PUSH_BEFORE.
        run: |
          python3 .gitea/scripts/detect-changes.py \
            --profile ci \
            --event-name "${{ github.event_name }}" \
            --pr-base-sha "$PR_BASE_SHA" \
            --base-ref "$PR_BASE_REF" \
            --push-before "${GITHUB_EVENT_BEFORE:-$PUSH_BEFORE}"
        env:
          PUSH_BEFORE: ${{ github.event.before }}
          PR_BASE_REF: ${{ github.event.pull_request.base.ref }}
          PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}

  # Platform (Go) — Go build, vet, test and lint plus the coverage gates.
  # The job itself always runs so the required status context is always
  # emitted; the expensive steps are path-scoped on every event, so
  # docs/E2E/Canvas-only pushes to main do not hold up deploys on unrelated
  # Go bootstrap work.
  platform-build:
    name: Platform (Go)
    runs-on: ubuntu-latest
    needs: changes
    # mc#774 (closed 2026-05-14) — Phase 4 flip of this job to a hard gate.
    # History: Phase 4 (#656) first set continue-on-error: false on the
    # strength of a Phase-3-masked "green on main 2026-05-12", after which
    # two failure classes surfaced:
    #   (1) four delegation_test.go sqlmock gaps — fix-forward landed in
    #       PR #669 (with #634), now closed.
    #   (2) TestMCPHandler_CommitMemory_GlobalScope_Blocked
    #       (mcp_test.go:433), where OFFSEC-001 hardening collided with a
    #       test assertion; tracked in mc#762.
    # (2) is a separate issue and does NOT block the flip: per the original
    # note, that test is wrapped in the "Diagnostic — per-package verbose
    # 60s" step of this job, which carries its own step-level
    # continue-on-error: true.
    # Flip confirmed by CI / Platform (Go) status = success on main HEAD
    # 363905d3.
    continue-on-error: false
    # Ceiling for the whole job. The go test step of this job runs with its
    # own 10m timeout; this cap only catches a step that leaks past that, and
    # is set well above 10m so the go test timeout stays the active
    # constraint.
    timeout-minutes: 15
    defaults:
      run:
        working-directory: workspace-server
    steps:
      # No-op path: reports the gate as satisfied when nothing under
      # workspace-server/ changed. Runs from the repo root because nothing
      # has been checked out at this point.
      - if: ${{ needs.changes.outputs.platform != 'true' }}
        run: echo "No workspace-server/** changes — Platform (Go) gate satisfied without running Go build/test/lint."
        working-directory: .
      - if: ${{ needs.changes.outputs.platform == 'true' }}
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      - if: ${{ needs.changes.outputs.platform == 'true' }}
        uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff  # v5
        with:
          go-version: 'stable'
      - if: ${{ needs.changes.outputs.platform == 'true' }}
        run: go mod download
      - if: ${{ needs.changes.outputs.platform == 'true' }}
        run: go build ./cmd/server
      # The CLI (molecli) is no longer built here; it lives in its own repo:
      # git.moleculesai.app/molecule-ai/molecule-cli
      - if: ${{ needs.changes.outputs.platform == 'true' }}
        run: go vet ./...
      - name: Install golangci-lint
        if: ${{ needs.changes.outputs.platform == 'true' }}
        run: go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.12.2
      - name: Run golangci-lint
        if: ${{ needs.changes.outputs.platform == 'true' }}
        run: $(go env GOPATH)/bin/golangci-lint run --timeout 3m ./...
      - if: ${{ needs.changes.outputs.platform == 'true' }}
        name: Diagnostic — per-package verbose 60s
        # Advisory pre-run of two packages in verbose mode with a 60s
        # go test timeout. Each run's output is tee'd to a log file and the
        # last 100 lines are replayed inside a collapsible log group whose
        # title carries that run's exit code. The script never fails on a
        # test failure (`set +e`, and it ends with an echo).
        run: |
          set +e
          # PIPESTATUS[0] is the exit status of `go test` itself. A plain `$?`
          # after the pipeline reports tee's status unless pipefail happens to
          # be on, which would make a failing run print "exit=0".
          go test -race -v -timeout 60s ./internal/handlers/... 2>&1 | tee /tmp/test-handlers.log
          handlers_exit=${PIPESTATUS[0]}
          go test -race -v -timeout 60s ./internal/pendinguploads/... 2>&1 | tee /tmp/test-pu.log
          pu_exit=${PIPESTATUS[0]}
          echo "::group::handlers exit=$handlers_exit (last 100 lines)"
          tail -100 /tmp/test-handlers.log
          echo "::endgroup::"
          echo "::group::pendinguploads exit=$pu_exit (last 100 lines)"
          tail -100 /tmp/test-pu.log
          echo "::endgroup::"
        # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
        continue-on-error: true
      # Full test suite with the race detector, writing coverage.out for the
      # two coverage steps that follow.
      # Why the explicit 10m timeout: on a cold runner cache the full ./...
      # suite with race detection + coverage was being OOM-killed at ~4m39s.
      # A 10m timeout lets the suite finish on a cold cache (~5-7m) and fail
      # cleanly rather than being OOM-killed; the 15m job-level timeout
      # remains as the backstop.
      - name: Run tests with race detection and coverage
        if: ${{ needs.changes.outputs.platform == 'true' }}
        run: go test -race -timeout 10m -coverprofile=coverage.out ./...

      # Advisory only (#1823). Prints every source file with its coverage,
      # sorted ascending so the weakest files are listed first and reviewers
      # can spot gaps at a glance. The figure per file is the mean of the
      # per-function percentages reported by `go tool cover -func`. This step
      # does NOT fail the build; the hard gate is the threshold check that
      # follows.
      - name: Per-file coverage report
        if: ${{ needs.changes.outputs.platform == 'true' }}
        run: |
          echo "=== Per-file coverage (worst first) ==="
          go tool cover -func=coverage.out \
            | grep -v '^total:' \
            | awk '{file=$1; sub(/:[0-9][0-9.]*:.*/, "", file); pct=$NF; gsub(/%/,"",pct); s[file]+=pct; c[file]++}
                   END {for (f in s) printf "%6.1f%%  %s\n", s[f]/c[f], f}' \
            | sort -n

      - if: ${{ needs.changes.outputs.platform == 'true' }}
        name: Check coverage thresholds
        # Enforces two gates from #1823 Layer 1:
        #   1. Total floor (25% — ratchet plan in COVERAGE_FLOOR.md).
        #   2. Per-file floor — non-test .go files in security-critical
        #      paths with coverage <10% fail the build, UNLESS the file
        #      path is listed in .coverage-allowlist.txt (acknowledged
        #      historical debt with a tracking issue + expiry).
        # The total is validated as a number before it is compared, so an
        # unreadable coverage profile fails the gate instead of slipping
        # past it. Uses bash features (arrays, [[ ]]).
        run: |
          set -e
          TOTAL_FLOOR=25
          # Security-critical paths where a 0%-coverage file is a real risk.
          CRITICAL_PATHS=(
            "internal/handlers/tokens"
            "internal/handlers/workspace_provision"
            "internal/handlers/a2a_proxy"
            "internal/handlers/registry"
            "internal/handlers/secrets"
            "internal/middleware/wsauth"
            "internal/crypto"
          )

          # `|| TOTAL=""` stops a failed pipeline from aborting the script
          # here, so the explicit check below can report what went wrong.
          TOTAL=$(go tool cover -func=coverage.out | grep '^total:' | awk '{print $3}' | sed 's/%//') || TOTAL=""
          # An empty or non-numeric value would make the floor comparison
          # meaningless; treat it as a failure rather than as "above floor".
          if ! [[ "$TOTAL" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
            echo "::error::Could not read a numeric total from coverage.out (got '${TOTAL}'); refusing to treat missing coverage as passing."
            exit 1
          fi
          echo "Total coverage: ${TOTAL}%"
          # Values are passed with -v instead of being spliced into the awk
          # program text.
          if awk -v total="$TOTAL" -v floor="$TOTAL_FLOOR" 'BEGIN{exit !(total < floor)}'; then
            echo "::error::Total coverage ${TOTAL}% is below the ${TOTAL_FLOOR}% floor. See COVERAGE_FLOOR.md for ratchet plan."
            exit 1
          fi

          # Aggregate per-file coverage → /tmp/perfile.txt: "<fullpath> <pct>"
          # (<pct> is the mean of the file's per-function percentages).
          go tool cover -func=coverage.out \
            | grep -v '^total:' \
            | awk '{file=$1; sub(/:[0-9][0-9.]*:.*/, "", file); pct=$NF; gsub(/%/,"",pct); s[file]+=pct; c[file]++}
                   END {for (f in s) printf "%s %.1f\n", f, s[f]/c[f]}' \
            > /tmp/perfile.txt

          # Build allowlist — paths relative to workspace-server, one per line.
          # Lines starting with # are comments.
          ALLOWLIST=""
          if [ -f ../.coverage-allowlist.txt ]; then
            ALLOWLIST=$(grep -vE '^(#|[[:space:]]*$)' ../.coverage-allowlist.txt || true)
          fi

          FAILED=0
          WARNED=0
          for path in "${CRITICAL_PATHS[@]}"; do
            while read -r file pct; do
              # Only non-test files under this critical path that sit below
              # the 10% per-file floor are of interest.
              [[ "$file" == *_test.go ]] && continue
              [[ "$file" == *"$path"* ]] || continue
              awk -v pct="$pct" 'BEGIN{exit !(pct < 10)}' || continue

              # Strip the package-import prefix so we can match .coverage-allowlist.txt
              # entries written as paths relative to workspace-server/.
              # Handle both module paths: platform/workspace-server/... and platform/...
              rel=$(echo "$file" | sed 's|^github.com/molecule-ai/molecule-monorepo/platform/workspace-server/||; s|^github.com/molecule-ai/molecule-monorepo/platform/||')

              if echo "$ALLOWLIST" | grep -qxF "$rel"; then
                echo "::warning file=workspace-server/$rel::Critical file at ${pct}% coverage (allowlisted, #1823) — fix before expiry."
                WARNED=$((WARNED+1))
              else
                echo "::error file=workspace-server/$rel::Critical file at ${pct}% coverage — must be >=10% (target 80%). See #1823. To acknowledge as known debt, add this path to .coverage-allowlist.txt."
                FAILED=$((FAILED+1))
              fi
            done < /tmp/perfile.txt
          done

          echo ""
          echo "Critical-path check: $FAILED new failures, $WARNED allowlisted warnings."

          if [ "$FAILED" -gt 0 ]; then
            echo ""
            echo "$FAILED security-critical file(s) have <10% test coverage and are"
            echo "NOT in the allowlist. These paths handle auth, tokens, secrets, or"
            echo "workspace provisioning — a 0% file here is the exact gap that let"
            echo "CWE-22, CWE-78, KI-005 slip through in past incidents. Either:"
            echo "  (a) add tests to raise coverage above 10%, or"
            echo "  (b) add the path to .coverage-allowlist.txt with an expiry date"
            echo "      and a tracking issue reference."
            exit 1
          fi

  # Canvas (Next.js) — required check that always runs, built the same way
  # as platform-build: the job always emits its context and each heavy step
  # is gated on the path filter. An earlier attempt at two jobs sharing one
  # name (PR #2321) does not satisfy branch protection, because SKIPPED
  # siblings count as not-passed regardless of SUCCESS siblings (verified
  # empirically on PR #2314).
  canvas-build:
    name: Canvas (Next.js)
    runs-on: ubuntu-latest
    needs: changes
    # Phase 4 (RFC #219 §1): confirmed green on main 2026-05-12.
    continue-on-error: false
    timeout-minutes: 20
    defaults:
      run:
        working-directory: canvas
    steps:
      # No-op path: reports the gate as satisfied when nothing under canvas/
      # changed. Runs from the repo root because nothing has been checked
      # out at this point.
      - if: ${{ needs.changes.outputs.canvas != 'true' }}
        run: echo "No canvas/** changes — Canvas (Next.js) gate satisfied without running npm build/test."
        working-directory: .
      - if: ${{ needs.changes.outputs.canvas == 'true' }}
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      - if: ${{ needs.changes.outputs.canvas == 'true' }}
        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e  # v6.4.0
        with:
          node-version: '22'
      - if: ${{ needs.changes.outputs.canvas == 'true' }}
        run: npm ci --include=optional --prefer-offline
      - if: ${{ needs.changes.outputs.canvas == 'true' }}
        run: npm run build
      # Step 2 of #1815: coverage is wired into CI so a baseline is visible
      # on every PR. The instrumentation itself is configured in
      # canvas/vitest.config.ts (provider: v8, reporters: text + html +
      # json-summary). There is no threshold gate yet; thresholds are Step 3
      # (also tracked in #1815), to be dialled in once the team has seen the
      # current numbers.
      - name: Run tests with coverage
        if: ${{ needs.changes.outputs.canvas == 'true' }}
        run: npx vitest run --coverage
      # upload-artifact stays on v3 for Gitea act_runner v0.6 compatibility.
      # v4+ speaks the GHES 3.10+ artifact protocol, which Gitea 1.22.x does
      # NOT implement; it fails with `GHESNotSupportedError: @actions/artifact
      # v2.0.0+, upload-artifact@v4+ and download-artifact@v4+ are not
      # currently supported on GHES`. Drop this pin when Gitea ships the v4
      # protocol (tracked: post-Gitea-1.23 followup).
      - name: Upload coverage summary as artifact
        if: ${{ needs.changes.outputs.canvas == 'true' }}
        uses: actions/upload-artifact@c6a366c94c3e0affe28c06c8df20a878f24da3cf  # v3.2.2
        with:
          name: canvas-coverage-${{ github.run_id }}
          path: canvas/coverage/
          if-no-files-found: warn
          retention-days: 7

  # Shellcheck (E2E scripts) — required context; the heavy steps only run
  # when the path filter reports script changes.
  shellcheck:
    name: Shellcheck (E2E scripts)
    runs-on: ubuntu-latest
    needs: changes
    # Phase 4 (RFC #219 §1): confirmed green on main 2026-05-12.
    continue-on-error: false
    steps:
      # No-op path: reports the gate as satisfied when no script paths changed.
      - if: ${{ needs.changes.outputs.scripts != 'true' }}
        run: echo "No tests/e2e, scripts, or infra/scripts changes — Shellcheck gate satisfied without running script checks."
      - if: ${{ needs.changes.outputs.scripts == 'true' }}
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

      # shellcheck ships pre-installed on ubuntu-latest runners (via apt).
      # infra/scripts/ is covered because setup.sh + nuke.sh gate the README
      # quickstart, so a shellcheck regression there silently breaks new-user
      # onboarding. scripts/ is deliberately left out of this bulk pass until
      # its pre-existing SC3040/SC3043 warnings are cleaned up.
      - name: Run shellcheck on tests/e2e/*.sh and infra/scripts/*.sh
        if: ${{ needs.changes.outputs.scripts == 'true' }}
        run: |
          find tests/e2e infra/scripts -type f -name '*.sh' -print0 \
            | xargs -0 shellcheck --severity=warning

      - name: Lint cleanup-trap hygiene (RFC #2873)
        if: ${{ needs.changes.outputs.scripts == 'true' }}
        run: bash tests/e2e/lint_cleanup_traps.sh

      - name: Run E2E bash unit tests (no live infra)
        if: ${{ needs.changes.outputs.scripts == 'true' }}
        run: bash tests/e2e/test_model_slug.sh

      # Exercises scripts/promote-tenant-image.sh — the codified
      # :staging-latest → :latest ECR promote + tenant fleet redeploy that
      # closes molecule-ai/molecule-core#660. Its 40 mock-driven cases cover
      # every exit path (preflight, snapshot, promote, redeploy
      # 403→SSM-refresh, verify, rollback) with no live AWS/CP/SSM calls.
      - name: Test ECR promote-tenant-image script (mock-driven, no live infra)
        if: ${{ needs.changes.outputs.scripts == 'true' }}
        run: bash scripts/test-promote-tenant-image.sh

      # The bulk shellcheck pass skips scripts/ (legacy SC3040/SC3043 cleanup
      # pending), so the promote script and its test harness are checked
      # explicitly here to keep regressions in them inside the required check.
      - name: Shellcheck promote-tenant-image script
        if: ${{ needs.changes.outputs.scripts == 'true' }}
        run: shellcheck --severity=warning scripts/promote-tenant-image.sh scripts/test-promote-tenant-image.sh

  # mc#959 root-fix (sre)

  canvas-deploy-reminder:
    # Informational job: after canvas-build, appends a "deploy required"
    # note to the run's step summary. It only writes the note for a push to
    # main that changed canvas/ (see the guard at the top of the script);
    # in every other case the step prints why it is not applicable and
    # exits 0.
    name: Canvas Deploy Reminder
    # NOTE(review): this is the only job in this file that does not run on
    # ubuntu-latest; the reason for the `docker-host` label is not visible
    # here — confirm before changing.
    runs-on: docker-host
    # mc#774 root-fix: the job-level `if:` exists so that
    # ci-required-drift.py's ci_job_names() detects this job as
    # github.ref-gated and skips it from F1. The step-level `exit 0` already
    # handles the "not a main push" case; the job-level `if:` only makes the
    # gating explicit so the drift script can see it.
    # The job therefore starts on both main and staging refs, and the step
    # exits 0 when the reminder does not apply.
    if: ${{ github.ref == 'refs/heads/main' || github.ref == 'refs/heads/staging' }}
    needs: [changes, canvas-build]
    steps:
      - name: Write deploy reminder to step summary
        env:
          COMMIT_SHA: ${{ github.sha }}
          CANVAS_CHANGED: ${{ needs.changes.outputs.canvas }}
          EVENT_NAME: ${{ github.event_name }}
          REF_NAME: ${{ github.ref }}
          # github.server_url resolves, via the workflow-level env override,
          # to the Gitea instance, so RUN_URL points at the Gitea run page
          # (not github.com). See feedback_act_runner_github_server_url.
          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
        run: |
          set -euo pipefail
          if [ "$CANVAS_CHANGED" != "true" ] || [ "$EVENT_NAME" != "push" ] || [ "$REF_NAME" != "refs/heads/main" ]; then
            echo "Canvas deploy reminder not applicable for event=$EVENT_NAME ref=$REF_NAME canvas_changed=$CANVAS_CHANGED."
            exit 0
          fi

          # Write body to a temp file — avoids backtick escaping in shell.
          cat > /tmp/deploy-reminder.md << 'BODY'
          ## Canvas build passed — deploy required

          The `publish-canvas-image` workflow is now building a fresh Docker image
          (`ghcr.io/molecule-ai/canvas:latest`) in the background.

          Once it completes (~3–5 min), apply on the host machine with:
          ```bash
          cd <runner-workspace>
          git pull origin main
          docker compose pull canvas && docker compose up -d canvas
          ```

          If you need to rebuild from local source instead (e.g. testing unreleased
          changes or a new `NEXT_PUBLIC_*` URL), use:
          ```bash
          docker compose build canvas && docker compose up -d canvas
          ```
          BODY
          printf '\n> Posted automatically by CI · commit `%s` · [build log](%s)\n' \
            "$COMMIT_SHA" "$RUN_URL" >> /tmp/deploy-reminder.md

          # Gitea has no commit-comments API; write to GITHUB_STEP_SUMMARY,
          # which both GitHub Actions and Gitea Actions render as the
          # workflow run's summary page. (#75 / PR-D)
          cat /tmp/deploy-reminder.md >> "$GITHUB_STEP_SUMMARY"

  # Python Lint & Test — required check that always runs (it has no `needs:`
  # and no path gating). The runtime Python code now lives in
  # molecule-ai-workspace-runtime; this context is kept as a guard so branch
  # protection still catches any attempt to bring an editable runtime copy
  # back under molecule-core/workspace/.
  python-lint:
    name: Python Lint & Test
    runs-on: ubuntu-latest
    continue-on-error: false
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      # Fails the job if the workspace/ directory exists or if either legacy
      # packaging script has been restored; otherwise prints a pass message.
      - name: Runtime SSOT guard
        run: |
          set -eu
          if test -d workspace; then
            echo "::error file=workspace::Runtime source must live in molecule-ai-workspace-runtime, not molecule-core/workspace."
            exit 1
          fi
          for legacy_script in \
            scripts/build_runtime_package.py \
            scripts/test_build_runtime_package.py
          do
            if test -e "$legacy_script"; then
              echo "::error file=$legacy_script::Legacy build-from-workspace packaging script must not be restored."
              exit 1
            fi
          done
          echo "Runtime SSOT guard passed; core consumes the standalone runtime package."

  all-required:
    # Aggregator sentinel — RFC internal#219 §2 (Phase 4 — closes internal#286).
    #
    # Emits `CI / all-required (<event>)` where <event> is the workflow trigger
    # (e.g. `CI / all-required (pull_request)`, `CI / all-required (push)`).
    # Branch protection MUST be updated to require the event-suffixed name —
    # requiring `CI / all-required` (bare, no suffix) silently blocks all merges
    # because Gitea treats absent status contexts as pending (not skipped), and
    # no workflow emits the bare name. Fixed: BP now requires
    # `CI / all-required (pull_request)` per issue #1473.
    #
    # Closes the failure mode where status_check_contexts on molecule-core/main
    # only listed `Secret scan` + `sop-tier-check` (the 2 meta-gates), so real
    # `Platform (Go)` / `Canvas (Next.js)` / `Python Lint & Test` / `Shellcheck`
    # red silently merged through. See internal#286 for the three concrete
    # tonight-of-2026-05-11 incidents that prompted the emergency bump.
    #
    # This job deliberately has no `needs:`. Gitea 1.22/act_runner can mark a
    # job-level `if: always()` + `needs:` sentinel as skipped before upstream
    # jobs settle, leaving branch protection with a permanent pending
    # `CI / all-required` context. Instead, this independent sentinel polls the
    # commit-status contexts listed in `required` below for this SHA. It fails
    # immediately when any of them reports `failure` or `error`, and fails at
    # the 40-minute deadline when any of them has not reached `success`
    # (still pending, never emitted, or in any other state).
    #
    # canvas-deploy-reminder is intentionally NOT among the polled contexts.
    # It is an informational main-push reminder, not a PR quality gate, and
    # its job-level `if:` means it does not run for pull_request refs — so it
    # must not be added to `required`.
    #
    continue-on-error: false
    runs-on: ubuntu-latest
    # Must stay above the poller's own 40-minute deadline so the script, not
    # the runner, decides the outcome.
    timeout-minutes: 45
    steps:
      - name: Wait for required CI contexts
        env:
          GITEA_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          API_ROOT: ${{ github.server_url }}/api/v1
          REPOSITORY: ${{ github.repository }}
          COMMIT_SHA: ${{ github.sha }}
          EVENT_NAME: ${{ github.event_name }}
        run: |
          set -euo pipefail
          python3 - <<'PY'
          import http.client
          import json
          import os
          import sys
          import time
          import urllib.error
          import urllib.request

          token = os.environ["GITEA_TOKEN"]
          api_root = os.environ["API_ROOT"].rstrip("/")
          repo = os.environ["REPOSITORY"]
          sha = os.environ["COMMIT_SHA"]
          event = os.environ["EVENT_NAME"]
          required = [
              f"CI / Detect changes ({event})",
              f"CI / Platform (Go) ({event})",
              f"CI / Canvas (Next.js) ({event})",
              f"CI / Shellcheck (E2E scripts) ({event})",
              f"CI / Python Lint & Test ({event})",
          ]
          terminal_bad = {"failure", "error"}
          # Monotonic clock: the deadline is unaffected by wall-clock
          # adjustments on the runner.
          deadline = time.monotonic() + 40 * 60
          last_summary = None

          def recency(item):
              """Ordering key for statuses of one context: newest timestamp
              first, with the status id breaking same-timestamp ties so the
              result does not depend on the order the API returned them in."""
              stamp = item.get("updated_at") or item.get("created_at") or ""
              return (stamp, item.get("id") or 0)

          def state_of(item):
              """State string of a status entry, read from `status` or, failing
              that, `state`; "missing" when there is no entry or no state."""
              item = item or {}
              return item.get("status") or item.get("state") or "missing"

          def fetch_statuses():
              """Return {context: most recent status entry} for this commit.

              Reads at most 5 pages and stops at the first empty page."""
              latest = {}
              for page in range(1, 6):
                  url = f"{api_root}/repos/{repo}/commits/{sha}/statuses?page={page}&limit=100"
                  req = urllib.request.Request(url, headers={"Authorization": f"token {token}"})
                  with urllib.request.urlopen(req, timeout=10) as resp:
                      chunk = json.load(resp)
                  if not chunk:
                      break
                  if not isinstance(chunk, list):
                      # Treated as a transient bad response and retried by the caller.
                      raise ValueError(f"unexpected statuses payload type: {type(chunk).__name__}")
                  for item in chunk:
                      ctx = item.get("context")
                      if not ctx:
                          continue
                      prev = latest.get(ctx)
                      if prev is None or recency(item) >= recency(prev):
                          latest[ctx] = item
              return latest

          while True:
              try:
                  latest = fetch_statuses()
              except (OSError, ValueError, http.client.HTTPException) as exc:
                  # OSError covers URLError/HTTPError and timeouts; ValueError
                  # covers a body that is not valid JSON. All are retried
                  # until the deadline rather than crashing the sentinel.
                  if time.monotonic() >= deadline:
                      print(f"FAIL: status polling did not recover before deadline: {exc}", file=sys.stderr)
                      sys.exit(1)
                  print(f"WARN: status poll failed, retrying: {exc}", flush=True)
                  time.sleep(15)
                  continue
              states = {ctx: state_of(latest.get(ctx)) for ctx in required}
              summary = ", ".join(f"{ctx}={state}" for ctx, state in states.items())
              # Only log when something changed, to keep the job log short.
              if summary != last_summary:
                  print(summary, flush=True)
                  last_summary = summary
              bad = {ctx: state for ctx, state in states.items() if state in terminal_bad}
              if bad:
                  print("FAIL: required CI context failed:", file=sys.stderr)
                  for ctx, state in bad.items():
                      desc = (latest.get(ctx) or {}).get("description") or ""
                      print(f"  - {ctx}: {state} {desc}", file=sys.stderr)
                  sys.exit(1)
              if all(state == "success" for state in states.values()):
                  print(f"OK: all {len(required)} required CI contexts succeeded")
                  sys.exit(0)
              if time.monotonic() >= deadline:
                  print("FAIL: timed out waiting for required CI contexts:", file=sys.stderr)
                  for ctx, state in states.items():
                      print(f"  - {ctx}: {state}", file=sys.stderr)
                  sys.exit(1)
              time.sleep(15)
          PY