name: Sweep stale Cloudflare Tunnels

# Janitor for Cloudflare Tunnels whose backing tenant no longer
# exists. Parallel-shape to sweep-cf-orphans.yml (which sweeps DNS
# records); same justification, different CF resource.
#
# Why this exists separately from sweep-cf-orphans:
#   - DNS records live on the zone (`/zones/<zone_id>/dns_records`).
#   - Tunnels live on the account (`/accounts/<account_id>/cfd_tunnel`).
#   - Different CF API surface, different scopes; the existing CF
#     token might not have `account:cloudflare_tunnel:edit`. Splitting
#     the workflows keeps each one's secret-presence gate independent
#     so neither silent-skips when the other's secret is missing.
#   - Cleaner blast radius — operators can disable one without the
#     other if a regression surfaces.
#
# Safety: the script's MAX_DELETE_PCT gate (default 90% — higher than
# the DNS sweep's 50% because tenant-shaped tunnels are mostly
# orphans by design) refuses to nuke past the threshold.

on:
  schedule:
    # Hourly at :45 — offset from sweep-cf-orphans (:15) so the two
    # janitors don't issue parallel CF API bursts at the same minute.
    - cron: '45 * * * *'
  workflow_dispatch:
    inputs:
      dry_run:
        description: "Dry run only — list what would be deleted, no deletion"
        required: false
        type: boolean
        default: true
      max_delete_pct:
        description: "Override safety gate (default 90, set higher only for major cleanup)"
        required: false
        # Explicit for symmetry with dry_run; string is also the
        # implicit type, so behaviour is unchanged.
        type: string
        default: "90"

# Don't let two sweeps race the same account.
concurrency:
  group: sweep-cf-tunnels
  cancel-in-progress: false

permissions:
  contents: read

jobs:
  sweep:
    name: Sweep CF tunnels
    runs-on: ubuntu-latest
    # 30 min cap. Was 5 min on the theory that the only thing that
    # could take >5min is a CF-API hang — but on 2026-05-02 a backlog
    # of 672 stale tunnels accumulated (large staging E2E run + delayed
    # sweep) and the serial `curl -X DELETE` loop (~0.7s/tunnel) needed
    # ~7-8min to drain. The 5-min cap killed the run mid-sweep
    # (cancelled at 424/672, see run 25248788312); a manual rerun
    # finished the remainder fine.
    #
    # The fix is two-part: parallelize the delete loop (8-way xargs in
    # the script — see scripts/ops/sweep-cf-tunnels.sh), AND raise the
    # cap so a one-off backlog doesn't trip a hangs-detector that
    # turned out to be a real-job-too-slow detector. With 8-way
    # parallelism, 600+ tunnels drains in ~60s; 30 min is generous
    # headroom for actual hangs to still surface (and is in line with
    # the sweep-cf-orphans companion job).
    timeout-minutes: 30
    env:
      CF_API_TOKEN: ${{ secrets.CF_API_TOKEN }}
      CF_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID }}
      CP_PROD_ADMIN_TOKEN: ${{ secrets.CP_PROD_ADMIN_TOKEN }}
      CP_STAGING_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_TOKEN }}
      # Empty on scheduled runs (no inputs), so fall back to the
      # same default the dispatch form advertises.
      MAX_DELETE_PCT: ${{ github.event.inputs.max_delete_pct || '90' }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

      - name: Verify required secrets present
        id: verify
        # Schedule-vs-dispatch behaviour split mirrors sweep-cf-orphans
        # (hardened 2026-04-28 after the silent-no-op incident: the
        # janitor reported green while doing nothing because secrets
        # were unset, masking a 152/200 zone-record leak). Same
        # principle applies here:
        #   - schedule → exit 1 on missing secrets (red CI surfaces it)
        #   - workflow_dispatch → exit 0 with warning (operator-driven,
        #     they already accepted the repo state)
        #
        # The trigger is read from the runner-provided GITHUB_EVENT_NAME
        # variable rather than templated into the script text, so the
        # script body is static and nothing is spliced into shell source.
        run: |
          missing=()
          for var in CF_API_TOKEN CF_ACCOUNT_ID CP_PROD_ADMIN_TOKEN CP_STAGING_ADMIN_TOKEN; do
            if [ -z "${!var:-}" ]; then
              missing+=("$var")
            fi
          done
          if [ ${#missing[@]} -gt 0 ]; then
            if [ "${GITHUB_EVENT_NAME}" = "workflow_dispatch" ]; then
              echo "::warning::skipping sweep — secrets not configured: ${missing[*]}"
              echo "::warning::set them at Settings → Secrets and Variables → Actions, then rerun."
              echo "::warning::CF_API_TOKEN must include account:cloudflare_tunnel:edit scope (separate from the zone:dns:edit scope used by sweep-cf-orphans)."
              echo "skip=true" >> "$GITHUB_OUTPUT"
              exit 0
            fi
            echo "::error::sweep cannot run — required secrets missing: ${missing[*]}"
            echo "::error::set them at Settings → Secrets and Variables → Actions, or disable this workflow."
            echo "::error::CF_API_TOKEN must include account:cloudflare_tunnel:edit scope."
            exit 1
          fi
          echo "All required secrets present ✓"
          echo "skip=false" >> "$GITHUB_OUTPUT"

      - name: Run sweep
        if: steps.verify.outputs.skip != 'true'
        # Schedule-vs-dispatch dry-run asymmetry mirrors sweep-cf-orphans:
        #   - Scheduled: input empty → "false" → --execute (the whole
        #     point of an hourly janitor).
        #   - Manual workflow_dispatch: input default true → dry-run;
        #     operator must flip it to actually delete.
        env:
          # The input is handed to the shell as data via an env var
          # instead of being expanded inside the script text.
          # NOTE(review): the WF_ prefix is meant to avoid colliding
          # with any variable scripts/ops/sweep-cf-tunnels.sh reads on
          # its own — the script is not visible here, confirm.
          WF_DRY_RUN_INPUT: ${{ github.event.inputs.dry_run || 'false' }}
        run: |
          set -euo pipefail
          if [ "${WF_DRY_RUN_INPUT}" = "true" ]; then
            echo "Running in dry-run mode — no deletions"
            bash scripts/ops/sweep-cf-tunnels.sh
          else
            echo "Running with --execute — will delete identified orphans"
            bash scripts/ops/sweep-cf-tunnels.sh --execute
          fi