Bumps [actions/checkout](https://github.com/actions/checkout) from 4 to 6. - [Release notes](https://github.com/actions/checkout/releases) - [Commits](https://github.com/actions/checkout/compare/v4...v6) --- updated-dependencies: - dependency-name: actions/checkout dependency-version: '6' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] <support@github.com>
125 lines
5.5 KiB
YAML
125 lines
5.5 KiB
YAML
name: Sweep stale Cloudflare Tunnels

# Janitor that removes Cloudflare Tunnels whose backing tenant no
# longer exists. It is the parallel of sweep-cf-orphans.yml (which
# sweeps DNS records): same justification, different CF resource.
#
# Why this is a separate workflow from sweep-cf-orphans:
#   - DNS records live on the zone (`/zones/<id>/dns_records`).
#   - Tunnels live on the account (`/accounts/<id>/cfd_tunnel`).
#   - The CF API surface and the scopes differ; the existing CF
#     token might not have `account:cloudflare_tunnel:edit`. Keeping
#     the workflows apart keeps each one's secret-presence gate
#     independent, so neither silently skips when the other's
#     secret is missing.
#   - Cleaner blast radius — operators can disable one without the
#     other if a regression surfaces.
#
# Safety: the script's MAX_DELETE_PCT gate refuses to delete past
# the threshold. The default is 90% — higher than the DNS sweep's
# 50% because tenant-shaped tunnels are mostly orphans by design.
|
|
|
|
on:
  schedule:
    # Runs every hour at minute 45. sweep-cf-orphans runs at minute
    # 15; the offset keeps the two janitors from hitting the CF API
    # in parallel bursts during the same minute.
    - cron: "45 * * * *"
  workflow_dispatch:
    inputs:
      # Manual runs default to a dry run; the operator has to switch
      # this off explicitly before anything is deleted.
      dry_run:
        type: boolean
        default: true
        required: false
        description: 'Dry run only — list what would be deleted, no deletion'
      # Forwarded to the job as the MAX_DELETE_PCT environment variable.
      max_delete_pct:
        default: '90'
        required: false
        description: 'Override safety gate (default 90, set higher only for major cleanup)'
|
|
|
|
# Every run shares a single concurrency group and an in-progress run
# is never cancelled, so two sweeps cannot race the same account.
concurrency:
  cancel-in-progress: false
  group: sweep-cf-tunnels
|
|
|
|
# The workflow token is limited to read-only access to repository
# contents; no other permission is requested.
permissions:
  contents: read
|
|
|
|
jobs:
  sweep:
    name: Sweep CF tunnels
    runs-on: ubuntu-latest
    # 30 min cap. Was 5 min on the theory that the only thing that
    # could take >5min is a CF-API hang — but on 2026-05-02 a backlog
    # of 672 stale tunnels accumulated (large staging E2E run + delayed
    # sweep) and the serial `curl -X DELETE` loop (~0.7s/tunnel) needed
    # ~7-8min to drain. The 5-min cap killed the run mid-sweep
    # (cancelled at 424/672, see run 25248788312); a manual rerun
    # finished the remainder fine.
    #
    # The fix is two-part: parallelize the delete loop (8-way xargs in
    # the script — see scripts/ops/sweep-cf-tunnels.sh), AND raise the
    # cap so a one-off backlog doesn't trip a hangs-detector that
    # turned out to be a real-job-too-slow detector. With 8-way
    # parallelism, 600+ tunnels drains in ~60s; 30 min is generous
    # headroom for actual hangs to still surface (and is in line with
    # the sweep-cf-orphans companion job).
    timeout-minutes: 30
    # Job-level env: visible to both the secret-presence gate and the
    # sweep script.
    env:
      CF_API_TOKEN: ${{ secrets.CF_API_TOKEN }}
      CF_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID }}
      CP_PROD_ADMIN_TOKEN: ${{ secrets.CP_PROD_ADMIN_TOKEN }}
      CP_STAGING_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_TOKEN }}
      # Scheduled runs carry no inputs, so the '90' fallback applies.
      MAX_DELETE_PCT: ${{ github.event.inputs.max_delete_pct || '90' }}

    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          # This job only reads the sweep script from the checkout and
          # never pushes, so don't leave the token configured for git.
          persist-credentials: false

      - name: Verify required secrets present
        id: verify
        # Schedule-vs-dispatch behaviour split mirrors sweep-cf-orphans
        # (hardened 2026-04-28 after the silent-no-op incident: the
        # janitor reported green while doing nothing because secrets
        # were unset, masking a 152/200 zone-record leak). Same
        # principle applies here:
        #   - schedule → exit 1 on missing secrets (red CI surfaces it)
        #   - workflow_dispatch → exit 0 with warning (operator-driven,
        #     they already accepted the repo state)
        #
        # Sets the `skip` step output ("true"/"false") that gates the
        # sweep step. The event name is read from the runner-provided
        # GITHUB_EVENT_NAME variable rather than interpolated into the
        # script text.
        run: |
          set -euo pipefail
          missing=()
          for var in CF_API_TOKEN CF_ACCOUNT_ID CP_PROD_ADMIN_TOKEN CP_STAGING_ADMIN_TOKEN; do
            # Indirect expansion: test the variable whose name is in $var.
            if [ -z "${!var:-}" ]; then
              missing+=("$var")
            fi
          done
          if [ "${#missing[@]}" -gt 0 ]; then
            if [ "${GITHUB_EVENT_NAME:-}" = "workflow_dispatch" ]; then
              echo "::warning::skipping sweep — secrets not configured: ${missing[*]}"
              echo "::warning::set them at Settings → Secrets and Variables → Actions, then rerun."
              echo "::warning::CF_API_TOKEN must include account:cloudflare_tunnel:edit scope (separate from the zone:dns:edit scope used by sweep-cf-orphans)."
              echo "skip=true" >> "$GITHUB_OUTPUT"
              exit 0
            fi
            echo "::error::sweep cannot run — required secrets missing: ${missing[*]}"
            echo "::error::set them at Settings → Secrets and Variables → Actions, or disable this workflow."
            echo "::error::CF_API_TOKEN must include account:cloudflare_tunnel:edit scope."
            exit 1
          fi
          echo "All required secrets present ✓"
          echo "skip=false" >> "$GITHUB_OUTPUT"

      - name: Run sweep
        if: steps.verify.outputs.skip != 'true'
        # Schedule-vs-dispatch dry-run asymmetry mirrors sweep-cf-orphans:
        #   - Scheduled: input empty → "false" → --execute (the whole
        #     point of an hourly janitor).
        #   - Manual workflow_dispatch: input default true → dry-run;
        #     operator must flip it to actually delete.
        #
        # The input reaches the script through the DRY_RUN environment
        # variable instead of being expanded into the script text, so
        # its value is never parsed as shell.
        env:
          DRY_RUN: ${{ github.event.inputs.dry_run || 'false' }}
        run: |
          set -euo pipefail
          if [ "$DRY_RUN" = "true" ]; then
            echo "Running in dry-run mode — no deletions"
            bash scripts/ops/sweep-cf-tunnels.sh
          else
            echo "Running with --execute — will delete identified orphans"
            bash scripts/ops/sweep-cf-tunnels.sh --execute
          fi
|