From 334b748492aa00d0551dbbd37481804a3b12ee3f Mon Sep 17 00:00:00 2001 From: hongming-codex-laptop Date: Wed, 13 May 2026 00:23:16 -0700 Subject: [PATCH] fix(ci): harden Cloudflare sweep API errors --- .gitea/workflows/sweep-aws-secrets.yml | 14 ++++++++----- scripts/ops/sweep-cf-orphans.sh | 27 ++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/.gitea/workflows/sweep-aws-secrets.yml b/.gitea/workflows/sweep-aws-secrets.yml index b7c81c6c..02633ea3 100644 --- a/.gitea/workflows/sweep-aws-secrets.yml +++ b/.gitea/workflows/sweep-aws-secrets.yml @@ -40,11 +40,15 @@ name: Sweep stale AWS Secrets Manager secrets # the mostly-orphan tunnels) refuses to nuke past the threshold. on: - schedule: - # Hourly at :30 — offsets from sweep-cf-orphans (:15) and - # sweep-cf-tunnels (:45) so the three janitors don't burst the - # CP admin endpoints at the same minute. - - cron: '30 * * * *' + # Disabled as an hourly schedule until the dedicated + # AWS_SECRETS_JANITOR_* key exists in the key-management SSOT and is + # mirrored into Gitea. Falling back to the molecule-cp app principal is + # intentionally not allowed: it lacks account-wide ListSecrets, and + # granting that to an application credential would weaken least privilege. + # + # Keep the manual trigger so operators can validate the workflow immediately + # after provisioning the janitor key, then restore the hourly :30 schedule. + workflow_dispatch: # Don't let two sweeps race the same AWS account. concurrency: group: sweep-aws-secrets diff --git a/scripts/ops/sweep-cf-orphans.sh b/scripts/ops/sweep-cf-orphans.sh index 8a4da90c..b96d7bfb 100755 --- a/scripts/ops/sweep-cf-orphans.sh +++ b/scripts/ops/sweep-cf-orphans.sh @@ -97,6 +97,33 @@ log " live EC2s: $(echo "$EC2_NAMES" | wc -w | tr -d ' ')" log "Fetching Cloudflare DNS records..." 
CF_JSON=$(curl -sS -m 15 -H "Authorization: Bearer $CF_API_TOKEN" \
   "https://api.cloudflare.com/client/v4/zones/$CF_ZONE_ID/dns_records?per_page=500")
+if ! echo "$CF_JSON" | python3 -c '
+# Fail closed unless stdin is a Cloudflare v4 envelope: a JSON object with
+# success=true and a list "result" (the next stage does len() on it).
+import json, sys
+def die(msg):
+    print(f"ERROR: {msg}", file=sys.stderr)
+    raise SystemExit(1)
+try:
+    payload = json.load(sys.stdin)
+except Exception as exc:  # e.g. an empty body or an HTML error page
+    die(f"Cloudflare returned non-JSON response: {exc}")
+if not isinstance(payload, dict):  # null / bare list: .get() below would crash
+    die(f"Cloudflare DNS list failed: unexpected payload type {type(payload).__name__}")
+if not payload.get("success", False) or not isinstance(payload.get("result"), list):
+    errors = payload.get("errors") or []
+    if errors:
+        detail = "; ".join(  # entries are normally {code, message}; stringify the rest
+            "{}: {}".format(e.get("code", "unknown"), e.get("message", "unknown error"))
+            if isinstance(e, dict) else str(e)
+            for e in (errors if isinstance(errors, list) else [errors]))
+    else:
+        detail = "unexpected result type {}".format(type(payload.get("result")).__name__)
+    die(f"Cloudflare DNS list failed: {detail}")
+'; then
+  log "Cloudflare DNS list failed; verify CF_API_TOKEN has Zone:DNS:Edit and CF_ZONE_ID is the moleculesai.app zone."
+  exit 1
+fi
 TOTAL_CF=$(echo "$CF_JSON" | python3 -c "import json,sys; print(len(json.load(sys.stdin)['result']))")
 log " CF records: $TOTAL_CF"
-- 
2.45.2