fix(sweep-cf-orphans): fail-closed live-org fetch + regression test #3139
@@ -149,18 +149,42 @@ fi
|
||||
log " zone $CF_ZONE_ID reachable ✓"
|
||||
|
||||
|
||||
# Fetch org slugs from a CP admin API endpoint.
#
# Arguments:
#   $1 - URL of the CP admin orgs endpoint
#   $2 - bearer token sent in the Authorization header
#   $3 - label (e.g. "prod") used only in error messages
# Outputs:
#   stdout - the org slugs, space-separated on one line (an empty line when
#            'orgs' is an empty array)
#   stderr - an "ERROR: ..." diagnostic on failure
# Returns:
#   0 on success, 1 on any failure.
#
# Fail-closed: a failed request (curl error or HTTP error status), invalid
# JSON, a missing/invalid 'orgs' array, or an org entry without a string
# 'slug' returns non-zero so the caller can abort the sweep. This prevents the
# safety gate from being the only defense when the CP source of truth is
# unreachable or returns an error body.
fetch_cp_orgs() {
  local url="$1" token="$2" label="$3"
  local resp

  # stderr is folded into the capture so curl's own diagnostic can be echoed
  # when the request fails.
  resp=$(curl -sS -f -m 15 -H "Authorization: Bearer $token" "$url" 2>&1) || {
    echo "ERROR: $label CP admin API request failed (non-2xx or network error)" >&2
    echo "$resp" >&2
    return 1
  }

  # The label travels as argv[1] instead of being spliced into the Python
  # source, so a label containing quotes cannot break (or inject into) the
  # program. The Python text is single-quoted: the shell expands nothing in it.
  python3 -c '
import json, sys

label = sys.argv[1]


def fail(msg):
    print(f"ERROR: {label} CP admin API {msg}", file=sys.stderr)
    sys.exit(1)


try:
    d = json.loads(sys.stdin.read())
except json.JSONDecodeError as e:
    fail(f"returned invalid JSON: {e}")

# A top-level value that is not an object (e.g. [] or null) has no "orgs".
orgs = d.get("orgs") if isinstance(d, dict) else None
if not isinstance(orgs, list):
    fail("response missing or invalid \"orgs\" array")

slugs = []
for o in orgs:
    slug = o.get("slug") if isinstance(o, dict) else None
    if not isinstance(slug, str):
        fail("response contains an org entry without a string \"slug\"")
    slugs.append(slug)

print(" ".join(slugs))
' "$label" <<< "$resp"
}
|
||||
|
||||
# --- Gather live sets ------------------------------------------------------
|
||||
|
||||
# Live org slugs come exclusively from fetch_cp_orgs. The earlier inline
# `curl | python3` pipelines are gone: they treated an error body as an empty
# org list via .get('orgs',[]) and only duplicated the request made below.
# The explicit `|| exit 1` makes the abort independent of whether errexit is
# active at this point; fetch_cp_orgs has already written its diagnostic to
# stderr when it fails.
log "Fetching CP prod org slugs..."
PROD_SLUGS=$(fetch_cp_orgs "https://api.moleculesai.app/cp/admin/orgs?limit=500" "$CP_ADMIN_API_TOKEN" "prod") || exit 1
log " prod orgs: $(echo "$PROD_SLUGS" | wc -w | tr -d ' ')"

log "Fetching CP staging org slugs..."
STAGING_SLUGS=$(fetch_cp_orgs "https://staging-api.moleculesai.app/cp/admin/orgs?limit=500" "$CP_STAGING_ADMIN_API_TOKEN" "staging") || exit 1
log " staging orgs: $(echo "$STAGING_SLUGS" | wc -w | tr -d ' ')"
|
||||
|
||||
log "Fetching live EC2 Name tags (region=$REGION)..."
|
||||
|
||||
@@ -0,0 +1,149 @@
|
||||
#!/usr/bin/env bash
# Regression test for scripts/ops/sweep-cf-orphans.sh.
#
# Checks that the live-org fetch is fail-closed: a non-2xx response, invalid
# JSON, or a response that lacks the 'orgs' array has to abort the sweep
# BEFORE any Cloudflare DNS records are listed or classified as orphans.
#
# errexit is not enabled here; run_case reads the exit status of the script
# under test itself.
set -u
set -o pipefail

# Path of the script under test; a non-empty SCRIPT in the environment wins.
: "${SCRIPT:=scripts/ops/sweep-cf-orphans.sh}"

# Scenario counters, bumped by run_case.
PASS=0 FAIL=0
|
||||
|
||||
# Run one scenario against the script under test with `curl` and `aws`
# replaced by mocks placed first on PATH, then check the outcome.
#
# Globals:
#   SCRIPT (read)        - path of the script under test
#   PASS, FAIL (written) - exactly one of them is incremented per call
# Arguments:
#   $1 name         - label shown in the result line
#   $2 cp_exit      - exit status the curl mock returns for */cp/admin/orgs*
#   $3 cp_body      - body the curl mock prints for */cp/admin/orgs*
#   $4 expect_abort - "true" (default): the run must exit non-zero, create
#                     neither sentinel file and print no sweep-plan text;
#                     any other value: the run must exit 0 and create both
#                     sentinel files
run_case() {
  local name="$1" cp_exit="$2" cp_body="$3"
  local expect_abort="${4:-true}" # true = must stop before AWS/CF boundary
  local tmp

  # Without this guard a missing script satisfies every fail-closed
  # expectation (non-zero exit, no sentinels, no output) and the case would
  # pass vacuously.
  if [ ! -f "$SCRIPT" ]; then
    echo " ✗ $name: script under test not found: $SCRIPT" >&2
    FAIL=$((FAIL + 1))
    return 0
  fi

  tmp=$(mktemp -d -t cf-orphans-fail-closed-XXXXXX) || {
    echo " ✗ $name: could not create temp dir" >&2
    FAIL=$((FAIL + 1))
    return 0
  }

  # URL-aware curl mock. CF token/zone preflight and the CF DNS list must
  # return valid JSON so the test can prove a bad CP orgs response aborts at
  # the live-org fetch boundary, not during preflight or after reaching
  # AWS/CF classification.
  #
  # The per-case body and exit status reach the mock through CP_BODY_FILE and
  # CP_EXIT (like the sentinels) rather than by patching this file with
  # `sed -i`, whose suffix-less form is a GNU extension and which would also
  # misbehave if the temp path contained sed metacharacters.
  cat > "$tmp/curl" <<'MOCK'
#!/usr/bin/env bash
url=""
while [ "$#" -gt 0 ]; do
  case "$1" in
    https://*) url="$1" ;;
  esac
  shift
done
case "$url" in
  */user/tokens/verify)
    echo '{"success":true,"result":{"status":"active"}}'
    exit 0
    ;;
  */zones/*/dns_records*)
    echo '{"success":true,"result":[{"id":"rec1","name":"api.moleculesai.app","type":"A","created_on":"2026-06-20T00:00:00Z"}]}'
    echo 'reached' > "$CF_SENTINEL"
    exit 0
    ;;
  */zones/*)
    echo '{"success":true,"result":{"id":"zone"}}'
    exit 0
    ;;
  */cp/admin/orgs*)
    cat "$CP_BODY_FILE"
    exit "$CP_EXIT"
    ;;
  *)
    echo '{"success":true,"result":[]}'
    echo 'reached' > "$CF_SENTINEL"
    exit 0
    ;;
esac
MOCK
  # The body goes through a file so JSON never needs shell quoting.
  printf '%s\n' "$cp_body" > "$tmp/cp_body.txt"
  chmod +x "$tmp/curl"

  # Mock aws cli: required by the script. Returns valid empty EC2 JSON in the
  # happy path; writes a sentinel if reached so fail-closed cases prove AWS
  # gather was not entered.
  cat > "$tmp/aws" <<'MOCK'
#!/usr/bin/env bash
echo "reached" > "$AWS_SENTINEL"
echo '{"Reservations":[]}'
exit 0
MOCK
  chmod +x "$tmp/aws"

  local out="$tmp/out" err="$tmp/err"
  PATH="$tmp:$PATH" \
    CF_API_TOKEN=tok \
    CF_ZONE_ID=zone \
    CP_ADMIN_API_TOKEN=tok-prod \
    CP_STAGING_ADMIN_API_TOKEN=tok-staging \
    AWS_ACCESS_KEY_ID=ak \
    AWS_SECRET_ACCESS_KEY=sk \
    CF_SENTINEL="$tmp/cf_reached" \
    AWS_SENTINEL="$tmp/aws_reached" \
    CP_BODY_FILE="$tmp/cp_body.txt" \
    CP_EXIT="$cp_exit" \
    bash "$SCRIPT" --execute > "$out" 2> "$err"
  local actual_exit=$?
  local case_fail=0

  if [ "$expect_abort" = "true" ]; then
    # Fail-closed cases: script must abort at the CP live-org fetch,
    # before AWS EC2 gather or CF DNS list/classify/delete.
    if [ "$actual_exit" -eq 0 ]; then
      echo " ✗ $name: exited 0 instead of aborting" >&2
      case_fail=1
    fi
    if [ -f "$tmp/cf_reached" ]; then
      echo " ✗ $name: CF sentinel exists — sweep reached DNS list/classify" >&2
      case_fail=1
    fi
    if [ -f "$tmp/aws_reached" ]; then
      echo " ✗ $name: AWS sentinel exists — sweep reached EC2 gather" >&2
      case_fail=1
    fi
    if grep -qE '== Sweep plan ==|would delete:|orphan-' "$out" "$err" 2>/dev/null; then
      echo " ✗ $name: output contains sweep plan / orphan classification" >&2
      case_fail=1
    fi
  else
    # Happy-path control: valid empty orgs arrays must pass the fetch guard
    # and reach both AWS EC2 gather and Cloudflare DNS listing.
    if [ ! -f "$tmp/cf_reached" ]; then
      echo " ✗ $name: CF sentinel missing — sweep did not reach DNS list" >&2
      case_fail=1
    fi
    if [ ! -f "$tmp/aws_reached" ]; then
      echo " ✗ $name: AWS sentinel missing — sweep did not reach EC2 gather" >&2
      case_fail=1
    fi
    if [ "$actual_exit" -ne 0 ]; then
      echo " ✗ $name: expected exit 0 after empty DNS list, got $actual_exit" >&2
      case_fail=1
    fi
  fi

  if [ "$case_fail" -eq 0 ]; then
    echo " ✓ $name"
    PASS=$((PASS + 1))
  else
    # Dump what the script printed so the failure can be diagnosed.
    echo " stdout:" >&2
    sed 's/^/ /' "$out" >&2
    echo " stderr:" >&2
    sed 's/^/ /' "$err" >&2
    FAIL=$((FAIL + 1))
  fi

  rm -rf "$tmp"
}
|
||||
|
||||
echo "Test: sweep-cf-orphans live-org fetch fail-closed"
echo

# Scenario table, four fields per row:
#   name | exit status of the curl mock | CP body | expect_abort
cases=(
  "prod API returns 500" 22 '{"error":"internal"}' true
  "prod API returns malformed JSON" 0 'this is not json' true
  "prod API returns JSON without orgs" 0 '{"foo":"bar"}' true
  "prod API returns orgs as string" 0 '{"orgs":"not-an-array"}' true
  "prod API returns valid empty orgs (proceeds)" 0 '{"orgs":[]}' false
)
for ((idx = 0; idx < ${#cases[@]}; idx += 4)); do
  run_case "${cases[idx]}" "${cases[idx + 1]}" "${cases[idx + 2]}" "${cases[idx + 3]}"
done

echo
printf 'passed=%s failed=%s\n' "$PASS" "$FAIL"
# The last command's status is the script's exit status: 0 only when no
# scenario failed.
[ "$FAIL" -eq 0 ]
|
||||
Reference in New Issue
Block a user