Merge pull request #2125 from Molecule-AI/fix/canary-teardown-slug-pattern

fix(ci): canary teardown safety-net slug pattern (was reversed)
This commit is contained in:
Hongming Wang 2026-04-26 22:04:46 +00:00 committed by GitHub
commit 05ee0843fc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -159,14 +159,34 @@ jobs:
ADMIN_TOKEN: ${{ secrets.MOLECULE_STAGING_ADMIN_TOKEN }}
run: |
set +e
# Slug prefix matches what test_staging_full_saas.sh emits
# in canary mode:
# SLUG="e2e-canary-$(date +%Y%m%d)-${RUN_ID_SUFFIX}"
# Earlier this was `e2e-{today}-canary-` — that was the
# full-mode pattern (date FIRST, mode SECOND); canary slugs
# have mode FIRST, date SECOND. Because of that mismatch the
# filter silently matched nothing, leaving every cancelled-canary
# EC2 alive until the once-an-hour sweep eventually caught it
# (incident 2026-04-26 21:03Z: 1h25m EC2 leak before manual
# cleanup; same gap on three earlier cancellations today).
orgs=$(curl -sS "$MOLECULE_CP_URL/cp/admin/orgs" \
-H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
| python3 -c "
import json, sys
import json, sys, os
run_id = os.environ.get('GITHUB_RUN_ID', '')
d = json.load(sys.stdin)
today = __import__('datetime').date.today().strftime('%Y%m%d')
# Scope to slugs from THIS canary run when GITHUB_RUN_ID is
# available; the canary workflow sets E2E_RUN_ID='canary-\${run_id}'
# so the slug suffix is '-canary-\${run_id}-...'. Mirrors the
# full-mode safety net's per-run scoping (e2e-staging-saas.yml)
# added after the 2026-04-21 cross-run cleanup incident.
if run_id:
prefix = f'e2e-canary-{today}-canary-{run_id}'
else:
prefix = f'e2e-canary-{today}-'
candidates = [o['slug'] for o in d.get('orgs', [])
if o.get('slug','').startswith(f'e2e-{today}-canary-')
if o.get('slug','').startswith(prefix)
and o.get('status') not in ('purged',)]
print('\n'.join(candidates))
" 2>/dev/null)