Merge pull request #2649 from Molecule-AI/staging
staging → main: auto-promote 98da627
This commit is contained in:
commit
aedbbc4a10
28
.github/workflows/canary-staging.yml
vendored
28
.github/workflows/canary-staging.yml
vendored
@ -231,10 +231,34 @@ jobs:
|
||||
and o.get('status') not in ('purged',)]
|
||||
print('\n'.join(candidates))
|
||||
" 2>/dev/null)
|
||||
# Per-slug DELETE with HTTP-code verification. The previous
|
||||
# `... >/dev/null || true` swallowed every failure, so a 5xx
|
||||
# or timeout from CP looked identical to "successfully cleaned
|
||||
# up" and the tenant kept eating ~2 vCPU until the hourly
|
||||
# stale sweep caught it (up to 2h later). Now we capture the
|
||||
# response code and surface non-2xx as a workflow warning, so
|
||||
# the run page shows which slug leaked. We still don't `exit 1`
|
||||
# on cleanup failure — a single-canary cleanup miss shouldn't
|
||||
# fail-flag the canary itself when the actual smoke check
|
||||
# passed. The sweep-stale-e2e-orgs cron (now every 15 min,
|
||||
# 30-min threshold) is the safety net for whatever slips past.
|
||||
# See molecule-controlplane#420.
|
||||
leaks=()
|
||||
for slug in $orgs; do
|
||||
curl -sS -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
code=$(curl -sS -o /tmp/canary-cleanup.out -w "%{http_code}" \
|
||||
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" >/dev/null || true
|
||||
-d "{\"confirm\":\"$slug\"}" \
|
||||
|| echo "000")
|
||||
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
|
||||
echo "[teardown] deleted $slug (HTTP $code)"
|
||||
else
|
||||
echo "::warning::canary teardown for $slug returned HTTP $code — sweep-stale-e2e-orgs will catch it within ~45 min. Body: $(head -c 300 /tmp/canary-cleanup.out 2>/dev/null)"
|
||||
leaks+=("$slug")
|
||||
fi
|
||||
done
|
||||
if [ ${#leaks[@]} -gt 0 ]; then
|
||||
echo "::warning::canary teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
|
||||
fi
|
||||
exit 0
|
||||
|
||||
21
.github/workflows/sweep-stale-e2e-orgs.yml
vendored
21
.github/workflows/sweep-stale-e2e-orgs.yml
vendored
@ -25,16 +25,23 @@ name: Sweep stale e2e-* orgs (staging)
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Every hour on the hour. E2E orgs are short-lived (~10-25 min wall
|
||||
# clock from create to teardown). Anything older than the
|
||||
# MAX_AGE_MINUTES threshold below is presumed dead.
|
||||
- cron: '0 * * * *'
|
||||
# Every 15 min. E2E orgs are short-lived (~8-25 min wall clock from
|
||||
# create to teardown — canary is ~8 min, full SaaS ~25 min). The
|
||||
# previous hourly + 120-min stale threshold meant a leaked tenant
|
||||
# could keep an EC2 alive for up to 2 hours, eating ~2 vCPU per
|
||||
# leak. Tightening the cadence + threshold reduces the worst-case
|
||||
# leak window from 120 min to ~45 min (15-min sweep cadence + 30-min
|
||||
# threshold) without risk of catching in-progress runs (the longest
|
||||
# e2e run is the 25-min canary, well under the 30-min threshold).
|
||||
# See molecule-controlplane#420 for the leak-class accounting that
|
||||
# motivated this tightening.
|
||||
- cron: '*/15 * * * *'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
max_age_minutes:
|
||||
description: "Delete e2e-* orgs older than N minutes (default 120)"
|
||||
description: "Delete e2e-* orgs older than N minutes (default 30)"
|
||||
required: false
|
||||
default: "120"
|
||||
default: "30"
|
||||
dry_run:
|
||||
description: "Dry run only — list what would be deleted"
|
||||
required: false
|
||||
@ -58,7 +65,7 @@ jobs:
|
||||
env:
|
||||
MOLECULE_CP_URL: https://staging-api.moleculesai.app
|
||||
ADMIN_TOKEN: ${{ secrets.MOLECULE_STAGING_ADMIN_TOKEN }}
|
||||
MAX_AGE_MINUTES: ${{ github.event.inputs.max_age_minutes || '120' }}
|
||||
MAX_AGE_MINUTES: ${{ github.event.inputs.max_age_minutes || '30' }}
|
||||
DRY_RUN: ${{ github.event.inputs.dry_run || 'false' }}
|
||||
# Refuse to delete more than this many orgs in one tick. If the
|
||||
# CP DB is briefly empty (or the admin endpoint goes weird and
|
||||
|
||||
@ -77,7 +77,7 @@ export function Legend() {
|
||||
onClick={openLegend}
|
||||
aria-label="Show legend"
|
||||
title="Show legend"
|
||||
className={`fixed bottom-6 ${leftClass} z-30 flex items-center gap-1.5 rounded-full bg-surface-sunken/95 border border-line/50 px-3 py-1.5 text-[11px] font-semibold text-ink-mid uppercase tracking-wider shadow-xl shadow-black/30 backdrop-blur-sm hover:text-ink hover:border-line transition-[left,colors] duration-200`}
|
||||
className={`fixed bottom-6 ${leftClass} z-30 flex items-center gap-1.5 rounded-full bg-surface-sunken/95 border border-line/50 px-3 py-1.5 text-[11px] font-semibold text-ink-mid uppercase tracking-wider shadow-xl shadow-black/30 backdrop-blur-sm hover:text-ink hover:border-line focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 focus-visible:ring-offset-2 focus-visible:ring-offset-surface transition-[left,colors] duration-200`}
|
||||
>
|
||||
<span aria-hidden="true" className="text-[10px]">ⓘ</span>
|
||||
Legend
|
||||
@ -94,7 +94,10 @@ export function Legend() {
|
||||
onClick={closeLegend}
|
||||
aria-label="Hide legend"
|
||||
title="Hide legend"
|
||||
className="-mt-0.5 -mr-1 px-1.5 text-[14px] leading-none text-ink-soft hover:text-ink transition-colors"
|
||||
// 24×24 touch target (was ~10×16, well under WCAG 2.5.5 min).
|
||||
// Negative margin keeps the visual position the same as before
|
||||
// — only the hit area + focus ring are larger.
|
||||
className="-mt-1.5 -mr-1.5 w-6 h-6 inline-flex items-center justify-center rounded text-[14px] leading-none text-ink-soft hover:text-ink hover:bg-surface-card/40 focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 transition-colors"
|
||||
>
|
||||
×
|
||||
</button>
|
||||
|
||||
Loading…
Reference in New Issue
Block a user