Merge pull request #2099 from Molecule-AI/fix/staging-e2e-tls-timeout

fix(e2e): bump staging tenant TLS-readiness timeout 3min → 10min
This commit is contained in:
Hongming Wang 2026-04-26 15:24:01 +00:00 committed by GitHub
commit dafe08450b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 19 additions and 3 deletions

View File

@@ -46,7 +46,16 @@ const TENANT_DOMAIN = process.env.STAGING_TENANT_DOMAIN || "staging.moleculesai.
// were blocking staging→main syncs on 2026-04-24.
const PROVISION_TIMEOUT_MS = 20 * 60 * 1000;
const WORKSPACE_ONLINE_TIMEOUT_MS = 20 * 60 * 1000;
const TLS_TIMEOUT_MS = 3 * 60 * 1000;
// TLS readiness depends on (1) Cloudflare DNS propagation through the
// edge, (2) the tenant's CF Tunnel registering the new hostname, (3)
// CF's edge ACME cert provisioning + cache. Each of these layers can
// add 1-3 min on its own under heavy staging load. The original 3-min
// cap blocked four cycles of staging→main PRs from 2026-04-24 onward.
// 10 min stays inside the 20-min PROVISION_TIMEOUT_MS envelope (so a
// genuinely-stuck tenant still fails loud at the provision step) but
// absorbs the realistic worst case for a one-shot tenant TLS handshake.
const TLS_TIMEOUT_MS = 10 * 60 * 1000;
async function jsonFetch(
url: string,

View File

@@ -195,14 +195,21 @@ TENANT_TOKEN=$(echo "$TENANT_TOKEN_RESP" | python3 -c "import json,sys; print(js
ok "Tenant admin token retrieved (len=${#TENANT_TOKEN})"
# ─── 4. Wait for tenant TLS / DNS propagation ──────────────────────────
# 10 min — same envelope as canvas/e2e/staging-setup.ts TLS_TIMEOUT_MS.
# CF DNS propagation + tunnel hostname registration + ACME cert + edge
# cache routinely take 5-7 min under staging load; the original 3-min
# cap blocked multiple staging→main PRs from 2026-04-24 onward. Stays
# inside the parent provision envelope so a genuinely-stuck tenant
# still fails loud at the earlier provision step rather than masquerading
# as a TLS issue.
log "4/11 Waiting for tenant TLS / DNS propagation..."
TLS_DEADLINE=$(( $(date +%s) + 180 ))
TLS_DEADLINE=$(( $(date +%s) + 600 ))
while true; do
if curl -sSfk --max-time 5 "$TENANT_URL/health" >/dev/null 2>&1; then
break
fi
if [ "$(date +%s)" -gt "$TLS_DEADLINE" ]; then
fail "Tenant URL never responded 2xx on /health within 3 min"
fail "Tenant URL never responded 2xx on /health within 10 min"
fi
sleep 5
done