diff --git a/canvas/e2e/staging-setup.ts b/canvas/e2e/staging-setup.ts index 89cbf61d..5fc39225 100644 --- a/canvas/e2e/staging-setup.ts +++ b/canvas/e2e/staging-setup.ts @@ -50,12 +50,12 @@ const WORKSPACE_ONLINE_TIMEOUT_MS = 20 * 60 * 1000; // TLS readiness depends on (1) Cloudflare DNS propagation through the // edge, (2) the tenant's CF Tunnel registering the new hostname, (3) // CF's edge ACME cert provisioning + cache. Each of these layers can -// add 1-3 min on its own under heavy staging load. Bumped from 10 to -// 15 min after #2090 (6 consecutive canary failures starting 2026-04-26 -// correlated with CP commits a3eb8be / ed70405 / 4ab339e). Stays below -// the 20-min PROVISION_TIMEOUT envelope so a genuinely-stuck tenant -// still fails-loud at the provision step rather than masquerading as -// a TLS issue. Kept aligned with tests/e2e/test_staging_full_saas.sh. +// add 1-3 min on its own under heavy staging load. Bumped 10→15 min +// after a burst of canary failures correlated with CP changes (#2090). +// Stays below the 20-min PROVISION_TIMEOUT envelope so a genuinely- +// stuck tenant fails-loud at the provision step rather than +// masquerading as a TLS issue. Kept aligned with +// tests/e2e/test_staging_full_saas.sh. const TLS_TIMEOUT_MS = 15 * 60 * 1000; async function jsonFetch( diff --git a/tests/e2e/test_staging_full_saas.sh b/tests/e2e/test_staging_full_saas.sh index 25e08f81..e9d9da5c 100755 --- a/tests/e2e/test_staging_full_saas.sh +++ b/tests/e2e/test_staging_full_saas.sh @@ -195,22 +195,21 @@ TENANT_TOKEN=$(echo "$TENANT_TOKEN_RESP" | python3 -c "import json,sys; print(js ok "Tenant admin token retrieved (len=${#TENANT_TOKEN})" # ─── 4. Wait for tenant TLS / DNS propagation ────────────────────────── -# 15 min — kept below the 20-min provision envelope so a genuinely-stuck -# tenant still fails loud at the earlier provision step rather than -# masquerading as a TLS issue. 
CF DNS propagation + tunnel hostname -# registration + ACME cert + edge cache run 5-7 min on a healthy day; the -# +5 min headroom over the previous 10-min cap covers the slower path -# observed in #2090 (6 consecutive canary failures starting 2026-04-26 -# correlated with CP commits a3eb8be / ed70405 / 4ab339e). +# Kept below the 20-min provision envelope so a genuinely-stuck tenant +# still fails loud at the earlier provision step rather than masquerading +# as a TLS issue. CF DNS propagation + tunnel hostname registration + +# ACME cert + edge cache run 5-7 min on a healthy day; +5 min headroom +# over the previous 10-min cap covers the slower path observed in #2090. # -# On timeout we print a diagnostic burst — last DNS lookup, last curl -# verbose handshake, last response headers — so the next failure tells -# us which layer (DNS, TLS, HTTP) is actually broken. Without this the -# only signal is "no 2xx in N min" which sent us in circles. +# On timeout, dump DNS + curl -v + headers so the next failure identifies +# the broken layer (DNS / TLS / HTTP). Authorization and Cookie are +# redacted defensively in case a future caller adds auth to this probe. log "4/11 Waiting for tenant TLS / DNS propagation..." 
-TLS_DEADLINE=$(( $(date +%s) + 900 )) -TENANT_HOST="${TENANT_URL#https://}" +TLS_TIMEOUT_SEC=$((15 * 60)) +TLS_DEADLINE=$(( $(date +%s) + TLS_TIMEOUT_SEC )) +TENANT_HOST="${TENANT_URL#http*://}" TENANT_HOST="${TENANT_HOST%%/*}" +TENANT_HOST="${TENANT_HOST%%:*}" while true; do if curl -sSfk --max-time 5 "$TENANT_URL/health" >/dev/null 2>&1; then break @@ -220,9 +219,11 @@ while true; do log "DNS lookup ($TENANT_HOST):" getent hosts "$TENANT_HOST" 2>&1 || log " (no DNS resolution)" log "curl -v $TENANT_URL/health (last 40 lines):" - curl -kv --max-time 10 "$TENANT_URL/health" 2>&1 | tail -n 40 | sed 's/^/ /' || true + curl -kv --max-time 10 "$TENANT_URL/health" 2>&1 \ + | sed -E 's/(Authorization|Cookie):.*/\1: [redacted]/i' \ + | tail -n 40 | sed 's/^/ /' || true log "── END DIAGNOSTIC ──" - fail "Tenant URL never responded 2xx on /health within 15 min" + fail "Tenant URL never responded 2xx on /health within ${TLS_TIMEOUT_SEC}s" fi sleep 5 done