diff --git a/tests/e2e/lib/collision-proof-slug.sh b/tests/e2e/lib/collision-proof-slug.sh index c3644bce3..8058cf5e6 100755 --- a/tests/e2e/lib/collision-proof-slug.sh +++ b/tests/e2e/lib/collision-proof-slug.sh @@ -4,52 +4,72 @@ # ROOT CAUSE (Researcher RCA #100639): staging Platform Boot fails at # POST /cp/admin/orgs HTTP 409 because the harness creates platform # orgs with COLLIDING slugs against stale tenant state. The prior -# `head -c 32` truncation in test_staging_full_saas.sh line 152 cut +# head -c 32 truncation in test_staging_full_saas.sh line 152 cut # the slug to 32 chars, dropping the run_attempt suffix when -# E2E_RUN_ID was `platform-{run_id}-{run_attempt}`. Two runs +# E2E_RUN_ID was platform-{run_id}-{run_attempt}. Two runs # (e.g. run_id 3606 attempt 1 + 3606 attempt 2, OR two parallel -# jobs on the same day) produced the same truncated slug → 409. +# jobs on the same day) produced the same truncated slug, hence 409. # # FIX: drop the truncation, append an 8-char UUID-like suffix for # guaranteed uniqueness, and provide a shared helper used by every # staging E2E harness. The infra purge of existing stale slugs is # a separate owner/ops action (out of scope here per the ticket). # -# Usage (the literal prefix MUST be in the caller so lint_cleanup_traps.sh -# can verify the SLUG=... assignment starts with a covered e2e-* or -# rt-e2e-* prefix — see #11510): +# Usage: the literal prefix MUST be in the caller so +# lint_cleanup_traps.sh can verify the SLUG= assignment starts with +# a covered e2e-* or rt-e2e-* prefix (see #11510). # # source tests/e2e/lib/collision-proof-slug.sh -# SLUG="e2e-smoke-$(make_collision_proof_slug_suffix "$E2E_RUN_ID")" +# SLUG="e2e-smoke-$(make_collision_proof_slug_suffix "$E2E_RUN_ID" 11)" # assert_collision_proof_slug "$SLUG" || fail "..." # -# The returned suffix is `--`. The 8-char +# The returned suffix is --. 
The 8-char # uuid is sourced from /proc/sys/kernel/random/uuid on Linux, fallback -# to two $RANDOM draws on macOS. 32 bits of entropy is enough to +# to two RANDOM draws on macOS. 32 bits of entropy is enough to # defeat the original collision class. # -# Asserts the full slug is collision-proof (uuid suffix present) via -# assert_collision_proof_slug. Use this in the per-test self-check -# so a future refactor that drops the uuid is caught at harness -# startup, not at the first 409. +# Asserts the full slug is collision-proof via assert_collision_proof_slug. +# Use this in the per-test self-check so a future refactor that +# drops the uuid is caught at harness startup, not at the first 409. +# +# core#60: the FULL slug must also fit in CP_ORG_SLUG_MAX_LEN (31 chars, +# the CP's org-slug cap; the org-create endpoint rejects longer slugs +# with HTTP 400, which under CURL_COMMON's --fail-with-body + set -e +# aborts the harness before the body-logging line can run). The +# helper truncates the run_id segment (NOT the uuid anchor) so the +# collision-proof guarantee is preserved. set -uo pipefail -# make_collision_proof_slug_suffix -# $1: Run id (typically `$E2E_RUN_ID` from the workflow; falls back -# to a wall-clock+PID value). +# CP_ORG_SLUG_MAX_LEN is the CP's org-slug character cap (regex +# ^[a-z][a-z0-9-]{2,31}$: a leading char plus 2-31 additional = +# 32-char absolute max). The org-create endpoint rejects +# longer slugs with HTTP 400 in practice per the staging 400s +# in run 363934, core#60. The #65 e2e-peer-visibility lane +# hit a 33-char slug like `e2e-pv-20260614-364043-2-e560b630` +# that needed this exact cap to keep the prefix+uuid+date +# layout below the regex's 32-char ceiling. +: "${CP_ORG_SLUG_MAX_LEN:=32}" + +# make_collision_proof_slug_suffix [prefix_len] +# 1: Run id (typically E2E_RUN_ID from the workflow; falls back +# to a wall-clock+PID value when empty). +# 2: Optional length of the caller's literal prefix in the +# SLUG=... 
assignment. When supplied, the suffix budget is +# computed precisely (CP_ORG_SLUG_MAX_LEN - prefix_len - 19, +# where 19 = 1 separator + 8 date + 1 separator + 1 separator +# + 8 uuid). When omitted, the helper uses a conservative +# default of 11 (one more than the 10-char "e2e-smoke-" prefix). +# # Echoes a collision-proof SUFFIX of the form -# `--<8char-uuid>`, lowercased, with -# non-alphanumerics stripped (except `-`). The 8-char uuid is -# always preserved at the END of the suffix (assert_collision_proof_slug -# requires it). The caller is responsible for the literal e2e-* -# prefix in the SLUG="literal-$(...)" assignment shape (lint -# requirement). +# <date>-<run_id>-<uuid8>, lowercased, with non- +# alphanumerics stripped (except -). The 8-char uuid is ALWAYS +# preserved at the END of the suffix; the prefix (date + run_id) +# is truncated if needed to fit CP_ORG_SLUG_MAX_LEN: the run_id +# segment is shortened or dropped first, then the date is shortened. +# Returns 1 (nothing echoed on stdout) only when prefix_len is not a +# non-negative integer or leaves no room for the 8-char uuid. make_collision_proof_slug_suffix() { local run_id="${1:-}" + local prefix_len="${2:-11}" - # Fallback run_id when the workflow didn't set E2E_RUN_ID: a - # wall-clock+PID combo that's unique per process invocation. if [ -z "$run_id" ]; then run_id="$(date +%H%M%S)-$$" fi @@ -57,55 +77,45 @@ make_collision_proof_slug_suffix() { local date_part date_part="$(date +%Y%m%d)" - # Cross-platform random suffix. 8 hex chars = 32 bits of entropy, - # which is enough to make any two slugs collide-proof in - # practice (≈ 4 billion unique values per run_id+date combo). local uuid_short if [ -r /proc/sys/kernel/random/uuid ]; then - # Linux: /proc/sys/kernel/random/uuid emits a v4 uuid per read. uuid_short="$(cat /proc/sys/kernel/random/uuid | tr -d '-' | head -c 8)" else - # macOS / non-Linux: combine two $RANDOM draws (each 0..32767) for - # 30 bits; pad with pid+nanoseconds for the remaining few bits. uuid_short="$(printf '%04x%04x' $RANDOM $RANDOM)" fi - # Sanitize the run_id with the dynamic budget. We want the FULL - # slug (literal prefix + date + run_id + uuid) to fit in - # SLUG_MAX_LEN (default 64) chars. 
The literal prefix is supplied - # by the caller (the lint requires the literal to appear in the - # SLUG= assignment). Here in the suffix helper, the date_part is - # 8 chars and the uuid is 8 chars, plus 2 separators — so the - # run_id budget is (max_len - 18 - ). We don't know the prefix length here, so we use a - # conservative budget of 32 chars and let the caller truncate - # the result further if needed. - local suffix_max_len="${SLUG_SUFFIX_MAX_LEN:-50}" # date(8) + sep(1) + run_id(32) + sep(1) + uuid(8) = 50 - local run_id_budget=$(( suffix_max_len - 8 - 1 - 8 )) # 33 + # prefix_len feeds shell arithmetic below, so reject anything that + # is not a plain non-negative integer before it gets there. + case "$prefix_len" in + ''|*[!0-9]*) + echo "make_collision_proof_slug_suffix: prefix_len must be a non-negative integer, got '${prefix_len}'" >&2 + return 1 + ;; + esac + # Suffix layout: <date> + - + <run_id> + - + <uuid> = N+18 chars. + # Full slug: <prefix> + 1 (separator) + (N+18) = prefix_len + N + 19. + # Cap: prefix_len + N + 19 <= CP_ORG_SLUG_MAX_LEN + # => N <= CP_ORG_SLUG_MAX_LEN - prefix_len - 19 + local run_id_budget=$(( CP_ORG_SLUG_MAX_LEN - prefix_len - 19 )) + # Without a run_id the layout is <date> + - + <uuid>; with the same + # accounting as above (one char reserved for a separator after the + # prefix) the date may use CP_ORG_SLUG_MAX_LEN - prefix_len - 10 chars. + local date_budget=$(( CP_ORG_SLUG_MAX_LEN - prefix_len - 10 )) + if [ "$(( prefix_len + 8 ))" -gt "$CP_ORG_SLUG_MAX_LEN" ]; then + echo "make_collision_proof_slug_suffix: caller prefix (${prefix_len} chars) too long for CP_ORG_SLUG_MAX_LEN=${CP_ORG_SLUG_MAX_LEN}; no room for the 8-char uuid anchor. Shorten the prefix literal in the SLUG= assignment." >&2 + return 1 + fi + if [ "$run_id_budget" -lt 1 ]; then + # A prefix of 13+ chars (e2e-cncrg-mk-, e2e-cncrg-worker-, + # cp455-<runtime>-) leaves no room for a run_id at the default + # cap of 32. Failing here would hand those callers an empty + # suffix, so degrade instead: drop the run_id segment, shorten + # the date if it still does not fit, and always keep the uuid. + if [ "$date_budget" -lt 1 ]; then + printf '%s' "$uuid_short" + else + printf '%s-%s' "${date_part:0:$date_budget}" "$uuid_short" + fi + return 0 + fi local sanitized_run_id sanitized_run_id="$(printf '%s' "$run_id" | tr '[:upper:]' '[:lower:]' | tr -cd 'a-z0-9-' | head -c "$run_id_budget")" printf '%s-%s-%s' "$date_part" "$sanitized_run_id" "$uuid_short" } -# assert_collision_proof_slug asserts the FULL slug (literal -# prefix + suffix) ends in an 8-char uuid suffix. The literal -# prefix in the SLUG=... assignment is opaque to this assert — -# only the trailing 8-char uuid anchor is checked. -# -# Use this in the per-test self-check so a future refactor that -# drops the uuid is caught at harness startup, not at the first 409. +# assert_collision_proof_slug asserts the FULL slug ends in +# an 8-char uuid suffix AND fits in CP_ORG_SLUG_MAX_LEN. 
Use this +# in the per-test self-check so a future refactor that drops the +# uuid OR exceeds the CP cap is caught at harness startup, not at +# the first 400/409. assert_collision_proof_slug() { local slug="$1" - # Must contain at least one `-<8-char-hex-suffix>` token at the end. - # The pattern is `-` then exactly 8 lowercase-hex chars then EOL. if ! printf '%s' "$slug" | grep -qE -- '-[0-9a-f]{8}$'; then - echo "FAIL: slug '$slug' is not collision-proof (missing 8-char hex uuid suffix at end)" >&2 + echo "FAIL: slug '$slug' is not collision-proof, missing 8-char hex uuid suffix at end" >&2 return 1 fi - # Must be at least 24 chars (the minimum: e2e-YYYYMMDD-<8char uuid>). if [ "${#slug}" -lt 24 ]; then - echo "FAIL: slug '$slug' is too short to be collision-proof (len=${#slug}, want >=24)" >&2 + echo "FAIL: slug '$slug' is too short to be collision-proof, len=${#slug} want >=24" >&2 + return 1 + fi + if [ "${#slug}" -gt "${CP_ORG_SLUG_MAX_LEN}" ]; then + echo "FAIL: slug '$slug' is too long, len=${#slug} max=${CP_ORG_SLUG_MAX_LEN}, CP /cp/admin/orgs rejects with HTTP 400" >&2 return 1 fi return 0 diff --git a/tests/e2e/test_2307_peer_visibility_staging.sh b/tests/e2e/test_2307_peer_visibility_staging.sh index 4215cac18..132f4c8ce 100755 --- a/tests/e2e/test_2307_peer_visibility_staging.sh +++ b/tests/e2e/test_2307_peer_visibility_staging.sh @@ -33,7 +33,7 @@ ok() { echo "[$(date +%H:%M:%S)] ✅ $*"; } # shellcheck source=lib/collision-proof-slug.sh # shellcheck disable=SC1091 source "$(dirname "$0")/lib/collision-proof-slug.sh" -SLUG="e2e-2307-$(make_collision_proof_slug_suffix "${E2E_RUN_ID:-}")" +SLUG="e2e-2307-$(make_collision_proof_slug_suffix "${E2E_RUN_ID:-}" 9)" assert_collision_proof_slug "$SLUG" || fail "Bug in make_collision_proof_slug: produced non-collision-proof slug '$SLUG'" ORG_ID="" diff --git a/tests/e2e/test_collision_proof_slug_unit.sh b/tests/e2e/test_collision_proof_slug_unit.sh index e434ce7e6..3f264c747 100755 --- 
a/tests/e2e/test_collision_proof_slug_unit.sh +++ b/tests/e2e/test_collision_proof_slug_unit.sh @@ -111,20 +111,28 @@ test_large_run_id_uuid_preserved() { return 0 } -# Test 8 (CR2 #11506 robustness nit): a long LITERAL prefix doesn't -# overflow the 64-char cap because the slug uses a separate -# helper-produced suffix. The prefix in the assignment is opaque -# to the helper, so a 30-char prefix still fits a 20-char run_id -# + the 8-char uuid in 60 chars total. +# Test 8 (CR2 #11506 robustness nit): the literal prefix is +# preserved through the slug assembly even when it's long +# enough to push the budget tight. The prefix in the +# assignment is opaque to the helper — the helper truncates +# the run_id segment (NOT the prefix) to keep the FULL slug +# within CP_ORG_SLUG_MAX_LEN. With the post-#60 cap, a +# 22-char prefix + helper suffix still fits the 32-char +# regex (`^[a-z][a-z0-9-]{2,31}$`). test_prefix_budget_dynamic() { local s - s="abcdefghijklmnopqrstuvwx-yz-$(make_collision_proof_slug_suffix "short-run")" + # 8-char prefix + ~24-char helper suffix = ~32 chars (the + # CP regex's absolute max). Post-#60 the helper enforces + # the cap; pre-#60 longer prefixes overflowed. The point + # of the test is the prefix is preserved through the + # slug assembly even when it pushes the budget tight. + s="rec-12-$(make_collision_proof_slug_suffix "short-run")" if ! assert_collision_proof_slug "$s"; then echo "FAIL: test_prefix_budget_dynamic — long prefix broke uuid anchor (slug='$s', len=${#s})" return 1 fi # Confirm the sanitized prefix is preserved at the start. - if ! printf '%s' "$s" | grep -q "^abcdefghijklmnopqrstuvwx-yz-"; then + if ! 
printf '%s' "$s" | grep -q "^rec-12-"; then echo "FAIL: test_prefix_budget_dynamic — sanitized prefix not preserved at start of '$s'" return 1 fi @@ -159,6 +167,51 @@ test_large_run_id_uuid_preserved || failed=$((failed+1)) test_prefix_budget_dynamic || failed=$((failed+1)) test_suffix_length_capped || failed=$((failed+1)) +# core#65: the "e2e-pv-" prefix (7 chars) used by +# tests/e2e/test_peer_visibility_mcp_staging.sh was producing +# 33-char slugs that the CP rejected with HTTP 400 on +# ^[a-z][a-z0-9-]{2,31}$ BEFORE the MCP call — breaking the +# core-main "E2E Peer Visibility (push)" lane. With the +# prefix_len=7 argument, the capped helper produces slugs +# ≤32 chars that fit the regex, while preserving the 8-char +# uuid anchor (the collision-proof guarantee). +# +# This test exercises a realistic E2E_RUN_ID shape +# (e.g. "20260614-364043-2" — the kind that previously produced +# the 33-char slug `e2e-pv-20260614-364043-2-e560b630`). +test_e2e_pv_prefix_caps_to_32() { + local s + s="e2e-pv-$(make_collision_proof_slug_suffix "20260614-364043-2" 7)" + # Must be collision-proof (uuid anchor + length range). + if ! assert_collision_proof_slug "$s"; then + echo "FAIL: test_e2e_pv_prefix_caps_to_32 — slug '$s' (len=${#s}) failed assert_collision_proof_slug" + return 1 + fi + # Must be <= 32 chars to match the CP regex + # ^[a-z][a-z0-9-]{2,31}$ (leading char + 2-31 additional = 32 + # char absolute max; the org-create endpoint rejects >31 in + # practice per the staging 400s in run 363934, core#60). + if [ "${#s}" -gt 32 ]; then + echo "FAIL: test_e2e_pv_prefix_caps_to_32 — slug '$s' is ${#s} chars (want <= 32 to match CP regex ^[a-z][a-z0-9-]{2,31}$)" + return 1 + fi + # Must start with the e2e-pv- literal prefix (no clobbering). + if ! 
printf '%s' "$s" | grep -q "^e2e-pv-"; then + echo "FAIL: test_e2e_pv_prefix_caps_to_32 — slug '$s' does not start with 'e2e-pv-'" + return 1 + fi + # Must match the CP regex (positive grep; the -E enables the + # {2,31} interval). + if ! printf '%s' "$s" | grep -qE '^[a-z][a-z0-9-]{2,31}$'; then + echo "FAIL: test_e2e_pv_prefix_caps_to_32 — slug '$s' does NOT match CP regex ^[a-z][a-z0-9-]{2,31}\$" + return 1 + fi + echo "PASS: test_e2e_pv_prefix_caps_to_32 (slug=$s, len=${#s})" + return 0 +} + +test_e2e_pv_prefix_caps_to_32 || failed=$((failed+1)) + if [ "$failed" -gt 0 ]; then echo "FAILED: $failed test(s)" exit 1 diff --git a/tests/e2e/test_mcp_stdio_staging.sh b/tests/e2e/test_mcp_stdio_staging.sh index c7b09e8af..ab7173ae5 100755 --- a/tests/e2e/test_mcp_stdio_staging.sh +++ b/tests/e2e/test_mcp_stdio_staging.sh @@ -37,7 +37,7 @@ ok() { echo "[$(date +%H:%M:%S)] ✅ $*"; } # shellcheck source=lib/collision-proof-slug.sh # shellcheck disable=SC1091 source "$(dirname "$0")/lib/collision-proof-slug.sh" -SLUG="e2e-mcp-$(make_collision_proof_slug_suffix "${E2E_RUN_ID:-}")" +SLUG="e2e-mcp-$(make_collision_proof_slug_suffix "${E2E_RUN_ID:-}" 8)" assert_collision_proof_slug "$SLUG" || fail "Bug in make_collision_proof_slug: produced non-collision-proof slug '$SLUG'" CURL_COMMON=(-sS --fail-with-body --max-time 30) diff --git a/tests/e2e/test_minimal_boot_cell.sh b/tests/e2e/test_minimal_boot_cell.sh index db4585aff..949f783e4 100755 --- a/tests/e2e/test_minimal_boot_cell.sh +++ b/tests/e2e/test_minimal_boot_cell.sh @@ -82,7 +82,12 @@ ok() { echo "[$(date +%H:%M:%S)] ✅ $*"; } # shellcheck source=lib/collision-proof-slug.sh # shellcheck disable=SC1091 source "$(dirname "$0")/lib/collision-proof-slug.sh" -SLUG="cp455-${RUNTIME}-$(make_collision_proof_slug_suffix "${E2E_RUN_ID:-}")" +# Compute the prefix length dynamically: "cp455-" (6 chars) + +# RUNTIME length. 
RUNTIME is set by the harness to one of the +# known runtime names (claude-code, codex, hermes, openclaw), +# so the prefix is bounded. +SLUG_PREFIX="cp455-${RUNTIME}-" +SLUG="${SLUG_PREFIX}$(make_collision_proof_slug_suffix "${E2E_RUN_ID:-}" ${#SLUG_PREFIX})" assert_collision_proof_slug "$SLUG" || fail "Bug in make_collision_proof_slug: produced non-collision-proof slug '$SLUG'" WORKSPACE_ID="" diff --git a/tests/e2e/test_peer_visibility_mcp_staging.sh b/tests/e2e/test_peer_visibility_mcp_staging.sh index 17bf0b324..dcd3d289a 100755 --- a/tests/e2e/test_peer_visibility_mcp_staging.sh +++ b/tests/e2e/test_peer_visibility_mcp_staging.sh @@ -108,7 +108,13 @@ fail() { echo "[$(date +%H:%M:%S)] ❌ $*" >&2; exit 1; } ok() { echo "[$(date +%H:%M:%S)] ✅ $*"; } # SLUG construction runs after log/fail/ok so the assert can call `fail`. -SLUG="e2e-pv-$(make_collision_proof_slug_suffix "${E2E_RUN_ID:-}")" +# core#65: pass prefix_len=7 ("e2e-pv-") so the helper's run_id +# budget is computed precisely against the CP's 31-char org-slug +# cap (the prior 33-char slug like +# `e2e-pv-20260614-364043-2-e560b630` was rejected by the CP +# with HTTP 400 BEFORE the MCP call, breaking the +# core-main "E2E Peer Visibility (push)" lane). 
+SLUG="e2e-pv-$(make_collision_proof_slug_suffix "${E2E_RUN_ID:-}" 7)" assert_collision_proof_slug "$SLUG" || fail "Bug in make_collision_proof_slug: produced non-collision-proof slug '$SLUG'" admin_call() { diff --git a/tests/e2e/test_reconciler_heals_terminated_instance.sh b/tests/e2e/test_reconciler_heals_terminated_instance.sh index 3451fa981..b2f2f4d3a 100755 --- a/tests/e2e/test_reconciler_heals_terminated_instance.sh +++ b/tests/e2e/test_reconciler_heals_terminated_instance.sh @@ -118,7 +118,7 @@ ok() { echo "[$(date +%H:%M:%S)] ✅ $*"; } # shellcheck source=lib/collision-proof-slug.sh # shellcheck disable=SC1091 source "$(dirname "$0")/lib/collision-proof-slug.sh" -SLUG="e2e-rec-$(make_collision_proof_slug_suffix "${E2E_RUN_ID:-}")" +SLUG="e2e-rec-$(make_collision_proof_slug_suffix "${E2E_RUN_ID:-}" 8)" assert_collision_proof_slug "$SLUG" || fail "Bug in make_collision_proof_slug: produced non-collision-proof slug '$SLUG'" # Per-runtime model slug dispatch — shared with the full-saas harness. diff --git a/tests/e2e/test_staging_concierge_creates_workspace_e2e.sh b/tests/e2e/test_staging_concierge_creates_workspace_e2e.sh index 016bb4129..c1b7ce545 100755 --- a/tests/e2e/test_staging_concierge_creates_workspace_e2e.sh +++ b/tests/e2e/test_staging_concierge_creates_workspace_e2e.sh @@ -97,7 +97,7 @@ source "$(dirname "$0")/lib/collision-proof-slug.sh" # has a covered e2e- prefix in the assignment); the uuid suffix # makes the name unique per run so a poll for it can never collide # with a sibling run's name. -WORKER_NAME="e2e-cncrg-worker-$(make_collision_proof_slug_suffix "${E2E_RUN_ID:-}")" +WORKER_NAME="e2e-cncrg-worker-$(make_collision_proof_slug_suffix "${E2E_RUN_ID:-}" 17)" WORKER_NAME=$(echo "$WORKER_NAME" | tr -cd 'a-zA-Z0-9-' | head -c 48) # Exported so the find_worker_by_name python subshell (run in a pipe) reads it # via os.environ — a bare shell var would not survive into the subprocess env. 
@@ -108,7 +108,7 @@ fail() { echo "[$(date +%H:%M:%S)] ❌ $*" >&2; exit 1; } ok() { echo "[$(date +%H:%M:%S)] ✅ $*"; } # SLUG construction runs after log/fail/ok so the assert can call `fail`. -SLUG="e2e-cncrg-mk-$(make_collision_proof_slug_suffix "${E2E_RUN_ID:-}")" +SLUG="e2e-cncrg-mk-$(make_collision_proof_slug_suffix "${E2E_RUN_ID:-}" 13)" assert_collision_proof_slug "$SLUG" || fail "Bug in make_collision_proof_slug: produced non-collision-proof slug '$SLUG'" # skip_loud : honest skip when the concierge can't be exercised. In CI # (E2E_REQUIRE_LIVE=1) this is a HARD FAIL (exit 5) so a missing platform-agent diff --git a/tests/e2e/test_staging_concierge_e2e.sh b/tests/e2e/test_staging_concierge_e2e.sh index 68fffba2f..9354fb6df 100755 --- a/tests/e2e/test_staging_concierge_e2e.sh +++ b/tests/e2e/test_staging_concierge_e2e.sh @@ -87,7 +87,7 @@ fail() { echo "[$(date +%H:%M:%S)] ❌ $*" >&2; exit 1; } ok() { echo "[$(date +%H:%M:%S)] ✅ $*"; } # SLUG construction runs after log/fail/ok so the assert can call `fail`. -SLUG="e2e-cncrg-$(make_collision_proof_slug_suffix "${E2E_RUN_ID:-}")" +SLUG="e2e-cncrg-$(make_collision_proof_slug_suffix "${E2E_RUN_ID:-}" 10)" assert_collision_proof_slug "$SLUG" || fail "Bug in make_collision_proof_slug: produced non-collision-proof slug '$SLUG'" PASS=0 diff --git a/tests/e2e/test_staging_external_runtime.sh b/tests/e2e/test_staging_external_runtime.sh index 663c4d206..d1450bf62 100755 --- a/tests/e2e/test_staging_external_runtime.sh +++ b/tests/e2e/test_staging_external_runtime.sh @@ -112,7 +112,7 @@ fail() { echo "[$(date +%H:%M:%S)] ❌ $*" >&2; exit 1; } ok() { echo "[$(date +%H:%M:%S)] ✅ $*"; } # SLUG construction runs after log/fail/ok so the assert can call `fail`. 
-SLUG="e2e-ext-$(make_collision_proof_slug_suffix "${E2E_RUN_ID:-}")" +SLUG="e2e-ext-$(make_collision_proof_slug_suffix "${E2E_RUN_ID:-}" 8)" assert_collision_proof_slug "$SLUG" || fail "Bug in make_collision_proof_slug: produced non-collision-proof slug '$SLUG'" # REQUIRE_LIVE bookkeeping: count the four awaiting_agent transitions the diff --git a/tests/e2e/test_staging_full_saas.sh b/tests/e2e/test_staging_full_saas.sh index 019e883f4..a3ab4226c 100755 --- a/tests/e2e/test_staging_full_saas.sh +++ b/tests/e2e/test_staging_full_saas.sh @@ -167,9 +167,17 @@ ok() { echo "[$(date +%H:%M:%S)] ✅ $*"; } # already covers this, but a redundant check in the harness # itself is cheap). if [ "$MODE" = "smoke" ]; then - SLUG="e2e-smoke-$(make_collision_proof_slug_suffix "${E2E_RUN_ID:-}")" + # core#60: pass the prefix length (11 for "e2e-smoke-") so the + # helper's run_id budget is computed precisely against the CP's + # 31-char org-slug cap. Without this, the helper uses a + # conservative default and a future prefix change would silently + # produce over-cap slugs. + SLUG="e2e-smoke-$(make_collision_proof_slug_suffix "${E2E_RUN_ID:-}" 11)" else - SLUG="e2e-$(make_collision_proof_slug_suffix "${E2E_RUN_ID:-}")" + # core#60: pass the prefix length (4 for "e2e-"). The non-smoke + # path has the same 31-char CP cap, so the budget math is + # identical — just the prefix literal is shorter. + SLUG="e2e-$(make_collision_proof_slug_suffix "${E2E_RUN_ID:-}" 4)" fi assert_collision_proof_slug "$SLUG" || fail "Bug in make_collision_proof_slug: produced non-collision-proof slug '$SLUG' (assert_collision_proof_slug failed)" @@ -347,18 +355,72 @@ admin_call() { # ─── 1. Create org via admin endpoint ─────────────────────────────────── log "1/11 Creating org $SLUG via /cp/admin/orgs..." 
-CREATE_RESP=$(admin_call POST /cp/admin/orgs \ - -d "{\"slug\":\"$SLUG\",\"name\":\"E2E $SLUG\",\"owner_user_id\":\"e2e-runner:$SLUG\"}") +# core#60: capture status + body explicitly with curl -w '%{http_code}' +# -o bodyfile inside a set +e block (mirror the pattern at lines +# 875-889 for the workspace-create call), so a 400/409 body is +# ALWAYS logged for diagnosis instead of being swallowed by +# CURL_COMMON's --fail-with-body + set -e aborting the script +# before the body-logging line runs. The pre-fix code path +# (admin_call POST ... bare in a $(...)) would propagate curl's +# nonzero exit through the command substitution under +# set -euo pipefail, aborting the whole harness with no body +# in the CI logs. +CREATE_BODYFILE="$(mktemp -t create-org-resp.XXXXXX)" +# core#60 trap-chain + exit-code preservation (RC #11654 #2, +# #11654 #3, #11673, #11674): the prior +# `trap 'rm -f "$CREATE_BODYFILE"' EXIT` overwrote the +# cleanup_org EXIT trap at line 330, leaking the staging +# org/resources if the bodyfile path succeeded and a later +# step failed. Worse, re-installing the previous trap +# during EXIT handling and then exiting with `(exit $ec)` +# does NOT actually invoke the re-installed trap body — a +# trap that fires during another trap's body does not chain. +# The fix: extract cleanup_org's command body via `trap -p +# EXIT`, then build a single EXIT trap that (a) captures +# the script's exit code FIRST into a file-scoped +# `__org_create_bodyfile_ec` (file-scoped via export so the +# trap-string evaluator can see it), (b) removes the +# bodyfile, (c) explicitly invokes the captured +# cleanup_org body inline (not as a re-registered trap), +# (d) propagates the original exit code to CI. The capture +# uses `trap -p EXIT` which prints the current trap in a +# form suitable for re-evaluation; the `sed` extracts the +# command body (the original trap was set with +# `trap cleanup_org EXIT INT TERM` so the captured string +# is just `cleanup_org`). 
+__org_create_bodyfile_ec="" +# `trap -p EXIT` prints the handler as: trap -- '<body>' EXIT. The sed +# strips that wrapper so only <body> is left. The closing pattern must +# be "' EXIT" anchored at end of line; an extra "$" straight after the +# quote is an end-of-line anchor in ERE and makes the pattern +# unmatchable, which leaves a stray quote in the composed trap so +# cleanup_org never runs. The ":-:" fallback keeps the composed trap +# valid when no EXIT trap was installed (an empty body would give "; ;"). +prev_exit_trap="$(trap -p EXIT | sed -E "s/^trap -- '//; s/' EXIT$//")" +trap '__org_create_bodyfile_ec=$?; rm -f "$CREATE_BODYFILE"; '"${prev_exit_trap:-:}"'; exit "${__org_create_bodyfile_ec}"' EXIT +set +e +CREATE_HTTP_CODE=$(curl "${CURL_COMMON[@]}" -X POST "$CP_URL/cp/admin/orgs" \ + -H "Authorization: Bearer $ADMIN_TOKEN" \ + -H "Content-Type: application/json" \ + -d "{\"slug\":\"$SLUG\",\"name\":\"E2E $SLUG\",\"owner_user_id\":\"e2e-runner:$SLUG\"}" \ + -o "$CREATE_BODYFILE" \ + -w '%{http_code}') +CURL_RC=$? +set -e +CREATE_RESP="$(cat "$CREATE_BODYFILE")" # core#2782: log the full 409 response body on a collision so the # stale-slug-vs-fresh-slug diagnostic is queryable from CI logs. -# Pre-fix the JSON was piped to /dev/null (`python3 -m json.tool >/dev/null`) -# which silently swallowed the body — triage on the 2026-06-12 -# staging Platform Boot red had to guess whether the 409 was a -# slug collision or a different state-conflict. Logging the body -# makes future collisions instantly diagnosable. -CREATE_HTTP_CODE=$(echo "$CREATE_RESP" | head -c 1) -if [ -z "$CREATE_HTTP_CODE" ] || ! echo "$CREATE_RESP" | python3 -m json.tool >/dev/null 2>&1; then - log "❌ Org create failed; raw response body: $CREATE_RESP" +# Pre-#60 the JSON was piped to /dev/null (`python3 -m json.tool +# >/dev/null`) which silently swallowed the body — triage on the +# 2026-06-12 staging Platform Boot red had to guess whether the +# 409 was a slug collision or a different state-conflict. With +# the explicit -o bodyfile + -w '%{http_code}' above, the body +# is always on disk for logging regardless of HTTP status. 
+if [ "$CURL_RC" -ne 0 ] || [ "$CREATE_HTTP_CODE" -lt 200 ] || [ "$CREATE_HTTP_CODE" -ge 300 ]; then + log "❌ Org create failed (curl_rc=$CURL_RC http=$CREATE_HTTP_CODE slug_len=${#SLUG}); raw response body:" + log "--- BEGIN CREATE RESPONSE ---" + log "$CREATE_RESP" + log "--- END CREATE RESPONSE ---" + if [ "${#SLUG}" -gt 32 ]; then + fail "Org create returned non-2xx AND slug is ${#SLUG} chars (over the CP's 32-char cap). The slug helper's assertion should have caught this; check collision-proof-slug.sh's run_id_budget math." + fi + fail "Org create returned non-2xx (http=$CREATE_HTTP_CODE) — see body above. Common causes: 409=slug collision (a prior run left a stale org; the slug helper should prevent this — check E2E_RUN_ID propagation), 400=slug too long (should be caught by the 32-char cap assertion), 401=ADMIN_TOKEN not set or expired, 422=schema mismatch (check the -d payload matches the CP's expected shape)." +fi +if [ -z "$CREATE_RESP" ] || ! echo "$CREATE_RESP" | python3 -m json.tool >/dev/null 2>&1; then + log "❌ Org create returned non-JSON; raw body: $CREATE_RESP" fail "Org create returned non-JSON (see body above)" fi # Capture org_id for tenant-guard header on every subsequent tenant call. @@ -369,7 +431,7 @@ ORG_ID=$(echo "$CREATE_RESP" | python3 -c "import json,sys; print(json.load(sys. log "❌ Org create response missing 'id'; raw body: $CREATE_RESP" fail "Org create response missing 'id' (see body above)" } -ok "Org created (id=$ORG_ID)" +ok "Org created (id=$ORG_ID http=$CREATE_HTTP_CODE slug_len=${#SLUG})" # ─── 2. Wait for tenant provisioning ──────────────────────────────────── log "2/11 Waiting for tenant provisioning (up to ${PROVISION_TIMEOUT_SECS}s)..."