forked from molecule-ai/molecule-core
fix(e2e-sanity): normalize unexpected curl exit codes in cleanup trap (#2159)
When E2E_INTENTIONAL_FAILURE=1 poisons the tenant token, step 5/11's
`tenant_call POST /workspaces` curl exits 22 (HTTP error under
--fail-with-body). `set -e` propagates rc=22 directly, but the
script's documented contract emits only {0,1,2,3,4}, and the sanity
workflow's case statement only matches those. rc=22 falls through
to "Unexpected rc — investigate harness" and opens a false-positive
priority-high "safety net broken" issue (#2159, weekly run on
2026-04-27).
The trap now captures $? at entry (must be the first statement
before any command clobbers it) and at the end normalizes any
non-contract code to 1 (generic failure). Leak detection continues
to exit 4 directly, so its semantics are preserved.
Adds tests/e2e/test_harness_rc_normalization.sh — a self-contained
regression test that builds a stub harness with the same trap
pattern, triggers controlled exit codes, and asserts the
normalization. Covers the 5 contracted codes + curl-22 (the bug) +
3 representative network-failure codes + sigsegv-139.
Verification:
- 10/10 regression tests pass
- shellcheck clean on both modified files
- production teardown path unchanged for legitimate {1,2,3,4}
failures and the leak-detection exit 4
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
e99f937630
commit
99fb61bb8c
88
tests/e2e/test_harness_rc_normalization.sh
Executable file
88
tests/e2e/test_harness_rc_normalization.sh
Executable file
@ -0,0 +1,88 @@
|
||||
#!/usr/bin/env bash
|
||||
# Regression test for #2159: when test_staging_full_saas.sh exits via
|
||||
# `set -e` propagating an exit code outside the documented contract
|
||||
# {0,1,2,3,4}, the cleanup trap must normalize it to 1.
|
||||
#
|
||||
# Pre-fix: a poisoned-token curl in step 5/11 exited with curl rc=22
|
||||
# (HTTP error under --fail-with-body). The sanity workflow's case
|
||||
# statement only matched {0,1,4}, fell through to the "investigate
|
||||
# harness" branch, and opened a false-positive priority-high issue
|
||||
# (#2159, 2026-04-27).
|
||||
#
|
||||
# This test exercises the exact normalization pattern in isolation
|
||||
# so a future refactor that drops or weakens the pattern fails CI.
|
||||
set -uo pipefail # NOT -e — we want to inspect non-zero rc explicitly
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
# Build a stub harness with the same trap pattern as the production
|
||||
# script. Source it in a subshell, trigger an exit with a controlled
|
||||
# rc, and assert the observed final rc.
|
||||
run_stub() {
|
||||
local trigger_rc="$1"
|
||||
local stub
|
||||
stub=$(mktemp)
|
||||
cat > "$stub" <<EOF
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
CLEANUP_DONE=0
|
||||
cleanup_org() {
|
||||
local entry_rc=\$?
|
||||
if [ "\$CLEANUP_DONE" = "1" ]; then return 0; fi
|
||||
CLEANUP_DONE=1
|
||||
case "\$entry_rc" in
|
||||
0|1|2|3|4) ;;
|
||||
*) exit 1 ;;
|
||||
esac
|
||||
}
|
||||
trap cleanup_org EXIT INT TERM
|
||||
exit $trigger_rc
|
||||
EOF
|
||||
chmod +x "$stub"
|
||||
local observed
|
||||
bash "$stub"; observed=$?
|
||||
rm -f "$stub"
|
||||
echo "$observed"
|
||||
}
|
||||
|
||||
assert_rc() {
|
||||
local label="$1" trigger="$2" expected="$3"
|
||||
local observed
|
||||
observed=$(run_stub "$trigger")
|
||||
if [ "$observed" = "$expected" ]; then
|
||||
echo " ✓ $label: trigger=$trigger expected=$expected observed=$observed"
|
||||
PASS=$((PASS+1))
|
||||
else
|
||||
echo " ✗ $label: trigger=$trigger expected=$expected OBSERVED=$observed" >&2
|
||||
FAIL=$((FAIL+1))
|
||||
fi
|
||||
}
|
||||
|
||||
echo "Test: cleanup_org exit-code normalization"
|
||||
echo " Contract: only {0,1,2,3,4} pass through; anything else maps to 1"
|
||||
echo
|
||||
|
||||
# Contracted codes pass through unchanged.
|
||||
assert_rc "happy path" 0 0
|
||||
assert_rc "fail() generic" 1 1
|
||||
assert_rc "missing env" 2 2
|
||||
assert_rc "provisioning timeout" 3 3
|
||||
assert_rc "leak detected (cleanup exits 4)" 4 4
|
||||
|
||||
# The bug: rc=22 from curl --fail-with-body must normalize to 1.
|
||||
assert_rc "curl HTTP error (rc=22, the bug)" 22 1
|
||||
|
||||
# Other realistic curl failure codes (network, SSL, etc.) must also
|
||||
# normalize. Pinning a few representative values so a future regex
|
||||
# refactor that loses the wildcard is caught.
|
||||
assert_rc "curl couldn't resolve host (rc=6)" 6 1
|
||||
assert_rc "curl SSL error (rc=35)" 35 1
|
||||
assert_rc "curl operation timeout (rc=28)" 28 1
|
||||
|
||||
# Edge: very high rc (from a SIGSEGV-killed child or similar).
|
||||
assert_rc "high rc (139, sigsegv)" 139 1
|
||||
|
||||
echo
|
||||
echo "passed=$PASS failed=$FAIL"
|
||||
[ "$FAIL" = "0" ]
|
||||
@ -72,7 +72,12 @@ CURL_COMMON=(-sS --fail-with-body --max-time 30)
|
||||
# ─── cleanup trap ───────────────────────────────────────────────────────
|
||||
CLEANUP_DONE=0
|
||||
cleanup_org() {
|
||||
[ "$CLEANUP_DONE" = "1" ] && return 0
|
||||
# Capture upstream exit code IMMEDIATELY — must be the first statement
|
||||
# in the trap, before any command (including the CLEANUP_DONE check)
|
||||
# that would clobber $?.
|
||||
local entry_rc=$?
|
||||
|
||||
if [ "$CLEANUP_DONE" = "1" ]; then return 0; fi
|
||||
CLEANUP_DONE=1
|
||||
|
||||
if [ "${E2E_KEEP_ORG:-0}" = "1" ]; then
|
||||
@ -99,6 +104,20 @@ cleanup_org() {
|
||||
exit 4
|
||||
fi
|
||||
ok "Teardown clean — no orphan resources for $SLUG"
|
||||
|
||||
# Normalize unexpected upstream exit codes to 1 (generic failure). The
|
||||
# script's documented contract (header "Exit codes" section) only emits
|
||||
# {0, 1, 2, 3, 4}, but `set -e` propagates the raw exit code of the
|
||||
# failing command — e.g. curl exits 22 on HTTP error under
|
||||
# --fail-with-body. Without this normalization, the
|
||||
# E2E_INTENTIONAL_FAILURE sanity workflow (e2e-staging-sanity.yml)
|
||||
# gets rc=22 from the poisoned-token curl, falls through its
|
||||
# case statement, and opens a false-positive priority-high
|
||||
# "safety net broken" issue (#2159, 2026-04-27).
|
||||
case "$entry_rc" in
|
||||
0|1|2|3|4) ;; # contracted codes — let bash use entry_rc
|
||||
*) exit 1 ;; # anything else is a generic failure
|
||||
esac
|
||||
}
|
||||
trap cleanup_org EXIT INT TERM
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user