harden(e2e): staging-saas lifecycle fail-closed + E2E_REQUIRE_LIVE guard #2278
@@ -364,6 +364,14 @@ jobs:
|
||||
# check missed. If a refactor weakens the gate to a shape check,
|
||||
# this step goes red on every PR.
|
||||
bash tests/e2e/test_completion_assert_unit.sh
|
||||
# harden/e2e-staging-saas-failclosed: fail-direction proof for the
|
||||
# E2E_REQUIRE_LIVE fail-closed-on-skip guard in
|
||||
# test_staging_full_saas.sh. Offline (no LLM/network/provisioning):
|
||||
# asserts the guard exits 5 when a live lifecycle did NOT run and
|
||||
# passes when all milestones fired — so a refactor that lets the
|
||||
# staging gate report green without a real provision→online→A2A
|
||||
# cycle goes red on every PR.
|
||||
bash tests/e2e/test_require_live_guard_unit.sh
|
||||
|
||||
- if: ${{ needs.changes.outputs.scripts == 'true' }}
|
||||
name: Test ECR promote-tenant-image script (mock-driven, no live infra)
|
||||
|
||||
@@ -175,6 +175,11 @@ jobs:
|
||||
E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || github.event.inputs.runtime == 'google-adk' && 'google_genai:gemini-2.5-pro' || 'MiniMax-M2' }}
|
||||
E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
|
||||
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
|
||||
# Fail-closed-on-skip: in CI the harness MUST prove ≥1 full
|
||||
# provision→online→A2A cycle. If it reaches the end having validated
|
||||
# nothing (a future short-circuit / skip path), it exits 5 rather than
|
||||
# reporting a false green. Mirrors CP serving-e2e SERVING_E2E_REQUIRE_LIVE.
|
||||
E2E_REQUIRE_LIVE: '1'
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
@@ -372,6 +377,10 @@ jobs:
|
||||
E2E_MODE: smoke
|
||||
E2E_RUN_ID: "platform-${{ github.run_id }}-${{ github.run_attempt }}"
|
||||
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
|
||||
# Fail-closed-on-skip (see BYOK job). smoke mode still runs steps 2/4/7/8b,
|
||||
# so all four required milestones (provisioned/tenant_online/
|
||||
# workspace_online/a2a_roundtrip) fire — the guard is valid for this lane too.
|
||||
E2E_REQUIRE_LIVE: '1'
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
Executable
+124
@@ -0,0 +1,124 @@
|
||||
#!/usr/bin/env bash
|
||||
# Fail-direction / load-bearing proof for the E2E_REQUIRE_LIVE
|
||||
# fail-closed-on-skip guard in test_staging_full_saas.sh.
|
||||
#
|
||||
# WHY (harden/e2e-staging-saas-failclosed): the staging SaaS E2E is being
|
||||
# hardened to become a HARD merge-gate. A gate that can reach its final `ok`
|
||||
# WITHOUT having actually exercised a provision→online→A2A cycle is a
|
||||
# false-green — it would let a refactor that short-circuits the lifecycle
|
||||
# (or a skip path that swallows it) report PASS. require_live_or_die() is the
|
||||
# guard; this test proves it FAILS (exit 5) when milestones are missing and
|
||||
# PASSES when all fired — the watch-it-fail counterpart the dev-SOP requires.
|
||||
#
|
||||
# Runs entirely offline (no LLM, no network, no provisioning) — pure shell
|
||||
# logic — so it can run on every PR in the fast lane and locally via `bash`.
|
||||
set -uo pipefail
|
||||
|
||||
# Scratch dir for the generated guard-runner stubs. EXIT trap guarantees
|
||||
# cleanup even when an assertion exits the test non-zero (lint_cleanup_traps).
|
||||
TMPDIR_E2E=$(mktemp -d -t require-live-guard-XXXXXX)
|
||||
trap 'rm -rf "$TMPDIR_E2E"' EXIT INT TERM
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
# Reproduce the EXACT guard logic from test_staging_full_saas.sh. Kept in
|
||||
# lockstep with the host script: if the host logic changes, this test must
|
||||
# change with it (and a divergence is itself a signal to re-prove the gate).
|
||||
make_guard_runner() {
|
||||
cat <<'EOF'
|
||||
REQUIRE_LIVE="${E2E_REQUIRE_LIVE:-0}"
|
||||
LIVE_MILESTONES=""
|
||||
live_milestone() {
|
||||
case " $LIVE_MILESTONES " in
|
||||
*" $1 "*) ;;
|
||||
*) LIVE_MILESTONES="$LIVE_MILESTONES $1" ;;
|
||||
esac
|
||||
}
|
||||
require_live_or_die() {
|
||||
[ "$REQUIRE_LIVE" = "1" ] || return 0
|
||||
local required="provisioned tenant_online workspace_online a2a_roundtrip"
|
||||
local m missing=""
|
||||
for m in $required; do
|
||||
case " $LIVE_MILESTONES " in
|
||||
*" $m "*) ;;
|
||||
*) missing="$missing $m" ;;
|
||||
esac
|
||||
done
|
||||
if [ -n "$missing" ]; then
|
||||
echo "MISSING:${missing}" >&2
|
||||
exit 5
|
||||
fi
|
||||
}
|
||||
EOF
|
||||
}
|
||||
|
||||
# run_case <E2E_REQUIRE_LIVE value> <space-separated milestones to stamp>
|
||||
# echoes the observed exit code.
|
||||
run_case() {
|
||||
local require_live="$1"; shift
|
||||
local milestones="$1"; shift || true
|
||||
local stub observed m
|
||||
stub=$(mktemp "$TMPDIR_E2E/stub.XXXXXX")
|
||||
{
|
||||
echo "#!/usr/bin/env bash"
|
||||
echo "set -uo pipefail"
|
||||
make_guard_runner
|
||||
for m in $milestones; do
|
||||
echo "live_milestone $m"
|
||||
done
|
||||
echo "require_live_or_die"
|
||||
echo 'echo REACHED_END'
|
||||
} > "$stub"
|
||||
E2E_REQUIRE_LIVE="$require_live" bash "$stub" >/dev/null 2>&1
|
||||
observed=$?
|
||||
rm -f "$stub"
|
||||
echo "$observed"
|
||||
}
|
||||
|
||||
assert_rc() {
|
||||
local label="$1" require_live="$2" milestones="$3" expected="$4"
|
||||
local observed
|
||||
observed=$(run_case "$require_live" "$milestones")
|
||||
if [ "$observed" = "$expected" ]; then
|
||||
echo " ✓ $label: REQUIRE_LIVE=$require_live milestones='$milestones' → rc=$observed"
|
||||
PASS=$((PASS+1))
|
||||
else
|
||||
echo " ✗ $label: REQUIRE_LIVE=$require_live milestones='$milestones' expected=$expected OBSERVED=$observed" >&2
|
||||
FAIL=$((FAIL+1))
|
||||
fi
|
||||
}
|
||||
|
||||
echo "=== E2E_REQUIRE_LIVE fail-closed-on-skip guard proof ==="
|
||||
echo
|
||||
|
||||
# DECISIVE (false-green trap): REQUIRE_LIVE=1 but NO lifecycle ran → exit 5.
|
||||
assert_rc "require-live, nothing ran → exit 5 (the false-green trap)" \
|
||||
1 "" 5
|
||||
|
||||
# REQUIRE_LIVE=1 with a partial lifecycle (provisioned but no A2A) → exit 5.
|
||||
assert_rc "require-live, partial lifecycle → exit 5" \
|
||||
1 "provisioned tenant_online workspace_online" 5
|
||||
|
||||
# REQUIRE_LIVE=1 with every required milestone → pass (rc=0).
|
||||
assert_rc "require-live, full lifecycle → pass" \
|
||||
1 "provisioned tenant_online workspace_online a2a_roundtrip" 0
|
||||
|
||||
# Idempotency: duplicate stamps don't break membership; full set still passes.
|
||||
assert_rc "require-live, duplicate stamps still pass" \
|
||||
1 "provisioned provisioned tenant_online workspace_online a2a_roundtrip a2a_roundtrip" 0
|
||||
|
||||
# Guard is a no-op when CI did not demand a live run: a non-live local run
|
||||
# with nothing stamped must NOT exit 5 (we don't break local/debug runs).
|
||||
assert_rc "no require-live, nothing ran → pass (guard is opt-in)" \
|
||||
0 "" 0
|
||||
assert_rc "require-live unset-equivalent (0), partial → pass" \
|
||||
0 "provisioned" 0
|
||||
|
||||
# Extra unknown milestone is harmless as long as required set is present.
|
||||
assert_rc "require-live, extra milestone tolerated" \
|
||||
1 "provisioned tenant_online workspace_online a2a_roundtrip extra_thing" 0
|
||||
|
||||
echo
|
||||
echo "=== Results: $PASS passed, $FAIL failed ==="
|
||||
[ "$FAIL" -eq 0 ]
|
||||
@@ -47,6 +47,15 @@
|
||||
# tear down cleanly (and exit 4 on leak).
|
||||
# Used by a dedicated sanity workflow
|
||||
# that verifies the safety net.
|
||||
# E2E_REQUIRE_LIVE 1 → fail-closed-on-skip guard (CI sets this).
|
||||
# When set, the run MUST actually complete
|
||||
# ≥1 full provision→online→A2A cycle. A run
|
||||
# that reaches the end without having proven
|
||||
# a real round-trip (e.g. a future refactor
|
||||
# short-circuits a stage, or a skip path
|
||||
# swallows the lifecycle) exits 5 rather than
|
||||
# reporting a false green. Mirrors CP
|
||||
# serving-e2e's SERVING_E2E_REQUIRE_LIVE.
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 happy path
|
||||
@@ -54,6 +63,37 @@
|
||||
# 2 missing required env
|
||||
# 3 provisioning timed out
|
||||
# 4 teardown left orphan resources
|
||||
# 5 E2E_REQUIRE_LIVE set but the run validated no real lifecycle (no
|
||||
# false-green-on-skip)
|
||||
#
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
# PROMOTION-READINESS (harden/e2e-staging-saas-failclosed):
|
||||
# This harness is being hardened so `E2E Staging SaaS` + `E2E Staging
|
||||
# Platform Boot` can become HARD merge-gates. continue-on-error is NOT
|
||||
# flipped here — that promotion is the CTO's irreversible branch-protection
|
||||
# call. What this branch makes fail-closed (was false-green / un-named
|
||||
# flake before):
|
||||
# • Provision/online waits are bounded readiness-POLLS, not fixed sleeps;
|
||||
# each hard-fails with a named mechanism + last-seen signal on deadline,
|
||||
# never a silent timeout (cp#245 boot-timeout class).
|
||||
# • Peer-discovery (9b) asserts a real 2xx, not just "not 404" — a 5xx /
|
||||
# 000 / empty no longer reads as "reachable".
|
||||
# • Activity-log (9b) is ASSERTED reachable (2xx + parseable), not
|
||||
# logged-and-ignored behind `|| echo '[]'`.
|
||||
# • Child activity provenance (10) is asserted (was soft-logged).
|
||||
# • E2E_REQUIRE_LIVE=1 (CI) makes the run exit 5 if it reached the end
|
||||
# without proving a real provision→online→A2A round-trip — no
|
||||
# false-green-on-skip.
|
||||
# STILL BLOCKS making it REQUIRED (must clear before the CTO flips
|
||||
# continue-on-error→false in .gitea/workflows/e2e-staging-saas.yml):
|
||||
# • De-flake window: N consecutive green runs on main for BOTH jobs
|
||||
# (platform-boot shares the cp#245 boot surface — #2187 tracks its
|
||||
# flip). This harness removes the harness-side flake mechanisms; the
|
||||
# remaining surface is real-infra (EC2 cold boot, CF DNS) latency,
|
||||
# already bounded by the readiness polls above.
|
||||
# • Branch-protection required-context wiring is a repo-settings change,
|
||||
# not a code change in this PR.
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
@@ -90,6 +130,41 @@ log() { echo "[$(date +%H:%M:%S)] $*"; }
|
||||
fail() { echo "[$(date +%H:%M:%S)] ❌ $*" >&2; exit 1; }
|
||||
ok() { echo "[$(date +%H:%M:%S)] ✅ $*"; }
|
||||
|
||||
# ─── fail-closed-on-skip live-lifecycle guard ───────────────────────────
|
||||
# E2E_REQUIRE_LIVE=1 (set by CI) asserts this run ACTUALLY exercised a full
|
||||
# provision→online→A2A cycle. Each load-bearing lifecycle stage stamps a
|
||||
# milestone via live_milestone(); at the very end, require_live_or_die()
|
||||
# checks every required milestone fired. Mechanism: without this, a future
|
||||
# refactor that short-circuits a stage — or a skip/early-return path that
|
||||
# swallows the lifecycle — would let the script reach its final `ok` and
|
||||
# report GREEN having validated nothing. Mirrors CP serving-e2e's
|
||||
# SERVING_E2E_REQUIRE_LIVE (skip-if-absent must be LOUD, never silent green).
|
||||
REQUIRE_LIVE="${E2E_REQUIRE_LIVE:-0}"
|
||||
LIVE_MILESTONES=""
|
||||
live_milestone() {
|
||||
# Idempotent set-membership append. Space-delimited; names are tokens.
|
||||
case " $LIVE_MILESTONES " in
|
||||
*" $1 "*) ;;
|
||||
*) LIVE_MILESTONES="$LIVE_MILESTONES $1" ;;
|
||||
esac
|
||||
}
|
||||
require_live_or_die() {
|
||||
# No-op unless CI demanded a live run.
|
||||
[ "$REQUIRE_LIVE" = "1" ] || return 0
|
||||
local required="provisioned tenant_online workspace_online a2a_roundtrip"
|
||||
local m missing=""
|
||||
for m in $required; do
|
||||
case " $LIVE_MILESTONES " in
|
||||
*" $m "*) ;;
|
||||
*) missing="$missing $m" ;;
|
||||
esac
|
||||
done
|
||||
if [ -n "$missing" ]; then
|
||||
echo "[$(date +%H:%M:%S)] ❌ E2E_REQUIRE_LIVE=1 but the run did NOT prove a full live lifecycle — missing milestone(s):${missing}. Reached:${LIVE_MILESTONES:-<none>}. This is a false-green-on-skip guard: a run that validates no real provision→online→A2A cycle MUST NOT report green." >&2
|
||||
exit 5
|
||||
fi
|
||||
}
|
||||
|
||||
# Per-runtime model slug dispatch — see lib/model_slug.sh for the rationale.
|
||||
# Extracted so unit tests (tests/e2e/test_model_slug.sh) can pin every branch
|
||||
# without booting the full 11-step lifecycle.
|
||||
@@ -197,7 +272,7 @@ cleanup_org() {
|
||||
# case statement, and opens a false-positive priority-high
|
||||
# "safety net broken" issue (#2159, 2026-04-27).
|
||||
case "$entry_rc" in
|
||||
0|1|2|3|4) ;; # contracted codes — let bash use entry_rc
|
||||
0|1|2|3|4|5) ;; # contracted codes — let bash use entry_rc
|
||||
*) exit 1 ;; # anything else is a generic failure
|
||||
esac
|
||||
}
|
||||
@@ -295,6 +370,7 @@ print('(no org row found for slug=$SLUG — DB drift?)')
|
||||
esac
|
||||
done
|
||||
ok "Tenant provisioning complete"
|
||||
live_milestone provisioned
|
||||
|
||||
# Derive tenant domain from CP hostname so the same harness works in
|
||||
# both prod (api.moleculesai.app → moleculesai.app) and staging
|
||||
@@ -351,6 +427,7 @@ while true; do
|
||||
sleep 5
|
||||
done
|
||||
ok "Tenant reachable at $TENANT_URL"
|
||||
live_milestone tenant_online
|
||||
|
||||
# Sanity-test path: once the tenant is provisioned, poisoning the
|
||||
# tenant token proves the EXIT trap + leak assertion still fire.
|
||||
@@ -570,6 +647,7 @@ fi
|
||||
WS_TO_CHECK=("$PARENT_ID")
|
||||
[ -n "$CHILD_ID" ] && WS_TO_CHECK+=("$CHILD_ID")
|
||||
wait_workspaces_online_routable "7/11 Waiting for workspace(s) to reach status=online (up to $((WORKSPACE_ONLINE_TIMEOUT_SECS/60)) min — hermes cold boot)..." "${WS_TO_CHECK[@]}"
|
||||
live_milestone workspace_online
|
||||
|
||||
# ─── 7a. Real chat image upload/download round-trip ───────────────────
|
||||
# This deliberately uses the production workflow: tenant admin/session auth
|
||||
@@ -974,6 +1052,11 @@ except Exception:
|
||||
" 2>/dev/null || echo "")
|
||||
# CORE GATE: contains PINEAPPLE (real round-trip) AND no error-as-text.
|
||||
a2a_assert_real_completion "$KA_TEXT" "PINEAPPLE" "A2A known-answer (parent, $RUNTIME/$MODEL_SLUG)"
|
||||
# Real, deterministic LLM round-trip proven — the load-bearing milestone for
|
||||
# the fail-closed-on-skip guard. Stamped AFTER a2a_assert_real_completion (not
|
||||
# after the looser PONG check) so the milestone means a verified completion,
|
||||
# not just a 2xx-with-text.
|
||||
live_milestone a2a_roundtrip
|
||||
|
||||
# ─── 8c. byok-routing regression guard (#1994) ─────────────────────────
|
||||
# The parent was provisioned with the customer's OWN vendor key
|
||||
@@ -1099,18 +1182,50 @@ print(json.dumps({
|
||||
ok "HMA memory write+read roundtripped"
|
||||
|
||||
log "9b. Peer discovery + activity log smoke..."
|
||||
# FAIL-CLOSED: assert a real 2xx, not merely "not 404". The previous
|
||||
# `[ "$PEERS_CODE" = "404" ] && fail` only caught the route-missing case —
|
||||
# a 5xx, 000 (connection failure), or empty capture ALL fell through to
|
||||
# "reachable" (false-green: a broken-but-present route read as healthy).
|
||||
# Mechanism: route the http_code into its own tempfile (no stderr capture,
|
||||
# which the old `2>&1 | head -1` could pollute with a curl error line) and
|
||||
# require 2xx explicitly.
|
||||
PEERS_TMP=$(e2e_tmp /tmp/e2e_peers.XXXXXX)
|
||||
set +e
|
||||
tenant_call GET "/registry/$PARENT_ID/peers" -o /dev/null -w "%{http_code}\n" 2>&1 | head -1 > /tmp/peers_code.txt
|
||||
PEERS_CODE=$(tenant_call GET "/registry/$PARENT_ID/peers" \
|
||||
-o "$PEERS_TMP" -w "%{http_code}" 2>/dev/null)
|
||||
PEERS_RC=$?
|
||||
set -e
|
||||
PEERS_CODE=$(cat /tmp/peers_code.txt)
|
||||
[ "$PEERS_CODE" = "404" ] && fail "Peers endpoint missing (404) — route regression"
|
||||
PEERS_CODE=${PEERS_CODE:-000}
|
||||
if [ "$PEERS_CODE" = "404" ]; then
|
||||
fail "Peers endpoint missing (404) — route regression. /registry/$PARENT_ID/peers"
|
||||
fi
|
||||
if [ "$PEERS_RC" != "0" ] || [ "$PEERS_CODE" -lt 200 ] || [ "$PEERS_CODE" -ge 300 ]; then
|
||||
fail "Peers endpoint unhealthy (curl_rc=$PEERS_RC, http=$PEERS_CODE) — not a clean 2xx, so 'reachable' would be a false-green. Body: $(head -c 200 "$PEERS_TMP" 2>/dev/null | sanitize_http_body)"
|
||||
fi
|
||||
ok "Peers endpoint reachable (HTTP $PEERS_CODE)"
|
||||
|
||||
ACTIVITY=$(tenant_call GET "/activity?workspace_id=$PARENT_ID&limit=5" 2>/dev/null || echo '[]')
|
||||
ACTIVITY_COUNT=$(echo "$ACTIVITY" | python3 -c "import json,sys
|
||||
d=json.load(sys.stdin)
|
||||
print(len(d if isinstance(d, list) else d.get('events', [])))" 2>/dev/null || echo 0)
|
||||
log " Activity events observed: $ACTIVITY_COUNT"
|
||||
# FAIL-CLOSED: the activity-log read was `|| echo '[]'` then the count was
|
||||
# only LOGGED, never asserted — a 5xx / network failure silently became an
|
||||
# empty list and the step exited 0 having validated nothing (false-green:
|
||||
# "validated nothing" class). Assert the endpoint returns a 2xx and a
|
||||
# parseable activity shape. We do NOT assert count>0 (the parent may
|
||||
# legitimately have 0 events this early — that's a real, valid state), but
|
||||
# we DO require the call to have actually succeeded and returned valid JSON.
|
||||
ACTIVITY_TMP=$(e2e_tmp /tmp/e2e_activity.XXXXXX)
|
||||
set +e
|
||||
ACTIVITY_CODE=$(tenant_call GET "/activity?workspace_id=$PARENT_ID&limit=5" \
|
||||
-o "$ACTIVITY_TMP" -w "%{http_code}" 2>/dev/null)
|
||||
ACTIVITY_RC=$?
|
||||
set -e
|
||||
ACTIVITY_CODE=${ACTIVITY_CODE:-000}
|
||||
if [ "$ACTIVITY_RC" != "0" ] || [ "$ACTIVITY_CODE" -lt 200 ] || [ "$ACTIVITY_CODE" -ge 300 ]; then
|
||||
fail "Activity-log endpoint unhealthy (curl_rc=$ACTIVITY_RC, http=$ACTIVITY_CODE) — was previously swallowed by '|| echo []' and reported as 0 events (false-green). Body: $(head -c 200 "$ACTIVITY_TMP" 2>/dev/null | sanitize_http_body)"
|
||||
fi
|
||||
ACTIVITY_COUNT=$(python3 -c "import json,sys
|
||||
d=json.load(open(sys.argv[1]))
|
||||
print(len(d if isinstance(d, list) else d.get('events', [])))" "$ACTIVITY_TMP" 2>/dev/null) \
|
||||
|| fail "Activity-log returned HTTP $ACTIVITY_CODE but body was not parseable JSON (events array / {events:[...]}). Body: $(head -c 200 "$ACTIVITY_TMP" 2>/dev/null | sanitize_http_body)"
|
||||
log " Activity events observed: $ACTIVITY_COUNT (endpoint 2xx + parseable ✓)"
|
||||
|
||||
# ─── 9c. Workspace KV memory Edit round-trip ─────────────────────────
|
||||
# Pins the Edit affordance added to the canvas Memory tab. The UI calls
|
||||
@@ -1261,14 +1376,44 @@ except Exception:
|
||||
[ -z "$DELEG_TEXT" ] && fail "Delegation returned no text. Raw: ${DELEG_RESP:0:200}"
|
||||
ok "Delegation proxy works (child responded: \"${DELEG_TEXT:0:60}\")"
|
||||
|
||||
CHILD_ACT=$(tenant_call GET "/activity?workspace_id=$CHILD_ID&limit=20" 2>/dev/null || echo '[]')
|
||||
if echo "$CHILD_ACT" | grep -q "$PARENT_ID"; then
|
||||
# FAIL-CLOSED via bounded readiness-POLL (was soft-logged false-green).
|
||||
# The activity pipeline is async, so an immediate single read can miss the
|
||||
# parent reference — but "did not reference parent" was previously just
|
||||
# LOGGED and the step passed regardless, so a genuinely broken provenance
|
||||
# pipeline (parent never recorded as source) read as success. Mechanism:
|
||||
# poll the child activity log for the parent id for a bounded window
|
||||
# (E2E_CHILD_ACTIVITY_TIMEOUT_SECS, default 60s) — this is the real
|
||||
# readiness signal (provenance row materialised), not a fixed sleep — and
|
||||
# hard-fail with a named mechanism if it never appears.
|
||||
CHILD_ACT_DEADLINE=$(( $(date +%s) + ${E2E_CHILD_ACTIVITY_TIMEOUT_SECS:-60} ))
|
||||
CHILD_ACT_SEEN=0
|
||||
CHILD_ACT_LASTCODE="000"
|
||||
while true; do
|
||||
CHILD_ACT_TMP=$(e2e_tmp /tmp/e2e_child_act.XXXXXX)
|
||||
set +e
|
||||
CHILD_ACT_CODE=$(tenant_call GET "/activity?workspace_id=$CHILD_ID&limit=20" \
|
||||
-o "$CHILD_ACT_TMP" -w "%{http_code}" 2>/dev/null)
|
||||
set -e
|
||||
CHILD_ACT_LASTCODE=${CHILD_ACT_CODE:-000}
|
||||
if grep -q "$PARENT_ID" "$CHILD_ACT_TMP" 2>/dev/null; then
|
||||
CHILD_ACT_SEEN=1
|
||||
break
|
||||
fi
|
||||
[ "$(date +%s)" -ge "$CHILD_ACT_DEADLINE" ] && break
|
||||
sleep 5
|
||||
done
|
||||
if [ "$CHILD_ACT_SEEN" = "1" ]; then
|
||||
ok "Child activity log records parent as source"
|
||||
else
|
||||
log "Child activity log did not reference parent (pipeline may be async)"
|
||||
fail "Child activity log never referenced parent $PARENT_ID within ${E2E_CHILD_ACTIVITY_TIMEOUT_SECS:-60}s (last http=$CHILD_ACT_LASTCODE) — delegation-provenance pipeline regression (parent not recorded as source). Previously soft-logged → false-green."
|
||||
fi
|
||||
fi
|
||||
|
||||
# ─── 11. Teardown runs via trap ────────────────────────────────────────
|
||||
# Fail-closed-on-skip: before declaring PASS, assert (when CI demanded a live
|
||||
# run) that every load-bearing lifecycle milestone actually fired. A run that
|
||||
# reaches here without provision→online→A2A having truly happened exits 5
|
||||
# instead of reporting green. Teardown still runs (EXIT trap) on that exit.
|
||||
require_live_or_die
|
||||
log "11/11 All checks passed. Teardown runs via EXIT trap."
|
||||
ok "═══ STAGING $MODE-SAAS E2E PASSED ═══"
|
||||
|
||||
Reference in New Issue
Block a user