From 52ff25ec99b89b0db054350e5fa5dd1de23bbb09 Mon Sep 17 00:00:00 2001 From: Molecule AI Core-BE Date: Tue, 12 May 2026 10:05:58 +0000 Subject: [PATCH] fix(tests/e2e): surface diagnose step Detail (subprocess stderr) in EIC smoke output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mc#687 / mc#424 root-cause finding: the smoke test was extracting only the Go error string (e.g. "exec: ... executable file not found") but ignoring the subprocess stderr captured in each diagnoseStep's Detail field. Detail carries the vendor-truth signal — e.g. "AccessDeniedException: ... is not authorized to perform: ec2-instance-connect:OpenTunnel" — which is exactly what was needed to root-cause the 21h mc#424 outage in 5 minutes instead of 21 hours. The fix extracts both error and detail from the first failing step and includes detail on its own line in the failure message. The parameter expansion ${DIAG_DETAIL:+<newline> detail (subprocess stderr): $DIAG_DETAIL} (where <newline> is a literal line break in the script, not the two characters \n) only appends the extra line when detail is non-empty, keeping the output clean for simple errors. 
Co-Authored-By: Claude Opus 4.7 --- tests/e2e/test_staging_full_saas.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/e2e/test_staging_full_saas.sh b/tests/e2e/test_staging_full_saas.sh index 2fa6892d..902bb286 100755 --- a/tests/e2e/test_staging_full_saas.sh +++ b/tests/e2e/test_staging_full_saas.sh @@ -511,8 +511,14 @@ for wid in $WS_TO_CHECK; do ok " $wid terminal-reachable (canvas terminal will work)" else DIAG_FAIL=$(echo "$DIAG_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); print(d.get('first_failure','unknown'))" 2>/dev/null || echo "unknown") - DIAG_DETAIL=$(echo "$DIAG_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); s=[x for x in d.get('steps',[]) if not x.get('ok')]; print(s[0].get('error','') if s else '')" 2>/dev/null || echo "") - fail "Workspace $wid terminal diagnose failed at step '$DIAG_FAIL': $DIAG_DETAIL — check tenant SG has tcp/22 from EIC endpoint SG (sg-0785d5c6138220523), EIC_ENDPOINT_SG_ID set in Railway, and EIC endpoint health" + # Extract both error (Go error string) and detail (subprocess stderr — vendor truth). + # detail carries subprocess stderr for EIC/ssh/tunnel failures, which is the + # actionable signal (e.g. "AccessDeniedException: ... is not authorized to perform + # ec2-instance-connect:OpenTunnel"). mc#687 / mc#424 root-cause finding. 
+ DIAG_ERR=$(echo "$DIAG_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); s=[x for x in d.get('steps',[]) if not x.get('ok')]; print(s[0].get('error','') if s else '')" 2>/dev/null || echo "") + DIAG_DETAIL=$(echo "$DIAG_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); s=[x for x in d.get('steps',[]) if not x.get('ok')]; print(s[0].get('detail','') if s else '')" 2>/dev/null || echo "") + fail "Workspace $wid terminal diagnose failed at step '$DIAG_FAIL': $DIAG_ERR${DIAG_DETAIL:+ + detail (subprocess stderr): $DIAG_DETAIL} — check tenant SG has tcp/22 from EIC endpoint SG (sg-0785d5c6138220523), EIC_ENDPOINT_SG_ID set in Railway, and EIC endpoint health" fi done -- 2.45.2