From 8d61ffb68cb30407c54befc78cfcb19d408ce7e4 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Mon, 15 Jun 2026 14:43:25 +0000 Subject: [PATCH] test(e2e): narrowly scoped infra-skip for local-provision queued A2A (#2897 follow-up) Refine the advisory-lane A2A skip added in #2897 to match the #2922 staging-saas discipline: - Skip only verified transient A2A-layer signatures: * curl connect timeout (rc=28, http=000), * gateway-edge 502/503/504 responses that do NOT carry agent-origin text, * queued response with no queue_id, * queue poll timeout stuck in queued/dispatched/in_progress/empty. - Fail-closed on agent-origin signals (workspace agent unreachable, restarting, workspace agent busy, etc.) so real runtime regressions still surface on the advisory lane. - Hard-fail terminal queue statuses (failed/dropped) and unexpected queue statuses instead of skipping them. - Add a distinct-skip cap (>=2 reasons) so a broadly-broken run cannot false-green by accumulating different skip signatures. Relates #2897, #2917, #2922. --- .../e2e/test_local_provision_lifecycle_e2e.sh | 189 +++++++++++------- 1 file changed, 117 insertions(+), 72 deletions(-) diff --git a/tests/e2e/test_local_provision_lifecycle_e2e.sh b/tests/e2e/test_local_provision_lifecycle_e2e.sh index f4d7ee56b..8251efd80 100755 --- a/tests/e2e/test_local_provision_lifecycle_e2e.sh +++ b/tests/e2e/test_local_provision_lifecycle_e2e.sh @@ -196,60 +196,6 @@ check() { pass() { echo "PASS: $1"; PASS=$((PASS + 1)); } fail() { echo "FAIL: $1"; [ -n "${2:-}" ] && echo " $2"; FAIL=$((FAIL + 1)); } -# Advisory-lane infra-skip helper. When the A2A layer itself is degraded -# (queue never drains after a queued response), exit 0 with a scan_status line -# rather than false-red on an advisory job. Fail-closed on repeated skips is -# handled in the staging helper (#2917); this local-provision lane is advisory. -infra_skip() { - local reason="$1" detail="${2:-}" - echo "[$(date +%H:%M:%S)] ⚠️ scan_status: infra-skip:${reason}${detail:+ $detail}" - echo "=== Results: $PASS passed, $FAIL failed (infra-skip: $reason) ===" - exit 0 -} - -# Poll a queued A2A task until it completes, fails, or times out. -# Prints the response_body (JSON-RPC result envelope) on success; returns 1 on -# failure/timeout so the caller can decide to hard-fail or infra-skip. -poll_a2a_queue() { - local ws_id="$1" qid="$2" deadline="${3:-120}" - local resp="" qstatus="" result_body="" - local start elapsed - start=$(date +%s) - while true; do - elapsed=$(($(date +%s) - start)) - if [ "$elapsed" -ge "$deadline" ]; then - return 1 - fi - resp=$(curl -s --max-time 30 "$BASE/workspaces/$ws_id/a2a/queue/$qid" \ - -H "Content-Type: application/json" \ - -H "X-Workspace-ID: $ws_id" 2>/dev/null || echo "") - qstatus=$(echo "$resp" | python3 -c "import sys,json -try: - print(json.load(sys.stdin).get('status','')) -except Exception: - print('')" 2>/dev/null || echo "") - case "$qstatus" in - completed) - result_body=$(echo "$resp" | python3 -c "import sys,json -try: - rb=json.load(sys.stdin).get('response_body') - print(json.dumps(rb) if rb is not None else '') -except Exception: - print('')" 2>/dev/null || echo "") - if [ -n "$result_body" ]; then - printf '%s' "$result_body" - return 0 - fi - ;; - failed|dropped) - return 1 - ;; - esac - echo " queue poll for $qid status=$qstatus elapsed=${elapsed}s — backing off 2s" >&2 - sleep 2 - done -} - admin_curl() { local _a=(); e2e_admin_auth_args _a curl -s "${_a[@]+"${_a[@]}"}" "$@" @@ -668,6 +614,35 @@ else fi echo "" +# ---------------------------------------------------------------------------- +# Advisory-lane infra-skip helper (core#2917 follow-on). The mandatory stub +# lane must keep asserting the real proxy round-trip; only the advisory +# real-LLM lane may go green-with-skip when the platform A2A layer is under +# a known transient degradation (genuine gateway/queued-A2A signatures only). +INFRA_SKIP_REASONS="" +infra_skip_advisory() { + local reason="$1" + local detail="${2:-}" + if [ "$LIFECYCLE_LLM" != "minimax" ]; then + return + fi + # Cap distinct skip reasons: a broadly broken A2A layer that yields two + # different skip signatures in one run must not false-green the advisory lane. + case " $INFRA_SKIP_REASONS " in + *" $reason "*) ;; + *) INFRA_SKIP_REASONS="$INFRA_SKIP_REASONS $reason" ;; + esac + local distinct_count + distinct_count=$(echo "$INFRA_SKIP_REASONS" | wc -w | tr -d ' ') + if [ "$distinct_count" -ge 2 ]; then + fail "infra-skip cap exceeded ($distinct_count distinct reasons:${INFRA_SKIP_REASONS:-none}) — refusing false-green on repeated A2A-layer degradation" + return + fi + echo "[$(date +%H:%M:%S)] ⚠️ scan_status: infra-skip:${reason}${detail:+ $detail}" + echo "=== Results: advisory infra-skip (${reason}) ===" + exit 0 +} + # ---------------------------------------------------------------------------- # Step 5 — proxy reach (ws-:8000 Docker-DNS rewrite, end to end). # ---------------------------------------------------------------------------- @@ -694,38 +669,108 @@ print(json.dumps({'method':'message/send','params':{'message':{'role':'user','pa # slower than the stub; give the real-LLM call a longer ceiling. A2A_CEIL="$A2A_TIMEOUT" [ "$LIFECYCLE_LLM" = "minimax" ] && A2A_CEIL="${A2A_MINIMAX_TIMEOUT:-120}" -A2A=$(curl -s --max-time "$A2A_CEIL" -X POST "$BASE/workspaces/$WSID/a2a" \ + +# Capture both body and HTTP code so we can detect gateway/queued responses. +A2A_TMP=$(mktemp) +set +e +A2A_CODE=$(curl -s -o "$A2A_TMP" -w '%{http_code}' --max-time "$A2A_CEIL" \ + -X POST "$BASE/workspaces/$WSID/a2a" \ -H "Content-Type: application/json" \ -d "$A2A_BODY") +A2A_RC=$? +set -e +A2A=$(cat "$A2A_TMP" 2>/dev/null || echo "") +rm -f "$A2A_TMP" -# If the platform queued the A2A request, poll the durable queue result. -# core#2917-follow-on: staging-SaaS saw the same A2A-layer degradation where -# the initial POST returns 202-queued and the queue item never drains in time. -# The local-provision advisory lane should handle this without false-red. -A2A_QUEUED=$(echo "$A2A" | python3 -c "import sys,json +# Gateway/transport failure on the initial POST is an A2A-layer infra issue, +# not a local-provision code regression. Only skip the advisory lane. +# Fail-closed on agent-origin signals (workspace agent unreachable, restarting, +# etc.) — those can hide a real workspace-agent regression and must still FAIL. +if [ "$A2A_RC" -eq 28 ] && [ "$A2A_CODE" = "000" ]; then + infra_skip_advisory "a2a-connect-timeout" "curl_rc=$A2A_RC http=$A2A_CODE" +fi +if echo "$A2A_CODE" | grep -Eq '^(502|503|504)$'; then + if ! echo "$A2A" | grep -Eqi 'workspace agent unreachable|connection refused|workspace agent busy|native_session|restarting|restart triggered'; then + infra_skip_advisory "a2a-gateway-error" "curl_rc=$A2A_RC http=$A2A_CODE" + fi +fi + +# core#2917: the A2A proxy can return a 202-queued envelope instead of a +# synchronous result. Poll the durable queue result; if the queue never drains, +# infra-skip the advisory lane rather than falsely blaming local-provision code. +A2A_QUEUED=$(printf '%s' "$A2A" | python3 -c " +import sys,json try: d=json.load(sys.stdin) print('true' if d.get('queued') is True or (d.get('status') or '').lower() == 'queued' else 'false') except Exception: print('false')" 2>/dev/null || echo "false") if [ "$A2A_QUEUED" = "true" ]; then - QUEUE_ID=$(echo "$A2A" | python3 -c "import sys,json + A2A_QID=$(printf '%s' "$A2A" | python3 -c " +import sys,json try: print(json.load(sys.stdin).get('queue_id','')) except Exception: print('')" 2>/dev/null || echo "") - if [ -n "$QUEUE_ID" ]; then - echo " A2A queued (queue_id=$QUEUE_ID); polling durable result..." >&2 - if A2A_POLL=$(poll_a2a_queue "$WSID" "$QUEUE_ID" 120); then - A2A="$A2A_POLL" - else - infra_skip "a2a-queue-timeout" "queue_id=$QUEUE_ID on local-provision advisory lane" + if [ -z "$A2A_QID" ]; then + infra_skip_advisory "a2a-queued-no-queue-id" "initial POST was queued but returned no queue_id" + fi + echo " A2A queued (queue_id=$A2A_QID); polling durable result..." + A2A_POLL_TMP=$(mktemp) + A2A_LAST_STATUS="" + A2A_POLL_COUNT=0 + for poll_attempt in $(seq 1 30); do + : >"$A2A_POLL_TMP" + set +e + curl -s -o "$A2A_POLL_TMP" -w '%{http_code}' --max-time 30 \ + -H "X-Workspace-ID: $WSID" \ + "$BASE/workspaces/$WSID/a2a/queue/$A2A_QID" >/dev/null 2>&1 + set -e + A2A_POLL_RESP=$(cat "$A2A_POLL_TMP" 2>/dev/null || echo "") + A2A_POLL_STATUS=$(printf '%s' "$A2A_POLL_RESP" | python3 -c " +import sys,json +try: + print(json.load(sys.stdin).get('status','')) +except Exception: + print('')" 2>/dev/null || echo "") + A2A_LAST_STATUS="$A2A_POLL_STATUS" + A2A_POLL_COUNT=$poll_attempt + case "$A2A_POLL_STATUS" in + completed) + A2A=$(printf '%s' "$A2A_POLL_RESP" | python3 -c " +import sys,json +try: + rb=json.load(sys.stdin).get('response_body') + print(json.dumps(rb) if rb is not None else '') +except Exception: + print('')" 2>/dev/null || echo "") + if [ -n "$A2A" ]; then + break + fi + ;; + failed|dropped) + rm -f "$A2A_POLL_TMP" + fail "A2A queue item terminal status=$A2A_POLL_STATUS" "queue_id=$A2A_QID" + break + ;; + queued|dispatched|in_progress|"") + echo " queue poll $poll_attempt/30 status=$A2A_POLL_STATUS — backing off 2s" + sleep 2 + ;; + *) + rm -f "$A2A_POLL_TMP" + fail "A2A queue poll unexpected status=$A2A_POLL_STATUS" "queue_id=$A2A_QID" + break + ;; + esac + done + rm -f "$A2A_POLL_TMP" + if [ -z "$A2A" ]; then + if [ "$FAIL" -gt 0 ]; then + echo "=== Results: $PASS passed, $FAIL failed ===" + exit 1 fi - else - # push-async queued response with no queue_id exposed — we can't poll for - # a durable result, so treat it as A2A-layer infra degradation on the - # advisory lane rather than a product regression. - infra_skip "a2a-queued-no-queue-id" "push-async queued response on local-provision advisory lane" + infra_skip_advisory "a2a-queue-timeout" "queue_id=$A2A_QID poll_count=${A2A_POLL_COUNT}/30 last_status=${A2A_LAST_STATUS:-}" fi fi -- 2.52.0