test(e2e): poll A2A queue + advisory infra-skip for local-provision (#2897 #2917) #2928

Closed
agent-dev-a wants to merge 2 commits from fix/local-provision-a2a-queue-poll into main
+105 -1
View File
@@ -614,6 +614,21 @@ else
fi
echo ""
# ----------------------------------------------------------------------------
# infra_skip_advisory REASON [DETAIL]   (core#2917 follow-on)
#
# Ends the run green-with-skip (exit 0) when the platform A2A layer is under a
# known transient degradation (queue never drains / gateway errors) -- but only
# on the advisory real-LLM lane, i.e. when LIFECYCLE_LLM is "minimax". On any
# other lane it prints nothing and returns 0, so the mandatory stub lane keeps
# asserting the real proxy round-trip.
#   $1  REASON  short tag printed after "infra-skip:" and in the results line
#   $2  DETAIL  optional context appended after the reason (default: empty)
infra_skip_advisory() {
  local why="$1"
  local extra="${2:-}"

  # Guard: anything that is not the advisory lane carries on unchanged.
  [ "$LIFECYCLE_LLM" = "minimax" ] || return 0

  # The detail is separated from the reason by one space, and omitted entirely
  # (no trailing space) when it is empty.
  local suffix=""
  if [ -n "$extra" ]; then
    suffix=" $extra"
  fi

  printf '[%s] ⚠️ scan_status: infra-skip:%s%s\n' "$(date +%H:%M:%S)" "$why" "$suffix"
  printf '=== Results: advisory infra-skip (%s) ===\n' "$why"
  exit 0
}
# ----------------------------------------------------------------------------
# Step 5 — proxy reach (ws-<id>:8000 Docker-DNS rewrite, end to end).
# ----------------------------------------------------------------------------
@@ -640,9 +655,98 @@ print(json.dumps({'method':'message/send','params':{'message':{'role':'user','pa
# slower than the stub; give the real-LLM call a longer ceiling.
A2A_CEIL="$A2A_TIMEOUT"
# The minimax lane replaces the ceiling with A2A_MINIMAX_TIMEOUT, falling back
# to 120 when that variable is unset or empty.
case "$LIFECYCLE_LLM" in
  minimax) A2A_CEIL="${A2A_MINIMAX_TIMEOUT:-120}" ;;
esac
A2A=$(curl -s --max-time "$A2A_CEIL" -X POST "$BASE/workspaces/$WSID/a2a" \
# Send the initial A2A POST. The response body goes to a scratch file and the
# HTTP status code comes back on stdout, so gateway failures and queued
# responses can be told apart afterwards.
A2A_TMP=$(mktemp)
# set +e / set -e bracket the call so a failing curl cannot abort the script
# before its exit status has been recorded in A2A_RC.
set +e
A2A_CODE=$(
  curl -s --max-time "$A2A_CEIL" \
    -X POST \
    -H "Content-Type: application/json" \
    -d "$A2A_BODY" \
    -o "$A2A_TMP" -w '%{http_code}' \
    "$BASE/workspaces/$WSID/a2a"
)
A2A_RC=$?
set -e
# An unreadable scratch file simply leaves A2A empty.
A2A=$(cat "$A2A_TMP" 2>/dev/null || true)
rm -f "$A2A_TMP"
# A non-zero curl status, an HTTP code of "000", or any 5xx on this first POST
# is an A2A-layer infra issue rather than a local-provision code regression.
# infra_skip_advisory only ends the run on the advisory lane.
if [[ "$A2A_RC" -ne 0 || "$A2A_CODE" == "000" || "$A2A_CODE" == 5* ]]; then
  infra_skip_advisory "a2a-gateway-error" "curl_rc=$A2A_RC http=$A2A_CODE"
fi
# core#2917: the A2A proxy can return a 202-queued envelope instead of a
# synchronous result. Poll the durable queue result; if the queue never drains,
# infra-skip the advisory lane rather than falsely blaming local-provision code.

# a2a_json_probe MODE [KEY] -- inspect the JSON object on stdin with python3.
#   is-queued   prints "true" when .queued is true or .status equals "queued"
#               (case-insensitive), otherwise "false"
#   raw KEY     prints the top-level value of KEY ("" when absent or null)
#   json KEY    prints KEY re-serialised as JSON ("" when absent or null)
# Unparseable input, or python3 failing to run, yields the fallback value
# ("false" for is-queued, "" otherwise). Always returns 0.
a2a_json_probe() {
  python3 -c '
import json
import sys

mode = sys.argv[1]
key = sys.argv[2] if len(sys.argv) > 2 else ""
try:
    doc = json.load(sys.stdin)
    if mode == "is-queued":
        status = doc.get("status") or ""
        queued = doc.get("queued") is True or status.lower() == "queued"
        print("true" if queued else "false")
    elif mode == "json":
        value = doc.get(key)
        print(json.dumps(value) if value is not None else "")
    else:
        value = doc.get(key)
        # A JSON null must not leak out as the literal string "None".
        print("" if value is None else value)
except Exception:
    print("false" if mode == "is-queued" else "")
' "$@" 2>/dev/null || {
    if [ "${1:-}" = "is-queued" ]; then echo "false"; fi
  }
}

# a2a_poll_queue_result QUEUE_ID -- poll the durable queue until it settles.
# Globals read:    BASE, WSID,
#                  A2A_QUEUE_POLL_MAX (attempts, default 30),
#                  A2A_QUEUE_POLL_INTERVAL (seconds between attempts, default 2)
# Globals written: A2A (replaced by response_body only when the item completes
#                  with a non-empty body; otherwise left untouched),
#                  A2A_POLL_COUNT, A2A_LAST_STATUS
# Every non-resolved outcome is reported through infra_skip_advisory, which
# only ends the run on the advisory lane; elsewhere this function returns 0
# with A2A unchanged. Returns 1 only if the scratch file cannot be created.
a2a_poll_queue_result() {
  local queue_id="$1"
  local max_polls="${A2A_QUEUE_POLL_MAX:-30}"
  local interval="${A2A_QUEUE_POLL_INTERVAL:-2}"
  local poll_tmp poll_resp poll_status body attempt
  # Explicit outcome instead of testing A2A for emptiness: A2A still holds the
  # (non-empty) queued envelope while polling, so an emptiness check can never
  # detect a queue that fails to drain.
  local outcome="timeout"

  poll_tmp=$(mktemp) || return 1
  A2A_LAST_STATUS=""
  A2A_POLL_COUNT=0
  for ((attempt = 1; attempt <= max_polls; attempt++)); do
    : >"$poll_tmp"
    # A failed poll request just leaves the scratch file empty, which reads as
    # an empty status and is retried below.
    curl -s -o "$poll_tmp" --max-time 30 \
      -H "X-Workspace-ID: $WSID" \
      "$BASE/workspaces/$WSID/a2a/queue/$queue_id" >/dev/null 2>&1 || true
    poll_resp=$(cat "$poll_tmp" 2>/dev/null || true)
    poll_status=$(printf '%s' "$poll_resp" | a2a_json_probe raw status)
    A2A_LAST_STATUS="$poll_status"
    A2A_POLL_COUNT=$attempt
    case "$poll_status" in
      completed)
        body=$(printf '%s' "$poll_resp" | a2a_json_probe json response_body)
        if [ -n "$body" ]; then
          A2A="$body"
          outcome="resolved"
          break
        fi
        # Completed but no body yet: retry, with back-off rather than a hot loop.
        echo " queue poll $attempt/$max_polls status=$poll_status (empty response_body) — backing off ${interval}s"
        sleep "$interval"
        ;;
      failed | dropped)
        outcome="terminal"
        break
        ;;
      queued | dispatched | in_progress | "")
        echo " queue poll $attempt/$max_polls status=$poll_status — backing off ${interval}s"
        sleep "$interval"
        ;;
      *)
        outcome="unexpected"
        break
        ;;
    esac
  done
  # Clean up before reporting: infra_skip_advisory may exit the script.
  rm -f "$poll_tmp"

  case "$outcome" in
    resolved) ;;
    terminal)
      infra_skip_advisory "a2a-queue-terminal" "queue_id=$queue_id status=$A2A_LAST_STATUS"
      ;;
    unexpected)
      infra_skip_advisory "a2a-queue-unexpected" "queue_id=$queue_id status=$A2A_LAST_STATUS"
      ;;
    *)
      infra_skip_advisory "a2a-queue-timeout" "queue_id=$queue_id poll_count=${A2A_POLL_COUNT}/${max_polls} last_status=${A2A_LAST_STATUS:-<empty>}"
      ;;
  esac
  return 0
}

A2A_QUEUED=$(printf '%s' "$A2A" | a2a_json_probe is-queued)
if [ "$A2A_QUEUED" = "true" ]; then
  A2A_QID=$(printf '%s' "$A2A" | a2a_json_probe raw queue_id)
  if [ -z "$A2A_QID" ]; then
    infra_skip_advisory "a2a-queued-no-queue-id" "initial POST was queued but returned no queue_id"
    # Only a non-advisory lane gets here. Without a queue_id there is nothing
    # to poll, so A2A keeps the queued envelope for the checks that follow.
  else
    echo " A2A queued (queue_id=$A2A_QID); polling durable result..."
    a2a_poll_queue_result "$A2A_QID"
  fi
fi
# Extract the assistant text part once (shared by the minimax assertion +
# diagnostics). Tolerates result.parts[].text and result.message.parts[].text.
a2a_text() {