fix(e2e): wait for routable workspace before A2A #1666
@@ -19,11 +19,18 @@
|
||||
# PR #2558+#2563+#2567 cleared the
|
||||
# masking layers.)
|
||||
#
|
||||
# claude-code → "sonnet" (entry-id form: claude-code template's
|
||||
# config.yaml uses bare model names,
|
||||
# auth comes via CLAUDE_CODE_OAUTH_TOKEN
|
||||
# or ANTHROPIC_API_KEY rather than the
|
||||
# slug.)
|
||||
# claude-code → auth-aware:
|
||||
# E2E_MINIMAX_API_KEY → "MiniMax-M2"
|
||||
# E2E_ANTHROPIC_API_KEY → "claude-sonnet-4-6"
|
||||
# otherwise → "sonnet"
|
||||
#
|
||||
# claude-code provider routing is model-driven. The bare
|
||||
# "sonnet" alias selects the OAuth provider, so it is only a
|
||||
# good default when the canary is using Claude Code OAuth or
|
||||
# intentionally exercising the missing-auth path. MiniMax and
|
||||
# direct Anthropic API keys need model IDs that resolve to
|
||||
# their provider entries, otherwise the workspace boots
|
||||
# reachable but the first A2A call hits the wrong auth path.
|
||||
#
|
||||
# When E2E_MODEL_SLUG is set, it overrides this dispatch — useful when an
|
||||
# operator dispatches the workflow to test a specific slug.
|
||||
@@ -45,7 +52,15 @@ pick_model_slug() {
|
||||
case "$runtime" in
|
||||
hermes) printf 'openai/gpt-4o' ;;
|
||||
langgraph) printf 'openai:gpt-4o' ;;
|
||||
claude-code) printf 'sonnet' ;;
|
||||
claude-code)
|
||||
if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
|
||||
printf 'MiniMax-M2'
|
||||
elif [ -n "${E2E_ANTHROPIC_API_KEY:-}" ]; then
|
||||
printf 'claude-sonnet-4-6'
|
||||
else
|
||||
printf 'sonnet'
|
||||
fi
|
||||
;;
|
||||
*) printf 'openai/gpt-4o' ;; # safest fallback (matches hermes)
|
||||
esac
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ set -uo pipefail
|
||||
# Resolve to the lib relative to this test file so the test runs from
|
||||
# any cwd (CI, local invocation, repo root).
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
# shellcheck source=lib/model_slug.sh
|
||||
# shellcheck source=tests/e2e/lib/model_slug.sh
|
||||
source "$SCRIPT_DIR/lib/model_slug.sh"
|
||||
|
||||
PASS=0
|
||||
@@ -48,7 +48,16 @@ echo
|
||||
# ── Per-runtime branches (the load-bearing ones for synth-E2E) ──
|
||||
run_test "hermes → slash-form (derive-provider.sh contract)" hermes "openai/gpt-4o"
|
||||
run_test "langgraph → colon-form (init_chat_model contract)" langgraph "openai:gpt-4o"
|
||||
run_test "claude-code → bare model name (entry-id form)" claude-code "sonnet"
|
||||
run_test "claude-code → OAuth/default alias" claude-code "sonnet"
|
||||
|
||||
got=$(unset E2E_MODEL_SLUG E2E_ANTHROPIC_API_KEY; E2E_MINIMAX_API_KEY="mx-test" pick_model_slug claude-code)
|
||||
assert_eq "claude-code + MiniMax key → MiniMax model" "$got" "MiniMax-M2"
|
||||
|
||||
got=$(unset E2E_MODEL_SLUG E2E_MINIMAX_API_KEY; E2E_ANTHROPIC_API_KEY="sk-ant-test" pick_model_slug claude-code)
|
||||
assert_eq "claude-code + Anthropic API key → Anthropic API model" "$got" "claude-sonnet-4-6"
|
||||
|
||||
got=$(unset E2E_MODEL_SLUG; E2E_MINIMAX_API_KEY="mx-priority" E2E_ANTHROPIC_API_KEY="sk-ant-loser" pick_model_slug claude-code)
|
||||
assert_eq "claude-code + both keys → MiniMax priority" "$got" "MiniMax-M2"
|
||||
|
||||
# ── Fallback for unknown runtime ──
|
||||
# Picks slash-form (hermes-shaped) since hermes is the historical
|
||||
|
||||
@@ -350,6 +350,17 @@ tenant_call() {
|
||||
"$@"
|
||||
}
|
||||
|
||||
sanitize_http_body() {
|
||||
python3 -c '
|
||||
import re, sys
|
||||
s = sys.stdin.read()
|
||||
s = re.sub(r"(?i)(Authorization:\s*Bearer\s+)[A-Za-z0-9._~+/=-]+", r"\1[redacted]", s)
|
||||
s = re.sub(r"(?i)(\"(?:auth_token|access_token|refresh_token|token|api_key|secret|password)\"\s*:\s*\")[^\"]+\"", r"\1[redacted]\"", s)
|
||||
s = re.sub(r"(?i)((?:auth_token|access_token|refresh_token|api_key|secret|password)=)[^&\s]+", r"\1[redacted]", s)
|
||||
print(s[:4000])
|
||||
'
|
||||
}
|
||||
|
||||
# ─── 5. Provision parent workspace ─────────────────────────────────────
|
||||
# Inject the LLM provider key so the runtime can authenticate at boot.
|
||||
# Branch by which secret is set so the script supports multiple paths
|
||||
@@ -402,9 +413,9 @@ elif [ -n "${E2E_ANTHROPIC_API_KEY:-}" ]; then
|
||||
# is still independent of MOLECULE_STAGING_OPENAI_API_KEY, so an OpenAI
|
||||
# quota collapse doesn't wedge this path. Pinned to the claude-code
|
||||
# runtime: hermes/langgraph use OpenAI-shaped envs and won't honour
|
||||
# ANTHROPIC_API_KEY without further wiring (out of scope for this
|
||||
# branch; if you need a hermes/Anthropic path, dispatch with
|
||||
# E2E_RUNTIME=hermes + E2E_OPENAI_API_KEY pointing at a working key).
|
||||
# ANTHROPIC_API_KEY without further wiring. pick_model_slug maps this
|
||||
# branch to claude-sonnet-4-6 so the claude-code provider registry
|
||||
# selects anthropic-api instead of the OAuth-only sonnet alias.
|
||||
SECRETS_JSON=$(python3 -c "
|
||||
import json, os
|
||||
k = os.environ['E2E_ANTHROPIC_API_KEY']
|
||||
@@ -429,6 +440,7 @@ print(json.dumps({
|
||||
fi
|
||||
|
||||
MODEL_SLUG=$(pick_model_slug "$RUNTIME")
|
||||
log " MODEL_SLUG=$MODEL_SLUG"
|
||||
|
||||
log "5/11 Provisioning parent workspace (runtime=$RUNTIME)..."
|
||||
PARENT_RESP=$(tenant_call POST /workspaces \
|
||||
@@ -456,7 +468,7 @@ fi
|
||||
# deadline fires at 5 min and sets status=failed prematurely; heartbeat
|
||||
# then transitions failed → online after install.sh finishes. So:
|
||||
#
|
||||
# - 20 min deadline (hermes worst-case + slack)
|
||||
# - 30 min deadline (hermes worst-case + slack)
|
||||
# - 'failed' is a TRANSIENT state we must tolerate — log and keep
|
||||
# polling, only hard-fail at the deadline. Pre-bootstrap-watcher-fix
|
||||
# (controlplane#245) this was a flake generator: workspace went
|
||||
@@ -467,21 +479,37 @@ WS_TO_CHECK="$PARENT_ID"
|
||||
[ -n "$CHILD_ID" ] && WS_TO_CHECK="$WS_TO_CHECK $CHILD_ID"
|
||||
for wid in $WS_TO_CHECK; do
|
||||
WS_LAST_STATUS=""
|
||||
WS_LAST_URL=""
|
||||
WS_URL_MISSING_LOGGED=0
|
||||
WS_FAILED_LOGGED=0
|
||||
while true; do
|
||||
if [ "$(date +%s)" -gt "$WS_DEADLINE" ]; then
|
||||
WS_LAST_ERR=$(tenant_call GET "/workspaces/$wid" 2>/dev/null | \
|
||||
python3 -c "import json,sys; print(json.load(sys.stdin).get('last_sample_error',''))" 2>/dev/null || echo "")
|
||||
fail "Workspace $wid never reached online within 20 min (last status=$WS_LAST_STATUS, err=$WS_LAST_ERR)"
|
||||
fail "Workspace $wid never reached online with a routable URL within 30 min (last status=$WS_LAST_STATUS, url=$WS_LAST_URL, err=$WS_LAST_ERR)"
|
||||
fi
|
||||
WS_JSON=$(tenant_call GET "/workspaces/$wid" 2>/dev/null || echo '{}')
|
||||
WS_STATUS=$(echo "$WS_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))" 2>/dev/null)
|
||||
WS_STATUS=$(echo "$WS_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status') or '')" 2>/dev/null)
|
||||
WS_URL=$(echo "$WS_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin).get('url') or '')" 2>/dev/null)
|
||||
if [ "$WS_STATUS" != "$WS_LAST_STATUS" ]; then
|
||||
log " $wid → $WS_STATUS"
|
||||
WS_LAST_STATUS="$WS_STATUS"
|
||||
fi
|
||||
if [ -n "$WS_URL" ] && [ "$WS_URL" != "$WS_LAST_URL" ]; then
|
||||
log " $wid url ready: $WS_URL"
|
||||
WS_LAST_URL="$WS_URL"
|
||||
fi
|
||||
case "$WS_STATUS" in
|
||||
online) break ;;
|
||||
online)
|
||||
if [ -n "$WS_URL" ]; then
|
||||
break
|
||||
fi
|
||||
if [ "$WS_URL_MISSING_LOGGED" = "0" ]; then
|
||||
log " $wid online but URL is not assigned yet — waiting for workspace routing readiness"
|
||||
WS_URL_MISSING_LOGGED=1
|
||||
fi
|
||||
sleep 10
|
||||
;;
|
||||
failed)
|
||||
# Not a hard fail — bootstrap-watcher frequently marks failed at
|
||||
# 5 min on hermes, then heartbeat recovers to online around 10-13
|
||||
@@ -496,7 +524,7 @@ for wid in $WS_TO_CHECK; do
|
||||
*) sleep 10 ;;
|
||||
esac
|
||||
done
|
||||
ok " $wid online"
|
||||
ok " $wid online and routable"
|
||||
done
|
||||
|
||||
# ─── 7b. Canvas-terminal diagnose (EIC chain probe) ────────────────────
|
||||
@@ -631,10 +659,40 @@ print(json.dumps({
|
||||
# 90s gives ~3x headroom over observed cold-call P95 (~25-30s).
|
||||
# Subsequent A2A turns hit the same workspace and are sub-second, so
|
||||
# this only widens the window for step 8/11 of the canary's first turn.
|
||||
A2A_RESP=$(tenant_call POST "/workspaces/$PARENT_ID/a2a" \
|
||||
--max-time 90 \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$A2A_PAYLOAD")
|
||||
A2A_TMP=$(mktemp -t synth_a2a.XXXXXX)
|
||||
for A2A_ATTEMPT in $(seq 1 12); do
|
||||
: >"$A2A_TMP"
|
||||
set +e
|
||||
A2A_CODE=$(tenant_call POST "/workspaces/$PARENT_ID/a2a" \
|
||||
--max-time 90 \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$A2A_PAYLOAD" \
|
||||
-o "$A2A_TMP" \
|
||||
-w '%{http_code}' \
|
||||
2>/dev/null)
|
||||
A2A_RC=$?
|
||||
set -e
|
||||
A2A_CODE=${A2A_CODE:-000}
|
||||
A2A_RESP=$(cat "$A2A_TMP" 2>/dev/null || echo "")
|
||||
if [ "$A2A_RC" = "0" ] && [ "$A2A_CODE" -ge 200 ] && [ "$A2A_CODE" -lt 300 ]; then
|
||||
break
|
||||
fi
|
||||
|
||||
A2A_SAFE_BODY=$(printf '%s' "$A2A_RESP" | sanitize_http_body)
|
||||
if [ "$A2A_CODE" = "503" ] && echo "$A2A_SAFE_BODY" | grep -Eqi 'Service Unavailable|workspace agent unreachable|connection refused|no healthy upstream'; then
|
||||
log " A2A cold-start probe attempt $A2A_ATTEMPT/12 returned 503: $A2A_SAFE_BODY"
|
||||
if [ "$A2A_ATTEMPT" -lt 12 ]; then
|
||||
sleep 10
|
||||
continue
|
||||
fi
|
||||
fi
|
||||
break
|
||||
done
|
||||
rm -f "$A2A_TMP"
|
||||
if [ "$A2A_RC" != "0" ] || [ "$A2A_CODE" -lt 200 ] || [ "$A2A_CODE" -ge 300 ]; then
|
||||
A2A_SAFE_BODY=$(printf '%s' "$A2A_RESP" | sanitize_http_body)
|
||||
fail "A2A POST /workspaces/$PARENT_ID/a2a failed after $A2A_ATTEMPT attempt(s) (curl_rc=$A2A_RC, http=$A2A_CODE): $A2A_SAFE_BODY"
|
||||
fi
|
||||
AGENT_TEXT=$(echo "$A2A_RESP" | python3 -c "
|
||||
import json, sys
|
||||
d = json.load(sys.stdin)
|
||||
|
||||
Reference in New Issue
Block a user