From 5aaac7d2d950f79db05e6c55155264516509bc3c Mon Sep 17 00:00:00 2001
From: Hongming Wang
Date: Sat, 2 May 2026 04:33:30 -0700
Subject: [PATCH] test(e2e): unified A2A round-trip parity harness across all 4 runtimes

Adds two scripts:

  scripts/test-all-runtimes-a2a-e2e.sh
    Provisions one workspace per runtime (claude-code, hermes, codex,
    openclaw), sets provider keys, waits online, then sends a round-trip
    message followed by a two-message session-continuity probe to each
    workspace. Cleans up via trap on EXIT.

  scripts/test-hermes-plugin-e2e.sh
    Hermes-only variant focused on the plugin /a2a/inbound path.
    Proof-point: session continuity between turns (the plugin path's
    deliverable; old chat-completions path lost context per turn).
    Cleans up via trap on EXIT.

The all-runtimes script honors SKIP_<RUNTIME> env vars for incremental
testing. Both scripts send the Origin header that the SaaS edge WAF
requires (per reference_saas_waf_origin_header.md).

Run:
  PLATFORM=https://demo-tenant.staging.moleculesai.app \
    ./scripts/test-all-runtimes-a2a-e2e.sh

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 scripts/test-all-runtimes-a2a-e2e.sh | 296 ++++++++++++++++++++++++++
 scripts/test-hermes-plugin-e2e.sh    | 310 +++++++++++++++++++++++++++
 2 files changed, 606 insertions(+)
 create mode 100755 scripts/test-all-runtimes-a2a-e2e.sh
 create mode 100755 scripts/test-hermes-plugin-e2e.sh

diff --git a/scripts/test-all-runtimes-a2a-e2e.sh b/scripts/test-all-runtimes-a2a-e2e.sh
new file mode 100755
--- /dev/null
+++ b/scripts/test-all-runtimes-a2a-e2e.sh
@@ -0,0 +1,296 @@
+#!/usr/bin/env bash
+# E2E test: A2A round-trip parity across all four runtimes.
+#
+# Validates that for each of {claude-code, hermes, codex, openclaw}:
+#   1. A workspace can be provisioned + brought online
+#   2. The adapter responds to A2A message/send
+#   3. The reply contains expected content (echo of the prompt)
+#   4. A SECOND message preserves session state where the runtime
+#      supports it (currently: hermes via plugin path)
+#
+# Targets a SaaS tenant subdomain. Provisions workspaces in the calling
+# tenant, runs the round-trip, and deletes them on exit (pass or fail)
+# via an EXIT trap.
+#
+# Pre-reqs:
+#   - bash >= 4 (associative arrays), python3, /usr/bin/curl
+#   - PLATFORM env or first arg pointing at a tenant subdomain
+#     (e.g. https://demo-tenant.staging.moleculesai.app)
+#   - $OPENROUTER_API_KEY (or $HERMES_API_KEY) for non-claude runtimes
+#   - $OPENAI_API_KEY for claude-code peer
+#   - SaaS edge requires Origin header — see auto-memory
+#     reference_saas_waf_origin_header.md
+#
+# Run:
+#   PLATFORM=https://my-tenant.staging.moleculesai.app \
+#     ./scripts/test-all-runtimes-a2a-e2e.sh
+#
+# Skip individual runtimes:
+#   SKIP_HERMES=1 SKIP_OPENCLAW=1 ./scripts/test-all-runtimes-a2a-e2e.sh
+set -euo pipefail
+
+PLATFORM="${PLATFORM:-${1:-http://localhost:8080}}"
+PLATFORM="${PLATFORM%/}" # a trailing slash would produce //workspaces URLs
+HERMES_PROVIDER_KEY="${OPENROUTER_API_KEY:-${HERMES_API_KEY:-}}"
+PEER_OPENAI_KEY="${OPENAI_API_KEY:-}"
+
+# Extra curl arguments, kept in an array so the header survives quoting.
+# An Origin is scheme://host only, so any path on PLATFORM is stripped.
+CURL_EXTRA=()
+case "$PLATFORM" in
+  https://*.moleculesai.app | https://*.moleculesai.app/*)
+    origin_host="${PLATFORM#https://}"
+    CURL_EXTRA=(-H "Origin: https://${origin_host%%/*}")
+    ;;
+esac
+
+# The provider key is required unless EVERY non-claude runtime is skipped;
+# skipping only some of them must not disable this guard.
+if [ -z "$HERMES_PROVIDER_KEY" ] &&
+  { [ -z "${SKIP_HERMES:-}" ] || [ -z "${SKIP_CODEX:-}" ] || [ -z "${SKIP_OPENCLAW:-}" ]; }; then
+  echo "FAIL: set OPENROUTER_API_KEY or HERMES_API_KEY for non-claude runtimes"
+  exit 2
+fi
+
+PASS=0
+FAIL=0
+declare -A WS_IDS
+
+# check LABEL REGEX ACTUAL — count a PASS when ACTUAL matches REGEX
+# (case-insensitive ERE), otherwise count a FAIL and print both.
+check() {
+  local label="$1" expected="$2" actual="$3"
+  # Here-string, not `echo | grep -q`: under pipefail an early grep -q exit
+  # can SIGPIPE the writer on large input and turn a match into a failure.
+  if grep -qiE -- "$expected" <<<"$actual"; then
+    echo "PASS: $label"
+    PASS=$((PASS + 1))
+  else
+    echo "FAIL: $label"
+    echo "  expected to contain: $expected"
+    echo "  got: $actual"
+    FAIL=$((FAIL + 1))
+  fi
+}
+
+# curl wrapper: silent, plus the Origin header when the target needs one.
+# The ${arr[@]+...} form keeps an empty array from tripping set -u on bash
+# older than 4.4.
+curl_p() {
+  /usr/bin/curl -s ${CURL_EXTRA[@]+"${CURL_EXTRA[@]}"} "$@"
+}
+
+# json_field NAME — print top-level field NAME of the JSON document on
+# stdin, or an empty line when the input is not a JSON object.
+json_field() {
+  python3 -c "
+import sys, json
+try:
+    print(json.load(sys.stdin).get(sys.argv[1], ''))
+except Exception:
+    print('')
+" "$1" 2>/dev/null
+}
+
+# wait_online ID NAME [MAX] — poll every 5s, at most MAX times (default 60),
+# until the workspace reports "online". Returns 1 on "failed" or timeout.
+wait_online() {
+  local id="$1" name="$2" max="${3:-60}"
+  local i s
+  for ((i = 1; i <= max; i++)); do
+    # A transport or JSON error while booting just counts as "not yet".
+    s=$(curl_p "$PLATFORM/workspaces/$id" | json_field status) || s=""
+    [ "$s" = "online" ] && return 0
+    if [ "$s" = "failed" ]; then
+      echo "  $name FAILED"
+      return 1
+    fi
+    if ((i % 6 == 0)); then
+      echo "  [$name] ${i}/${max}... ($s)"
+    fi
+    sleep 5
+  done
+  echo "  $name did not come online within $((max * 5))s"
+  return 1
+}
+
+# a2a_send ID MESSAGE — send one message/send request and print the text of
+# the first reply part. A transport or parse failure prints an empty line
+# instead of aborting, so the caller's check() reports it as a FAIL.
+a2a_send() {
+  local id="$1" message="$2"
+  local payload resp
+  payload=$(python3 -c "import json,sys; print(json.dumps({
+    'method': 'message/send',
+    'params': {'message': {'role': 'user', 'parts': [{'kind': 'text', 'text': sys.argv[1]}]}}
+  }))" "$message")
+  resp=$(curl_p -X POST "$PLATFORM/workspaces/$id/a2a" \
+    -H 'Content-Type: application/json' \
+    -d "$payload") || resp=""
+  python3 -c "
+import sys, json
+try:
+    r = json.load(sys.stdin)
+    print(r.get('result', {}).get('parts', [{}])[0].get('text', ''))
+except Exception:
+    print('')
+" <<<"$resp" 2>/dev/null || echo ""
+}
+
+# provision NAME TEMPLATE ROLE — create a tier-2 workspace and print its id.
+# Returns 1 (raw response on stderr) when the response carries no id.
+provision() {
+  local name="$1" template="$2" role="$3"
+  local body r id
+  body=$(python3 -c "import json,sys; print(json.dumps({
+    'name': sys.argv[1], 'role': sys.argv[2], 'tier': 2, 'template': sys.argv[3]
+  }))" "$name" "$role" "$template")
+  r=$(curl_p -X POST "$PLATFORM/workspaces" -H 'Content-Type: application/json' \
+    -d "$body") || r=""
+  id=$(json_field id <<<"$r") || id=""
+  if [ -z "$id" ]; then
+    echo "FAIL: provision $name returned no id: $r" >&2
+    return 1
+  fi
+  echo "$id"
+}
+
+# set_secret ID KEY VALUE — store one secret on a workspace.
+# python JSON-encodes the body (quotes/backslashes in VALUE survive) and
+# pipes it to curl on stdin, so the secret never appears on a command line.
+# -f turns an HTTP error status into a non-zero return.
+set_secret() {
+  local id="$1" key="$2" value="$3"
+  SECRET_KEY="$key" SECRET_VALUE="$value" python3 -c "
+import json, os
+print(json.dumps({'key': os.environ['SECRET_KEY'], 'value': os.environ['SECRET_VALUE']}))
+" | curl_p -f -X POST "$PLATFORM/workspaces/$id/secrets" \
+    -H 'Content-Type: application/json' \
+    -d @- >/dev/null
+}
+
+# EXIT trap: delete every workspace this run provisioned.
+cleanup() {
+  local runtime id
+  echo ""
+  echo "--- Cleanup ---"
+  for runtime in "${!WS_IDS[@]}"; do
+    id="${WS_IDS[$runtime]}"
+    [ -n "$id" ] || continue
+    if curl_p -f -X DELETE "$PLATFORM/workspaces/$id" >/dev/null; then
+      echo "  Deleted $runtime ($id)"
+    else
+      echo "  Cleanup FAILED for $runtime ($id) - delete it manually" >&2
+    fi
+  done
+}
+trap cleanup EXIT
+
+echo "=========================================="
+echo "  All-runtimes A2A parity E2E"
+echo "  Platform: $PLATFORM"
+echo "=========================================="
+echo ""
+
+# -------------------------------------------------------
+# 1. Provision the four runtimes (skip via SKIP_* flags)
+# -------------------------------------------------------
+echo "--- 1. Provision workspaces ---"
+if [ -z "${SKIP_CLAUDE_CODE:-}" ]; then
+  WS_IDS[claude-code]=$(provision "ParityClaude" "claude-code-default" "claude-code peer")
+  echo "  claude-code: ${WS_IDS[claude-code]}"
+fi
+if [ -z "${SKIP_HERMES:-}" ]; then
+  WS_IDS[hermes]=$(provision "ParityHermes" "hermes" "hermes peer")
+  echo "  hermes: ${WS_IDS[hermes]}"
+fi
+if [ -z "${SKIP_CODEX:-}" ]; then
+  WS_IDS[codex]=$(provision "ParityCodex" "codex" "codex peer")
+  echo "  codex: ${WS_IDS[codex]}"
+fi
+if [ -z "${SKIP_OPENCLAW:-}" ]; then
+  WS_IDS[openclaw]=$(provision "ParityOpenClaw" "openclaw" "openclaw peer")
+  echo "  openclaw: ${WS_IDS[openclaw]}"
+fi
+
+# -------------------------------------------------------
+# 2. Set provider keys
+# -------------------------------------------------------
+echo ""
+echo "--- 2. Set provider keys ---"
+for runtime in hermes codex openclaw; do
+  id="${WS_IDS[$runtime]:-}"
+  [ -n "$id" ] || continue
+  if set_secret "$id" "OPENROUTER_API_KEY" "$HERMES_PROVIDER_KEY"; then
+    echo "  $runtime: OPENROUTER_API_KEY set"
+  else
+    check "$runtime OPENROUTER_API_KEY set" "ok" "request failed"
+  fi
+done
+if [ -n "${WS_IDS[claude-code]:-}" ] && [ -n "$PEER_OPENAI_KEY" ]; then
+  if set_secret "${WS_IDS[claude-code]}" "OPENAI_API_KEY" "$PEER_OPENAI_KEY"; then
+    echo "  claude-code: OPENAI_API_KEY set"
+  else
+    check "claude-code OPENAI_API_KEY set" "ok" "request failed"
+  fi
+fi
+
+# -------------------------------------------------------
+# 3. Wait for online
+# -------------------------------------------------------
+echo ""
+echo "--- 3. Wait online (hermes cold-boot ~3-7 min) ---"
+# Fixed order: associative-array key order is unspecified, and a stable
+# order keeps the log comparable between runs.
+for runtime in claude-code hermes codex openclaw; do
+  id="${WS_IDS[$runtime]:-}"
+  [ -n "$id" ] || continue
+  max=60
+  if [ "$runtime" = "hermes" ]; then
+    max=120
+  fi
+  if wait_online "$id" "$runtime" "$max"; then
+    check "$runtime online" "ok" "ok"
+  else
+    check "$runtime online" "online" "timeout"
+  fi
+done
+
+# -------------------------------------------------------
+# 4. A2A round-trip — first message
+# -------------------------------------------------------
+echo ""
+echo "--- 4. A2A round-trip (first message) ---"
+for runtime in claude-code hermes codex openclaw; do
+  id="${WS_IDS[$runtime]:-}"
+  [ -n "$id" ] || continue
+  reply=$(a2a_send "$id" "Reply with just the word OK so we know you got this.")
+  echo "  [$runtime] reply: ${reply:0:80}"
+  check "$runtime A2A reply" "ok|got|received|reply|response" "$reply"
+done
+
+# -------------------------------------------------------
+# 5. Session continuity — second message recalls first
+# -------------------------------------------------------
+echo ""
+echo "--- 5. Session continuity (second message recalls first) ---"
+for runtime in claude-code hermes codex openclaw; do
+  id="${WS_IDS[$runtime]:-}"
+  [ -n "$id" ] || continue
+  # Set up: tell the agent a name.
+  a2a_send "$id" "My name is Carol. Reply with just the word OK." >/dev/null
+  # Recall: ask for the name back. Hermes plugin path keeps session
+  # state across turns; chat-completions path forgets between turns.
+  reply=$(a2a_send "$id" "What name did I introduce myself with one message ago? One word answer.")
+  echo "  [$runtime] recall reply: ${reply:0:80}"
+  check "$runtime session continuity" "carol" "$reply"
+done
+
+# -------------------------------------------------------
+# Results
+# -------------------------------------------------------
+echo ""
+echo "=========================================="
+echo "  Pass: $PASS  Fail: $FAIL"
+echo "=========================================="
+[ "$FAIL" -eq 0 ]
diff --git a/scripts/test-hermes-plugin-e2e.sh b/scripts/test-hermes-plugin-e2e.sh
new file mode 100755
--- /dev/null
+++ b/scripts/test-hermes-plugin-e2e.sh
@@ -0,0 +1,310 @@
+#!/usr/bin/env bash
+# E2E test: hermes runtime native MCP push parity via molecule-a2a plugin.
+#
+# Validates the full chain shipped in:
+#   - NousResearch/hermes-agent#18775 (upstream patch)
+#   - Molecule-AI/hermes-platform-molecule-a2a (plugin)
+#   - Molecule-AI/molecule-ai-workspace-template-hermes#32 (workspace
+#     template — Dockerfile bakes plugin in, executor uses /a2a/inbound)
+#
+# Test flow (numbers match the section banners below):
+#   1. Provision the peer workspace (claude-code)
+#   2. Provision the hermes workspace
+#   3. Set provider keys (the plugin path needs an LLM)
+#   4. Wait for both to come online
+#   5. Probe the plugin's /a2a/health from inside the hermes workspace
+#      (informational only when the platform has no exec helper)
+#   6. Send a first A2A message to hermes
+#   7. Send a SECOND A2A message and verify hermes maintains session
+#      continuity (the proof-point — old chat-completions path would
+#      have lost context between turns)
+#   8. Cleanup (EXIT trap, so it also runs when the script aborts early)
+#
+# Pre-reqs:
+#   - PLATFORM env or first arg pointing at a molecule platform that
+#     has the hermes runtime image republished AFTER PR #32 merge
+#   - $OPENROUTER_API_KEY (or $HERMES_API_KEY for direct Nous routing)
+#   - $OPENAI_API_KEY (for the claude-code peer)
+#   - SaaS edge requires Origin header — see auto-memory
+#     reference_saas_waf_origin_header.md
+#
+# Run:
+#   PLATFORM=https://demo-tenant.staging.moleculesai.app \
+#     ./scripts/test-hermes-plugin-e2e.sh
+
+set -euo pipefail
+
+PLATFORM="${PLATFORM:-${1:-http://localhost:8080}}"
+PLATFORM="${PLATFORM%/}" # a trailing slash would produce //workspaces URLs
+HERMES_PROVIDER_KEY="${OPENROUTER_API_KEY:-${HERMES_API_KEY:-}}"
+PEER_OPENAI_KEY="${OPENAI_API_KEY:-}"
+
+# Extra curl arguments, kept in an array so the header survives quoting.
+# An Origin is scheme://host only, so any path on PLATFORM is stripped.
+CURL_EXTRA=()
+case "$PLATFORM" in
+  https://*.moleculesai.app | https://*.moleculesai.app/*)
+    origin_host="${PLATFORM#https://}"
+    CURL_EXTRA=(-H "Origin: https://${origin_host%%/*}")
+    ;;
+esac
+
+if [ -z "$HERMES_PROVIDER_KEY" ]; then
+  echo "FAIL: set OPENROUTER_API_KEY or HERMES_API_KEY for the hermes workspace"
+  exit 2
+fi
+if [ -z "$PEER_OPENAI_KEY" ]; then
+  echo "FAIL: set OPENAI_API_KEY for the claude-code peer workspace"
+  exit 2
+fi
+
+PASS=0
+FAIL=0
+PEER_ID=""
+HERMES_ID=""
+
+# check LABEL REGEX ACTUAL — count a PASS when ACTUAL matches REGEX
+# (case-insensitive ERE), otherwise count a FAIL and print both.
+check() {
+  local label="$1" expected="$2" actual="$3"
+  # Here-string, not `echo | grep -q`: under pipefail an early grep -q exit
+  # can SIGPIPE the writer on large input and turn a match into a failure.
+  if grep -qiE -- "$expected" <<<"$actual"; then
+    echo "PASS: $label"
+    PASS=$((PASS + 1))
+  else
+    echo "FAIL: $label"
+    echo "  expected to contain: $expected"
+    echo "  got: $actual"
+    FAIL=$((FAIL + 1))
+  fi
+}
+
+# curl wrapper: silent, plus the Origin header when the target needs one.
+# The ${arr[@]+...} form keeps an empty array from tripping set -u on bash
+# older than 4.4.
+curl_p() {
+  curl -s ${CURL_EXTRA[@]+"${CURL_EXTRA[@]}"} "$@"
+}
+
+# json_field NAME — print top-level field NAME of the JSON document on
+# stdin, or an empty line when the input is not a JSON object.
+json_field() {
+  python3 -c "
+import sys, json
+try:
+    print(json.load(sys.stdin).get(sys.argv[1], ''))
+except Exception:
+    print('')
+" "$1" 2>/dev/null
+}
+
+# wait_online ID NAME [MAX] — poll every 5s, at most MAX times (default 60),
+# until the workspace reports "online". Returns 1 on "failed" or timeout.
+wait_online() {
+  local id="$1" name="$2" max="${3:-60}"
+  local i s
+  for ((i = 1; i <= max; i++)); do
+    # A transport or JSON error while booting just counts as "not yet".
+    s=$(curl_p "$PLATFORM/workspaces/$id" | json_field status) || s=""
+    [ "$s" = "online" ] && return 0
+    if [ "$s" = "failed" ]; then
+      echo "  $name FAILED"
+      return 1
+    fi
+    if ((i % 6 == 0)); then
+      echo "  [$name] ${i}/${max}... ($s)"
+    fi
+    sleep 5
+  done
+  echo "  $name did not come online within $((max * 5))s"
+  return 1
+}
+
+# a2a_send ID MESSAGE [MAX_RETRIES] — send one message/send request and
+# print the text of the first reply part, retrying (60s apart, default 3
+# attempts) while the reply looks like a throttling error.
+a2a_send() {
+  local id="$1" message="$2" max_retries="${3:-3}"
+  local attempt payload resp text
+  payload=$(python3 -c "import json,sys; print(json.dumps({
+    'method': 'message/send',
+    'params': {'message': {'role': 'user', 'parts': [{'kind': 'text', 'text': sys.argv[1]}]}}
+  }))" "$message")
+  for ((attempt = 1; attempt <= max_retries; attempt++)); do
+    resp=$(curl_p -X POST "$PLATFORM/workspaces/$id/a2a" \
+      -H 'Content-Type: application/json' \
+      -d "$payload") || resp=""
+    text=$(python3 -c "
+import sys, json
+try:
+    r = json.load(sys.stdin)
+    print(r.get('result', {}).get('parts', [{}])[0].get('text', ''))
+except Exception:
+    print('')
+" <<<"$resp" 2>/dev/null) || text=""
+    # Retry only on an explicit throttling signal: a bare "rate" would also
+    # match ordinary reply words such as "generate" or "accurate".
+    if [ "$attempt" -lt "$max_retries" ] &&
+      grep -qiE -- 'rate.?limit|throttl|(^|[^0-9])429([^0-9]|$)|credits' <<<"$text"; then
+      sleep 60
+      continue
+    fi
+    echo "$text"
+    return 0
+  done
+  echo "ERROR: all retries exhausted"
+  return 1
+}
+
+# In-container probe via the platform's exec-in-workspace helper. When the
+# request fails (for example because the platform exposes no such helper)
+# a skip notice is printed instead and the function still returns 0.
+probe_plugin_health() {
+  local id="$1"
+  curl_p -fS "$PLATFORM/workspaces/$id/exec" \
+    -H 'Content-Type: application/json' \
+    -d '{"cmd": ["curl", "-fsS", "http://127.0.0.1:8645/a2a/health"]}' \
+    2>/dev/null ||
+    echo "exec-helper not available — skipping in-container probe"
+}
+
+# set_secret ID KEY VALUE — store one secret and print the raw response.
+# python JSON-encodes the body (quotes/backslashes in VALUE survive) and
+# pipes it to curl on stdin, so the secret never appears on a command line.
+set_secret() {
+  local id="$1" key="$2" value="$3"
+  SECRET_KEY="$key" SECRET_VALUE="$value" python3 -c "
+import json, os
+print(json.dumps({'key': os.environ['SECRET_KEY'], 'value': os.environ['SECRET_VALUE']}))
+" | curl_p -X POST "$PLATFORM/workspaces/$id/secrets" \
+    -H 'Content-Type: application/json' \
+    -d @-
+}
+
+# delete_workspace LABEL ID — best-effort delete; no-op for an empty ID.
+delete_workspace() {
+  local label="$1" id="$2"
+  [ -n "$id" ] || return 0
+  if curl_p -f -X DELETE "$PLATFORM/workspaces/$id" >/dev/null; then
+    echo "  Deleted $label"
+  else
+    echo "  Cleanup FAILED for $label ($id) - delete it manually" >&2
+  fi
+}
+
+# EXIT trap so that an abort under set -e (for example a failed
+# provisioning call) does not leak the workspaces created so far.
+cleanup() {
+  echo ""
+  echo "--- 8. Cleanup ---"
+  delete_workspace "peer" "$PEER_ID"
+  delete_workspace "hermes" "$HERMES_ID"
+}
+trap cleanup EXIT
+
+echo "=========================================="
+echo "  Hermes plugin path E2E"
+echo "  Platform: $PLATFORM"
+echo "=========================================="
+echo ""
+
+# -------------------------------------------------------
+# 1. Provision peer (claude-code) + hermes
+# -------------------------------------------------------
+echo "--- 1. Provision peer (claude-code) ---"
+R=$(curl_p -X POST "$PLATFORM/workspaces" -H 'Content-Type: application/json' \
+  -d '{"name":"PeerAlice","role":"Claude Code peer","tier":2,"template":"claude-code-default"}') || R=""
+PEER_ID=$(json_field id <<<"$R") || PEER_ID=""
+check "Provision peer" "provisioning|online" "$R"
+if [ -z "$PEER_ID" ]; then
+  echo "FAIL: provisioning the peer returned no id: $R" >&2
+  exit 1
+fi
+echo "  Peer: $PEER_ID"
+
+echo ""
+echo "--- 2. Provision hermes (plugin path) ---"
+R=$(curl_p -X POST "$PLATFORM/workspaces" -H 'Content-Type: application/json' \
+  -d '{"name":"HermesPluginBob","role":"Hermes peer (plugin path)","tier":2,"template":"hermes"}') || R=""
+HERMES_ID=$(json_field id <<<"$R") || HERMES_ID=""
+check "Provision hermes" "provisioning|online" "$R"
+if [ -z "$HERMES_ID" ]; then
+  echo "FAIL: provisioning hermes returned no id: $R" >&2
+  exit 1
+fi
+echo "  Hermes: $HERMES_ID"
+
+# -------------------------------------------------------
+# 3. Set provider keys
+# -------------------------------------------------------
+echo ""
+echo "--- 3. Set provider keys ---"
+R=$(set_secret "$HERMES_ID" "OPENROUTER_API_KEY" "$HERMES_PROVIDER_KEY") || R=""
+check "Set hermes OPENROUTER_API_KEY" "saved" "$R"
+
+R=$(set_secret "$PEER_ID" "OPENAI_API_KEY" "$PEER_OPENAI_KEY") || R=""
+check "Set peer OPENAI_API_KEY" "saved" "$R"
+
+# -------------------------------------------------------
+# 4. Wait online
+# -------------------------------------------------------
+echo ""
+echo "--- 4. Wait online (hermes cold-boot ~3-6 min for fork install + plugin) ---"
+if wait_online "$PEER_ID" "Peer" 30; then
+  check "Peer online" "ok" "ok"
+else
+  check "Peer online" "online" "timeout"
+fi
+if wait_online "$HERMES_ID" "Hermes" 120; then
+  check "Hermes online" "ok" "ok"
+else
+  check "Hermes online" "online" "timeout"
+fi
+
+# -------------------------------------------------------
+# 5. Verify plugin loaded inside the hermes container
+# -------------------------------------------------------
+echo ""
+echo "--- 5. Verify plugin loaded ---"
+HEALTH=$(probe_plugin_health "$HERMES_ID")
+echo "  Plugin /a2a/health probe: $HEALTH"
+if grep -q -- "molecule-a2a" <<<"$HEALTH"; then
+  check "Plugin /a2a/health responds 200" "molecule-a2a" "$HEALTH"
+else
+  echo "  (in-container probe not available on this platform — relying on A2A round-trip below)"
+fi
+
+# -------------------------------------------------------
+# 6. First A2A message — establish session
+# -------------------------------------------------------
+echo ""
+echo "--- 6. First A2A message (peer → hermes) ---"
+echo "  Telling hermes: 'My name is Carol. Reply with just OK.'"
+RESP1=$(a2a_send "$HERMES_ID" "My name is Carol. Reply with just the word OK.")
+echo "  Hermes says: $RESP1"
+check "First message gets a reply" "ok|received|got|name" "$RESP1"
+
+# -------------------------------------------------------
+# 7. Second A2A message — verify session continuity
+# -------------------------------------------------------
+echo ""
+echo "--- 7. Second A2A message (proves session continuity) ---"
+echo "  Asking hermes to recall the name from msg #1..."
+RESP2=$(a2a_send "$HERMES_ID" "What name did I introduce myself with one message ago? One word answer.")
+echo "  Hermes says: $RESP2"
+# Plugin path: hermes daemon kept the conversation in its session store
+# across turns; the answer should mention "Carol".
+# Old chat-completions path: each turn was independent; reply would NOT
+# know the prior name (would say "you didn't introduce yourself" or
+# similar).
+check "Session continuity proves plugin path" "carol" "$RESP2"
+
+# -------------------------------------------------------
+# Results (cleanup runs from the EXIT trap)
+# -------------------------------------------------------
+echo ""
+echo "=========================================="
+echo "  Pass: $PASS  Fail: $FAIL"
+echo "=========================================="
+[ "$FAIL" -eq 0 ]