From 2e31f27304f1b006f55250208a6fabff8c52aaf4 Mon Sep 17 00:00:00 2001 From: core-devops Date: Fri, 5 Jun 2026 01:34:20 -0700 Subject: [PATCH] test(e2e): staging coverage for every runtime + resume/hibernate lifecycle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the "e2e covers every runtime, no regressions" gap (coverage audit). Adds the missing provision→online→A2A arms so the staging suite exercises every supported runtime, plus the resume/hibernate lifecycle transitions. staging-saas (test_staging_full_saas.sh): - seo-agent arm (E2E_RUNTIME=seo-agent): provisioned via template="seo-agent" (NOT runtime — seo-agent is a claude-code-adapter template VARIANT absent from manifest.json/runtime_registry knownRuntimes; its config.yaml resolves runtime=claude-code). Reuses the same MiniMax/claude-code key path. Full provision→online→A2A→activity matrix, identical to the other runtime arms. - google-adk AI-Studio arm (E2E_RUNTIME=google-adk, E2E_GOOGLE_API_KEY): BYOK GOOGLE_API_KEY/GEMINI_API_KEY → bare gemini-2.5-pro (providers.yaml runtimes.google-adk `google` arm). Exercises google-adk being provisioned at all; the keyless-Vertex PROD path (E2E_LLM_PATH=platform + platform: model) needs WIF — FLAGGED for the CTO (see below). - Lifecycle step 10b: pause→paused→resume→provisioning→online and hibernate→hibernated→(auto-wake A2A)→online, each asserted against the live DB-backed status (workspace_restart.go Pause/Resume/Hibernate). Gated to full MODE + E2E_LIFECYCLE!=off. Job timeout 45→75 for the 2 reprovisions. - Create payload built in Python so template/runtime are emitted conditionally; create errors now fail loud (named) instead of a KeyError. 
staging-external (test_staging_external_runtime.sh): - kimi + kimi-cli BYO meta-runtime arms (step 7c): create(external:true, runtime=<label>) → awaiting_agent + runtime-label-PRESERVED (not coerced to generic external, workspace.go normalizeExternalRuntime) → register(poll) → online → A2A → assert the poll-mode {status:"queued",delivery_mode:"poll"} envelope (a2a_proxy.go). Proves the a2a proxy routes a BYO meta-runtime to the poll queue rather than 404/500. Idioms preserved: skip-if-absent stays LOUD; REQUIRE_LIVE fail-closed intact; every new arm REDs on a real provision/A2A/transition break, never silently skips. model_slug dispatch pins added for seo-agent + google-adk (test passes 21/21). bash -n + shellcheck clean on all changed scripts. NOT changed (flagged for CTO, needs extra provisioning): - google-adk is in providers.yaml + provisioner/registry.go + registry_gen but MISSING from manifest.json workspace_templates → the Create-handler runtime allowlist (manifest-derived) rejects runtime="google-adk" with RUNTIME_UNSUPPORTED. Adding it (+ template-cache of molecule-ai-workspace-template-google-adk) is the provisioning change that makes the google-adk arm actually green. The arm is wired and REDs clearly until then. - Vertex WIF path for google-adk (server-side mint, no on-box cred) and a standing kimi BYO compute cell (for a REAL kimi completion vs the queued envelope) both need standing infra not present in staging. These staging arms remain continue-on-error (non-gating). Promoting e2e-staging-saas.yml + e2e-staging-external.yml to REQUIRED (after a de-flake window of consecutive green main runs) is the CTO gate-flip that makes runtime provisioning regression-blocking. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .gitea/workflows/e2e-staging-saas.yml | 12 +- tests/e2e/lib/model_slug.sh | 26 ++- tests/e2e/test_model_slug.sh | 23 ++ tests/e2e/test_staging_external_runtime.sh | 123 +++++++++- tests/e2e/test_staging_full_saas.sh | 253 ++++++++++++++++++++- 5 files changed, 429 insertions(+), 8 deletions(-) diff --git a/.gitea/workflows/e2e-staging-saas.yml b/.gitea/workflows/e2e-staging-saas.yml index c4e432837..f373d1286 100644 --- a/.gitea/workflows/e2e-staging-saas.yml +++ b/.gitea/workflows/e2e-staging-saas.yml @@ -124,7 +124,12 @@ jobs: # Phase 3 (RFC #219 §1): surface broken workflows without blocking. # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently. continue-on-error: true - timeout-minutes: 45 + # Raised 45→75: step 10b now exercises pause→resume→online + + # hibernate→wake→online, each of which RE-PROVISIONS the parent (CP + # re-provision + heartbeat recovery, not a fresh EC2 cold start, but still + # minutes). The base provision→online→A2A matrix fits in ~35 min; the two + # extra lifecycle reprovisions need headroom under WORKSPACE_ONLINE_TIMEOUT. + timeout-minutes: 75 permissions: contents: read @@ -184,6 +189,11 @@ jobs: E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || github.event.inputs.runtime == 'google-adk' && 'google_genai:gemini-2.5-pro' || 'minimax:MiniMax-M2.7' }} E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}" E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }} + # Lifecycle transitions (step 10b): pause→resume→online + + # hibernate→wake→online on the provisioned parent. `auto` runs them in + # full mode (this job). Set `off` to skip the ~2x-reprovision cost on an + # ad-hoc dispatch. The timeout-minutes above is sized for this being on. 
+ E2E_LIFECYCLE: auto # Fail-closed-on-skip: in CI the harness MUST prove ≥1 full # provision→online→A2A cycle. If it reaches the end having validated # nothing (a future short-circuit / skip path), it exits 5 rather than diff --git a/tests/e2e/lib/model_slug.sh b/tests/e2e/lib/model_slug.sh index efb5fd71f..aca6148bc 100755 --- a/tests/e2e/lib/model_slug.sh +++ b/tests/e2e/lib/model_slug.sh @@ -83,7 +83,17 @@ pick_model_slug() { fi case "$runtime" in hermes) printf 'openai/gpt-4o' ;; - claude-code) + # seo-agent is a claude-code-adapter template VARIANT selected by + # template name (template="seo-agent"), not a distinct registry runtime + # (it is absent from manifest.json + runtime_registry.go). Its config.yaml + # declares `runtime: claude-code` and copies the claude-code `providers:` + # block (providers.yaml:21 "The same block is copy-pasted into the seo-agent + # template"), so its model dispatch is IDENTICAL to claude-code's: the + # MiniMax BYOK colon id (the staging-default key path), else direct + # Anthropic, else the OAuth `sonnet` alias. Sharing the claude-code branch + # keeps the SSOT one place — a seo-agent run is just a claude-code run + # behind a productized template skin. + claude-code|seo-agent) if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then # Namespaced (colon) BYOK id, not bare "MiniMax-M2" (#2263 deploy-skew): # bare ids can lag the deployed staging ws-server's compiled registry, @@ -102,6 +112,20 @@ pick_model_slug() { printf 'sonnet' fi ;; + # google-adk: Gemini via two distinct provider arms in providers.yaml + # runtimes.google-adk: + # * platform arm → `platform:gemini-2.5-pro` (keyless Vertex via the CP + # LLM proxy + server-side WIF mint; the org-compliant PROD path). This + # id is selected via E2E_LLM_PATH=platform above, NOT here. + # * google arm (AI Studio BYOK) → bare `gemini-2.5-pro` with the tenant's + # own GOOGLE_API_KEY. This is the staging-exercisable path (no WIF + # provisioning needed) and is what this branch selects. 
+ # The workflow may further override with E2E_MODEL_SLUG=google_genai:gemini-2.5-pro + # (the adapter's provider:model spelling) — E2E_MODEL_SLUG wins at the top + # of this function, so both forms are supported. + google-adk) + printf 'gemini-2.5-pro' + ;; *) printf 'openai/gpt-4o' ;; # safest fallback (matches hermes) esac } diff --git a/tests/e2e/test_model_slug.sh b/tests/e2e/test_model_slug.sh index 32b805fb0..02f81e05d 100755 --- a/tests/e2e/test_model_slug.sh +++ b/tests/e2e/test_model_slug.sh @@ -57,6 +57,29 @@ assert_eq "claude-code + Anthropic API key → Anthropic API model" "$got" "clau got=$(unset E2E_MODEL_SLUG; E2E_MINIMAX_API_KEY="mx-priority" E2E_ANTHROPIC_API_KEY="sk-ant-loser" pick_model_slug claude-code) assert_eq "claude-code + both keys → MiniMax priority" "$got" "minimax:MiniMax-M2.7" +# ── seo-agent (claude-code-adapter template variant) ── +# seo-agent shares the claude-code dispatch branch (it reuses the claude-code +# adapter + the same copied providers block). Pin that it resolves IDENTICALLY +# to claude-code for every key path so a future refactor can't accidentally +# fork seo-agent's model selection from claude-code's. +run_test "seo-agent → claude-code default alias" seo-agent "sonnet" + +got=$(unset E2E_MODEL_SLUG E2E_ANTHROPIC_API_KEY; E2E_MINIMAX_API_KEY="mx-test" pick_model_slug seo-agent) +assert_eq "seo-agent + MiniMax key → MiniMax model (==claude-code)" "$got" "minimax:MiniMax-M2.7" + +got=$(unset E2E_MODEL_SLUG E2E_MINIMAX_API_KEY; E2E_ANTHROPIC_API_KEY="sk-ant-test" pick_model_slug seo-agent) +assert_eq "seo-agent + Anthropic key → Anthropic model (==claude-code)" "$got" "claude-sonnet-4-6" + +# ── google-adk (Gemini) ── +# AI-Studio BYOK arm → bare gemini-2.5-pro (providers.yaml runtimes.google-adk +# `google` arm). The platform/Vertex arm is selected via E2E_LLM_PATH=platform +# (a platform: id), not this dispatch. Pin the bare form so a drift to the +# platform id (which would change billing/route) is caught. 
+run_test "google-adk → AI-Studio bare gemini id" google-adk "gemini-2.5-pro" + +got=$(E2E_MODEL_SLUG="google_genai:gemini-2.5-pro" pick_model_slug google-adk) +assert_eq "google-adk + E2E_MODEL_SLUG override (adapter spelling)" "$got" "google_genai:gemini-2.5-pro" + # ── Fallback for unknown runtime ── # Picks slash-form (hermes-shaped) since hermes is the historical # default and most third-party runtimes behave hermes-like. Pinning diff --git a/tests/e2e/test_staging_external_runtime.sh b/tests/e2e/test_staging_external_runtime.sh index b4c8d4d70..9e73228c8 100755 --- a/tests/e2e/test_staging_external_runtime.sh +++ b/tests/e2e/test_staging_external_runtime.sh @@ -26,7 +26,26 @@ # the workspace stuck on 'online' indefinitely.) # # Hibernation is intentionally NOT covered here — it has its own timing -# model (idle threshold) and warrants a separate harness. +# model (idle threshold) and warrants a separate harness. (The +# pause→resume + hibernate→wake transitions for PLATFORM-compute runtimes +# are covered by test_staging_full_saas.sh step 10b.) +# +# BYO meta-runtime arms (kimi, kimi-cli) — added 2026-06-05: +# kimi and kimi-cli are BYO-compute meta-runtimes (isExternalLikeRuntime: +# runtime_registry.go:141-147) that go through the SAME external/poll +# provisioning path as `external` — create with external:true → +# awaiting_agent, register → online — but with their runtime LABEL +# PRESERVED (workspace.go:752-770 normalizeExternalRuntime keeps the +# specific label, does NOT coerce to generic "external", so the canvas +# shows the right runtime). They had ONLY validation/unit coverage and +# were NEVER provisioned→online in any e2e. Step 7c adds, for EACH of +# {kimi, kimi-cli}: create → assert awaiting_agent + label-preserved → +# register(poll) → assert online + label-preserved → A2A → assert the +# poll-mode {status:"queued"} envelope (a2a_proxy.go:462-477). 
The A2A +# arm proves the a2a proxy routes a BYO meta-runtime to the poll queue +# (200 + queued) rather than 404/500 — the meaningful round-trip for a +# workspace with no standing live agent. A real BYO-agent COMPLETION +# needs a standing kimi BYO cell (flagged for the CTO in the PR body). # # Required env (mirrors test_staging_full_saas.sh): # MOLECULE_CP_URL default: https://staging-api.moleculesai.app @@ -456,6 +475,108 @@ RECOVERED_STATUS=$(echo "$GET_RESP" | python3 -c "import json,sys; print(json.lo ok "Re-register succeeded — awaiting_agent → online (operator-recoverable)" require_transition "re-register: awaiting_agent → online (recovery)" +# ─── 7c. BYO meta-runtime arms: kimi + kimi-cli ───────────────────────── +# kimi and kimi-cli are BYO-compute meta-runtimes (isExternalLikeRuntime). +# They share the external/poll provisioning path but PRESERVE their runtime +# label (workspace.go normalizeExternalRuntime). They had no provision→online +# e2e until now. For EACH: create(external:true, runtime=<label>) → assert +# awaiting_agent + label preserved → register(poll) → assert online + label +# preserved → A2A → assert the poll-mode {status:"queued"} envelope. +# +# Why poll-mode {queued} is the A2A assertion (not a real completion): there +# is no standing live BYO agent in staging, so the meaningful round-trip is +# that the a2a proxy ROUTES a BYO meta-runtime to the poll queue (HTTP 200 + +# {status:"queued", delivery_mode:"poll"}, a2a_proxy.go:462-477) instead of +# 404/500. A real BYO-agent COMPLETION needs a standing kimi BYO cell — see +# the CTO flag in the PR body. +byo_meta_runtime_arm() { # $1 = runtime label (kimi | kimi-cli) + local rt="$1" + local resp wid status auth get_resp db_status reg_dm online_status + log " [$rt] create (external:true, runtime=$rt)..." 
+ resp=$(tenant_call POST /workspaces \ + -d "$(printf '{"name":"ext-%s-e2e","runtime":"%s","external":true}' "$rt" "$rt")") + wid=$(echo "$resp" | python3 -c "import json,sys; print(json.load(sys.stdin).get('id',''))") + status=$(echo "$resp" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))") + auth=$(echo "$resp" | python3 -c " +import json,sys +try: + d=json.load(sys.stdin); conn=d.get('connection') or {} + print(conn.get('auth_token','') or d.get('auth_token','')) +except Exception: + print('') +") + [ -z "$wid" ] && fail "[$rt] create missing id: $resp" + [ "$status" = "awaiting_agent" ] || fail "[$rt] create status='$status' (expected awaiting_agent — external/poll path)" + [ -z "$auth" ] && fail "[$rt] create returned no workspace auth token — register impossible" + + # Assert the runtime LABEL was preserved (NOT coerced to generic 'external'). + get_resp=$(tenant_call GET "/workspaces/$wid") + db_status=$(echo "$get_resp" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))") + local db_runtime + db_runtime=$(echo "$get_resp" | python3 -c "import json,sys; print(json.load(sys.stdin).get('runtime',''))") + [ "$db_status" = "awaiting_agent" ] || fail "[$rt] DB row status=$db_status (expected awaiting_agent)" + [ "$db_runtime" = "$rt" ] || fail "[$rt] runtime label coerced to '$db_runtime' (expected '$rt' — normalizeExternalRuntime must PRESERVE the BYO meta-runtime label, workspace.go:752-770)" + ok " [$rt] create → awaiting_agent, runtime label preserved ✓" + + # register(poll) → online. Reuse register_with_retry by setting WS_AUTH_TOKEN + # (the helper reads it as a global). REGISTER_RESP is set by the helper. 
+ WS_AUTH_TOKEN="$auth" + local body + body=$(printf '{"id":"%s","url":"https://example.invalid:443","delivery_mode":"poll","agent_card":{"name":"e2e-%s","skills":[{"id":"echo","name":"Echo"}]}}' "$wid" "$rt") + REGISTER_RESP="" + register_with_retry "[$rt] register" "$body" \ + || fail "[$rt] register returned non-200 after bounded retries — body: $(printf '%s' "$REGISTER_RESP" | sanitize_http_body | head -c 300)" + online_status=$(tenant_call GET "/workspaces/$wid" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))") + [ "$online_status" = "online" ] || fail "[$rt] expected online after register, got $online_status" + reg_dm=$(echo "$REGISTER_RESP" | head -n1 | python3 -c "import json,sys; print(json.load(sys.stdin).get('delivery_mode',''))" 2>/dev/null || echo "") + [ "$reg_dm" = "poll" ] || fail "[$rt] register response delivery_mode='$reg_dm' (expected poll)" + ok " [$rt] register → online (delivery_mode=poll) ✓" + + # A2A → assert poll-mode {status:"queued"} envelope. Bounded retry on the + # transient cold-edge 5xx class; a 4xx/non-queued 2xx is a real bug. + local a2a_payload a2a_tmp a2a_code a2a_rc a2a_status attempt + a2a_payload=$(python3 -c " +import json, uuid +print(json.dumps({ + 'jsonrpc':'2.0','method':'message/send','id':'e2e-byo-1', + 'params':{'message':{'role':'user','messageId':f'e2e-{uuid.uuid4().hex[:8]}', + 'parts':[{'kind':'text','text':'BYO meta-runtime poll-route smoke. Respond: OK'}]}} +})) +") + a2a_tmp=$(mktemp -t byo_a2a.XXXXXX) + for attempt in $(seq 1 8); do + : >"$a2a_tmp" + set +e + a2a_code=$(curl -sS --max-time 60 -X POST "$TENANT_URL/workspaces/$wid/a2a" \ + -H "Authorization: Bearer $TENANT_TOKEN" \ + -H "X-Molecule-Org-Id: $ORG_ID" \ + -H "Content-Type: application/json" \ + -d "$a2a_payload" -o "$a2a_tmp" -w '%{http_code}' 2>/dev/null) + a2a_rc=$? 
+ set -e + a2a_code=${a2a_code:-000} + if [ "$a2a_rc" = "0" ] && [ "$a2a_code" = "200" ]; then break; fi + if echo "$a2a_code" | grep -Eq '^(502|503|504)$' && [ "$attempt" -lt 8 ]; then + log " [$rt] A2A transient $a2a_code attempt $attempt/8"; sleep 10; continue + fi + break + done + a2a_status=$(python3 -c "import json,sys; print(json.load(open(sys.argv[1])).get('status',''))" "$a2a_tmp" 2>/dev/null || echo "") + local a2a_dm + a2a_dm=$(python3 -c "import json,sys; print(json.load(open(sys.argv[1])).get('delivery_mode',''))" "$a2a_tmp" 2>/dev/null || echo "") + rm -f "$a2a_tmp" + [ "$a2a_rc" = "0" ] && [ "$a2a_code" = "200" ] \ + || fail "[$rt] A2A POST failed (rc=$a2a_rc, http=$a2a_code) — a BYO meta-runtime poll-mode A2A must 200 with a queued envelope, not error" + [ "$a2a_status" = "queued" ] && [ "$a2a_dm" = "poll" ] \ + || fail "[$rt] A2A returned status='$a2a_status' delivery_mode='$a2a_dm' (expected queued/poll — a2a proxy must route a BYO meta-runtime to the poll queue, a2a_proxy.go:462-477)" + ok " [$rt] A2A → poll-mode queued envelope ✓ (provision→online→A2A proven for $rt)" +} + +log "7c/8 BYO meta-runtime arms (kimi, kimi-cli) — provision→online→A2A..." +byo_meta_runtime_arm "kimi" +byo_meta_runtime_arm "kimi-cli" +ok "BYO meta-runtime arms passed for kimi + kimi-cli" + # ─── 8. Done — cleanup runs in the EXIT trap ─────────────────────────── # REQUIRE_LIVE belt-and-braces: assert here too (in addition to the EXIT # trap) so the failure surfaces in step order, not only post-teardown. diff --git a/tests/e2e/test_staging_full_saas.sh b/tests/e2e/test_staging_full_saas.sh index 1247c6c85..670d1308f 100755 --- a/tests/e2e/test_staging_full_saas.sh +++ b/tests/e2e/test_staging_full_saas.sh @@ -24,6 +24,19 @@ # # Optional env: # E2E_RUNTIME hermes (default) | claude-code | codex | openclaw +# | seo-agent | google-adk +# - seo-agent: a claude-code-adapter template +# VARIANT (not a distinct registry runtime). 
+# Selected via the `template` field (config.yaml +# resolves runtime=claude-code); reuses the +# same MiniMax/claude-code key path. See the +# TEMPLATE derivation + SECRETS_JSON block. +# - google-adk: Gemini. The AI-Studio-keyed BYOK +# path (E2E_GOOGLE_API_KEY) is staging- +# exercisable here; the keyless Vertex PROD +# path needs WIF (see header note + the CTO +# flag in the PR body) and is selected via +# E2E_LLM_PATH=platform + a platform: model. # E2E_PROVISION_TIMEOUT_SECS default 900 (15 min cold EC2 budget) # E2E_WORKSPACE_ONLINE_TIMEOUT_SECS default 3600 (60 min — hermes # cold-boot worst-case + slack). Raised from @@ -47,6 +60,18 @@ # tear down cleanly (and exit 4 on leak). # Used by a dedicated sanity workflow # that verifies the safety net. +# E2E_LIFECYCLE auto (default) | off +# When auto + MODE=full, exercises the +# pause→resume→online and hibernate→resume(wake) +# state transitions on the provisioned parent +# (step 10b). These are REAL transitions on the +# live tenant (Pause stops the container + sets +# status=paused; Resume re-provisions → +# provisioning → online; Hibernate stops + +# status=hibernated; the next A2A auto-wakes it). +# Set `off` for a fast smoke that skips the +# ~2x-reprovision cost. In smoke MODE it is +# skipped regardless (no parent stability budget). # E2E_REQUIRE_LIVE 1 → fail-closed-on-skip guard (CI sets this). # When set, the run MUST actually complete # ≥1 full provision→online→A2A cycle. A run @@ -592,6 +617,24 @@ print(json.dumps({ 'ANTHROPIC_API_KEY': k, })) ") +elif [ -n "${E2E_GOOGLE_API_KEY:-}" ]; then + # google-adk AI-Studio BYOK path. The `google` provider entry + # (providers.yaml:401-413) reads GEMINI_API_KEY / GOOGLE_API_KEY and dials + # generativelanguage.googleapis.com — the tenant's OWN key, distinct from the + # keyless-Vertex PROD path (which routes through the CP proxy + server-side + # WIF and carries NO tenant credential). 
This branch exercises google-adk + # being PROVISIONED AT ALL on staging; the Vertex-specific WIF path is flagged + # for the CTO (needs extra provisioning) and is NOT reachable here. Inject + # under both env names the provider accepts so the adapter resolves regardless + # of which one it reads first. + SECRETS_JSON=$(python3 -c " +import json, os +k = os.environ['E2E_GOOGLE_API_KEY'] +print(json.dumps({ + 'GOOGLE_API_KEY': k, + 'GEMINI_API_KEY': k, +})) +") elif [ -n "${E2E_OPENAI_API_KEY:-}" ]; then SECRETS_JSON=$(python3 -c " import json, os @@ -611,11 +654,79 @@ fi MODEL_SLUG=$(pick_model_slug "$RUNTIME") log " MODEL_SLUG=$MODEL_SLUG" -log "5/11 Provisioning parent workspace (runtime=$RUNTIME)..." +# ─── runtime → provision-selector resolution ──────────────────────────── +# Most runtimes are selected directly by the `runtime` field. seo-agent is +# the exception: it is NOT a registry runtime (absent from manifest.json + +# runtime_registry.go knownRuntimes) — it is a claude-code-adapter template +# VARIANT selected by the `template` field. The ws-server Create handler reads +# the template's config.yaml, which declares `runtime: claude-code`, and +# resolves the concrete runtime from there (workspace.go:290-336). So for +# seo-agent we send template="seo-agent" and OMIT runtime, letting the +# template resolve it — sending an explicit runtime="seo-agent" would +# RUNTIME_UNSUPPORTED-422 at workspace.go:374-384 because it is not in +# knownRuntimes. PROVISION_TEMPLATE is "" for every real registry runtime. +PROVISION_TEMPLATE="" +case "$RUNTIME" in + seo-agent) PROVISION_TEMPLATE="seo-agent" ;; +esac + +# Build the create payload in Python so the optional `template`/`runtime` +# fields are emitted conditionally and the secrets blob is embedded without +# shell-escaping hazards. Args: name, [parent_id|""]. 
+build_create_payload() { + local name="$1" parent_id="${2:-}" + E2E_WS_NAME="$name" \ + E2E_WS_PARENT_ID="$parent_id" \ + E2E_WS_RUNTIME="$RUNTIME" \ + E2E_WS_TEMPLATE="$PROVISION_TEMPLATE" \ + E2E_WS_MODEL="$MODEL_SLUG" \ + E2E_WS_SECRETS="$SECRETS_JSON" \ + python3 -c " +import json, os +secrets = json.loads(os.environ['E2E_WS_SECRETS'] or '{}') +payload = { + 'name': os.environ['E2E_WS_NAME'], + 'tier': 2, + 'model': os.environ['E2E_WS_MODEL'], + 'secrets': secrets, +} +tmpl = os.environ.get('E2E_WS_TEMPLATE', '') +if tmpl: + # Template-selected variant (seo-agent): the template's config.yaml + # resolves runtime=claude-code server-side. Do NOT also send an explicit + # runtime — seo-agent is not a registry runtime and would 422. + payload['template'] = tmpl +else: + payload['runtime'] = os.environ['E2E_WS_RUNTIME'] +pid = os.environ.get('E2E_WS_PARENT_ID', '') +if pid: + payload['parent_id'] = pid +print(json.dumps(payload)) +" +} + +if [ -n "$PROVISION_TEMPLATE" ]; then + log "5/11 Provisioning parent workspace (runtime=$RUNTIME via template=$PROVISION_TEMPLATE → claude-code adapter)..." +else + log "5/11 Provisioning parent workspace (runtime=$RUNTIME)..." +fi PARENT_RESP=$(tenant_call POST /workspaces \ -H "Content-Type: application/json" \ - -d "{\"name\":\"E2E Parent\",\"runtime\":\"$RUNTIME\",\"tier\":2,\"model\":\"$MODEL_SLUG\",\"secrets\":$SECRETS_JSON}") -PARENT_ID=$(echo "$PARENT_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin)['id'])") + -d "$(build_create_payload 'E2E Parent')") +# Surface the workspace-create error CLEARLY instead of dying on a Python +# KeyError when the response has no 'id'. The load-bearing cases this names: +# - google-adk: RUNTIME_UNSUPPORTED 422 if google-adk is absent from the +# deployed manifest.json's workspace_templates (the Create-handler +# allowlist is manifest-derived — runtime_registry.go). 
google-adk is in +# providers.yaml + provisioner/registry.go + registry_gen but NOT (yet) in +# manifest.json, so it cannot be provisioned by `runtime` until the +# manifest gains it. Flagged for the CTO — this arm REDS until then. +# - seo-agent: an "invalid template" 400 if the seo-agent template isn't +# present in the tenant's configs/cache dir (template-cache refresh gap). +PARENT_ID=$(echo "$PARENT_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || echo "") +if [ -z "$PARENT_ID" ]; then + fail "Parent workspace create returned no 'id' (runtime=$RUNTIME, template=${PROVISION_TEMPLATE:-}). Response: $(printf '%s' "$PARENT_RESP" | sanitize_http_body)" +fi log " PARENT_ID=$PARENT_ID" # ─── 6. Provision child (full mode only) ──────────────────────────────── @@ -624,8 +735,11 @@ if [ "$MODE" = "full" ]; then log "6/11 Provisioning child workspace..." CHILD_RESP=$(tenant_call POST /workspaces \ -H "Content-Type: application/json" \ - -d "{\"name\":\"E2E Child\",\"runtime\":\"$RUNTIME\",\"tier\":2,\"model\":\"$MODEL_SLUG\",\"parent_id\":\"$PARENT_ID\",\"secrets\":$SECRETS_JSON}") - CHILD_ID=$(echo "$CHILD_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin)['id'])") + -d "$(build_create_payload 'E2E Child' "$PARENT_ID")") + CHILD_ID=$(echo "$CHILD_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || echo "") + if [ -z "$CHILD_ID" ]; then + fail "Child workspace create returned no 'id' (runtime=$RUNTIME, template=${PROVISION_TEMPLATE:-}). Response: $(printf '%s' "$CHILD_RESP" | sanitize_http_body)" + fi log " CHILD_ID=$CHILD_ID" else log "6/11 Canary mode — skipping child workspace" @@ -1416,6 +1530,135 @@ except Exception: fi fi +# ─── 10b. 
Pause/Resume + Hibernate/Resume lifecycle transitions ───────── +# Exercise the REAL workspace lifecycle state machine on the provisioned +# parent — the transitions that previously had only handler unit tests +# (handlers_additional_test.go / hibernation_test.go) and NO real-infra +# coverage. Each transition is asserted against the live DB-backed status the +# GET /workspaces/:id endpoint returns, so a regression in the Pause/Resume/ +# Hibernate handlers (workspace_restart.go) or their CP stop/re-provision +# wiring fails the gate instead of silently leaking an EC2 / wedging a tenant. +# +# Contract (workspace_restart.go): +# POST /pause online → 'paused' (container stopped, url cleared) {"status":"paused"} +# POST /resume paused → 'provisioning' → … → 'online' (re-provision) {"status":"provisioning"} +# POST /hibernate online → 'hibernating' → 'hibernated' (container stopped) {"status":"hibernated"} +# auto-wake next A2A message/send on a hibernated ws → online +# +# Gated to full MODE (smoke has no parent-stability budget) + E2E_LIFECYCLE. +# Runs LAST (after all read-only A2A/memory/peer checks) so the pause/stop +# cycles don't disturb the earlier assertions. Skips are LOUD (logged), and +# any broken transition hard-fails — never a silent pass. +if [ "$MODE" = "full" ] && [ "${E2E_LIFECYCLE:-auto}" != "off" ]; then + log "10b/11 Lifecycle transitions: pause→resume→online, hibernate→resume(wake) on parent $PARENT_ID..." + + lifecycle_status() { # echoes the live workspace status + tenant_call GET "/workspaces/$PARENT_ID" 2>/dev/null \ + | python3 -c "import json,sys; print(json.load(sys.stdin).get('status') or '')" 2>/dev/null || echo "" + } + # Bounded readiness-poll for a target status — same fail-closed shape as + # wait_workspaces_online_routable, but for an arbitrary terminal status. 
+ wait_status() { # $1=target $2=timeout_secs $3=label + local target="$1" timeout="$2" label="$3" + local deadline cur last="" + deadline=$(( $(date +%s) + timeout )) + while true; do + cur=$(lifecycle_status) + if [ "$cur" != "$last" ]; then log " parent status → ${cur:-}"; last="$cur"; fi + [ "$cur" = "$target" ] && return 0 + if [ "$(date +%s)" -gt "$deadline" ]; then + log " [lifecycle] $label never reached '$target' within ${timeout}s (last='$cur')" + return 1 + fi + sleep 10 + done + } + + # ── pause → paused ── + PAUSE_RESP=$(tenant_call POST "/workspaces/$PARENT_ID/pause" 2>/dev/null || echo '{}') + PAUSE_STATUS=$(echo "$PAUSE_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))" 2>/dev/null || echo "") + [ "$PAUSE_STATUS" = "paused" ] || fail "Pause: POST /pause returned status='$PAUSE_STATUS' (expected 'paused'). Body: ${PAUSE_RESP:0:200}" + # Poll the DB-backed status — the response body could lie; the GET proves the row. + wait_status "paused" 120 "pause" || fail "Pause: workspace $PARENT_ID never settled at status=paused (DB row) — Pause handler / CP stop regression (workspace_restart.go Pause)." + ok " pause → paused (DB-verified)" + + # ── resume → provisioning → online ── + RESUME_RESP=$(tenant_call POST "/workspaces/$PARENT_ID/resume" 2>/dev/null || echo '{}') + RESUME_STATUS=$(echo "$RESUME_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))" 2>/dev/null || echo "") + [ "$RESUME_STATUS" = "provisioning" ] || fail "Resume: POST /resume returned status='$RESUME_STATUS' (expected 'provisioning'). Body: ${RESUME_RESP:0:200}" + # Resume re-provisions from the preserved config volume; reuse the same + # online+routable readiness boundary the initial boot used (no fresh EC2 + # cold-start, but CP re-provision + heartbeat recovery can still take minutes). + wait_workspaces_online_routable " Waiting for parent to return online after resume (up to $((WORKSPACE_ONLINE_TIMEOUT_SECS/60)) min)..." 
"$PARENT_ID" + ok " resume → provisioning → online (DB-verified)" + + # ── hibernate → hibernated ── + HIB_RESP=$(tenant_call POST "/workspaces/$PARENT_ID/hibernate?force=true" 2>/dev/null || echo '{}') + HIB_STATUS=$(echo "$HIB_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))" 2>/dev/null || echo "") + [ "$HIB_STATUS" = "hibernated" ] || fail "Hibernate: POST /hibernate?force=true returned status='$HIB_STATUS' (expected 'hibernated'). Body: ${HIB_RESP:0:200}" + # The handler runs the claim→stop→'hibernated' sequence; poll the DB row to + # confirm it landed on 'hibernated' (not stuck mid-'hibernating'). + wait_status "hibernated" 120 "hibernate" || fail "Hibernate: workspace $PARENT_ID never settled at status=hibernated (DB row) — Hibernate handler / CP stop regression (workspace_restart.go HibernateWorkspace)." + ok " hibernate → hibernated (DB-verified)" + + # ── resume-from-hibernate via auto-wake on next A2A ── + # A hibernated workspace auto-wakes on the next incoming A2A message/send + # (no explicit /resume — Resume only handles status=paused). Send a wake + # A2A and assert the workspace returns to online. We accept transient cold + # 5xx during wake (same edge class the PONG probe tolerates) and poll the + # status to the online boundary rather than asserting on the single A2A code. + log " Hibernate auto-wake: sending A2A to wake hibernated parent..." + WAKE_PAYLOAD=$(python3 -c " +import json, uuid +print(json.dumps({ + 'jsonrpc': '2.0', + 'method': 'message/send', + 'id': 'e2e-wake-1', + 'params': { + 'message': { + 'role': 'user', + 'messageId': f'e2e-wake-{uuid.uuid4().hex[:8]}', + 'parts': [{'kind': 'text', 'text': 'This is the platform lifecycle smoke test waking a hibernated workspace. 
No tools or memory are needed — please respond with exactly the single token: WOKE'}] + } + } +})) +") + WAKE_TMP=$(mktemp -t wake_a2a.XXXXXX) + for WAKE_ATTEMPT in $(seq 1 12); do + : >"$WAKE_TMP" + set +e + WAKE_CODE=$(tenant_call POST "/workspaces/$PARENT_ID/a2a" \ + --max-time 90 \ + -H "Content-Type: application/json" \ + -d "$WAKE_PAYLOAD" \ + -o "$WAKE_TMP" -w '%{http_code}' 2>/dev/null) + WAKE_RC=$? + set -e + WAKE_CODE=${WAKE_CODE:-000} + if [ "$WAKE_RC" = "0" ] && [ "$WAKE_CODE" -ge 200 ] && [ "$WAKE_CODE" -lt 300 ]; then + break + fi + WAKE_SAFE_BODY=$(cat "$WAKE_TMP" 2>/dev/null | sanitize_http_body) + # Wake legitimately returns transient 5xx while the container restarts — + # retry that class only (bounded), never a 4xx. + if echo "$WAKE_CODE" | grep -Eq '^(502|503|504)$' && [ "$WAKE_ATTEMPT" -lt 12 ]; then + log " wake A2A cold/restart attempt $WAKE_ATTEMPT/12 returned $WAKE_CODE: ${WAKE_SAFE_BODY:0:120}" + sleep 15 + continue + fi + break + done + rm -f "$WAKE_TMP" + # The auto-wake contract is the STATUS transition (hibernated → online), not + # the A2A body content — assert the live DB row, the real readiness signal. + wait_status "online" "$WORKSPACE_ONLINE_TIMEOUT_SECS" "hibernate-wake" \ + || fail "Hibernate auto-wake: parent $PARENT_ID never returned to status=online after a wake A2A (last A2A http=$WAKE_CODE) — auto-wake-on-message regression (a hibernated ws must re-provision on the next A2A)." + ok " hibernate → online via auto-wake A2A (DB-verified)" + ok "Lifecycle transitions passed: pause→resume→online + hibernate→wake→online" +else + log "10b/11 Lifecycle transitions skipped (MODE=$MODE, E2E_LIFECYCLE=${E2E_LIFECYCLE:-auto}) — pause/resume/hibernate only run in full mode with E2E_LIFECYCLE!=off." +fi + # ─── 11. Teardown runs via trap ──────────────────────────────────────── # Fail-closed-on-skip: before declaring PASS, assert (when CI demanded a live # run) that every load-bearing lifecycle milestone actually fired. 
A run that -- 2.52.0