Merge pull request 'test(e2e): staging coverage for every runtime + resume/hibernate lifecycle' (#2296) from harden/staging-saas-all-runtimes into main
ci-arm64-advisory / fast-checks (push) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 1s
CI / Detect changes (push) Successful in 7s
E2E API Smoke Test / detect-changes (push) Successful in 7s
E2E Chat / detect-changes (push) Successful in 7s
E2E Staging Reconciler (heals terminated EC2) / pr-validate (push) Successful in 8s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 14s
CI / Python Lint & Test (push) Successful in 24s
Block internal-flavored paths / Block forbidden paths (push) Successful in 27s
Handlers Postgres Integration / detect-changes (push) Successful in 4s
Lint forbidden tenant-env keys / Scan for repo-host token write into tenant workspace surface (push) Successful in 3s
lint-required-workflows-docker-host-pinned / Lint docker-host pin on docker-touching workflows (push) Successful in 4s
CI / Platform (Go) (push) Successful in 1s
CI / Canvas (Next.js) (push) Successful in 2s
Lint curl status-code capture / Scan workflows for curl status-capture pollution (push) Successful in 34s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 35s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 13s
CI / Shellcheck (E2E scripts) (push) Successful in 12s
E2E Staging SaaS (full lifecycle) / pr-validate (push) Successful in 53s
E2E Chat / E2E Chat (push) Successful in 9s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 9s
CI / Canvas Deploy Status (push) Successful in 1s
CI / all-required (push) Successful in 27s
Lint workflow YAML (Gitea-1.22.6-hostile shapes) / Lint workflow YAML for Gitea-1.22.6-hostile shapes (push) Successful in 1m11s
lint-continue-on-error-tracking / lint-continue-on-error-tracking (push) Failing after 1m23s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (push) Failing after 2m15s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 1m54s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 2m26s
publish-workspace-server-image / build-and-push (push) Successful in 5m19s
E2E Staging SaaS (full lifecycle) / E2E Staging Platform Boot (push) Failing after 5m33s
E2E Staging External Runtime / E2E Staging External Runtime (push) Successful in 7m13s
publish-workspace-server-image / Production auto-deploy (push) Successful in 2m39s
E2E Staging Reconciler (heals terminated EC2) / E2E Staging Reconciler (push) Failing after 17m26s
ci-arm64-advisory / fast-checks (push) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 1s
CI / Detect changes (push) Successful in 7s
E2E API Smoke Test / detect-changes (push) Successful in 7s
E2E Chat / detect-changes (push) Successful in 7s
E2E Staging Reconciler (heals terminated EC2) / pr-validate (push) Successful in 8s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 14s
CI / Python Lint & Test (push) Successful in 24s
Block internal-flavored paths / Block forbidden paths (push) Successful in 27s
Handlers Postgres Integration / detect-changes (push) Successful in 4s
Lint forbidden tenant-env keys / Scan for repo-host token write into tenant workspace surface (push) Successful in 3s
lint-required-workflows-docker-host-pinned / Lint docker-host pin on docker-touching workflows (push) Successful in 4s
CI / Platform (Go) (push) Successful in 1s
CI / Canvas (Next.js) (push) Successful in 2s
Lint curl status-code capture / Scan workflows for curl status-capture pollution (push) Successful in 34s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 35s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 13s
CI / Shellcheck (E2E scripts) (push) Successful in 12s
E2E Staging SaaS (full lifecycle) / pr-validate (push) Successful in 53s
E2E Chat / E2E Chat (push) Successful in 9s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 9s
CI / Canvas Deploy Status (push) Successful in 1s
CI / all-required (push) Successful in 27s
Lint workflow YAML (Gitea-1.22.6-hostile shapes) / Lint workflow YAML for Gitea-1.22.6-hostile shapes (push) Successful in 1m11s
lint-continue-on-error-tracking / lint-continue-on-error-tracking (push) Failing after 1m23s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (push) Failing after 2m15s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 1m54s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 2m26s
publish-workspace-server-image / build-and-push (push) Successful in 5m19s
E2E Staging SaaS (full lifecycle) / E2E Staging Platform Boot (push) Failing after 5m33s
E2E Staging External Runtime / E2E Staging External Runtime (push) Successful in 7m13s
publish-workspace-server-image / Production auto-deploy (push) Successful in 2m39s
E2E Staging Reconciler (heals terminated EC2) / E2E Staging Reconciler (push) Failing after 17m26s
This commit was merged in pull request #2296.
This commit is contained in:
@@ -124,7 +124,12 @@ jobs:
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 45
|
||||
# Raised 45→75: step 10b now exercises pause→resume→online +
|
||||
# hibernate→wake→online, each of which RE-PROVISIONS the parent (CP
|
||||
# re-provision + heartbeat recovery, not a fresh EC2 cold start, but still
|
||||
# minutes). The base provision→online→A2A matrix fits in ~35 min; the two
|
||||
# extra lifecycle reprovisions need headroom under WORKSPACE_ONLINE_TIMEOUT.
|
||||
timeout-minutes: 75
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
@@ -184,6 +189,11 @@ jobs:
|
||||
E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || github.event.inputs.runtime == 'google-adk' && 'google_genai:gemini-2.5-pro' || 'minimax:MiniMax-M2.7' }}
|
||||
E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
|
||||
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
|
||||
# Lifecycle transitions (step 10b): pause→resume→online +
|
||||
# hibernate→wake→online on the provisioned parent. `auto` runs them in
|
||||
# full mode (this job). Set `off` to skip the ~2x-reprovision cost on an
|
||||
# ad-hoc dispatch. The timeout-minutes above is sized for this being on.
|
||||
E2E_LIFECYCLE: auto
|
||||
# Fail-closed-on-skip: in CI the harness MUST prove ≥1 full
|
||||
# provision→online→A2A cycle. If it reaches the end having validated
|
||||
# nothing (a future short-circuit / skip path), it exits 5 rather than
|
||||
|
||||
@@ -83,7 +83,17 @@ pick_model_slug() {
|
||||
fi
|
||||
case "$runtime" in
|
||||
hermes) printf 'openai/gpt-4o' ;;
|
||||
claude-code)
|
||||
# seo-agent is a claude-code-adapter template VARIANT selected by
|
||||
# template name (template="seo-agent"), not a distinct registry runtime
|
||||
# (it is absent from manifest.json + runtime_registry.go). Its config.yaml
|
||||
# declares `runtime: claude-code` and copies the claude-code `providers:`
|
||||
# block (providers.yaml:21 "The same block is copy-pasted into the seo-agent
|
||||
# template"), so its model dispatch is IDENTICAL to claude-code's: the
|
||||
# MiniMax BYOK colon id (the staging-default key path), else direct
|
||||
# Anthropic, else the OAuth `sonnet` alias. Sharing the claude-code branch
|
||||
# keeps the SSOT one place — a seo-agent run is just a claude-code run
|
||||
# behind a productized template skin.
|
||||
claude-code|seo-agent)
|
||||
if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
|
||||
# Namespaced (colon) BYOK id, not bare "MiniMax-M2" (#2263 deploy-skew):
|
||||
# bare ids can lag the deployed staging ws-server's compiled registry,
|
||||
@@ -102,6 +112,20 @@ pick_model_slug() {
|
||||
printf 'sonnet'
|
||||
fi
|
||||
;;
|
||||
# google-adk: Gemini via two distinct provider arms in providers.yaml
|
||||
# runtimes.google-adk:
|
||||
# * platform arm → `platform:gemini-2.5-pro` (keyless Vertex via the CP
|
||||
# LLM proxy + server-side WIF mint; the org-compliant PROD path). This
|
||||
# id is selected via E2E_LLM_PATH=platform above, NOT here.
|
||||
# * google arm (AI Studio BYOK) → bare `gemini-2.5-pro` with the tenant's
|
||||
# own GOOGLE_API_KEY. This is the staging-exercisable path (no WIF
|
||||
# provisioning needed) and is what this branch selects.
|
||||
# The workflow may further override with E2E_MODEL_SLUG=google_genai:gemini-2.5-pro
|
||||
# (the adapter's provider:model spelling) — E2E_MODEL_SLUG wins at the top
|
||||
# of this function, so both forms are supported.
|
||||
google-adk)
|
||||
printf 'gemini-2.5-pro'
|
||||
;;
|
||||
*) printf 'openai/gpt-4o' ;; # safest fallback (matches hermes)
|
||||
esac
|
||||
}
|
||||
|
||||
@@ -57,6 +57,29 @@ assert_eq "claude-code + Anthropic API key → Anthropic API model" "$got" "clau
|
||||
got=$(unset E2E_MODEL_SLUG; E2E_MINIMAX_API_KEY="mx-priority" E2E_ANTHROPIC_API_KEY="sk-ant-loser" pick_model_slug claude-code)
|
||||
assert_eq "claude-code + both keys → MiniMax priority" "$got" "minimax:MiniMax-M2.7"
|
||||
|
||||
# ── seo-agent (claude-code-adapter template variant) ──
|
||||
# seo-agent shares the claude-code dispatch branch (it reuses the claude-code
|
||||
# adapter + the same copied providers block). Pin that it resolves IDENTICALLY
|
||||
# to claude-code for every key path so a future refactor can't accidentally
|
||||
# fork seo-agent's model selection from claude-code's.
|
||||
run_test "seo-agent → claude-code default alias" seo-agent "sonnet"
|
||||
|
||||
got=$(unset E2E_MODEL_SLUG E2E_ANTHROPIC_API_KEY; E2E_MINIMAX_API_KEY="mx-test" pick_model_slug seo-agent)
|
||||
assert_eq "seo-agent + MiniMax key → MiniMax model (==claude-code)" "$got" "minimax:MiniMax-M2.7"
|
||||
|
||||
got=$(unset E2E_MODEL_SLUG E2E_MINIMAX_API_KEY; E2E_ANTHROPIC_API_KEY="sk-ant-test" pick_model_slug seo-agent)
|
||||
assert_eq "seo-agent + Anthropic key → Anthropic model (==claude-code)" "$got" "claude-sonnet-4-6"
|
||||
|
||||
# ── google-adk (Gemini) ──
|
||||
# AI-Studio BYOK arm → bare gemini-2.5-pro (providers.yaml runtimes.google-adk
|
||||
# `google` arm). The platform/Vertex arm is selected via E2E_LLM_PATH=platform
|
||||
# (a platform: id), not this dispatch. Pin the bare form so a drift to the
|
||||
# platform id (which would change billing/route) is caught.
|
||||
run_test "google-adk → AI-Studio bare gemini id" google-adk "gemini-2.5-pro"
|
||||
|
||||
got=$(E2E_MODEL_SLUG="google_genai:gemini-2.5-pro" pick_model_slug google-adk)
|
||||
assert_eq "google-adk + E2E_MODEL_SLUG override (adapter spelling)" "$got" "google_genai:gemini-2.5-pro"
|
||||
|
||||
# ── Fallback for unknown runtime ──
|
||||
# Picks slash-form (hermes-shaped) since hermes is the historical
|
||||
# default and most third-party runtimes behave hermes-like. Pinning
|
||||
|
||||
@@ -26,7 +26,26 @@
|
||||
# the workspace stuck on 'online' indefinitely.)
|
||||
#
|
||||
# Hibernation is intentionally NOT covered here — it has its own timing
|
||||
# model (idle threshold) and warrants a separate harness.
|
||||
# model (idle threshold) and warrants a separate harness. (The
|
||||
# pause→resume + hibernate→wake transitions for PLATFORM-compute runtimes
|
||||
# are covered by test_staging_full_saas.sh step 10b.)
|
||||
#
|
||||
# BYO meta-runtime arms (kimi, kimi-cli) — added 2026-06-05:
|
||||
# kimi and kimi-cli are BYO-compute meta-runtimes (isExternalLikeRuntime:
|
||||
# runtime_registry.go:141-147) that go through the SAME external/poll
|
||||
# provisioning path as `external` — create with external:true →
|
||||
# awaiting_agent, register → online — but with their runtime LABEL
|
||||
# PRESERVED (workspace.go:752-770 normalizeExternalRuntime keeps the
|
||||
# specific label, does NOT coerce to generic "external", so the canvas
|
||||
# shows the right runtime). They had ONLY validation/unit coverage and
|
||||
# were NEVER provisioned→online in any e2e. Step 7b adds, for EACH of
|
||||
# {kimi, kimi-cli}: create → assert awaiting_agent + label-preserved →
|
||||
# register(poll) → assert online + label-preserved → A2A → assert the
|
||||
# poll-mode {status:"queued"} envelope (a2a_proxy.go:462-477). The A2A
|
||||
# arm proves the a2a proxy routes a BYO meta-runtime to the poll queue
|
||||
# (200 + queued) rather than 404/500 — the meaningful round-trip for a
|
||||
# workspace with no standing live agent. A real BYO-agent COMPLETION
|
||||
# needs a standing kimi BYO cell (flagged for the CTO in the PR body).
|
||||
#
|
||||
# Required env (mirrors test_staging_full_saas.sh):
|
||||
# MOLECULE_CP_URL default: https://staging-api.moleculesai.app
|
||||
@@ -456,6 +475,108 @@ RECOVERED_STATUS=$(echo "$GET_RESP" | python3 -c "import json,sys; print(json.lo
|
||||
ok "Re-register succeeded — awaiting_agent → online (operator-recoverable)"
|
||||
require_transition "re-register: awaiting_agent → online (recovery)"
|
||||
|
||||
# ─── 7b. BYO meta-runtime arms: kimi + kimi-cli ─────────────────────────
|
||||
# kimi and kimi-cli are BYO-compute meta-runtimes (isExternalLikeRuntime).
|
||||
# They share the external/poll provisioning path but PRESERVE their runtime
|
||||
# label (workspace.go normalizeExternalRuntime). They had no provision→online
|
||||
# e2e until now. For EACH: create(external:true, runtime=<rt>) → assert
|
||||
# awaiting_agent + label preserved → register(poll) → assert online + label
|
||||
# preserved → A2A → assert the poll-mode {status:"queued"} envelope.
|
||||
#
|
||||
# Why poll-mode {queued} is the A2A assertion (not a real completion): there
|
||||
# is no standing live BYO agent in staging, so the meaningful round-trip is
|
||||
# that the a2a proxy ROUTES a BYO meta-runtime to the poll queue (HTTP 200 +
|
||||
# {status:"queued", delivery_mode:"poll"}, a2a_proxy.go:462-477) instead of
|
||||
# 404/500. A real BYO-agent COMPLETION needs a standing kimi BYO cell — see
|
||||
# the CTO flag in the PR body.
|
||||
# Provision → online → A2A smoke for ONE BYO meta-runtime.
#
#   $1 = runtime label (kimi | kimi-cli)
#
# Sequence: create(external:true) → assert awaiting_agent + runtime label
# preserved → register(poll) → assert online + delivery_mode=poll → A2A →
# assert the {status:"queued", delivery_mode:"poll"} envelope.
#
# Side effects: overwrites the globals WS_AUTH_TOKEN and REGISTER_RESP (both
# are consumed / set by register_with_retry). Any broken expectation calls
# `fail`; the function only returns on full success.
#
# Every JSON field is read through _byo_json_field, which yields an empty
# string on an unparsable body, so a non-JSON response (e.g. an HTML 502 page)
# lands on the named `fail` diagnostic below instead of killing the script
# under `set -e` with a Python traceback.
byo_meta_runtime_arm() { # $1 = runtime label (kimi | kimi-cli)
  local rt="$1"
  local resp wid status auth get_resp db_status db_runtime reg_dm online_status

  log " [$rt] create (external:true, runtime=$rt)..."
  resp=$(tenant_call POST /workspaces \
    -d "$(printf '{"name":"ext-%s-e2e","runtime":"%s","external":true}' "$rt" "$rt")")
  wid=$(printf '%s' "$resp" | _byo_json_field id)
  status=$(printf '%s' "$resp" | _byo_json_field status)
  # The token may sit under connection.auth_token or at the top level.
  auth=$(echo "$resp" | python3 -c "
import json,sys
try:
    d=json.load(sys.stdin); conn=d.get('connection') or {}
    print(conn.get('auth_token','') or d.get('auth_token',''))
except Exception:
    print('')
")
  if [ -z "$wid" ]; then
    # The create response can carry the workspace auth token — sanitize and
    # truncate before it reaches the CI log, like the other failure paths.
    fail "[$rt] create missing id: $(printf '%s' "$resp" | sanitize_http_body | head -c 300)"
  fi
  [ "$status" = "awaiting_agent" ] || fail "[$rt] create status='$status' (expected awaiting_agent — external/poll path)"
  if [ -z "$auth" ]; then
    fail "[$rt] create returned no workspace auth token — register impossible"
  fi

  # Assert the runtime LABEL was preserved (NOT coerced to generic 'external').
  get_resp=$(tenant_call GET "/workspaces/$wid")
  db_status=$(printf '%s' "$get_resp" | _byo_json_field status)
  db_runtime=$(printf '%s' "$get_resp" | _byo_json_field runtime)
  [ "$db_status" = "awaiting_agent" ] || fail "[$rt] DB row status=$db_status (expected awaiting_agent)"
  [ "$db_runtime" = "$rt" ] || fail "[$rt] runtime label coerced to '$db_runtime' (expected '$rt' — normalizeExternalRuntime must PRESERVE the BYO meta-runtime label, workspace.go:752-770)"
  ok " [$rt] create → awaiting_agent, runtime label preserved ✓"

  # register(poll) → online. Reuse register_with_retry by setting WS_AUTH_TOKEN
  # (the helper reads it as a global). REGISTER_RESP is set by the helper.
  WS_AUTH_TOKEN="$auth"
  local body
  body=$(printf '{"id":"%s","url":"https://example.invalid:443","delivery_mode":"poll","agent_card":{"name":"e2e-%s","skills":[{"id":"echo","name":"Echo"}]}}' "$wid" "$rt")
  REGISTER_RESP=""
  register_with_retry "[$rt] register" "$body" \
    || fail "[$rt] register returned non-200 after bounded retries — body: $(printf '%s' "$REGISTER_RESP" | sanitize_http_body | head -c 300)"
  online_status=$(tenant_call GET "/workspaces/$wid" | _byo_json_field status)
  [ "$online_status" = "online" ] || fail "[$rt] expected online after register, got $online_status"
  # Only the first line of REGISTER_RESP is parsed as the JSON body.
  reg_dm=$(echo "$REGISTER_RESP" | head -n1 | _byo_json_field delivery_mode)
  [ "$reg_dm" = "poll" ] || fail "[$rt] register response delivery_mode='$reg_dm' (expected poll)"
  ok " [$rt] register → online (delivery_mode=poll) ✓"

  # A2A → assert poll-mode {status:"queued"} envelope. Bounded retry on the
  # transient cold-edge 5xx class; a 4xx/non-queued 2xx is a real bug.
  local a2a_payload a2a_tmp a2a_code a2a_rc a2a_status a2a_dm attempt
  a2a_payload=$(python3 -c "
import json, uuid
print(json.dumps({
    'jsonrpc':'2.0','method':'message/send','id':'e2e-byo-1',
    'params':{'message':{'role':'user','messageId':f'e2e-{uuid.uuid4().hex[:8]}',
              'parts':[{'kind':'text','text':'BYO meta-runtime poll-route smoke. Respond: OK'}]}}
}))
")
  a2a_tmp=$(mktemp -t byo_a2a.XXXXXX)
  for attempt in $(seq 1 8); do
    : >"$a2a_tmp"
    # errexit is suspended around curl so a transport error is captured in
    # a2a_rc instead of terminating the script.
    set +e
    a2a_code=$(curl -sS --max-time 60 -X POST "$TENANT_URL/workspaces/$wid/a2a" \
      -H "Authorization: Bearer $TENANT_TOKEN" \
      -H "X-Molecule-Org-Id: $ORG_ID" \
      -H "Content-Type: application/json" \
      -d "$a2a_payload" -o "$a2a_tmp" -w '%{http_code}' 2>/dev/null)
    a2a_rc=$?
    set -e
    a2a_code=${a2a_code:-000}
    if [ "$a2a_rc" = "0" ] && [ "$a2a_code" = "200" ]; then break; fi
    if echo "$a2a_code" | grep -Eq '^(502|503|504)$' && [ "$attempt" -lt 8 ]; then
      log " [$rt] A2A transient $a2a_code attempt $attempt/8"; sleep 10; continue
    fi
    break
  done
  # Read both envelope fields before the temp file is removed.
  a2a_status=$(_byo_json_field status <"$a2a_tmp")
  a2a_dm=$(_byo_json_field delivery_mode <"$a2a_tmp")
  rm -f "$a2a_tmp"
  [ "$a2a_rc" = "0" ] && [ "$a2a_code" = "200" ] \
    || fail "[$rt] A2A POST failed (rc=$a2a_rc, http=$a2a_code) — a BYO meta-runtime poll-mode A2A must 200 with a queued envelope, not error"
  [ "$a2a_status" = "queued" ] && [ "$a2a_dm" = "poll" ] \
    || fail "[$rt] A2A returned status='$a2a_status' delivery_mode='$a2a_dm' (expected queued/poll — a2a proxy must route a BYO meta-runtime to the poll queue, a2a_proxy.go:462-477)"
  ok " [$rt] A2A → poll-mode queued envelope ✓ (provision→online→A2A proven for $rt)"
}

# Private helper for byo_meta_runtime_arm.
#   $1 = top-level key; the JSON document is read from stdin.
# Prints the value of that key, or an empty string when the input is not
# valid JSON, is not an object, the key is missing, or the value is null.
# Always succeeds, so it is safe in a command substitution under `set -e`.
_byo_json_field() {
  python3 -c "
import json, sys
try:
    doc = json.load(sys.stdin)
    val = doc.get(sys.argv[1], '') if isinstance(doc, dict) else ''
    print('' if val is None else val)
except Exception:
    print('')
" "$1" 2>/dev/null || echo ""
}
|
||||
|
||||
log "7c/8 BYO meta-runtime arms (kimi, kimi-cli) — provision→online→A2A..."
|
||||
byo_meta_runtime_arm "kimi"
|
||||
byo_meta_runtime_arm "kimi-cli"
|
||||
ok "BYO meta-runtime arms passed for kimi + kimi-cli"
|
||||
|
||||
# ─── 8. Done — cleanup runs in the EXIT trap ───────────────────────────
|
||||
# REQUIRE_LIVE belt-and-braces: assert here too (in addition to the EXIT
|
||||
# trap) so the failure surfaces in step order, not only post-teardown.
|
||||
|
||||
@@ -24,6 +24,19 @@
|
||||
#
|
||||
# Optional env:
|
||||
# E2E_RUNTIME hermes (default) | claude-code | codex | openclaw
|
||||
# | seo-agent | google-adk
|
||||
# - seo-agent: a claude-code-adapter template
|
||||
# VARIANT (not a distinct registry runtime).
|
||||
# Selected via the `template` field (config.yaml
|
||||
# resolves runtime=claude-code); reuses the
|
||||
# same MiniMax/claude-code key path. See the
|
||||
# TEMPLATE derivation + SECRETS_JSON block.
|
||||
# - google-adk: Gemini. The AI-Studio-keyed BYOK
|
||||
# path (E2E_GOOGLE_API_KEY) is staging-
|
||||
# exercisable here; the keyless Vertex PROD
|
||||
# path needs WIF (see header note + the CTO
|
||||
# flag in the PR body) and is selected via
|
||||
# E2E_LLM_PATH=platform + a platform: model.
|
||||
# E2E_PROVISION_TIMEOUT_SECS default 900 (15 min cold EC2 budget)
|
||||
# E2E_WORKSPACE_ONLINE_TIMEOUT_SECS default 3600 (60 min — hermes
|
||||
# cold-boot worst-case + slack). Raised from
|
||||
@@ -47,6 +60,18 @@
|
||||
# tear down cleanly (and exit 4 on leak).
|
||||
# Used by a dedicated sanity workflow
|
||||
# that verifies the safety net.
|
||||
# E2E_LIFECYCLE auto (default) | off
|
||||
# When auto + MODE=full, exercises the
|
||||
# pause→resume→online and hibernate→resume(wake)
|
||||
# state transitions on the provisioned parent
|
||||
# (step 10b). These are REAL transitions on the
|
||||
# live tenant (Pause stops the container + sets
|
||||
# status=paused; Resume re-provisions →
|
||||
# provisioning → online; Hibernate stops +
|
||||
# status=hibernated; the next A2A auto-wakes it).
|
||||
# Set `off` for a fast smoke that skips the
|
||||
# ~2x-reprovision cost. In smoke MODE it is
|
||||
# skipped regardless (no parent stability budget).
|
||||
# E2E_REQUIRE_LIVE 1 → fail-closed-on-skip guard (CI sets this).
|
||||
# When set, the run MUST actually complete
|
||||
# ≥1 full provision→online→A2A cycle. A run
|
||||
@@ -592,6 +617,24 @@ print(json.dumps({
|
||||
'ANTHROPIC_API_KEY': k,
|
||||
}))
|
||||
")
|
||||
elif [ -n "${E2E_GOOGLE_API_KEY:-}" ]; then
|
||||
# google-adk AI-Studio BYOK path. The `google` provider entry
|
||||
# (providers.yaml:401-413) reads GEMINI_API_KEY / GOOGLE_API_KEY and dials
|
||||
# generativelanguage.googleapis.com — the tenant's OWN key, distinct from the
|
||||
# keyless-Vertex PROD path (which routes through the CP proxy + server-side
|
||||
# WIF and carries NO tenant credential). This branch exercises google-adk
|
||||
# being PROVISIONED AT ALL on staging; the Vertex-specific WIF path is flagged
|
||||
# for the CTO (needs extra provisioning) and is NOT reachable here. Inject
|
||||
# under both env names the provider accepts so the adapter resolves regardless
|
||||
# of which one it reads first.
|
||||
SECRETS_JSON=$(python3 -c "
|
||||
import json, os
|
||||
k = os.environ['E2E_GOOGLE_API_KEY']
|
||||
print(json.dumps({
|
||||
'GOOGLE_API_KEY': k,
|
||||
'GEMINI_API_KEY': k,
|
||||
}))
|
||||
")
|
||||
elif [ -n "${E2E_OPENAI_API_KEY:-}" ]; then
|
||||
SECRETS_JSON=$(python3 -c "
|
||||
import json, os
|
||||
@@ -611,11 +654,79 @@ fi
|
||||
MODEL_SLUG=$(pick_model_slug "$RUNTIME")
|
||||
log " MODEL_SLUG=$MODEL_SLUG"
|
||||
|
||||
log "5/11 Provisioning parent workspace (runtime=$RUNTIME)..."
|
||||
# ─── runtime → provision-selector resolution ────────────────────────────
|
||||
# Most runtimes are selected directly by the `runtime` field. seo-agent is
|
||||
# the exception: it is NOT a registry runtime (absent from manifest.json +
|
||||
# runtime_registry.go knownRuntimes) — it is a claude-code-adapter template
|
||||
# VARIANT selected by the `template` field. The ws-server Create handler reads
|
||||
# the template's config.yaml, which declares `runtime: claude-code`, and
|
||||
# resolves the concrete runtime from there (workspace.go:290-336). So for
|
||||
# seo-agent we send template="seo-agent" and OMIT runtime, letting the
|
||||
# template resolve it — sending an explicit runtime="seo-agent" would
|
||||
# RUNTIME_UNSUPPORTED-422 at workspace.go:374-384 because it is not in
|
||||
# knownRuntimes. PROVISION_TEMPLATE is "" for every real registry runtime.
|
||||
PROVISION_TEMPLATE=""
|
||||
case "$RUNTIME" in
|
||||
seo-agent) PROVISION_TEMPLATE="seo-agent" ;;
|
||||
esac
|
||||
|
||||
# Build the create payload in Python so the optional `template`/`runtime`
|
||||
# fields are emitted conditionally and the secrets blob is embedded without
|
||||
# shell-escaping hazards. Args: name, [parent_id|""].
|
||||
# Print the JSON body for POST /workspaces on stdout.
#
#   $1 = workspace name
#   $2 = parent workspace id (optional; omitted or "" → no parent_id field)
#
# Reads the globals RUNTIME, MODEL_SLUG, PROVISION_TEMPLATE and SECRETS_JSON.
# PROVISION_TEMPLATE and SECRETS_JSON are expanded with an empty default so an
# unset value cannot abort the script under nounset; the Python side already
# treats an empty template as "no template" and empty secrets as {}.
#
# The payload is assembled in Python so optional fields are emitted
# conditionally and the secrets blob is embedded without shell-escaping
# hazards. Values travel through the environment, never through the code
# string, so they cannot be misread as Python source.
build_create_payload() {
  local name="$1" parent_id="${2:-}"
  E2E_WS_NAME="$name" \
  E2E_WS_PARENT_ID="$parent_id" \
  E2E_WS_RUNTIME="$RUNTIME" \
  E2E_WS_TEMPLATE="${PROVISION_TEMPLATE:-}" \
  E2E_WS_MODEL="$MODEL_SLUG" \
  E2E_WS_SECRETS="${SECRETS_JSON:-}" \
  python3 -c "
import json, os
secrets = json.loads(os.environ['E2E_WS_SECRETS'] or '{}')
payload = {
    'name': os.environ['E2E_WS_NAME'],
    'tier': 2,
    'model': os.environ['E2E_WS_MODEL'],
    'secrets': secrets,
}
tmpl = os.environ.get('E2E_WS_TEMPLATE', '')
if tmpl:
    # Template-selected variant (seo-agent): the template's config.yaml
    # resolves runtime=claude-code server-side. Do NOT also send an explicit
    # runtime — seo-agent is not a registry runtime and would 422.
    payload['template'] = tmpl
else:
    payload['runtime'] = os.environ['E2E_WS_RUNTIME']
pid = os.environ.get('E2E_WS_PARENT_ID', '')
if pid:
    payload['parent_id'] = pid
print(json.dumps(payload))
"
}
|
||||
|
||||
if [ -n "$PROVISION_TEMPLATE" ]; then
|
||||
log "5/11 Provisioning parent workspace (runtime=$RUNTIME via template=$PROVISION_TEMPLATE → claude-code adapter)..."
|
||||
else
|
||||
log "5/11 Provisioning parent workspace (runtime=$RUNTIME)..."
|
||||
fi
|
||||
PARENT_RESP=$(tenant_call POST /workspaces \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"name\":\"E2E Parent\",\"runtime\":\"$RUNTIME\",\"tier\":2,\"model\":\"$MODEL_SLUG\",\"secrets\":$SECRETS_JSON}")
|
||||
PARENT_ID=$(echo "$PARENT_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin)['id'])")
|
||||
-d "$(build_create_payload 'E2E Parent')")
|
||||
# Surface the workspace-create error CLEARLY instead of dying on a Python
|
||||
# KeyError when the response has no 'id'. The load-bearing cases this names:
|
||||
# - google-adk: RUNTIME_UNSUPPORTED 422 if google-adk is absent from the
|
||||
# deployed manifest.json's workspace_templates (the Create-handler
|
||||
# allowlist is manifest-derived — runtime_registry.go). google-adk is in
|
||||
# providers.yaml + provisioner/registry.go + registry_gen but NOT (yet) in
|
||||
# manifest.json, so it cannot be provisioned by `runtime` until the
|
||||
# manifest gains it. Flagged for the CTO — this arm REDS until then.
|
||||
# - seo-agent: an "invalid template" 400 if the seo-agent template isn't
|
||||
# present in the tenant's configs/cache dir (template-cache refresh gap).
|
||||
PARENT_ID=$(echo "$PARENT_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || echo "")
|
||||
if [ -z "$PARENT_ID" ]; then
|
||||
fail "Parent workspace create returned no 'id' (runtime=$RUNTIME, template=${PROVISION_TEMPLATE:-<none>}). Response: $(printf '%s' "$PARENT_RESP" | sanitize_http_body)"
|
||||
fi
|
||||
log " PARENT_ID=$PARENT_ID"
|
||||
|
||||
# ─── 6. Provision child (full mode only) ────────────────────────────────
|
||||
@@ -624,8 +735,11 @@ if [ "$MODE" = "full" ]; then
|
||||
log "6/11 Provisioning child workspace..."
|
||||
CHILD_RESP=$(tenant_call POST /workspaces \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"name\":\"E2E Child\",\"runtime\":\"$RUNTIME\",\"tier\":2,\"model\":\"$MODEL_SLUG\",\"parent_id\":\"$PARENT_ID\",\"secrets\":$SECRETS_JSON}")
|
||||
CHILD_ID=$(echo "$CHILD_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin)['id'])")
|
||||
-d "$(build_create_payload 'E2E Child' "$PARENT_ID")")
|
||||
CHILD_ID=$(echo "$CHILD_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || echo "")
|
||||
if [ -z "$CHILD_ID" ]; then
|
||||
fail "Child workspace create returned no 'id' (runtime=$RUNTIME, template=${PROVISION_TEMPLATE:-<none>}). Response: $(printf '%s' "$CHILD_RESP" | sanitize_http_body)"
|
||||
fi
|
||||
log " CHILD_ID=$CHILD_ID"
|
||||
else
|
||||
log "6/11 Canary mode — skipping child workspace"
|
||||
@@ -1416,6 +1530,135 @@ except Exception:
|
||||
fi
|
||||
fi
|
||||
|
||||
# ─── 10b. Pause/Resume + Hibernate/Resume lifecycle transitions ─────────
|
||||
# Exercise the REAL workspace lifecycle state machine on the provisioned
|
||||
# parent — the transitions that previously had only handler unit tests
|
||||
# (handlers_additional_test.go / hibernation_test.go) and NO real-infra
|
||||
# coverage. Each transition is asserted against the live DB-backed status the
|
||||
# GET /workspaces/:id endpoint returns, so a regression in the Pause/Resume/
|
||||
# Hibernate handlers (workspace_restart.go) or their CP stop/re-provision
|
||||
# wiring fails the gate instead of silently leaking an EC2 / wedging a tenant.
|
||||
#
|
||||
# Contract (workspace_restart.go):
|
||||
# POST /pause online → 'paused' (container stopped, url cleared) {"status":"paused"}
|
||||
# POST /resume paused → 'provisioning' → … → 'online' (re-provision) {"status":"provisioning"}
|
||||
# POST /hibernate online → 'hibernating' → 'hibernated' (container stopped) {"status":"hibernated"}
|
||||
# auto-wake next A2A message/send on a hibernated ws → online
|
||||
#
|
||||
# Gated to full MODE (smoke has no parent-stability budget) + E2E_LIFECYCLE.
|
||||
# Runs LAST (after all read-only A2A/memory/peer checks) so the pause/stop
|
||||
# cycles don't disturb the earlier assertions. Skips are LOUD (logged), and
|
||||
# any broken transition hard-fails — never a silent pass.
|
||||
if [ "$MODE" = "full" ] && [ "${E2E_LIFECYCLE:-auto}" != "off" ]; then
|
||||
log "10b/11 Lifecycle transitions: pause→resume→online, hibernate→resume(wake) on parent $PARENT_ID..."
|
||||
|
||||
# Print the parent workspace's current status as reported by
# GET /workspaces/$PARENT_ID. Prints an empty line when the response cannot
# be parsed as JSON; a missing or null status prints as empty too.
lifecycle_status() {
  local extract="import json,sys; print(json.load(sys.stdin).get('status') or '')"
  if ! tenant_call GET "/workspaces/$PARENT_ID" 2>/dev/null | python3 -c "$extract" 2>/dev/null; then
    echo ""
  fi
}
|
||||
# Bounded readiness-poll for a target status — same fail-closed shape as
|
||||
# wait_workspaces_online_routable, but for an arbitrary terminal status.
|
||||
# Poll lifecycle_status until it reports the wanted status or the time budget
# is spent.
#
#   $1 = target status
#   $2 = timeout in seconds
#   $3 = label used in the timeout log line
#
# Returns 0 as soon as the status matches, 1 once the deadline has passed.
# Each distinct status value is logged once, when it first appears. The
# status is sampled every 10 seconds and the deadline is checked only after
# a sample, so the final sample may land slightly past the budget.
wait_status() {
  local want="$1" budget="$2" what="$3"
  local give_up_at seen previous=""
  give_up_at=$(( $(date +%s) + budget ))
  while :; do
    seen=$(lifecycle_status)
    if [ "$seen" != "$previous" ]; then
      log " parent status → ${seen:-<empty>}"
      previous="$seen"
    fi
    if [ "$seen" = "$want" ]; then
      return 0
    fi
    if [ "$(date +%s)" -le "$give_up_at" ]; then
      sleep 10
      continue
    fi
    log " [lifecycle] $what never reached '$want' within ${budget}s (last='$seen')"
    return 1
  done
}
|
||||
|
||||
# ── pause → paused ──
|
||||
PAUSE_RESP=$(tenant_call POST "/workspaces/$PARENT_ID/pause" 2>/dev/null || echo '{}')
|
||||
PAUSE_STATUS=$(echo "$PAUSE_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))" 2>/dev/null || echo "")
|
||||
[ "$PAUSE_STATUS" = "paused" ] || fail "Pause: POST /pause returned status='$PAUSE_STATUS' (expected 'paused'). Body: ${PAUSE_RESP:0:200}"
|
||||
# Poll the DB-backed status — the response body could lie; the GET proves the row.
|
||||
wait_status "paused" 120 "pause" || fail "Pause: workspace $PARENT_ID never settled at status=paused (DB row) — Pause handler / CP stop regression (workspace_restart.go Pause)."
|
||||
ok " pause → paused (DB-verified)"
|
||||
|
||||
# ── resume → provisioning → online ──
|
||||
RESUME_RESP=$(tenant_call POST "/workspaces/$PARENT_ID/resume" 2>/dev/null || echo '{}')
|
||||
RESUME_STATUS=$(echo "$RESUME_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))" 2>/dev/null || echo "")
|
||||
[ "$RESUME_STATUS" = "provisioning" ] || fail "Resume: POST /resume returned status='$RESUME_STATUS' (expected 'provisioning'). Body: ${RESUME_RESP:0:200}"
|
||||
# Resume re-provisions from the preserved config volume; reuse the same
|
||||
# online+routable readiness boundary the initial boot used (no fresh EC2
|
||||
# cold-start, but CP re-provision + heartbeat recovery can still take minutes).
|
||||
wait_workspaces_online_routable " Waiting for parent to return online after resume (up to $((WORKSPACE_ONLINE_TIMEOUT_SECS/60)) min)..." "$PARENT_ID"
|
||||
ok " resume → provisioning → online (DB-verified)"
|
||||
|
||||
# ── hibernate → hibernated ──
|
||||
HIB_RESP=$(tenant_call POST "/workspaces/$PARENT_ID/hibernate?force=true" 2>/dev/null || echo '{}')
|
||||
HIB_STATUS=$(echo "$HIB_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))" 2>/dev/null || echo "")
|
||||
[ "$HIB_STATUS" = "hibernated" ] || fail "Hibernate: POST /hibernate?force=true returned status='$HIB_STATUS' (expected 'hibernated'). Body: ${HIB_RESP:0:200}"
|
||||
# The handler runs the claim→stop→'hibernated' sequence; poll the DB row to
|
||||
# confirm it landed on 'hibernated' (not stuck mid-'hibernating').
|
||||
wait_status "hibernated" 120 "hibernate" || fail "Hibernate: workspace $PARENT_ID never settled at status=hibernated (DB row) — Hibernate handler / CP stop regression (workspace_restart.go HibernateWorkspace)."
|
||||
ok " hibernate → hibernated (DB-verified)"
|
||||
|
||||
# ── resume-from-hibernate via auto-wake on next A2A ──
# There is no explicit /resume call here: Resume only handles status=paused,
# whereas a hibernated workspace auto-wakes on the next incoming A2A
# message/send. So we send one wake A2A and then assert the workspace returns
# to online. Transient cold 5xx responses during the wake are accepted (same
# edge class the PONG probe tolerates); the pass/fail decision is made by
# polling the status to the online boundary, not by the single A2A code.
log " Hibernate auto-wake: sending A2A to wake hibernated parent..."
# JSON-RPC 2.0 message/send envelope. The messageId gets a random 8-hex-char
# suffix; the request id is a fixed string. The Python statements start at
# column 0 because they are top-level code passed to `python3 -c`.
WAKE_PAYLOAD=$(python3 -c "
import json, uuid

wake_text = 'This is the platform lifecycle smoke test waking a hibernated workspace. No tools or memory are needed — please respond with exactly the single token: WOKE'
message = {
    'role': 'user',
    'messageId': f'e2e-wake-{uuid.uuid4().hex[:8]}',
    'parts': [{'kind': 'text', 'text': wake_text}],
}
request = {
    'jsonrpc': '2.0',
    'method': 'message/send',
    'id': 'e2e-wake-1',
    'params': {'message': message},
}
print(json.dumps(request))
")
|
||||
# Send the wake A2A, retrying only the transient gateway class (502/503/504)
# within a bounded budget. The loop always leaves via `break`; afterwards
# WAKE_CODE holds the HTTP code of the last attempt ('000' if none was
# captured) and WAKE_RC the exit status of the last tenant_call.
#
# The budget and back-off are named once so the loop bound, the retry guard
# and the log line cannot drift apart.
WAKE_MAX_ATTEMPTS=12
WAKE_RETRY_SLEEP_SECS=15
# Scratch file for the A2A response body; truncated before every attempt and
# removed after the loop.
WAKE_TMP=$(mktemp -t wake_a2a.XXXXXX)
for ((WAKE_ATTEMPT = 1; WAKE_ATTEMPT <= WAKE_MAX_ATTEMPTS; WAKE_ATTEMPT++)); do
    : >"$WAKE_TMP"
    # errexit is lifted around the call so a failed or timed-out request is
    # reported through WAKE_RC / WAKE_CODE instead of aborting the script.
    set +e
    WAKE_CODE=$(tenant_call POST "/workspaces/$PARENT_ID/a2a" \
        --max-time 90 \
        -H "Content-Type: application/json" \
        -d "$WAKE_PAYLOAD" \
        -o "$WAKE_TMP" -w '%{http_code}' 2>/dev/null)
    WAKE_RC=$?
    set -e
    WAKE_CODE=${WAKE_CODE:-000}
    # Success: the call itself succeeded and returned a 2xx code. A pattern
    # match is used so a malformed code cannot trigger a numeric-test error.
    if [ "$WAKE_RC" = "0" ]; then
        case "$WAKE_CODE" in
            2[0-9][0-9]) break ;;
        esac
    fi
    # The sanitised body is used for logging only, and errexit is active
    # again here, so a failure in the sanitiser must not abort the run before
    # the real status assertion (and before WAKE_TMP is removed).
    WAKE_SAFE_BODY=$(cat "$WAKE_TMP" 2>/dev/null | sanitize_http_body || true)
    # Wake legitimately returns transient 5xx while the container restarts —
    # retry that class only (bounded), never a 4xx.
    case "$WAKE_CODE" in
        502 | 503 | 504)
            if [ "$WAKE_ATTEMPT" -lt "$WAKE_MAX_ATTEMPTS" ]; then
                log " wake A2A cold/restart attempt $WAKE_ATTEMPT/$WAKE_MAX_ATTEMPTS returned $WAKE_CODE: ${WAKE_SAFE_BODY:0:120}"
                sleep "$WAKE_RETRY_SLEEP_SECS"
                continue
            fi
            ;;
    esac
    # Any other outcome (or an exhausted budget) ends the loop; the status
    # poll that follows is the actual pass/fail decision.
    break
done
rm -f "$WAKE_TMP"
|
||||
# What auto-wake promises is the STATUS transition (hibernated → online), not
# any particular A2A response body, so the live DB row is the readiness
# signal asserted here. The last A2A HTTP code is included in the failure
# message for diagnosis only.
if ! wait_status "online" "$WORKSPACE_ONLINE_TIMEOUT_SECS" "hibernate-wake"; then
    fail "Hibernate auto-wake: parent $PARENT_ID never returned to status=online after a wake A2A (last A2A http=$WAKE_CODE) — auto-wake-on-message regression (a hibernated ws must re-provision on the next A2A)."
fi
ok " hibernate → online via auto-wake A2A (DB-verified)"
ok "Lifecycle transitions passed: pause→resume→online + hibernate→wake→online"
|
||||
else
|
||||
log "10b/11 Lifecycle transitions skipped (MODE=$MODE, E2E_LIFECYCLE=${E2E_LIFECYCLE:-auto}) — pause/resume/hibernate only run in full mode with E2E_LIFECYCLE!=off."
|
||||
fi
|
||||
|
||||
# ─── 11. Teardown runs via trap ────────────────────────────────────────
|
||||
# Fail-closed-on-skip: before declaring PASS, assert (when CI demanded a live
|
||||
# run) that every load-bearing lifecycle milestone actually fired. A run that
|
||||
|
||||
Reference in New Issue
Block a user