2026-06-05 07:59:31 +00:00
7 changed files with 684 additions and 66 deletions
@@ -372,6 +372,17 @@ jobs:
          # staging gate report green without a real provision→online→A2A
          # cycle goes red on every PR.
          bash tests/e2e/test_require_live_guard_unit.sh
+          # harden/enforce-ci-gates-core-v2 (PR #2286): fail-direction proof
+          # for the E2E_REQUIRE_LIVE zero-validated gate in
+          # test_priority_runtimes_e2e.sh (the REQUIRED `E2E API Smoke Test`).
+          # Offline (no LLM/network/provisioning): sources that script under
+          # its unit source-guard and drives the REAL evaluate_require_live_gate
+          # — asserts REQUIRE_LIVE=1 + zero validated → RED (the false-green
+          # trap), REQUIRE_LIVE=1 + >=1 validated → GREEN, and REQUIRE_LIVE
+          # unset + zero validated → GREEN (loud skip). CI can't provision a
+          # live arm to prove this, so this unit test IS the regression gate:
+          # a revert of the zero-validated→RED logic goes red on every PR.
+          bash tests/e2e/test_require_live_priority_gate_unit.sh

      - if: ${{ needs.changes.outputs.scripts == 'true' }}
        name: Test ECR promote-tenant-image script (mock-driven, no live infra)
@@ -272,6 +272,24 @@ jobs:
          echo "::error::Redis did not become ready in 15s"
          docker logs "$REDIS_CONTAINER" || true
          exit 1
+      - name: Set deterministic admin token for the e2e platform
+        if: needs.detect-changes.outputs.api == 'true'
+        run: |
+          # AdminAuth (workspace-server/internal/middleware/wsauth_middleware.go:164)
+          # reads ADMIN_TOKEN. Setting it (a) closes isDevModeFailOpen (devmode.go:50
+          # returns false when ADMIN_TOKEN is non-empty), so admin routes require a
+          # bearer, and (b) makes Tier-2b accept a bearer that constant-time-equals
+          # ADMIN_TOKEN. The platform process inherits ADMIN_TOKEN from $GITHUB_ENV.
+          #
+          # MOLECULE_ADMIN_TOKEN is the var the e2e scripts send as the bearer
+          # (tests/e2e/_lib.sh:33 e2e_mint_workspace_token, and the run_mock
+          # org-import curl). Set BOTH to the SAME value so the bearer the test
+          # sends == the secret the platform checks. Deterministic test value;
+          # this platform is ephemeral, single-run, and never reachable off-host.
+          E2E_ADMIN_TOKEN="e2e-api-admin-${{ github.run_id }}-${{ github.run_attempt }}"
+          echo "ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
+          echo "MOLECULE_ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
+          echo "Admin token configured for the e2e platform (ADMIN_TOKEN + MOLECULE_ADMIN_TOKEN)."
      - name: Build platform
        if: needs.detect-changes.outputs.api == 'true'
        working-directory: workspace-server
@@ -397,8 +415,33 @@ jobs:
      - name: Run notify-with-attachments E2E
        if: needs.detect-changes.outputs.api == 'true'
        run: bash tests/e2e/test_notify_attachments_e2e.sh
-      - name: Run priority-runtimes E2E (claude-code + hermes — skips when keys absent)
+      - name: "Run priority-runtimes E2E (REQUIRE-LIVE: mock validates the runtime plumbing end-to-end)"
+        # E2E_REQUIRE_LIVE=1 is ON: the run MUST validate >=1 runtime end-to-end
+        # or it exits NON-zero (RED). This is now SAFE because the `mock` arm can
+        # actually provision in CI: the only blocker was that POST /org/import and
+        # POST /admin/workspaces/:id/tokens are AdminAuth-gated
+        # (router.go:778 + :427) and this job previously configured NO admin token,
+        # so every admin call 401'd ("admin auth required"). The "Set deterministic
+        # admin token" step above now sets ADMIN_TOKEN on the platform AND exports
+        # the matching MOLECULE_ADMIN_TOKEN the e2e scripts send as the bearer, so
+        # the mock arm can org-import → online → mint token → canned A2A reply →
+        # validated(). That guarantees VALIDATED>=1 on a healthy platform, so the
+        # REQUIRED `E2E API Smoke Test` gate now HONESTLY validates a runtime
+        # end-to-end; if the mock plumbing (DB insert, status flip, A2A proxy,
+        # activity logging, or the admin-auth wiring) genuinely breaks, the gate
+        # goes RED instead of false-green. The zero-validated→RED decision is also
+        # regression-gated WITHOUT provisioning by the bash unit test
+        # tests/e2e/test_require_live_priority_gate_unit.sh (wired into ci.yml's
+        # "Run E2E bash unit tests" job), so a revert of that logic still fails CI.
+        #
+        # MiniMax stays an OPPORTUNISTIC best-effort arm: create is registry-fragile
+        # in CI (422 UNREGISTERED_MODEL_FOR_RUNTIME), so a miss is reported via
+        # bestfail() and never reds the gate — mock carries the required validation,
+        # MiniMax is a bonus real-LLM check when it comes up. ZERO new credentials.
        if: needs.detect-changes.outputs.api == 'true'
+        env:
+          E2E_REQUIRE_LIVE: '1'
+          E2E_MINIMAX_API_KEY: ${{ secrets.MOLECULE_STAGING_MINIMAX_API_KEY }}
        run: bash tests/e2e/test_priority_runtimes_e2e.sh
      - name: Install standalone runtime parser from Gitea registry
        if: needs.detect-changes.outputs.api == 'true'
@@ -17,6 +17,33 @@ e2e_extract_token() {
  python3 "$(dirname "${BASH_SOURCE[0]}")/_extract_token.py"
 }

+# Populate a curl-args array with the platform admin bearer, IF one is set.
+#
+# AdminAuth (workspace-server/internal/middleware/wsauth_middleware.go:161)
+# fail-opens ONLY while ADMIN_TOKEN is unset AND no workspace token exists yet
+# (devmode.go:50). The e2e-api CI job now sets ADMIN_TOKEN on the platform and
+# exports the matching MOLECULE_ADMIN_TOKEN here, which flips fail-open OFF — so
+# every admin-gated route (GET/POST/DELETE /workspaces, /events, /bundles,
+# /org/import, …) now requires the EXACT ADMIN_TOKEN as bearer (Tier-2b rejects
+# workspace bearers, wsauth_middleware.go:250). Helpers that hit admin routes
+# (e2e_cleanup_all_workspaces, e2e_delete_workspace's default path) must send it.
+#
+# Guarded if-set so a bootstrap/dev platform with no admin token (fail-open)
+# still works with zero auth. Mirrors e2e_mint_workspace_token's admin_auth.
+#
+# Usage:
+#   local admin_auth=(); e2e_admin_auth_args admin_auth
+#   curl -s "$BASE/workspaces" ${admin_auth[@]+"${admin_auth[@]}"}
+e2e_admin_auth_args() {
+  local _outname="$1"
+  local _bearer="${MOLECULE_ADMIN_TOKEN:-${ADMIN_TOKEN:-}}"
+  # Reset the caller's array first; fill it only when a bearer is configured.
+  eval "$_outname=()"
+  if [ -n "$_bearer" ]; then
+    eval "$_outname=(-H \"Authorization: Bearer \$_bearer\")"
+  fi
+}
+
 # Delete every workspace currently on the platform. Use at the top of a
 # script so count-based assertions are reproducible across runs.
 # Mint a fresh workspace auth token via the real admin endpoint.
@@ -53,19 +80,38 @@ e2e_delete_workspace() {
  if [ -z "$wid" ]; then
    return 0
  fi
+  # DELETE /workspaces/:id is AdminAuth-gated (List/Create/Delete are gated at
+  # router.go:165-167); the GET /workspaces/:id used for the name lookup is
+  # public (router.go:155). Callers may still pass explicit curl auth args,
+  # which are used as-is; callers that pass NONE (test_api.sh's final cleanup
+  # and the cleanup traps in poll-mode/notify/priority) rely on this fallback to
+  # the platform admin bearer so the DELETE doesn't 401 once ADMIN_TOKEN is set.
+  if [ "${#curl_args[@]}" -eq 0 ]; then
+    e2e_admin_auth_args curl_args
+  fi
+  # ${curl_args[@]+"…"} guard: under `set -u` an empty array expands to an
+  # "unbound variable" error on bash <4.4 (macOS 3.2, some Linux). This form
+  # expands to nothing when the array is empty. Callers from the priority-
+  # runtimes EXIT trap pass no extra curl args, so the array IS empty there —
+  # without the guard the trap aborts non-zero AFTER the gate already passed,
+  # turning a validated run RED. (Same idiom already used for CREATED_WSIDS.)
  if [ -z "$name" ]; then
-    name=$(curl -s "$BASE/workspaces/$wid" "${curl_args[@]}" | python3 -c "import json,sys
+    name=$(curl -s "$BASE/workspaces/$wid" ${curl_args[@]+"${curl_args[@]}"} | python3 -c "import json,sys
 try:
  print(json.load(sys.stdin).get('name',''))
 except Exception:
  pass" 2>/dev/null || true)
  fi
  curl -s -X DELETE "$BASE/workspaces/$wid?confirm=true" \
-    -H "X-Confirm-Name: $name" "${curl_args[@]}" > /dev/null || true
+    -H "X-Confirm-Name: $name" ${curl_args[@]+"${curl_args[@]}"} > /dev/null || true
 }

 e2e_cleanup_all_workspaces() {
-  curl -s "$BASE/workspaces" | python3 -c "import json,sys
+  # GET /workspaces (list) is AdminAuth-gated (router.go:165). Send the platform
+  # admin bearer if one is set so the list doesn't 401 → empty → no cleanup.
+  local _admin_auth=()
+  e2e_admin_auth_args _admin_auth
+  curl -s "$BASE/workspaces" ${_admin_auth[@]+"${_admin_auth[@]}"} | python3 -c "import json,sys
 try:
  [print(f\"{w.get('id','')}\\t{w.get('name','')}\") for w in json.load(sys.stdin)]
 except Exception:
@@ -15,18 +15,27 @@ SUM_AUTH=()
 ECHO_URL="https://example.com/echo-agent"
 SUM_URL="https://example.com/summarizer-agent"

-# AdminAuth-gated calls need a bearer token once any workspace token
-# exists in the DB. ADMIN_TOKEN is populated after the first workspace
-# create + real token mint. acurl = "authenticated curl".
-ADMIN_TOKEN=""
+# AdminAuth-gated calls (GET/POST/DELETE /workspaces, /events, /bundles)
+# require the platform admin bearer once ADMIN_TOKEN is set on the server.
+# Tier-2b (wsauth_middleware.go:250) REJECTS workspace bearer tokens on admin
+# routes when ADMIN_TOKEN is set, so admin calls MUST send the exact ADMIN_TOKEN
+# value — which the e2e-api CI job exports here as MOLECULE_ADMIN_TOKEN. acurl =
+# "admin curl": it always sends the platform admin bearer (if one is set).
+#
+# Guarded if-set: a fresh self-hosted/dev platform with no ADMIN_TOKEN fail-opens
+# (devmode.go:50), so sending no bearer still works there.
+ADMIN_BEARER="${MOLECULE_ADMIN_TOKEN:-${ADMIN_TOKEN:-}}"
+ADMIN_AUTH=()
+[ -n "$ADMIN_BEARER" ] && ADMIN_AUTH=(-H "Authorization: Bearer $ADMIN_BEARER")
 acurl() {
-  if [ -n "$ADMIN_TOKEN" ]; then
-    curl -s -H "Authorization: Bearer $ADMIN_TOKEN" "$@"
-  else
-    curl -s "$@"
-  fi
+  curl -s ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} "$@"
 }

+# WORKSPACE_TOKEN holds a per-workspace bearer for the WorkspaceAuth-gated
+# routes (PATCH /workspaces/:id, /activity, …). It is set after the first
+# create+mint and is NOT interchangeable with the admin bearer.
+WORKSPACE_TOKEN=""
+
 # Pre-test cleanup: remove any workspaces left over from prior runs so
 # count-based assertions ("empty", "count=2") are reproducible.
 e2e_cleanup_all_workspaces
@@ -57,19 +66,22 @@ check "GET /health" '"status":"ok"' "$R"
 R=$(acurl "$BASE/workspaces")
 check "GET /workspaces (empty)" '[]' "$R"

-# Test 3: Create workspace A (AdminAuth fail-open — no tokens exist yet)
-R=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" -d '{"name":"Echo Agent","tier":1,"runtime":"external","external":true}')
+# Test 3: Create workspace A. POST /workspaces is AdminAuth-gated (router.go:166);
+# send the admin bearer (acurl). On a fail-open dev platform acurl sends nothing
+# and the create still works.
+R=$(acurl -X POST "$BASE/workspaces" -H "Content-Type: application/json" -d '{"name":"Echo Agent","tier":1,"runtime":"external","external":true}')
 check "POST /workspaces (create echo)" '"status":"awaiting_agent"' "$R"
 ECHO_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])")

-ADMIN_TOKEN=$(echo "$R" | e2e_extract_token)
-if [ -z "$ADMIN_TOKEN" ]; then
-  ADMIN_TOKEN=$(e2e_mint_workspace_token "$ECHO_ID" 2>/dev/null || echo "")
+# Per-workspace token for Echo, for the WorkspaceAuth-gated routes below.
+WORKSPACE_TOKEN=$(echo "$R" | e2e_extract_token)
+if [ -z "$WORKSPACE_TOKEN" ]; then
+  WORKSPACE_TOKEN=$(e2e_mint_workspace_token "$ECHO_ID" 2>/dev/null || echo "")
 fi
-if [ -n "$ADMIN_TOKEN" ]; then
-  echo "  (acquired admin token: ${ADMIN_TOKEN:0:8}...)"
+if [ -n "$WORKSPACE_TOKEN" ]; then
+  echo "  (acquired Echo workspace token: ${WORKSPACE_TOKEN:0:8}...)"
 else
-  echo "  WARNING: no admin token acquired — subsequent AdminAuth calls will fail"
+  echo "  WARNING: no Echo workspace token acquired — WorkspaceAuth calls will fail"
 fi

 # Test 4: Create workspace B (needs bearer — tokens now exist in DB)
@@ -98,7 +110,7 @@ check "GET /workspaces/:id (agent_card null)" '"agent_card":null' "$R"
 # Test 7: Register echo — use workspace-specific token (from real admin
 # endpoint), not the admin token. C18 requires a token issued TO THIS
 # workspace, not just any valid token.
-ECHO_WS_TOKEN="$ADMIN_TOKEN"
+ECHO_WS_TOKEN="$WORKSPACE_TOKEN"
 [ -n "$ECHO_WS_TOKEN" ] && ECHO_AUTH=(-H "Authorization: Bearer $ECHO_WS_TOKEN")
 R=$(curl -s -X POST "$BASE/registry/register" -H "Content-Type: application/json" \
  "${ECHO_AUTH[@]}" \
@@ -159,26 +171,29 @@ R=$(curl -s -X POST "$BASE/registry/check-access" -H "Content-Type: application/
  -d "{\"caller_id\":\"$ECHO_ID\",\"target_id\":\"$SUM_ID\"}")
 check "POST /registry/check-access (same-org allowed)" '"allowed":true' "$R"

-# Test 15: PATCH workspace (update position)
-R=$(acurl -X PATCH "$BASE/workspaces/$ECHO_ID" -H "Content-Type: application/json" -d '{"x":100,"y":200}')
+# Test 15: PATCH workspace (update position). PATCH /workspaces/:id is
+# WorkspaceAuth-gated (router.go:227 — #680 IDOR fix), so it needs Echo's OWN
+# bearer, NOT the admin bearer (WorkspaceAuth rejects the admin token).
+R=$(curl -s "${ECHO_AUTH[@]}" -X PATCH "$BASE/workspaces/$ECHO_ID" -H "Content-Type: application/json" -d '{"x":100,"y":200}')
 check "PATCH /workspaces/:id (position)" '"status":"updated"' "$R"

 R=$(acurl "$BASE/workspaces/$ECHO_ID")
 check "Position saved (x=100)" '"x":100' "$R"
 check "Position saved (y=200)" '"y":200' "$R"

-# Test 16: PATCH workspace (update name)
-R=$(acurl -X PATCH "$BASE/workspaces/$ECHO_ID" -H "Content-Type: application/json" -d '{"name":"Echo Agent v2"}')
+# Test 16: PATCH workspace (update name) — WorkspaceAuth-gated; use Echo's token.
+R=$(curl -s "${ECHO_AUTH[@]}" -X PATCH "$BASE/workspaces/$ECHO_ID" -H "Content-Type: application/json" -d '{"name":"Echo Agent v2"}')
 check "PATCH /workspaces/:id (name)" '"status":"updated"' "$R"

 R=$(acurl "$BASE/workspaces/$ECHO_ID")
 check "Name updated" '"name":"Echo Agent v2"' "$R"

-# Test 17: Events (#165 / PR #167 — now admin-gated, bearer required)
-R=$(acurl "$BASE/events" -H "Authorization: Bearer $ECHO_TOKEN")
+# Test 17: Events (#165 / PR #167 — admin-gated; the admin bearer is required,
+# and Tier-2b rejects a workspace bearer here, so use acurl's admin token alone).
+R=$(acurl "$BASE/events")
 check "GET /events (has events)" 'WORKSPACE_ONLINE' "$R"

-R=$(acurl "$BASE/events/$ECHO_ID" -H "Authorization: Bearer $ECHO_TOKEN")
+R=$(acurl "$BASE/events/$ECHO_ID")
 check "GET /events/:id (has events for echo)" 'WORKSPACE_ONLINE' "$R"

 # Test 18: Update card
@@ -295,7 +310,7 @@ check "active_tasks cleared" '"active_tasks":0' "$R"
 # endpoint is admin-auth gated and keeps the full record, so operators
 # can still see task progress from the dashboard without exposing it
 # over the public per-workspace GET.
-R=$(curl -s "$BASE/workspaces" -H "Authorization: Bearer $ECHO_TOKEN")
+R=$(acurl "$BASE/workspaces")
 check "current_task in list response" '"current_task"' "$R"

 # Test 21: Delete
@@ -306,18 +321,20 @@ check "current_task in list response" '"current_task"' "$R"
 # Delete the CHILD (Summarizer) here instead: a child delete does NOT cascade
 # upward, so the parent Echo survives and count=1 holds. The bundle round-trip
 # below needs Summarizer's exported config, so capture it BEFORE this delete.
-BUNDLE=$(curl -s "$BASE/bundles/export/$SUM_ID" -H "Authorization: Bearer $SUM_TOKEN")
+# GET /bundles/export/:id is admin-gated (router.go:741) — use the admin bearer.
+BUNDLE=$(acurl "$BASE/bundles/export/$SUM_ID")
 check "GET /bundles/export/:id" '"name":"Summarizer Agent"' "$BUNDLE"
 ORIG_NAME=$(echo "$BUNDLE" | python3 -c "import sys,json; print(json.load(sys.stdin)['name'])")
 ORIG_TIER=$(echo "$BUNDLE" | python3 -c "import sys,json; print(json.load(sys.stdin)['tier'])")

+# DELETE /workspaces/:id is admin-gated (router.go:167). X-Confirm-Name must
+# still match the workspace name even with admin auth.
 R=$(acurl -X DELETE "$BASE/workspaces/$SUM_ID?confirm=true" \
-  -H "Authorization: Bearer $SUM_TOKEN" \
  -H "X-Confirm-Name: Summarizer Agent")
 check "DELETE /workspaces/:id" '"status":"removed"' "$R"

-# Parent Echo must survive a child delete — list as Echo and expect count=1.
-R=$(curl -s "$BASE/workspaces" -H "Authorization: Bearer $ECHO_TOKEN")
+# Parent Echo must survive a child delete — list (admin) and expect count=1.
+R=$(acurl "$BASE/workspaces")
 COUNT=$(echo "$R" | python3 -c "import sys,json; print(len(json.load(sys.stdin)))")
 check "List after delete (count=1)" "1" "$COUNT"

@@ -328,21 +345,21 @@ check "List after delete (count=1)" "1" "$COUNT"
 echo ""
 echo "--- Bundle Round-Trip Test ---"

-# Delete the remaining parent Echo — use ECHO_TOKEN (per-workspace) for
-# WorkspaceAuth and ADMIN_TOKEN for the AdminAuth layer.
+# Delete the remaining parent Echo — DELETE is admin-gated (router.go:167);
+# the platform admin bearer (acurl) authorizes it. X-Confirm-Name still required.
 R=$(acurl -X DELETE "$BASE/workspaces/$ECHO_ID?confirm=true" \
-  -H "Authorization: Bearer $ECHO_TOKEN" \
  -H "X-Confirm-Name: Echo Agent v2")
 check "Delete before re-import" '"status":"removed"' "$R"

-# After deleting both workspaces, all per-workspace tokens are revoked.
-# Clear the now-revoked admin bearer so acurl can use fresh-install fail-open.
-ADMIN_TOKEN=""
+# Both workspaces are now deleted. The platform-level ADMIN_TOKEN env is still
+# set, so admin routes still require the admin bearer (fail-open does NOT
+# re-engage just because the token table emptied) — keep using acurl's bearer.
 R=$(acurl "$BASE/workspaces")
 COUNT=$(echo "$R" | python3 -c "import sys,json; print(len(json.load(sys.stdin)))")
 check "All workspaces deleted (count=0)" "0" "$COUNT"

-# Re-import from the exported bundle (AdminAuth fail-open — no live tokens)
+# Re-import from the exported bundle. POST /bundles/import is admin-gated
+# (router.go:742) — acurl sends the admin bearer.
 R=$(acurl -X POST "$BASE/bundles/import" -H "Content-Type: application/json" -d "$BUNDLE")
 check "POST /bundles/import" '"status":"provisioning"' "$R"
 NEW_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin)['workspace_id'])")
@@ -398,12 +415,15 @@ check "Register re-imported workspace" '"status":"registered"' "$R"
 REG_NEW_TOKEN=$(echo "$R" | e2e_extract_token)
 [ -n "$REG_NEW_TOKEN" ] && NEW_TOKEN="$REG_NEW_TOKEN"

-# Re-export and verify agent_card survives the round-trip (#165 / PR #167 — admin-gated)
-REBUNDLE=$(curl -s "$BASE/bundles/export/$NEW_ID" -H "Authorization: Bearer $NEW_TOKEN")
+# Re-export and verify agent_card survives the round-trip (#165 / PR #167 —
+# GET /bundles/export/:id is admin-gated; use the admin bearer).
+REBUNDLE=$(acurl "$BASE/bundles/export/$NEW_ID")
 check "Re-exported bundle has agent_card" '"agent_card"' "$REBUNDLE"

-# Clean up — use the token just issued to the re-imported workspace
-e2e_delete_workspace "$NEW_ID" "$ORIG_NAME" -H "Authorization: Bearer $NEW_TOKEN"
+# Clean up — DELETE /workspaces/:id is admin-gated; pass no per-call auth so
+# e2e_delete_workspace falls back to the platform admin bearer (a workspace
+# bearer would be rejected by Tier-2b).
+e2e_delete_workspace "$NEW_ID" "$ORIG_NAME"

 echo ""
 echo "=== Results: $PASS passed, $FAIL failed ==="
@@ -28,6 +28,13 @@ PASS=0
 FAIL=0
 WSID=""

+# GET /workspaces (list) and POST /workspaces (create) are AdminAuth-gated
+# (router.go:165-166). The e2e-api CI job sets ADMIN_TOKEN on the platform
+# (fail-open OFF) and exports MOLECULE_ADMIN_TOKEN here, so these calls need the
+# admin bearer. Guarded if-set so a fail-open dev platform still works.
+ADMIN_AUTH=()
+e2e_admin_auth_args ADMIN_AUTH
+
 cleanup() {
  # Workspace teardown — best-effort, ignore errors so an unrelated CP
  # outage doesn't shadow a real test failure.
@@ -80,7 +87,7 @@ echo "=== Setup ==="
 # canvas. Find and delete any with this exact name so the test is safe to
 # re-run from any state. Match by name (not tag) so this also catches
 # leftovers created by older script versions.
-PRIOR=$(curl -s "$BASE/workspaces" | python3 -c '
+PRIOR=$(curl -s "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} | python3 -c '
 import json, sys
 try:
    print(" ".join(w["id"] for w in json.load(sys.stdin) if w.get("name") == "Notify E2E"))
@@ -96,7 +103,7 @@ done
 # feedback_workspace_model_required_no_platform_default_dynamic_credential_intake).
 # Body has no runtime → defaults to claude-code; pass the matching model
 # that the workspace-creation contract now requires.
-R=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \
+R=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -H "Content-Type: application/json" \
  -d '{"name":"Notify E2E","tier":1,"runtime":"external","external":true,"model":"sonnet"}')
 WSID=$(echo "$R" | python3 -c 'import json,sys;print(json.load(sys.stdin)["id"])' 2>/dev/null || true)
 [ -n "$WSID" ] || { echo "Failed to create workspace: $R"; exit 1; }
@@ -24,11 +24,73 @@
 # Each phase skips cleanly when its prerequisite secret is absent so a
 # partially-keyed env (e.g. CI without an OpenAI key) doesn't false-fail.
 #
+# REQUIRE-LIVE (false-green guard, mirrors CP serving-e2e's
+# SERVING_E2E_REQUIRE_LIVE semantics)
+# ------------------------------------------------------------------
+# Without a guard, an env with NO live secrets makes every phase SKIP,
+# leaving PASS=0 FAIL=0 — and the historical `[ "$FAIL" -eq 0 ]` gate
+# exits 0 (GREEN) while validating ZERO runtimes. That made the REQUIRED
+# `E2E API Smoke Test` merge gate pass without exercising a single
+# runtime (false-green).
+#
+# Fix: a real "validated arm" counter (VALIDATED) tracks runtimes that
+# actually ran AND produced a non-error A2A reply. With E2E_REQUIRE_LIVE=1:
+# if zero arms validated, the run exits NON-zero with a loud message.
+# Without it (E2E_REQUIRE_LIVE unset/0), a fully-skipped run stays a LOUD
+# skip + exit 0 for dev convenience.
+#
+# This zero-validated→RED decision is the load-bearing logic. It is factored
+# into evaluate_require_live_gate() (a pure function of $FAIL/$VALIDATED/
+# $E2E_REQUIRE_LIVE, defined before any platform I/O) and is REGRESSION-GATED
+# on every PR by tests/e2e/test_require_live_priority_gate_unit.sh, which
+# sources this file (E2E_PRIORITY_UNIT_SOURCE=1), sets the counters, and
+# asserts the gate's exit code — no platform, no provisioning, no network.
+# So the false-green can't silently come back: a revert of the guard fails CI.
+#
+# CI POSTURE (REQUIRE-LIVE ON — see .gitea/workflows/e2e-api.yml):
+# The live e2e-api job SETS E2E_REQUIRE_LIVE=1. The `mock` arm is the
+# CI-provisionable live-completion arm: it org-imports a mock workspace
+# (→online→canned A2A reply) with NO external secret. The only thing that
+# previously blocked it in CI was admin auth — POST /org/import and POST
+# /admin/workspaces/:id/tokens are AdminAuth-gated, and the job set no admin
+# token, so every admin call 401'd ("admin auth required"). The job now sets
+# ADMIN_TOKEN on the platform AND exports the matching MOLECULE_ADMIN_TOKEN
+# the scripts send, so mock validates end-to-end and VALIDATED>=1 holds on a
+# healthy platform — the REQUIRED `E2E API Smoke Test` gate now HONESTLY
+# validates a runtime. If the mock plumbing or the admin-auth wiring breaks,
+# the gate goes RED (not false-green). The zero-validated→RED decision is also
+# regression-gated WITHOUT provisioning by the bash unit test above, so a
+# revert of that logic still fails CI.
+#
+# LIVE ARMS (run when their prerequisite is present; opportunistic):
+#   - `mock` (run_mock) is the no-key REQUIRE-LIVE backbone: a virtual
+#     workspace (no container, no EC2, no provider) whose org-import path
+#     short-circuits to status='online' with a canned A2A reply. It validates
+#     in CI now that the e2e-api job wires an admin token (org-import + token
+#     mint are AdminAuth-gated), so it is the guaranteed >=1 validation.
+#   - MiniMax (E2E_MINIMAX_API_KEY, from MOLECULE_STAGING_MINIMAX_API_KEY) is
+#     an OPPORTUNISTIC best-effort real-LLM arm: registry-fragile in CI (422
+#     UNREGISTERED_MODEL_FOR_RUNTIME — see run_minimax header), so a miss is
+#     a best-effort MISS via bestfail() and does NOT red the gate.
+# The CI e2e-api job sets E2E_REQUIRE_LIVE=1: mock guarantees a validation, so
+# the REQUIRED gate is honest (RED if the mock plumbing/admin-auth breaks). The
+# zero-validated→RED logic is also regression-gated by the bash unit test above.
+#
 # Usage:
+#   # Enforce REQUIRE-LIVE locally (need >=1 arm to actually validate):
+#   E2E_REQUIRE_LIVE=1 E2E_MINIMAX_API_KEY=... \
+#     tests/e2e/test_priority_runtimes_e2e.sh
+#
+#   # Default (no enforcement): all-skip stays a LOUD skip + exit 0:
+#   tests/e2e/test_priority_runtimes_e2e.sh
+#
+#   # Other live arms (if their secrets are configured):
 #   CLAUDE_CODE_OAUTH_TOKEN=... E2E_OPENAI_API_KEY=... \
 #     tests/e2e/test_priority_runtimes_e2e.sh
 #
 #   # Run only one runtime
+#   E2E_RUNTIMES=mock        tests/e2e/test_priority_runtimes_e2e.sh
+#   E2E_RUNTIMES=minimax     tests/e2e/test_priority_runtimes_e2e.sh
 #   E2E_RUNTIMES=claude-code tests/e2e/test_priority_runtimes_e2e.sh
 #   E2E_RUNTIMES=hermes      tests/e2e/test_priority_runtimes_e2e.sh
 #
@@ -41,13 +103,81 @@

 set -euo pipefail

-source "$(dirname "$0")/_lib.sh"
-
 PASS=0
 FAIL=0
 SKIP=0
+# VALIDATED counts runtimes that ACTUALLY ran end-to-end (provisioned,
+# reached online, AND returned a non-error A2A reply). Distinct from PASS,
+# which also counts sub-assertions like activity-log rows. This is the
+# signal the REQUIRE-LIVE gate keys off: VALIDATED==0 means we proved
+# nothing about any runtime, regardless of how many sub-asserts "passed".
+VALIDATED=0
 CREATED_WSIDS=()

+# evaluate_require_live_gate — the SINGLE source of the final exit decision.
+# Pure function of $FAIL, $VALIDATED, and $E2E_REQUIRE_LIVE; performs NO I/O
+# beyond the loud messages. Returns the exit code the script should exit with:
+#   - FAIL>0                       → 1 (a real failure is always red)
+#   - VALIDATED==0 + REQUIRE_LIVE  → 1 (false-green trap: proved nothing → RED)
+#   - VALIDATED==0 + !REQUIRE_LIVE → 0 (dev-convenience LOUD skip)
+#   - VALIDATED>=1                 → 0 (at least one arm validated end-to-end)
+# It is a function (not inline tail code) so test_require_live_priority_gate_unit.sh
+# can drive the REAL decision in isolation — set the counters, call this, assert
+# the return code — with no platform, no provisioning, no network. That makes the
+# zero-validated→RED logic a CI-gated regression contract: a future revert of it
+# fails the unit test on every PR. See that unit test for the fail-direction proof.
+evaluate_require_live_gate() {
+  # Any real failure is always red, whatever REQUIRE-LIVE says.
+  if [ "$FAIL" -ne 0 ]; then
+    return 1
+  fi
+
+  # REQUIRE-LIVE gate (mirrors CP serving-e2e SERVING_E2E_REQUIRE_LIVE).
+  # A run that validated no runtime (every arm skipped or was a best-effort
+  # miss) proves nothing. In enforced mode (E2E_REQUIRE_LIVE=1 or "true") that
+  # MUST be red so the required `E2E API Smoke Test` gate can't be false-green.
+  local require_live="${E2E_REQUIRE_LIVE:-0}"
+  if [ "$VALIDATED" -eq 0 ]; then
+    if [ "$require_live" = "1" ] || [ "$require_live" = "true" ]; then
+      echo "::error::E2E_REQUIRE_LIVE is set but ZERO runtimes were validated end-to-end." >&2
+      echo "         No arm reached online + a non-error A2A reply (every arm was" >&2
+      echo "         skipped or was a best-effort miss), so this gate validated" >&2
+      echo "         nothing. The no-key 'mock' arm is the required CI validation:" >&2
+      echo "         check that it ran (E2E_RUNTIMES) and that the admin bearer is" >&2
+      echo "         wired (platform ADMIN_TOKEN == MOLECULE_ADMIN_TOKEN). Live-key" >&2
+      echo "         arms are optional extras. Failing RED instead of false-green." >&2
+      return 1
+    fi
+    # Dev convenience: no enforcement requested → loud skip, exit 0.
+    echo "SKIPPED: no live secrets present and E2E_REQUIRE_LIVE is not set — validated" >&2
+    echo "         zero runtimes. This is a dev-convenience pass; CI sets" >&2
+    echo "         E2E_REQUIRE_LIVE=1 to make zero-validated a hard failure." >&2
+    return 0
+  fi
+
+  echo "OK: $VALIDATED runtime(s) validated end-to-end."
+  return 0
+}
+
+# Source-guard: when sourced by the unit test (E2E_PRIORITY_UNIT_SOURCE=1) we
+# stop HERE — the counters + evaluate_require_live_gate are now defined, and we
+# must NOT fall through to _lib.sh's platform-dependent helpers or the live
+# pre-sweep curl below (there is no platform in the unit-test environment).
+if [ "${E2E_PRIORITY_UNIT_SOURCE:-0}" = "1" ]; then
+  return 0
+fi
+
+source "$(dirname "$0")/_lib.sh"
+
+# GET /workspaces (list, router.go:165) and POST /workspaces (create,
+# router.go:166) are AdminAuth-gated. The e2e-api CI job sets ADMIN_TOKEN on the
+# platform (fail-open OFF) and exports MOLECULE_ADMIN_TOKEN here, so the
+# pre-sweep list and every runtime-create must send the admin bearer or they
+# 401. run_mock uses POST /org/import (also admin-gated) and wires its own admin
+# auth inline. Guarded if-set so a fail-open dev platform still works.
+ADMIN_AUTH=()
+e2e_admin_auth_args ADMIN_AUTH
+
 cleanup() {
  # `set -u` + empty array would error on "${CREATED_WSIDS[@]}"; the
  # ${VAR[@]+"…"} form expands to nothing when the array is unset/empty
@@ -58,14 +188,26 @@ cleanup() {
 }
 trap cleanup EXIT

-pass()  { echo "  PASS — $1"; PASS=$((PASS + 1)); }
-fail()  { echo "  FAIL — $1"; echo "         $2"; FAIL=$((FAIL + 1)); }
-skip()  { echo "  SKIP — $1"; SKIP=$((SKIP + 1)); }
+pass()      { echo "  PASS — $1"; PASS=$((PASS + 1)); }
+fail()      { echo "  FAIL — $1"; echo "         $2"; FAIL=$((FAIL + 1)); }
+skip()      { echo "  SKIP — $1"; SKIP=$((SKIP + 1)); }
+# Mark a runtime as having been validated end-to-end (online + non-error
+# A2A reply). Also emits a PASS line so it shows in the results tally.
+validated() { echo "  PASS — $1"; PASS=$((PASS + 1)); VALIDATED=$((VALIDATED + 1)); }
+# bestfail() is for OPPORTUNISTIC (best-effort) arms whose failure must
+# NOT red the gate. It does NOT increment FAIL — it only logs + bumps
+# SKIP so the tally stays honest ("we tried, it didn't validate, but it
+# was never load-bearing"). Used by the MiniMax arm: MiniMax-create is
+# fragile in CI (registry-skewed model id, BYOK plumbing — see core#2263
+# and the run_minimax header), so a MiniMax miss is reported but never
+# fails the REQUIRED gate. The mock arm is the load-bearing validation
+# that keeps the gate honest; MiniMax is the real-LLM bonus on top.
+bestfail()  { echo "  BEST-EFFORT MISS — $1"; echo "         $2"; SKIP=$((SKIP + 1)); }

 # Pre-sweep any prior runs that left workspaces behind (same defence as
 # test_notify_attachments_e2e.sh: trap fires on normal exit, but a
 # SIGPIPE / kill -9 can bypass it).
-PRIOR=$(curl -s "$BASE/workspaces" | python3 -c '
+PRIOR=$(curl -s "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} | python3 -c '
 import json, sys
 try:
    print(" ".join(w["id"] for w in json.load(sys.stdin) if w.get("name","").startswith("Priority E2E ")))
@@ -188,7 +330,7 @@ print(json.dumps({'CLAUDE_CODE_OAUTH_TOKEN': os.environ['CLAUDE_CODE_OAUTH_TOKEN
 ")
  local resp wsid
  # model required (CTO 2026-05-22 SSOT) — pass the deleted DefaultModel("claude-code") value.
-  resp=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \
+  resp=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -H "Content-Type: application/json" \
    -d "{\"name\":\"Priority E2E (claude-code)\",\"runtime\":\"claude-code\",\"model\":\"sonnet\",\"tier\":1,\"secrets\":$secrets}")
  wsid=$(echo "$resp" | python3 -c 'import json,sys;print(json.load(sys.stdin).get("id",""))') || true
  if [ -z "$wsid" ]; then
@@ -220,9 +362,9 @@ print(json.dumps({'CLAUDE_CODE_OAUTH_TOKEN': os.environ['CLAUDE_CODE_OAUTH_TOKEN
  local reply
  if reply=$(send_test_prompt "$wsid" "$token"); then
    if echo "$reply" | grep -q "PONG"; then
-      pass "claude-code reply contains PONG"
+      validated "claude-code reply contains PONG"
    else
-      pass "claude-code reply non-empty (first 80 chars: ${reply:0:80})"
+      validated "claude-code reply non-empty (first 80 chars: ${reply:0:80})"
    fi
    assert_activity_logged "claude-code" "$wsid" "$token"
  else
@@ -254,7 +396,7 @@ print(json.dumps({
 }))
 ")
  local resp wsid
-  resp=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \
+  resp=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -H "Content-Type: application/json" \
    -d "{\"name\":\"Priority E2E (hermes)\",\"runtime\":\"hermes\",\"tier\":1,\"model\":\"openai/gpt-4o\",\"secrets\":$secrets}")
  wsid=$(echo "$resp" | python3 -c 'import json,sys;print(json.load(sys.stdin).get("id",""))') || true
  if [ -z "$wsid" ]; then
@@ -288,9 +430,9 @@ print(json.dumps({
  local reply
  if reply=$(send_test_prompt "$wsid" "$token"); then
    if echo "$reply" | grep -q "PONG"; then
-      pass "hermes reply contains PONG"
+      validated "hermes reply contains PONG"
    else
-      pass "hermes reply non-empty (first 80 chars: ${reply:0:80})"
+      validated "hermes reply non-empty (first 80 chars: ${reply:0:80})"
    fi
    assert_activity_logged "hermes" "$wsid" "$token"
  else
@@ -327,7 +469,7 @@ print(json.dumps({
 }))
 ")
  local resp wsid
-  resp=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \
+  resp=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -H "Content-Type: application/json" \
    -d "{\"name\":\"Priority E2E ($runtime)\",\"runtime\":\"$runtime\",\"tier\":1,\"model\":\"openai/gpt-4o-mini\",\"secrets\":$secrets}")
  wsid=$(echo "$resp" | python3 -c 'import json,sys;print(json.load(sys.stdin).get("id",""))') || true
  if [ -z "$wsid" ]; then
@@ -358,9 +500,9 @@ print(json.dumps({
  local reply
  if reply=$(send_test_prompt "$wsid" "$token"); then
    if echo "$reply" | grep -q "PONG"; then
-      pass "$runtime reply contains PONG"
+      validated "$runtime reply contains PONG"
    else
-      pass "$runtime reply non-empty (first 80 chars: ${reply:0:80})"
+      validated "$runtime reply non-empty (first 80 chars: ${reply:0:80})"
    fi
    assert_activity_logged "$runtime" "$wsid" "$token"
  else
@@ -371,18 +513,253 @@ print(json.dumps({
 run_codex()      { run_openai_runtime "codex"      "codex"; }
 run_openclaw()   { run_openai_runtime "openclaw"   "openclaw"; }

-WANT="${E2E_RUNTIMES:-claude-code codex hermes openclaw}"
+####################################################################
+# Mock arm — the GUARANTEED, always-available REQUIRE-LIVE backbone.
+####################################################################
+# The mock runtime (workspace-server/internal/handlers/mock_runtime.go)
+# is a virtual workspace: NO container, NO EC2, NO LLM key. The org-import
+# path (createWorkspaceTree, org_import.go) short-circuits a runtime=mock
+# workspace straight to status='online' (no provisioner needed), and the
+# A2A proxy (a2a_proxy.go → handleMockA2A) synthesises a deterministic
+# canned JSON-RPC reply with logActivity=true (writes the activity_logs
+# row too). That makes mock the perfect REQUIRE-LIVE backbone: it
+# exercises the SAME plumbing every real runtime needs to pass —
+#   provision-decision → status=online → A2A round-trip → activity_logs —
+# without depending on any external provider key or LLM availability. It
+# is GREEN on a healthy platform and RED only if that plumbing genuinely
+# breaks (DB insert, status flip, A2A proxy, activity logging). No more
+# false-green (zero-validated is impossible when mock works), and no more
+# can't-go-green (mock needs no secret, so it always runs in CI).
+#
+# Why org-import (POST /org/import) instead of POST /workspaces:
+#   The mock→online short-circuit lives ONLY in createWorkspaceTree
+#   (org_import.go). The single-workspace Create handler (workspace.go)
+#   has no mock branch — it routes runtime=mock through
+#   provisionWorkspaceAuto, which in CI's local-build mode has no mock
+#   image and would never reach online. Org-import is the supported path
+#   to a live mock workspace, so the arm drives it.
+#
+# The canned reply is one of the "On it!" variants (NOT "PONG"), so this
+# arm validates on the non-empty / non-error branch — that is the real
+# contract for mock (it proves the plumbing, not an LLM's instruction-
+# following).
+run_mock() {
+  echo ""
+  echo "=== mock (no-key plumbing backbone) happy path ==="
+  # Deliberately ungated: this arm needs no provider secret, so it runs on
+  # every invocation and is what keeps E2E_REQUIRE_LIVE honest without a key.
+
+  # POST /org/import sits behind AdminAuth (router.go:778). When the platform
+  # was started with ADMIN_TOKEN (as the e2e-api CI job now does), an anonymous
+  # import is rejected 401 with {"error":"admin auth required"}. Attach the
+  # same admin bearer the mint helper uses (MOLECULE_ADMIN_TOKEN, falling back
+  # to ADMIN_TOKEN). With neither set the header is omitted, so a fail-open
+  # bootstrap/dev platform still works.
+  local bearer="${MOLECULE_ADMIN_TOKEN:-${ADMIN_TOKEN:-}}"
+  local auth_hdr=()
+  if [ -n "$bearer" ]; then
+    auth_hdr=(-H "Authorization: Bearer $bearer")
+  fi
+
+  # Inline single-workspace mock org. The org-import contract requires a
+  # model (createWorkspaceTree fails closed without one); mock never USES it
+  # and org-import does not run the Create handler's registry model
+  # validation, so the placeholder "mock" is accepted as-is.
+  local payload='{
+      "template": {
+        "name": "Priority E2E Mock Org",
+        "defaults": {"runtime": "mock", "model": "mock", "tier": 1},
+        "workspaces": [
+          {"name": "Priority E2E (mock)", "runtime": "mock", "model": "mock", "tier": 1}
+        ]
+      }
+    }'
+  local import_body wsid
+  import_body=$(curl -s -X POST "$BASE/org/import" \
+    -H "Content-Type: application/json" \
+    ${auth_hdr[@]+"${auth_hdr[@]}"} \
+    -d "$payload")
+
+  # The import answers {"org":..., "count":N, "workspaces":[{"id":...,
+  # "name":...,"tier":...}, ...]} (handlers/org.go:898-901). Pick out the id
+  # of the one workspace declared above; the older "results" key is still
+  # accepted in case the response shape is ever versioned.
+  wsid=$(echo "$import_body" | python3 -c '
+import json, sys
+try:
+    d = json.load(sys.stdin)
+except Exception:
+    sys.exit(0)
+for r in (d.get("workspaces") or d.get("results") or []):
+    if r.get("name") == "Priority E2E (mock)" and r.get("id"):
+        print(r["id"]); break
+') || true
+
+  # No id means a REAL break (admin-auth wiring, org-import create, or the
+  # mock short-circuit): mock is the REQUIRE-LIVE backbone and is expected to
+  # succeed in CI, so this is a hard fail(), not a best-effort miss. Under
+  # E2E_REQUIRE_LIVE=1 a FAIL also forces a non-zero exit through
+  # evaluate_require_live_gate. The raw response is surfaced so the cause is
+  # visible (e.g. {"error":"admin auth required"} = token wiring regressed).
+  [ -n "$wsid" ] || {
+    fail "create mock workspace (org-import)" "$import_body"
+    return 0
+  }
+  CREATED_WSIDS+=("$wsid")
+  echo "  workspace=$wsid"
+
+  # There is no container to boot, so mock should flip to online almost
+  # immediately; a short budget suffices. Anything else means the mock
+  # short-circuit in createWorkspaceTree is broken and the gate SHOULD red.
+  local state
+  state=$(wait_for_status "$wsid" "online failed" 60) || true
+  [ "$state" = "online" ] || {
+    fail "mock workspace reaches online" "final status: $state (mock should go online without provisioning)"
+    return 0
+  }
+  pass "mock workspace reaches online"
+
+  # Mock workspaces carry no inline token, so mint one through the admin
+  # endpoint (the same fallback the other arms use).
+  local ws_token
+  ws_token=$(e2e_mint_workspace_token "$wsid") || true
+  [ -n "$ws_token" ] || {
+    fail "resolve mock workspace token" "no token returned from POST /admin/workspaces/:id/tokens"
+    return 0
+  }
+
+  # A2A round-trip. The mock proxy answers with a canned non-error reply
+  # (an "On it!" variant, NOT "PONG"), so success is judged on the
+  # non-empty branch: a non-error, non-empty reply shows the A2A proxy
+  # short-circuit and reply-shape contract are intact end-to-end.
+  local answer
+  if ! answer=$(send_test_prompt "$wsid" "$ws_token"); then
+    fail "mock reply" "${answer:-<empty or error>} (mock A2A short-circuit should always return a canned reply)"
+  else
+    validated "mock reply non-empty (canned; first 80 chars: ${answer:0:80})"
+    assert_activity_logged "mock" "$wsid" "$ws_token"
+  fi
+}
+
+####################################################################
+# MiniMax live arm — OPPORTUNISTIC (best-effort) real-LLM arm.
+####################################################################
+# NOTE: this is now a BEST-EFFORT arm, not the REQUIRE-LIVE backbone.
+# mock (run_mock above) is the guaranteed, no-key validation that keeps
+# the gate honest. MiniMax-create is fragile in CI: the namespaced model
+# id minimax:MiniMax-M2.7 is NOT in claude-code's native model set and
+# does NOT resolve via DeriveProvider (its only prefix-owner, byok-minimax,
+# is not wired as a claude-code runtime arm), so the create is rejected
+# 422 UNREGISTERED_MODEL_FOR_RUNTIME before any provisioning (RCA core
+# registry_gen.go Runtimes["claude-code"]). Rather than red the REQUIRED
+# gate on that registry-skew (or on any transient MiniMax provisioning /
+# model-registration issue), this arm reports a best-effort MISS via
+# bestfail() and lets mock carry the validation. If MiniMax DOES come up
+# it validates as a bonus real-LLM check.
+# Drives the claude-code runtime against MiniMax (BYOK) using the
+# already-present Gitea secret MOLECULE_STAGING_MINIMAX_API_KEY,
+# surfaced into the env as E2E_MINIMAX_API_KEY (same name + secret the
+# staging-smoke / continuous-synth canaries use — see staging-smoke.yml
+# and continuous-synth-e2e.yml). NO new credential is introduced.
+#
+# How the arm is wired (it is NOT what keeps the REQUIRED gate honest —
+# mock is; see the NOTE above):
+#   - claude-code's `minimax` provider (providers.yaml / registry_gen.go)
+#     is third_party_anthropic_compat: it reads MINIMAX_API_KEY at boot
+#     and routes ANTHROPIC_BASE_URL → api.minimax.io/anthropic. So the
+#     ONLY tenant secret needed is {"MINIMAX_API_KEY": <key>} — exactly
+#     the SECRETS_JSON branch test_staging_full_saas.sh uses.
+#   - Model id is the NAMESPACED colon-form `minimax:MiniMax-M2.7`. It was
+#     chosen because, per core#2263, the BARE `MiniMax-M2` id can 400 on a
+#     registry-skewed ws-server build. As the NOTE above records, the
+#     namespaced id can itself be rejected (422 UNREGISTERED_MODEL_FOR_RUNTIME)
+#     at create time, which is why every failure in this arm is a bestfail().
+run_minimax() {
+  # Best-effort real-LLM arm: every failure path below goes through
+  # bestfail() (counted as SKIP), so this function never bumps FAIL.
+  # Only a non-error A2A reply counts, via validated().
+  echo ""
+  echo "=== minimax (claude-code BYOK) happy path ==="
+  if [ -z "${E2E_MINIMAX_API_KEY:-}" ]; then
+    skip "E2E_MINIMAX_API_KEY not set (MiniMax live arm needs the MiniMax key)"
+    return 0
+  fi
+  local secrets
+  secrets=$(python3 -c "
+import json, os
+# claude-code's minimax provider (third_party_anthropic_compat) reads
+# MINIMAX_API_KEY and points ANTHROPIC_BASE_URL at api.minimax.io/anthropic
+# at boot — so the ONLY tenant secret needed is the MiniMax key itself.
+print(json.dumps({'MINIMAX_API_KEY': os.environ['E2E_MINIMAX_API_KEY']}))
+")
+  local resp wsid
+  # Namespaced BYOK model id (core#2263): bare MiniMax-M2 can 400 on a
+  # registry-skewed ws-server build, so the namespaced minimax:MiniMax-M2.7
+  # is sent instead. That id can itself be rejected at create (422); the
+  # empty-id branch below reports it as a best-effort miss.
+  resp=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -H "Content-Type: application/json" \
+    -d "{\"name\":\"Priority E2E (minimax)\",\"runtime\":\"claude-code\",\"model\":\"minimax:MiniMax-M2.7\",\"tier\":1,\"secrets\":$secrets}")
+  wsid=$(echo "$resp" | python3 -c 'import json,sys;print(json.load(sys.stdin).get("id",""))') || true
+  if [ -z "$wsid" ]; then
+    # BEST-EFFORT: MiniMax-create is fragile (see header — the namespaced
+    # model id is registry-skewed → 422). Do NOT red the gate; mock is the
+    # required backbone. Report the create response so the skew is visible.
+    bestfail "create minimax workspace (best-effort; mock carries the gate)" "$resp"
+    return 0
+  fi
+  CREATED_WSIDS+=("$wsid")
+  echo "  workspace=$wsid"
+
+  # claude-code runtime image is already pulled; cold boot ~30-90s. The
+  # first MiniMax cold-call can be slow but that's covered by send_test_prompt's
+  # --max-time 180.
+  local final
+  final=$(wait_for_status "$wsid" "online failed" 240) || true
+  if [ "$final" != "online" ]; then
+    bestfail "minimax workspace reaches online (best-effort)" "final status: $final"
+    return 0
+  fi
+  pass "minimax workspace reaches online"
+
+  # Best-effort contract: a token-resolution failure must surface as a
+  # bestfail(), never abort the script. `|| true` keeps a non-zero exit from
+  # either helper from tripping errexit/pipefail if those are in effect
+  # (run_mock guards the same mint call the same way).
+  local token
+  token=$(echo "$resp" | e2e_extract_token) || true
+  if [ -z "$token" ]; then
+    token=$(e2e_mint_workspace_token "$wsid") || true
+  fi
+  if [ -z "$token" ]; then
+    bestfail "resolve minimax workspace token (best-effort)" "no token returned"
+    return 0
+  fi
+
+  local reply
+  if reply=$(send_test_prompt "$wsid" "$token"); then
+    if echo "$reply" | grep -q "PONG"; then
+      validated "minimax reply contains PONG"
+    else
+      validated "minimax reply non-empty (first 80 chars: ${reply:0:80})"
+    fi
+    assert_activity_logged "minimax" "$wsid" "$token"
+  else
+    bestfail "minimax reply (best-effort)" "${reply:-<empty or error>}"
+  fi
+}
+
+# `mock` runs FIRST and by default: it is the no-key REQUIRE-LIVE backbone
+# that guarantees >=1 validation on a healthy platform (see run_mock). The
+# real-LLM arms (claude-code/codex/hermes/openclaw/minimax) run if their
+# secrets are present and add real-provider coverage on top; minimax is
+# best-effort (never reds the gate).
+WANT="${E2E_RUNTIMES:-mock claude-code codex hermes openclaw minimax}"
 for r in $WANT; do
  case "$r" in
+    mock)        run_mock ;;
    claude-code) run_claude_code ;;
    codex)       run_codex ;;
    hermes)      run_hermes ;;
    openclaw)    run_openclaw ;;
-    all)         run_claude_code; run_codex; run_hermes; run_openclaw ;;
+    minimax)     run_minimax ;;
+    all)         run_mock; run_claude_code; run_codex; run_hermes; run_openclaw; run_minimax ;;
    *) echo "unknown runtime in E2E_RUNTIMES: $r" >&2; exit 2 ;;
  esac
 done

 echo ""
-echo "=== Results: $PASS passed, $FAIL failed, $SKIP skipped ==="
-[ "$FAIL" -eq 0 ]
+echo "=== Results: $PASS passed, $FAIL failed, $SKIP skipped, $VALIDATED runtime(s) validated end-to-end ==="
+
+# Final exit decision lives in evaluate_require_live_gate (defined at the top of
+# this file, before any platform I/O) so the same logic is unit-tested in
+# isolation by test_require_live_priority_gate_unit.sh. Mirror its return code
+# into the process exit code.
+evaluate_require_live_gate
+exit $?
@@ -0,0 +1,114 @@
+#!/usr/bin/env bash
+# Fail-direction / load-bearing proof for the E2E_REQUIRE_LIVE zero-validated
+# gate in test_priority_runtimes_e2e.sh (the REQUIRED `E2E API Smoke Test`).
+#
+# WHY (harden/enforce-ci-gates-core-v2, PR #2286): the priority-runtimes E2E's
+# only historical exit gate was `[ "$FAIL" -eq 0 ]`. When every runtime SKIPs
+# because no live secret is present — exactly what the CI step did — PASS=0
+# FAIL=0 and the script exited 0 (GREEN) while validating ZERO runtimes. The
+# REQUIRED merge gate was therefore false-green: passing without exercising a
+# single runtime. The fix adds a VALIDATED counter and makes a zero-validated
+# run RED when E2E_REQUIRE_LIVE is set.
+#
+# That zero-validated→RED decision lives in evaluate_require_live_gate() in
+# test_priority_runtimes_e2e.sh. CI cannot prove it via a live arm — the CI
+# substrate can't provision ANY runtime end-to-end (MiniMax 422, mock org-
+# import create fails, claude-code needs a key CI lacks), so the live e2e-api
+# job does NOT force E2E_REQUIRE_LIVE (that would red the required gate for
+# everyone). This UNIT test is the regression coverage instead: it drives the
+# REAL evaluate_require_live_gate() function — not a copy — in isolation by
+# sourcing the script with E2E_PRIORITY_UNIT_SOURCE=1 (which stops before any
+# platform I/O), setting the counters, and asserting the gate's return code.
+#
+# Because it exercises the actual function, a future revert of the zero-
+# validated→RED logic in test_priority_runtimes_e2e.sh fails THIS test on
+# every PR — so the false-green can't silently come back.
+#
+# Runs entirely offline (no LLM, no network, no provisioning) — pure shell
+# logic — so it runs on every PR in the fast lane and locally via `bash`.
+set -uo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+GATE_SCRIPT="$SCRIPT_DIR/test_priority_runtimes_e2e.sh"
+
+if [ ! -f "$GATE_SCRIPT" ]; then
+  echo "FATAL: cannot find $GATE_SCRIPT" >&2
+  exit 2
+fi
+
+PASS=0
+FAIL=0
+
+# run_case <E2E_REQUIRE_LIVE value> <VALIDATED count> <FAIL count>
+# Sources the REAL test_priority_runtimes_e2e.sh under the unit source-guard
+# (E2E_PRIORITY_UNIT_SOURCE=1 → it returns right after defining the counters
+# and evaluate_require_live_gate(), before _lib.sh / the live pre-sweep curl),
+# sets the counters to the scenario, calls the real gate, and echoes the
+# return code. Each case runs in a fresh `bash -c` so set -e/-u inside the
+# sourced script can't leak between cases or kill this harness.
+run_case() {
+  # Args: $1 = E2E_REQUIRE_LIVE value, $2 = VALIDATED count, $3 = FAIL count.
+  # Prints exactly one line on stdout: the gate's return code. Callers
+  # capture that with $(run_case ...), so nothing else may reach stdout.
+  local require_live="$1" validated="$2" failcount="$3"
+  local observed
+  E2E_PRIORITY_UNIT_SOURCE=1 \
+  E2E_REQUIRE_LIVE="$require_live" \
+  GATE_SCRIPT="$GATE_SCRIPT" \
+  VAL="$validated" \
+  FL="$failcount" \
+  bash -c '
+    set -uo pipefail
+    # Anything the gate script prints while being sourced is sent to stderr:
+    # it stays visible in the log but cannot be mixed into the captured
+    # return code (which would make every comparison fail).
+    # shellcheck disable=SC1090
+    source "$GATE_SCRIPT" >&2  # returns at the source-guard (no platform I/O)
+    VALIDATED="$VAL"
+    FAIL="$FL"
+    evaluate_require_live_gate >/dev/null 2>&1
+    exit $?
+  '
+  observed=$?
+  echo "$observed"
+}
+
+assert_rc() {
+  local label="$1" require_live="$2" validated="$3" failcount="$4" expected="$5"
+  local observed
+  observed=$(run_case "$require_live" "$validated" "$failcount")
+  if [ "$observed" = "$expected" ]; then
+    echo "  ✓ $label: REQUIRE_LIVE=$require_live VALIDATED=$validated FAIL=$failcount → rc=$observed"
+    PASS=$((PASS + 1))
+  else
+    echo "  ✗ $label: REQUIRE_LIVE=$require_live VALIDATED=$validated FAIL=$failcount expected=$expected OBSERVED=$observed" >&2
+    FAIL=$((FAIL + 1))
+  fi
+}
+
+echo "=== E2E_REQUIRE_LIVE priority-runtimes zero-validated gate proof ==="
+echo "    (drives the REAL evaluate_require_live_gate from $GATE_SCRIPT)"
+echo
+
+# (a) DECISIVE false-green trap: REQUIRE_LIVE=1 + zero validated → RED (exit 1).
+assert_rc "require-live, zero validated → RED (the false-green trap)" \
+  1 0 0 1
+
+# (b) REQUIRE_LIVE=1 + at least one validated → GREEN (exit 0).
+assert_rc "require-live, one validated → GREEN" \
+  1 1 0 0
+assert_rc "require-live, several validated → GREEN" \
+  1 3 0 0
+
+# (c) REQUIRE_LIVE unset-equivalent (0) + zero validated → GREEN (loud skip).
+assert_rc "no require-live, zero validated → GREEN (dev-convenience loud skip)" \
+  0 0 0 0
+
+# REQUIRE_LIVE=true (string form) is also honoured by the gate.
+assert_rc "require-live='true', zero validated → RED" \
+  true 0 0 1
+
+# A real FAIL is always red, regardless of REQUIRE_LIVE / VALIDATED — the
+# zero-validated guard must not mask (nor be masked by) a genuine failure.
+assert_rc "real FAIL with validations, no require-live → RED" \
+  0 2 1 1
+assert_rc "real FAIL, zero validated, no require-live → RED" \
+  0 0 1 1
+
+echo
+echo "=== Results: $PASS passed, $FAIL failed ==="
+[ "$FAIL" -eq 0 ]