Merge branch 'main' into fix/plugin-uninstall-exec-errors

Merge PR #2356 via Gitea merge queue
Serialized merge by gitea-merge-queue after current-main, genuine approvals, and required CI checks were green.
2026-06-06 10:50:18 +00:00 · 2026-06-06 10:13:50 +00:00 · 2026-06-06 03:07:06 -07:00 · 2026-06-06 03:02:58 -07:00 · 2026-06-06 09:58:53 +00:00 · 2026-06-06 09:14:54 +00:00
113 changed files with 10977 additions and 954 deletions
@@ -19,13 +19,22 @@ REDIS_URL=redis://localhost:6379
 # itself to 3000 in canvas/package.json, so sourcing this file before
 # `npm run dev` won't accidentally make Next.js try to bind 8080.
 PORT=8080
-# ---- Admin credential — REQUIRED to close issue #684 (AdminAuth bearer bypass) ----
+# ---- Admin credential — REQUIRED in EVERY environment (auth is fail-closed) ----
+# Auth is fail-CLOSED everywhere now (harden/no-fail-open-auth): there is NO
+# dev-mode escape hatch. AdminAuth / WorkspaceAuth / discovery all require a
+# real credential. The canvas authenticates by sending this value as a bearer
+# (it reads NEXT_PUBLIC_ADMIN_TOKEN — set it to the SAME value).
 # When ADMIN_TOKEN is set, only this value is accepted on /admin/* and /approvals/* routes.
-# Without it, any valid workspace bearer token can call admin endpoints (backward compat
-# fallback, still vulnerable). Set this in every environment, rotate when compromised.
-# Generate: openssl rand -base64 32
+# (When unset, a fresh install returns 401 on admin routes; once workspace tokens exist,
+#  any valid workspace bearer is accepted as a deprecated fallback — set ADMIN_TOKEN to close #684.)
+# Generate: openssl rand -base64 32   (scripts/dev-start.sh provisions a fixed dev value)
 # Store in fly secrets / deployment env — NEVER commit the actual value here.
 ADMIN_TOKEN=
+# NEXT_PUBLIC_ADMIN_TOKEN=            # Canvas-side mirror of ADMIN_TOKEN. The canvas
+                                      # bakes this into its bundle and sends it as the
+                                      # bearer. MUST equal ADMIN_TOKEN (next.config.ts
+                                      # warns if the pair is half-set). dev-start.sh
+                                      # exports it for you.
 SECRETS_ENCRYPTION_KEY=        # 32-byte key (raw or base64). Leave empty for plaintext (dev only).
 CONFIGS_DIR=                   # Path to workspace-configs-templates/ (auto-discovered if empty)
 PLUGINS_DIR=                   # Path to plugins/ directory (default: /plugins in container)
@@ -34,7 +43,7 @@ PLUGINS_DIR=                   # Path to plugins/ directory (default: /plugins i
 # MOLECULE_MCP_ALLOW_SEND_MESSAGE=              # Set to "true" to include send_message_to_user in the MCP bridge tool list (issue #810). Excluded by default to prevent unintended WebSocket pushes from CLI sessions.
 # MOLECULE_MCP_URL=http://localhost:8080        # Platform URL for opencode MCP config (opencode.json). Same as PLATFORM_URL; separate var so opencode configs can reference it without ambiguity.
 # WORKSPACE_DIR=                                 # Optional global host path bind-mounted to /workspace in every container. Per-workspace workspace_dir column overrides this; if neither is set each workspace gets an isolated Docker named volume.
-MOLECULE_ENV=development                       # Environment label (development/staging/production). Used for log tagging and for the AdminAuth dev-mode escape hatch (lets the Canvas dashboard keep working after the first workspace is created, when ADMIN_TOKEN is unset). SaaS deployments MUST set MOLECULE_ENV=production.
+MOLECULE_ENV=development                       # Environment label (development/staging/production). Used for log tagging and for NON-security local-dev conveniences (loopback HTTP bind, relaxed rate-limit bucket). It is NOT an auth lever — auth is fail-closed in every environment. SaaS deployments MUST set MOLECULE_ENV=production.
 # MOLECULE_ENABLE_TEST_TOKENS=                   # Set to 1 to expose GET /admin/workspaces/:id/test-token (mints a fresh bearer token for E2E scripts). The route is auto-enabled when MOLECULE_ENV != production; this flag is the explicit override. Leave unset/0 in prod — the route 404s unless enabled.
 # MOLECULE_ORG_ID=                               # SaaS only: org UUID set by control plane on tenant machines. When set, workspace provisioning auto-routes through the control plane API instead of Docker.
 # CP_PROVISION_URL=                              # Override control plane URL for workspace provisioning (default: https://api.moleculesai.app). Only needed for testing against a non-production control plane.
@@ -361,15 +361,17 @@ def detect_drift(branch: str) -> tuple[list[str], dict]:
    """Returns (findings, debug). Empty findings == no drift.

    Raises:
-        ApiError: propagated from the protection fetch only when the
-                  failure is likely a transient Gitea outage (5xx).
-                  403/404 from the protection endpoint is treated as
-                  "cannot determine drift for this branch" — a token-
-                  scope issue (missing repo-admin on DRIFT_BOT_TOKEN) or
-                  a repo with no protection set should not turn the
-                  hourly cron red. The workflow continues to the next
-                  branch; no [ci-drift] issue is filed for a branch
-                  whose protection cannot be read.
+        ApiError: propagated (fail-closed) on a transient Gitea outage
+                  (5xx) AND on a 401/403 auth failure from the protection
+                  endpoint. A 401/403 means DRIFT_BOT_TOKEN cannot read
+                  branch protections at all — drift is UNVERIFIABLE, so
+                  this HARD gate must fail loud rather than green
+                  undetected drift (the regression class it exists to
+                  catch). An authenticated 404 (branch genuinely has no
+                  protection, e.g. staging pre-rollout) is the one
+                  tolerated skip: it returns ([], debug) with a loud
+                  ::warning:: and the workflow continues to the next
+                  branch.
    """
    findings: list[str] = []

@@ -403,17 +405,38 @@ def detect_drift(branch: str) -> tuple[list[str], dict]:
        m = _re.search(r"HTTP (\d{3})", msg)
        if m:
            http_status = int(m.group(1))
-        if http_status in (403, 404):
-            # Token lacks scope OR branch has no protection. Cannot
-            # determine drift — skip this branch. Do NOT exit non-zero;
-            # the issue IS the alarm, not a red workflow.
+        # FAIL-CLOSED contract (was fail-open: 403 AND 404 both returned
+        # [] with no signal — fixed). This is a HARD gate (no
+        # continue-on-error → false) running hourly on a PROTECTED context
+        # (schedule/dispatch on main). We split auth-failure from
+        # genuinely-absent:
+        #   401/403 → AUTH FAILURE: the token cannot read branch
+        #     protections at all, so drift CANNOT be determined for ANY
+        #     branch. Greening the hourly cron here means jobs↔protection
+        #     drift goes silently undetected — exactly the regression class
+        #     this sentinel exists to catch. Raise so the workflow fails
+        #     loud / fails closed.
+        #   404 → authenticated absent resource: this specific branch has
+        #     no protection (e.g. `staging` before its protection rollout).
+        #     Genuinely nothing to diff against — skip THIS branch with a
+        #     loud ::warning::, continue to the next.
+        if http_status in (401, 403):
            sys.stderr.write(
-                f"::error::GET {protection_path} returned HTTP {http_status} — "
-                f"DRIFT_BOT_TOKEN lacks repo-admin scope (Gitea 1.22.6 "
-                f"requires it for this endpoint) OR branch has no protection "
-                f"configured. Cannot determine drift for {branch}; "
-                f"skipping. Fix: grant repo-admin to mc-drift-bot or "
-                f"configure protection on {branch}.\n"
+                f"::error::GET {protection_path} returned HTTP "
+                f"{http_status} — DRIFT_BOT_TOKEN cannot read branch "
+                f"protections (needs repo-admin scope). AUTH FAILURE: "
+                f"drift CANNOT be determined, so this HARD gate FAILS "
+                f"CLOSED rather than greening undetected drift. Fix: grant "
+                f"repo-admin to mc-drift-bot (org team `drift-bot`, "
+                f"perm=admin) — fix the token, not the lint.\n"
+            )
+            raise
+        if http_status == 404:
+            sys.stderr.write(
+                f"::warning::GET {protection_path} returned HTTP 404 — "
+                f"branch '{branch}' has no protection configured "
+                f"(authenticated absent resource). Skipping drift check for "
+                f"{branch}; if it SHOULD be protected, configure it.\n"
            )
            debug = {
                "branch": branch,
@@ -424,7 +447,7 @@ def detect_drift(branch: str) -> tuple[list[str], dict]:
                "audit_env_checks": sorted(env_set),
            }
            return [], debug
-        # 5xx — propagate (transient outage, fail loud per design).
+        # 5xx / other — propagate (transient outage, fail loud per design).
        raise
    if not isinstance(protection, dict):
        sys.stderr.write(
@@ -1,16 +1,77 @@
 #!/usr/bin/env python3
 """gitea-merge-queue — conservative serialized merge bot for Gitea.

-Gitea 1.22.6 has auto-merge (`pull_auto_merge`) but no GitHub-style merge
+Gitea 1.22.6+ has auto-merge (`pull_auto_merge`) but no GitHub-style merge
 queue. This script provides the missing serialized policy in user space:

-1. Pick the oldest open PR carrying QUEUE_LABEL.
-2. Refuse to act unless main is green.
+1. Scan open same-repo PRs that are NOT opted out (auto-discovery, see below),
+   oldest-first, skipping drafts, until an ACTIONABLE one is found. A non-ready
+   candidate (REQUEST_CHANGES, mergeable!=True, insufficient genuine approvals,
+   or red required CI) is SKIPPED so it cannot head-of-line block newer ready
+   PRs; the scan continues to the next candidate.
+2. Refuse to act unless main's BP-required contexts are green.
 3. Refuse fork PRs; the queue may only mutate same-repo branches.
 4. If the PR branch does not contain current main, call Gitea's
   /pulls/{n}/update endpoint and stop. CI must rerun on the updated head.
-5. If the updated PR head has all required contexts green, merge with the
-   non-bypass merge actor token.
+5. Merge ONLY when, on the PR's CURRENT head sha:
+     - >= REQUIRED_APPROVALS distinct GENUINE official APPROVED reviews from
+       the recognised reviewer set (not stale, not dismissed, commit_id ==
+       current head), AND
+     - no open official REQUEST_CHANGES on the current head, AND
+     - every BP-required status context is green, AND
+     - the PR is mergeable.
+
+Authoritative gates (fail-closed):
+  - The REQUIRED status contexts come from BRANCH PROTECTION
+    (`status_check_contexts`), not a hand-maintained env list. If branch
+    protection cannot be enumerated, the queue HOLDS (does not merge blindly).
+  - NON-required reds (qa-review, security-review, sop-tier, sop-checklist
+    when not branch-required, E2E Chat, Staging SaaS, ci-arm64-advisory, any
+    continue-on-error job) MUST NOT block. They are reported, never gating.
+  - `force_merge=true` is used ONLY when the merge is blocked *solely* by
+    missing-but-non-required governance contexts (required are green + genuine
+    approvals present). It is NEVER used to bypass a failing REQUIRED context
+    or missing approvals.
+
+Auto-discovery (opt-OUT, label-optional):
+  The queue is SELF-SUSTAINING — a ready PR does NOT need a human (or an agent)
+  to add the `merge-queue` label first. When AUTO_DISCOVER is on (default), the
+  queue enumerates ALL open same-repo PRs and considers any that meets the full
+  merge bar (genuine approvals on current head + BP-required green + mergeable +
+  no open REQUEST_CHANGES). The merge bar above is UNCHANGED; auto-discovery only
+  changes WHICH PRs are considered, not whether they are mergeable.
+
+  This deliberately removes the historical dependency on an agent adding the
+  `merge-queue` label — agent Gitea tokens lack `write:issue` (labels are
+  issue-scoped), so they could never self-label and the queue stalled. The label
+  is now OPTIONAL metadata, not a gate.
+
+  SAFETY is preserved as opt-OUT: any PR carrying an opt-out label
+  (OPT_OUT_LABELS — `merge-queue-hold`, `do-not-auto-merge`, `wip`, `draft` by
+  default) is skipped (never auto-considered, never merged). Draft PRs
+  (draft=true STATE) are also skipped; the literal `draft` LABEL is an
+  additional explicit opt-out a human can apply without converting to a draft.
+  A human who wants to keep a PR out of autonomous merging just adds one of
+  those labels. Setting AUTO_DISCOVER=0 restores the legacy opt-IN behaviour
+  (only PRs already carrying QUEUE_LABEL are considered).
+
+Head-of-line (HOL) safety has two complementary layers:
+  (a) The queue SCANS THROUGH the FIFO candidate list and skips any non-ready
+      PR (REQUEST_CHANGES, mergeable!=True, insufficient genuine approvals, or
+      red required CI) instead of locking on the oldest and waiting, so a PR
+      that can never become ready without human action does not block newer
+      ready PRs.
+  (b) For the candidate the scan acts on, two permanent failure modes HOLD the
+      PR (apply HOLD_LABEL) and let the scan CONTINUE to the next candidate
+      rather than re-selecting the same wedged PR every tick:
+        - a permanent permission/4xx merge error (403/404/405), and
+        - a persistent branch-update conflict (the /update endpoint returns
+          HTTP 409 because the PR branch cannot be merged with main without a
+          manual rebase). A conflict will not self-resolve, so retrying it
+          every tick would HOL-block every ready PR behind it (issue #2352).
+
+Status-fetch is fail-closed: if the combined status for a sha cannot be
+fetched, the PR is skipped this tick (never treated as green).

 The script is intentionally one-PR-per-run. Workflow/cron concurrency should
 serialize invocations so two green PRs cannot merge against the same main.
@@ -40,6 +101,33 @@ WATCH_BRANCH = _env("WATCH_BRANCH", default="main")
 QUEUE_LABEL = _env("QUEUE_LABEL", default="merge-queue")
 HOLD_LABEL = _env("HOLD_LABEL", default="merge-queue-hold")
 UPDATE_STYLE = _env("UPDATE_STYLE", default="merge")
+# Auto-discovery (opt-OUT). When truthy (default), the queue considers ALL open
+# same-repo PRs that meet the merge bar, not only PRs already carrying
+# QUEUE_LABEL — so the queue is self-sustaining without any human/agent labeling
+# (agent tokens lack write:issue and cannot self-label). Set AUTO_DISCOVER=0 to
+# restore the legacy opt-IN behaviour (QUEUE_LABEL required to be considered).
+AUTO_DISCOVER = _env("AUTO_DISCOVER", default="1").strip().lower() not in {
+    "0",
+    "false",
+    "no",
+    "off",
+    "",
+}
+# Opt-OUT labels. A PR carrying ANY of these is skipped (never auto-considered,
+# never merged) — the human escape hatch from autonomous merging. HOLD_LABEL is
+# always included so the existing hold semantics keep working. `do-not-auto-merge`
+# and `wip` let a human keep a PR out of the auto-merge path without removing it.
+# `draft` is included as a literal label too: Gitea draft STATE (draft=true) is
+# already skipped via _issue_is_draft, but a "draft" LABEL is an additional,
+# explicit opt-out signal a human can apply without converting the PR to a draft.
+OPT_OUT_LABELS = {
+    name.strip()
+    for name in _env(
+        "OPT_OUT_LABELS",
+        default="do-not-auto-merge,wip,draft",
+    ).split(",")
+    if name.strip()
+} | ({HOLD_LABEL} if HOLD_LABEL else set())
 REQUIRED_CONTEXTS_RAW = _env(
    "REQUIRED_CONTEXTS",
    default=(
@@ -57,6 +145,24 @@ PUSH_REQUIRED_CONTEXTS_RAW = _env(
    default="CI / all-required (push)",
 )

+# Recognised official-reviewer set. A merge requires this many DISTINCT genuine
+# approvals (not stale/dismissed, on the current head sha) from accounts in
+# this set. The set is the real agents-team reviewer roster; founder/CTO-agent
+# accounts are intentionally excluded so the queue cannot be satisfied by a
+# human/owner approval alone — it must be a genuine peer review.
+REVIEWER_SET = {
+    name.strip()
+    for name in _env(
+        "REVIEWER_SET",
+        default="agent-reviewer,agent-researcher,agent-reviewer-cr2",
+    ).split(",")
+    if name.strip()
+}
+# Default mirrors molecule-core branch protection (required_approvals: 2). The
+# authoritative value is read from branch protection at runtime; this is only
+# the fallback when BP does not specify one.
+REQUIRED_APPROVALS_DEFAULT = int(_env("REQUIRED_APPROVALS", default="2") or "2")
+
 OWNER, NAME = (REPO.split("/", 1) + [""])[:2] if REPO else ("", "")
 API = f"https://{GITEA_HOST}/api/v1" if GITEA_HOST else ""

@@ -67,7 +173,27 @@ class ApiError(RuntimeError):

 class MergePermissionError(ApiError):
    """Merge failed with a permanent permission error (403/404/405).
-    The queue should skip this PR and move to the next one."""
+    The queue should HOLD this PR and move to the next one."""
+
+
+class BranchUpdateConflictError(ApiError):
+    """Updating the PR branch with the base hit a merge-conflict (HTTP 409).
+
+    A true merge-conflict is NOT transient: the branch cannot be auto-updated
+    until a human/agent rebases it. The queue should HOLD this PR (apply
+    HOLD_LABEL) and advance to the next candidate, exactly like the permission
+    path — otherwise the conflicted PR sits at the queue head and is retried
+    every tick forever, head-of-line-blocking every ready PR behind it.
+
+    NOTE: distinct from mergeable=None, which is Gitea STILL COMPUTING conflict
+    state — that case is handled as a transient WAIT (no hold). This error is
+    only raised on an explicit 409 returned by the /update endpoint."""
+
+
+class BranchProtectionUnavailable(ApiError):
+    """Branch protection (the authoritative required-context source) could not
+    be enumerated. The queue must HOLD rather than merge with an unverified
+    required-context set (fail-closed, no fail-open)."""


@dataclasses.dataclass(frozen=True)
@@ -75,6 +201,20 @@ class MergeDecision:
    ready: bool
    action: str
    reason: str
+    # When ready is True, force indicates the merge is blocked SOLELY by
+    # missing-but-non-required governance contexts (required are green +
+    # genuine approvals present), so force_merge=true is justified to bypass
+    # ONLY those non-required contexts. Defaults False.
+    force: bool = False
+
+
+@dataclasses.dataclass(frozen=True)
+class BranchProtection:
+    """The subset of branch protection the queue depends on."""
+
+    required_contexts: list[str]
+    required_approvals: int
+    block_on_rejected_reviews: bool


 def _require_runtime_env() -> None:
@@ -191,6 +331,117 @@ def required_contexts_green(
    return not missing_or_bad, missing_or_bad


+def parse_branch_protection(body: Any) -> BranchProtection:
+    """Extract the queue-relevant fields from a branch_protections payload.
+
+    Fail-closed: raises BranchProtectionUnavailable when status checks are
+    expected but the required-context list cannot be enumerated. We never fall
+    back to a hand-maintained env list as the authoritative required set —
+    doing so risks merging when a real required context is red/missing.
+    """
+    if not isinstance(body, dict):
+        raise BranchProtectionUnavailable("branch protection response not an object")
+    enable = bool(body.get("enable_status_check"))
+    contexts_raw = body.get("status_check_contexts")
+    if not enable:
+        # Status checks not enforced by BP at all. With no required contexts
+        # the queue would gate on approvals only — acceptable, but make it
+        # explicit and let the caller decide.
+        contexts: list[str] = []
+    else:
+        if not isinstance(contexts_raw, list):
+            raise BranchProtectionUnavailable(
+                "enable_status_check is true but status_check_contexts is not a list"
+            )
+        contexts = [c for c in contexts_raw if isinstance(c, str) and c.strip()]
+        if not contexts:
+            raise BranchProtectionUnavailable(
+                "enable_status_check is true but status_check_contexts is empty"
+            )
+    approvals = body.get("required_approvals")
+    required_approvals = (
+        int(approvals) if isinstance(approvals, int) else REQUIRED_APPROVALS_DEFAULT
+    )
+    return BranchProtection(
+        required_contexts=contexts,
+        required_approvals=required_approvals,
+        block_on_rejected_reviews=bool(body.get("block_on_rejected_reviews")),
+    )
+
+
+def get_branch_protection(branch: str) -> BranchProtection:
+    """Fetch branch protection for `branch`; fail-closed if unavailable."""
+    try:
+        _, body = api("GET", f"/repos/{OWNER}/{NAME}/branch_protections/{branch}")
+    except ApiError as exc:
+        raise BranchProtectionUnavailable(
+            f"could not fetch branch protection for {branch}: {exc}"
+        ) from exc
+    return parse_branch_protection(body)
+
+
+def genuine_approvals(
+    reviews: list[dict],
+    *,
+    head_sha: str,
+    reviewer_set: set[str],
+) -> tuple[set[str], list[str]]:
+    """Reduce a PR's reviews to genuine official approvals on the CURRENT head.
+
+    Returns (approvers, request_changes) where:
+      - approvers is the set of distinct logins (in reviewer_set) whose LATEST
+        APPROVED/REQUEST_CHANGES review (on any head) is an official, non-stale,
+        non-dismissed APPROVED on the current head, and
+      - request_changes is the list of logins (in reviewer_set) whose latest
+        such review is an official, non-stale, non-dismissed REQUEST_CHANGES on the current head.
+
+    "Current head" is enforced two ways, because Gitea exposes both signals:
+    a review must be `official` and NOT `stale`/`dismissed`, AND when the
+    review carries a commit_id it must equal head_sha. A review with no
+    commit_id but stale=False/dismissed=False is accepted (older Gitea rows).
+    We take each reviewer's LATEST submission (reviews arrive oldest-first), so
+    a later REQUEST_CHANGES correctly supersedes an earlier APPROVED and vice
+    versa.
+    """
+    latest_by_user: dict[str, dict] = {}
+    for review in reviews:
+        if not isinstance(review, dict):
+            continue
+        user = (review.get("user") or {}).get("login")
+        if not isinstance(user, str) or user not in reviewer_set:
+            continue
+        state = str(review.get("state") or "").upper()
+        if state not in {"APPROVED", "REQUEST_CHANGES"}:
+            continue  # ignore COMMENT/PENDING/DISMISSED-state rows
+        # reviews are returned oldest-first; later entries overwrite → latest wins
+        latest_by_user[user] = review
+
+    approvers: set[str] = set()
+    request_changes: list[str] = []
+    for user, review in latest_by_user.items():
+        if not review.get("official"):
+            continue
+        if review.get("stale") or review.get("dismissed"):
+            continue
+        commit_id = review.get("commit_id")
+        if isinstance(commit_id, str) and commit_id and head_sha:
+            if commit_id != head_sha:
+                continue  # review was on a previous head
+        state = str(review.get("state") or "").upper()
+        if state == "APPROVED":
+            approvers.add(user)
+        elif state == "REQUEST_CHANGES":
+            request_changes.append(user)
+    return approvers, request_changes
+
+
+def get_pull_reviews(pr_number: int) -> list[dict]:
+    _, body = api("GET", f"/repos/{OWNER}/{NAME}/pulls/{pr_number}/reviews")
+    if not isinstance(body, list):
+        raise ApiError(f"PR #{pr_number} reviews response not list")
+    return body
+
+
 def label_names(issue: dict) -> set[str]:
    return {
        label["name"]
@@ -219,6 +470,85 @@ def choose_next_queued_issue(
    return candidates[0] if candidates else None


+def _issue_is_draft(issue: dict) -> bool:
+    """True if the issue/PR is a draft.
+
+    The /issues listing exposes draft state under the `pull_request` sub-object
+    (`{"draft": true}`); some Gitea versions also surface a top-level `draft`.
+    Either is honoured. Drafts are never auto-considered for merging.
+    """
+    pr = issue.get("pull_request")
+    if isinstance(pr, dict) and pr.get("draft") is True:
+        return True
+    return issue.get("draft") is True
+
+
+def choose_candidate_issues(
+    issues: list[dict],
+    *,
+    queue_label: str,
+    opt_out_labels: set[str],
+    auto_discover: bool,
+) -> list[dict]:
+    """All open PRs eligible for a merge attempt this tick, oldest-first.
+
+    This is the auto-discovery selector. It does NOT change the merge bar — it
+    only changes WHICH PRs are considered:
+
+      - auto_discover=True (default): every open same-repo PR is a candidate,
+        EXCEPT those carrying an opt-out label or marked draft. The QUEUE_LABEL
+        is optional metadata, not a gate, so a ready PR reaches the queue with no
+        human/agent labeling (the write:issue gap is removed).
+      - auto_discover=False: legacy opt-IN — only PRs carrying queue_label are
+        candidates (still skipping opt-out labels and drafts).
+
+    Opt-out is the safety escape hatch: any opt_out_labels member present skips
+    the PR entirely (never considered, never merged). Ordering is oldest-first
+    (created_at, then number) to preserve the serialized FIFO ordering.
+
+    Returns the FULL ordered list (not just the head) so process_once can SCAN
+    THROUGH non-ready candidates instead of locking on the oldest. A non-ready
+    auto-discovered PR (e.g. one with REQUEST_CHANGES or mergeable=false, which
+    can never become ready without human action) must NOT head-of-line block the
+    newer ready PRs behind it — the readiness check happens per-candidate in
+    process_once, and a `wait` candidate is skipped to the next one.
+    """
+    candidates = []
+    for issue in issues:
+        if "pull_request" not in issue:
+            continue
+        labels = label_names(issue)
+        if opt_out_labels & labels:
+            continue  # opt-out: human kept this PR out of autonomous merging
+        if _issue_is_draft(issue):
+            continue  # drafts are never auto-merged
+        if not auto_discover and queue_label not in labels:
+            continue  # legacy opt-IN: require the queue label
+        candidates.append(issue)
+    candidates.sort(key=lambda issue: (issue.get("created_at") or "", int(issue["number"])))
+    return candidates
+
+
+def choose_next_candidate_issue(
+    issues: list[dict],
+    *,
+    queue_label: str,
+    opt_out_labels: set[str],
+    auto_discover: bool,
+) -> dict | None:
+    """The oldest eligible candidate, or None. Thin head-of-list wrapper around
+    choose_candidate_issues; retained for callers/tests that only want the head.
+    process_once uses the full list (choose_candidate_issues) so it can scan past
+    non-ready PRs rather than HOL-block on the oldest."""
+    candidates = choose_candidate_issues(
+        issues,
+        queue_label=queue_label,
+        opt_out_labels=opt_out_labels,
+        auto_discover=auto_discover,
+    )
+    return candidates[0] if candidates else None
+
+
 def pr_contains_base_sha(commits: list[dict], base_sha: str) -> bool:
    for commit in commits:
        sha = commit.get("sha") or commit.get("id")
@@ -233,36 +563,87 @@ def pr_has_current_base(pr: dict, commits: list[dict], main_sha: str) -> bool:
    return pr_contains_base_sha(commits, main_sha)


+def _non_required_red_present(
+    latest: dict[str, dict],
+    required_contexts: list[str],
+) -> bool:
+    """True if any NON-required context is non-success.
+
+    Such reds are the governance/SOP/advisory checks Gitea may still treat as
+    "missing required context" at merge time even though branch protection does
+    not require them. Their presence is what justifies force_merge=true (we
+    have already verified every REQUIRED context is green and approvals are
+    genuine, so force only bypasses these non-required reds).
+    """
+    required = set(required_contexts)
+    for context, status in latest.items():
+        if context in required:
+            continue
+        if status_state(status) != "success":
+            return True
+    return False
+
+
 def evaluate_merge_readiness(
    *,
    main_status: dict,
    pr_status: dict,
    required_contexts: list[str],
+    required_approvals: int,
+    approvers: set[str],
+    request_changes: list[str],
    pr_has_current_base: bool,
+    mergeable: bool,
    pr_labels: set[str] | None = None,
 ) -> MergeDecision:
-    # Check push-required contexts explicitly instead of combined state.
-    # Combined state can be "failure" due to non-blocking jobs
-    # (continue-on-error: true) that don't actually gate merges.
-    # CI / all-required (push) is the authoritative gate — it respects
-    # continue-on-error and correctly aggregates all blocking failures.
+    # 1) Main's push-required contexts must be green. Combined state can be
+    #    "failure" due to non-blocking jobs (continue-on-error: true) that do
+    #    not gate merges, so check the explicit required set, not combined.
    main_latest = latest_statuses_by_context(main_status.get("statuses") or [])
    main_ok, main_bad = required_contexts_green(main_latest, push_required_contexts())
    if not main_ok:
        return MergeDecision(False, "pause", "main required contexts not green: " + ", ".join(main_bad))
+
+    # 2) PR head must contain current main.
    if not pr_has_current_base:
        return MergeDecision(False, "update", "PR head does not contain current main")

-    # Check explicit required contexts instead of combined state. Combined state
-    # can be "failure" due to non-blocking jobs with continue-on-error: true
-    # (e.g. publish-runtime-autobump/pr-validate, qa-review on stale tokens).
-    # The required_contexts list is the authoritative gate — it includes only
-    # the checks that actually block merges.
+    # 3) No open official REQUEST_CHANGES on the current head.
+    if request_changes:
+        return MergeDecision(
+            False, "wait",
+            "open REQUEST_CHANGES on current head from: " + ", ".join(sorted(request_changes)),
+        )
+
+    # 4) Enough distinct genuine official approvals on the current head.
+    if len(approvers) < required_approvals:
+        return MergeDecision(
+            False, "wait",
+            f"insufficient genuine approvals on current head: have "
+            f"{len(approvers)} ({', '.join(sorted(approvers)) or 'none'}), "
+            f"need {required_approvals}",
+        )
+
+    # 5) Every BRANCH-PROTECTION-REQUIRED status context must be green. This is
+    #    the authoritative status gate — NON-required reds (qa-review,
+    #    security-review, sop-tier/sop-checklist when not BP-required, E2E Chat,
+    #    Staging SaaS, ci-arm64-advisory, continue-on-error jobs) are NOT
+    #    consulted here and must not block.
    latest = latest_statuses_by_context(pr_status.get("statuses") or [])
    ok, missing_or_bad = required_contexts_green(latest, required_contexts, pr_labels)
    if not ok:
        return MergeDecision(False, "wait", "required contexts not green: " + ", ".join(missing_or_bad))
-    return MergeDecision(True, "merge", "ready")
+
+    # 6) Gitea must consider the PR mergeable (no conflicts).
+    if not mergeable:
+        return MergeDecision(False, "wait", "PR is not mergeable (conflicts)")
+
+    # Ready. Use force_merge ONLY if the merge would otherwise be blocked by
+    # missing-but-non-required governance contexts. Required are green and
+    # approvals are genuine, so force only bypasses non-required reds — never a
+    # failing required context or missing approval.
+    force = _non_required_red_present(latest, required_contexts)
+    return MergeDecision(True, "merge", "ready", force=force)


 def get_branch_head(branch: str) -> str:
@@ -280,6 +661,12 @@ def get_combined_status(sha: str) -> dict:
    The /status endpoint caps the `statuses` array at 30 entries (Gitea
    default page size), so we fetch the full list via /statuses with a
    higher limit. The combined `state` still comes from /status.
+
+    Fail-closed: the PRIMARY /status fetch must succeed. If it raises, the
+    error propagates so the caller skips this PR this tick (we never treat a
+    failed status fetch as green — dev-sop "no fail-open"). Only the SECONDARY
+    /statuses enrichment (which merely extends the per-context list beyond the
+    30-entry cap) is best-effort; if it fails we still have the combined set.
    """
    _, combined = api("GET", f"/repos/{OWNER}/{NAME}/commits/{sha}/status")
    if not isinstance(combined, dict):
@@ -329,6 +716,31 @@ def list_queued_issues() -> list[dict]:
    return body


+def list_candidate_issues(*, auto_discover: bool) -> list[dict]:
+    """Open PR issues eligible for consideration this tick.
+
+    With auto_discover=True (default) this lists open PRs with no label filter
+    (one page, at most 50 per `limit`) so the queue is self-sustaining — a ready
+    PR is considered without any human/agent first adding QUEUE_LABEL. With
+    auto_discover=False it falls back to the legacy label-filtered listing (opt-IN).
+    Opt-out filtering and draft-skipping happen in choose_candidate_issues, not here.
+    """
+    if not auto_discover:
+        return list_queued_issues()
+    _, body = api(
+        "GET",
+        f"/repos/{OWNER}/{NAME}/issues",
+        query={
+            "state": "open",
+            "type": "pulls",
+            "limit": "50",
+        },
+    )
+    if not isinstance(body, list):
+        raise ApiError("candidate issues response not list")
+    return body
+
+
 def get_pull(pr_number: int) -> dict:
    _, body = api("GET", f"/repos/{OWNER}/{NAME}/pulls/{pr_number}")
    if not isinstance(body, dict):
@@ -354,30 +766,97 @@ def update_pull(pr_number: int, *, dry_run: bool) -> None:
    print(f"::notice::updating PR #{pr_number} with base branch via style={UPDATE_STYLE}")
    if dry_run:
        return
+    try:
+        api(
+            "POST",
+            f"/repos/{OWNER}/{NAME}/pulls/{pr_number}/update",
+            query={"style": UPDATE_STYLE},
+            expect_json=False,
+        )
+    except ApiError as exc:
+        # Gitea returns HTTP 409 when the base cannot be merged into the PR
+        # branch because of a real conflict. The queue cannot auto-resolve a
+        # conflict, so re-raise as BranchUpdateConflictError; process_once HOLDs
+        # the PR and advances (HOL guard) instead of retrying it forever.
+        # Match the HTTP STATUS token ("-> HTTP 409") specifically, not a bare
+        # "409" substring — the PR number or path can itself contain "409"
+        # (e.g. /pulls/1409/update) and must not be misread as a conflict.
+        if "-> HTTP 409" in str(exc):
+            raise BranchUpdateConflictError(str(exc)) from exc
+        raise  # re-raise other ApiErrors unchanged
+
+
+def add_label_by_name(pr_number: int, label_name: str, *, dry_run: bool) -> None:
+    """Apply an existing repo label (by name) to a PR/issue.
+
+    Used to HOLD a wedged PR so the queue advances. Resolves the label id from
+    the repo label set; if the label does not exist, raises ApiError (the
+    caller decides whether that is fatal).
+    """
+    print(f"::notice::applying label '{label_name}' to PR #{pr_number}")
+    if dry_run:
+        return
+    _, labels = api("GET", f"/repos/{OWNER}/{NAME}/labels", query={"limit": "100"})
+    label_id = None
+    if isinstance(labels, list):
+        for label in labels:
+            if isinstance(label, dict) and label.get("name") == label_name:
+                label_id = label.get("id")
+                break
+    if label_id is None:
+        raise ApiError(f"label '{label_name}' not found in repo {OWNER}/{NAME}")
    api(
        "POST",
-        f"/repos/{OWNER}/{NAME}/pulls/{pr_number}/update",
-        query={"style": UPDATE_STYLE},
-        expect_json=False,
+        f"/repos/{OWNER}/{NAME}/issues/{pr_number}/labels",
+        body={"labels": [label_id]},
    )


-def merge_pull(pr_number: int, *, dry_run: bool) -> None:
-    payload = {
+def hold_pr(pr_number: int, hold_note: str, *, dry_run: bool) -> None:
+    """Apply HOLD_LABEL to a wedged PR so the queue advances past it.
+
+    choose_candidate_issues skips HOLD_LABEL-bearing PRs, so this is the HOL
+    guard: a PR the queue cannot make progress on (permanent permission error
+    or unresolvable branch-update conflict) is held and a human/agent fixes it,
+    rather than the queue re-selecting it every tick forever. If the label
+    cannot be applied we still post the explanatory comment so the wedge is at
+    least visible — but we never loop on the PR.
+    """
+    try:
+        add_label_by_name(pr_number, HOLD_LABEL, dry_run=dry_run)
+    except ApiError as label_exc:
+        sys.stderr.write(
+            f"::error::could not apply HOLD_LABEL to PR #{pr_number}: {label_exc}\n"
+        )
+        hold_note += (
+            f"\n\n(NOTE: could not apply the hold label automatically: "
+            f"{label_exc}. Please add `{HOLD_LABEL}` manually.)"
+        )
+    post_comment(pr_number, hold_note, dry_run=dry_run)
+
+
+def merge_pull(pr_number: int, *, dry_run: bool, force: bool = False) -> None:
+    payload: dict[str, Any] = {
        "Do": "merge",
        "MergeTitleField": f"Merge PR #{pr_number} via Gitea merge queue",
        "MergeMessageField": (
            "Serialized merge by gitea-merge-queue after current-main, "
-            "SOP, and required CI checks were green."
+            "genuine approvals, and required CI checks were green."
        ),
    }
-    print(f"::notice::merging PR #{pr_number}")
+    if force:
+        # force_merge bypasses ONLY missing-but-non-required governance
+        # contexts. The caller has already verified required contexts are green
+        # and genuine approvals are present, so this never bypasses a failing
+        # required context or an approval shortfall.
+        payload["force_merge"] = True
+    print(f"::notice::merging PR #{pr_number}{' (force_merge: non-required reds)' if force else ''}")
    if dry_run:
        return
    try:
        api("POST", f"/repos/{OWNER}/{NAME}/pulls/{pr_number}/merge", body=payload, expect_json=False)
    except ApiError as exc:
-        # Re-raise permission-like errors so process_once can skip this PR.
+        # Re-raise permission-like errors so process_once can HOLD this PR.
        # 403 = no push access, 404 = repo/pr not found, 405 = not allowed.
        msg = str(exc)
        for code in ("403", "404", "405"):
@@ -387,7 +866,25 @@ def merge_pull(pr_number: int, *, dry_run: bool) -> None:


 def process_once(*, dry_run: bool = False) -> int:
-    contexts = required_contexts(REQUIRED_CONTEXTS_RAW)
+    # Required status contexts come from BRANCH PROTECTION, not a hand-kept env
+    # list. Fail-closed: if BP cannot be enumerated, HOLD the whole tick rather
+    # than merge against an unverified required set.
+    try:
+        bp = get_branch_protection(WATCH_BRANCH)
+    except BranchProtectionUnavailable as exc:
+        sys.stderr.write(
+            f"::error::queue held: branch protection for {WATCH_BRANCH} "
+            f"unavailable (fail-closed): {exc}\n"
+        )
+        return 0
+    contexts = bp.required_contexts
+    required_approvals = bp.required_approvals
+    print(
+        f"::notice::queue policy from branch protection: "
+        f"required_approvals={required_approvals} "
+        f"required_contexts={contexts or '[none]'}"
+    )
+
    main_sha = get_branch_head(WATCH_BRANCH)
    main_status = get_combined_status(main_sha)
    # Check push-required contexts explicitly instead of combined state.
@@ -398,83 +895,199 @@ def process_once(*, dry_run: bool = False) -> int:
        print(f"::notice::queue paused: {WATCH_BRANCH}@{main_sha[:8]} required contexts not green: {', '.join(main_bad)}")
        return 0

-    issue = choose_next_queued_issue(
-        list_queued_issues(),
+    candidates = choose_candidate_issues(
+        list_candidate_issues(auto_discover=AUTO_DISCOVER),
        queue_label=QUEUE_LABEL,
-        hold_label=HOLD_LABEL,
+        opt_out_labels=OPT_OUT_LABELS,
+        auto_discover=AUTO_DISCOVER,
    )
-    if not issue:
-        print("::notice::merge queue empty")
+    if not candidates:
+        print(
+            "::notice::no merge candidates "
+            f"(auto_discover={'on' if AUTO_DISCOVER else 'off'})"
+        )
        return 0

+    # HOL fix: SCAN THROUGH the FIFO candidate list until a PR we can ACT on is
+    # found, instead of locking on the oldest and waiting. A non-ready candidate
+    # (decision.action == "wait": REQUEST_CHANGES, mergeable!=True, insufficient
+    # genuine approvals, or red required CI) is SKIPPED — it must NOT head-of-line
+    # block the newer ready PRs behind it. The merge bar is unchanged: a skipped
+    # PR is never merged, and the first ACTIONABLE candidate (an "update" that
+    # advances a stale branch, or a fully-ready "merge") terminates the scan.
+    #
+    # `update` is treated as actionable, not skippable: a PR whose head merely
+    # lacks current main is in a legitimate in-progress state (updating it +
+    # rerunning CI moves it toward ready), unlike a PR that can never become
+    # ready without a human (RC / conflict), which is a `wait` and gets skipped.
+    for issue in candidates:
+        decision, ctx = _evaluate_candidate(
+            issue,
+            main_sha=main_sha,
+            main_status=main_status,
+            required_contexts=contexts,
+            required_approvals=required_approvals,
+            dry_run=dry_run,
+        )
+        if decision is None:
+            continue  # not merge-eligible (not-open / opted-out / draft / fork / wrong base)
+        pr_number = ctx["pr_number"]
+        print(f"::notice::PR #{pr_number} decision={decision.action}: {decision.reason}")
+        if decision.action == "wait":
+            # Non-ready: skip to the next candidate (no HOL block, no merge).
+            continue
+        if decision.action == "update":
+            try:
+                update_pull(pr_number, dry_run=dry_run)
+            except BranchUpdateConflictError as exc:
+                # The branch cannot be updated with main because of a real
+                # conflict (HTTP 409 from /update). This is the #2352 HOL guard:
+                # a conflict will not self-resolve without a human/agent rebase,
+                # so re-attempting the update every tick would head-of-line block
+                # every ready PR behind it. HOLD this PR (apply HOLD_LABEL, which
+                # is an opt-out label so later ticks skip it) and CONTINUE the
+                # scan so a newer ready PR can still merge this tick. Fail-closed:
+                # a held PR is skipped, never merged.
+                sys.stderr.write(
+                    f"::error::branch-update conflict for PR #{pr_number}: {exc}\n"
+                )
+                hold_note = (
+                    "merge-queue: could not update this branch with "
+                    f"`{WATCH_BRANCH}` — the update returned a merge conflict "
+                    f"(HTTP 409) that the queue cannot auto-resolve ({exc}). "
+                    f"Applied `{HOLD_LABEL}` to unblock the queue (HOL guard). "
+                    f"Fix: rebase/merge `{WATCH_BRANCH}` into this branch and "
+                    f"resolve the conflicts, then remove `{HOLD_LABEL}` to requeue."
+                )
+                hold_pr(pr_number, hold_note, dry_run=dry_run)
+                continue  # held — keep scanning for a mergeable candidate
+            post_comment(
+                pr_number,
+                (
+                    f"merge-queue: updated this branch with `{WATCH_BRANCH}` at "
+                    f"`{main_sha[:12]}`. Waiting for CI on the refreshed head."
+                ),
+                dry_run=dry_run,
+            )
+            return 0
+        if decision.ready:
+            latest_main_sha = get_branch_head(WATCH_BRANCH)
+            if latest_main_sha != main_sha:
+                print(
+                    f"::notice::main moved {main_sha[:8]} -> {latest_main_sha[:8]}; "
+                    "deferring to next tick"
+                )
+                return 0
+            try:
+                merge_pull(pr_number, dry_run=dry_run, force=decision.force)
+            except MergePermissionError as exc:
+                # Permanent merge failure (HTTP 403/404/405). HOLD this PR by
+                # applying HOLD_LABEL (it becomes an opt-out label, so subsequent
+                # ticks skip it) and CONTINUE scanning so the queue still advances
+                # to the next ready PR this tick rather than stalling.
+                sys.stderr.write(f"::error::merge permission error for PR #{pr_number}: {exc}\n")
+                hold_note = (
+                    "merge-queue: merge failed with a permanent permission error "
+                    f"({exc}). No available token has Can-merge permission for this "
+                    f"PR. Applied `{HOLD_LABEL}` to unblock the queue (HOL guard). "
+                    f"Fix: grant Can-merge to the queue token, then remove "
+                    f"`{HOLD_LABEL}` to requeue."
+                )
+                try:
+                    add_label_by_name(pr_number, HOLD_LABEL, dry_run=dry_run)
+                except ApiError as label_exc:
+                    # If we cannot even apply the hold label, fall back to a comment
+                    # so the wedge is at least visible; do NOT loop on this PR.
+                    sys.stderr.write(
+                        f"::error::could not apply HOLD_LABEL to PR #{pr_number}: {label_exc}\n"
+                    )
+                    hold_note += (
+                        f"\n\n(NOTE: could not apply the hold label automatically: "
+                        f"{label_exc}. Please add `{HOLD_LABEL}` manually.)"
+                    )
+                post_comment(pr_number, hold_note, dry_run=dry_run)
+                continue  # held — keep scanning for a mergeable candidate
+            return 0
+    return 0
+
+
+def _evaluate_candidate(
+    issue: dict,
+    *,
+    main_sha: str,
+    main_status: dict,
+    required_contexts: list[str],
+    required_approvals: int,
+    dry_run: bool,
+) -> tuple[MergeDecision | None, dict]:
+    """Evaluate a single auto-discovered candidate against the full merge bar.
+
+    Returns (decision, ctx) where ctx carries {"pr_number"}. A None decision
+    means the PR is not merge-eligible at all (not open / opted-out / draft /
+    fork / wrong base) and the caller should skip to the next candidate; for
+    fork / wrong-base the explanatory comment is posted here before returning.
+
+    The merge bar is UNCHANGED from the single-PR path — this only factors the
+    per-PR evaluation out so process_once can scan multiple candidates. A failed
+    status fetch still raises (fail-closed): it propagates to the caller so the
+    PR is never treated as green.
+    """
    pr_number = int(issue["number"])
+    ctx = {"pr_number": pr_number}
    pr = get_pull(pr_number)
    if pr.get("state") != "open":
        print(f"::notice::PR #{pr_number} is not open; skipping")
-        return 0
+        return None, ctx
+    # Defensive opt-out/draft re-check on the authoritative pull payload: the
+    # /issues listing's label/draft view can lag, but the merge bar must respect
+    # the live pull state. (choose_candidate_issues already filtered on the
+    # listing; this guards against a stale listing racing a just-added opt-out.)
+    if OPT_OUT_LABELS & label_names(pr):
+        print(f"::notice::PR #{pr_number} carries an opt-out label; skipping")
+        return None, ctx
+    if pr.get("draft") is True:
+        print(f"::notice::PR #{pr_number} is a draft; skipping")
+        return None, ctx
    if pr.get("base", {}).get("ref") != WATCH_BRANCH:
        post_comment(pr_number, f"merge-queue: skipped; base branch is not `{WATCH_BRANCH}`.", dry_run=dry_run)
-        return 0
+        return None, ctx
    if pr.get("head", {}).get("repo_id") != pr.get("base", {}).get("repo_id"):
        post_comment(pr_number, "merge-queue: skipped; fork PRs are not supported by the serialized queue.", dry_run=dry_run)
-        return 0
+        return None, ctx

    head_sha = pr.get("head", {}).get("sha")
    if not isinstance(head_sha, str) or len(head_sha) < 7:
        raise ApiError(f"PR #{pr_number} missing head sha")
    commits = get_pull_commits(pr_number)
    current_base = pr_has_current_base(pr, commits, main_sha)
+    # Fail-closed: a failed status fetch raises here and propagates (the PR is
+    # never treated as green).
    pr_status = get_combined_status(head_sha)
    pr_labels = label_names(pr)
+    # FAIL-CLOSED: Gitea returns mergeable=None (or omits the field) while it is
+    # still COMPUTING conflict state. Only the literal True is decisive proof the
+    # PR is conflict-free; None and False both mean "not (yet) mergeable". We must
+    # NOT autonomously merge on an unknown — treat anything but True as not-yet-
+    # mergeable so evaluate_merge_readiness returns a "wait" decision.
+    mergeable = pr.get("mergeable") is True
+
+    reviews = get_pull_reviews(pr_number)
+    approvers, request_changes = genuine_approvals(
+        reviews, head_sha=head_sha, reviewer_set=REVIEWER_SET
+    )
+
    decision = evaluate_merge_readiness(
        main_status=main_status,
        pr_status=pr_status,
-        required_contexts=contexts,
+        required_contexts=required_contexts,
+        required_approvals=required_approvals,
+        approvers=approvers,
+        request_changes=request_changes,
        pr_has_current_base=current_base,
+        mergeable=mergeable,
        pr_labels=pr_labels,
    )
-
-    print(f"::notice::PR #{pr_number} decision={decision.action}: {decision.reason}")
-    if decision.action == "update":
-        update_pull(pr_number, dry_run=dry_run)
-        post_comment(
-            pr_number,
-            (
-                f"merge-queue: updated this branch with `{WATCH_BRANCH}` at "
-                f"`{main_sha[:12]}`. Waiting for CI on the refreshed head."
-            ),
-            dry_run=dry_run,
-        )
-        return 0
-    if decision.ready:
-        latest_main_sha = get_branch_head(WATCH_BRANCH)
-        if latest_main_sha != main_sha:
-            print(
-                f"::notice::main moved {main_sha[:8]} -> {latest_main_sha[:8]}; "
-                "deferring to next tick"
-            )
-            return 0
-        try:
-            merge_pull(pr_number, dry_run=dry_run)
-        except MergePermissionError as exc:
-            # Permanent merge failure (HTTP 403/404/405). Post a comment so
-            # maintainers know why, then return 0 so this tick is done.
-            # The PR stays in the queue; future ticks can retry after the
-            # permission issue is resolved.
-            sys.stderr.write(f"::error::merge permission error for PR #{pr_number}: {exc}\n")
-            post_comment(
-                pr_number,
-                (
-                    "merge-queue: merge failed with HTTP 405 'User not allowed to merge PR'. "
-                    "No available token has Can-merge permission on this repo. "
-                    "Fix: grant Can-merge to a token, or add a maintain/admin collaborator. "
-                    "Skipping to next queued PR on next tick."
-                ),
-                dry_run=dry_run,
-            )
-            return 0
-        return 0
-    return 0
+    return decision, ctx


 def main() -> int:
@@ -40,20 +40,24 @@ Context-format note (Gitea 1.22.6):

 Exit codes:
  0 — no required workflow has a paths/paths-ignore filter (clean) OR
-      branch_protections endpoint returned 403/404 (token-scope issue;
-      surfaced via ::error:: but non-fatal so a missing scope doesn't
-      red-X every PR — fix the token, not the lint).
+      branch_protections returned an authenticated 404 (branch
+      genuinely has no protection; ::warning:: surfaced).
  1 — at least one required workflow has a paths/paths-ignore filter
      (the gate-degrading defect class).
  2 — env contract violation (missing GITEA_TOKEN/HOST/REPO/BRANCH).
  3 — workflows directory missing or workflow YAML unparseable.
-  4 — protection response shape unexpected (non-dict body on 2xx).
+  4 — FAIL-CLOSED verification failure: branch_protections 401/403
+      auth failure (token can't read BP), 5xx transient (propagated
+      ApiError), or unexpected response shape. This is a HARD gate on
+      a protected context — it MUST NOT green when it cannot verify.

 Auth note: `GET /repos/.../branch_protections/{branch}` requires
 repo-admin role in Gitea 1.22.6. The workflow-default `GITHUB_TOKEN`
 is non-admin; we re-use `DRIFT_BOT_TOKEN` (same persona that powers
-ci-required-drift.yml). If `DRIFT_BOT_TOKEN` is unavailable in a future
-context, the script falls through gracefully (exit 0 + ::error::).
+ci-required-drift.yml). A 401/403 from a missing-scope token is an
+AUTH FAILURE that FAILS CLOSED (exit 4) — fix the token, not the
+lint. Only an authenticated 404 (genuinely-absent protection) is a
+tolerated graceful skip.
 """
 from __future__ import annotations

@@ -309,14 +313,36 @@ def run() -> int:
        msg = str(e)
        m = re.search(r"HTTP (\d{3})", msg)
        http_status = int(m.group(1)) if m else None
-        if http_status in (403, 404):
+        # FAIL-CLOSED contract (was fail-open: 403 AND 404 both exit 0 —
+        # fixed). This is a HARD gate (continue-on-error: false) on a
+        # PROTECTED context: pull_request (same-repo; fork PRs can't carry
+        # DRIFT_BOT_TOKEN) + workflow_dispatch. We split auth-failure from
+        # genuinely-absent:
+        #   401/403 → AUTH FAILURE: the token cannot read branch
+        #     protections, so we CANNOT enumerate the required-check set
+        #     and CANNOT verify the no-paths-filter invariant. Fail loud /
+        #     fail closed (exit 4) — do NOT green an unverifiable gate.
+        #   404 → authenticated absent resource: branch genuinely has no
+        #     protection. Nothing to enumerate; tolerated degradation,
+        #     surfaced loudly (exit 0 with ::warning::).
+        if http_status in (401, 403):
            sys.stderr.write(
-                f"::error::GET {protection_path} returned HTTP {http_status} — "
-                f"DRIFT_BOT_TOKEN lacks repo-admin scope (Gitea 1.22.6 "
-                f"requires it for this endpoint) OR branch '{BRANCH}' has "
-                f"no protection configured. Cannot enumerate required "
-                f"checks; skipping lint with exit 0 to avoid red-X on "
-                f"every PR. Fix: grant repo-admin to mc-drift-bot.\n"
+                f"::error::GET {protection_path} returned HTTP "
+                f"{http_status} — DRIFT_BOT_TOKEN cannot read branch "
+                f"protections (needs repo-admin scope). AUTH FAILURE: "
+                f"cannot enumerate required checks, so this lint FAILS "
+                f"CLOSED rather than greening a gate it could not verify. "
+                f"Fix: grant repo-admin to mc-drift-bot (org team "
+                f"`drift-bot`, perm=admin) — fix the token, not the lint.\n"
+            )
+            return 4
+        if http_status == 404:
+            sys.stderr.write(
+                f"::warning::GET {protection_path} returned HTTP 404 — "
+                f"branch '{BRANCH}' has no protection configured "
+                f"(authenticated absent resource). No required contexts to "
+                f"check. If '{BRANCH}' SHOULD be protected, this is a real "
+                f"finding.\n"
            )
            return 0
        raise
@@ -36,7 +36,8 @@ Daily scheduled run + workflow_dispatch:

  1. GET `branch_protections/{BRANCH}` (needs DRIFT_BOT_TOKEN with
     repo-admin scope; same persona as ci-required-drift.yml).
-     Graceful-degrade on 403/404 per Tier 2a contract.
+     FAIL CLOSED on 401/403 (auth failure → exit 2); a genuine
+     authenticated 404 (no protection) is a loud ::warning:: skip.

  2. Walk `.gitea/workflows/*.yml` via PyYAML AST. For each workflow,
     enumerate its emitted contexts: `{workflow.name} / {job.name or
@@ -59,10 +60,14 @@ Daily scheduled run + workflow_dispatch:

 Exit codes
 ----------
-  0 — clean OR API 403/404 (graceful-degrade, surfaces ::error::).
+  0 — clean, OR an authenticated 404 (branch genuinely has no
+      protection — surfaces ::warning::, not a fail-open).
  1 — at least one BP context has no emitter.
-  2 — env contract violation, workflows-dir missing, or YAML parse
-      error.
+  2 — env contract violation, workflows-dir missing, YAML parse
+      error, OR a fail-closed verification failure: 401/403 auth
+      failure (token can't read BP) or transient/unexpected API
+      error. This is a HARD gate on a protected context (schedule/
+      dispatch on main) — it MUST NOT green when it cannot verify.

 Env
 ---
@@ -394,28 +399,49 @@ def run() -> int:
        return 2

    # 1. Pull BP.
+    #
+    # FAIL-CLOSED contract (was fail-open with exit 0 — fixed). This lint
+    # is a HARD gate (continue-on-error: false) and only ever runs on a
+    # PROTECTED context: schedule + workflow_dispatch on `main`. There is
+    # NO fork/advisory split here — the DRIFT_BOT_TOKEN secret is always
+    # present and trusted, so an auth failure or transient error is a real
+    # inability-to-verify, not a legitimate degradation. We MUST fail loud
+    # (`::error::` + nonzero) rather than green a gate we could not check.
    status, bp = api("GET", f"/repos/{repo}/branch_protections/{branch}")
    if status == "forbidden":
        sys.stderr.write(
-            f"::error::GET branch_protections/{branch} returned HTTP 403 — "
-            f"DRIFT_BOT_TOKEN lacks repo-admin scope (Gitea 1.22.6 requires "
-            f"it for this endpoint). Skipping lint with exit 0 to avoid "
-            f"red-X on every run. Fix: grant repo-admin to mc-drift-bot. "
-            f"Per Tier 2a contract.\n"
+            f"::error::GET branch_protections/{branch} returned HTTP "
+            f"401/403 — DRIFT_BOT_TOKEN cannot read branch protections "
+            f"(needs repo-admin scope; Gitea requires it for this "
+            f"endpoint). This is an AUTH FAILURE, not an absent resource: "
+            f"the lint CANNOT verify the BP↔emitter invariant, so it FAILS "
+            f"CLOSED instead of greening a gate it could not check. Fix: "
+            f"grant repo-admin to mc-drift-bot (org team `drift-bot`, "
+            f"perm=admin) — fix the token, not the lint.\n"
        )
-        return 0
+        return 2
    if status == "not_found":
+        # Genuine 404 WITH a valid token = branch has no protection
+        # configured. On `main` this is itself suspicious (main should
+        # always be protected) but it is a real, authenticated read of an
+        # absent resource — not an auth failure — so we surface it loudly
+        # but do not hard-fail on the genuinely-absent case.
        print(
-            f"::notice::branch '{branch}' has no protection configured; "
-            f"nothing to lint."
+            f"::warning::branch '{branch}' has no protection configured "
+            f"(authenticated 404); nothing to lint. If '{branch}' SHOULD be "
+            f"protected, this is a real finding — configure branch "
+            f"protection."
        )
        return 0
    if status != "ok" or not isinstance(bp, dict):
        sys.stderr.write(
-            f"::error::branch_protections/{branch} response unexpected; "
-            f"status={status}. Treating as transient; exit 0.\n"
+            f"::error::branch_protections/{branch} read failed with "
+            f"status={status} (transient/unexpected). The lint CANNOT "
+            f"verify the BP↔emitter invariant on this run; FAILING CLOSED "
+            f"rather than greening unverified. Re-run; if it persists, "
+            f"investigate Gitea API health / token validity.\n"
        )
-        return 0
+        return 2

    bp_contexts: list[str] = list(bp.get("status_check_contexts") or [])
    if not bp_contexts:
@@ -57,10 +57,14 @@ comment unrelated to the new job.
 Exit codes
 ----------
  0 — no new emissions, all new emissions have valid directives,
-      or BP read errored (graceful-degrade per Tier 2a contract).
+      OR an authenticated 404 (branch genuinely has no protection
+      to verify against — surfaces ::warning::, not a fail-open).
  1 — at least one new emission lacks a directive, or has
      `bp-required: yes` but the context is missing from BP.
-  2 — env contract violation or YAML parse error.
+  2 — env contract violation, YAML parse error, OR a fail-closed
+      verification failure: 401/403 auth failure (token can't read
+      BP) or transient/unexpected API error. HARD gate on a
+      same-repo PR context — MUST NOT green when it cannot verify.

 Env
 ---
@@ -420,33 +424,51 @@ def run() -> int:
        return 0

    # Step 3 — fetch BP context list.
+    #
+    # FAIL-CLOSED contract (was fail-open with exit 0 — fixed). This is a
+    # HARD gate (continue-on-error: false) that runs on `pull_request`
+    # against `main`. On molecule-core, `pull_request` runs are same-repo
+    # (fork PRs cannot carry the DRIFT_BOT_TOKEN secret), so this is a
+    # PROTECTED/trusted context with no legitimate fork-degradation. An
+    # auth failure or transient error means we CANNOT verify a NEW
+    # bp-required emission is actually in BP — so we MUST fail loud rather
+    # than green the gate. (A genuinely-absent 404 read with a valid token
+    # is the one tolerated degradation: there is no BP to check against.)
    status, bp = api("GET", f"/repos/{repo}/branch_protections/{branch}")
    bp_contexts: set[str] = set()
    if status == "forbidden":
        sys.stderr.write(
-            f"::error::GET branch_protections/{branch} returned HTTP 403 — "
-            f"DRIFT_BOT_TOKEN lacks repo-admin scope. Cannot verify "
-            f"bp-required directives; skipping lint with exit 0 per "
-            f"Tier 2a contract. Fix the token, not the lint.\n"
+            f"::error::GET branch_protections/{branch} returned HTTP "
+            f"401/403 — DRIFT_BOT_TOKEN cannot read branch protections "
+            f"(needs repo-admin scope). This is an AUTH FAILURE: the lint "
+            f"CANNOT verify the bp-required directives on this PR, so it "
+            f"FAILS CLOSED instead of greening unverified. Fix: grant "
+            f"repo-admin to mc-drift-bot (org team `drift-bot`) — fix the "
+            f"token, not the lint.\n"
        )
-        return 0
+        return 2
    elif status == "not_found":
-        # Branch has no protection — nothing to verify against; the
-        # bp-required: yes directive can't be satisfied. Treat as
-        # graceful-skip rather than red-X.
+        # Authenticated 404 — branch genuinely has no protection. There is
+        # nothing to verify a `bp-required: yes` directive against, so this
+        # is the one tolerated degradation. Surface loudly (on `main` a
+        # missing protection is itself a real finding) but do not hard-fail.
        print(
-            f"::notice::branch '{branch}' has no protection; cannot verify "
-            f"bp-required directives. Skipping (exit 0)."
+            f"::warning::branch '{branch}' has no protection (authenticated "
+            f"404); cannot verify bp-required directives. If '{branch}' "
+            f"SHOULD be protected this is a real finding."
        )
        return 0
    elif status == "ok" and isinstance(bp, dict):
        bp_contexts = set(bp.get("status_check_contexts") or [])
    else:
        sys.stderr.write(
-            f"::error::branch_protections/{branch} response unexpected; "
-            f"status={status}. Treating as transient; exit 0.\n"
+            f"::error::branch_protections/{branch} read failed with "
+            f"status={status} (transient/unexpected). CANNOT verify "
+            f"bp-required directives on this PR; FAILING CLOSED rather than "
+            f"greening unverified. Re-run; if persistent, check Gitea API "
+            f"health / token validity.\n"
        )
-        return 0
+        return 2

    # Step 4 — validate each new emission's directive.
    violations: list[str] = []
@@ -174,6 +174,16 @@ def parse_directives(
        if not parts:
            continue
        first = parts[0]
+        # Em-dash (U+2014) is a common visual separator in user-written
+        # notes, e.g.  /sop-ack Five-Axis — five-axis-review
+        # If raw_slug contains an em-dash, split on the first one so
+        # the part before becomes the slug and the rest becomes the note.
+        note_from_slug = ""
+        slug_source = raw_slug
+        emdash_idx = raw_slug.find("—")
+        if emdash_idx != -1:
+            slug_source = raw_slug[:emdash_idx].strip()
+            note_from_slug = raw_slug[emdash_idx + 1 :].strip()
        # If the slug-capture greedily matched multiple words (e.g.
        # "comprehensive testing"), preserve normalize behavior: join
        # the WHOLE first-word-token only; trailing words get appended to
@@ -186,13 +196,19 @@ def parse_directives(
            # as slug and "testing extra-note" as note. We defer the
            # disambiguation to the caller via the returned canonical
            # slug. For simplicity: try the WHOLE captured string first.
-            canonical = normalize_slug(raw_slug, numeric_aliases)
+            canonical = normalize_slug(slug_source, numeric_aliases)
        else:
-            canonical = normalize_slug(first, numeric_aliases)
+            canonical = normalize_slug(slug_source, numeric_aliases)
        note_from_group = (m.group(3) or "").strip()
-        # If we collapsed multi-word slug into kebab and there's a
-        # trailing-text group too, append it.
-        entry = (kind, canonical, note_from_group)
+        # The em-dash (U+2014) is a visual separator; the regex puts it
+        # in group(3) because it is outside the slug character class.
+        # Strip it so "/sop-ack slug — note" yields just "note".
+        if note_from_group.startswith("—"):
+            note_from_group = note_from_group[1:].strip()
+        # Combine note_from_slug (em-dash split) with note_from_group
+        # (trailing text after the slug captured by the regex group).
+        combined_note = (note_from_slug + " " + note_from_group).strip()
+        entry = (kind, canonical, combined_note)
        if kind == "sop-n/a":
            na_directives.append(entry)
        else:
@@ -290,48 +290,75 @@ debug "approvers: $(echo "$APPROVERS" | tr '\n' ' ')"
 # Pre/post spaces ensure case patterns *${_t}* match even when the name
 # is the first or last entry (bash case *word* needs delimiters on both sides).
 #
-# FALLBACK: if ALL team probes return 403 (token lacks read:org scope),
-# fall back to /orgs/{org}/members/{user}. This returns 204 for any org
-# member — a superset of team membership. Accepting it as a fallback means
-# the gate passes when the token is scoped to repo+user only (core-bot PAT).
-# This is safe because: (a) org membership is a prerequisite for every
-# eligible team; (b) the AND-composition of internal#189 still requires
-# multiple independent approvers; (c) any token with read:repository can
-# see the approving reviews, so bypass requires a colluding approver.
+# FAIL-CLOSED AUTHORIZATION (security: SOP tier gate is an AUTHORIZATION gate).
+#
+# This used to fall back to /orgs/{org}/members/{user} whenever every team
+# probe failed and credit any org member as a member of EVERY queried team.
+# That was a privilege-escalation: org membership is NOT team membership, so
+# a 403/visibility/token-scope gap on the team probes silently promoted a
+# plain org member to satisfy tier:high (ceo). An inability-to-verify became
+# an authorization GRANT. The fallback is REMOVED — org membership must never
+# satisfy a team-gated tier.
+#
+# A team-membership probe has exactly three meaningful outcomes:
+#   200 / 204  → the user IS a member of that team       (credit it)
+#   404        → the user is definitively NOT a member    (no credit, verified)
+#   anything else (403 / 401 / 5xx / curl failure / non-numeric)
+#              → membership CANNOT be read                 (cannot-verify)
+#
+# Per the dev-sop fail-closed rule (inability-to-verify = failure, never a
+# pass — and here, never an authorization grant), a cannot-verify outcome on
+# ANY probe is a HARD infra failure: we publish a loud cannot-verify error and
+# exit non-zero. We do NOT proceed to evaluate the tier expression on a partial
+# / unverifiable membership picture, because doing so could let an unverifiable
+# approver's clause silently fail-or-pass on incomplete data. Fix the token
+# scope (read:organization) or the runner network — not the gate.
 declare -A APPROVER_TEAMS
+_verify_failed=""   # accumulates "<user>:<team>(HTTP <code>)" for probes we could not read
 for U in $APPROVERS; do
  [ "$U" = "$PR_AUTHOR" ] && debug "skip self-review by $U" && continue
-  _any_team_success="no"
  for T in "${!TEAM_ID[@]}"; do
    ID="${TEAM_ID[$T]}"
+    set +e
    CODE=$(curl -sS -o /dev/null -w '%{http_code}' -H "$AUTH" \
      "${API}/teams/${ID}/members/${U}")
-    debug "probe: $U in team $T (id=$ID) → HTTP $CODE"
-    if [ "$CODE" = "200" ] || [ "$CODE" = "204" ]; then
-      APPROVER_TEAMS[$U]="${APPROVER_TEAMS[$U]:- } ${APPROVER_TEAMS[$U]:+ }$T "
-      debug "$U qualifies for team $T"
-      _any_team_success="yes"
+    _curl_exit=$?
+    set -e
+    debug "probe: $U in team $T (id=$ID) → HTTP $CODE (curl exit=$_curl_exit)"
+    if [ "$_curl_exit" -ne 0 ]; then
+      # curl itself failed (DNS, connection refused, timeout) — unreachable.
+      _verify_failed="${_verify_failed}${_verify_failed:+, }${U}:${T}(curl exit ${_curl_exit})"
+      continue
    fi
-  done
-  # Fallback: if every team probe returned 403, try org membership.
-  # "??" teams were never resolved to IDs so they never entered the loop.
-  # If the user is an org member, credit them as being in each queried team
-  # (engineers, managers, ceo are all org-level). This is safe because org
-  # membership is a prerequisite for all three, and bypass requires a colluding
-  # approver (same risk as before the AND-composition).
-  if [ "$_any_team_success" = "no" ]; then
-    ORG_CODE=$(curl -sS -o /dev/null -w '%{http_code}' -H "$AUTH" \
-      "${API}/orgs/${OWNER}/members/${U}")
-    debug "probe: $U in org $OWNER (fallback) → HTTP $ORG_CODE"
-    if [ "$ORG_CODE" = "204" ]; then
-      for T in "${!TEAM_ID[@]}"; do
+    case "$CODE" in
+      200|204)
        APPROVER_TEAMS[$U]="${APPROVER_TEAMS[$U]:- } ${APPROVER_TEAMS[$U]:+ }$T "
-      done
-      debug "$U credited as org member for all queried teams (fallback — token may lack read:org)"
-    fi
-  fi
+        debug "$U qualifies for team $T"
+        ;;
+      404)
+        # Definitively not a member of this team — a verified negative.
+        debug "$U is NOT a member of team $T (verified 404)"
+        ;;
+      *)
+        # 403/401/5xx/etc — membership is unreadable. Do NOT treat as "not a
+        # member" and do NOT fall back to org membership. This is cannot-verify.
+        _verify_failed="${_verify_failed}${_verify_failed:+, }${U}:${T}(HTTP ${CODE})"
+        ;;
+    esac
+  done
 done

+# Fail-closed: if ANY membership probe could not be read, we cannot make an
+# authorization decision. Publish a loud cannot-verify / infra-failed status
+# and exit non-zero. Never grant the tier on unverifiable membership.
+if [ -n "$_verify_failed" ]; then
+  echo "::error::sop-tier-check CANNOT VERIFY team membership — gate FAILS CLOSED."
+  echo "::error::Unreadable membership probe(s): ${_verify_failed}"
+  echo "::error::A team-membership probe returned 403/401/5xx (or curl failed). The SOP tier gate is an authorization gate; an inability to verify team membership is treated as a FAILURE, never a pass. Org membership is NOT team membership and is never credited as a fallback."
+  echo "::error::Fix: ensure GITEA_TOKEN (SOP_TIER_CHECK_TOKEN) has read:organization scope and the Gitea API is reachable from the runner, then re-run. Do NOT relax this gate."
+  exit 1
+fi
+
 # 7. Evaluate the tier expression.
 #
 # legacy OR-gate: use the simplified loop from before internal#189.
@@ -105,12 +105,26 @@ if [ "${SOP_REFIRE_DISABLE_RATE_LIMIT:-}" != "1" ]; then
 fi

 # 3. Invoke sop-tier-check.sh with the env it expects.
-# The canonical workflow intentionally fail-opens the job conclusion
-# (`bash .gitea/scripts/sop-tier-check.sh || true`) while Gitea branch
-# protection enforces reviewer approvals separately. Keep the refire path
-# aligned with that workflow status behavior; otherwise /refire-tier-check can
-# post a hard failure that the canonical pull_request_target workflow would
-# not publish.
+#
+# FAIL-CLOSED contract (was fail-open — fixed 2026-06-05,
+# fix/core-ci-fail-closed). The previous shape was:
+#     bash "$SCRIPT" || true
+#     TIER_EXIT=0          # <-- hardcoded success
+# which discarded the real verdict and ALWAYS POSTed
+# `state=success` for the REQUIRED context
+# `sop-tier-check / tier-check (pull_request)`. That meant ANY
+# collaborator could comment `/refire-tier-check` to forcibly green
+# the SOP-6 approval gate on the PR head SHA — a fail-open AND a
+# privilege bypass of branch protection. The canonical
+# pull_request_target workflow's conclusion publishes the same
+# context honestly (red on a real violation); the refire MUST mirror
+# THAT honesty, not a discarded exit code.
+#
+# We now capture the script's real exit code under `set +e` and POST
+# success ONLY when it actually exited 0. sop-tier-check.sh itself
+# fails closed on infra faults (no SOP_FAIL_OPEN in this refire env),
+# so a bad token / unreachable API / missing jq → non-zero → we POST
+# `state=failure`, never a false green.
 #
 # SOP_REFIRE_TIER_CHECK_SCRIPT env var lets tests substitute a mock —
 # sop-tier-check.sh uses bash 4+ associative arrays which trigger a known
@@ -125,7 +139,10 @@ if [ ! -f "$SCRIPT" ]; then
 fi

 # Re-invoke. Pipe stdout/stderr through so the runner log shows the
-# tier-check decision inline.
+# tier-check decision inline. Capture the REAL exit code (set +e so a
+# non-zero verdict doesn't abort this script under set -e) — the POST
+# below keys off it, so a failed tier-check posts state=failure.
+set +e
 GITEA_TOKEN="$GITEA_TOKEN" \
  GITEA_HOST="$GITEA_HOST" \
  REPO="$REPO" \
@@ -133,8 +150,9 @@ GITEA_TOKEN="$GITEA_TOKEN" \
  PR_AUTHOR="$PR_AUTHOR" \
  SOP_DEBUG="${SOP_DEBUG:-0}" \
  SOP_LEGACY_CHECK="${SOP_LEGACY_CHECK:-0}" \
-  bash "$SCRIPT" || true
-TIER_EXIT=0
+  bash "$SCRIPT"
+TIER_EXIT=$?
+set -e
 debug "sop-tier-check.sh exit=$TIER_EXIT"

 # 4. POST the resulting status.
@@ -170,4 +188,12 @@ if [ "$POST_HTTP" != "200" ] && [ "$POST_HTTP" != "201" ]; then
 fi

 echo "::notice::sop-tier-refire posted state=$STATE for context=\"$CONTEXT\" on sha=$HEAD_SHA"
-exit "$TIER_EXIT"
+# Exit 0: the refire JOB succeeded — it re-evaluated the gate and posted
+# an HONEST status. The gate VERDICT is carried by the POSTed status
+# ($STATE), which is what branch protection reads; a failing tier-check
+# posts state=failure (red on the PR), so there is no fail-open. We do
+# NOT also exit non-zero on a failing verdict — that would double-signal
+# the same failure as both a red status AND a red refire job. The
+# fail-open that mattered (TIER_EXIT hardcoded to 0 → always state=success)
+# is fixed above by capturing the real exit code.
+exit 0
@@ -208,6 +208,22 @@ class TestParseDirectives(unittest.TestCase):
        d = self.parse_ack_revoke("/sop-ack Comprehensive_Testing")
        self.assertEqual(d[0][1], "comprehensive-testing")

+    def test_emdash_separator_parsed_correctly(self):
+        # Em-dash (U+2014) between slug and note is common in practice.
+        # /sop-ack Five-Axis — five-axis-review
+        # → slug = five-axis, note = five-axis-review (the em-dash separator is stripped)
+        d = self.parse_ack_revoke("/sop-ack Five-Axis — five-axis-review")
+        self.assertEqual(len(d), 1)
+        self.assertEqual(d[0][1], "five-axis")
+        self.assertIn("five-axis-review", d[0][2])
+
+    def test_emdash_no_note(self):
+        # Em-dash at end of slug: only slug, no note content
+        d = self.parse_ack_revoke("/sop-ack Five-Axis —")
+        self.assertEqual(len(d), 1)
+        self.assertEqual(d[0][1], "five-axis")
+        self.assertEqual(d[0][2], "")  # em-dash is separator-only → empty note
+

 # ---------------------------------------------------------------------------
 # section_marker_present
@@ -0,0 +1,272 @@
+#!/usr/bin/env bash
+# Security regression test for the SOP tier-gate AUTHORIZATION bypass.
+#
+# Bug (fixed in fix/sop-tier-authz-no-org-fallback):
+#   sop-tier-check.sh probed team membership at /teams/{id}/members/{user}.
+#   If EVERY team probe failed (e.g. 403 — token lacks read:organization, or
+#   any visibility/flakiness gap), it FELL BACK to /orgs/{org}/members/{user}
+#   and credited that org member as a member of EVERY queried team. The
+#   evaluator then treated those synthetic memberships as real, so a plain
+#   NON-CEO org member satisfied tier:high (ceo). A visibility/auth gap became
+#   a real highest-tier authorization PASS — privilege escalation.
+#
+# Fix (fail-closed authorization):
+#   - The org-member ⇒ "member of all teams" fallback is REMOVED. Org
+#     membership is never credited as team membership.
+#   - A team probe that returns anything other than 200/204 (member) or 404
+#     (verified non-member) is a CANNOT-VERIFY condition: the gate fails loud
+#     (exit 1) with a cannot-verify status and never grants the tier.
+#
+# Method: this is a true end-to-end test. It prepends a fake `curl` to PATH
+# that serves canned Gitea API responses keyed by URL, then runs the REAL
+# sop-tier-check.sh. The fake exercises the genuine probe→credit→evaluate
+# path — no logic is re-implemented in the test.
+
+set -euo pipefail
+
+THIS_DIR="$(cd "$(dirname "$0")" && pwd)"
+SCRIPT_DIR="$(cd "$THIS_DIR/.." && pwd)"
+SCRIPT="$SCRIPT_DIR/sop-tier-check.sh"
+
+command -v jq >/dev/null 2>&1 || { echo "::error::jq required but not found"; exit 1; }
+[ -f "$SCRIPT" ] || { echo "::error::sop-tier-check.sh not found at $SCRIPT — test must fail loudly if the script is absent"; exit 1; }
+
+# sop-tier-check.sh uses `declare -A` (associative arrays), which require
+# bash >= 4. CI runners (Ubuntu) ship bash 5; macOS ships 3.2. Resolve a
+# bash >= 4 to run the script under.
+pick_bash() {
+  local c
+  for c in bash /opt/homebrew/bin/bash /usr/local/bin/bash /bin/bash; do
+    local p; p="$(command -v "$c" 2>/dev/null || true)"
+    [ -n "$p" ] || continue
+    local maj; maj="$("$p" -c 'echo "${BASH_VERSINFO[0]}"' 2>/dev/null || echo 0)"
+    if [ "${maj:-0}" -ge 4 ]; then echo "$p"; return 0; fi
+  done
+  return 1
+}
+BASH4="$(pick_bash)" || { echo "::error::need bash >= 4 to run sop-tier-check.sh (associative arrays); none found"; exit 1; }
+echo "using bash: $BASH4 ($("$BASH4" -c 'echo $BASH_VERSION'))"
+
+PASS=0
+FAIL=0
+
+assert_eq() {
+  local label="$1" expected="$2" got="$3"
+  if [ "$expected" = "$got" ]; then
+    echo "  PASS  $label"
+    PASS=$((PASS + 1))
+  else
+    echo "  FAIL  $label"
+    echo "        expected: <$expected>"
+    echo "        got:      <$got>"
+    FAIL=$((FAIL + 1))
+  fi
+}
+
+assert_contains() {
+  local label="$1" haystack="$2" needle="$3"
+  if printf '%s' "$haystack" | grep -qF -- "$needle"; then
+    echo "  PASS  $label"
+    PASS=$((PASS + 1))
+  else
+    echo "  FAIL  $label (missing substring: <$needle>)"
+    FAIL=$((FAIL + 1))
+  fi
+}
+
+assert_not_contains() {
+  local label="$1" haystack="$2" needle="$3"
+  if printf '%s' "$haystack" | grep -qF -- "$needle"; then
+    echo "  FAIL  $label (unexpected substring present: <$needle>)"
+    FAIL=$((FAIL + 1))
+  else
+    echo "  PASS  $label"
+    PASS=$((PASS + 1))
+  fi
+}
+
+# ---------------------------------------------------------------------------
+# Fake-curl harness.
+#
+# The real script calls curl in three shapes:
+#   (a) body capture:   curl -sS -H AUTH URL                 -> prints JSON body
+#   (b) http-code:      curl -sS -o FILE -w '%{http_code}' -H AUTH URL
+#   (c) http-code only: curl -sS -o /dev/null -w '%{http_code}' -H AUTH URL
+#
+# Our fake reads the URL (last non-flag arg), looks up a response in fixture
+# files under $FIXDIR, and emits body and/or http-code accordingly.
+# ---------------------------------------------------------------------------
+
+make_harness() {
+  # $1 = scenario dir to populate with fixtures
+  local FIXDIR="$1"
+  local BIN="$FIXDIR/bin"
+  mkdir -p "$BIN"
+  cat > "$BIN/curl" <<'FAKE'
+#!/usr/bin/env bash
+# Fake curl for sop-tier-check authz tests. Looks up canned responses by URL.
+set -u
+FIXDIR="${SOP_TEST_FIXDIR:?SOP_TEST_FIXDIR unset}"
+
+url=""
+out=""
+want_code="no"
+prev=""
+for a in "$@"; do
+  case "$prev" in
+    -o) out="$a" ;;
+  esac
+  case "$a" in
+    http*://*) url="$a" ;;
+    '%{http_code}') want_code="yes" ;;
+  esac
+  # -w '%{http_code}' arrives as the value of the -w flag
+  if [ "$prev" = "-w" ] && [ "$a" = '%{http_code}' ]; then want_code="yes"; fi
+  prev="$a"
+done
+
+# Map URL -> fixture key (a filename-safe slug).
+# We only need the path after /api/v1.
+path="${url#*/api/v1}"
+slug="$(printf '%s' "$path" | tr '/?=&' '____')"
+
+body_file="$FIXDIR/body${slug}"
+code_file="$FIXDIR/code${slug}"
+
+# Emit body to -o target (or capture for stdout) when a body fixture exists.
+body=""
+if [ -f "$body_file" ]; then body="$(cat "$body_file")"; fi
+if [ -n "$out" ]; then
+  printf '%s' "$body" > "$out"
+else
+  printf '%s' "$body"
+fi
+
+# Emit http code when requested.
+if [ "$want_code" = "yes" ]; then
+  if [ -f "$code_file" ]; then
+    printf '%s' "$(cat "$code_file")"
+  else
+    printf '200'
+  fi
+fi
+exit 0
+FAKE
+  chmod +x "$BIN/curl"
+  echo "$BIN"
+}
+
+# Common fixtures shared by scenarios. $1 = FIXDIR, $2 = approver login,
+# $3 = tier label name (e.g. tier:high), $4 = teams JSON.
+seed_common() {
+  local FIXDIR="$1" approver="$2" tier="$3" teams_json="$4"
+  mkdir -p "$FIXDIR"
+  # /user -> whoami
+  printf '%s' '{"login":"sop-bot"}' > "$FIXDIR/body_user"
+  # PR head sha
+  printf '%s' '{"head":{"sha":"headsha1"}}' \
+    > "$FIXDIR/body_repos_molecule-ai_molecule-core_pulls_42"
+  # labels
+  printf '%s' "[{\"name\":\"$tier\"}]" \
+    > "$FIXDIR/body_repos_molecule-ai_molecule-core_issues_42_labels"
+  # org teams list
+  printf '%s' "$teams_json" > "$FIXDIR/body_orgs_molecule-ai_teams"
+  printf '%s' '200' > "$FIXDIR/code_orgs_molecule-ai_teams"
+  # reviews: one APPROVED on current head by $approver
+  printf '%s' "[{\"state\":\"APPROVED\",\"commit_id\":\"headsha1\",\"user\":{\"login\":\"$approver\"}}]" \
+    > "$FIXDIR/body_repos_molecule-ai_molecule-core_pulls_42_reviews"
+}
+
+run_script() {
+  # $1 = FIXDIR (must contain bin/curl). Prints combined stdout+stderr; returns the script's exit code (callers read $?).
+  local FIXDIR="$1"
+  local BIN="$FIXDIR/bin"
+  set +e
+  OUT=$(
+    SOP_TEST_FIXDIR="$FIXDIR" \
+    PATH="$BIN:$PATH" \
+    GITEA_TOKEN="faketoken" \
+    GITEA_HOST="git.moleculesai.app" \
+    REPO="molecule-ai/molecule-core" \
+    PR_NUMBER="42" \
+    PR_AUTHOR="pr-author" \
+    SOP_DEBUG="0" \
+    SOP_LEGACY_CHECK="0" \
+    "$BASH4" "$SCRIPT" 2>&1
+  )
+  RC=$?
+  set -e
+  printf '%s' "$OUT"
+  return $RC
+}
+
+TEAMS_JSON='[{"name":"ceo","id":10},{"name":"engineers","id":11},{"name":"managers","id":12}]'
+
+echo "=============================================================="
+echo "Scenario 1: tier:high, team probe 403 (cannot read), approver"
+echo "            is a plain org member but NOT in ceo team."
+echo "            EXPECT: tier NOT granted (fail-closed cannot-verify)."
+echo "=============================================================="
+S1="$(mktemp -d)"
+make_harness "$S1" >/dev/null
+seed_common "$S1" "org-only-bob" "tier:high" "$TEAMS_JSON"
+# Team membership probe for ceo (id=10) returns 403 — cannot read.
+printf '%s' '403' > "$S1/code_teams_10_members_org-only-bob"
+# The OLD bug path: org membership probe would 204 and synthetic-credit.
+printf '%s' '204' > "$S1/code_orgs_molecule-ai_members_org-only-bob"
+set +e
+OUT1="$(run_script "$S1")"; RC1=$?
+set -e
+echo "$OUT1" | sed 's/^/    /'
+echo "    (exit=$RC1)"
+assert_eq "S1 exit non-zero (tier NOT granted)" "1" "$([ "$RC1" -ne 0 ] && echo 1 || echo 0)"
+assert_not_contains "S1 did NOT print PASSED" "$OUT1" "sop-tier-check PASSED"
+assert_contains "S1 cannot-verify error surfaced" "$OUT1" "CANNOT VERIFY"
+assert_contains "S1 names the unreadable probe (403)" "$OUT1" "HTTP 403"
+rm -rf "$S1"
+
+echo
+echo "=============================================================="
+echo "Scenario 2: tier:high, genuine ceo team member (probe 204)."
+echo "            EXPECT: tier GRANTED."
+echo "=============================================================="
+S2="$(mktemp -d)"
+make_harness "$S2" >/dev/null
+seed_common "$S2" "real-ceo" "tier:high" "$TEAMS_JSON"
+printf '%s' '204' > "$S2/code_teams_10_members_real-ceo"   # ceo team: member
+set +e
+OUT2="$(run_script "$S2")"; RC2=$?
+set -e
+echo "$OUT2" | sed 's/^/    /'
+echo "    (exit=$RC2)"
+assert_eq "S2 exit zero (granted)" "0" "$RC2"
+assert_contains "S2 printed PASSED" "$OUT2" "sop-tier-check PASSED"
+rm -rf "$S2"
+
+echo
+echo "=============================================================="
+echo "Scenario 3: tier:high, approver is an org member but a VERIFIED"
+echo "            non-member of ceo (team probe 404). Org probe would"
+echo "            204 — must NEVER be synthetic-credited."
+echo "            EXPECT: tier NOT granted (clause FAIL), no fallback."
+echo "=============================================================="
+S3="$(mktemp -d)"
+make_harness "$S3" >/dev/null
+seed_common "$S3" "org-member-carol" "tier:high" "$TEAMS_JSON"
+printf '%s' '404' > "$S3/code_teams_10_members_org-member-carol"  # verified NOT in ceo
+printf '%s' '204' > "$S3/code_orgs_molecule-ai_members_org-member-carol" # org member (must be ignored)
+set +e
+OUT3="$(run_script "$S3")"; RC3=$?
+set -e
+echo "$OUT3" | sed 's/^/    /'
+echo "    (exit=$RC3)"
+assert_eq "S3 exit non-zero (tier NOT granted)" "1" "$([ "$RC3" -ne 0 ] && echo 1 || echo 0)"
+assert_not_contains "S3 did NOT print PASSED" "$OUT3" "sop-tier-check PASSED"
+assert_contains "S3 reported a real clause FAIL (not cannot-verify)" "$OUT3" "FAILED for tier:high"
+assert_not_contains "S3 did NOT cannot-verify (404 is a verified negative)" "$OUT3" "CANNOT VERIFY"
+rm -rf "$S3"
+
+echo
+echo "------"
+echo "PASS=$PASS FAIL=$FAIL"
+[ "$FAIL" -eq 0 ]
@@ -246,21 +246,24 @@ assert_contains "T1 POST context is sop-tier-check / tier-check" \
  '"context": "sop-tier-check / tier-check (pull_request)"' "$POSTED"
 assert_contains "T1 description names commenter" "test-runner" "$POSTED"

-# T2: missing tier label → tier-check fails internally, but refire status
-# matches the canonical workflow's fail-open job conclusion.
+# T2: missing tier label → tier-check fails internally (mock exits 1).
+# FAIL-CLOSED contract (fix/core-ci-fail-closed): refire now captures the
+# REAL exit code and POSTs state=failure — it does NOT forge a green on
+# the required context. The refire job itself still exits 0 (it succeeded
+# at posting an honest failure status).
 run_scenario "T2_no_tier_label" "fail_no_label"
 RC=$(cat "$FIX_STATE_DIR/last_rc")
 POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
-assert_eq "T2 exit code 0 (canonical fail-open)" "0" "$RC"
-assert_contains "T2 POSTed state=success" '"state": "success"' "$POSTED"
+assert_eq "T2 exit code 0 (posted an honest status)" "0" "$RC"
+assert_contains "T2 POSTed state=failure (no forged green)" '"state": "failure"' "$POSTED"

-# T3: tier:low present but ZERO approving reviews → internal tier check fails,
-# refire status remains aligned with the canonical workflow.
+# T3: tier:low present but ZERO approving reviews → internal tier check
+# fails (mock exits 1). Refire POSTs state=failure, never a false green.
 run_scenario "T3_no_approvals" "fail_no_approvals"
 RC=$(cat "$FIX_STATE_DIR/last_rc")
 POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
-assert_eq "T3 exit code 0 (canonical fail-open)" "0" "$RC"
-assert_contains "T3 POSTed state=success" '"state": "success"' "$POSTED"
+assert_eq "T3 exit code 0 (posted an honest status)" "0" "$RC"
+assert_contains "T3 POSTed state=failure (no forged green)" '"state": "failure"' "$POSTED"

 # T4: closed PR — refire is a no-op (no POST, exit 0)
 run_scenario "T4_closed" "pass"
@@ -205,5 +205,5 @@ n/a_gates:
    required_teams: [security, managers, ceo]
    description: >-
      Security review N/A when this change has no security surface
-      (docs-only, pure-frontend, dependency-only). A security/owners
+      (docs-only, pure-frontend, dependency-only). A security/managers/ceo
      member must post /sop-n/a security-review to activate.
@@ -34,11 +34,6 @@ jobs:
  check:
    name: Block forbidden paths
    runs-on: ubuntu-latest
-    # Phase 3 (RFC #219 §1): surface broken workflows without blocking
-    # the PR. Follow-up PR flips this off after surfaced defects are
-    # triaged.
-    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
-    continue-on-error: true
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
@@ -364,6 +364,25 @@ jobs:
          # check missed. If a refactor weakens the gate to a shape check,
          # this step goes red on every PR.
          bash tests/e2e/test_completion_assert_unit.sh
+          # harden/e2e-staging-saas-failclosed: fail-direction proof for the
+          # E2E_REQUIRE_LIVE fail-closed-on-skip guard in
+          # test_staging_full_saas.sh. Offline (no LLM/network/provisioning):
+          # asserts the guard exits 5 when a live lifecycle did NOT run and
+          # passes when all milestones fired — so a refactor that lets the
+          # staging gate report green without a real provision→online→A2A
+          # cycle goes red on every PR.
+          bash tests/e2e/test_require_live_guard_unit.sh
+          # harden/enforce-ci-gates-core-v2 (PR #2286): fail-direction proof
+          # for the E2E_REQUIRE_LIVE zero-validated gate in
+          # test_priority_runtimes_e2e.sh (the REQUIRED `E2E API Smoke Test`).
+          # Offline (no LLM/network/provisioning): sources that script under
+          # its unit source-guard and drives the REAL evaluate_require_live_gate
+          # — asserts REQUIRE_LIVE=1 + zero validated → RED (the false-green
+          # trap), REQUIRE_LIVE=1 + >=1 validated → GREEN, and REQUIRE_LIVE
+          # unset + zero validated → GREEN (loud skip). CI can't provision a
+          # live arm to prove this, so this unit test IS the regression gate:
+          # a revert of the zero-validated→RED logic goes red on every PR.
+          bash tests/e2e/test_require_live_priority_gate_unit.sh

      - if: ${{ needs.changes.outputs.scripts == 'true' }}
        name: Test ECR promote-tenant-image script (mock-driven, no live infra)
@@ -272,6 +272,33 @@ jobs:
          echo "::error::Redis did not become ready in 15s"
          docker logs "$REDIS_CONTAINER" || true
          exit 1
+      - name: Set deterministic admin token for the e2e platform
+        if: needs.detect-changes.outputs.api == 'true'
+        run: |
+          # AdminAuth (workspace-server/internal/middleware/wsauth_middleware.go:164)
+          # reads ADMIN_TOKEN. Setting it (a) closes isDevModeFailOpen (devmode.go:50
+          # returns false when ADMIN_TOKEN is non-empty), so admin routes require a
+          # bearer, and (b) makes Tier-2b accept a bearer that constant-time-equals
+          # ADMIN_TOKEN. The platform process inherits ADMIN_TOKEN from $GITHUB_ENV.
+          #
+          # MOLECULE_ADMIN_TOKEN is the var the e2e scripts send as the bearer
+          # (tests/e2e/_lib.sh:33 e2e_mint_workspace_token, and the run_mock
+          # org-import curl). Set BOTH to the SAME value so the bearer the test
+          # sends == the secret the platform checks. Deterministic test value;
+          # this platform is ephemeral, single-run, and never reachable off-host.
+          E2E_ADMIN_TOKEN="e2e-api-admin-${{ github.run_id }}-${{ github.run_attempt }}"
+          echo "ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
+          echo "MOLECULE_ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
+          echo "Admin token configured for the e2e platform (ADMIN_TOKEN + MOLECULE_ADMIN_TOKEN)."
+          # Channels e2e test seam (core#2332 P1.10). These env-gated overrides
+          # let the LIVE Slack-webhook send path + Telegram discover path target
+          # the local mock upstreams that tests/e2e/test_channels_e2e.sh binds,
+          # so the outbound serialize+POST is provable in CI (was unit-mock-only).
+          # Inert in prod/staging — those deploys never set these. The fixed
+          # loopback ports MUST match the script's E2E_CHANNELS_*_PORT defaults.
+          echo "MOLECULE_CHANNELS_TEST_WEBHOOK_BASE=http://127.0.0.1:18099/" >> "$GITHUB_ENV"
+          echo "MOLECULE_CHANNELS_TEST_TELEGRAM_API_BASE=http://127.0.0.1:18098" >> "$GITHUB_ENV"
+          echo "Channels test seam configured (webhook+telegram mock bases on fixed loopback ports)."
      - name: Build platform
        if: needs.detect-changes.outputs.api == 'true'
        working-directory: workspace-server
@@ -394,11 +421,65 @@ jobs:
      - name: Run E2E API tests
        if: needs.detect-changes.outputs.api == 'true'
        run: bash tests/e2e/test_api.sh
+      - name: Run keyless feature-contract E2E (terminal-diagnose / webhooks / budget / checkpoints / audit / traces / session-search / rescue / llm-billing-mode / resume / hibernate)
+        # Keyless required-lane coverage for feature endpoints that ship without
+        # an LLM key (runtime=external fixture). Each asserts the real HTTP
+        # contract + a meaningful failure mode (401/400/fail-closed) so a
+        # regression goes RED, not silently green. The mock-runtime A2A canned
+        # round-trip is covered by the priority-runtimes `mock` arm, not here.
+        if: needs.detect-changes.outputs.api == 'true'
+        run: bash tests/e2e/test_keyless_feature_contracts_e2e.sh
+      - name: Run secrets-dispatch contract test (keyless SECRETS_JSON branch order)
+        # Previously orphaned (no workflow referenced it). Hermetic unit-style
+        # contract over test_staging_full_saas.sh's LLM-key branch precedence —
+        # needs no platform, no bearer, no network. Guards the 2026-05-03
+        # "wrong key shape wins" incident class.
+        if: needs.detect-changes.outputs.api == 'true'
+        run: bash tests/e2e/test_secrets_dispatch.sh
      - name: Run notify-with-attachments E2E
        if: needs.detect-changes.outputs.api == 'true'
        run: bash tests/e2e/test_notify_attachments_e2e.sh
-      - name: Run priority-runtimes E2E (claude-code + hermes — skips when keys absent)
+      - name: "Run channels + data-prune E2E (REQUIRE-LIVE: mock upstream proves send+discover, purge proves prune)"
+        # core#2332 P1.10. Stands up a local mock upstream, points the LIVE
+        # Slack-webhook send + Telegram discover paths at it via the
+        # production-inert test seam configured above, and asserts the mock
+        # RECEIVED the serialized payload (send) + round-tripped the bot/chat
+        # (discover). Then exercises the RFC #734 data-prune: DELETE
+        # ?purge=true removes the target's durable child data while a sibling
+        # survives. E2E_REQUIRE_LIVE=1 ⇒ a missing/regressed seam is RED, not a
+        # silent skip. The platform inherits the MOLECULE_CHANNELS_TEST_* bases
+        # from $GITHUB_ENV; the script's mock ports match them (18099/18098).
        if: needs.detect-changes.outputs.api == 'true'
+        env:
+          E2E_REQUIRE_LIVE: '1'
+        run: bash tests/e2e/test_channels_e2e.sh
+      - name: "Run priority-runtimes E2E (REQUIRE-LIVE: mock validates the runtime plumbing end-to-end)"
+        # E2E_REQUIRE_LIVE=1 is ON: the run MUST validate >=1 runtime end-to-end
+        # or it exits NON-zero (RED). This is now SAFE because the `mock` arm can
+        # actually provision in CI: the only blocker was that POST /org/import and
+        # POST /admin/workspaces/:id/tokens are AdminAuth-gated
+        # (router.go:778 + :427) and this job previously configured NO admin token,
+        # so every admin call 401'd ("admin auth required"). The "Set deterministic
+        # admin token" step above now sets ADMIN_TOKEN on the platform AND exports
+        # the matching MOLECULE_ADMIN_TOKEN the e2e scripts send as the bearer, so
+        # the mock arm can org-import → online → mint token → canned A2A reply →
+        # validated(). That guarantees VALIDATED>=1 on a healthy platform, so the
+        # REQUIRED `E2E API Smoke Test` gate now HONESTLY validates a runtime
+        # end-to-end; if the mock plumbing (DB insert, status flip, A2A proxy,
+        # activity logging, or the admin-auth wiring) genuinely breaks, the gate
+        # goes RED instead of false-green. The zero-validated→RED decision is also
+        # regression-gated WITHOUT provisioning by the bash unit test
+        # tests/e2e/test_require_live_priority_gate_unit.sh (wired into ci.yml's
+        # "Run E2E bash unit tests" job), so a revert of that logic still fails CI.
+        #
+        # MiniMax stays an OPPORTUNISTIC best-effort arm: create is registry-fragile
+        # in CI (422 UNREGISTERED_MODEL_FOR_RUNTIME), so a miss is reported via
+        # bestfail() and never reds the gate — mock carries the required validation,
+        # MiniMax is a bonus real-LLM check when it comes up. ZERO new credentials.
+        if: needs.detect-changes.outputs.api == 'true'
+        env:
+          E2E_REQUIRE_LIVE: '1'
+          E2E_MINIMAX_API_KEY: ${{ secrets.MOLECULE_STAGING_MINIMAX_API_KEY }}
        run: bash tests/e2e/test_priority_runtimes_e2e.sh
      - name: Install standalone runtime parser from Gitea registry
        if: needs.detect-changes.outputs.api == 'true'
@@ -113,6 +113,29 @@ jobs:
    runs-on: docker-host
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    #
+    # PROMOTION-READINESS (toward required gate — do NOT flip continue-on-error
+    # without CTO sign-off, that's the irreversible call):
+    #   NOW FAIL-CLOSED:
+    #     - Postgres/Redis/platform/canvas readiness are already bounded
+    #       readiness-polls that hard-fail (and dump logs) at their deadline,
+    #       not fixed sleeps — preserved.
+    #     - passWithNoTests:false + forbidOnly (playwright.config.ts) → a
+    #       renamed/moved spec or stray test.only can no longer green the lane.
+    #     - REQUIRE-LIVE guard in "Run Playwright E2E tests" → chat==true must
+    #       actually execute >=1 test, else exit 1.
+    #     - chat-desktop "activity log" test no longer swallows its assertion.
+    #   STILL BLOCKS PROMOTION:
+    #     - The echo round-trip asserts on rendered "Echo: ..." text but never
+    #       asserts the echo runtime actually RECEIVED the A2A request
+    #       (fixtures/echo-runtime.ts exposes lastRequest, unused) — an
+    #       optimistic client-side render could pass without a real round-trip.
+    #       Add a server-received assertion before required.
+    #     - The "No-op pass" path (detect-changes chat!=true) is a legitimate
+    #       paths-filter skip, but a required gate needs it to be a neutral
+    #       check, not a green "success", so a skipped heavy lane can't be
+    #       mistaken for a passed one.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 15
    env:
@@ -334,11 +357,32 @@ jobs:
      - name: Run Playwright E2E tests
        if: needs.detect-changes.outputs.chat == 'true'
        working-directory: canvas
+        env:
+          # CI=1 activates forbidOnly in playwright.config.ts (a stray
+          # `test.only` would otherwise green the suite while skipping the
+          # rest). passWithNoTests:false (also in the config) already makes
+          # a zero-match selection exit non-zero.
+          CI: "1"
        run: |
+          set -euo pipefail
          export E2E_PLATFORM_URL="http://127.0.0.1:${PLATFORM_PORT}"
          export E2E_DATABASE_URL="${DATABASE_URL}"
          export PLAYWRIGHT_BASE_URL="http://localhost:${CANVAS_PORT}"
-          npx playwright test e2e/chat-desktop.spec.ts e2e/chat-mobile.spec.ts
+
+          # REQUIRE-LIVE guard (mirrors CP serving-e2e SERVING_E2E_REQUIRE_LIVE):
+          # this lane reached here only because detect-changes said chat==true,
+          # so it MUST actually execute the round-trip specs. `pipefail` makes
+          # a real test failure (playwright non-zero) abort here under `set -e`;
+          # passWithNoTests:false makes a zero-match selection non-zero too. The
+          # grep below is belt-and-braces: require a NON-ZERO passed (or flaky =
+          # passed-on-retry) count in the list summary. An all-skipped run exits
+          # 0 and prints only "N skipped", so "skipped" must NOT satisfy the guard.
+          npx playwright test e2e/chat-desktop.spec.ts e2e/chat-mobile.spec.ts \
+            --reporter=list 2>&1 | tee /tmp/pw-chat.out
+          if ! grep -qE '[1-9][0-9]* (passed|flaky)' /tmp/pw-chat.out; then
+            echo "::error::E2E Chat REQUIRE-LIVE: chat==true but Playwright reported no executed tests — specs missing or all-skipped, refusing to report green."
+            exit 1
+          fi

      - name: Dump platform log on failure
        if: failure() && needs.detect-changes.outputs.chat == 'true'
@@ -12,9 +12,30 @@ name: E2E Staging Canvas (Playwright)
 #

 # Playwright test suite that provisions a fresh staging org per run and
-# verifies every workspace-panel tab renders without crashing. Complements
-# e2e-staging-saas.yml (which tests the API shape) by exercising the
-# actual browser + canvas bundle against live staging.
+# verifies every workspace-panel tab renders REAL content (not just an
+# empty/errored container). Complements e2e-staging-saas.yml (which tests
+# the API shape) by exercising the actual browser + canvas bundle against
+# live staging.
+#
+# PROMOTION-READINESS (toward making this a HARD merge-gate):
+#   NOW RELIABLE (spec hardened — staging-tabs.spec.ts):
+#     - All waits condition-based (toBeVisible/toHaveAttribute/expect.poll);
+#       no fixed waitForTimeout in the spec.
+#     - Tabs asserted on settled REAL content, not "container visible".
+#     - ErrorBoundary + visible error alerts fail non-degraded tabs.
+#     - Tab-list parity-checked vs live DOM; fail-closed on missing tenant.
+#   STILL BLOCKS PROMOTION-TO-REQUIRED (do NOT remove continue-on-error —
+#   CTO-owned, RFC internal#219 §1):
+#     - Infra dependency: real staging EC2 per run (12-20 min cold boot);
+#       AWS/Cloudflare/CP availability would become merge-blockers.
+#     - Shared-zone TLS/DNS/ACME propagation flake surface is upstream of
+#       this repo and outside its control.
+#     - Required-gate correctness needs CP_STAGING_ADMIN_API_TOKEN GUARANTEED
+#       present; today's skip-if-absent (core#2225) is right for non-gating
+#       but would skip-green a required check.
+#     - Single hermes/platform_managed workspace; agent-dependent content
+#       (live chat/traces round-trip) not exercised on staging (#2162).
+#   The full checklist lives at the foot of canvas/e2e/staging-tabs.spec.ts.
 #
 # Triggers: push to main, PR touching canvas sources + this workflow only
 # after the PR enters `merge-queue`, manual dispatch, and scheduled cron to
@@ -85,6 +85,26 @@ jobs:
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    #
+    # PROMOTION-READINESS (toward required gate — do NOT flip continue-on-error
+    # without CTO sign-off, that's the irreversible call):
+    #   NOW FAIL-CLOSED:
+    #     - Missing CP_STAGING_ADMIN_API_TOKEN → hard exit 2 (preflight).
+    #     - Staging CP unhealthy → hard exit 1 (preflight, not a workspace bug).
+    #     - Harness E2E_REQUIRE_LIVE=1 → exit 5 if a clean exit didn't prove
+    #       all four awaiting_agent transitions (no silent skip).
+    #     - Sweep transition (step 6) is now a bounded readiness-poll, not a
+    #       fixed sleep + one-shot assert → no more sweep-cadence flake.
+    #     - register / re-register retry ONLY transient edge 5xx (bounded),
+    #       fail closed on 4xx → no more cold-boot-502 flake.
+    #   STILL BLOCKS PROMOTION:
+    #     - Single shared staging tenant + EC2 quota window: an infra-side
+    #       provisioning outage (not a code bug) would turn the gate red.
+    #       Needs an infra-class vs code-class signal split before required.
+    #     - "CP unhealthy → exit 1" currently looks identical to a real
+    #       failure on the run page; required-gate would need it demoted to
+    #       a neutral/skip so staging flakiness can't block merges.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 25

@@ -124,6 +144,15 @@ jobs:

      - name: Run external-runtime E2E
        id: e2e
+        # E2E_REQUIRE_LIVE=1: the harness fails CLOSED (exit 5) if it ever
+        # reaches a clean exit without proving all four awaiting_agent
+        # transitions. Mirrors CP serving-e2e SERVING_E2E_REQUIRE_LIVE — a
+        # silent skip / early-return / dropped assertion can no longer
+        # masquerade as green. Token-missing and CP-unhealthy already
+        # hard-fail in the two preflight steps above, so reaching this step
+        # means a real cycle is expected.
+        env:
+          E2E_REQUIRE_LIVE: "1"
        run: bash tests/e2e/test_staging_external_runtime.sh

      # Mirror the e2e-staging-saas.yml safety net: if the runner is
@@ -101,18 +101,29 @@ jobs:
      # so teardown MUST positively confirm no slug-tagged box survives.
      E2E_AWS_LEAK_CHECK: required
      E2E_AWS_TERMINATE_LEAKS: '1'
-      # claude-code + MiniMax is the cheapest boot-to-online path (same as the
-      # saas job). The reconciler test never makes a completion, but the key is
-      # wired so the first boot reaches online on the same path the saas
-      # harness uses. First non-empty wins in the script's priority chain.
-      E2E_MINIMAX_API_KEY: ${{ secrets.MOLECULE_STAGING_MINIMAX_API_KEY }}
-      E2E_ANTHROPIC_API_KEY: ${{ secrets.MOLECULE_STAGING_ANTHROPIC_API_KEY }}
-      E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_API_KEY }}
      E2E_RUNTIME: claude-code
      # Platform-managed create path (moonshot/kimi-k2.6, no tenant key) — the
      # combo proven to create cleanly; this test only needs the ws online.
+      #
+      # DELIBERATELY no E2E_MODEL_SLUG and no E2E_*_API_KEY here — mirror the
+      # e2e-staging-platform-boot job in e2e-staging-saas.yml. On
+      # E2E_LLM_PATH=platform the harness sends EMPTY secrets and lets
+      # pick_model_slug return the platform default moonshot/kimi-k2.6 (a member
+      # of the providers.yaml claude-code `platform` arm → provider=platform,
+      # billed by the CP LLM proxy, NO tenant key required).
+      #
+      # The previous wiring set E2E_MODEL_SLUG: MiniMax-M2 (a BARE id in the
+      # providers.yaml `minimax` BYOK arm → provider=minimax, requires
+      # MINIMAX_API_KEY) while sending secrets={} on the platform path. Because
+      # E2E_MODEL_SLUG wins over the E2E_LLM_PATH=platform branch in
+      # pick_model_slug, the workspace got a keyless BYOK-minimax model, could
+      # not resolve a serving path, and booted to status=failed — never online
+      # (run 223233: "MODEL_SLUG=MiniMax-M2" then "→ failed", "never reached
+      # status=online within 900s"). The BYOK key wiring was equally misleading:
+      # the harness ignores E2E_*_API_KEY on E2E_LLM_PATH=platform, so the keys
+      # only made the contradiction harder to spot. Platform-only is correct
+      # here — this test exercises instance-state, never an LLM completion.
      E2E_LLM_PATH: platform
-      E2E_MODEL_SLUG: MiniMax-M2
      E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
      E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}

@@ -124,7 +124,12 @@ jobs:
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
-    timeout-minutes: 45
+    # Raised 45→75: step 10b now exercises pause→resume→online +
+    # hibernate→wake→online, each of which RE-PROVISIONS the parent (CP
+    # re-provision + heartbeat recovery, not a fresh EC2 cold start, but still
+    # minutes). The base provision→online→A2A matrix fits in ~35 min; the two
+    # extra lifecycle reprovisions need headroom under WORKSPACE_ONLINE_TIMEOUT.
+    timeout-minutes: 75
    permissions:
      contents: read

@@ -173,17 +178,31 @@ jobs:
      # workflow_dispatch flow (no input wired here yet — runtime
      # override is enough for ad-hoc).
      #
-      # #2263 deploy-skew: the claude-code default is the COLON-namespaced BYOK
-      # id `minimax:MiniMax-M2.7`, NOT bare `MiniMax-M2`. The deployed staging
-      # ws-server's compiled registry can lag source; validateRegisteredModelForRuntime
-      # 400s the bare form on an older image (the sibling Platform Boot job, on
-      # the SAME image, succeeds with namespaced `moonshot/kimi-k2.6`). The colon
-      # form stays in the BYOK `minimax` arm (providers.yaml:851) so it resolves
-      # provider=minimax (BYOK) and the #1994 byok-not-platform guard still
-      # passes — the slash/platform form `minimax/MiniMax-M2.7` would not.
-      E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || github.event.inputs.runtime == 'google-adk' && 'google_genai:gemini-2.5-pro' || 'minimax:MiniMax-M2.7' }}
+      # claude-code MiniMax slug must be the BARE registered id `MiniMax-M2.7`.
+      # It is the BYOK-minimax form: registry_gen.go:88 registers it on the
+      # `minimax` arm (resolves provider=minimax via MINIMAX_API_KEY), so the
+      # #1994 byok-not-platform guard still passes. The COLON form
+      # `minimax:MiniMax-M2.7` is UNREGISTERED on claude-code (internal#718;
+      # derive_provider_matrix_test.go:288) — the claude-code adapter can't
+      # strip the `minimax:` prefix, so workspace-create 422s
+      # UNREGISTERED_MODEL_FOR_RUNTIME (real failure: job 295233, main 4b3590e3).
+      # The slash form `minimax/MiniMax-M2.7` is the platform-billed arm and
+      # would trip the byok guard. #2311 fixed the same colon-vs-bare bug in the
+      # pick_model_slug lib (tests/e2e/lib/model_slug.sh), but this env var
+      # OVERRIDES that lib, so the bare fix has to live here too.
+      E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || github.event.inputs.runtime == 'google-adk' && 'google_genai:gemini-2.5-pro' || 'MiniMax-M2.7' }}
      E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
      E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
+      # Lifecycle transitions (step 10b): pause→resume→online +
+      # hibernate→wake→online on the provisioned parent. `auto` runs them in
+      # full mode (this job). Set `off` to skip the ~2x-reprovision cost on an
+      # ad-hoc dispatch. The timeout-minutes above is sized for this being on.
+      E2E_LIFECYCLE: auto
+      # Fail-closed-on-skip: in CI the harness MUST prove ≥1 full
+      # provision→online→A2A cycle. If it reaches the end having validated
+      # nothing (a future short-circuit / skip path), it exits 5 rather than
+      # reporting a false green. Mirrors CP serving-e2e SERVING_E2E_REQUIRE_LIVE.
+      E2E_REQUIRE_LIVE: '1'

    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -381,6 +400,10 @@ jobs:
      E2E_MODE: smoke
      E2E_RUN_ID: "platform-${{ github.run_id }}-${{ github.run_attempt }}"
      E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
+      # Fail-closed-on-skip (see BYOK job). smoke mode still runs steps 2/4/7/8b,
+      # so all four required milestones (provisioned/tenant_online/
+      # workspace_online/a2a_roundtrip) fire — the guard is valid for this lane too.
+      E2E_REQUIRE_LIVE: '1'

    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -0,0 +1,129 @@
+name: E2E Workspace Lifecycle (staginge2e)
+
+# core#2332 P1.10 — close the workspace-lifecycle coverage gap.
+#
+# soft-restart / pause / resume / hibernate were only unit-tested (httptest in
+# workspace-server/internal/handlers/*_test.go) and never proven against a real
+# container. This drives the Go staginge2e suite
+# (workspace-server/internal/staginge2e/workspace_lifecycle_test.go) which
+# provisions a REAL throwaway staging tenant, exercises each lifecycle endpoint,
+# and asserts OBSERVABLE container state (status transitions + serve reachability
+# + url-cleared-on-stop) — not just HTTP 200.
+#
+# ADVISORY-BY-INFRA. It needs a live staging tenant (~30+ min cold EC2 path), so
+# the real run is workflow_dispatch / schedule only — NOT per-PR and NOT a
+# required check. Promotion to a required branch-protection context is a separate
+# CTO decision (mirrors the cp internal/staginge2e suite, cp#386, and the
+# peer-visibility flip-to-required pattern, molecule-core#1296).
+#
+# HONEST GATE — NO continue-on-error mask (feedback_fix_root_not_symptom). The
+# PR job validates that the suite COMPILES under -tags=staging_e2e and SKIPs LOUD
+# without creds (the suite's contract) — a broken test file fails at PR time. The
+# real assertion runs on dispatch/cron with staging creds.
+#
+# Gitea 1.22.6 / act_runner notes honored: no cross-repo uses (mirrored
+# actions/checkout SHA), per-SHA concurrency, pinned GITHUB_SERVER_URL.
+
+on:
+  push:
+    branches: [main]
+    paths:
+      - 'workspace-server/internal/handlers/workspace_restart.go'
+      - 'workspace-server/internal/handlers/workspace_crud.go'
+      - 'workspace-server/internal/staginge2e/**'
+      - '.gitea/workflows/e2e-workspace-lifecycle.yml'
+  pull_request:
+    branches: [main]
+    paths:
+      - 'workspace-server/internal/handlers/workspace_restart.go'
+      - 'workspace-server/internal/handlers/workspace_crud.go'
+      - 'workspace-server/internal/staginge2e/**'
+      - '.gitea/workflows/e2e-workspace-lifecycle.yml'
+  workflow_dispatch:
+  schedule:
+    # 08:00 UTC daily — offset from e2e-staging-saas (07:00) and
+    # e2e-peer-visibility (07:30) so the three don't collide on the staging
+    # org-creation quota.
+    - cron: '0 8 * * *'
+
+concurrency:
+  # Per-SHA (feedback_concurrency_group_per_sha).
+  group: e2e-workspace-lifecycle-${{ github.event.pull_request.head.sha || github.sha }}
+  cancel-in-progress: false
+
+env:
+  GITHUB_SERVER_URL: https://git.moleculesai.app
+
+jobs:
+  # PR / compile gate: prove the staginge2e suite compiles under the build tag
+  # and skips LOUD without creds. Cheap, honest, non-required. This is NOT a
+  # fake-green mask of the real assertion — it fails if the test file stops
+  # compiling. bp-required: pending CTO decision (see header).
+  lifecycle-compile-skip:
+    name: E2E Workspace Lifecycle (compile+skip)
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
+        with:
+          go-version: 'stable'
+          cache: true
+          cache-dependency-path: workspace-server/go.sum
+      - name: go vet (staging_e2e tag)
+        working-directory: workspace-server
+        run: go vet -tags staging_e2e ./internal/staginge2e/...
+      - name: Compile + skip-run (must SKIP LOUD without STAGING_E2E)
+        working-directory: workspace-server
+        run: |
+          # No STAGING_E2E / creds → the suite MUST skip: `go test` exit 0 with a
+          # SKIP line is the contract (-run pins the one test so this stays fast).
+          # Capture rc so a compile failure still prints go test's output under -e.
+          rc=0; out=$(go test -tags staging_e2e ./internal/staginge2e/ -run TestWorkspaceLifecycle -count=1 -v 2>&1) || rc=$?
+          echo "$out"; [ "$rc" -eq 0 ] || exit "$rc"
+          grep -q "SKIP: TestWorkspaceLifecycle_Staging" <<<"$out" \
+            || { echo "::error::expected a LOUD skip of TestWorkspaceLifecycle_Staging without creds"; exit 1; }
+
+  # Real STAGING gate: provisions a throwaway tenant, drives the lifecycle
+  # endpoints, asserts observable transitions, scoped teardown.
+  # dispatch / schedule only (30+ min cold EC2).
+  lifecycle-staging:
+    name: E2E Workspace Lifecycle (staging)
+    runs-on: ubuntu-latest
+    if: github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
+    timeout-minutes: 60
+    env:
+      CP_BASE_URL: https://staging-api.moleculesai.app
+      CP_ADMIN_API_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
+      STAGING_E2E: '1'
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
+        with:
+          go-version: 'stable'
+          cache: true
+          cache-dependency-path: workspace-server/go.sum
+      - name: Verify admin token present
+        run: |
+          if [ -z "$CP_ADMIN_API_TOKEN" ]; then
+            echo "::error::CP_STAGING_ADMIN_API_TOKEN secret not set (Railway staging CP_ADMIN_API_TOKEN)"
+            exit 2
+          fi
+          echo "Admin token present"
+      - name: CP staging health preflight
+        run: |
+          # `|| true`: on connect failure/timeout curl exits non-zero (code=000); keep `-e` from aborting before the ::error::.
+          code=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 10 "$CP_BASE_URL/health") || true
+          if [ "$code" != "200" ]; then
+            echo "::error::Staging CP unhealthy (HTTP $code) — infra, not a lifecycle bug. Failing loud per feedback_fix_root_not_symptom."; exit 1
+          fi
+          echo "Staging CP healthy"
+      - name: Run workspace-lifecycle staginge2e
+        working-directory: workspace-server
+        run: go test -tags staging_e2e ./internal/staginge2e/ -run TestWorkspaceLifecycle_Staging -count=1 -v -timeout 50m
+      # Teardown: the test installs a t.Cleanup admin-DELETE of its own tenant
+      # (runs even on a Fatal). We deliberately do NOT add a broad in-workflow
+      # "sweep all e2e-life-* slugs" net here — that could delete a concurrently
+      # running dispatch's fresh tenant (the slug is not run-id scoped). The
+      # age-guarded `sweep-stale-e2e-orgs` workflow (30-min floor, e2e- prefix)
+      # is the final safety net for a tenant orphaned by a hard runner cancel.
@@ -7,10 +7,13 @@ name: gitea-merge-queue
 # the user-space queue bot, one PR per tick, using the non-bypass merge actor.
 #
 # Queue contract:
-#   - add label `merge-queue` to an open same-repo PR
+#   - auto-discovery (default): any open same-repo PR is considered — no
+#     `merge-queue` label required (the label is optional metadata now)
 #   - bot updates stale PR heads with current main, then waits for CI
-#   - bot merges only when current main is green and required PR contexts pass
-#   - add `merge-queue-hold` to pause a queued PR without removing it
+#   - bot merges only when current main is green, genuine approvals are present
+#     on the current head, required PR contexts pass, and the PR is mergeable
+#   - add `merge-queue-hold`, `do-not-auto-merge`, or `wip` to keep a PR OUT of
+#     autonomous merging; draft PRs are also skipped

 on:
  # Schedule moved to operator-config:
@@ -48,10 +51,34 @@ jobs:
          WATCH_BRANCH: ${{ github.event.repository.default_branch }}
          QUEUE_LABEL: merge-queue
          HOLD_LABEL: merge-queue-hold
+          # Auto-discovery (opt-OUT). When on (default), the queue considers ALL
+          # open same-repo PRs that meet the merge bar — it does NOT wait for a
+          # human/agent to add `merge-queue`. Agent Gitea tokens lack
+          # write:issue (labels are issue-scoped) and could never self-label,
+          # which stalled the queue; the label is now OPTIONAL metadata. The
+          # merge bar is UNCHANGED — only candidate selection widens. Set
+          # AUTO_DISCOVER=0 to restore legacy opt-IN (require the merge-queue
+          # label to be considered).
+          AUTO_DISCOVER: "1"
+          # Opt-OUT labels: any of these on a PR keeps it OUT of autonomous
+          # merging (the human escape hatch). HOLD_LABEL is always also honoured.
+          # A human who wants a PR held just adds one of these labels.
+          OPT_OUT_LABELS: do-not-auto-merge,wip
          UPDATE_STYLE: merge
-          REQUIRED_CONTEXTS: >-
-            CI / all-required (pull_request),
-            sop-checklist / all-items-acked (pull_request)
+          # Recognised official-reviewer set. A merge needs >= required_approvals
+          # DISTINCT genuine official approvals from these accounts on the
+          # CURRENT head sha (not stale/dismissed). The required_approvals count
+          # itself is read from branch protection at runtime.
+          REVIEWER_SET: agent-reviewer,agent-researcher,agent-reviewer-cr2
+          # NOTE: REQUIRED_CONTEXTS is no longer the authoritative PR gate. The
+          # queue now reads the required status contexts from BRANCH PROTECTION
+          # (status_check_contexts) so non-required governance reds (qa-review,
+          # security-review, sop-tier, sop-checklist when not branch-required,
+          # E2E Chat, Staging SaaS, ci-arm64-advisory) cannot block a merge.
+          # If branch protection cannot be enumerated the queue HOLDS
+          # (fail-closed). REQUIRED_APPROVALS below is only a fallback used when
+          # branch protection does not specify required_approvals.
+          REQUIRED_APPROVALS: "2"
          # Push-side required contexts. Checking CI / all-required (push)
          # explicitly instead of the combined state avoids false-pause when
          # non-blocking jobs (continue-on-error: true) have failed — those
@@ -99,7 +99,7 @@ jobs:
    # all violate this lint at first — intentional. Flip to false
    # follow-up after main is clean for 3 days. mc#1982.
    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
-    continue-on-error: true  # mc#1982 Phase 3 mask — 14d forced-renewal cadence
+    continue-on-error: true  # internal#837 Phase 3 mask — 14d forced-renewal cadence
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5.6.0
@@ -123,7 +123,14 @@ jobs:
      # with a per-entry ::error:: annotation naming the missing repo
      # (issue #2192). This is the push-time complement to PR #2186's
      # PR-time manifest-entry-existence gate.
+      #
+      # Token: workspace-template-* repos are PRIVATE, so the existence check
+      # must authenticate (same AUTO_SYNC_TOKEN as the clone step). Without it
+      # an unauthenticated GET 404s on private repos and false-prunes them
+      # (regression that dropped seo-agent/google-adk from the palette).
      - name: Validate manifest entries exist
+        env:
+          MOLECULE_GITEA_TOKEN: ${{ secrets.AUTO_SYNC_TOKEN }}
        run: |
          set -euo pipefail
          bash scripts/check-manifest-repos-exist.sh manifest.json
@@ -33,11 +33,24 @@
 #                           2026-05-17 (internal#189 Phase 1).
 #
 # BURN-IN CLOSED 2026-05-17 (internal#189 Phase 1): The 7-day burn-in
-# window closed. continue-on-error: true has been removed from the
-# tier-check job; AND-composition is now fully enforced. If you need
-# to temporarily re-introduce a mask, file a tracker and follow the
-# mc#1982 protocol (Tier 2e lint requires a current tracker within
-# 2 lines of any continue-on-error: true).
+# window closed. As of 2026-06-04 the residual masks left behind by the
+# burn-in are removed for real (the comment previously claimed this while
+# the masks still persisted — that was stale):
+#   - continue-on-error: true on the jq-install step (redundant; the step
+#     already exits 0) and on the tier-check step (the burn-in mask).
+#   - the `|| true` after the sop-tier-check.sh invocation, which masked
+#     real tier-gate verdicts.
+# AND-composition is now fully enforced and the tier-check step can
+# honestly red CI on a real SOP-6 violation.
+#
+# SOP_FAIL_OPEN REMOVED 2026-06-05 (fix/core-ci-fail-closed): this is a
+# REQUIRED branch-protected gate on `pull_request_target` (always runs
+# from the base branch with secrets — no fork/advisory split). Failing
+# open on a token/network/jq fault greened the SOP-6 approval gate
+# WITHOUT verifying approvals — a fail-open on a required context. The
+# gate now FAILS CLOSED on infra faults too: fix the token/runner, not
+# the gate. If you ever need to temporarily re-introduce a mask, file a
+# tracker and follow the mc#1982 protocol.

 name: sop-tier-check

@@ -90,10 +103,11 @@ jobs:
        # GitHub releases may be unreachable from some runner networks
        # (infra#241 follow-up: GitHub timeout after 3s on 5.78.80.188
        # runners). The sop-tier-check script has its own fallback as a
-        # third line of defense. continue-on-error: true ensures this step
-        # failing does not block the job.
-        # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
-        continue-on-error: true
+        # third line of defense, and this step's final command
+        # (`jq --version ... || echo`) already exits 0 unconditionally — so
+        # the step cannot fail the job on its own.
+        # continue-on-error REMOVED 2026-06-04 (mc#1982 directive: root-fix
+        # and remove, do not renew). It was redundant masking, not a gate.
        run: |
          # apt-get is the primary method — Ubuntu package mirrors are reliably
          # reachable from runner containers. GitHub releases may be blocked
@@ -110,11 +124,11 @@ jobs:
          jq --version 2>/dev/null || echo "::notice::jq not yet available — script fallback will retry"

      - name: Verify tier label + reviewer team membership
-        # continue-on-error: true at step level — job-level is ignored by Gitea
-        # Actions (quirk #10, internal runbooks). Belt-and-suspenders with
-        # SOP_FAIL_OPEN=1 + || true below.
-        # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
-        continue-on-error: true
+        # continue-on-error REMOVED 2026-06-04 (expired internal#189 Phase 1
+        # burn-in, window closed 2026-05-17; mc#1982 directive: root-fix and
+        # remove, do not renew). SOP_FAIL_OPEN REMOVED 2026-06-05
+        # (fix/core-ci-fail-closed): the gate now fails CLOSED on infra
+        # faults too (see the env block below), not just on a real verdict.
        env:
          GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
          GITEA_HOST: git.moleculesai.app
@@ -123,9 +137,26 @@ jobs:
          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
          SOP_DEBUG: '0'
          SOP_LEGACY_CHECK: '0'
-          # SOP_FAIL_OPEN=1 makes the script always exit 0. The UI enforces
-          # the actual merge gate. Combined with continue-on-error: true
-          # above, this step never fails the job regardless of script exit.
-          SOP_FAIL_OPEN: '1'
+          # SOP_FAIL_OPEN REMOVED 2026-06-05 (fix/core-ci-fail-closed).
+          #
+          # This is the REQUIRED branch-protected gate
+          # `sop-tier-check / tier-check (pull_request)`. It runs on
+          # `pull_request_target`, which ALWAYS executes from the base
+          # branch WITH secrets present — there is NO fork/advisory split
+          # and no legitimate "secrets genuinely absent" degradation here.
+          #
+          # SOP_FAIL_OPEN=1 made the script `exit 0` on an empty/invalid
+          # token, an unreachable Gitea API, or missing jq — i.e. an AUTH
+          # FAILURE or unreachable-dependency would green the SOP-6
+          # approval gate WITHOUT verifying that the required teams
+          # actually approved. That is a fail-open on a required gate: a
+          # mis-wired or under-scoped SOP_TIER_CHECK_TOKEN would let any PR
+          # merge past the approval requirement.
+          #
+          # Removing the env unsets it → `${SOP_FAIL_OPEN:-}` is empty in
+          # sop-tier-check.sh → every guarded `exit 0` branch instead falls
+          # through to `exit 1`. Infra faults (bad token / API down / no
+          # jq) now FAIL CLOSED with a loud `::error::`, exactly like a real
+          # SOP-6 violation. Fix the token/runner, not the gate.
        run: |
-          bash .gitea/scripts/sop-tier-check.sh || true
+          bash .gitea/scripts/sop-tier-check.sh
@@ -26,11 +26,14 @@ name: sync-providers-yaml
 # sentinel does not fire on it.
 #
 # AUTH: uses AUTO_SYNC_TOKEN (the existing cross-repo read token used to sync
-# template/provider content from sibling repos). If the secret is absent the
-# job emits a clear ::warning:: and exits 0 — the hermetic sha pin in
-# sync_canonical_test.go is the always-on backstop, so a missing cross-repo
-# token degrades to "hand-edit still caught, live canonical drift not caught"
-# rather than a hard red that blocks unrelated PRs.
+# template/provider content from sibling repos). If the secret is absent:
+#   * Trusted contexts (push to main/staging, schedule, workflow_dispatch,
+#     same-repo PR, and any unlisted event type): hard ::error:: + exit 1
+#     (#2158 — silent fail-open was masking live canonical drift from the daily schedule).
+#   * Untrusted fork PRs: soft ::warning:: + exit 0 (forks cannot receive
+#     secrets, so a hard-fail here would block every fork PR).
+# The hermetic sha pin in sync_canonical_test.go is the always-on backstop
+# for hand-edits of core's synced copy regardless of AUTO_SYNC_TOKEN state.

 on:
  pull_request:
@@ -74,10 +77,37 @@ jobs:
          API_ROOT: ${{ github.server_url }}/api/v1
        run: |
          set -euo pipefail
+          # Trusted-context detection (per #2158): AUTO_SYNC_TOKEN absence
+          # is a hard failure on contexts that *should* have the secret
+          # (push to main/staging, schedule, same-repo PRs, workflow_dispatch).
+          # Fork PRs cannot receive secrets, so the soft warning is preserved
+          # for that one untrusted case. The hermetic sha pin in
+          # sync_canonical_test.go remains the always-on backstop for
+          # hand-edits of core's synced copy.
+          case "${{ github.event_name }}" in
+            push|schedule|workflow_dispatch)
+              is_trusted=true
+              ;;
+            pull_request)
+              if [ "${{ github.event.pull_request.head.repo.fork }}" = "false" ]; then
+                is_trusted=true
+              else
+                is_trusted=false
+              fi
+              ;;
+            *)
+              # Unknown event type — treat as trusted to avoid silent failures
+              # on a future event we haven't enumerated.
+              is_trusted=true
+              ;;
+          esac
          if [ -z "${AUTO_SYNC_TOKEN:-}" ]; then
-            echo "::warning::AUTO_SYNC_TOKEN secret missing — skipping the live cross-repo compare."
+            if [ "$is_trusted" = "true" ]; then
+              echo "::error::AUTO_SYNC_TOKEN secret missing on trusted context (${{ github.event_name }}). Live cross-repo canonical-drift detection cannot run — this would silently mask a controlplane-side providers.yaml change from going red on the daily schedule and on same-repo PRs. Provision AUTO_SYNC_TOKEN (read scope on molecule-controlplane) to restore detection."
+              exit 1
+            fi
+            echo "::warning::AUTO_SYNC_TOKEN secret missing on untrusted fork PR — skipping the live cross-repo compare (forks cannot receive secrets)."
            echo "The hermetic sha pin (sync_canonical_test.go) still gates hand-edits of core's copy."
-            echo "Provision AUTO_SYNC_TOKEN (read scope on molecule-controlplane) to enable live canonical-drift detection."
            exit 0
          fi
          CANON_URL="${API_ROOT}/repos/molecule-ai/molecule-controlplane/raw/internal/providers/providers.yaml?ref=main"
@@ -90,7 +90,13 @@ jobs:
          # checked-in artifact; exit 1 (RED) on any drift. This is the
          # single source of the gate's verdict — the same code path
          # `go test ./cmd/gen-providers` exercises.
-          go run ./cmd/gen-providers -check
+          if ! go run ./cmd/gen-providers -check; then
+            echo "::error::workspace-server/internal/providers/gen/registry_gen.go is stale (drifted from providers.yaml)."
+            echo "Regenerate and commit it (run from repo root):"
+            echo "  make gen          # native (needs a local Go toolchain)"
+            echo "  make gen-docker   # Docker only — no local Go needed"
+            exit 1
+          fi

      - name: Belt-and-braces — regenerate in place and assert clean tree
        run: |
@@ -101,7 +107,9 @@ jobs:
          go generate ./...
          if ! git diff --quiet -- internal/providers/gen/registry_gen.go; then
            echo "::error::workspace-server/internal/providers/gen/registry_gen.go drifted from providers.yaml."
-            echo "Run 'go generate ./...' (or 'go run ./cmd/gen-providers') in workspace-server/ and commit the result."
+            echo "Regenerate and commit it. No local Go? Use Docker (run from repo root):"
+            echo "  make gen          # native (needs a local Go toolchain)"
+            echo "  make gen-docker   # Docker only — no local Go needed"
            git --no-pager diff -- internal/providers/gen/registry_gen.go | head -80
            exit 1
          fi
@@ -4,7 +4,27 @@
 # use this Makefile; CI calls docker compose / go test directly so the
 # Makefile can evolve without breaking the build.

-.PHONY: help dev up down logs build test e2e-peer-visibility openapi-spec openapi-spec-check
+.PHONY: help dev up down logs build test e2e-peer-visibility openapi-spec openapi-spec-check gen gen-docker gen-check gen-check-docker
+
+# ─── Provider-registry SSOT codegen (internal#718) ─────────────────────
+# The Go module lives in workspace-server/. The checked-in artifact
+# workspace-server/internal/providers/gen/registry_gen.go is a gofmt'd
+# projection of providers.yaml, drift-gated by
+# .gitea/workflows/verify-providers-gen.yml. `make gen-docker` runs the SAME
+# generator inside the pinned golang image so a toolchain-less env (an agent
+# without Go) can regenerate without a local Go install (core#2332 follow-up).
+#
+# BYTE-EQUIVALENCE: gen-docker is byte-identical to native only while
+# GO_VERSION below matches the `go` directive in workspace-server/go.mod.
+# NOTE: the CI verify workflow pins setup-go go-version: 'stable' (not '1.25');
+# that is a latent hazard — a future Go minor could reformat the artifact in CI
+# vs a 1.25 local. Pin CI to '1.25' to close it (tracked alongside this change).
+GO_VERSION ?= 1.25
+GO_IMAGE   ?= golang:$(GO_VERSION)
+DOCKER     ?= docker
+# Mount the Go module (workspace-server) read-write; Go's default -mod=readonly
+# keeps go.mod/go.sum untouched — only the artifact is written in-place.
+DOCKER_RUN_WS = $(DOCKER) run --rm -v "$(CURDIR)/workspace-server":/src -w /src $(GO_IMAGE)

 help: ## Show this help.
 	@grep -E '^[a-zA-Z0-9_-]+:.*?## ' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-22s\033[0m %s\n", $$1, $$2}'
@@ -56,3 +76,16 @@ openapi-spec: ## Regenerate OpenAPI spec from workspace-server handler annotatio
 openapi-spec-check: openapi-spec ## CI gate — fail if openapi-spec produces a diff vs the committed file.
 	@git diff --exit-code -- workspace-server/docs/openapi/ \
 	  || (echo "openapi-spec is stale — run 'make openapi-spec' and commit the result" && exit 1)
+
+# ─── Provider-registry codegen targets ────────────────────────────────
+gen: ## Regenerate the providers registry artifact natively (needs local Go).
+	cd workspace-server && go generate ./...
+
+gen-docker: ## Same, inside the pinned $(GO_IMAGE) — Docker only, no local Go.
+	$(DOCKER_RUN_WS) go generate ./...
+
+gen-check: ## Drift gate (native): exit 1 if the artifact is stale.
+	cd workspace-server && go run ./cmd/gen-providers -check
+
+gen-check-docker: ## Drift gate inside the pinned $(GO_IMAGE) — Docker only.
+	$(DOCKER_RUN_WS) go run ./cmd/gen-providers -check
@@ -101,10 +101,19 @@ test.describe("Desktop ChatTab", () => {
    await textarea.fill("Trigger activity");
    await page.getByRole("button", { name: /Send/ }).first().click();

-    // Activity log container should appear during the send flow.
-    await expect(page.locator("[data-testid='activity-log']").first()).toBeVisible({ timeout: 10_000 }).catch(() => {
-      // Activity log may not be present in all layouts.
-    });
+    // FALSE-GREEN FIX: the prior `.catch(() => {})` swallowed the assertion
+    // entirely, so this test passed whether or not the activity log ever
+    // rendered. The activity-log container is optional per layout, so we
+    // gate on its presence in the DOM: if it's not part of this layout,
+    // skip explicitly (a recorded skip, not a silent pass); if it IS
+    // present, it MUST become visible during the send flow — that's the
+    // behaviour this test exists to protect.
+    const activityLog = page.locator("[data-testid='activity-log']").first();
+    if ((await activityLog.count()) === 0) {
+      test.skip(true, "activity-log not part of this layout");
+      return;
+    }
+    await expect(activityLog).toBeVisible({ timeout: 10_000 });
  });
 });

@@ -0,0 +1,461 @@
+/**
+ * Staging canvas E2E — desktop take-control RECONNECT + LEASE-RENEWAL path
+ * (core#2332 "P0.7", the e2e gap left by core#2216).
+ *
+ * Sibling to staging-display.spec.ts. That spec proves the happy path
+ * (acquire → noVNC WS upgrade → first framebuffer frame). It does NOT cover
+ * the two behaviours core#2216 added on top of that happy path:
+ *
+ *   (A) RECONNECT re-acquires a FRESH token. When the live WS drops uncleanly
+ *       (idle/network blip), DisplayTab.tsx:391-446 calls connect(reacquire=true),
+ *       which first awaits reacquireSession() (DisplayTab.tsx:83-99 →
+ *       POST /display/control/acquire) to mint a NON-stale lease+token before
+ *       reopening the socket. Without this, the cached ~300s token can be past
+ *       its expiry and the reconnect would 401 — a dead session that LOOKS like
+ *       a reconnect. We assert the reconnect path yields a token bound to a NEW
+ *       expires_at AND that a NEW WS opened with that fresh token resumes the
+ *       framebuffer (a real frame, not a 1006/403).
+ *
+ *   (B) The lease SURVIVES past the 300s window via the renewal cadence.
+ *       The lock is a 300s lease with NO server-side auto-renewal
+ *       (workspace_display_control.go:27 displayControlDefaultTTLSeconds=300;
+ *       loadActiveDisplayControl filters `expires_at > now()`). DisplayTab.tsx:105-111
+ *       runs a 120_000ms setInterval that re-acquires as the same holder, which
+ *       the server's ON-CONFLICT upsert (workspace_display_control.go:116-123,
+ *       `controlled_by = EXCLUDED.controlled_by`) treats as a lease EXTENSION:
+ *       expires_at moves forward by a fresh 300s each renewal. We do NOT sleep
+ *       300s of wall-clock to prove this — we drive the renewal CALL the timer
+ *       fires (reacquireSession === the same POST) and assert it pushes
+ *       expires_at strictly past the ORIGINAL lease window, then confirm the
+ *       lock is still live (GET /display/control returns the holder) after a
+ *       point in time at which the original, un-renewed lease would already be
+ *       expired. That is the observable, deterministic proxy for "the 120s
+ *       timer keeps the user from being kicked every ~5 min."
+ *
+ * Auth model, gating, and fail-closed philosophy are IDENTICAL to
+ * staging-display.spec.ts — see that file's header for the full rationale
+ * (same-origin-canvas Origin for the WS upgrade; per-tenant admin bearer for
+ * the acquire/GET POSTs; STAGING_DISPLAY_WORKSPACE_ID is the single activation
+ * knob and a standing desktop EC2 is a CTO cost item; any failure once the gate
+ * env is present is a HARD error, never a silent green, no "flaky" disposition).
+ *
+ * Promote-to-required is a CTO call: like its sibling this only runs when a
+ * standing desktop-capable staging workspace exists, so it cannot be a blanket
+ * required context until that workspace is funded and STAGING_DISPLAY_* is wired
+ * into the e2e-staging-canvas workflow.
+ */
+
+import { test, expect } from "@playwright/test";
+
+const STAGING = process.env.CANVAS_E2E_STAGING === "1";
+
+// The standing desktop-capable workspace id. Absent => skip loud. Same single
+// activation knob as staging-display.spec.ts; see that file's header.
+const DISPLAY_WS_ID = process.env.STAGING_DISPLAY_WORKSPACE_ID;
+
+test.skip(!STAGING, "CANVAS_E2E_STAGING not set — skipping staging-only tests");
+test.skip(
+  !DISPLAY_WS_ID,
+  "STAGING_DISPLAY_WORKSPACE_ID not set — no standing desktop-capable staging " +
+    "workspace to exercise the reconnect/renewal path. Set it to a workspace whose " +
+    "compute.display.mode == 'desktop-control' to activate this real-e2e gate. " +
+    "(Standing that workspace up is a CTO cost item — one always-on desktop EC2.)",
+);
+
+// WS upgrade + first-frame budgets mirror staging-display.spec.ts:75-76 — the
+// EIC tunnel + websockify handshake adds real latency; bounded so a dead path
+// fails LOUD instead of hanging to the suite timeout.
+const WS_UPGRADE_TIMEOUT_MS = 30_000;
+const FIRST_FRAME_TIMEOUT_MS = 30_000;
+
+// The production lease/renewal contract we are asserting against:
+//   - DEFAULT_TTL_SECONDS: the 300s lease the canvas requests
+//     (DisplayTab.tsx:88 ttl_seconds:300; server default
+//     workspace_display_control.go:27).
+//   - RENEWAL_INTERVAL_MS: the cadence the canvas renews on
+//     (DisplayTab.tsx:109 setInterval(..., 120_000)). We don't sleep it; we
+//     assert the renewal CALL pushes the lease forward.
+const DEFAULT_TTL_SECONDS = 300;
+const RENEWAL_INTERVAL_MS = 120_000;
+
+// Open a real noVNC WebSocket from inside the page (so the browser sends
+// Origin: <tenant> and the same-origin-canvas AdminAuth path accepts the
+// upgrade — a browser WS can't set Authorization). Returns the outcome of the
+// upgrade + first-frame, exactly like staging-display.spec.ts's evaluate
+// block. Reused here for BOTH the initial connect and the post-drop reconnect
+// so the two are compared on identical wire mechanics.
+type WsResult = {
+  ok: boolean;
+  stage: string;
+  detail: string;
+  frameBytes?: number;
+  frameKind?: string;
+  closeCode?: number;
+};
+
+async function openDisplayWs(
+  page: import("@playwright/test").Page,
+  rawSessionUrl: string,
+): Promise<WsResult> {
+  return page.evaluate(
+    async ({ rawSessionUrl, upgradeTimeoutMs, frameTimeoutMs }) => {
+      // Reproduce DisplayTab.tsx:545-552 (displayWebSocketConnection): resolve
+      // against the tenant origin, pull token from the #token fragment, strip
+      // the fragment, switch http(s)->ws(s). Then connect with the exact
+      // subprotocols the canvas uses (DisplayTab.tsx:402).
+      const u = new URL(rawSessionUrl, window.location.href);
+      const token =
+        new URLSearchParams(u.hash.replace(/^#/, "")).get("token") ?? "";
+      if (!token) {
+        return { ok: false, stage: "token-parse", detail: "no #token in session_url" };
+      }
+      u.hash = "";
+      u.protocol = window.location.protocol === "https:" ? "wss:" : "ws:";
+      const wsUrl = u.toString();
+
+      return await new Promise<{
+        ok: boolean;
+        stage: string;
+        detail: string;
+        frameBytes?: number;
+        frameKind?: string;
+        closeCode?: number;
+      }>((resolve) => {
+        let upgraded = false;
+        let settled = false;
+        const finish = (r: {
+          ok: boolean;
+          stage: string;
+          detail: string;
+          frameBytes?: number;
+          frameKind?: string;
+          closeCode?: number;
+        }) => {
+          if (settled) return;
+          settled = true;
+          try {
+            ws.close();
+          } catch {
+            /* ignore */
+          }
+          resolve(r);
+        };
+
+        let ws: WebSocket;
+        try {
+          ws = new WebSocket(wsUrl, [`binary`, `molecule-display-token.${token}`]);
+        } catch (e) {
+          resolve({ ok: false, stage: "construct", detail: String(e) });
+          return;
+        }
+        ws.binaryType = "arraybuffer";
+
+        const upgradeTimer = setTimeout(() => {
+          finish({
+            ok: false,
+            stage: "upgrade-timeout",
+            detail: `WS did not open within ${upgradeTimeoutMs}ms (readyState=${ws.readyState})`,
+          });
+        }, upgradeTimeoutMs);
+
+        let frameTimer: ReturnType<typeof setTimeout> | null = null;
+
+        ws.onopen = () => {
+          upgraded = true;
+          clearTimeout(upgradeTimer);
+          frameTimer = setTimeout(() => {
+            finish({
+              ok: false,
+              stage: "frame-timeout",
+              detail: `WS upgraded but no framebuffer message within ${frameTimeoutMs}ms`,
+            });
+          }, frameTimeoutMs);
+        };
+
+        ws.onmessage = (ev) => {
+          if (frameTimer) clearTimeout(frameTimer);
+          let bytes = 0;
+          let kind: string = typeof ev.data;
+          if (ev.data instanceof ArrayBuffer) {
+            bytes = ev.data.byteLength;
+            kind = "ArrayBuffer";
+          } else if (typeof Blob !== "undefined" && ev.data instanceof Blob) {
+            bytes = ev.data.size;
+            kind = "Blob";
+          } else if (typeof ev.data === "string") {
+            bytes = ev.data.length;
+            kind = "string";
+          }
+          finish({
+            ok: bytes > 0,
+            stage: "frame",
+            detail:
+              bytes > 0 ? "received framebuffer message" : "first message was empty",
+            frameBytes: bytes,
+            frameKind: kind,
+          });
+        };
+
+        ws.onclose = (ev) => {
+          // A close before the first frame is terminal: report its code now, not
+          // a later "frame-timeout". finish() is a no-op once already settled.
+          clearTimeout(upgradeTimer);
+          if (frameTimer) clearTimeout(frameTimer);
+          const closed = `(code=${ev.code}, reason="${ev.reason}")`;
+          const detail = upgraded
+            ? `WS closed after upgrade before any framebuffer message ${closed}`
+            : `WS closed before upgrade ${closed} — handshake rejected somewhere in edge → ws-proxy → EIC → websockify → x11vnc`;
+          finish({ ok: false, stage: upgraded ? "frame-close" : "upgrade-close", detail, closeCode: ev.code });
+        };
+
+        ws.onerror = () => {
+          if (!upgraded) {
+            clearTimeout(upgradeTimer);
+            finish({
+              ok: false,
+              stage: "upgrade-error",
+              detail: "WS error before upgrade — proxy chain rejected the handshake",
+            });
+          }
+        };
+      });
+    },
+    {
+      rawSessionUrl,
+      upgradeTimeoutMs: WS_UPGRADE_TIMEOUT_MS,
+      frameTimeoutMs: FIRST_FRAME_TIMEOUT_MS,
+    },
+  );
+}
+
+// Pull the opaque signed token out of a session_url's #token= fragment so we
+// can compare reconnect tokens for freshness (a reconnect MUST mint a new one
+// — same token would mean the cached, possibly-expired URL was reused).
+function tokenOf(sessionUrl: string): string {
+  const hashIdx = sessionUrl.indexOf("#"); // parse the fragment the way openDisplayWs does
+  return hashIdx < 0 ? "" : (new URLSearchParams(sessionUrl.slice(hashIdx + 1)).get("token") ?? "");
+}
+
+test.describe("staging desktop take-control — reconnect + lease renewal (core#2216)", () => {
+  // Shared staging context resolution — identical to staging-display.spec.ts:90-120.
+  function resolveTenant() {
+    const tenantURL =
+      process.env.STAGING_DISPLAY_TENANT_URL || process.env.STAGING_TENANT_URL;
+    const tenantToken =
+      process.env.STAGING_DISPLAY_TENANT_TOKEN || process.env.STAGING_TENANT_TOKEN;
+    const orgID = process.env.STAGING_DISPLAY_ORG_ID || process.env.STAGING_ORG_ID;
+    if (!tenantURL || !tenantToken) {
+      throw new Error(
+        "STAGING_DISPLAY_WORKSPACE_ID is set but no tenant URL/token is available " +
+          "for the reconnect/renewal gate. Set STAGING_DISPLAY_SLUG so staging-setup.ts " +
+          "resolves STAGING_DISPLAY_TENANT_URL / STAGING_DISPLAY_TENANT_TOKEN for the " +
+          "standing desktop org (or ensure the ephemeral STAGING_TENANT_* exports exist).",
+      );
+    }
+    return { tenantURL, tenantToken, orgID };
+  }
+
+  test.beforeEach(async ({ context }) => {
+    const { tenantToken, orgID } = resolveTenant();
+    await context.setExtraHTTPHeaders({
+      Authorization: `Bearer ${tenantToken}`,
+      ...(orgID ? { "X-Molecule-Org-Id": orgID } : {}),
+    });
+  });
+
+  test("reconnect re-acquires a FRESH token and the framebuffer resumes", async ({
+    page,
+  }) => {
+    const { tenantURL } = resolveTenant();
+    const workspaceId = DISPLAY_WS_ID as string;
+
+    // Sanity: workspace must be display-available, else the gate is meaningless.
+    const availResp = await page.request.get(
+      `${tenantURL}/workspaces/${workspaceId}/display`,
+    );
+    expect(availResp.status(), `GET /display for ${workspaceId} should be 200`).toBe(200);
+    const avail = await availResp.json();
+    expect(
+      avail.available,
+      `workspace ${workspaceId} is not display-available (reason=${avail.reason}).`,
+    ).toBe(true);
+
+    // 1. Initial acquire — the happy-path lease the user starts with.
+    const firstResp = await page.request.post(
+      `${tenantURL}/workspaces/${workspaceId}/display/control/acquire`,
+      { data: { controller: "user", ttl_seconds: DEFAULT_TTL_SECONDS } },
+    );
+    expect(
+      firstResp.status(),
+      `initial acquire should be 200; body: ${await firstResp.text()}`,
+    ).toBe(200);
+    const first = await firstResp.json();
+    expect(first.controller, "controller should be 'user'").toBe("user");
+    expect(typeof first.session_url, "acquire missing session_url").toBe("string");
+    const firstUrl: string = first.session_url;
+    expect(firstUrl, "session_url should carry #token=").toContain("#token=");
+    const firstToken = tokenOf(firstUrl);
+    expect(firstToken.length, "first token should be non-empty").toBeGreaterThan(0);
+
+    // Anchor Origin to the tenant so the same-origin-canvas WS upgrade is accepted.
+    await page.goto(tenantURL, { waitUntil: "domcontentloaded" });
+
+    // 2. Establish the live WS on the FIRST token — proves the session is real.
+    const initial = await openDisplayWs(page, firstUrl);
+    expect(
+      initial.ok,
+      `initial connect failed at stage="${initial.stage}": ${initial.detail}` +
+        (initial.closeCode ? ` (close code ${initial.closeCode})` : ""),
+    ).toBe(true);
+    expect(initial.stage, `initial connect should reach 'frame'; got '${initial.stage}'`).toBe(
+      "frame",
+    );
+
+    // 3. Stand in for a dropped stream. openDisplayWs() already closed its
+    //    socket in finish() (a client-side close(), not a real unclean 1006),
+    //    so no live stream remains — the state DisplayTab's "disconnect" handler
+    //    (DisplayTab.tsx:426-442) is in before it calls connect(reacquire=true).
+
+    // 4. Reconnect path: mint a FRESH lease+token FIRST, the way
+    //    connect(reacquire=true) → reacquireSession() does (DisplayTab.tsx:397
+    //    / :83-99). This is a re-acquire by the SAME holder, so the server's
+    //    ON-CONFLICT upsert extends the lease and returns a new signed URL.
+    const reResp = await page.request.post(
+      `${tenantURL}/workspaces/${workspaceId}/display/control/acquire`,
+      { data: { controller: "user", ttl_seconds: DEFAULT_TTL_SECONDS } },
+    );
+    expect(
+      reResp.status(),
+      `reconnect re-acquire should be 200 (same holder extends, not 409); body: ${await reResp.text()}`,
+    ).toBe(200);
+    const re = await reResp.json();
+    expect(re.controller, "reconnect controller should still be 'user'").toBe("user");
+    expect(typeof re.session_url, "reconnect acquire missing session_url").toBe("string");
+    const reUrl: string = re.session_url;
+    const reToken = tokenOf(reUrl);
+    expect(reToken.length, "reconnect token should be non-empty").toBeGreaterThan(0);
+
+    // The reconnect token MUST be fresh — bound to the new expires_at; a reused
+    // token means the canvas fell back to a cached, soon-expiring URL (the
+    // 401-on-reconnect bug core#2216 fixed). The token embeds expires_at.Unix()
+    // (workspace_display_control.go:390): later expiry => new signature => new token.
+    // NOTE(review): Unix() is whole seconds; confirm same-second acquires differ.
+    expect(
+      reToken,
+      "reconnect should mint a FRESH token (bound to the renewed expires_at), " +
+        "not reuse the original ~300s token — a reused token is the core#2216 401 bug.",
+    ).not.toBe(firstToken);
+    expect(
+      new Date(re.expires_at).getTime(),
+      "renewed expires_at should be >= the original (lease extended, not shrunk)",
+    ).toBeGreaterThanOrEqual(new Date(first.expires_at).getTime());
+
+    // 5. Reopen the WS on the FRESH token and assert the framebuffer RESUMES —
+    //    a real frame, not a dead 1006/403 session. This is the crux: the
+    //    reconnect produces a LIVE stream, not a stale-token rejection.
+    const reconnected = await openDisplayWs(page, reUrl);
+    expect(
+      reconnected.ok,
+      `RECONNECT failed at stage="${reconnected.stage}": ${reconnected.detail}` +
+        (reconnected.closeCode ? ` (close code ${reconnected.closeCode})` : "") +
+        " — a 1006/403 here means the fresh-token reconnect did NOT re-establish " +
+        "the proxy chain (edge → ws-proxy → EIC → websockify → x11vnc).",
+    ).toBe(true);
+    expect(
+      reconnected.stage,
+      `reconnect should reach 'frame' (framebuffer resumed); got '${reconnected.stage}' (${reconnected.detail})`,
+    ).toBe("frame");
+    expect(
+      reconnected.frameBytes ?? 0,
+      `resumed framebuffer message should be non-empty (kind=${reconnected.frameKind})`,
+    ).toBeGreaterThan(0);
+  });
+
+  test("renewal pushes the lease past the original 300s window (no kick at ~5min)", async ({
+    page,
+  }) => {
+    const { tenantURL } = resolveTenant();
+    const workspaceId = DISPLAY_WS_ID as string;
+
+    // 1. Acquire the initial 300s lease.
+    const firstResp = await page.request.post(
+      `${tenantURL}/workspaces/${workspaceId}/display/control/acquire`,
+      { data: { controller: "user", ttl_seconds: DEFAULT_TTL_SECONDS } },
+    );
+    expect(
+      firstResp.status(),
+      `initial acquire should be 200; body: ${await firstResp.text()}`,
+    ).toBe(200);
+    const first = await firstResp.json();
+    const firstExpiry = new Date(first.expires_at).getTime();
+    expect(Number.isFinite(firstExpiry), "first expires_at should parse").toBe(true);
+
+    // The original lease's hard ceiling: when the un-renewed token/lock dies.
+    const originalLeaseDeadlineMs = firstExpiry;
+
+    // 2. Fire the renewal CALL the 120s timer fires (DisplayTab.tsx:107-109 →
+    //    reacquireSession → this same POST). We don't sleep RENEWAL_INTERVAL_MS
+    //    of wall-clock; we drive the observable call the timer would make and
+    //    assert its EFFECT on the lease. The guard below only compares this
+    //    spec's own constants: it documents the interval < TTL contract but
+    //    cannot see DisplayTab.tsx — update both constants if production changes.
+    expect(
+      RENEWAL_INTERVAL_MS,
+      "renewal interval must be strictly inside the lease TTL, else the lease " +
+        "expires before the timer renews it (user gets kicked).",
+    ).toBeLessThan(DEFAULT_TTL_SECONDS * 1000);
+
+    const renewResp = await page.request.post(
+      `${tenantURL}/workspaces/${workspaceId}/display/control/acquire`,
+      { data: { controller: "user", ttl_seconds: DEFAULT_TTL_SECONDS } },
+    );
+    expect(
+      renewResp.status(),
+      `renewal re-acquire should be 200 (same holder extends); body: ${await renewResp.text()}`,
+    ).toBe(200);
+    const renew = await renewResp.json();
+    const renewedExpiry = new Date(renew.expires_at).getTime();
+
+    // 3. The renewal MUST push expires_at strictly PAST the original lease
+    //    window — that is the whole point of core#2216's renewal timer: a
+    //    fresh 300s starting now, so the lease outlives the original ~300s
+    //    deadline and the user is not kicked every ~5 minutes. (now()+300s,
+    //    fired before the original 300s elapsed, is strictly later than the
+    //    original now()+300s.)
+    expect(
+      renewedExpiry,
+      "renewal should extend the lease strictly past the original 300s deadline " +
+        `(original=${first.expires_at}, renewed=${renew.expires_at}). Equal-or-earlier ` +
+        "means the renewal did NOT extend — the 120s timer would not save the session.",
+    ).toBeGreaterThan(originalLeaseDeadlineMs);
+
+    // 4. Confirm the lock is still LIVE after renewal — GET /display/control
+    //    only returns a holder when expires_at > now() (loadActiveDisplayControl,
+    //    workspace_display_control.go:280). A held controller here proves the
+    //    renewed lease is active, not expired.
+    const ctrlResp = await page.request.get(
+      `${tenantURL}/workspaces/${workspaceId}/display/control`,
+    );
+    expect(ctrlResp.status(), "GET /display/control should be 200").toBe(200);
+    const ctrl = await ctrlResp.json();
+    expect(
+      ctrl.controller,
+      "after renewal the lock should still report a live holder (not 'none')",
+    ).toBe("user");
+    expect(
+      new Date(ctrl.expires_at).getTime(),
+      "the live lock's expires_at should match the renewed lease (lease is the " +
+        "renewed one, not the original).",
+    ).toBeGreaterThan(originalLeaseDeadlineMs);
+
+    // TODO(core#2332, CTO cost item): the assertions above prove the renewal
+    // CALL extends the lease past the original window — the deterministic proxy
+    // for "the 120s interval keeps the lease alive past 300s." To additionally
+    // prove the lease survives a FULL real-time 300s+ idle WS (the literal
+    // wall-clock claim), a long-lived test would hold one WS open >300s while
+    // the 120s timer renews underneath and assert the SAME socket never 1006s.
+    // That needs >5 min of standing-desktop wall-clock per run and is gated on
+    // the standing desktop EC2 being funded; it is NOT exercised here. Promote
+    // either form to a REQUIRED context only on CTO sign-off (cost + cadence).
+  });
+});
@@ -0,0 +1,329 @@
+/**
+ * Staging canvas E2E — REAL desktop take-control path (core#2261 "Gap 1").
+ *
+ * This is the live-e2e gate that the existing staging-tabs.spec.ts does NOT
+ * provide. staging-tabs only opens the 13 declared workspace-panel tabs
+ * (TAB_IDS at staging-tabs.spec.ts:24-38 — `display` is NOT among them) and
+ * asserts they render without a "Failed to load" toast. It never acquires
+ * display control, never opens the noVNC WebSocket, and never asserts a
+ * framebuffer frame arrives. The companion unit test
+ * canvas/src/components/tabs/__tests__/DisplayTab.test.tsx mocks the RFB
+ * constructor (vi.mock("@novnc/novnc"), see its lines 8/20-39) so NO real
+ * WebSocket is ever opened there either. Result: a broken take-control path
+ * (acquire → noVNC WS upgrade → ws-proxy → EIC → websockify → x11vnc → Xvfb)
+ * ships GREEN. This spec closes that gap by exercising the REAL wire path
+ * end to end against a live, desktop-capable staging workspace.
+ *
+ * What it asserts (the real path, no mocks):
+ *   1. POST /workspaces/<id>/display/control/acquire returns 200 with a
+ *      session_url that carries the signed token in its `#token=` fragment
+ *      (mirrors workspace_display_control.go:signedDisplaySessionURL).
+ *   2. Opening the noVNC WebSocket at session_url with the subprotocols
+ *      ["binary", "molecule-display-token.<token>"] (exactly what the canvas
+ *      sends — DisplayTab.tsx:339) UPGRADES (onopen fires, readyState===OPEN,
+ *      no immediate 1006 abnormal close). A 1006 / 403 means the handshake
+ *      failed somewhere in the proxy chain.
+ *   3. At least one non-empty server message arrives on that socket (binary
+ *      is expected; the check itself only requires a non-empty payload). RFB
+ *      sends a ProtocolVersion banner ("RFB 003.00x\n") as the first server message,
+ *      which proves the upstream VNC server is live behind the EIC tunnel.
+ *
+ * Auth model (important): the WS upgrade is gated by workspace-server
+ * middleware.AdminAuth. A browser WebSocket CANNOT set an Authorization
+ * header, so in production the canvas WS upgrade passes AdminAuth via the
+ * same-origin-canvas path (wsauth_middleware.go:isSameOriginCanvas, which
+ * keys off the Origin header the browser sets automatically on a same-origin
+ * WS upgrade). We therefore open the socket from inside the browser page via
+ * page.evaluate AFTER navigating to the tenant origin — so the browser sends
+ * `Origin: https://<slug>.staging.moleculesai.app`, exactly as production
+ * does. The acquire POST (which CAN carry a header) uses the per-tenant admin
+ * bearer set on the context. This is the faithful production handshake, not a
+ * synthetic one.
+ *
+ * Gate / cost: this test only runs when STAGING_DISPLAY_WORKSPACE_ID points
+ * at a STANDING desktop-capable workspace (compute.display.mode ==
+ * "desktop-control"). We deliberately do NOT provision one in the shared
+ * staging-setup.ts: a desktop AMI boots in ~12-15 min and would tax the
+ * existing tabs harness on every run. Standing that workspace up is a cost
+ * item for the CTO (one always-on desktop EC2 on staging). Until that exists,
+ * the test SKIPS loud. When the env IS present, any failure in
+ * provision/acquire/upgrade is a HARD error — fail-closed, never silently
+ * green (no "flaky" disposition: a 1006 names a broken proxy hop).
+ */
+
+import { test, expect } from "@playwright/test";
+
+const STAGING = process.env.CANVAS_E2E_STAGING === "1";
+
+// The standing desktop-capable workspace id. Absent => skip loud. This is
+// the single knob that activates the gate; see file header for the cost note.
+const DISPLAY_WS_ID = process.env.STAGING_DISPLAY_WORKSPACE_ID;
+
+test.skip(!STAGING, "CANVAS_E2E_STAGING not set — skipping staging-only tests");
+test.skip(
+  !DISPLAY_WS_ID,
+  "STAGING_DISPLAY_WORKSPACE_ID not set — no standing desktop-capable staging " +
+    "workspace to exercise the take-control path. Set it to a workspace whose " +
+    "compute.display.mode == 'desktop-control' to activate this real-e2e gate. " +
+    "(Standing that workspace up is a CTO cost item — one always-on desktop EC2.)",
+);
+
+// How long we wait for the WS to upgrade + deliver the first frame. The EIC
+// tunnel + websockify handshake adds real latency on top of the edge; budget
+// generously but bounded, so a genuinely-dead path fails LOUD instead of
+// hanging to the suite timeout.
+const WS_UPGRADE_TIMEOUT_MS = 30_000;
+const FIRST_FRAME_TIMEOUT_MS = 30_000;
+
+test.describe("staging desktop take-control (real noVNC path)", () => {
+  test("acquire → WS upgrades → first framebuffer frame arrives", async ({
+    page,
+    context,
+  }) => {
+    // The standing desktop workspace lives in its OWN standing org (it can't
+    // live in the per-run ephemeral org — that gets torn down each run). When
+    // STAGING_DISPLAY_SLUG is configured, staging-setup.ts resolves that org's
+    // tenant URL / admin token / org id and exports them under STAGING_DISPLAY_*.
+    // Fall back to the ephemeral org's exports only if the display org wasn't
+    // separately configured (e.g. the desktop workspace happens to live in the
+    // run's own tenant — not the expected topology, but supported).
+    const tenantURL =
+      process.env.STAGING_DISPLAY_TENANT_URL || process.env.STAGING_TENANT_URL;
+    const tenantToken =
+      process.env.STAGING_DISPLAY_TENANT_TOKEN || process.env.STAGING_TENANT_TOKEN;
+    const orgID =
+      process.env.STAGING_DISPLAY_ORG_ID || process.env.STAGING_ORG_ID;
+
+    // Fail-closed: when the gate env IS present (we got past the skips above),
+    // the rest of the staging context MUST be wired or this is a hard error,
+    // never a silent pass. Mirrors staging-tabs.spec.ts:53-57.
+    if (!tenantURL || !tenantToken) {
+      throw new Error(
+        "STAGING_DISPLAY_WORKSPACE_ID is set but no tenant URL/token is available " +
+          "for the take-control gate. Set STAGING_DISPLAY_SLUG so staging-setup.ts " +
+          "resolves STAGING_DISPLAY_TENANT_URL / STAGING_DISPLAY_TENANT_TOKEN for the " +
+          "standing desktop org (or ensure the ephemeral STAGING_TENANT_* exports exist).",
+      );
+    }
+
+    const workspaceId = DISPLAY_WS_ID as string;
+
+    // The per-tenant admin bearer satisfies AdminAuth for the acquire POST
+    // (which can carry a header). The WS upgrade below relies on Origin
+    // (same-origin canvas), NOT this header.
+    await context.setExtraHTTPHeaders({
+      Authorization: `Bearer ${tenantToken}`,
+      // X-Molecule-Org-Id is required by workspace-server TenantGuard for
+      // cross-org requests routed through the CP edge; staging-setup exports it.
+      // Harmless (and correct) to send on the same-origin tenant box too.
+      ...(orgID ? { "X-Molecule-Org-Id": orgID } : {}),
+    });
+
+    // 0. Sanity: the workspace must actually be display-enabled, else the
+    //    whole gate is meaningless. Hit the availability endpoint first so a
+    //    mis-pointed STAGING_DISPLAY_WORKSPACE_ID fails with a precise message
+    //    instead of an opaque acquire error.
+    const availResp = await page.request.get(
+      `${tenantURL}/workspaces/${workspaceId}/display`,
+    );
+    expect(
+      availResp.status(),
+      `GET /display for ${workspaceId} should be 200`,
+    ).toBe(200);
+    const avail = await availResp.json();
+    expect(
+      avail.available,
+      `workspace ${workspaceId} is not display-available (reason=${avail.reason}). ` +
+        "STAGING_DISPLAY_WORKSPACE_ID must point at a workspace with " +
+        "compute.display.mode == 'desktop-control' AND a live instance_id.",
+    ).toBe(true);
+
+    // 1. Acquire display control. The handler returns session_url +
+    //    expires_at; session_url embeds the signed token in its #token=
+    //    fragment (workspace_display_control.go:signedDisplaySessionURL).
+    const acquireResp = await page.request.post(
+      `${tenantURL}/workspaces/${workspaceId}/display/control/acquire`,
+      { data: { controller: "user", ttl_seconds: 300 } },
+    );
+    expect(
+      acquireResp.status(),
+      `acquire should be 200; body: ${await acquireResp.text()}`,
+    ).toBe(200);
+    const acquire = await acquireResp.json();
+    expect(acquire.controller, "controller should be 'user'").toBe("user");
+    expect(
+      typeof acquire.session_url,
+      `acquire response missing session_url: ${JSON.stringify(acquire)}`,
+    ).toBe("string");
+
+    // The token rides in the URL fragment (#token=...), never as a query
+    // param — confirm the contract the client (DisplayTab.tsx:459-466)
+    // depends on so a server-side change to the URL shape fails HERE.
+    const sessionUrl: string = acquire.session_url;
+    expect(
+      sessionUrl,
+      `session_url should carry the token in a #token= fragment: ${sessionUrl}`,
+    ).toContain("#token=");
+
+    // 2. Open the REAL noVNC WebSocket from inside the page, so the browser
+    //    sends Origin: <tenant> and the same-origin-canvas AdminAuth path
+    //    accepts the upgrade (a browser WS can't set Authorization). We
+    //    navigate to the tenant origin first purely to anchor the Origin
+    //    header; we don't need the canvas bundle to hydrate.
+    await page.goto(tenantURL, { waitUntil: "domcontentloaded" });
+
+    // Reproduce DisplayTab.tsx:459-466 (displayWebSocketConnection): resolve
+    // session_url against the tenant origin, pull the token out of the
+    // fragment, strip the fragment, switch http(s)->ws(s). Then connect with
+    // the exact subprotocols the canvas uses (DisplayTab.tsx:339).
+    const result = await page.evaluate(
+      async ({ rawSessionUrl, upgradeTimeoutMs, frameTimeoutMs }) => {
+        const u = new URL(rawSessionUrl, window.location.href);
+        const token =
+          new URLSearchParams(u.hash.replace(/^#/, "")).get("token") ?? "";
+        if (!token) {
+          return { ok: false, stage: "token-parse", detail: "no #token in session_url" };
+        }
+        u.hash = "";
+        u.protocol = window.location.protocol === "https:" ? "wss:" : "ws:";
+        const wsUrl = u.toString();
+
+        return await new Promise<{
+          ok: boolean;
+          stage: string;
+          detail: string;
+          frameBytes?: number;
+          frameKind?: string;
+          closeCode?: number;
+        }>((resolve) => {
+          let upgraded = false;
+          let settled = false;
+          const finish = (r: {
+            ok: boolean;
+            stage: string;
+            detail: string;
+            frameBytes?: number;
+            frameKind?: string;
+            closeCode?: number;
+          }) => {
+            if (settled) return;
+            settled = true;
+            try {
+              ws.close();
+            } catch {
+              /* ignore */
+            }
+            resolve(r);
+          };
+
+          let ws: WebSocket;
+          try {
+            ws = new WebSocket(wsUrl, [`binary`, `molecule-display-token.${token}`]);
+          } catch (e) {
+            resolve({ ok: false, stage: "construct", detail: String(e) });
+            return;
+          }
+          ws.binaryType = "arraybuffer";
+
+          const upgradeTimer = setTimeout(() => {
+            finish({
+              ok: false,
+              stage: "upgrade-timeout",
+              detail: `WS did not open within ${upgradeTimeoutMs}ms (readyState=${ws.readyState})`,
+            });
+          }, upgradeTimeoutMs);
+
+          let frameTimer: ReturnType<typeof setTimeout> | null = null;
+
+          ws.onopen = () => {
+            upgraded = true;
+            clearTimeout(upgradeTimer);
+            // Now wait for the first server message. RFB's ProtocolVersion
+            // banner is the first thing x11vnc sends; if nothing arrives the
+            // tunnel opened but the VNC server behind it is dead.
+            frameTimer = setTimeout(() => {
+              finish({
+                ok: false,
+                stage: "frame-timeout",
+                detail: `WS upgraded but no framebuffer message within ${frameTimeoutMs}ms`,
+              });
+            }, frameTimeoutMs);
+          };
+
+          ws.onmessage = (ev) => {
+            if (frameTimer) clearTimeout(frameTimer);
+            let bytes = 0;
+            let kind: string = typeof ev.data;
+            if (ev.data instanceof ArrayBuffer) {
+              bytes = ev.data.byteLength;
+              kind = "ArrayBuffer";
+            } else if (typeof Blob !== "undefined" && ev.data instanceof Blob) {
+              bytes = ev.data.size;
+              kind = "Blob";
+            } else if (typeof ev.data === "string") {
+              bytes = ev.data.length;
+              kind = "string";
+            }
+            finish({
+              ok: bytes > 0,
+              stage: "frame",
+              detail:
+                bytes > 0
+                  ? "received framebuffer message"
+                  : "first message was empty",
+              frameBytes: bytes,
+              frameKind: kind,
+            });
+          };
+
+          ws.onclose = (ev) => {
+            // A close BEFORE open === failed upgrade (1006 abnormal / 403
+            // forbidden surface here). A close AFTER open but before any
+            // message is reported at once, with its close code, instead of
+            // idling until the frame timer fires. After finish() has settled
+            // (frame seen / timeout) this handler is a benign no-op.
+            clearTimeout(upgradeTimer);
+            if (frameTimer) clearTimeout(frameTimer);
+            const at = `(code=${ev.code}, reason="${ev.reason}")`;
+            const detail = upgraded
+              ? `WS upgraded but closed before any framebuffer message ${at}`
+              : `WS closed before upgrade ${at} — handshake rejected somewhere in edge → ws-proxy → EIC → websockify → x11vnc`;
+            finish({ ok: false, stage: upgraded ? "frame-close" : "upgrade-close", detail, closeCode: ev.code });
+          };
+
+          ws.onerror = () => {
+            if (!upgraded) {
+              clearTimeout(upgradeTimer);
+              finish({
+                ok: false,
+                stage: "upgrade-error",
+                detail: "WS error before upgrade — proxy chain rejected the handshake",
+              });
+            }
+          };
+        });
+      },
+      {
+        rawSessionUrl: sessionUrl,
+        upgradeTimeoutMs: WS_UPGRADE_TIMEOUT_MS,
+        frameTimeoutMs: FIRST_FRAME_TIMEOUT_MS,
+      },
+    );
+
+    // 3. Assert the real outcome. No "flaky" escape hatch: each failure stage
+    //    names the broken hop so a reviewer can act on it directly.
+    expect(
+      result.ok,
+      `take-control failed at stage="${result.stage}": ${result.detail}` +
+        (result.closeCode ? ` (close code ${result.closeCode})` : ""),
+    ).toBe(true);
+    expect(
+      result.stage,
+      `expected to reach the 'frame' stage; got '${result.stage}' (${result.detail})`,
+    ).toBe("frame");
+    expect(
+      result.frameBytes ?? 0,
+      `framebuffer message should be non-empty (kind=${result.frameKind})`,
+    ).toBeGreaterThan(0);
+  });
+});
@@ -337,13 +337,99 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {

  // 7. Hand state off to tests + teardown — overwrite the slug-only
  // bootstrap state with the full state spec tests need.
-  writeFileSync(
-    stateFile,
-    JSON.stringify({ slug, tenantURL, workspaceId, tenantToken }, null, 2),
-  );
+  //
+  // FAIL-CLOSED handoff: every field the spec reads must be non-empty. If
+  // any is missing here, the spec's env-presence guard would throw with a
+  // generic "did setup run?" message that hides WHICH field was lost. Catch
+  // it at the source — a partial provision must hard-fail setup, never hand
+  // off a half-built state that the spec then has to diagnose (or worse,
+  // skip). This is the loud, fail-closed contract: STAGING was requested,
+  // so an incomplete provision is an error, not a skip.
+  const handoff = { slug, tenantURL, workspaceId, tenantToken };
+  const missingFields = Object.entries(handoff)
+    .filter(([, v]) => !v)
+    .map(([k]) => k);
+  if (missingFields.length > 0) {
+    throw new Error(
+      `[staging-setup] provision incomplete — empty handoff field(s): ` +
+        `${missingFields.join(", ")}. Refusing to hand off a partial state ` +
+        `that would surface downstream as an opaque spec failure.`,
+    );
+  }
+  writeFileSync(stateFile, JSON.stringify(handoff, null, 2));
  process.env.STAGING_SLUG = slug;
  process.env.STAGING_TENANT_URL = tenantURL;
  process.env.STAGING_WORKSPACE_ID = workspaceId;
  process.env.STAGING_TENANT_TOKEN = tenantToken;
+  // The ephemeral org's UUID — exported so specs that route through the CP
+  // edge can send X-Molecule-Org-Id (workspace-server TenantGuard). The tabs
+  // harness hits the tenant box same-origin and doesn't need it, but the
+  // take-control gate (staging-display.spec.ts) does.
+  process.env.STAGING_ORG_ID = orgID;
  console.log(`[staging-setup] Ready — ${stateFile}`);
+
+  // 8. (core#2261 Gap 1) Resolve the STANDING desktop-capable org, if one is
+  // configured, for the live take-control e2e (staging-display.spec.ts).
+  //
+  // This block is FULLY env-gated and additive: it provisions NOTHING and is
+  // a no-op unless STAGING_DISPLAY_SLUG is set. We deliberately do NOT spin a
+  // desktop workspace inside this shared setup — a desktop AMI boots in
+  // ~12-15 min and would tax every tabs run. Instead an operator stands up
+  // one always-on desktop org once (a CTO cost item) and points
+  // STAGING_DISPLAY_SLUG + STAGING_DISPLAY_WORKSPACE_ID at it. Here we just
+  // resolve that standing org's tenant URL, admin token, and org id so the
+  // display spec can reach it. Fail-closed: if STAGING_DISPLAY_SLUG is set but
+  // we can't resolve its token/id, we THROW — the gate must never silently
+  // fall back to the (non-desktop) ephemeral org and pass.
+  const displaySlug = process.env.STAGING_DISPLAY_SLUG;
+  if (displaySlug) {
+    console.log(`[staging-setup] Resolving standing desktop org: ${displaySlug}`);
+
+    // org id for the standing slug (admin-orgs row carries it + status).
+    const orgsRes = await jsonFetch(`${CP_URL}/cp/admin/orgs`, { headers: adminAuth });
+    if (orgsRes.status !== 200) {
+      throw new Error(
+        `STAGING_DISPLAY_SLUG=${displaySlug} set, but GET /cp/admin/orgs returned ` +
+          `${orgsRes.status} — cannot resolve the standing desktop org for the ` +
+          `take-control gate.`,
+      );
+    }
+    const displayRow = (orgsRes.body?.orgs || []).find(
+      (o: any) => o.slug === displaySlug,
+    );
+    if (!displayRow?.id) {
+      throw new Error(
+        `STAGING_DISPLAY_SLUG=${displaySlug} not found in /cp/admin/orgs — the ` +
+          `standing desktop org for the take-control gate does not exist. Provision ` +
+          `it (one always-on desktop EC2) or unset STAGING_DISPLAY_SLUG/` +
+          `STAGING_DISPLAY_WORKSPACE_ID to skip the gate.`,
+      );
+    }
+    if (displayRow.instance_status !== "running") {
+      throw new Error(
+        `Standing desktop org ${displaySlug} is '${displayRow.instance_status}', ` +
+          `not 'running' — the take-control gate needs a live desktop tenant. ` +
+          `full row: ${JSON.stringify(displayRow)}`,
+      );
+    }
+
+    const displayTokRes = await jsonFetch(
+      `${CP_URL}/cp/admin/orgs/${displaySlug}/admin-token`,
+      { headers: adminAuth },
+    );
+    if (displayTokRes.status !== 200 || !displayTokRes.body?.admin_token) {
+      throw new Error(
+        `admin-token fetch for standing desktop org ${displaySlug} returned ` +
+          `${displayTokRes.status}: ${JSON.stringify(displayTokRes.body)}`,
+      );
+    }
+
+    process.env.STAGING_DISPLAY_ORG_ID = displayRow.id;
+    process.env.STAGING_DISPLAY_TENANT_URL = `https://${displaySlug}.${TENANT_DOMAIN}`;
+    process.env.STAGING_DISPLAY_TENANT_TOKEN = displayTokRes.body.admin_token;
+    console.log(
+      `[staging-setup] Standing desktop org resolved: ${displaySlug} ` +
+        `(org_id=${displayRow.id}, url=${process.env.STAGING_DISPLAY_TENANT_URL})`,
+    );
+  }
 }
@@ -1,7 +1,8 @@
 /**
- * Staging canvas E2E — opens each of the 13 workspace-panel tabs against a
- * fresh staging org provisioned in the global setup. Asserts each tab
- * renders without throwing and captures a screenshot for visual review.
+ * Staging canvas E2E — opens each workspace-panel tab against a fresh
+ * staging org provisioned in the global setup. Asserts each tab renders
+ * REAL content (not an empty container, not an error state) and captures a
+ * screenshot for visual review.
 *
 * Auth model: the tenant platform's AdminAuth middleware accepts a bearer
 * token OR a WorkOS session cookie. Playwright can't mint a WorkOS
@@ -10,17 +11,39 @@
 * Bearer header via context.setExtraHTTPHeaders(). Every browser
 * request inherits the header.
 *
- * Known SaaS gaps — documented in #1369 and allowed to render errored
- * content without failing the test (the gate is "no hard crash, no
- * 'Failed to load' toast"):
+ * PROMOTION-READINESS (see § at bottom of file): this suite is being
+ * hardened toward becoming a HARD merge-gate. It currently runs under
+ * `continue-on-error: true` (RFC internal#219 §1, non-gating) — that is a
+ * deliberate, CTO-owned call and is NOT changed here. The hardening makes
+ * every assertion deterministic so that WHEN promotion happens the gate
+ * does not flap. See the PROMOTION-READINESS block at the foot of this
+ * file for what is now reliable and what still blocks promotion.
+ *
+ * Known SaaS gaps — documented in #1369. These tabs legitimately cannot
+ * load real content in SaaS mode and are allowed an in-panel empty/error
+ * state (NOT a hard crash, NOT an ErrorBoundary):
 *   - Files tab: empty (platform can't docker exec into a remote EC2)
 *   - Terminal tab: WS connect fails
 *   - Peers tab: 401 without workspace-scoped token
+ * Terminal and Files are enumerated in KNOWN_DEGRADED_TABS below and get a
+ * weaker (but still non-trivial) contract: the panel renders and does not crash
+ * the app. Peers is NOT in that set today; it and every OTHER tab must render real content.
 */

-import { test, expect } from "@playwright/test";
+import { test, expect, type Page } from "@playwright/test";

 // Tab ids as declared in canvas/src/components/SidePanel.tsx TABS.
+//
+// NOTE (drift guard): this list is asserted-complete against the live DOM
+// below (see "tab list parity" step) so it cannot silently drift out of
+// sync with SidePanel.tsx TABS the way a hand-maintained constant does.
+// `display` and `container-config` are intentionally EXCLUDED here:
+//   - `display` is owned by the in-flight take-control e2e (PR #2275 /
+//     staging-display.spec.ts); asserting it here would collide.
+//   - `container-config` only renders when selectedNodeId is set AND is
+//     gated on tier; it is covered by container-config-specific specs.
+// The parity check accounts for these via EXPECTED_EXTRA_TABS so a NEW
+// tab appearing in SidePanel still trips the guard.
 const TAB_IDS = [
  "chat",
  "activity",
@@ -37,12 +60,131 @@ const TAB_IDS = [
  "audit",
 ] as const;

+// Tabs present in the DOM that this spec intentionally does not drive.
+// Keeping this explicit means a genuinely-new tab (not one of these) makes
+// the parity assertion fail LOUD instead of being silently un-tested.
+const EXPECTED_EXTRA_TABS = ["display", "container-config"] as const;
+
+// Tabs that are KNOWN to degrade in SaaS mode (#1369). They get the weaker
+// "renders + no crash" contract instead of the "real content" contract.
+// Anything NOT in this set must render real content or the test fails.
+const KNOWN_DEGRADED_TABS = new Set<string>(["terminal", "files"]);
+
 const STAGING = process.env.CANVAS_E2E_STAGING === "1";

-test.skip(!STAGING, "CANVAS_E2E_STAGING not set — skipping staging-only tests");
+// IMPORTANT — fail-closed, not skip-green.
+//
+// `test.skip(!STAGING)` is correct ONLY when the operator never asked for a
+// staging run (CANVAS_E2E_STAGING unset). In that case the workflow's
+// detect-changes / token-check gates have already decided not to exercise
+// staging, and skipping is the documented contract.
+//
+// But if STAGING *is* requested (CANVAS_E2E_STAGING=1) and global setup did
+// NOT hand off the tenant state, that is a HARD failure, not a skip — see
+// the explicit env-presence throw inside the test body. A silent skip there
+// would let a broken provision ship green, which is exactly the
+// weak-gate failure this hardening removes (§ No flakes / internal#828).
+test.skip(!STAGING, "CANVAS_E2E_STAGING not set — staging-only suite, not requested");
+
+/**
+ * Assert the panel for `tabId` rendered real content.
+ *
+ * Deterministic contract (no fixed waits — every step is condition-based
+ * with Playwright's built-in retry / expect.poll):
+ *   1. The tabpanel container is visible.
+ *   2. For non-degraded tabs: the panel settles to non-empty,
+ *      non-spinner content (so an empty <div/> or a stuck "Loading…"
+ *      spinner FAILS instead of passing as it did before).
+ *   3. Once settled, the global ErrorBoundary has NOT tripped.
+ *   4. Once settled, non-degraded tabs show no visible error alert.
+ */
+async function assertPanelRendered(page: Page, tabId: string): Promise<void> {
+  const panel = page.locator(`#panel-${tabId}`);
+  const degraded = KNOWN_DEGRADED_TABS.has(tabId);
+
+  // (1) Container visible. Built-in retry up to the expect timeout — no
+  // arbitrary waitForTimeout. Mechanism: replaces any reliance on a fixed
+  // settle delay with a real visibility condition.
+  await expect(panel, `panel for ${tabId} never became visible`).toBeVisible({
+    timeout: 10_000,
+  });
+
+  // (2) Real content. The tabpanel CONTAINER always mounts, so the old
+  // toBeVisible() on the container passed even when the child rendered
+  // nothing. Assert the panel's trimmed innerText is non-empty AND not
+  // stuck on a loading spinner. expect.poll retries until the async
+  // fetch+render settles. This runs BEFORE the crash/alert checks: a
+  // poll passes on its first green sample, so checking for errors while
+  // the tab is still loading would miss anything that renders later.
+  //
+  // Degraded tabs may legitimately be empty (Files in SaaS mode), so they
+  // are exempt from the non-empty requirement.
+  if (!degraded) {
+    await expect
+      .poll(
+        async () => {
+          const text = ((await panel.innerText()) || "").trim();
+          // A panel still showing only a loading spinner has not settled.
+          const stillLoading = /^(loading\b|loading…|loading\.\.\.)/i.test(
+            text,
+          );
+          return text.length > 0 && !stillLoading;
+        },
+        {
+          message:
+            `tab ${tabId}: panel rendered empty or stuck on a loading ` +
+            `spinner — no real content settled (weak "container visible" ` +
+            `gate would have passed this)`,
+          // Generous: real tabs fetch from the tenant over the network.
+          // Polled, so it returns as soon as content appears.
+          timeout: 20_000,
+        },
+      )
+      .toBe(true);
+  }
+
+  // (3) ErrorBoundary trip = hard crash anywhere in the React subtree.
+  // canvas/src/components/ErrorBoundary.tsx renders "Something went wrong".
+  // The OLD gate only looked for a "Failed to load" toast and would ship
+  // an ErrorBoundary-crashed panel GREEN. Checked after the content has
+  // settled so a crash thrown by the tab's post-fetch render is on screen
+  // by the time we look; the poll only absorbs a transient match.
+  await expect
+    .poll(
+      async () =>
+        page.getByText("Something went wrong", { exact: false }).count(),
+      {
+        message: `tab ${tabId}: ErrorBoundary tripped (Something went wrong)`,
+        timeout: 5_000,
+      },
+    )
+    .toBe(0);
+
+  // (4) No visible error alert inside the panel. Tabs surface load errors
+  // as role="alert" with the real error text (EventsTab/ChannelsTab/
+  // ConfigTab/...). The OLD gate matched ONLY [role=alert]:has-text("Failed
+  // to load") and so missed alerts worded differently. This check covers
+  // any *visible* role="alert" in the panel; error divs that omit
+  // role="alert" entirely (e.g. ActivityTab) are still NOT detected here.
+  //
+  // Degraded tabs (#1369) are allowed an error state — for those we only
+  // require no app-level crash (covered by step 3). For every other tab a
+  // visible error alert is a real regression.
+  if (!degraded) {
+    const visibleAlerts = panel.locator('[role="alert"]:visible');
+    await expect
+      .poll(async () => visibleAlerts.count(), {
+        message:
+          `tab ${tabId}: a visible error alert is shown in the panel ` +
+          `(was a weak "Failed to load"-only check before)`,
+        timeout: 5_000,
+      })
+      .toBe(0);
+  }
+}

 test.describe("staging canvas tabs", () => {
-  test("each workspace-panel tab renders without error", async ({
+  test("each workspace-panel tab renders real content", async ({
    page,
    context,
  }) => {
@@ -50,9 +192,16 @@ test.describe("staging canvas tabs", () => {
    const tenantToken = process.env.STAGING_TENANT_TOKEN;
    const workspaceId = process.env.STAGING_WORKSPACE_ID;

+    // FAIL-CLOSED (not skip): STAGING was requested but global setup did
+    // not export tenant state. A silent skip here would paint a broken
+    // provision GREEN. This is the loud-fail the hardening mandates.
    if (!tenantURL || !tenantToken || !workspaceId) {
      throw new Error(
-        "staging-setup.ts did not export STAGING_TENANT_URL / STAGING_TENANT_TOKEN / STAGING_WORKSPACE_ID — did global setup run?",
+        "staging-setup.ts did not export STAGING_TENANT_URL / " +
+          "STAGING_TENANT_TOKEN / STAGING_WORKSPACE_ID. CANVAS_E2E_STAGING=1 " +
+          "was set (staging WAS requested) but global setup produced no " +
+          "tenant — this is a provisioning failure, NOT a reason to skip. " +
+          "Check the [staging-setup] log above for the real error.",
      );
    }

@@ -152,11 +301,19 @@ test.describe("staging canvas tabs", () => {
    // omit the URL, so we'd otherwise be flying blind. Logged to the
    // test's stdout (visible in the workflow log under the failed step).
    page.on("requestfailed", (req) => {
-      console.log(`[e2e/requestfailed] ${req.method()} ${req.url()}: ${req.failure()?.errorText ?? "?"}`);
+      console.log(
+        `[e2e/requestfailed] ${req.method()} ${req.url()}: ${
+          req.failure()?.errorText ?? "?"
+        }`,
+      );
    });
    page.on("response", (res) => {
      if (res.status() >= 400) {
-        console.log(`[e2e/response-${res.status()}] ${res.request().method()} ${res.url()}`);
+        console.log(
+          `[e2e/response-${res.status()}] ${res
+            .request()
+            .method()} ${res.url()}`,
+        );
      }
    });

@@ -173,9 +330,8 @@ test.describe("staging canvas tabs", () => {
    // hydrated, even with zero workspaces) or the hydration-error
    // banner — whichever wins first. Previous version of this wait
    // used `[role="tablist"]`, but that selector only appears AFTER
-    // a workspace node is clicked (which happens below at L100), so
-    // the wait would always time out at 45s before any meaningful
-    // failure surfaced.
+    // a workspace node is clicked, so the wait would always time out
+    // at 45s before any meaningful failure surfaced.
    await page.waitForSelector(
      '[aria-label="Molecule AI workspace canvas"], [data-testid="hydration-error"]',
      { timeout: 45_000 },
@@ -189,10 +345,20 @@ test.describe("staging canvas tabs", () => {
      "canvas hydration failed — check staging CP + tenant reachability",
    ).toBe(0);

+    // The global ErrorBoundary must not have tripped at the app root
+    // either — a crash before the side panel even opens would otherwise
+    // be invisible until a tab assertion happened to notice it.
+    await expect(
+      page.getByText("Something went wrong", { exact: false }),
+      "app-level ErrorBoundary tripped during hydration",
+    ).toHaveCount(0);
+
    // Click the workspace node to open the side panel. Try a data
    // attribute first, fall back to a generic role-based selector so
    // the test doesn't break when the node-card markup changes.
-    const byDataAttr = page.locator(`[data-workspace-id="${workspaceId}"]`).first();
+    const byDataAttr = page
+      .locator(`[data-workspace-id="${workspaceId}"]`)
+      .first();
    if ((await byDataAttr.count()) > 0) {
      await byDataAttr.click({ timeout: 10_000 });
    } else {
@@ -202,19 +368,56 @@ test.describe("staging canvas tabs", () => {
      await firstNode.click({ timeout: 10_000 });
    }

-    await page.waitForSelector('[role="tablist"]', { timeout: 15_000 });
+    // The tablist appears once the side panel mounts. Condition-based
+    // wait — no fixed delay.
+    const tablist = page.locator('[role="tablist"]');
+    await expect(
+      tablist,
+      "side panel tablist never appeared after clicking the workspace node",
+    ).toBeVisible({ timeout: 15_000 });
+
+    // Tab-list parity guard. The hand-maintained TAB_IDS constant used to
+    // be able to drift silently out of sync with SidePanel.tsx TABS — a
+    // tab could be added to the UI and never get an assertion, shipping
+    // broken-but-untested. Read the actual tab ids from the DOM and assert
+    // every live tab is either driven by this spec (TAB_IDS) or explicitly
+    // excluded (EXPECTED_EXTRA_TABS). A genuinely-new tab fails LOUD.
+    const liveTabIds = (
+      await tablist.locator('[role="tab"][id^="tab-"]').evaluateAll((els) =>
+        els.map((el) => el.id.replace(/^tab-/, "")),
+      )
+    ).sort();
+    const accountedFor = new Set<string>([
+      ...TAB_IDS,
+      ...EXPECTED_EXTRA_TABS,
+    ]);
+    const unaccounted = liveTabIds.filter((id) => !accountedFor.has(id));
+    expect(
+      unaccounted,
+      `SidePanel exposes tab(s) this spec neither drives nor excludes: ` +
+        `${unaccounted.join(", ")}. Add them to TAB_IDS (and assert their ` +
+        `content) or to EXPECTED_EXTRA_TABS with a reason.`,
+    ).toHaveLength(0);
+    // And the inverse: every id in TAB_IDS must actually exist in the DOM,
+    // so a renamed/removed tab fails here instead of timing out on a
+    // missing #tab-<id> selector with an opaque message.
+    const missing = TAB_IDS.filter((id) => !liveTabIds.includes(id));
+    expect(
+      missing,
+      `TAB_IDS references tab(s) not present in SidePanel: ${missing.join(
+        ", ",
+      )} — the spec's tab list has drifted from SidePanel.tsx TABS.`,
+    ).toHaveLength(0);

    for (const tabId of TAB_IDS) {
      await test.step(`tab: ${tabId}`, async () => {
        const tabButton = page.locator(`#tab-${tabId}`);
-        // The TABS bar is `overflow-x-auto` (SidePanel.tsx:~tabs
-        // wrapper) — tabs after position ~3 are clipped behind the
-        // right-edge fade gradient on smaller viewports. Playwright's
-        // `toBeVisible()` returns false for clipped elements, so a
-        // bare visibility check fails on `skills` and later tabs in
-        // CI. scrollIntoViewIfNeeded brings the button into view
-        // before the visibility check, mirroring what SidePanel's own
-        // keyboard handler does on arrow-key navigation.
+        // The TABS bar is `overflow-x-auto` — tabs past position ~3 are
+        // clipped behind the right-edge fade gradient on smaller
+        // viewports. Playwright's toBeVisible() returns false for clipped
+        // elements, so a bare visibility check fails on later tabs in CI.
+        // scrollIntoViewIfNeeded brings the button into view before the
+        // visibility check.
        await tabButton.scrollIntoViewIfNeeded({ timeout: 5_000 });
        await expect(
          tabButton,
@@ -222,18 +425,34 @@ test.describe("staging canvas tabs", () => {
        ).toBeVisible({ timeout: 5_000 });
        await tabButton.click();

-        const panel = page.locator(`#panel-${tabId}`);
-        await expect(panel, `panel for ${tabId} never rendered`).toBeVisible({
-          timeout: 10_000,
-        });
+        // Confirm the click actually activated this tab before asserting
+        // its content — aria-selected flips on the active tab. This closes
+        // a race where a slow click handler left the PREVIOUS tab's panel
+        // mounted and we asserted the wrong panel's content. Built-in
+        // retry, condition-based, no fixed wait.
+        await expect(
+          tabButton,
+          `tab-${tabId} did not become the selected tab after click`,
+        ).toHaveAttribute("aria-selected", "true", { timeout: 5_000 });

-        // "Failed to load" toast = hard crash. Known SaaS-mode gaps
-        // (Files empty, Terminal disconnected, Peers 401) surface as
-        // in-panel content, not toasts.
+        // Real-content assertion (the core hardening). See
+        // assertPanelRendered: container visible + no ErrorBoundary + no
+        // visible error alert + settled non-empty content for non-degraded
+        // tabs. Replaces the old "panel visible + no Failed-to-load toast"
+        // pair, which shipped empty/errored panels green.
+        await assertPanelRendered(page, tabId);
+
+        // Belt and braces: the original toast check stays. A global
+        // "Failed to load" toast (role=alert outside the panel) is still a
+        // crash signal worth catching even though the in-panel checks above
+        // now do the heavy lifting.
        const errorToasts = await page
          .locator('[role="alert"]:has-text("Failed to load")')
          .count();
-        expect(errorToasts, `tab ${tabId}: "Failed to load" toast`).toBe(0);
+        expect(
+          errorToasts,
+          `tab ${tabId}: a global "Failed to load" toast is showing`,
+        ).toBe(0);

        await page.screenshot({
          path: `test-results/staging-tab-${tabId}.png`,
@@ -267,3 +486,56 @@ test.describe("staging canvas tabs", () => {
    ).toHaveLength(0);
  });
 });
+
+/*
+ * PROMOTION-READINESS — staging canvas E2E → HARD merge-gate
+ * ----------------------------------------------------------
+ * NOW RELIABLE (deterministic; these no longer flap on timing):
+ *   - Every wait is condition-based (toBeVisible / toHaveAttribute /
+ *     expect.poll). There is NO fixed waitForTimeout / sleep in the spec;
+ *     the only setTimeout is the bounded poll-interval inside
+ *     staging-setup.ts waitFor(), which has a hard deadline.
+ *   - Tabs are asserted on REAL settled content (non-empty, non-spinner),
+ *     not just "container is visible" — an empty or stuck-loading panel now
+ *     fails instead of shipping green.
+ *   - The ErrorBoundary ("Something went wrong") is asserted absent at app
+ *     hydration AND per tab — a React subtree crash can no longer pass.
+ *   - Visible error alerts inside a panel fail non-degraded tabs (was a
+ *     weak [role=alert]:has-text("Failed to load")-only check that missed
+ *     both other error phrasings and role-less error divs).
+ *   - The driven tab list is parity-checked against the live DOM, so a new
+ *     SidePanel tab can't ship un-tested and a removed one fails loud.
+ *   - Click→activation is confirmed (aria-selected) before asserting the
+ *     panel, removing a wrong-panel race.
+ *   - The suite is fail-closed: CANVAS_E2E_STAGING=1 with no tenant state
+ *     hard-errors (never skips→green); CANVAS_E2E_STAGING unset cleanly
+ *     skips (operator did not request staging).
+ *
+ * STILL BLOCKS PROMOTION-TO-REQUIRED (do NOT flip continue-on-error here —
+ * CTO-owned, RFC internal#219 §1):
+ *   - INFRA DEPENDENCY: each run provisions a real staging EC2 tenant
+ *     (12-20 min cold boot). Required-gate latency + AWS/Cloudflare/CP
+ *     availability become merge-blockers. A staging outage would freeze
+ *     main even though the code is fine — unacceptable for a required check
+ *     until staging has an SLA or this runs against a warm pre-provisioned
+ *     pool.
+ *   - SHARED-RESOURCE FLAKE SURFACE: TLS/DNS/ACME propagation on a shared
+ *     staging zone (staging-setup TLS_TIMEOUT_MS) is outside this repo's
+ *     control. Deterministic here ≠ deterministic upstream.
+ *   - SECRET DEPENDENCY: CP_STAGING_ADMIN_API_TOKEN must be present on the
+ *     runner. The workflow's skip-if-absent (core#2225) keeps a missing
+ *     secret from painting red — correct for non-gating, but a REQUIRED
+ *     check must instead guarantee the secret is always present, else it
+ *     skip-greens the very thing it is supposed to enforce.
+ *   - SINGLE-WORKSPACE COVERAGE: one hermes/platform_managed workspace that
+ *     does NOT boot an agent on staging (no CP LLM proxy env,
+ *     workspace-server #2162). Tabs render, but agent-dependent content paths
+ *     (live chat round-trip, traces from a real run) are not exercised.
+ *
+ * PROMOTION CHECKLIST (when CTO signs off on making this required):
+ *   1. Warm pre-provisioned tenant pool OR a staging SLA bounding boot time.
+ *   2. Guarantee CP_STAGING_ADMIN_API_TOKEN on the gating runner; turn the
+ *      skip-if-absent into a hard error for the required path.
+ *   3. Decide whether agent-dependent tabs need a wired LLM proxy on the
+ *      staging tenant (covers chat/traces real content) before gating them.
+ */
@@ -7,6 +7,14 @@ export default defineConfig({
  fullyParallel: false,
  workers: 1,
  retries: 0,
+  // Fail CLOSED when an explicit spec selection matches zero tests: if
+  // `playwright test e2e/chat-*.spec.ts` exited 0 after those files were
+  // renamed/moved/deleted, the false-green would silently gut the e2e-chat gate.
+  // NOTE(review): Playwright's CLI already fails on "No tests found" unless
+  // --pass-with-no-tests is given — confirm this config key is honoured.
+  // forbidOnly stops a stray `test.only` green-ing the suite while skipping the rest.
+  passWithNoTests: false,
+  forbidOnly: !!process.env.CI,
  use: {
    baseURL: process.env.PLAYWRIGHT_BASE_URL || "http://localhost:3000",
    headless: true,
@@ -0,0 +1,225 @@
+# Fail-closed BYOK billing
+
+**Status:** Proposal — CTO (王泓铭)-refined 2026-06-05.
+Owners: hongming (CTO)
+Base: molecule-core main @ `1955fdd0` (2026-06-04)
+
+This RFC formalizes the **fail-closed BYOK billing** model: the contract that a
+workspace which intends to run an LLM on the tenant's own credential
+(bring-your-own-key) must be **rejected at the create API** if that credential is
+missing or dead — loudly, comprehensively, and synchronously — never created and
+then wedged at provision time, and never silently allowed to fall through to a
+platform-billed default.
+
+It writes down the four hard requirements, audits the current implementation
+against them (two are met today, one partial, one missing), and specifies the
+two gaps to close. The derive-from-model SSOT and the platform proxy boundary are
+**non-goals** here — this RFC is only about closing the credential-validation
+holes around an already-correct billing-mode resolver.
+
+## TL;DR
+
+```
+create API request (runtime, model[, billing override])
+        │
+        ▼
+  derive provider/mode from providers.yaml registry SSOT   ── Req1 MET today
+  (explicit operator-override column = escape hatch)
+        │
+        ├─ mode == platform_managed ──────────────► create OK (proxy bills)
+        │
+        └─ mode == BYOK
+              │
+              ├─ GAP A: credential PRESENT for the derived provider?
+              │         (no → 422 MISSING_BYOK_CREDENTIAL, synchronous, loud)
+              │
+              ├─ GAP B: credential VALID? (cheap authed provider call;
+              │         401/403 → 422 INVALID_BYOK_CREDENTIAL, loud)
+              │
+              ▼
+        create OK → provision (re-checks presence as defense-in-depth)
+```
+
+## The model — four hard requirements
+
+1. **Explicit selection drives the adapter.** Provider/mode is *selected*, never
+   guessed. Today the selection is **derived deterministically** from the chosen
+   model via the `providers.yaml` registry SSOT (`DeriveProvider(runtime, model,
+   availableAuthEnv)`); the per-workspace operator-override column is the explicit
+   escape hatch with top precedence. There is no heuristic fallback to a vendor.
+
+2. **BYOK requires the credential, validated AT CREATION, fail-closed.** A
+   BYOK workspace with no usable credential for the derived provider must be
+   **REJECTED at the create API** with a clear, comprehensive error (which
+   credential / env var, which provider, what to do). It must NOT be created
+   (201) and then wedged late at provision.
+
+3. **Preflight-validate the credential is VALID, not just present.** Presence is
+   necessary but not sufficient: a present-but-dead token (revoked, expired,
+   wrong-scope) must be caught by a *cheap authenticated provider call* (a
+   models-list or a 1-token completion) and the workspace rejected on 401/403
+   before it goes live.
+
+4. **Fail LOUD, never silent.** Any missing / invalid / rejected credential
+   errors loudly: comprehensive server logs (provider, env var, code, workspace)
+   plus a user-visible structured reason. It must NEVER silently fall through to
+   `platform_managed` or to any default that bills the platform for what the
+   tenant declared as BYOK.
+
+## Current-state audit
+
+References are `path:line` at base `1955fdd0`. Workspace-server paths are relative
+to `workspace-server/`; the proxy/charge layer lives in the controlplane repo.
+
+### Req1 — Explicit selection drives the adapter — **MET**
+
+- `internal/handlers/llm_billing_mode.go:197-264` — `ResolveLLMBillingModeDerived`:
+  precedence 1 = explicit workspace override column; precedence 2 = derive the
+  provider from `(runtime, model)` via the embedded `providers.yaml` registry
+  (`manifest.DeriveProvider`). A specific non-platform vendor → `byok`; a platform
+  provider → `platform_managed`. No guessing.
+- `internal/handlers/workspace.go:420-503` — create-time validation already
+  hard-rejects (422) an unregistered `(runtime, model)` pair
+  (`UNREGISTERED_MODEL_FOR_RUNTIME`) and a model whose derived provider is absent
+  from the catalog (`DERIVED_PROVIDER_NOT_IN_REGISTRY`), and requires an explicit
+  model (`MODEL_REQUIRED`). The selection input is validated against the SSOT at
+  the boundary.
+
+### Req4 — Fail loud, never silent — **MET**
+
+- Default-closed on ambiguity: `internal/handlers/llm_billing_mode.go:26-39` and
+  `:217-252` — every ambiguous / error / no-id path resolves to
+  `platform_managed` *with the error surfaced* (logged + returned on the
+  resolution struct), never a silent BYOK→platform flip that bills the tenant
+  by surprise.
+- Proxy is platform-managed-only: controlplane `internal/handlers/llm_proxy.go:94,
+  158,223,664-748` — the platform LLM proxy only serves platform-managed traffic;
+  BYOK never routes through it.
+- Charge layer never bills the platform for BYOK: controlplane
+  `internal/credits/llm_billing.go:156-233` — BYOK usage is not charged to the
+  platform ledger.
+
+### Req2 — Credential validated at creation, fail-closed — **PARTIAL**
+
+- The fail-closed BYOK check EXISTS but only at **provision** time:
+  `internal/handlers/workspace_provision_shared.go:225-232` — if
+  `ResolvedMode == BYOK && !HasUsableLLMCred`, the provisioner aborts with
+  `MISSING_BYOK_CREDENTIAL` (molecule-core#1994).
+- Gap: a credential-less BYOK **create** returns **201** and only fails later at
+  provision. That violates Req2's "rejected at the create API, not
+  created-then-wedged" — the user gets a workspace row and a delayed, async
+  failure instead of a synchronous 4xx.
+
+### Req3 — Credential is VALID, not just present — **MISSING**
+
+- `HasUsableLLMCred` is **presence-only**:
+  `internal/handlers/workspace_provision.go:1138-1145` —
+  `hasAnyPlatformManagedLLMKey` returns true if any auth-env key is a non-empty
+  string. There is **no liveness probe anywhere** — a present-but-revoked token
+  passes every gate and the workspace goes live, then wedges at first real LLM
+  call (the failure Req3 exists to pull forward).
+
+## Scope of work — the two gaps
+
+### Gap A (Req2): BYOK credential-presence check at the CREATE boundary
+
+Add a synchronous presence check inside the create handler
+(`(h *WorkspaceHandler) Create`, `internal/handlers/workspace.go:242`), after
+billing-mode resolution and the existing registry validation, **in addition to**
+the provision-time check (keep that as defense-in-depth — do not remove it).
+
+- When the resolved mode is `byok`, resolve the derived provider's accepted auth
+  env-var names from the `providers.yaml` registry (`auth_env` list, e.g.
+  `[ANTHROPIC_API_KEY, ANTHROPIC_AUTH_TOKEN]` for `anthropic-api`) and confirm at
+  least one is present (non-empty) for the workspace at any in-scope secret level.
+- On absence: **422** with a structured body:
+  `code: MISSING_BYOK_CREDENTIAL`, plus `provider`, `missing_env` (the candidate
+  env-var names), `billing_mode: byok`, and a human `error` that names the
+  provider, the missing credential, and the remediation ("set
+  `ANTHROPIC_API_KEY` as a workspace or org secret, then retry create"). Reuse the
+  existing `formatMissingBYOKCredentialError` wording where possible so create and
+  provision speak with one voice.
+- Log loudly with the same `MISSING_BYOK_CREDENTIAL` code the provisioner uses, so
+  the two checkpoints are greppable as one class.
+
+### Gap B (Req3): credential LIVENESS preflight
+
+Add a minimal authenticated probe per provider, driven entirely by the
+`providers.yaml` SSOT — no hardcoded endpoints.
+
+- Derive the probe target from the registry entry: `protocol`/`auth_mode`,
+  `base_url_template` or `base_url_anthropic`, and the `auth_env` /
+  `auth_token_env` that carries the secret. Make the cheapest authenticated call
+  the surface offers (models-list where available, else a 1-token completion).
+- Fail-closed on **401/403**: reject the create with **422**
+  `code: INVALID_BYOK_CREDENTIAL` (provider, env var, upstream status, remediation
+  "the credential was found but the provider rejected it — rotate the key").
+- **Recommendation: probe at create** for fast feedback, with a **provision-time
+  re-check** (the credential can be revoked between create and provision; the
+  provisioner is the last gate before the workspace is live). The provision
+  re-check upgrades `workspace_provision_shared.go:225-232` from presence-only to
+  presence-and-liveness for BYOK.
+- The probe **must be cheap and time-bounded** (see Risks).
+- **OAuth-provider nuance:** registry entries with `auth_mode: oauth` and
+  `base_url: null` (e.g. `anthropic-oauth`, codex chatgpt-subscription) have no
+  HTTP surface the platform dials — the CLI talks to the vendor directly. For
+  these, the liveness probe has no cheap server-side equivalent; scope Gap B's
+  *active* probe to keyed providers with a non-null base URL and fall back to the
+  presence check (Gap A) for OAuth modes. Do not block on inventing an OAuth
+  liveness call in this RFC.
+
+## Non-goals
+
+- **Not** changing the derive-from-model SSOT. Selection stays
+  `providers.yaml` → `DeriveProvider`; the operator-override column stays the only
+  escape hatch. No new heuristics.
+- **Not** routing BYOK through the platform proxy. The proxy stays
+  platform-managed-only; this RFC adds validation around BYOK, it does not move
+  BYOK onto a platform code path.
+- **Not** re-billing or changing the charge layer. BYOK stays off the platform
+  ledger.
+- **Not** adding an OAuth-subscription liveness call (deferred — see Gap B
+  nuance).
+
+## Risks
+
+- **Preflight latency on create.** An authenticated provider round-trip adds
+  hundreds of ms to a few seconds to create. Mitigate with a hard, short timeout
+  (target ≤ ~3s) and a clear, distinct error on timeout — a probe timeout must
+  NOT be treated as "valid" (fail-closed) but must also be distinguishable from a
+  real 401/403 so transient upstream blips are diagnosable. Consider whether a
+  probe timeout should 422 (strict fail-closed) or surface a soft warning and
+  defer to the provision-time re-check; default to fail-closed at create for the
+  loud-feedback goal, with the provision re-check as the safety net.
+- **Provider rate-limits.** A models-list / 1-token probe consumes the tenant's
+  quota and can be rate-limited (429). A 429 is NOT an auth failure — treat it as
+  inconclusive (do not reject as `INVALID_BYOK_CREDENTIAL`), log it, and defer to
+  the presence check + provision-time re-check rather than blocking create on a
+  429.
+- **Provider-side flakiness.** 5xx from the provider is inconclusive, same
+  handling as 429 — never silently pass, never hard-reject on a 5xx; log and
+  defer.
+
+## Test plan
+
+1. **Gap A — create-time presence (unit + handler):**
+   - BYOK-deriving `(runtime, model)` with NO credential in any scope → **422
+     `MISSING_BYOK_CREDENTIAL`**, body names provider + missing env; no workspace
+     row created.
+   - Same with the credential present → create proceeds (mode `byok`).
+   - `platform_managed`-deriving model with no tenant key → create proceeds
+     (unchanged; proxy path).
+2. **Gap B — liveness (unit with a stubbed provider HTTP surface):**
+   - Present-but-401/403 key → **422 `INVALID_BYOK_CREDENTIAL`**.
+   - Valid key → create proceeds.
+   - 429 / 5xx → inconclusive: create NOT rejected as invalid; logged; provision
+     re-check still runs. Timeout → fail-closed at create, error distinct from 401/403.
+   - `auth_mode: oauth` + `base_url: null` provider → active probe skipped,
+     presence check governs.
+3. **Provision defense-in-depth (existing + extended):**
+   - Credential revoked between create and provision → provisioner aborts
+     (presence today; liveness re-check after Gap B).
+   - Existing `MISSING_BYOK_CREDENTIAL` provision-abort test stays green.
+4. **Req4 regression guard:** assert no path flips a BYOK selection to
+   `platform_managed` silently — an absent/dead BYOK credential always produces a
+   loud 4xx with a code, never a 201 that bills the platform.
@@ -114,7 +114,7 @@ Opt-in pattern: when `idle_prompt` is non-empty in `config.yaml`, the workspace

 Three Gin middleware classes gate server-side routes. Full contract in `docs/runbooks/admin-auth.md`.

- **`middleware.AdminAuth(db.DB)`** — strict bearer-only. Used for any route where a forged request could leak prompts/memory, create/mutate workspaces, or leak ops intel. Lazy-bootstrap fail-open when `HasAnyLiveTokenGlobal` returns 0.
+- **`middleware.AdminAuth(db.DB)`** — strict bearer-only and **fail-closed in every environment** (harden/no-fail-open-auth). Used for any route where a forged request could leak prompts/memory, create/mutate workspaces, or leak ops intel. The former lazy-bootstrap fail-open (pass when `HasAnyLiveTokenGlobal` returns 0) and the dev-mode escape hatch have both been removed — a fresh install must provision `ADMIN_TOKEN` to reach admin routes.
 - **`middleware.CanvasOrBearer(db.DB)`** — accepts a bearer token OR an Origin matching `CORS_ORIGINS`. Used **only** for cosmetic routes where a forged request has zero data/security impact. Currently only on `PUT /canvas/viewport`. Do not extend this to any route that leaks data or creates resources — see the runbook.
 - **`middleware.WorkspaceAuth(db.DB)`** — binds a bearer token to `:id`. Workspace A's token cannot hit workspace B's sub-routes. Used for the entire `/workspaces/:id/*` group except the A2A proxy (which has its own `CanCommunicate` layer).

@@ -24,7 +24,7 @@ cd molecule-core

 That single script:

-1. Generates an `ADMIN_TOKEN` into `.env` (first run only — preserved on re-runs)
+1. Generates an `ADMIN_TOKEN` into `.env` (first run only — preserved on re-runs) and exports the matching `NEXT_PUBLIC_ADMIN_TOKEN` so the canvas authenticates with it. Auth is **fail-closed in every environment** (including local dev) — there is no dev-mode fail-open; the canvas reaches admin/workspace routes only because it sends this bearer.
 2. Brings up Postgres, Redis, Langfuse, ClickHouse, and Temporal via `infra/scripts/setup.sh`
 3. Populates the workspace template + plugin registry from `manifest.json`
 4. Builds and starts the platform on `http://localhost:8080`
@@ -62,11 +62,17 @@ If you only want the raw compose flow:
 docker compose -f docker-compose.infra.yml up -d
 ```

+> **Auth is fail-closed even in local dev.** Pick any local admin token and
+> set it on *both* sides — the platform (`ADMIN_TOKEN`) and the canvas
+> (`NEXT_PUBLIC_ADMIN_TOKEN`, same value). Without it the canvas 401s on every
+> admin/workspace call. (`scripts/dev-start.sh` does this for you; the manual
+> steps below set it explicitly.)
+
 ### Step 3: Start the platform

 ```bash
 cd workspace-server
-go run ./cmd/server
+ADMIN_TOKEN=dev-local-admin-token MOLECULE_ENV=development go run ./cmd/server
 ```

 The control plane listens on `http://localhost:8080`.
@@ -78,7 +84,7 @@ In a new terminal:
 ```bash
 cd canvas
 npm install
-npm run dev
+NEXT_PUBLIC_ADMIN_TOKEN=dev-local-admin-token npm run dev   # MUST match ADMIN_TOKEN above
 ```

 Open `http://localhost:3000`.
@@ -1,5 +1,29 @@
 # Admin Authentication Runbook

+## Auth is fail-CLOSED in every environment — `ADMIN_TOKEN` is the bootstrap credential
+
+Per the CTO "nothing should be fail-open" directive, **every** auth path on the
+workspace-server fails closed — there is no dev-mode / zero-token / DB-outage
+hatch that grants access. This includes:
+
+- `AdminAuth` and `WorkspaceAuth` (admin + per-workspace routes),
+- `CanvasOrBearer` (the cosmetic `PUT /canvas/viewport` route), and
+- `validateDiscoveryCaller` (`/registry/:id/peers`, `/registry/discover/:id`).
+
+Consequence for **bootstrap**: a brand-new self-hosted / dev install has **no
+DB-backed tokens yet**, and there is no longer a fail-open that lets the first
+request through. The **only** way to reach admin routes (and to mint the first
+workspace token via `POST /admin/workspaces/:id/tokens`) is to set `ADMIN_TOKEN`
+in the platform environment and present it as the bearer. This is the "local
+mimics production" principle: there is no zero-config bootstrap.
+
+- **Local dev:** `scripts/dev-start.sh` provisions a deterministic
+  `ADMIN_TOKEN` into `.env` (and exports the matching `NEXT_PUBLIC_ADMIN_TOKEN`
+  so the canvas authenticates with it). See `docs/quickstart.md`.
+- **Self-hosted / SaaS:** set `ADMIN_TOKEN` to a strong random secret
+  (`openssl rand -base64 32`) in the platform env and bake the matching
+  `NEXT_PUBLIC_ADMIN_TOKEN` into the canvas bundle.
+
 ## Required: set `MOLECULE_ENV` in all non-dev environments

 ```bash
@@ -7,8 +31,10 @@
 MOLECULE_ENV=production
 ```

-This matches the production tenant default and disables development-only
-shortcuts. Staging and production smoke tests should use the real user/API
+This matches the production tenant default. NOTE: `MOLECULE_ENV` no longer gates
+any auth decision — it only drives NON-security local-dev conveniences (loopback
+bind, relaxed rate limit). Setting it to `dev`/`development` does **not** relax
+authentication. Staging and production smoke tests should use the real user/API
 workflow: create a workspace, then mint a one-time displayed workspace bearer
 with `POST /admin/workspaces/:id/tokens`.

@@ -23,5 +49,7 @@ The platform uses `ADMIN_TOKEN` as the bearer credential for admin-gated endpoin
 | `POST /org/import` | `Authorization: Bearer <ADMIN_TOKEN>` |
 | `POST /admin/workspaces/:id/tokens` | `Authorization: Bearer <ADMIN_TOKEN>`; plaintext token returned once |

-Missing or invalid `ADMIN_TOKEN` → AdminAuth fails open in dev mode (no token set), or
-returns 401 in production mode (token set but invalid).
+Missing or invalid bearer → **401 in every environment** (fail-closed; no
+dev-mode fail-open). If the auth datastore is unreachable, auth-gated routes
+return **503** (`platform_unavailable`) — an availability tradeoff that grants no
+access — rather than allowing the request through.
@@ -28,7 +28,9 @@
    {"name": "claude-code-default", "repo": "molecule-ai/molecule-ai-workspace-template-claude-code", "ref": "main"},
    {"name": "hermes", "repo": "molecule-ai/molecule-ai-workspace-template-hermes", "ref": "main"},
    {"name": "openclaw", "repo": "molecule-ai/molecule-ai-workspace-template-openclaw", "ref": "main"},
-    {"name": "codex", "repo": "molecule-ai/molecule-ai-workspace-template-codex", "ref": "main"}
+    {"name": "codex", "repo": "molecule-ai/molecule-ai-workspace-template-codex", "ref": "main"},
+    {"name": "google-adk", "repo": "molecule-ai/molecule-ai-workspace-template-google-adk", "ref": "main"},
+    {"name": "seo-agent", "repo": "molecule-ai/molecule-ai-workspace-template-seo-agent", "ref": "main"}
  ],
  "org_templates": [
    {"name": "molecule-dev", "repo": "molecule-ai/molecule-ai-org-template-molecule-dev", "ref": "main"},
@@ -121,6 +121,92 @@ python -m pytest .gitea/scripts/tests/test_gate_auto_fire_live.py -v

 ---

+## 6. Fail-closed CI integrity — no fail-open gates (MERGE-BLOCKING)
+
+**Rule:** No CI workflow, CI script, or test check may **FAIL OPEN** — i.e. it
+must never report GREEN (exit 0, skip, warn-and-continue, `|| true`, or any
+"return success") when it could **not actually verify its invariant**. A check
+that cannot verify MUST **fail loud** (`::error::` annotation **and** a nonzero
+exit) and **fail closed** (treat inability-to-verify as **FAILURE**, never as a
+pass). An unverifiable check is a red check, full stop.
+
+This is the same family of bug as the no-flakes rule (§ *No flakes*): a green
+that isn't real. A flake is a green/red that flips for an unnamed reason; a
+fail-open gate is a green that was never earned. Both let unverified code reach
+`main`, and both are merge-blocking.
+
+### Applies to
+
+Required / hard gates on **protected contexts**: pushes to `main`, internal
+protected branches, and **same-repo** PRs (`pull_request_target`). On these
+contexts the *cause* of an unverifiable run is **irrelevant** — every one of the
+following MUST fail closed:
+
+- auth failure (401 / 403),
+- missing token or identity,
+- under-scoped credential,
+- unreachable dependency (network, Infisical, control-plane, registry),
+- a required test file that is absent or collects zero tests,
+- any transient error the check cannot prove was benign.
+
+"I couldn't check" is reported and scored exactly like "the check failed." A
+gate that can be silently defanged by removing a secret is not a gate.
+
+### The one allowed exception — explicit trust-boundary split
+
+Legitimate degradation is permitted **only** where the secret genuinely cannot
+exist — e.g. **fork PRs**, which by design have no access to repo secrets. Such
+degradation is allowed **only** when it is:
+
+1. gated behind an **explicit** fork / advisory branch in the workflow logic
+   (an intentional trust-boundary split, not an incidental `if: secrets...`),
+2. **clearly marked advisory** in its name and output, and
+3. **NOT counted as a passing REQUIRED context** — it may inform, it may not
+   satisfy the gate.
+
+Silent degradation that satisfies a required gate is **forbidden**. If a fork PR
+needs the real check, it must run via a maintainer-triggered same-repo path
+(where the secret exists and the check therefore fails closed), not by quietly
+passing the required context with no verification.
+
+### Auth-failure vs. genuine-absence — do not conflate
+
+Distinguish the two so a real finding is never masked and a masked finding is
+never mistaken for real:
+
+- **`403` (or 401) on a protected context → fail closed.** You could not verify;
+  that is a check failure, not a finding about the resource.
+- **A real `404` from a read made *with a valid, sufficiently-scoped token* →
+  the real finding.** The resource is genuinely absent; report it as such.
+
+A `403` reported as "resource not found" is itself a fail-open bug.
+
+### Required practice
+
+Every gate that depends on a token, an identity, or an external read MUST ship
+with a test or workflow-lint covering the **absent-identity / unauthorized /
+missing-file path** that asserts the gate **FAILS** (not skips, not passes).
+Add or update that coverage in the **same PR** that adds or changes the gate.
+A gate without a proven failure path is not yet a gate.
+
+### Violations seen in this codebase (all merge-blocking if reintroduced)
+
+- **serving-e2e** reporting vacuously GREEN when the Infisical identity is
+  absent (no per-(provider × auth) completion was actually exercised).
+- **branch-protection / BP-drift lints** returning `0` on a `403` instead of
+  failing closed on the unverifiable response.
+- **verify-template-models** run without `-strict`, so a drift it could not
+  confirm passed silently.
+- A **referenced-but-absent pytest file** that collects zero tests and reports
+  green — silent pass with no assertions executed.
+
+Each of these is a fail-open gate and is a merge blocker until it fails loud and
+closed on protected contexts. See also the production fail-closed defaults in
+`runbooks/sop-production-cicd.md` (*Production Defaults*), which apply the same
+principle to deploy-time gates.
+
+---
+
 ## References

 - #2159 — gate auto-trigger not firing (root cause: stale PR heads lacking
@@ -8,26 +8,39 @@ against the latest `main`.

 ## Queue Contract

-Add the `merge-queue` label to an open PR when it is ready to merge.
+**Auto-discovery (opt-OUT, default).** You do NOT need to label a PR. The bot
+auto-discovers every open same-repo PR and merges any that meets the bar. The
+`merge-queue` label is now optional metadata, not a gate. This removed the
+historical autonomy gap: agent Gitea tokens lack `write:issue` (labels are
+issue-scoped), so agents could never self-label and ready PRs stalled.
+
+To keep a PR OUT of autonomous merging, add an opt-OUT label:
+`merge-queue-hold`, `do-not-auto-merge`, or `wip`. Draft PRs are also skipped.

 The bot processes one PR per tick:

-1. Confirms `main` is green.
-2. Selects the oldest open PR carrying `merge-queue`.
-3. Skips PRs with `merge-queue-hold`.
-4. Rejects fork PRs because the queue may only update same-repo branches.
-5. If the PR head does not contain current `main`, calls Gitea's
+1. Confirms `main`'s branch-protection-required push contexts are green.
+2. Selects the oldest open same-repo PR that is NOT opt-out-labeled and NOT a
+   draft (auto-discovery). With `AUTO_DISCOVER=0` it falls back to legacy
+   opt-IN: only PRs carrying `merge-queue` are considered.
+3. Rejects fork PRs because the queue may only update same-repo branches.
+4. If the PR head does not contain current `main`, calls Gitea's
   `/pulls/{n}/update?style=merge` endpoint and waits for CI on the new head.
-6. Merges only after the current PR head has required contexts green:
-   - `CI / all-required (pull_request)`
-   - `sop-checklist / all-items-acked (pull_request)`
+5. Merges only when, on the PR's CURRENT head sha:
+   - `>= required_approvals` distinct genuine official `APPROVED` reviews from
+     the recognised reviewer set (read from branch protection; default 2),
+   - no open official `REQUEST_CHANGES`,
+   - every branch-protection-required status context is green, and
+   - the PR is `mergeable` (Gitea returns `True`; `None`/`False` = wait).

-The workflow is serialized with `concurrency`, so two queued PRs cannot be
+The merge bar is unchanged by auto-discovery — only WHICH PRs are considered
+changes. The workflow is serialized with `concurrency`, so two PRs cannot be
 merged against the same observed `main`.

 ## Operator Commands

-Queue a PR:
+Queue a PR (optional — auto-discovery already considers every ready PR; the
+label is just visible metadata):

 ```bash
 curl -fsS -X POST \
@@ -37,7 +50,8 @@ curl -fsS -X POST \
  -d '{"labels":["merge-queue"]}'
 ```

-Temporarily hold a queued PR:
+Keep a PR OUT of autonomous merging (opt-OUT — use `merge-queue-hold`,
+`do-not-auto-merge`, or `wip`):

 ```bash
 curl -fsS -X POST \
@@ -56,9 +70,11 @@ REPO=molecule-ai/molecule-core \
 WATCH_BRANCH=main \
 QUEUE_LABEL=merge-queue \
 HOLD_LABEL=merge-queue-hold \
+AUTO_DISCOVER=1 \
+OPT_OUT_LABELS=do-not-auto-merge,wip \
+REVIEWER_SET=agent-reviewer,agent-researcher,agent-reviewer-cr2 \
 UPDATE_STYLE=merge \
-REQUIRED_CONTEXTS='CI / all-required (pull_request),sop-checklist / all-items-acked (pull_request)' \
-python3 .gitea/scripts/gitea-merge-queue.py
+python3 .gitea/scripts/gitea-merge-queue.py --dry-run
 ```

 Dry run:
@@ -35,6 +35,7 @@ Every production CI/CD PR must include concrete answers for:
 - Verification: how production state is proven after deployment.
 - Logging: proof that CI logs do not contain raw production runtime, SSM, or secret-adjacent output.
 - Rollback: the exact command, variable, or workflow to return to a known-good tag/digest.
+- No fail-open gates: required checks fail loud + closed on protected contexts (no skip/`|| true`/`403`-as-pass). See `runbooks/dev-sop.md` § *Fail-closed CI integrity*.

 ## Human Review

@@ -50,8 +50,22 @@ check_category() {
        repo=$(echo "$MANIFEST_JSON" | jq -r ".${category}[$i].repo")
        TOTAL=$((TOTAL + 1))

-        # Check repo existence via Gitea API (public endpoint, no auth needed)
-        http_code=$(curl -sS -o /dev/null -w '%{http_code}' --max-time 10 "${GITEA_API}/${repo}" 2>/dev/null || true)
+        # Check repo existence via Gitea API. Many manifest repos are PRIVATE
+        # (e.g. the workspace templates), so an *unauthenticated* GET returns
+        # 404 even when the repo exists — indistinguishable from a genuinely
+        # missing repo. We therefore authenticate with the same token
+        # clone-manifest.sh uses (MOLECULE_GITEA_TOKEN). A 404 *with* a valid
+        # token still means the repo is truly missing, which is what we want
+        # to catch. If the token is unset (local dev), fall back to an
+        # unauthenticated request — private repos will then 404, so run the
+        # check in CI where the token is present.
+        if [ -n "${MOLECULE_GITEA_TOKEN:-}" ]; then
+            http_code=$(curl -sS -o /dev/null -w '%{http_code}' --max-time 10 \
+                -H "Authorization: token ${MOLECULE_GITEA_TOKEN}" \
+                "${GITEA_API}/${repo}" 2>/dev/null || true)
+        else
+            http_code=$(curl -sS -o /dev/null -w '%{http_code}' --max-time 10 "${GITEA_API}/${repo}" 2>/dev/null || true)
+        fi

        if [ "$http_code" != "200" ]; then
            echo "::error::manifest.json ${category} entry '${name}' → repo '${repo}' returned HTTP ${http_code} (expected 200). Delete the manifest entry BEFORE deleting the repo." >&2
@@ -46,46 +46,67 @@ cleanup() {
 trap cleanup EXIT INT TERM

 # ─────────────────────────────────────────────── 1. dev-mode auth posture
-
-# The AdminAuth middleware closes its fail-open the moment the first
-# workspace token lands in the DB — at which point /workspaces and
-# other admin routes 401 unless the caller has either ADMIN_TOKEN or
-# the dev-mode escape hatch. The canvas at localhost:3000 has no
-# bearer token to send, so without one of those two paths it can't
-# call admin endpoints after a workspace exists.
 #
-# For local dev the right posture is the dev-mode escape hatch:
+# SECURITY (harden/no-fail-open-auth): the workspace-server auth chain is
+# now fail-CLOSED in EVERY environment, dev included. There is NO dev-mode
+# fail-open escape hatch anymore — AdminAuth / WorkspaceAuth / discovery all
+# require a real credential. So local dev must AUTHENTICATE, not run open.
 #
-#   MOLECULE_ENV=development AND ADMIN_TOKEN unset
+# The clean way to keep the canvas working locally is to provision a
+# deterministic ADMIN_TOKEN and hand the matching NEXT_PUBLIC_ADMIN_TOKEN to
+# the canvas bundle. The canvas already attaches `Authorization: Bearer
+# $NEXT_PUBLIC_ADMIN_TOKEN` on every platform call (canvas/src/lib/api.ts),
+# and next.config.ts warns if the pair is half-set. We set BOTH here.
 #
-# That makes middleware.isDevModeFailOpen() return true and lets the
-# canvas keep working without a bearer. Setting ADMIN_TOKEN here
-# would BREAK the canvas (it has no way to read that token in dev).
+#   MOLECULE_ENV=development   — dev conveniences (loopback bind, relaxed
+#                                rate limit). NOT an auth lever.
+#   ADMIN_TOKEN=<dev value>    — server-side bearer AdminAuth/WorkspaceAuth
+#                                enforce (Tier-2b). Real credential.
+#   NEXT_PUBLIC_ADMIN_TOKEN    — same value, baked into the canvas bundle so
+#                                the browser sends the matching bearer.
 #
-# For SaaS the platform is provisioned with ADMIN_TOKEN set AND
-# MOLECULE_ENV=production — either one closes the hatch. So the dev
-# mode signal here is safe (it's only active when both other knobs
-# are absent).
+# For SaaS the platform is provisioned with a random ADMIN_TOKEN + the
+# canvas image baked with the matching NEXT_PUBLIC_ADMIN_TOKEN, plus
+# MOLECULE_ENV=production. Same shape, stronger secret.
 if [ -f "$ENV_FILE" ] && grep -q '^MOLECULE_ENV=' "$ENV_FILE"; then
    echo "==> Reusing MOLECULE_ENV from existing .env"
 else
-    echo "==> Setting MOLECULE_ENV=development in .env (dev-mode auth hatch)"
+    echo "==> Setting MOLECULE_ENV=development in .env"
    {
        if [ -f "$ENV_FILE" ]; then
            cat "$ENV_FILE"
            echo ""
        fi
        echo "# Generated by scripts/dev-start.sh on $(date -u +%Y-%m-%dT%H:%M:%SZ)"
-        echo "# Local-dev auth posture: dev-mode fail-open lets the canvas at"
-        echo "# localhost:3000 call admin endpoints without a bearer token."
-        echo "# DO NOT set ADMIN_TOKEN here in dev — it would close the hatch"
-        echo "# and the canvas would 401 on every admin call."
+        echo "# Local-dev conveniences (loopback bind, relaxed rate limit)."
+        echo "# Auth is fail-closed even in dev — see ADMIN_TOKEN below."
        echo "MOLECULE_ENV=development"
    } > "$ENV_FILE.tmp"
    mv "$ENV_FILE.tmp" "$ENV_FILE"
    echo "    Saved to $ENV_FILE"
 fi

+# Provision a deterministic dev ADMIN_TOKEN (idempotent — preserved across
+# re-runs). This is the credential the canvas authenticates with locally; it
+# is NOT a secret (it only guards your own localhost stack), so a fixed,
+# well-known value is fine and keeps re-runs reproducible.
+#
+# Only a NON-EMPTY ADMIN_TOKEN counts as "already provisioned". A blank
+# `ADMIN_TOKEN=` / `ADMIN_TOKEN=""` placeholder (e.g. copied from an env
+# template) must not be reused: auth is fail-closed, so an empty token would
+# leave the canvas 401ing on every admin call. The line appended below comes
+# later in the file, so it wins over the blank placeholder when .env is sourced.
+DEV_ADMIN_TOKEN="dev-local-admin-token"
+if [ -f "$ENV_FILE" ] && grep -Eq "^ADMIN_TOKEN=[\"']?[^\"'[:space:]]" "$ENV_FILE"; then
+    echo "==> Reusing ADMIN_TOKEN from existing .env"
+else
+    echo "==> Provisioning dev ADMIN_TOKEN in .env (fail-closed auth, authenticated canvas)"
+    {
+        # Same guard as the MOLECULE_ENV step: carry over existing content
+        # only when there is a file to carry over.
+        if [ -f "$ENV_FILE" ]; then
+            cat "$ENV_FILE"
+            echo ""
+        fi
+        echo "# Dev ADMIN_TOKEN — the canvas authenticates with this locally."
+        echo "# Auth is fail-closed; without a matching bearer the canvas 401s."
+        echo "# Fixed value is fine: it only guards your localhost stack."
+        echo "ADMIN_TOKEN=$DEV_ADMIN_TOKEN"
+    } > "$ENV_FILE.tmp"
+    # Write-then-rename so a partially written .env is never left behind.
+    mv "$ENV_FILE.tmp" "$ENV_FILE"
+    echo "    Saved to $ENV_FILE"
+fi
+
 # Source .env so the platform inherits ADMIN_TOKEN (and anything else
 # the user has added — e.g. ANTHROPIC_API_KEY for skipping the canvas
 # Secrets UI). `set -a` exports every assignment in the sourced file
@@ -95,6 +116,12 @@ set -a
 . "$ENV_FILE"
 set +a

+# The canvas reads NEXT_PUBLIC_ADMIN_TOKEN at build/dev time and attaches it
+# as the bearer on every platform call. Mirror the server-side ADMIN_TOKEN
+# into it so the matched-pair guard in canvas/next.config.ts is satisfied and
+# the browser authenticates. Exported for the `npm run dev` child below.
+export NEXT_PUBLIC_ADMIN_TOKEN="$ADMIN_TOKEN"
+
 # ─────────────────────────────────────────────── 2. infra + templates

 # Use setup.sh (not raw docker-compose) so the template registry gets
@@ -195,7 +222,9 @@ cat <<EOF
  Molecule AI dev environment ready

  Canvas:   http://localhost:3000
-  Platform: http://localhost:8080
+  Platform: http://localhost:8080  (bound to loopback in dev)
+  Auth:     fail-closed — canvas authenticates with the dev ADMIN_TOKEN
+            (ADMIN_TOKEN + NEXT_PUBLIC_ADMIN_TOKEN, see .env)
  Logs:     /tmp/molecule-platform.log
            /tmp/molecule-canvas.log

@@ -17,6 +17,33 @@ e2e_extract_token() {
  python3 "$(dirname "${BASH_SOURCE[0]}")/_extract_token.py"
 }

+# Populate a curl-args array with the platform admin bearer, IF one is set.
+#
+# AdminAuth (workspace-server/internal/middleware/wsauth_middleware.go:161)
+# fail-opens ONLY while ADMIN_TOKEN is unset AND no workspace token exists yet
+# (devmode.go:50). The e2e-api CI job now sets ADMIN_TOKEN on the platform and
+# exports the matching MOLECULE_ADMIN_TOKEN here, which flips fail-open OFF — so
+# every admin-gated route (GET/POST/DELETE /workspaces, /events, /bundles,
+# /org/import, …) now requires the EXACT ADMIN_TOKEN as bearer (Tier-2b rejects
+# workspace bearers, wsauth_middleware.go:250). Helpers that hit admin routes
+# (e2e_cleanup_all_workspaces, e2e_delete_workspace's default path) must send it.
+#
+# Guarded if-set so a bootstrap/dev platform with no admin token (fail-open)
+# still works with zero auth. Mirrors e2e_mint_workspace_token's admin_auth.
+#
+# Usage:
+#   local admin_auth=(); e2e_admin_auth_args admin_auth
+#   curl -s "$BASE/workspaces" ${admin_auth[@]+"${admin_auth[@]}"}
+e2e_admin_auth_args() {
+  # $1 is the NAME of the caller's array variable; it is filled in place via
+  # eval (indirect assignment), so nothing is printed by this function.
+  local _outname="$1"
+  # Prefer MOLECULE_ADMIN_TOKEN, then ADMIN_TOKEN; empty when neither is set.
+  local _bearer="${MOLECULE_ADMIN_TOKEN:-${ADMIN_TOKEN:-}}"
+  if [ -n "$_bearer" ]; then
+    # \$_bearer is escaped so it expands inside the eval'd, double-quoted
+    # string: the whole header stays ONE array element following -H.
+    eval "$_outname=(-H \"Authorization: Bearer \$_bearer\")"
+  else
+    # No token available: hand back an empty array so callers add no header.
+    eval "$_outname=()"
+  fi
+}
+
 # Delete every workspace currently on the platform. Use at the top of a
 # script so count-based assertions are reproducible across runs.
 # Mint a fresh workspace auth token via the real admin endpoint.
@@ -53,19 +80,38 @@ e2e_delete_workspace() {
  if [ -z "$wid" ]; then
    return 0
  fi
+  # DELETE /workspaces/:id is behind AdminAuth (List/Delete are gated at
+  # router.go:165-167); the GET /workspaces/:id used for the name lookup is
+  # public (router.go:155). Callers that pass their own auth curl args
+  # authenticate themselves; the cleanup-trap callers in
+  # poll-mode/notify/priority pass NO curl args and rely on this fallback to
+  # the platform admin bearer so the DELETE doesn't 401 once ADMIN_TOKEN is set.
+  if [ "${#curl_args[@]}" -eq 0 ]; then
+    e2e_admin_auth_args curl_args
+  fi
+  # ${curl_args[@]+"…"} guard: under `set -u` an empty array expands to an
+  # "unbound variable" error on bash <4.4 (macOS 3.2, some Linux). This form
+  # expands to nothing when the array is empty. Callers from the priority-
+  # runtimes EXIT trap pass no extra curl args, so the array IS empty there —
+  # without the guard the trap aborts non-zero AFTER the gate already passed,
+  # turning a validated run RED. (Same idiom already used for CREATED_WSIDS.)
  if [ -z "$name" ]; then
-    name=$(curl -s "$BASE/workspaces/$wid" "${curl_args[@]}" | python3 -c "import json,sys
+    name=$(curl -s "$BASE/workspaces/$wid" ${curl_args[@]+"${curl_args[@]}"} | python3 -c "import json,sys
 try:
  print(json.load(sys.stdin).get('name',''))
 except Exception:
  pass" 2>/dev/null || true)
  fi
  curl -s -X DELETE "$BASE/workspaces/$wid?confirm=true" \
-    -H "X-Confirm-Name: $name" "${curl_args[@]}" > /dev/null || true
+    -H "X-Confirm-Name: $name" ${curl_args[@]+"${curl_args[@]}"} > /dev/null || true
 }

 e2e_cleanup_all_workspaces() {
-  curl -s "$BASE/workspaces" | python3 -c "import json,sys
+  # GET /workspaces (list) is AdminAuth-gated (router.go:165). Send the platform
+  # admin bearer if one is set so the list doesn't 401 → empty → no cleanup.
+  local _admin_auth=()
+  e2e_admin_auth_args _admin_auth
+  curl -s "$BASE/workspaces" ${_admin_auth[@]+"${_admin_auth[@]}"} | python3 -c "import json,sys
 try:
  [print(f\"{w.get('id','')}\\t{w.get('name','')}\") for w in json.load(sys.stdin)]
 except Exception:
@@ -11,10 +11,10 @@
 #                                    default + 401, see PR #1714.)
 #
 #   claude-code → auth-aware:
-#                  E2E_MINIMAX_API_KEY    → "minimax:MiniMax-M2.7"
-#                                           (colon-namespaced BYOK id; bare
-#                                            "MiniMax-M2" 400s on a deploy-skewed
-#                                            staging registry — #2263)
+#                  E2E_MINIMAX_API_KEY    → "MiniMax-M2.7"
+#                                           (BARE registered BYOK id — see the
+#                                            claude-code dispatch arm below for
+#                                            why bare, not the colon form)
 #                  E2E_ANTHROPIC_API_KEY  → "claude-sonnet-4-6"
 #                  otherwise              → "sonnet"
 #
@@ -83,25 +83,62 @@ pick_model_slug() {
  fi
  case "$runtime" in
    hermes)      printf 'openai/gpt-4o' ;;
-    claude-code)
+    # seo-agent is a claude-code-adapter template VARIANT selected by
+    # template name (template="seo-agent"), not a distinct registry runtime
+    # (it is absent from manifest.json + runtime_registry.go). Its config.yaml
+    # declares `runtime: claude-code` and copies the claude-code `providers:`
+    # block (providers.yaml:21 "The same block is copy-pasted into the seo-agent
+    # template"), so its model dispatch is IDENTICAL to claude-code's: the BARE
+    # registered MiniMax BYOK id (the staging-default key path), else direct
+    # Anthropic, else the OAuth `sonnet` alias. Sharing the claude-code branch
+    # keeps the SSOT one place — a seo-agent run is just a claude-code run
+    # behind a productized template skin, and (because the runtime resolves to
+    # claude-code server-side) its model must be a *claude-code-registered* form.
+    claude-code|seo-agent)
      if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
-        # Namespaced (colon) BYOK id, not bare "MiniMax-M2" (#2263 deploy-skew):
-        # bare ids can lag the deployed staging ws-server's compiled registry,
-        # so workspace-create's validateRegisteredModelForRuntime 400s the bare
-        # form on an older image. The colon-namespaced `minimax:MiniMax-M2.7`
-        # resolves the same way the proven-working sibling `moonshot/kimi-k2.6`
-        # does. It stays in the BYOK `minimax` arm (providers.yaml:851), so
-        # DeriveProvider -> provider_selection=minimax (BYOK) and the #1994
-        # byok-not-platform guard (test_staging_full_saas.sh:1000) still passes —
-        # unlike the slash/platform form `minimax/MiniMax-M2.7`, which resolves
-        # to provider=platform and would trip that guard.
-        printf 'minimax:MiniMax-M2.7'
+        # BARE registered BYOK id `MiniMax-M2.7`, NOT the colon form
+        # `minimax:MiniMax-M2.7`. On the claude-code runtime the three MiniMax
+        # spellings have three DISTINCT, intentional outcomes (provider-registry
+        # SSOT, internal#718; pinned by workspace-server/internal/providers/
+        # derive_provider_matrix_test.go, the #2263/#2274 "colon-vs-slash-vs-bare
+        # triple"):
+        #   * bare  "MiniMax-M2.7"        -> provider=minimax  (BYOK, MINIMAX_API_KEY)
+        #   * slash "minimax/MiniMax-M2.7" -> provider=platform (CP proxy bills)
+        #   * colon "minimax:MiniMax-M2.7" -> UNREGISTERED 422  (the claude-code
+        #         adapter CANNOT strip the `minimax:` prefix, so the id is not a
+        #         registered model for runtime claude-code; create-validation,
+        #         internal#718, rejects it)
+        # The bare form is registered in the claude-code `minimax` arm
+        # (registry_gen.go:88 Models=[MiniMax-M2,MiniMax-M2.7,
+        # MiniMax-M2.7-highspeed,MiniMax-M3]) and derives provider=minimax (BYOK
+        # via MINIMAX_API_KEY), so it satisfies the #1994 byok-not-platform guard
+        # (test_staging_full_saas.sh) AND passes create-validation — unlike the
+        # colon form, which 422'd "5/11 Provisioning parent workspace" with
+        # UNREGISTERED_MODEL_FOR_RUNTIME on real staging (job 295075).
+        # NOTE: the colon form IS the correct BYOK-minimax id on openclaw/hermes
+        # (those adapters DO strip `minimax:` — matrix test), but this dispatch
+        # arm only emits for claude-code/seo-agent, where bare is the right form.
+        printf 'MiniMax-M2.7'
      elif [ -n "${E2E_ANTHROPIC_API_KEY:-}" ]; then
        printf 'claude-sonnet-4-6'
      else
        printf 'sonnet'
      fi
      ;;
+    # google-adk: Gemini via two distinct provider arms in providers.yaml
+    # runtimes.google-adk:
+    #   * platform arm → `platform:gemini-2.5-pro` (keyless Vertex via the CP
+    #     LLM proxy + server-side WIF mint; the org-compliant PROD path). This
+    #     id is selected via E2E_LLM_PATH=platform above, NOT here.
+    #   * google arm (AI Studio BYOK) → bare `gemini-2.5-pro` with the tenant's
+    #     own GOOGLE_API_KEY. This is the staging-exercisable path (no WIF
+    #     provisioning needed) and is what this branch selects.
+    # The workflow may further override with E2E_MODEL_SLUG=google_genai:gemini-2.5-pro
+    # (the adapter's provider:model spelling) — E2E_MODEL_SLUG wins at the top
+    # of this function, so both forms are supported.
+    google-adk)
+      printf 'gemini-2.5-pro'
+      ;;
    *)           printf 'openai/gpt-4o' ;;  # safest fallback (matches hermes)
  esac
 }
@@ -15,18 +15,27 @@ SUM_AUTH=()
 ECHO_URL="https://example.com/echo-agent"
 SUM_URL="https://example.com/summarizer-agent"

-# AdminAuth-gated calls need a bearer token once any workspace token
-# exists in the DB. ADMIN_TOKEN is populated after the first workspace
-# create + real token mint. acurl = "authenticated curl".
-ADMIN_TOKEN=""
+# AdminAuth-gated calls (GET/POST/DELETE /workspaces, /events, /bundles)
+# require the platform admin bearer once ADMIN_TOKEN is set on the server.
+# Tier-2b (wsauth_middleware.go:250) REJECTS workspace bearer tokens on admin
+# routes when ADMIN_TOKEN is set, so admin calls MUST send the exact ADMIN_TOKEN
+# value — which the e2e-api CI job exports here as MOLECULE_ADMIN_TOKEN. acurl =
+# "admin curl": it always sends the platform admin bearer (if one is set).
+#
+# Guarded if-set: a fresh self-hosted/dev platform with no ADMIN_TOKEN fail-opens
+# (devmode.go:50), so sending no bearer still works there.
+ADMIN_BEARER="${MOLECULE_ADMIN_TOKEN:-${ADMIN_TOKEN:-}}"
+ADMIN_AUTH=()
+[ -n "$ADMIN_BEARER" ] && ADMIN_AUTH=(-H "Authorization: Bearer $ADMIN_BEARER")
 acurl() {
-  if [ -n "$ADMIN_TOKEN" ]; then
-    curl -s -H "Authorization: Bearer $ADMIN_TOKEN" "$@"
-  else
-    curl -s "$@"
-  fi
+  curl -s ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} "$@"
 }

+# WORKSPACE_TOKEN holds a per-workspace bearer for the WorkspaceAuth-gated
+# routes (PATCH /workspaces/:id, /activity, …). It is set after the first
+# create+mint and is NOT interchangeable with the admin bearer.
+WORKSPACE_TOKEN=""
+
 # Pre-test cleanup: remove any workspaces left over from prior runs so
 # count-based assertions ("empty", "count=2") are reproducible.
 e2e_cleanup_all_workspaces
@@ -57,19 +66,22 @@ check "GET /health" '"status":"ok"' "$R"
 R=$(acurl "$BASE/workspaces")
 check "GET /workspaces (empty)" '[]' "$R"

-# Test 3: Create workspace A (AdminAuth fail-open — no tokens exist yet)
-R=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" -d '{"name":"Echo Agent","tier":1,"runtime":"external","external":true}')
+# Test 3: Create workspace A. POST /workspaces is AdminAuth-gated (router.go:166);
+# send the admin bearer (acurl). On a fail-open dev platform acurl sends nothing
+# and the create still works.
+R=$(acurl -X POST "$BASE/workspaces" -H "Content-Type: application/json" -d '{"name":"Echo Agent","tier":1,"runtime":"external","external":true}')
 check "POST /workspaces (create echo)" '"status":"awaiting_agent"' "$R"
 ECHO_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])")

-ADMIN_TOKEN=$(echo "$R" | e2e_extract_token)
-if [ -z "$ADMIN_TOKEN" ]; then
-  ADMIN_TOKEN=$(e2e_mint_workspace_token "$ECHO_ID" 2>/dev/null || echo "")
+# Per-workspace token for Echo, for the WorkspaceAuth-gated routes below.
+WORKSPACE_TOKEN=$(echo "$R" | e2e_extract_token)
+if [ -z "$WORKSPACE_TOKEN" ]; then
+  WORKSPACE_TOKEN=$(e2e_mint_workspace_token "$ECHO_ID" 2>/dev/null || echo "")
 fi
-if [ -n "$ADMIN_TOKEN" ]; then
-  echo "  (acquired admin token: ${ADMIN_TOKEN:0:8}...)"
+if [ -n "$WORKSPACE_TOKEN" ]; then
+  echo "  (acquired Echo workspace token: ${WORKSPACE_TOKEN:0:8}...)"
 else
-  echo "  WARNING: no admin token acquired — subsequent AdminAuth calls will fail"
+  echo "  WARNING: no Echo workspace token acquired — WorkspaceAuth calls will fail"
 fi

 # Test 4: Create workspace B (needs bearer — tokens now exist in DB)
@@ -98,7 +110,7 @@ check "GET /workspaces/:id (agent_card null)" '"agent_card":null' "$R"
 # Test 7: Register echo — use workspace-specific token (from real admin
 # endpoint), not the admin token. C18 requires a token issued TO THIS
 # workspace, not just any valid token.
-ECHO_WS_TOKEN="$ADMIN_TOKEN"
+ECHO_WS_TOKEN="$WORKSPACE_TOKEN"
 [ -n "$ECHO_WS_TOKEN" ] && ECHO_AUTH=(-H "Authorization: Bearer $ECHO_WS_TOKEN")
 R=$(curl -s -X POST "$BASE/registry/register" -H "Content-Type: application/json" \
  "${ECHO_AUTH[@]}" \
@@ -159,26 +171,29 @@ R=$(curl -s -X POST "$BASE/registry/check-access" -H "Content-Type: application/
  -d "{\"caller_id\":\"$ECHO_ID\",\"target_id\":\"$SUM_ID\"}")
 check "POST /registry/check-access (same-org allowed)" '"allowed":true' "$R"

-# Test 15: PATCH workspace (update position)
-R=$(acurl -X PATCH "$BASE/workspaces/$ECHO_ID" -H "Content-Type: application/json" -d '{"x":100,"y":200}')
+# Test 15: PATCH workspace (update position). PATCH /workspaces/:id is
+# WorkspaceAuth-gated (router.go:227 — #680 IDOR fix), so it needs Echo's OWN
+# bearer, NOT the admin bearer (WorkspaceAuth rejects the admin token).
+R=$(curl -s "${ECHO_AUTH[@]}" -X PATCH "$BASE/workspaces/$ECHO_ID" -H "Content-Type: application/json" -d '{"x":100,"y":200}')
 check "PATCH /workspaces/:id (position)" '"status":"updated"' "$R"

 R=$(acurl "$BASE/workspaces/$ECHO_ID")
 check "Position saved (x=100)" '"x":100' "$R"
 check "Position saved (y=200)" '"y":200' "$R"

-# Test 16: PATCH workspace (update name)
-R=$(acurl -X PATCH "$BASE/workspaces/$ECHO_ID" -H "Content-Type: application/json" -d '{"name":"Echo Agent v2"}')
+# Test 16: PATCH workspace (update name) — WorkspaceAuth-gated; use Echo's token.
+R=$(curl -s "${ECHO_AUTH[@]}" -X PATCH "$BASE/workspaces/$ECHO_ID" -H "Content-Type: application/json" -d '{"name":"Echo Agent v2"}')
 check "PATCH /workspaces/:id (name)" '"status":"updated"' "$R"

 R=$(acurl "$BASE/workspaces/$ECHO_ID")
 check "Name updated" '"name":"Echo Agent v2"' "$R"

-# Test 17: Events (#165 / PR #167 — now admin-gated, bearer required)
-R=$(acurl "$BASE/events" -H "Authorization: Bearer $ECHO_TOKEN")
+# Test 17: Events (#165 / PR #167 — admin-gated; the admin bearer is required,
+# and Tier-2b rejects a workspace bearer here, so use acurl's admin token alone).
+R=$(acurl "$BASE/events")
 check "GET /events (has events)" 'WORKSPACE_ONLINE' "$R"

-R=$(acurl "$BASE/events/$ECHO_ID" -H "Authorization: Bearer $ECHO_TOKEN")
+R=$(acurl "$BASE/events/$ECHO_ID")
 check "GET /events/:id (has events for echo)" 'WORKSPACE_ONLINE' "$R"

 # Test 18: Update card
@@ -295,7 +310,7 @@ check "active_tasks cleared" '"active_tasks":0' "$R"
 # endpoint is admin-auth gated and keeps the full record, so operators
 # can still see task progress from the dashboard without exposing it
 # over the public per-workspace GET.
-R=$(curl -s "$BASE/workspaces" -H "Authorization: Bearer $ECHO_TOKEN")
+R=$(acurl "$BASE/workspaces")
 check "current_task in list response" '"current_task"' "$R"

 # Test 21: Delete
@@ -306,18 +321,20 @@ check "current_task in list response" '"current_task"' "$R"
 # Delete the CHILD (Summarizer) here instead: a child delete does NOT cascade
 # upward, so the parent Echo survives and count=1 holds. The bundle round-trip
 # below needs Summarizer's exported config, so capture it BEFORE this delete.
-BUNDLE=$(curl -s "$BASE/bundles/export/$SUM_ID" -H "Authorization: Bearer $SUM_TOKEN")
+# GET /bundles/export/:id is admin-gated (router.go:741) — use the admin bearer.
+BUNDLE=$(acurl "$BASE/bundles/export/$SUM_ID")
 check "GET /bundles/export/:id" '"name":"Summarizer Agent"' "$BUNDLE"
 ORIG_NAME=$(echo "$BUNDLE" | python3 -c "import sys,json; print(json.load(sys.stdin)['name'])")
 ORIG_TIER=$(echo "$BUNDLE" | python3 -c "import sys,json; print(json.load(sys.stdin)['tier'])")

+# DELETE /workspaces/:id is admin-gated (router.go:167). X-Confirm-Name must
+# still match the workspace name even with admin auth.
 R=$(acurl -X DELETE "$BASE/workspaces/$SUM_ID?confirm=true" \
-  -H "Authorization: Bearer $SUM_TOKEN" \
  -H "X-Confirm-Name: Summarizer Agent")
 check "DELETE /workspaces/:id" '"status":"removed"' "$R"

-# Parent Echo must survive a child delete — list as Echo and expect count=1.
-R=$(curl -s "$BASE/workspaces" -H "Authorization: Bearer $ECHO_TOKEN")
+# Parent Echo must survive a child delete — list (admin) and expect count=1.
+R=$(acurl "$BASE/workspaces")
 COUNT=$(echo "$R" | python3 -c "import sys,json; print(len(json.load(sys.stdin)))")
 check "List after delete (count=1)" "1" "$COUNT"

@@ -328,21 +345,21 @@ check "List after delete (count=1)" "1" "$COUNT"
 echo ""
 echo "--- Bundle Round-Trip Test ---"

-# Delete the remaining parent Echo — use ECHO_TOKEN (per-workspace) for
-# WorkspaceAuth and ADMIN_TOKEN for the AdminAuth layer.
+# Delete the remaining parent Echo — DELETE is admin-gated (router.go:167);
+# the platform admin bearer (acurl) authorizes it. X-Confirm-Name still required.
 R=$(acurl -X DELETE "$BASE/workspaces/$ECHO_ID?confirm=true" \
-  -H "Authorization: Bearer $ECHO_TOKEN" \
  -H "X-Confirm-Name: Echo Agent v2")
 check "Delete before re-import" '"status":"removed"' "$R"

-# After deleting both workspaces, all per-workspace tokens are revoked.
-# Clear the now-revoked admin bearer so acurl can use fresh-install fail-open.
-ADMIN_TOKEN=""
+# Both workspaces are now deleted. The platform-level ADMIN_TOKEN env is still
+# set, so admin routes still require the admin bearer (fail-open does NOT
+# re-engage just because the token table emptied) — keep using acurl's bearer.
 R=$(acurl "$BASE/workspaces")
 COUNT=$(echo "$R" | python3 -c "import sys,json; print(len(json.load(sys.stdin)))")
 check "All workspaces deleted (count=0)" "0" "$COUNT"

-# Re-import from the exported bundle (AdminAuth fail-open — no live tokens)
+# Re-import from the exported bundle. POST /bundles/import is admin-gated
+# (router.go:742) — acurl sends the admin bearer.
 R=$(acurl -X POST "$BASE/bundles/import" -H "Content-Type: application/json" -d "$BUNDLE")
 check "POST /bundles/import" '"status":"provisioning"' "$R"
 NEW_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin)['workspace_id'])")
@@ -398,12 +415,15 @@ check "Register re-imported workspace" '"status":"registered"' "$R"
 REG_NEW_TOKEN=$(echo "$R" | e2e_extract_token)
 [ -n "$REG_NEW_TOKEN" ] && NEW_TOKEN="$REG_NEW_TOKEN"

-# Re-export and verify agent_card survives the round-trip (#165 / PR #167 — admin-gated)
-REBUNDLE=$(curl -s "$BASE/bundles/export/$NEW_ID" -H "Authorization: Bearer $NEW_TOKEN")
+# Re-export and verify agent_card survives the round-trip (#165 / PR #167 —
+# GET /bundles/export/:id is admin-gated; use the admin bearer).
+REBUNDLE=$(acurl "$BASE/bundles/export/$NEW_ID")
 check "Re-exported bundle has agent_card" '"agent_card"' "$REBUNDLE"

-# Clean up — use the token just issued to the re-imported workspace
-e2e_delete_workspace "$NEW_ID" "$ORIG_NAME" -H "Authorization: Bearer $NEW_TOKEN"
+# Clean up — DELETE /workspaces/:id is admin-gated; pass no per-call auth so
+# e2e_delete_workspace falls back to the platform admin bearer (a workspace
+# bearer would be rejected by Tier-2b).
+e2e_delete_workspace "$NEW_ID" "$ORIG_NAME"

 echo ""
 echo "=== Results: $PASS passed, $FAIL failed ==="
@@ -0,0 +1,468 @@
+#!/usr/bin/env bash
+# GATING E2E for the social-channels outbound + discover + data-prune paths
+# (core#2332 P1.10). Closes three coverage gaps that were previously
+# unit-mocked at best, so a regression in any of them goes RED in the required
+# `E2E API Smoke Test` lane instead of slipping through:
+#
+#  (1) Channel SEND end-to-end. Every adapter's SendMessage was only ever
+#      asserted by unit tests that reconstruct the payload by hand and POST
+#      it themselves (see internal/channels/lark_test.go's "we can't change
+#      the prefix const" comment) — nothing proved that a message submitted
+#      through the LIVE platform API actually serializes and POSTs to a
+#      provider endpoint. Here we stand up a local mock-upstream, point a
+#      Slack Incoming-Webhook channel at it, send via
+#      POST /channels/:id/send, and assert the MOCK RECEIVED the correctly
+#      serialized {"text":"..."} body. Real serialize+POST, real HTTP stack,
+#      no real Slack account.
+#
+#  (2) Channel DISCOVER (POST /channels/discover). Had no test at all. We
+#      point the Telegram discover path at a mock Bot API that serves
+#      getMe + getUpdates and assert the discovered bot username + chat
+#      round-trip back through the handler.
+#
+#  (3) Workspace data-prune (RFC #734). The user-requested permanent delete
+#      with ?purge=true prunes a workspace's durable child data (channels,
+#      secrets, config, …). We create prunable data on a target workspace
+#      AND a sibling, purge the target, then assert the target's child rows
+#      are GONE while the sibling's SURVIVE.
+#
+# ── Test seam (production-inert) ────────────────────────────────────────
+# Adapters pin their outbound host to the real vendor (hooks.slack.com /
+# api.telegram.org). Two env-gated overrides — set ONLY by this lane, never
+# in any prod/staging deploy — let the live send/discover path target a
+# local mock so the round-trip is provable in CI:
+#
+#   MOLECULE_CHANNELS_TEST_WEBHOOK_BASE       (Slack webhook accept-prefix)
+#   MOLECULE_CHANNELS_TEST_TELEGRAM_API_BASE  (Telegram Bot API base)
+#
+# These must be present in the PLATFORM process env (the workflow exports
+# them via $GITHUB_ENV before "Start platform"), pointing at the fixed
+# loopback ports this script binds its mocks on. If they are absent the
+# platform rejects the mock URLs; under E2E_REQUIRE_LIVE=1 that is a hard
+# RED (the seam regressed / the workflow wiring broke), otherwise a LOUD
+# SKIP for ad-hoc local runs that didn't export them.
+#
+# NEVER fail-open: a missing assertion target fails the script.
+#
+# Required env (defaults shown):
+#   BASE                       http://127.0.0.1:8080
+#   MOLECULE_ADMIN_TOKEN       (admin bearer; matches the platform's ADMIN_TOKEN)
+#   E2E_CHANNELS_WEBHOOK_PORT  18099   (mock Slack webhook upstream)
+#   E2E_CHANNELS_TELEGRAM_PORT 18098   (mock Telegram Bot API upstream)
+#   E2E_REQUIRE_LIVE           0        (1 = seam-absent is RED, not skip)
+
+set -uo pipefail
+
+# shellcheck disable=SC1091
+source "$(dirname "$0")/_lib.sh"   # sets BASE default + admin/token helpers
+
+WEBHOOK_PORT="${E2E_CHANNELS_WEBHOOK_PORT:-18099}"
+TELEGRAM_PORT="${E2E_CHANNELS_TELEGRAM_PORT:-18098}"
+REQUIRE_LIVE="${E2E_REQUIRE_LIVE:-0}"
+
+# The base prefixes the PLATFORM must have been started with. We assert the
+# adapter accepted a URL under these — proving the platform's env matches.
+WEBHOOK_BASE="http://127.0.0.1:${WEBHOOK_PORT}/"
+TELEGRAM_BASE="http://127.0.0.1:${TELEGRAM_PORT}"
+
+PASS=0
+FAIL=0
+WORK_DIR="$(mktemp -d)"
+WS_TARGET=""
+WS_SIBLING=""
+WS_TARGET_TOK=""
+WS_SIBLING_TOK=""
+MOCK_PID=""
+
+ADMIN_BEARER="${MOLECULE_ADMIN_TOKEN:-${ADMIN_TOKEN:-}}"
+ADMIN_AUTH=()
+[ -n "$ADMIN_BEARER" ] && ADMIN_AUTH=(-H "Authorization: Bearer $ADMIN_BEARER")
+
+pass() { echo "PASS: $1"; PASS=$((PASS + 1)); }
+fail() { echo "FAIL: $1"; [ -n "${2:-}" ] && echo "  $2"; FAIL=$((FAIL + 1)); }
+
+# loud_skip REASON — end the run because the channels test seam is unavailable.
+# Never a silent pass: with E2E_REQUIRE_LIVE=1 it prints a hard-failure banner
+# and exits 1; otherwise it prints a loud SKIP banner (with the env the
+# platform needs) and exits 0. cleanup runs before either exit. Mirrors the
+# require-live gate pattern used by test_priority_runtimes_e2e.sh.
+loud_skip() {
+  local why="$1"
+  local rule="============================================================"
+  printf '\n%s\n' "$rule"
+  if [ "$REQUIRE_LIVE" != "1" ]; then
+    # Ad-hoc local run: tell the operator exactly what to export, then bow out.
+    printf '%s\n' \
+      "SKIP (loud): $why" \
+      "Set MOLECULE_CHANNELS_TEST_WEBHOOK_BASE=$WEBHOOK_BASE and" \
+      "MOLECULE_CHANNELS_TEST_TELEGRAM_API_BASE=$TELEGRAM_BASE in the" \
+      "PLATFORM env before starting it, then re-run. (CI sets these.)" \
+      "$rule"
+    cleanup
+    exit 0
+  fi
+  # Require-live lane: a missing seam is a failure, not a skip.
+  printf '%s\n' \
+    "E2E_REQUIRE_LIVE=1 but channels e2e seam is unavailable:" \
+    "  $why" \
+    "This is a HARD FAILURE — the platform was not started with the" \
+    "channels test seam env (MOLECULE_CHANNELS_TEST_WEBHOOK_BASE /" \
+    "MOLECULE_CHANNELS_TEST_TELEGRAM_API_BASE) on the fixed loopback" \
+    "ports, or the seam regressed. Fix the workflow wiring or the seam." \
+    "$rule"
+  cleanup
+  exit 1
+}
+
+# cleanup — best-effort teardown. Registered as the EXIT/INT/TERM trap AND
+# called explicitly before early exits, so it can run twice in one process and
+# must tolerate that.
+#   1. Stops the mock-upstream process, if one was started.
+#   2. Hard-purges every workspace this run created and has not purged itself.
+#   3. Removes the scratch directory.
+cleanup() {
+  set +e
+  if [ -n "$MOCK_PID" ]; then
+    kill "$MOCK_PID" 2>/dev/null
+    wait "$MOCK_PID" 2>/dev/null
+    MOCK_PID=""   # a repeat invocation must not signal a recycled PID
+  fi
+  # Hard-purge any workspaces we created so repeat runs are deterministic.
+  # X-Confirm-Name has to equal the workspace name, and the workspaces are
+  # created with a "-$$" PID suffix — so the suffix must be sent here too.
+  local pair wid tok name
+  local -a auth
+  for pair in "$WS_TARGET|$WS_TARGET_TOK|e2e-chan-target-$$" \
+              "$WS_SIBLING|$WS_SIBLING_TOK|e2e-chan-sibling-$$"; do
+    wid="${pair%%|*}"; pair="${pair#*|}"
+    tok="${pair%%|*}"; name="${pair#*|}"
+    [ -z "$wid" ] && continue
+    # DELETE /workspaces/:id is admin-gated, so prefer the admin bearer and
+    # fall back to the workspace token only when no admin bearer is configured
+    # (the same precedence the purge assertion applies via PURGE_AUTH).
+    auth=()
+    if [ -n "$ADMIN_BEARER" ]; then
+      auth=(-H "Authorization: Bearer $ADMIN_BEARER")
+    elif [ -n "$tok" ]; then
+      auth=(-H "Authorization: Bearer $tok")
+    fi
+    # ${auth[@]+…} keeps an empty array from tripping `set -u` on bash < 4.4.
+    curl -s -X DELETE "$BASE/workspaces/$wid?confirm=true&purge=true" \
+      -H "X-Confirm-Name: $name" ${auth[@]+"${auth[@]}"} >/dev/null 2>&1
+  done
+  rm -rf "$WORK_DIR" 2>/dev/null
+}
+trap cleanup EXIT INT TERM
+
+# ── mock upstream ───────────────────────────────────────────────────────
+# One Python process serves BOTH mocks (different ports). It records the
+# Slack webhook request body to $WORK_DIR/slack_body.json and answers the
+# Telegram getMe/getUpdates calls with a deterministic bot+chat fixture.
+#
+# start_mock — writes the mock server to $WORK_DIR/mock.py, launches it in the
+# background (PID stored in MOCK_PID so cleanup can stop it), then blocks until
+# both loopback ports answer. If they never do, it runs cleanup and exits 2.
+start_mock() {
+  # The quoted 'PY' delimiter makes the heredoc literal: bash expands nothing
+  # inside it, so the Python source lands in mock.py exactly as written.
+  cat > "$WORK_DIR/mock.py" <<'PY'
+import json
+import os
+import sys
+import threading
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+
+WORK_DIR = os.environ["MOCK_WORK_DIR"]
+WEBHOOK_PORT = int(os.environ["MOCK_WEBHOOK_PORT"])
+TELEGRAM_PORT = int(os.environ["MOCK_TELEGRAM_PORT"])
+
+BOT_USERNAME = "e2e_mock_bot"
+CHAT_ID = -1009876543210
+CHAT_NAME = "E2E Mock Group"
+
+
+class SlackHandler(BaseHTTPRequestHandler):
+    def log_message(self, *a):  # silence
+        pass
+
+    def do_POST(self):
+        n = int(self.headers.get("Content-Length", "0") or "0")
+        body = self.rfile.read(n)
+        # Persist EXACTLY what the live Slack send path POSTed so the bash
+        # side can assert the serialized payload.
+        with open(os.path.join(WORK_DIR, "slack_body.json"), "wb") as f:
+            f.write(body)
+        with open(os.path.join(WORK_DIR, "slack_meta.json"), "w") as f:
+            json.dump({"path": self.path,
+                       "content_type": self.headers.get("Content-Type", "")}, f)
+        # Real Slack Incoming Webhooks reply 200 "ok".
+        self.send_response(200)
+        self.end_headers()
+        self.wfile.write(b"ok")
+
+
+class TelegramHandler(BaseHTTPRequestHandler):
+    def log_message(self, *a):
+        pass
+
+    def _send(self, obj):
+        payload = json.dumps(obj).encode()
+        self.send_response(200)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(payload)))
+        self.end_headers()
+        self.wfile.write(payload)
+
+    def _route(self):
+        # tgbotapi calls <base>/bot<token>/<method>
+        method = self.path.rsplit("/", 1)[-1]
+        if method == "getMe":
+            return self._send({"ok": True, "result": {
+                "id": 4242, "is_bot": True, "first_name": "E2E Mock",
+                "username": BOT_USERNAME, "can_read_all_group_messages": True}})
+        if method == "setMyCommands":
+            return self._send({"ok": True, "result": True})
+        if method == "deleteWebhook":
+            return self._send({"ok": True, "result": True})
+        if method == "getUpdates":
+            # One my_chat_member update so the bot "discovers" a group.
+            return self._send({"ok": True, "result": [{
+                "update_id": 1,
+                "my_chat_member": {
+                    "chat": {"id": CHAT_ID, "title": CHAT_NAME, "type": "supergroup"},
+                    "from": {"id": 1, "is_bot": False, "first_name": "Op"},
+                    "date": 0,
+                    "old_chat_member": {"user": {"id": 4242, "is_bot": True,
+                                                 "first_name": "E2E Mock"},
+                                        "status": "left"},
+                    "new_chat_member": {"user": {"id": 4242, "is_bot": True,
+                                                 "first_name": "E2E Mock"},
+                                        "status": "member"},
+                }}]})
+        # Default OK for any other bot method tgbotapi may probe.
+        return self._send({"ok": True, "result": True})
+
+    def do_POST(self):
+        n = int(self.headers.get("Content-Length", "0") or "0")
+        if n:
+            self.rfile.read(n)
+        self._route()
+
+    def do_GET(self):
+        self._route()
+
+
+def serve(port, handler):
+    ThreadingHTTPServer(("127.0.0.1", port), handler).serve_forever()
+
+
+t = threading.Thread(target=serve, args=(TELEGRAM_PORT, TelegramHandler), daemon=True)
+t.start()
+serve(WEBHOOK_PORT, SlackHandler)
+PY
+  # Launch in the background. The scratch dir and both ports reach the mock
+  # through MOCK_* environment variables set for this one command only.
+  MOCK_WORK_DIR="$WORK_DIR" MOCK_WEBHOOK_PORT="$WEBHOOK_PORT" \
+    MOCK_TELEGRAM_PORT="$TELEGRAM_PORT" \
+    python3 "$WORK_DIR/mock.py" &
+  MOCK_PID=$!
+  # Wait for both ports to accept connections (fail loudly if they never do).
+  # Up to 50 probes, 0.1s apart. curl runs without -f, so ANY HTTP response
+  # (even an error status) counts as "up" — only a refused connection fails.
+  local up=0
+  for _ in $(seq 1 50); do
+    if curl -s -o /dev/null "http://127.0.0.1:${WEBHOOK_PORT}/" \
+       && curl -s -o /dev/null "http://127.0.0.1:${TELEGRAM_PORT}/botX/getMe"; then
+      up=1; break
+    fi
+    sleep 0.1
+  done
+  if [ "$up" != "1" ]; then
+    echo "FATAL: mock upstream did not come up on ports $WEBHOOK_PORT/$TELEGRAM_PORT" >&2
+    cleanup
+    exit 2
+  fi
+}
+
+json_field() { python3 -c "import sys,json; print(json.load(sys.stdin).get('$1',''))"; }
+
+create_external_ws() {
+  local name="$1" resp wid
+  resp=$(curl -s -X POST "$BASE/workspaces" "${ADMIN_AUTH[@]}" \
+    -H "Content-Type: application/json" \
+    -d "{\"name\":\"$name\",\"runtime\":\"external\",\"external\":true,\"tier\":1}")
+  wid=$(printf '%s' "$resp" | json_field id)
+  if [ -z "$wid" ]; then
+    echo "FATAL: could not create workspace $name: $resp" >&2
+    cleanup
+    exit 1
+  fi
+  local tok
+  tok=$(printf '%s' "$resp" | e2e_extract_token)
+  [ -z "$tok" ] && tok=$(e2e_mint_workspace_token "$wid" 2>/dev/null || true)
+  printf '%s\t%s\n' "$wid" "$tok"
+}
+
+# ════════════════════════════════════════════════════════════════════════
+echo "=== Channels + data-prune E2E (core#2332 P1.10) ==="
+echo "BASE=$BASE  webhook_mock=$WEBHOOK_BASE  telegram_mock=$TELEGRAM_BASE"
+
+if ! curl -sf "$BASE/health" >/dev/null 2>&1; then
+  echo "FATAL: platform not reachable at $BASE/health" >&2
+  exit 2
+fi
+
+start_mock
+
+# ── workspaces ──────────────────────────────────────────────────────────
+IFS=$'\t' read -r WS_TARGET WS_TARGET_TOK < <(create_external_ws "e2e-chan-target-$$")
+IFS=$'\t' read -r WS_SIBLING WS_SIBLING_TOK < <(create_external_ws "e2e-chan-sibling-$$")
+echo "target=$WS_TARGET sibling=$WS_SIBLING"
+
+WS_AUTH=("${ADMIN_AUTH[@]}")
+[ -n "$WS_TARGET_TOK" ] && WS_AUTH=(-H "Authorization: Bearer $WS_TARGET_TOK")
+SIB_AUTH=("${ADMIN_AUTH[@]}")
+[ -n "$WS_SIBLING_TOK" ] && SIB_AUTH=(-H "Authorization: Bearer $WS_SIBLING_TOK")
+
+# ── (1) SEND end-to-end via a Slack Incoming-Webhook channel ────────────
+echo
+echo "--- (1) channel SEND → mock upstream receives serialized payload ---"
+
+# Create a slack channel whose webhook_url points at our mock. If the
+# platform wasn't started with the webhook test-base, ValidateConfig
+# rejects this URL → loud_skip / RED. chat_id is required by SendOutbound.
+SLACK_CFG=$(python3 -c "import json,sys; print(json.dumps({
+  'webhook_url': sys.argv[1] + 'services/T000/B000/e2e',
+  'chat_id': 'mock-chat'}))" "$WEBHOOK_BASE")
+CREATE=$(curl -s -X POST "$BASE/workspaces/$WS_TARGET/channels" "${WS_AUTH[@]}" \
+  -H "Content-Type: application/json" \
+  -d "{\"channel_type\":\"slack\",\"config\":$SLACK_CFG,\"enabled\":true}")
+CH_ID=$(printf '%s' "$CREATE" | json_field id)
+if [ -z "$CH_ID" ]; then
+  case "$CREATE" in
+    *"invalid channel config"*)
+      loud_skip "platform rejected mock webhook_url (MOLECULE_CHANNELS_TEST_WEBHOOK_BASE not set on platform): $CREATE" ;;
+    *)
+      fail "create slack channel" "$CREATE" ;;
+  esac
+else
+  pass "create slack channel pointed at mock upstream (id=$CH_ID)"
+
+  SEND_TEXT="hello from e2e $$"
+  # Send route: wsAuth.POST /workspaces/:id/channels/:channelId/send (the
+  # handler keys off :channelId; :id scopes the workspace bearer).
+  SEND=$(curl -s -w $'\n%{http_code}' -X POST \
+    "$BASE/workspaces/$WS_TARGET/channels/$CH_ID/send" "${WS_AUTH[@]}" \
+    -H "Content-Type: application/json" \
+    -d "{\"text\":\"$SEND_TEXT\"}")
+  SEND_CODE=$(printf '%s' "$SEND" | tail -n1)
+  if [ "$SEND_CODE" = "200" ]; then
+    pass "POST /channels/:id/send returned 200"
+  else
+    fail "POST /channels/:id/send" "code=$SEND_CODE body=$(printf '%s' "$SEND" | sed '$d')"
+  fi
+
+  # Give the async-free SendOutbound a beat to land at the mock.
+  RECEIVED=""
+  for _ in $(seq 1 30); do
+    if [ -s "$WORK_DIR/slack_body.json" ]; then RECEIVED=1; break; fi
+    sleep 0.1
+  done
+  if [ -n "$RECEIVED" ]; then
+    pass "mock upstream RECEIVED an outbound POST"
+    GOT_TEXT=$(python3 -c "import json,sys; print(json.load(open(sys.argv[1])).get('text',''))" \
+      "$WORK_DIR/slack_body.json" 2>/dev/null || true)
+    if [ "$GOT_TEXT" = "$SEND_TEXT" ]; then
+      pass "mock received correctly-serialized {\"text\":...} payload (text matches end-to-end)"
+    else
+      fail "serialized payload mismatch" "want=[$SEND_TEXT] got=[$GOT_TEXT] raw=$(cat "$WORK_DIR/slack_body.json")"
+    fi
+  else
+    fail "mock upstream never received the outbound POST" "send path did not serialize+POST to the configured endpoint"
+  fi
+fi
+
+# ── (2) DISCOVER via the Telegram mock Bot API ──────────────────────────
+echo
+echo "--- (2) POST /channels/discover (telegram) → mock Bot API ---"
+# A token matching the telegramTokenRegex (\d+:[A-Za-z0-9_-]{30,}).
+DISC_TOKEN="424242:AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+DISC=$(curl -s -w $'\n%{http_code}' -X POST "$BASE/channels/discover" \
+  "${ADMIN_AUTH[@]}" -H "Content-Type: application/json" \
+  -d "{\"channel_type\":\"telegram\",\"bot_token\":\"$DISC_TOKEN\",\"workspace_id\":\"$WS_TARGET\"}")
+DISC_CODE=$(printf '%s' "$DISC" | tail -n1)
+DISC_BODY=$(printf '%s' "$DISC" | sed '$d')
+if [ "$DISC_CODE" = "200" ]; then
+  pass "POST /channels/discover returned 200"
+  if printf '%s' "$DISC_BODY" | grep -qF '"bot_username":"e2e_mock_bot"'; then
+    pass "discover round-tripped the mock bot username"
+  else
+    fail "discover bot_username" "$DISC_BODY"
+  fi
+  if printf '%s' "$DISC_BODY" | grep -qF '"chat_id":"-1009876543210"'; then
+    pass "discover round-tripped the mock chat id"
+  else
+    fail "discover chat list" "$DISC_BODY"
+  fi
+else
+  case "$DISC_BODY" in
+    *"Cannot reach Telegram"*|*"Invalid bot token"*|*"Failed to connect"*)
+      # Platform reached the REAL api.telegram.org (seam not set) → can't prove.
+      loud_skip "discover hit real Telegram, not the mock (MOLECULE_CHANNELS_TEST_TELEGRAM_API_BASE not set on platform): code=$DISC_CODE $DISC_BODY" ;;
+    *)
+      fail "POST /channels/discover" "code=$DISC_CODE body=$DISC_BODY" ;;
+  esac
+fi
+
+# ── (3) Data-prune (RFC #734): purge removes prunable data, sibling survives
+echo
+echo "--- (3) data-prune: purge target's child data, sibling survives ---"
+
+# Seed prunable child data on BOTH workspaces: a channel (already on target)
+# + a secret on each. We assert via GET /channels which lists workspace_channels.
+#
+# seed_secret WORKSPACE_ID [CURL_AUTH_ARGS...] — POST a fixed probe secret
+# (E2E_PRUNE_PROBE) to the workspace. The response is discarded, so a rejected
+# seed does not by itself fail the run.
+seed_secret() {
+  local wid="$1"; shift
+  curl -s -o /dev/null -X POST "$BASE/workspaces/$wid/secrets" "$@" \
+    -H "Content-Type: application/json" \
+    -d '{"key":"E2E_PRUNE_PROBE","value":"v"}'
+}
+seed_secret "$WS_TARGET" "${WS_AUTH[@]}"
+# The sibling needs its own secret too, so both workspaces really do hold the
+# same kinds of child data when the target is purged.
+seed_secret "$WS_SIBLING" "${SIB_AUTH[@]}"
+# Sibling gets its OWN channel so we can prove its rows survive the target purge.
+SIB_SLACK_CFG=$(python3 -c "import json,sys; print(json.dumps({
+  'webhook_url': sys.argv[1] + 'services/T111/B111/sib',
+  'chat_id': 'sib-chat'}))" "$WEBHOOK_BASE")
+SIB_CH=$(curl -s -X POST "$BASE/workspaces/$WS_SIBLING/channels" "${SIB_AUTH[@]}" \
+  -H "Content-Type: application/json" \
+  -d "{\"channel_type\":\"slack\",\"config\":$SIB_SLACK_CFG,\"enabled\":true}")
+SIB_CH_ID=$(printf '%s' "$SIB_CH" | json_field id)
+
+# Pre-purge: confirm both workspaces have >=1 channel row.
+TGT_CH_PRE=$(curl -s "$BASE/workspaces/$WS_TARGET/channels" "${WS_AUTH[@]}")
+SIB_CH_PRE=$(curl -s "$BASE/workspaces/$WS_SIBLING/channels" "${SIB_AUTH[@]}")
+TGT_PRE_N=$(printf '%s' "$TGT_CH_PRE" | python3 -c "import sys,json; print(len(json.load(sys.stdin)))" 2>/dev/null || echo 0)
+SIB_PRE_N=$(printf '%s' "$SIB_CH_PRE" | python3 -c "import sys,json; print(len(json.load(sys.stdin)))" 2>/dev/null || echo 0)
+if [ "${TGT_PRE_N:-0}" -ge 1 ] && [ "${SIB_PRE_N:-0}" -ge 1 ]; then
+  pass "pre-purge: target ($TGT_PRE_N) and sibling ($SIB_PRE_N) both have channel data"
+else
+  fail "pre-purge seed" "target=$TGT_PRE_N sibling=$SIB_PRE_N (need >=1 each)"
+fi
+
+# Permanent delete WITH purge — the RFC #734 prune of durable child data.
+# DELETE /workspaces/:id is AdminAuth-gated (router.go:167); Tier-2b rejects a
+# workspace bearer when ADMIN_TOKEN is set, so this MUST use the admin bearer.
+# X-Confirm-Name must equal the workspace name (the destructive-delete guard).
+PURGE_AUTH=("${ADMIN_AUTH[@]}")
+[ ${#PURGE_AUTH[@]} -eq 0 ] && [ -n "$WS_TARGET_TOK" ] && PURGE_AUTH=(-H "Authorization: Bearer $WS_TARGET_TOK")
+PURGE=$(curl -s -w $'\n%{http_code}' -X DELETE \
+  "$BASE/workspaces/$WS_TARGET?confirm=true&purge=true" \
+  -H "X-Confirm-Name: e2e-chan-target-$$" "${PURGE_AUTH[@]}")
+PURGE_CODE=$(printf '%s' "$PURGE" | tail -n1)
+PURGE_BODY=$(printf '%s' "$PURGE" | sed '$d')
+if [ "$PURGE_CODE" = "200" ] && printf '%s' "$PURGE_BODY" | grep -qF '"status":"purged"'; then
+  pass "DELETE ?purge=true returned purged"
+else
+  fail "DELETE ?purge=true" "code=$PURGE_CODE body=$PURGE_BODY"
+fi
+# Target was purged → its token is revoked; query its channels with admin
+# bearer. The purge hard-deletes workspace_channels rows for the target.
+TGT_CH_POST=$(curl -s "$BASE/workspaces/$WS_TARGET/channels" "${ADMIN_AUTH[@]}")
+TGT_POST_N=$(printf '%s' "$TGT_CH_POST" | python3 -c "import sys,json
+try:
+  d=json.load(sys.stdin); print(len(d) if isinstance(d,list) else -1)
+except Exception:
+  print(-1)" 2>/dev/null || echo -1)
+if [ "${TGT_POST_N:-1}" = "0" ]; then
+  pass "post-purge: target's prunable channel data is GONE (0 rows)"
+else
+  fail "prune did not remove target channel data" "post-purge target rows=$TGT_POST_N body=$(printf '%s' "$TGT_CH_POST" | head -c 200)"
+fi
+WS_TARGET=""  # purged; don't re-delete in cleanup
+
+# Sibling (NON-prunable relative to the target purge) must be untouched.
+SIB_CH_POST=$(curl -s "$BASE/workspaces/$WS_SIBLING/channels" "${SIB_AUTH[@]}")
+SIB_POST_N=$(printf '%s' "$SIB_CH_POST" | python3 -c "import sys,json; print(len(json.load(sys.stdin)))" 2>/dev/null || echo -1)
+if [ "${SIB_POST_N:-0}" -ge 1 ] && printf '%s' "$SIB_CH_POST" | grep -qF "$SIB_CH_ID"; then
+  pass "post-purge: sibling's non-prunable data SURVIVED ($SIB_POST_N rows, channel $SIB_CH_ID intact)"
+else
+  fail "purge over-reached: sibling data did not survive" "sibling rows=$SIB_POST_N body=$(printf '%s' "$SIB_CH_POST" | head -c 200)"
+fi
+
+# ── verdict ─────────────────────────────────────────────────────────────
+echo
+echo "=== channels+prune e2e: $PASS passed, $FAIL failed ==="
+# Any recorded failure makes the run red.
+[ "$FAIL" -eq 0 ] || exit 1
+# Guard against a vacuous green: every section must have produced asserts,
+# so too few passes is treated as a failure even with zero FAILs.
+if [ "$PASS" -ge 9 ]; then
+  echo "ALL CHANNELS + PRUNE E2E CHECKS PASSED"
+else
+  echo "FATAL: only $PASS assertions ran — expected >=9 (send + discover + prune). Refusing to report green." >&2
+  exit 1
+fi
@@ -1,24 +1,30 @@
 #!/usr/bin/env bash
-# E2E regression suite for the local-dev escape hatches added in
-# fix/quickstart-bugless. These cover the exact user-facing breakages
-# that dropped out of the partial squash-merge of PR #1871:
+# E2E regression suite asserting that "dev mode" is fail-CLOSED.
 #
-#   1. GET /workspaces returns 200 with no bearer after tokens exist in
-#      the DB — exercises the AdminAuth Tier-1b dev-mode hatch
-#      (middleware/devmode.go::isDevModeFailOpen).
-#   2. GET /workspaces/:id/activity returns 200 with no bearer — the
-#      same hatch applied to WorkspaceAuth.
-#   3. POST /workspaces/:id/a2a doesn't 502-SSRF on a loopback workspace
-#      URL — exercises handlers/ssrf.go::devModeAllowsLoopback.
-#   4. GET /org/templates returns the curated set populated by
-#      clone-manifest.sh — exercises infra/scripts/setup.sh + the
-#      ListTemplates failure logging in handlers/org.go.
+# History: this file used to assert the local-dev fail-open escape hatches
+# (GET /workspaces 200 with NO bearer, /workspaces/:id/activity 200 with no
+# bearer) added in fix/quickstart-bugless. Under the CTO "nothing should be
+# fail-open" directive (harden/no-fail-open-auth) those hatches were REMOVED:
+# auth is fail-CLOSED in EVERY environment, local dev included. This suite now
+# pins the inverse contract — bearer-less admin/workspace requests 401, and the
+# SAME requests with the dev ADMIN_TOKEN bearer succeed.
 #
-# Requires: platform running on :8080 with MOLECULE_ENV=development and
-#           ADMIN_TOKEN unset. Matches the README quickstart env.
+# What it verifies:
+#   1. GET /workspaces 401s with NO bearer once tokens exist (was: 200 via the
+#      removed AdminAuth Tier-1b dev-mode hatch); 200 WITH the admin bearer.
+#   2. GET /workspaces/:id/activity (and /delegations, /approvals/pending) 401
+#      with no bearer (was: 200 via the WorkspaceAuth hatch); 200 WITH bearer.
+#   3. GET /org/templates returns the curated set populated by clone-manifest.sh
+#      (unauth-readable bootstrap surface — unchanged).
+#
+# Requires: platform running on :8080 with MOLECULE_ENV=development AND
+#           ADMIN_TOKEN set (the dev value), with MOLECULE_ADMIN_TOKEN (or
+#           ADMIN_TOKEN) exported here so the suite can present the bearer.
+#           scripts/dev-start.sh provisions ADMIN_TOKEN locally; the e2e-api CI
+#           job sets it on the platform and exports the matching bearer.
 #
 # Usage:
-#   bash tests/e2e/test_dev_mode.sh
+#   MOLECULE_ADMIN_TOKEN=dev-local-admin-token bash tests/e2e/test_dev_mode.sh
 set -euo pipefail

 # shellcheck source=_lib.sh
@@ -46,35 +52,44 @@ check_http() {
  fi
 }

-echo "=== Dev-mode escape-hatch regression tests ==="
+echo "=== Dev-mode fail-CLOSED regression tests ==="
 echo ""

-# Pre-test: ensure MOLECULE_ENV=development and no ADMIN_TOKEN are in the
-# platform's env. The request path doesn't let us read the platform's
-# env directly, but we can verify the hatch is active by confirming the
-# expected behaviour under the conditions the test otherwise sets up.
+# The platform is fail-closed in every environment now, so the suite MUST have
+# the admin bearer to drive the authenticated (200) assertions. Without it we
+# cannot create / clean up workspaces — bail loudly rather than silently skip.
+ADMIN_BEARER="${MOLECULE_ADMIN_TOKEN:-${ADMIN_TOKEN:-}}"
+if [ -z "$ADMIN_BEARER" ]; then
+  echo "FAIL: MOLECULE_ADMIN_TOKEN/ADMIN_TOKEN not set — auth is fail-closed in"
+  echo "      every environment, so this suite needs the dev ADMIN_TOKEN bearer."
+  echo "      e.g. MOLECULE_ADMIN_TOKEN=dev-local-admin-token bash $0"
+  exit 1
+fi
+ADMIN_AUTH=(-H "Authorization: Bearer $ADMIN_BEARER")

 e2e_cleanup_all_workspaces

 # ----------------------------------------------------------------------
-# Section 1 — AdminAuth dev-mode hatch
+# Section 1 — AdminAuth is fail-CLOSED (dev-mode hatch removed)
 # ----------------------------------------------------------------------
-# Before fix: once any workspace had tokens in the DB, GET /workspaces
-# closed to unauthenticated callers and the Canvas broke. The hatch
-# keeps it open specifically in dev mode.
-
-echo "--- Section 1: AdminAuth dev-mode hatch ---"
+echo "--- Section 1: AdminAuth fail-closed ---"

+# No bearer → 401 in dev mode (the removed hatch used to return 200).
 R=$(curl -s -o /dev/null -w "%{http_code}" "$BASE/workspaces")
-check_http "GET /workspaces (empty DB)" "200" "$R"
+check_http "GET /workspaces (no bearer) is fail-CLOSED" "401" "$R"

-# Create a workspace so tokens land in the DB.
+# With the dev admin bearer → 200.
+R=$(curl -s -o /dev/null -w "%{http_code}" "$BASE/workspaces" "${ADMIN_AUTH[@]}")
+check_http "GET /workspaces (with admin bearer)" "200" "$R"
+
+# Create a workspace (authenticated) so tokens land in the DB.
 R=$(curl -s -w "\n%{http_code}" -X POST "$BASE/workspaces" \
+  "${ADMIN_AUTH[@]}" \
  -H "Content-Type: application/json" \
  -d '{"name":"Dev-Mode-Test","tier":1,"runtime":"external","external":true}')
 CODE=$(echo "$R" | tail -n1)
 BODY=$(echo "$R" | sed '$d')
-check_http "POST /workspaces (create)" "201" "$CODE"
+check_http "POST /workspaces (create, with admin bearer)" "201" "$CODE"

 WS_ID=$(echo "$BODY" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || true)
 if [ -z "$WS_ID" ]; then
@@ -83,43 +98,55 @@ if [ -z "$WS_ID" ]; then
  exit 1
 fi

-# Ensure a real workspace token exists so AdminAuth now sees a live token. On
-# pre-fix builds the next /workspaces call would 401 — on post-fix it
-# must stay 200 because MOLECULE_ENV=development + ADMIN_TOKEN unset.
+# Ensure a real workspace token exists so AdminAuth sees a live token globally.
 TOKEN=$(echo "$BODY" | e2e_extract_token)
 if [ -z "$TOKEN" ]; then
  e2e_mint_workspace_token "$WS_ID" >/dev/null
 fi

+# With tokens now in the DB, the bearer-less call STILL 401s (no lazy-bootstrap
+# / dev-mode fall-through), and the authenticated call still 200s.
 R=$(curl -s -o /dev/null -w "%{http_code}" "$BASE/workspaces")
-check_http "GET /workspaces (after token minted, no bearer)" "200" "$R"
+check_http "GET /workspaces (after token minted, no bearer) is fail-CLOSED" "401" "$R"
+
+R=$(curl -s -o /dev/null -w "%{http_code}" "$BASE/workspaces" "${ADMIN_AUTH[@]}")
+check_http "GET /workspaces (after token minted, with admin bearer)" "200" "$R"

 # ----------------------------------------------------------------------
-# Section 2 — WorkspaceAuth dev-mode hatch
+# Section 2 — WorkspaceAuth is fail-CLOSED (dev-mode hatch removed)
 # ----------------------------------------------------------------------
-# Before fix: /workspaces/:id/activity 401'd once tokens existed —
-# the Canvas side panel's chat history load broke.
-
 echo ""
-echo "--- Section 2: WorkspaceAuth dev-mode hatch ---"
+echo "--- Section 2: WorkspaceAuth fail-closed ---"

+# No bearer → 401 (the removed hatch used to return 200).
 R=$(curl -s -o /dev/null -w "%{http_code}" \
  "$BASE/workspaces/$WS_ID/activity?type=a2a_receive&limit=50")
-check_http "GET /workspaces/:id/activity (no bearer)" "200" "$R"
+check_http "GET /workspaces/:id/activity (no bearer) is fail-CLOSED" "401" "$R"

 R=$(curl -s -o /dev/null -w "%{http_code}" \
  "$BASE/workspaces/$WS_ID/delegations")
-check_http "GET /workspaces/:id/delegations (no bearer)" "200" "$R"
+check_http "GET /workspaces/:id/delegations (no bearer) is fail-CLOSED" "401" "$R"

 R=$(curl -s -o /dev/null -w "%{http_code}" "$BASE/approvals/pending")
-check_http "GET /approvals/pending (no bearer)" "200" "$R"
+check_http "GET /approvals/pending (no bearer) is fail-CLOSED" "401" "$R"
+
+# Same requests WITH the admin bearer → 200.
+R=$(curl -s -o /dev/null -w "%{http_code}" \
+  "$BASE/workspaces/$WS_ID/activity?type=a2a_receive&limit=50" "${ADMIN_AUTH[@]}")
+check_http "GET /workspaces/:id/activity (with admin bearer)" "200" "$R"
+
+R=$(curl -s -o /dev/null -w "%{http_code}" \
+  "$BASE/workspaces/$WS_ID/delegations" "${ADMIN_AUTH[@]}")
+check_http "GET /workspaces/:id/delegations (with admin bearer)" "200" "$R"
+
+R=$(curl -s -o /dev/null -w "%{http_code}" "$BASE/approvals/pending" "${ADMIN_AUTH[@]}")
+check_http "GET /approvals/pending (with admin bearer)" "200" "$R"

 # ----------------------------------------------------------------------
 # Section 3 — Template registry populated by setup.sh
 # ----------------------------------------------------------------------
-# Before fix: setup.sh didn't run clone-manifest.sh so the template
-# palette was empty and the molecule-dev in-tree copy was broken.
-
+# GET /org/templates is an unauthenticated bootstrap surface (the template
+# palette must render before the user has a credential) — unchanged.
 echo ""
 echo "--- Section 3: Template registry ---"

@@ -0,0 +1,332 @@
+#!/usr/bin/env bash
+set -uo pipefail
+#
+# test_keyless_feature_contracts_e2e.sh — REQUIRED-lane (E2E API Smoke Test)
+# keyless HTTP-contract coverage for feature endpoints that ship WITHOUT an
+# LLM key and had NO e2e assertion before (coverage-audit gap list).
+#
+# Why a NEW script (not added to test_api.sh): PR #2286 is concurrently
+# rewriting test_api.sh's auth helpers + _lib.sh (e2e_admin_auth_args) and the
+# test_priority_runtimes mock arm. Keeping these assertions in a standalone
+# file avoids a merge conflict with that in-flight PR and keeps the new feature
+# coverage independently reviewable. The mock-runtime A2A canned round-trip is
+# OWNED by #2286's `mock` arm (run_mock) — intentionally NOT duplicated here.
+#
+# Every endpoint below is exercised against a runtime=external workspace so NO
+# LLM key is needed. For each we assert the real HTTP contract: the happy path
+# AND a meaningful failure mode (401 without auth, 400 on bad input, or the
+# documented fail-closed status) so the test catches REAL regressions, not
+# just 200s.
+#
+# Auth model (matches workspace-server/internal/middleware/wsauth_middleware.go):
+#   * WorkspaceAuth (/workspaces/:id/*) is STRICT once a token exists — a
+#     bearer-less request 401s (devmode fail-open needs MOLECULE_ENV=dev AND an
+#     unset ADMIN_TOKEN — a combination the e2e-api job never runs with).
+#   * AdminAuth routes accept the platform ADMIN_TOKEN (post-#2286) OR, when no
+#     ADMIN_TOKEN is configured, any valid workspace bearer (Tier-3 fallback) —
+#     so the workspace token we mint authenticates admin routes in BOTH the
+#     pre-#2286 (no ADMIN_TOKEN) and post-#2286 (ADMIN_TOKEN set) CI shapes.
+#
+# Local-run shape (mirrors the e2e-api job — real PG+Redis+platform):
+#   DATABASE_URL=... REDIS_URL=... ADMIN_TOKEN=... ./platform-server &
+#   BASE=http://127.0.0.1:$PORT bash tests/e2e/test_keyless_feature_contracts_e2e.sh
+
+source "$(dirname "$0")/_lib.sh"  # sets BASE default
+
+PASS=0
+FAIL=0
+
+# pass DESC — print a "PASS: DESC" line and increment the PASS counter.
+pass() { echo "PASS: $1"; PASS=$((PASS + 1)); }
+# fail DESC DETAIL — print "FAIL: DESC", then DETAIL indented on its own line,
+# and increment the FAIL counter (the script's final exit status is FAIL == 0).
+fail() { echo "FAIL: $1"; echo "  $2"; FAIL=$((FAIL + 1)); }
+
+# assert_contains DESC EXPECTED_SUBSTRING ACTUAL
+#   Records a PASS when ACTUAL contains EXPECTED_SUBSTRING as a literal
+#   substring (the quoted pattern disables glob/regex interpretation),
+#   otherwise records a FAIL that echoes ACTUAL for debugging.
+#   The match is done in-shell instead of `printf | grep -qF`: this script runs
+#   with `set -o pipefail`, and grep -q exits on the first match, so a large
+#   ACTUAL could kill the writer with SIGPIPE and turn a real match into a
+#   false FAIL. It also keeps a needle starting with '-' from being parsed as
+#   a grep option.
+assert_contains() {
+  if [[ "$3" == *"$2"* ]]; then
+    pass "$1"
+  else
+    fail "$1" "expected to contain [$2] — got: $3"
+  fi
+}
+
+# http_code METHOD URL [curl-args...] → prints the HTTP status code only.
+#   The response body is discarded (-o /dev/null); any extra arguments
+#   (headers, -d payloads, …) are forwarded to curl unchanged.
+http_code() {
+  local method="$1" url="$2"; shift 2
+  curl -s -o /dev/null -w "%{http_code}" -X "$method" "$url" "$@"
+}
+
+# body_and_code METHOD URL [curl-args...] → prints "<body>\n<code>".
+#   The status code is always the LAST line of the output, so callers split it
+#   with `tail -n1` (code) and `sed '$d'` (body). Extra arguments are forwarded
+#   to curl unchanged.
+body_and_code() {
+  local method="$1" url="$2"; shift 2
+  curl -s -w $'\n%{http_code}' -X "$method" "$url" "$@"
+}
+
+echo "=== Keyless feature HTTP-contract E2E (required lane) ==="
+echo ""
+
+# Platform admin bearer when the job set one (#2286 shape). When ADMIN_TOKEN is
+# configured, AdminAuth's Tier-1 fail-open is OFF even before the first token
+# exists, so admin-gated create / list / delete must carry it from the start.
+# Pre-#2286 (no ADMIN_TOKEN) this is empty → fail-open create works bare.
+ENV_ADMIN="${MOLECULE_ADMIN_TOKEN:-${ADMIN_TOKEN:-}}"
+ENV_ADMIN_AUTH=()
+[ -n "$ENV_ADMIN" ] && ENV_ADMIN_AUTH=(-H "Authorization: Bearer $ENV_ADMIN")
+
+# Reproducible counts across reruns. e2e_cleanup_all_workspaces hits the
+# admin-gated list/delete; the platform admin bearer (if set) goes via the
+# MOLECULE_ADMIN_TOKEN/ADMIN_TOKEN env the helper already reads.
+e2e_cleanup_all_workspaces
+
+# ---------------------------------------------------------------------------
+# Fixture: one external workspace, registered → online. Keyless (external=true
+# means no container is provisioned and no LLM key is consulted).
+# ---------------------------------------------------------------------------
+R=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \
+  ${ENV_ADMIN_AUTH[@]+"${ENV_ADMIN_AUTH[@]}"} \
+  -d '{"name":"Keyless Fixture","tier":1,"runtime":"external","external":true}')
+WS_ID=$(printf '%s' "$R" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || echo "")
+if [ -z "$WS_ID" ]; then
+  echo "FATAL: could not create fixture workspace — got: $R" >&2
+  exit 2
+fi
+assert_contains "POST /workspaces (external fixture created)" '"status":"awaiting_agent"' "$R"
+
+# Workspace token: taken from the create response ($R) when it carries one; else minted via the admin endpoint.
+WS_TOKEN=$(printf '%s' "$R" | e2e_extract_token)
+if [ -z "$WS_TOKEN" ]; then
+  WS_TOKEN=$(e2e_mint_workspace_token "$WS_ID" 2>/dev/null || echo "")
+fi
+if [ -z "$WS_TOKEN" ]; then
+  echo "FATAL: could not obtain workspace token for $WS_ID" >&2
+  exit 2
+fi
+AUTH=(-H "Authorization: Bearer $WS_TOKEN")
+
+# Admin bearer: explicit platform ADMIN_TOKEN if the job set one (#2286 shape),
+# else the workspace token (AdminAuth Tier-3 accepts it pre-#2286).
+ADMIN_BEARER="${ENV_ADMIN:-$WS_TOKEN}"
+ADMIN_AUTH=(-H "Authorization: Bearer $ADMIN_BEARER")
+
+# Bring the fixture online so lifecycle (hibernate) has a hibernatable state.
+curl -s -X POST "$BASE/registry/register" -H "Content-Type: application/json" "${AUTH[@]}" \
+  -d "{\"id\":\"$WS_ID\",\"url\":\"https://example.com/keyless\",\"agent_card\":{\"name\":\"Keyless Fixture\",\"skills\":[{\"id\":\"noop\",\"name\":\"Noop\"}]}}" >/dev/null
+
+# ===========================================================================
+# 1. Terminal diagnose — GET /workspaces/:id/terminal/diagnose (wsAuth)
+#    External workspace has no instance_id → diagnoseLocal path → 200 with a
+#    deterministic report (ok=false, first_failure on docker/container). The
+#    /terminal endpoint itself is a WebSocket upgrade (not HTTP-assertable
+#    keyless); diagnose is its pure-HTTP sibling and the real contract surface.
+# ===========================================================================
+echo "--- /terminal/diagnose ---"
+BC=$(body_and_code GET "$BASE/workspaces/$WS_ID/terminal/diagnose" "${AUTH[@]}")
+DIAG_CODE=$(printf '%s' "$BC" | tail -n1)
+DIAG_BODY=$(printf '%s' "$BC" | sed '$d')
+assert_contains "GET /terminal/diagnose (200 report)" "200" "$DIAG_CODE"
+assert_contains "GET /terminal/diagnose (carries workspace_id)" "\"workspace_id\":\"$WS_ID\"" "$DIAG_BODY"
+assert_contains "GET /terminal/diagnose (has steps[])" '"steps"' "$DIAG_BODY"
+# Failure mode: no bearer → 401 (WorkspaceAuth strict once a token exists).
+assert_contains "GET /terminal/diagnose (no auth → 401)" "401" \
+  "$(http_code GET "$BASE/workspaces/$WS_ID/terminal/diagnose")"
+
+# ===========================================================================
+# 2. Webhooks (public) — POST /webhooks/:type
+#    Public, no auth. telegram adapter: empty update body → (nil,nil) → 200
+#    ignored; non-JSON → parse error → 400; unknown type → 404.
+# ===========================================================================
+echo "--- /webhooks/:type ---"
+BC=$(body_and_code POST "$BASE/webhooks/telegram" -H "Content-Type: application/json" -d '{}')
+WH_CODE=$(printf '%s' "$BC" | tail -n1)
+WH_BODY=$(printf '%s' "$BC" | sed '$d')
+assert_contains "POST /webhooks/telegram (non-message update → 200)" "200" "$WH_CODE"
+assert_contains "POST /webhooks/telegram (status ignored)" '"status":"ignored"' "$WH_BODY"
+assert_contains "POST /webhooks/telegram (bad JSON → 400)" "400" \
+  "$(http_code POST "$BASE/webhooks/telegram" -H 'Content-Type: application/json' -d 'not-json')"
+assert_contains "POST /webhooks/<unknown> (→ 404)" "404" \
+  "$(http_code POST "$BASE/webhooks/nope-not-a-channel" -H 'Content-Type: application/json' -d '{}')"
+
+# ===========================================================================
+# 3. Budget — GET /workspaces/:id/budget (wsAuth) + PATCH (admin)
+#    GET: fresh workspace → multi-period view, no limits, zero spend.
+#    PATCH: set monthly limit (admin) → reflected; bad input → 400.
+# ===========================================================================
+echo "--- /budget ---"
+BUD=$(curl -s "$BASE/workspaces/$WS_ID/budget" "${AUTH[@]}")
+assert_contains "GET /budget (has periods map)" '"periods"' "$BUD"
+assert_contains "GET /budget (monthly_spend 0 on fresh ws)" '"monthly_spend":0' "$BUD"
+# PATCH is admin-gated (router.go:419). Set a monthly limit and verify echo.
+PB=$(curl -s -X PATCH "$BASE/workspaces/$WS_ID/budget" -H "Content-Type: application/json" "${ADMIN_AUTH[@]}" \
+  -d '{"budget_limits":{"monthly":2000}}')
+assert_contains "PATCH /budget (monthly limit set → echoed)" '"budget_limit":2000' "$PB"
+# Re-read confirms persistence.
+assert_contains "GET /budget (limit persisted)" '"budget_limit":2000' \
+  "$(curl -s "$BASE/workspaces/$WS_ID/budget" "${AUTH[@]}")"
+# Failure: empty body → 400 "budget_limits or budget_limit field is required".
+assert_contains "PATCH /budget (empty body → 400)" "400" \
+  "$(http_code PATCH "$BASE/workspaces/$WS_ID/budget" -H 'Content-Type: application/json' "${ADMIN_AUTH[@]}" -d '{}')"
+# Failure: unknown period → 400.
+assert_contains "PATCH /budget (unknown period → 400)" "400" \
+  "$(http_code PATCH "$BASE/workspaces/$WS_ID/budget" -H 'Content-Type: application/json' "${ADMIN_AUTH[@]}" -d '{"budget_limits":{"yearly":1}}')"
+# Failure: GET without bearer → 401.
+assert_contains "GET /budget (no auth → 401)" "401" "$(http_code GET "$BASE/workspaces/$WS_ID/budget")"
+
+# ===========================================================================
+# 4. Checkpoints — POST/GET/DELETE /workspaces/:id/checkpoints* (wsAuth)
+#    Fully self-contained CRUD over workflow_checkpoints (#788). Upsert → latest
+#    → list-by-wfid → delete → 404. Failure modes: missing workflow_id → 400,
+#    empty latest → 404.
+# ===========================================================================
+echo "--- /checkpoints ---"
+WFID="kl-wf-$$"
+CP=$(curl -s -X POST "$BASE/workspaces/$WS_ID/checkpoints" -H "Content-Type: application/json" "${AUTH[@]}" \
+  -d "{\"workflow_id\":\"$WFID\",\"step_name\":\"step-a\",\"step_index\":1,\"payload\":{\"k\":\"v\"}}")
+assert_contains "POST /checkpoints (upsert → id + workflow_id)" "\"workflow_id\":\"$WFID\"" "$CP"
+assert_contains "GET /checkpoints/latest (200 newest)" "\"workflow_id\":\"$WFID\"" \
+  "$(curl -s "$BASE/workspaces/$WS_ID/checkpoints/latest" "${AUTH[@]}")"
+assert_contains "GET /checkpoints/:wfid (lists the step)" '"step_name":"step-a"' \
+  "$(curl -s "$BASE/workspaces/$WS_ID/checkpoints/$WFID" "${AUTH[@]}")"
+DEL=$(curl -s -X DELETE "$BASE/workspaces/$WS_ID/checkpoints/$WFID" "${AUTH[@]}")
+assert_contains "DELETE /checkpoints/:wfid (deleted count)" '"deleted":1' "$DEL"
+assert_contains "GET /checkpoints/:wfid (after delete → 404)" "404" \
+  "$(http_code GET "$BASE/workspaces/$WS_ID/checkpoints/$WFID" "${AUTH[@]}")"
+# Failure: missing workflow_id → 400 (binding:required).
+assert_contains "POST /checkpoints (missing workflow_id → 400)" "400" \
+  "$(http_code POST "$BASE/workspaces/$WS_ID/checkpoints" -H 'Content-Type: application/json' "${AUTH[@]}" -d '{"step_name":"x"}')"
+# Failure: no bearer → 401.
+assert_contains "POST /checkpoints (no auth → 401)" "401" \
+  "$(http_code POST "$BASE/workspaces/$WS_ID/checkpoints" -H 'Content-Type: application/json' -d '{"workflow_id":"x","step_name":"y"}')"
+
+# ===========================================================================
+# 5. Audit — GET /workspaces/:id/audit (wsAuth)
+#    EU AI Act ledger query (#594). Fresh ws → empty events, total 0,
+#    chain_valid null (AUDIT_LEDGER_SALT unset). Failure: bad RFC3339 from → 400.
+# ===========================================================================
+echo "--- /audit ---"
+AUD=$(curl -s "$BASE/workspaces/$WS_ID/audit" "${AUTH[@]}")
+assert_contains "GET /audit (total 0 on fresh ws)" '"total":0' "$AUD"
+assert_contains "GET /audit (chain_valid null without salt)" '"chain_valid":null' "$AUD"
+assert_contains "GET /audit (bad 'from' → 400)" "400" \
+  "$(http_code GET "$BASE/workspaces/$WS_ID/audit?from=not-a-date" "${AUTH[@]}")"
+assert_contains "GET /audit (no auth → 401)" "401" "$(http_code GET "$BASE/workspaces/$WS_ID/audit")"
+
+# ===========================================================================
+# 6. Traces — GET /workspaces/:id/traces (wsAuth)
+#    Langfuse proxy (#590). No LANGFUSE_* configured → 200 [] (graceful empty),
+#    never a 5xx. Failure: no auth → 401.
+# ===========================================================================
+echo "--- /traces ---"
+BC=$(body_and_code GET "$BASE/workspaces/$WS_ID/traces" "${AUTH[@]}")
+TR_CODE=$(printf '%s' "$BC" | tail -n1)
+TR_BODY=$(printf '%s' "$BC" | sed '$d')
+assert_contains "GET /traces (200 without Langfuse)" "200" "$TR_CODE"
+assert_contains "GET /traces (empty list)" '[]' "$TR_BODY"
+assert_contains "GET /traces (no auth → 401)" "401" "$(http_code GET "$BASE/workspaces/$WS_ID/traces")"
+
+# ===========================================================================
+# 7. Session search — GET /workspaces/:id/session-search (wsAuth)
+#    Searches activity_logs. Seed one activity row, then assert q-filter finds
+#    it and a non-matching q returns []. Failure: no auth → 401.
+# ===========================================================================
+echo "--- /session-search ---"
+curl -s -X POST "$BASE/workspaces/$WS_ID/activity" -H "Content-Type: application/json" "${AUTH[@]}" \
+  -d '{"activity_type":"agent_log","method":"inference","summary":"keyless-needle marker"}' >/dev/null
+assert_contains "GET /session-search?q=keyless-needle (finds row)" 'keyless-needle' \
+  "$(curl -s "$BASE/workspaces/$WS_ID/session-search?q=keyless-needle" "${AUTH[@]}")"
+assert_contains "GET /session-search?q=<no-match> (empty)" '[]' \
+  "$(curl -s "$BASE/workspaces/$WS_ID/session-search?q=zzz-no-such-token-zzz" "${AUTH[@]}")"
+assert_contains "GET /session-search (no auth → 401)" "401" \
+  "$(http_code GET "$BASE/workspaces/$WS_ID/session-search?q=x")"
+
+# ===========================================================================
+# 8. Rescue — GET /workspaces/:id/rescue (wsAuth)
+#    RFC internal#742. Fail-CLOSED contract: the e2e-api job has no
+#    MOLECULE_ORG_ID, so the handler returns 503 platform_misconfigured rather
+#    than leaking cross-org. That fail-closed behaviour IS the keyless contract
+#    we gate here (a regression that drops the org guard would flip this to a
+#    200/404 and turn this assertion RED). Failure mode: no auth → 401.
+# ===========================================================================
+echo "--- /rescue ---"
+BC=$(body_and_code GET "$BASE/workspaces/$WS_ID/rescue" "${AUTH[@]}")
+RES_CODE=$(printf '%s' "$BC" | tail -n1)
+RES_BODY=$(printf '%s' "$BC" | sed '$d')
+if [ "$RES_CODE" = "404" ]; then
+  # MOLECULE_ORG_ID was set in this environment → no-bundle path.
+  assert_contains "GET /rescue (no bundle → 404, org configured)" 'no rescue bundle' "$RES_BODY"
+else
+  # No MOLECULE_ORG_ID (the e2e-api default) → fail-closed 503.
+  assert_contains "GET /rescue (fail-closed 503 without MOLECULE_ORG_ID)" "503" "$RES_CODE"
+  assert_contains "GET /rescue (platform_misconfigured code)" 'platform_misconfigured' "$RES_BODY"
+fi
+assert_contains "GET /rescue (no auth → 401)" "401" "$(http_code GET "$BASE/workspaces/$WS_ID/rescue")"
+
+# ===========================================================================
+# 9. LLM billing-mode admin toggle — GET/PUT /admin/workspaces/:id/llm-billing-mode
+#    (AdminAuth). Flip to byok → read back override; bad UUID → 400; missing
+#    'mode' key → 400; unknown mode → 400.
+# ===========================================================================
+echo "--- /admin/workspaces/:id/llm-billing-mode ---"
+assert_contains "GET llm-billing-mode (resolves a mode)" '"resolved_mode"' \
+  "$(curl -s "$BASE/admin/workspaces/$WS_ID/llm-billing-mode" "${ADMIN_AUTH[@]}")"
+PUTBM=$(curl -s -X PUT "$BASE/admin/workspaces/$WS_ID/llm-billing-mode" -H "Content-Type: application/json" "${ADMIN_AUTH[@]}" \
+  -d '{"mode":"byok"}')
+assert_contains "PUT llm-billing-mode byok (override set)" '"workspace_override":"byok"' "$PUTBM"
+assert_contains "GET llm-billing-mode (byok persisted)" '"workspace_override":"byok"' \
+  "$(curl -s "$BASE/admin/workspaces/$WS_ID/llm-billing-mode" "${ADMIN_AUTH[@]}")"
+# Clear the override (null) so we don't leave fixture state skewed.
+curl -s -X PUT "$BASE/admin/workspaces/$WS_ID/llm-billing-mode" -H "Content-Type: application/json" "${ADMIN_AUTH[@]}" \
+  -d '{"mode":null}' >/dev/null
+# Failure: malformed UUID → 400.
+assert_contains "PUT llm-billing-mode (bad UUID → 400)" "400" \
+  "$(http_code PUT "$BASE/admin/workspaces/not-a-uuid/llm-billing-mode" -H 'Content-Type: application/json' "${ADMIN_AUTH[@]}" -d '{"mode":"byok"}')"
+# Failure: missing 'mode' key → 400.
+assert_contains "PUT llm-billing-mode (missing mode → 400)" "400" \
+  "$(http_code PUT "$BASE/admin/workspaces/$WS_ID/llm-billing-mode" -H 'Content-Type: application/json' "${ADMIN_AUTH[@]}" -d '{}')"
+# Failure: unknown mode string → 400.
+assert_contains "PUT llm-billing-mode (unknown mode → 400)" "400" \
+  "$(http_code PUT "$BASE/admin/workspaces/$WS_ID/llm-billing-mode" -H 'Content-Type: application/json' "${ADMIN_AUTH[@]}" -d '{"mode":"bogus-mode"}')"
+
+# ===========================================================================
+# 10. Lifecycle — Pause → Resume + Hibernate (wsAuth)
+#     Pause works backend-agnostically (StopWorkspaceAuto no-ops on no backend)
+#     → status=paused. Resume re-provisions: 200 provisioning when a provisioner
+#     is wired (the e2e-api host has Docker), or 503 provisioner-not-available
+#     otherwise — both are valid contracts, so accept either. Failure modes:
+#     resume a non-paused ws → 404; hibernate a non-online ws → 404.
+# ===========================================================================
+echo "--- lifecycle (resume / hibernate) ---"
+# Pause the (online) fixture → status paused.
+PA=$(curl -s -X POST "$BASE/workspaces/$WS_ID/pause" "${AUTH[@]}")
+assert_contains "POST /pause (online → paused)" '"status":"paused"' "$PA"
+# Resume the paused fixture — accept 200 provisioning OR 503 (no provisioner).
+BC=$(body_and_code POST "$BASE/workspaces/$WS_ID/resume" "${AUTH[@]}")
+RSM_CODE=$(printf '%s' "$BC" | tail -n1)
+RSM_BODY=$(printf '%s' "$BC" | sed '$d')
+if [ "$RSM_CODE" = "200" ]; then
+  assert_contains "POST /resume (paused → provisioning)" '"status":"provisioning"' "$RSM_BODY"
+elif [ "$RSM_CODE" = "503" ]; then
+  assert_contains "POST /resume (no provisioner → 503 contract)" 'provisioner not available' "$RSM_BODY"
+else
+  fail "POST /resume (expected 200 or 503)" "got HTTP $RSM_CODE — $RSM_BODY"
+fi
+# Failure: resume a workspace that is NOT paused → 404.
+# Only asserted when the resume above returned 200: that is the only branch
+# after which the fixture is known to have left 'paused' (provisioning/online).
+# After a 503 (no provisioner) the fixture may still be paused, so a second
+# resume is not a valid "not-paused" probe and could false-FAIL here.
+if [ "$RSM_CODE" = "200" ]; then
+  assert_contains "POST /resume (not-paused → 404)" "404" \
+    "$(http_code POST "$BASE/workspaces/$WS_ID/resume" "${AUTH[@]}")"
+else
+  echo "SKIP: POST /resume (not-paused → 404) — prior resume returned HTTP $RSM_CODE, fixture may still be paused"
+fi
+# Hibernate: bring the fixture back online first, then hibernate it.
+curl -s -X POST "$BASE/registry/register" -H "Content-Type: application/json" "${AUTH[@]}" \
+  -d "{\"id\":\"$WS_ID\",\"url\":\"https://example.com/keyless\",\"agent_card\":{\"name\":\"Keyless Fixture\",\"skills\":[{\"id\":\"noop\",\"name\":\"Noop\"}]}}" >/dev/null
+HB=$(curl -s -X POST "$BASE/workspaces/$WS_ID/hibernate" "${AUTH[@]}")
+assert_contains "POST /hibernate (online → hibernated)" '"status":"hibernated"' "$HB"
+# Failure: hibernate again (now hibernated, not online/degraded) → 404.
+assert_contains "POST /hibernate (not-hibernatable → 404)" "404" \
+  "$(http_code POST "$BASE/workspaces/$WS_ID/hibernate" "${AUTH[@]}")"
+# Failure: no bearer → 401.
+assert_contains "POST /resume (no auth → 401)" "401" "$(http_code POST "$BASE/workspaces/$WS_ID/resume")"
+
+# ---------------------------------------------------------------------------
+# Cleanup — delete the fixture (admin-gated DELETE + per-workspace bearer).
+# ---------------------------------------------------------------------------
+e2e_delete_workspace "$WS_ID" "Keyless Fixture" "${ADMIN_AUTH[@]}"
+
+echo ""
+echo "=== Results: $PASS passed, $FAIL failed ==="
+[ "$FAIL" -eq 0 ]
@@ -48,14 +48,42 @@ run_test "hermes → slash-form (derive-provider.sh contract)"       hermes
 run_test "codex → slash-form fallback"                             codex       "openai/gpt-4o"
 run_test "claude-code → OAuth/default alias"                      claude-code "sonnet"

+# BARE registered BYOK id (registry_gen.go:88), NOT colon `minimax:…`. On
+# claude-code the colon form is intentionally UNREGISTERED (the adapter can't
+# strip `minimax:`) and 422s create-validation (internal#718, job 295075);
+# bare resolves to provider=minimax BYOK. Pinned by the matrix test's
+# colon-vs-slash-vs-bare triple in derive_provider_matrix_test.go.
 got=$(unset E2E_MODEL_SLUG E2E_ANTHROPIC_API_KEY; E2E_MINIMAX_API_KEY="mx-test" pick_model_slug claude-code)
-assert_eq "claude-code + MiniMax key → MiniMax model"             "$got" "minimax:MiniMax-M2.7"
+assert_eq "claude-code + MiniMax key → bare registered MiniMax model" "$got" "MiniMax-M2.7"

 got=$(unset E2E_MODEL_SLUG E2E_MINIMAX_API_KEY; E2E_ANTHROPIC_API_KEY="sk-ant-test" pick_model_slug claude-code)
 assert_eq "claude-code + Anthropic API key → Anthropic API model" "$got" "claude-sonnet-4-6"

 got=$(unset E2E_MODEL_SLUG; E2E_MINIMAX_API_KEY="mx-priority" E2E_ANTHROPIC_API_KEY="sk-ant-loser" pick_model_slug claude-code)
-assert_eq "claude-code + both keys → MiniMax priority"            "$got" "minimax:MiniMax-M2.7"
+assert_eq "claude-code + both keys → MiniMax priority (bare)"     "$got" "MiniMax-M2.7"
+
+# ── seo-agent (claude-code-adapter template variant) ──
+# seo-agent shares the claude-code dispatch branch (it reuses the claude-code
+# adapter + the same copied providers block). Pin that it resolves IDENTICALLY
+# to claude-code for every key path so a future refactor can't accidentally
+# fork seo-agent's model selection from claude-code's.
+run_test "seo-agent → claude-code default alias"                  seo-agent   "sonnet"
+
+got=$(unset E2E_MODEL_SLUG E2E_ANTHROPIC_API_KEY; E2E_MINIMAX_API_KEY="mx-test" pick_model_slug seo-agent)
+assert_eq "seo-agent + MiniMax key → bare MiniMax model (==claude-code)" "$got" "MiniMax-M2.7"
+
+got=$(unset E2E_MODEL_SLUG E2E_MINIMAX_API_KEY; E2E_ANTHROPIC_API_KEY="sk-ant-test" pick_model_slug seo-agent)
+assert_eq "seo-agent + Anthropic key → Anthropic model (==claude-code)" "$got" "claude-sonnet-4-6"
+
+# ── google-adk (Gemini) ──
+# AI-Studio BYOK arm → bare gemini-2.5-pro (providers.yaml runtimes.google-adk
+# `google` arm). The platform/Vertex arm is selected via E2E_LLM_PATH=platform
+# (a platform: id), not this dispatch. Pin the bare form so a drift to the
+# platform id (which would change billing/route) is caught.
+run_test "google-adk → AI-Studio bare gemini id"                  google-adk  "gemini-2.5-pro"
+
+got=$(E2E_MODEL_SLUG="google_genai:gemini-2.5-pro" pick_model_slug google-adk)
+assert_eq "google-adk + E2E_MODEL_SLUG override (adapter spelling)" "$got" "google_genai:gemini-2.5-pro"

 # ── Fallback for unknown runtime ──
 # Picks slash-form (hermes-shaped) since hermes is the historical
@@ -28,6 +28,13 @@ PASS=0
 FAIL=0
 WSID=""

+# GET /workspaces (list) and POST /workspaces (create) are AdminAuth-gated
+# (router.go:165-166). The e2e-api CI job sets ADMIN_TOKEN on the platform
+# (fail-open OFF) and exports MOLECULE_ADMIN_TOKEN here, so these calls need the
+# admin bearer. Guarded if-set so a fail-open dev platform still works.
+ADMIN_AUTH=()
+e2e_admin_auth_args ADMIN_AUTH
+
 cleanup() {
  # Workspace teardown — best-effort, ignore errors so an unrelated CP
  # outage doesn't shadow a real test failure.
@@ -80,7 +87,7 @@ echo "=== Setup ==="
 # canvas. Find and delete any with this exact name so the test is safe to
 # re-run from any state. Match by name (not tag) so this also catches
 # leftovers created by older script versions.
-PRIOR=$(curl -s "$BASE/workspaces" | python3 -c '
+PRIOR=$(curl -s "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} | python3 -c '
 import json, sys
 try:
    print(" ".join(w["id"] for w in json.load(sys.stdin) if w.get("name") == "Notify E2E"))
@@ -96,7 +103,7 @@ done
 # feedback_workspace_model_required_no_platform_default_dynamic_credential_intake).
 # Body has no runtime → defaults to claude-code; pass the matching model
 # that the workspace-creation contract now requires.
-R=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \
+R=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -H "Content-Type: application/json" \
  -d '{"name":"Notify E2E","tier":1,"runtime":"external","external":true,"model":"sonnet"}')
 WSID=$(echo "$R" | python3 -c 'import json,sys;print(json.load(sys.stdin)["id"])' 2>/dev/null || true)
 [ -n "$WSID" ] || { echo "Failed to create workspace: $R"; exit 1; }
@@ -300,7 +300,14 @@ rows = json.load(sys.stdin)
 def text_of(r):
    body = r.get('request_body') or {}
    parts = (body.get('params') or {}).get('message', {}).get('parts') or []
-    return ''.join(p.get('text','') for p in parts if p.get('type')=='text')
+    # A2A v0.3 keys the Part discriminator on 'kind'; legacy senders used
+    # 'type'. ProxyA2A.normalizeA2APayload (#2251) rewrites 'type' -> 'kind'
+    # on ingest, so the stored request_body carries 'kind' even when the
+    # caller posted 'type'. Accept EITHER so this parser asserts on the text
+    # payload, not on which discriminator field the server happened to store.
+    def is_text(p):
+        return p.get('kind') == 'text' or p.get('type') == 'text'
+    return ''.join(p.get('text', '') for p in parts if is_text(p))
 if len(rows) < 2:
    print('NEED2_GOT_'+str(len(rows)))
 else:
@@ -309,6 +316,29 @@ else:
 check_eq "since_id feed orders ASC (oldest-new first, newest-new last)" \
  "hello-from-e2e-2|hello-from-e2e-3" "$ASC_FIRST"

+# Wire-contract gate (#2251): the caller posted parts with the LEGACY "type"
+# discriminator, but ProxyA2A.normalizeA2APayload rewrites "type" -> "kind"
+# (A2A v0.3) BEFORE the row is durably logged. Assert the stored request_body
+# carries "kind" and no longer carries "type", so a regression that drops the
+# rename — or a feed that stops storing the normalized body — fails loudly here
+# instead of silently feeding the polling agent an untagged Part. This is the
+# end-to-end half of the Go unit tests in a2a_proxy_test.go (which assert the
+# rename in isolation); this proves it survives the durable activity_logs path.
+DISC=$(echo "$ASC_RESP" | python3 -c "
+import json, sys
+rows = json.load(sys.stdin)
+kinds, types = [], []
+for r in rows:
+    body = r.get('request_body') or {}
+    parts = (body.get('params') or {}).get('message', {}).get('parts') or []
+    for p in parts:
+        if 'kind' in p: kinds.append(p['kind'])
+        if 'type' in p: types.append(p['type'])
+print(('kind' if kinds and not types else 'BAD') + ':' + ','.join(kinds) + '/' + ','.join(types))
+")
+check_eq "stored Part uses v0.3 'kind' discriminator, never legacy 'type' (#2251)" \
+  "kind:text,text/" "$DISC"
+
 # ---------- Phase 6: stale cursor returns 410 ----------
 echo ""
 echo "--- Phase 6: Stale / unknown cursor returns 410 ---"
@@ -7,12 +7,14 @@
 # extraction (and ongoing template work) can't silently break any
 # runtime.
 #
-# Runtimes covered: claude-code, codex, hermes, openclaw.
+# Runtimes covered: claude-code, codex, hermes, openclaw, google-adk.
 # claude-code + hermes have unique
 # provisioning quirks (claude-code OAuth, hermes 15-min cold-boot)
 # and stay first-class with their own run_<runtime> functions; the
-# OpenAI-backed runtimes share run_openai_runtime. Each phase skips cleanly
-# if its prerequisite secret is missing.
+# OpenAI-backed runtimes share run_openai_runtime. google-adk has its own
+# run_google_adk (it asserts manifest registration unconditionally, then drives
+# its AI-Studio BYOK live arm — keyless-Vertex needs platform WIF CI lacks).
+# Each phase skips cleanly if its prerequisite secret is missing.
 #
 # What this proves:
 #   1. Provisioning + container boot works for each runtime.
@@ -24,13 +26,76 @@
 # Each phase skips cleanly when its prerequisite secret is absent so a
 # partially-keyed env (e.g. CI without an OpenAI key) doesn't false-fail.
 #
+# REQUIRE-LIVE (false-green guard, mirrors CP serving-e2e's
+# SERVING_E2E_REQUIRE_LIVE semantics)
+# ------------------------------------------------------------------
+# Without a guard, an env with NO live secrets makes every phase SKIP,
+# leaving PASS=0 FAIL=0 — and the historical `[ "$FAIL" -eq 0 ]` gate
+# exits 0 (GREEN) while validating ZERO runtimes. That made the REQUIRED
+# `E2E API Smoke Test` merge gate pass without exercising a single
+# runtime (false-green).
+#
+# Fix: a real "validated arm" counter (VALIDATED) tracks runtimes that
+# actually ran AND produced a non-error A2A reply. With E2E_REQUIRE_LIVE=1:
+# if zero arms validated, the run exits NON-zero with a loud message.
+# Without it (E2E_REQUIRE_LIVE unset/0), a fully-skipped run stays a LOUD
+# skip + exit 0 for dev convenience.
+#
+# This zero-validated→RED decision is the load-bearing logic. It is factored
+# into evaluate_require_live_gate() (a pure function of $FAIL/$VALIDATED/
+# $E2E_REQUIRE_LIVE, defined before any platform I/O) and is REGRESSION-GATED
+# on every PR by tests/e2e/test_require_live_priority_gate_unit.sh, which
+# sources this file (E2E_PRIORITY_UNIT_SOURCE=1), sets the counters, and
+# asserts the gate's exit code — no platform, no provisioning, no network.
+# So the false-green can't silently come back: a revert of the guard fails CI.
+#
+# CI POSTURE (REQUIRE-LIVE ON — see .gitea/workflows/e2e-api.yml):
+# The live e2e-api job SETS E2E_REQUIRE_LIVE=1. The `mock` arm is the
+# CI-provisionable live-completion arm: it org-imports a mock workspace
+# (→online→canned A2A reply) with NO external secret. The only thing that
+# previously blocked it in CI was admin auth — POST /org/import and POST
+# /admin/workspaces/:id/tokens are AdminAuth-gated, and the job set no admin
+# token, so every admin call 401'd ("admin auth required"). The job now sets
+# ADMIN_TOKEN on the platform AND exports the matching MOLECULE_ADMIN_TOKEN
+# the scripts send, so mock validates end-to-end and VALIDATED>=1 holds on a
+# healthy platform — the REQUIRED `E2E API Smoke Test` gate now HONESTLY
+# validates a runtime. If the mock plumbing or the admin-auth wiring breaks,
+# the gate goes RED (not false-green). The zero-validated→RED decision is also
+# regression-gated WITHOUT provisioning by the bash unit test above, so a
+# revert of that logic still fails CI.
+#
+# LIVE ARMS (run when their prerequisite is present; opportunistic):
+#   - `mock` (run_mock) is the no-key REQUIRE-LIVE backbone: a virtual
+#     workspace (no container, no EC2, no provider) whose org-import path
+#     short-circuits to status='online' with a canned A2A reply. It validates
+#     in CI now that the e2e-api job wires an admin token (org-import + token
+#     mint are AdminAuth-gated), so it is the guaranteed >=1 validation.
+#   - MiniMax (E2E_MINIMAX_API_KEY, from MOLECULE_STAGING_MINIMAX_API_KEY) is
+#     an OPPORTUNISTIC best-effort real-LLM arm: registry-fragile in CI (422
+#     UNREGISTERED_MODEL_FOR_RUNTIME — see run_minimax header), so a miss is
+#     a best-effort MISS via bestfail() and does NOT red the gate.
+# The CI e2e-api job sets E2E_REQUIRE_LIVE=1: mock guarantees a validation, so
+# the REQUIRED gate is honest (RED if the mock plumbing/admin-auth breaks). The
+# zero-validated→RED logic is also regression-gated by the bash unit test above.
+#
 # Usage:
+#   # Enforce REQUIRE-LIVE locally (need >=1 arm to actually validate):
+#   E2E_REQUIRE_LIVE=1 E2E_MINIMAX_API_KEY=... \
+#     tests/e2e/test_priority_runtimes_e2e.sh
+#
+#   # Default (no enforcement): all-skip stays a LOUD skip + exit 0:
+#   tests/e2e/test_priority_runtimes_e2e.sh
+#
+#   # Other live arms (if their secrets are configured):
 #   CLAUDE_CODE_OAUTH_TOKEN=... E2E_OPENAI_API_KEY=... \
 #     tests/e2e/test_priority_runtimes_e2e.sh
 #
 #   # Run only one runtime
+#   E2E_RUNTIMES=mock        tests/e2e/test_priority_runtimes_e2e.sh
+#   E2E_RUNTIMES=minimax     tests/e2e/test_priority_runtimes_e2e.sh
 #   E2E_RUNTIMES=claude-code tests/e2e/test_priority_runtimes_e2e.sh
 #   E2E_RUNTIMES=hermes      tests/e2e/test_priority_runtimes_e2e.sh
+#   E2E_RUNTIMES=google-adk  tests/e2e/test_priority_runtimes_e2e.sh  # registration always; live arm needs E2E_GOOGLE_API_KEY
 #
 # Prereqs:
 #   - workspace-server on http://localhost:8080
@@ -41,13 +106,81 @@

 set -euo pipefail

-source "$(dirname "$0")/_lib.sh"
-
 PASS=0
 FAIL=0
 SKIP=0
+# VALIDATED counts runtimes that ACTUALLY ran end-to-end (provisioned,
+# reached online, AND returned a non-error A2A reply). Distinct from PASS,
+# which also counts sub-assertions like activity-log rows. This is the
+# signal the REQUIRE-LIVE gate keys off: VALIDATED==0 means we proved
+# nothing about any runtime, regardless of how many sub-asserts "passed".
+VALIDATED=0
 CREATED_WSIDS=()

+# evaluate_require_live_gate — the SINGLE source of the final exit decision.
+# Pure function of $FAIL, $VALIDATED, and $E2E_REQUIRE_LIVE; performs NO I/O
+# beyond the loud messages. Returns the exit code the script should exit with:
+#   - FAIL>0                       → 1 (a real failure is always red)
+#   - VALIDATED==0 + REQUIRE_LIVE  → 1 (false-green trap: proved nothing → RED)
+#   - VALIDATED==0 + !REQUIRE_LIVE → 0 (dev-convenience LOUD skip)
+#   - VALIDATED>=1                 → 0 (at least one arm validated end-to-end)
+# It is a function (not inline tail code) so test_require_live_priority_gate_unit.sh
+# can drive the REAL decision in isolation — set the counters, call this, assert
+# the return code — with no platform, no provisioning, no network. That makes the
+# zero-validated→RED logic a CI-gated regression contract: a future revert of it
+# fails the unit test on every PR. See that unit test for the fail-direction proof.
+evaluate_require_live_gate() {
+  # Any real failure is always red.
+  if [ "$FAIL" -ne 0 ]; then
+    return 1
+  fi
+
+  # REQUIRE-LIVE gate (mirrors CP serving-e2e SERVING_E2E_REQUIRE_LIVE).
+  # A run where no runtime validated proves nothing. In enforced mode
+  # (E2E_REQUIRE_LIVE=1) that MUST be red so the required `E2E API Smoke
+  # Test` gate can't be false-green on an all-skip / best-effort-miss run.
+  local require_live="${E2E_REQUIRE_LIVE:-0}"
+  if [ "$VALIDATED" -eq 0 ]; then
+    if [ "$require_live" = "1" ] || [ "$require_live" = "true" ]; then
+      echo "::error::E2E_REQUIRE_LIVE is set but ZERO runtimes were validated end-to-end." >&2
+      echo "         Every selected arm SKIPPED or was a best-effort miss, so this gate" >&2
+      echo "         validated nothing. Keep the no-key 'mock' arm in E2E_RUNTIMES (it is" >&2
+      echo "         in the default set) or wire a live arm via Gitea secrets" >&2
+      echo "         (CLAUDE_CODE_OAUTH_TOKEN / E2E_OPENAI_API_KEY / E2E_GOOGLE_API_KEY;" >&2
+      echo "         E2E_MINIMAX_API_KEY is best-effort) so >=1 runtime actually" >&2
+      echo "         provisions + replies. Failing RED instead of false-green." >&2
+      return 1
+    fi
+    # Dev convenience: no enforcement requested → loud skip, exit 0.
+    echo "SKIPPED: no live secrets present and E2E_REQUIRE_LIVE is not set — validated" >&2
+    echo "         zero runtimes. This is a dev-convenience pass; CI sets" >&2
+    echo "         E2E_REQUIRE_LIVE=1 to make zero-validated a hard failure." >&2
+    return 0
+  fi
+
+  echo "OK: $VALIDATED runtime(s) validated end-to-end."
+  return 0
+}
+
+# Source-guard: when sourced by the unit test (E2E_PRIORITY_UNIT_SOURCE=1) we
+# stop HERE — the counters + evaluate_require_live_gate are now defined, and we
+# must NOT fall through to _lib.sh's platform-dependent helpers or the live
+# pre-sweep curl below (there is no platform in the unit-test environment).
+if [ "${E2E_PRIORITY_UNIT_SOURCE:-0}" = "1" ]; then
+  return 0
+fi
+
+source "$(dirname "$0")/_lib.sh"
+
+# GET /workspaces (list, router.go:165) and POST /workspaces (create,
+# router.go:166) are AdminAuth-gated. The e2e-api CI job sets ADMIN_TOKEN on the
+# platform (fail-open OFF) and exports MOLECULE_ADMIN_TOKEN here, so the
+# pre-sweep list and every runtime-create must send the admin bearer or they
+# 401. run_mock uses POST /org/import (also admin-gated) and wires its own admin
+# auth inline. Guarded if-set so a fail-open dev platform still works.
+ADMIN_AUTH=()
+e2e_admin_auth_args ADMIN_AUTH
+
 cleanup() {
  # `set -u` + empty array would error on "${CREATED_WSIDS[@]}"; the
  # ${VAR[@]+"…"} form expands to nothing when the array is unset/empty
@@ -58,14 +191,26 @@ cleanup() {
 }
 trap cleanup EXIT

-pass()  { echo "  PASS — $1"; PASS=$((PASS + 1)); }
-fail()  { echo "  FAIL — $1"; echo "         $2"; FAIL=$((FAIL + 1)); }
-skip()  { echo "  SKIP — $1"; SKIP=$((SKIP + 1)); }
+pass()      { echo "  PASS — $1"; PASS=$((PASS + 1)); }
+fail()      { echo "  FAIL — $1"; echo "         $2"; FAIL=$((FAIL + 1)); }
+skip()      { echo "  SKIP — $1"; SKIP=$((SKIP + 1)); }
+# Mark a runtime as having been validated end-to-end (online + non-error
+# A2A reply). Also emits a PASS line so it shows in the results tally.
+validated() { echo "  PASS — $1"; PASS=$((PASS + 1)); VALIDATED=$((VALIDATED + 1)); }
+# bestfail() is for OPPORTUNISTIC (best-effort) arms whose failure must
+# NOT red the gate. It does NOT increment FAIL — it only logs + bumps
+# SKIP so the tally stays honest ("we tried, it didn't validate, but it
+# was never load-bearing"). Used by the MiniMax arm: MiniMax-create is
+# fragile in CI (registry-skewed model id, BYOK plumbing — see core#2263
+# and the run_minimax header), so a MiniMax miss is reported but never
+# fails the REQUIRED gate. The mock arm is the load-bearing validation
+# that keeps the gate honest; MiniMax is the real-LLM bonus on top.
+bestfail()  { echo "  BEST-EFFORT MISS — $1"; echo "         $2"; SKIP=$((SKIP + 1)); }

 # Pre-sweep any prior runs that left workspaces behind (same defence as
 # test_notify_attachments_e2e.sh: trap fires on normal exit, but a
 # SIGPIPE / kill -9 can bypass it).
-PRIOR=$(curl -s "$BASE/workspaces" | python3 -c '
+PRIOR=$(curl -s "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} | python3 -c '
 import json, sys
 try:
    print(" ".join(w["id"] for w in json.load(sys.stdin) if w.get("name","").startswith("Priority E2E ")))
@@ -188,7 +333,7 @@ print(json.dumps({'CLAUDE_CODE_OAUTH_TOKEN': os.environ['CLAUDE_CODE_OAUTH_TOKEN
 ")
  local resp wsid
  # model required (CTO 2026-05-22 SSOT) — pass the deleted DefaultModel("claude-code") value.
-  resp=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \
+  resp=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -H "Content-Type: application/json" \
    -d "{\"name\":\"Priority E2E (claude-code)\",\"runtime\":\"claude-code\",\"model\":\"sonnet\",\"tier\":1,\"secrets\":$secrets}")
  wsid=$(echo "$resp" | python3 -c 'import json,sys;print(json.load(sys.stdin).get("id",""))') || true
  if [ -z "$wsid" ]; then
@@ -220,9 +365,9 @@ print(json.dumps({'CLAUDE_CODE_OAUTH_TOKEN': os.environ['CLAUDE_CODE_OAUTH_TOKEN
  local reply
  if reply=$(send_test_prompt "$wsid" "$token"); then
    if echo "$reply" | grep -q "PONG"; then
-      pass "claude-code reply contains PONG"
+      validated "claude-code reply contains PONG"
    else
-      pass "claude-code reply non-empty (first 80 chars: ${reply:0:80})"
+      validated "claude-code reply non-empty (first 80 chars: ${reply:0:80})"
    fi
    assert_activity_logged "claude-code" "$wsid" "$token"
  else
@@ -254,7 +399,7 @@ print(json.dumps({
 }))
 ")
  local resp wsid
-  resp=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \
+  resp=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -H "Content-Type: application/json" \
    -d "{\"name\":\"Priority E2E (hermes)\",\"runtime\":\"hermes\",\"tier\":1,\"model\":\"openai/gpt-4o\",\"secrets\":$secrets}")
  wsid=$(echo "$resp" | python3 -c 'import json,sys;print(json.load(sys.stdin).get("id",""))') || true
  if [ -z "$wsid" ]; then
@@ -288,9 +433,9 @@ print(json.dumps({
  local reply
  if reply=$(send_test_prompt "$wsid" "$token"); then
    if echo "$reply" | grep -q "PONG"; then
-      pass "hermes reply contains PONG"
+      validated "hermes reply contains PONG"
    else
-      pass "hermes reply non-empty (first 80 chars: ${reply:0:80})"
+      validated "hermes reply non-empty (first 80 chars: ${reply:0:80})"
    fi
    assert_activity_logged "hermes" "$wsid" "$token"
  else
@@ -327,7 +472,7 @@ print(json.dumps({
 }))
 ")
  local resp wsid
-  resp=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \
+  resp=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -H "Content-Type: application/json" \
    -d "{\"name\":\"Priority E2E ($runtime)\",\"runtime\":\"$runtime\",\"tier\":1,\"model\":\"openai/gpt-4o-mini\",\"secrets\":$secrets}")
  wsid=$(echo "$resp" | python3 -c 'import json,sys;print(json.load(sys.stdin).get("id",""))') || true
  if [ -z "$wsid" ]; then
@@ -358,9 +503,9 @@ print(json.dumps({
  local reply
  if reply=$(send_test_prompt "$wsid" "$token"); then
    if echo "$reply" | grep -q "PONG"; then
-      pass "$runtime reply contains PONG"
+      validated "$runtime reply contains PONG"
    else
-      pass "$runtime reply non-empty (first 80 chars: ${reply:0:80})"
+      validated "$runtime reply non-empty (first 80 chars: ${reply:0:80})"
    fi
    assert_activity_logged "$runtime" "$wsid" "$token"
  else
@@ -371,18 +516,387 @@ print(json.dumps({
 run_codex()      { run_openai_runtime "codex"      "codex"; }
 run_openclaw()   { run_openai_runtime "openclaw"   "openclaw"; }

-WANT="${E2E_RUNTIMES:-claude-code codex hermes openclaw}"
+####################################################################
+# google-adk arm — Gemini. REGISTRATION asserted always; LIVE arm is
+# REQUIRED-when-keyed, LOUD-skip-when-absent (NEVER best-effort/fail-open).
+####################################################################
+# google-adk serves Gemini two ways (providers.yaml runtimes.google-adk):
+#   * platform arm  → keyless Vertex via the Molecule LLM proxy (server-side
+#     WIF mint, platform_managed billing — the org-default PROD path). It needs
+#     a platform WIF identity that CI does NOT have, so this arm does NOT drive
+#     the keyless-Vertex path (no fail-open arm — we never green a path we can't
+#     actually exercise).
+#   * google arm   → AI Studio API-key BYOK (the tenant's OWN GOOGLE/GEMINI
+#     key), bare `gemini-2.5-pro`. This is the CI-/staging-exercisable path and
+#     is what the LIVE portion below drives when E2E_GOOGLE_API_KEY is present.
+#
+# Two-part contract (core#2332 P0.1 — google-adk previously had ZERO e2e):
+#   1. REGISTRATION (always, NO live creds): google-adk MUST be present in the
+#      deployed manifest.json's workspace_templates — that file is the SSOT the
+#      Create-handler's runtime allowlist is derived from (runtime_registry.go::
+#      loadRuntimesFromManifest). If it is absent, a google-adk create 422s
+#      RUNTIME_UNSUPPORTED, so registration is the precondition for ANY serving.
+#      Asserting it offline means even a key-less CI run proves google-adk is
+#      registered (a regression that drops it from the manifest reds the gate).
+#      This does NOT bump VALIDATED — registration is not end-to-end serving.
+#   2. LIVE (REQUIRED-when-keyed): with E2E_GOOGLE_API_KEY set, provision the
+#      AI-Studio BYOK arm end-to-end (online + non-error A2A reply). A miss here
+#      is a HARD fail() (fail-closed-if-present), exactly like the claude-code /
+#      hermes / openai arms — NOT a best-effort miss. Without the key the live
+#      portion is a LOUD skip() (dev-convenience), same as every keyed arm.
+run_google_adk() {
+  echo ""
+  echo "=== google-adk (Gemini) — registration + AI-Studio BYOK happy path ==="
+
+  # ── Part 1: REGISTRATION (always; no live creds needed) ──────────────────
+  # Assert google-adk is in the manifest.json workspace_templates SSOT (the
+  # Create-handler allowlist source). WORKSPACE_MANIFEST_PATH override mirrors
+  # the server's own env (runtime_registry.go::manifestPath); otherwise resolve
+  # the monorepo-root manifest.json relative to this script (tests/e2e/ -> repo
+  # root is two levels up).
+  local manifest="${WORKSPACE_MANIFEST_PATH:-$(cd "$(dirname "$0")/../.." && pwd)/manifest.json}"
+  if [ ! -f "$manifest" ]; then
+    fail "google-adk registration" "manifest.json not found at $manifest (cannot verify the runtime allowlist SSOT)"
+    return 0
+  fi
+  local registered
+  registered=$(python3 -c '
+import json, sys
+try:
+    m = json.load(open(sys.argv[1]))
+except Exception as e:
+    print("ERR:%s" % e); sys.exit(0)
+names = [t.get("name") for t in m.get("workspace_templates", [])]
+# loadRuntimesFromManifest strips the "-default" vanilla suffix; match the same.
+norm = {n[:-len("-default")] if isinstance(n, str) and n.endswith("-default") else n for n in names}
+print("yes" if "google-adk" in norm else "no:%s" % sorted(n for n in norm if n))
+' "$manifest")
+  if [ "$registered" != "yes" ]; then
+    fail "google-adk registered in manifest.json workspace_templates" \
+      "google-adk absent from the Create-handler runtime allowlist SSOT ($registered) — a create would 422 RUNTIME_UNSUPPORTED"
+    return 0
+  fi
+  pass "google-adk registered in manifest.json workspace_templates (Create-handler allowlist SSOT)"
+
+  # ── Part 2: LIVE arm (REQUIRED-when-keyed, LOUD-skip-when-absent) ─────────
+  # AI-Studio BYOK path: the tenant's own GOOGLE_API_KEY/GEMINI_API_KEY. The
+  # keyless-Vertex PROD path needs a platform WIF identity CI lacks, so it is
+  # NOT exercised here (no fail-open arm). Same env name the staging-full-saas
+  # google-adk arm uses (E2E_GOOGLE_API_KEY).
+  if [ -z "${E2E_GOOGLE_API_KEY:-}" ]; then
+    skip "E2E_GOOGLE_API_KEY not set (google-adk live arm needs an AI-Studio Gemini key; keyless-Vertex needs platform WIF, not available in CI)"
+    return 0
+  fi
+  local secrets
+  secrets=$(python3 -c "
+import json, os
+# The google provider (providers.yaml) reads GEMINI_API_KEY / GOOGLE_API_KEY and
+# dials generativelanguage.googleapis.com with the tenant's OWN key. Inject under
+# both names the provider accepts so the adapter resolves regardless of order.
+k = os.environ['E2E_GOOGLE_API_KEY']
+print(json.dumps({'GOOGLE_API_KEY': k, 'GEMINI_API_KEY': k}))
+")
+  local resp wsid
+  # Bare `gemini-2.5-pro` is the registered AI-Studio BYOK id for google-adk
+  # (providers.yaml runtimes.google-adk `google` arm). DeriveProvider routes the
+  # bare gemini- id to the google vendor (third_party_anthropic_compat, BYOK).
+  resp=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -H "Content-Type: application/json" \
+    -d "{\"name\":\"Priority E2E (google-adk)\",\"runtime\":\"google-adk\",\"tier\":1,\"model\":\"gemini-2.5-pro\",\"secrets\":$secrets}")
+  wsid=$(echo "$resp" | python3 -c 'import json,sys;print(json.load(sys.stdin).get("id",""))') || true
+  if [ -z "$wsid" ]; then
+    fail "create google-adk workspace" "$resp"
+    return 0
+  fi
+  CREATED_WSIDS+=("$wsid")
+  echo "  workspace=$wsid"
+
+  # google-adk runtime image cold boot ~30-90s (image already pulled).
+  local final
+  final=$(wait_for_status "$wsid" "online failed" 240) || true
+  if [ "$final" != "online" ]; then
+    fail "google-adk workspace reaches online" "final status: $final"
+    return 0
+  fi
+  pass "google-adk workspace reaches online"
+
+  local token
+  token=$(echo "$resp" | e2e_extract_token) || true  # helper error must surface via fail() below, not abort via set -e
+  if [ -z "$token" ]; then
+    token=$(e2e_mint_workspace_token "$wsid") || true
+  fi
+  if [ -z "$token" ]; then
+    fail "resolve google-adk workspace token" "no token returned"
+    return 0
+  fi
+
+  local reply
+  if reply=$(send_test_prompt "$wsid" "$token"); then
+    if echo "$reply" | grep -q "PONG"; then
+      validated "google-adk reply contains PONG"
+    else
+      validated "google-adk reply non-empty (first 80 chars: ${reply:0:80})"
+    fi
+    assert_activity_logged "google-adk" "$wsid" "$token"
+  else
+    fail "google-adk reply" "${reply:-<empty or error>}"
+  fi
+}
+
+####################################################################
+# Mock arm — the GUARANTEED, always-available REQUIRE-LIVE backbone.
+####################################################################
+# The mock runtime (workspace-server/internal/handlers/mock_runtime.go)
+# is a virtual workspace: NO container, NO EC2, NO LLM key. The org-import
+# path (createWorkspaceTree, org_import.go) short-circuits a runtime=mock
+# workspace straight to status='online' (no provisioner needed), and the
+# A2A proxy (a2a_proxy.go → handleMockA2A) synthesises a deterministic
+# canned JSON-RPC reply with logActivity=true (writes the activity_logs
+# row too). That makes mock the perfect REQUIRE-LIVE backbone: it
+# exercises the SAME plumbing every real runtime needs to pass —
+#   provision-decision → status=online → A2A round-trip → activity_logs —
+# without depending on any external provider key or LLM availability. It
+# is GREEN on a healthy platform and RED only if that plumbing genuinely
+# breaks (DB insert, status flip, A2A proxy, activity logging). No more
+# false-green (zero-validated is impossible when mock works), and no more
+# can't-go-green (mock needs no secret, so it always runs in CI).
+#
+# Why org-import (POST /org/import) instead of POST /workspaces:
+#   The mock→online short-circuit lives ONLY in createWorkspaceTree
+#   (org_import.go). The single-workspace Create handler (workspace.go)
+#   has no mock branch — it routes runtime=mock through
+#   provisionWorkspaceAuto, which in CI's local-build mode has no mock
+#   image and would never reach online. Org-import is the supported path
+#   to a live mock workspace, so the arm drives it.
+#
+# The canned reply is one of the "On it!" variants (NOT "PONG"), so this
+# arm validates on the non-empty / non-error branch — that is the real
+# contract for mock (it proves the plumbing, not an LLM's instruction-
+# following).
+run_mock() {
+  echo ""
+  echo "=== mock (no-key plumbing backbone) happy path ==="
+  # No secret gate — mock ALWAYS runs. That is the whole point: it is the
+  # required-validation arm that keeps E2E_REQUIRE_LIVE honest without a key.
+
+  # Inline single-workspace mock org. model is a required field on the
+  # org-import contract (createWorkspaceTree fails-closed without one);
+  # mock never USES the model, so any non-empty value satisfies the
+  # contract. The org-import path does not run the Create handler's
+  # registry model-validation, so "mock" is accepted as-is.
+  # POST /org/import is AdminAuth-gated (router.go:778). When the platform has
+  # ADMIN_TOKEN set (as the e2e-api CI job now does), an unauthenticated import
+  # 401s with {"error":"admin auth required"}. Send the same admin bearer the
+  # mint helper uses (MOLECULE_ADMIN_TOKEN, ADMIN_TOKEN fallback) — guarded so a
+  # bootstrap/dev platform with no admin token (fail-open) still works.
+  local admin_bearer="${MOLECULE_ADMIN_TOKEN:-${ADMIN_TOKEN:-}}"
+  local admin_auth=()
+  [ -n "$admin_bearer" ] && admin_auth=(-H "Authorization: Bearer $admin_bearer")
+  local import_resp wsid
+  import_resp=$(curl -s -X POST "$BASE/org/import" -H "Content-Type: application/json" \
+    ${admin_auth[@]+"${admin_auth[@]}"} \
+    -d '{
+      "template": {
+        "name": "Priority E2E Mock Org",
+        "defaults": {"runtime": "mock", "model": "mock", "tier": 1},
+        "workspaces": [
+          {"name": "Priority E2E (mock)", "runtime": "mock", "model": "mock", "tier": 1}
+        ]
+      }
+    }')
+  # org-import returns {"org":..., "count":N, "workspaces":[{"id":...,
+  # "name":...,"tier":...}, ...]} (handlers/org.go:898-901). Pull the id of
+  # the single workspace we declared. (Older "results" key fallback kept for
+  # forward/back compat in case the response shape is ever versioned.)
+  wsid=$(echo "$import_resp" | python3 -c '
+import json, sys
+try:
+    d = json.load(sys.stdin)
+except Exception:
+    sys.exit(0)
+for r in (d.get("workspaces") or d.get("results") or []):
+    if r.get("name") == "Priority E2E (mock)" and r.get("id"):
+        print(r["id"]); break
+') || true
+  if [ -z "$wsid" ]; then
+    # mock org-import is the REQUIRE-LIVE backbone and is EXPECTED to succeed in
+    # CI now that the e2e-api job wires an admin token (ADMIN_TOKEN on the
+    # platform + MOLECULE_ADMIN_TOKEN sent above). A missing id here is a REAL
+    # break (admin-auth wiring, org-import create, or the mock short-circuit) and
+    # MUST red the gate — so this is a hard fail(), not a best-effort miss. Under
+    # E2E_REQUIRE_LIVE=1 a FAIL also forces a non-zero exit via
+    # evaluate_require_live_gate. Surface the response so the break is visible
+    # (e.g. {"error":"admin auth required"} would mean the token wiring regressed).
+    fail "create mock workspace (org-import)" "$import_resp"
+    return 0
+  fi
+  CREATED_WSIDS+=("$wsid")
+  echo "  workspace=$wsid"
+
+  # Mock goes straight to online (no container boot) — a short budget is
+  # plenty; if it is NOT online quickly the mock short-circuit in
+  # createWorkspaceTree is genuinely broken and the gate SHOULD red.
+  local final
+  final=$(wait_for_status "$wsid" "online failed" 60) || true
+  if [ "$final" != "online" ]; then
+    fail "mock workspace reaches online" "final status: $final (mock should go online without provisioning)"
+    return 0
+  fi
+  pass "mock workspace reaches online"
+
+  # Mock workspaces are not created with an inline token; mint one via the
+  # admin endpoint (same fallback every other arm uses).
+  local token
+  token=$(e2e_mint_workspace_token "$wsid") || true
+  if [ -z "$token" ]; then
+    fail "resolve mock workspace token" "no token returned from POST /admin/workspaces/:id/tokens"
+    return 0
+  fi
+
+  # A2A round-trip. The mock proxy returns a canned non-error reply (one
+  # of the "On it!" variants) — NOT "PONG" — so we validate on the
+  # non-empty branch. A non-error, non-empty reply means the A2A proxy
+  # short-circuit + reply-shape contract are intact end-to-end.
+  local reply
+  if reply=$(send_test_prompt "$wsid" "$token"); then
+    validated "mock reply non-empty (canned; first 80 chars: ${reply:0:80})"
+    assert_activity_logged "mock" "$wsid" "$token"
+  else
+    fail "mock reply" "${reply:-<empty or error>} (mock A2A short-circuit should always return a canned reply)"
+  fi
+}
+
+####################################################################
+# MiniMax live arm — OPPORTUNISTIC (best-effort) real-LLM arm.
+####################################################################
+# NOTE: this is now a BEST-EFFORT arm, not the REQUIRE-LIVE backbone.
+# mock (run_mock above) is the guaranteed, no-key validation that keeps
+# the gate honest. This arm uses the BARE registered BYOK id `MiniMax-M2.7`
+# (NOT the colon `minimax:MiniMax-M2.7`): on claude-code the colon form is
+# INTENTIONALLY unregistered — the claude-code adapter cannot strip the
+# `minimax:` prefix, so DeriveProvider rejects it 422
+# UNREGISTERED_MODEL_FOR_RUNTIME before any provisioning (provider-registry
+# SSOT, internal#718; pinned by derive_provider_matrix_test.go's
+# colon-vs-slash-vs-bare triple, and observed on real staging job 295075).
+# The bare id is in claude-code's `minimax` arm (registry_gen.go:88
+# Models=[MiniMax-M2,MiniMax-M2.7,MiniMax-M2.7-highspeed,MiniMax-M3]) and
+# derives provider=minimax (BYOK via MINIMAX_API_KEY), so create-validation
+# accepts it. This arm stays BEST-EFFORT (bestfail, non-gating) for transient
+# MiniMax provisioning / backend issues — mock carries the REQUIRED gate; if
+# MiniMax DOES come up it validates as a bonus real-LLM check.
+# Drives the claude-code runtime against MiniMax (BYOK) using the
+# already-present Gitea secret MOLECULE_STAGING_MINIMAX_API_KEY,
+# surfaced into the env as E2E_MINIMAX_API_KEY (same name + secret the
+# staging-smoke / continuous-synth canaries use — see staging-smoke.yml
+# and continuous-synth-e2e.yml). NO new credential is introduced.
+#
+# Why claude-code + MiniMax BYOK works as the real-LLM bonus arm (mock carries the gate):
+#   - claude-code's `minimax` provider (providers.yaml / registry_gen.go)
+#     is third_party_anthropic_compat: it reads MINIMAX_API_KEY at boot
+#     and routes ANTHROPIC_BASE_URL → api.minimax.io/anthropic. So the
+#     ONLY tenant secret needed is {"MINIMAX_API_KEY": <key>} — exactly
+#     the SECRETS_JSON branch test_staging_full_saas.sh uses.
+#   - Model id is the BARE `MiniMax-M2.7`, the registered BYOK arm for
+#     claude-code (registry_gen.go:88 Runtimes["claude-code"]["minimax"]
+#     Models). DeriveProvider routes bare → provider=minimax (BYOK). The
+#     colon-namespaced `minimax:MiniMax-M2.7` is UNREGISTERED on claude-code
+#     (the adapter can't strip `minimax:`; internal#718) and 422s create —
+#     it is only the correct BYOK id on openclaw/hermes, which DO strip it.
+run_minimax() {
+  echo ""
+  echo "=== minimax (claude-code BYOK) happy path ==="
+  if [ -z "${E2E_MINIMAX_API_KEY:-}" ]; then
+    skip "E2E_MINIMAX_API_KEY not set (MiniMax live arm needs the MiniMax key)"
+    return 0
+  fi
+  local secrets
+  secrets=$(python3 -c "
+import json, os
+# claude-code's minimax provider (third_party_anthropic_compat) reads
+# MINIMAX_API_KEY and points ANTHROPIC_BASE_URL at api.minimax.io/anthropic
+# at boot — so the ONLY tenant secret needed is the MiniMax key itself.
+print(json.dumps({'MINIMAX_API_KEY': os.environ['E2E_MINIMAX_API_KEY']}))
+")
+  local resp wsid
+  # BARE registered BYOK model id `MiniMax-M2.7` (registry_gen.go:88). The
+  # colon form `minimax:MiniMax-M2.7` is UNREGISTERED on claude-code (adapter
+  # can't strip `minimax:`; internal#718) and 422s create — bare derives
+  # provider=minimax (BYOK via MINIMAX_API_KEY) and passes create-validation.
+  resp=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -H "Content-Type: application/json" \
+    -d "{\"name\":\"Priority E2E (minimax)\",\"runtime\":\"claude-code\",\"model\":\"MiniMax-M2.7\",\"tier\":1,\"secrets\":$secrets}")
+  wsid=$(echo "$resp" | python3 -c 'import json,sys;print(json.load(sys.stdin).get("id",""))') || true
+  if [ -z "$wsid" ]; then
+    # BEST-EFFORT: real MiniMax create/provision can still miss on transient
+    # backend / provisioning issues (the bare model id itself is registered —
+    # see header). Do NOT red the gate; mock is the required backbone. Report
+    # the create response so any miss is visible.
+    bestfail "create minimax workspace (best-effort; mock carries the gate)" "$resp"
+    return 0
+  fi
+  CREATED_WSIDS+=("$wsid")
+  echo "  workspace=$wsid"
+
+  # claude-code runtime image is already pulled; cold boot ~30-90s. The
+  # first MiniMax cold-call can be slow but that's covered by send_test_prompt's
+  # --max-time 180.
+  local final
+  final=$(wait_for_status "$wsid" "online failed" 240) || true
+  if [ "$final" != "online" ]; then
+    bestfail "minimax workspace reaches online (best-effort)" "final status: $final"
+    return 0
+  fi
+  pass "minimax workspace reaches online"
+
+  local token
+  token=$(echo "$resp" | e2e_extract_token) || true  # best-effort: a helper error must reach bestfail below, not abort via set -e
+  if [ -z "$token" ]; then
+    token=$(e2e_mint_workspace_token "$wsid") || true
+  fi
+  if [ -z "$token" ]; then
+    bestfail "resolve minimax workspace token (best-effort)" "no token returned"
+    return 0
+  fi
+
+  local reply
+  if reply=$(send_test_prompt "$wsid" "$token"); then
+    if echo "$reply" | grep -q "PONG"; then
+      validated "minimax reply contains PONG"
+    else
+      validated "minimax reply non-empty (first 80 chars: ${reply:0:80})"
+    fi
+    assert_activity_logged "minimax" "$wsid" "$token"
+  else
+    bestfail "minimax reply (best-effort)" "${reply:-<empty or error>}"
+  fi
+}
+
+# `mock` runs FIRST and by default: it is the no-key REQUIRE-LIVE backbone
+# that guarantees >=1 validation on a healthy platform (see run_mock). The
+# real-LLM arms (claude-code/codex/hermes/openclaw/minimax/google-adk) run if
+# their secrets are present and add real-provider coverage on top; minimax is
+# best-effort (never reds the gate). google-adk ALSO asserts its registration
+# unconditionally (no key needed), then drives its AI-Studio BYOK live arm as a
+# REQUIRED-when-keyed (fail-closed-if-present), LOUD-skip-when-absent arm.
+WANT="${E2E_RUNTIMES:-mock claude-code codex hermes openclaw minimax google-adk}"
 for r in $WANT; do
  case "$r" in
+    mock)        run_mock ;;
    claude-code) run_claude_code ;;
    codex)       run_codex ;;
    hermes)      run_hermes ;;
    openclaw)    run_openclaw ;;
-    all)         run_claude_code; run_codex; run_hermes; run_openclaw ;;
+    minimax)     run_minimax ;;
+    google-adk)  run_google_adk ;;
+    all)         run_mock; run_claude_code; run_codex; run_hermes; run_openclaw; run_minimax; run_google_adk ;;
    *) echo "unknown runtime in E2E_RUNTIMES: $r" >&2; exit 2 ;;
  esac
 done

 echo ""
-echo "=== Results: $PASS passed, $FAIL failed, $SKIP skipped ==="
-[ "$FAIL" -eq 0 ]
+echo "=== Results: $PASS passed, $FAIL failed, $SKIP skipped, $VALIDATED runtime(s) validated end-to-end ==="
+
+# Final exit decision lives in evaluate_require_live_gate (defined at the top of
+# this file, before any platform I/O) so the same logic is unit-tested in
+# isolation by test_require_live_priority_gate_unit.sh. Mirror its return code
+# into the process exit code.
+evaluate_require_live_gate
+exit $?
@@ -389,8 +389,24 @@ INSTANCE_ID_GRACE_SECS="${E2E_INSTANCE_ID_GRACE_SECS:-45}"
 WS_LAST_STATUS=""
 while true; do
  if [ "$(date +%s)" -gt "$ONLINE_DEADLINE" ]; then
+    # Boot-failure diagnostic burst (#2310-class): last_sample_error is often
+    # EMPTY for a config-resolution failure (the agent never sampled — it
+    # failed before its first heartbeat), so a bare "err=" tells us nothing
+    # (run 223233). Surface the FULL workspace record + every plausible error
+    # field so the actual reason (e.g. unservable provider, missing key, wrong
+    # model arm) is visible without re-running.
    WS_LAST_ERR=$(ws_field "$WS_ID" "last_sample_error")
-    fail "Workspace $WS_ID never reached status=online within ${WORKSPACE_ONLINE_TIMEOUT_SECS}s (last status=$WS_LAST_STATUS, err=$WS_LAST_ERR)"
+    log "── DIAGNOSTIC BURST (step 4 — workspace never reached online) ──"
+    log "    model=$MODEL_SLUG  llm_path=${E2E_LLM_PATH:-platform}  secrets=$([ "$SECRETS_JSON" = '{}' ] && echo '(none)' || echo '(set)')"
+    for f in status last_sample_error last_error error provisioning_error instance_id instance_status; do
+      log "    ${f}=$(ws_field "$WS_ID" "$f")"
+    done
+    log "    full record:"
+    tenant_call GET "/workspaces/$WS_ID" 2>/dev/null \
+      | python3 -m json.tool 2>/dev/null | sed 's/^/      /' \
+      || log "      (could not fetch /workspaces/$WS_ID)"
+    log "── END DIAGNOSTIC ──"
+    fail "Workspace $WS_ID never reached status=online within ${WORKSPACE_ONLINE_TIMEOUT_SECS}s (last status=$WS_LAST_STATUS, err=$WS_LAST_ERR; see diagnostic burst above)"
  fi
  WS_STATUS=$(ws_field "$WS_ID" "status")
  if [ "$WS_STATUS" != "$WS_LAST_STATUS" ]; then
@@ -0,0 +1,124 @@
+#!/usr/bin/env bash
+# Fail-direction / load-bearing proof for the E2E_REQUIRE_LIVE
+# fail-closed-on-skip guard in test_staging_full_saas.sh.
+#
+# WHY (harden/e2e-staging-saas-failclosed): the staging SaaS E2E is being
+# hardened to become a HARD merge-gate. A gate that can reach its final `ok`
+# WITHOUT having actually exercised a provision→online→A2A cycle is a
+# false-green — it would let a refactor that short-circuits the lifecycle
+# (or a skip path that swallows it) report PASS. require_live_or_die() is the
+# guard; this test proves it FAILS (exit 5) when milestones are missing and
+# PASSES when all fired — the watch-it-fail counterpart the dev-SOP requires.
+#
+# Runs entirely offline (no LLM, no network, no provisioning) — pure shell
+# logic — so it can run on every PR in the fast lane and locally via `bash`.
+set -uo pipefail
+
+# Scratch dir for the generated guard-runner stubs. EXIT trap guarantees
+# cleanup even when an assertion exits the test non-zero (lint_cleanup_traps).
+TMPDIR_E2E=$(mktemp -d -t require-live-guard-XXXXXX)
+trap 'rm -rf "$TMPDIR_E2E"' EXIT INT TERM
+
+PASS=0
+FAIL=0
+
+# Reproduce the EXACT guard logic from test_staging_full_saas.sh. Kept in
+# lockstep with the host script: if the host logic changes, this test must
+# change with it (and a divergence is itself a signal to re-prove the gate).
+make_guard_runner() {
+  cat <<'EOF'
+REQUIRE_LIVE="${E2E_REQUIRE_LIVE:-0}"
+LIVE_MILESTONES=""
+live_milestone() {
+  case " $LIVE_MILESTONES " in
+    *" $1 "*) ;;
+    *) LIVE_MILESTONES="$LIVE_MILESTONES $1" ;;
+  esac
+}
+require_live_or_die() {
+  [ "$REQUIRE_LIVE" = "1" ] || return 0
+  local required="provisioned tenant_online workspace_online a2a_roundtrip"
+  local m missing=""
+  for m in $required; do
+    case " $LIVE_MILESTONES " in
+      *" $m "*) ;;
+      *) missing="$missing $m" ;;
+    esac
+  done
+  if [ -n "$missing" ]; then
+    echo "MISSING:${missing}" >&2
+    exit 5
+  fi
+}
+EOF
+}
+
+# run_case <E2E_REQUIRE_LIVE value> <space-separated milestones to stamp>
+# echoes the observed exit code.
+run_case() {
+  local require_live="$1"; shift
+  local milestones="$1"; shift || true
+  local stub observed m
+  stub=$(mktemp "$TMPDIR_E2E/stub.XXXXXX")
+  {
+    echo "#!/usr/bin/env bash"
+    echo "set -uo pipefail"
+    make_guard_runner
+    for m in $milestones; do
+      echo "live_milestone $m"
+    done
+    echo "require_live_or_die"
+    echo 'echo REACHED_END'
+  } > "$stub"
+  E2E_REQUIRE_LIVE="$require_live" bash "$stub" >/dev/null 2>&1
+  observed=$?
+  rm -f "$stub"
+  echo "$observed"
+}
+
+assert_rc() {
+  local label="$1" require_live="$2" milestones="$3" expected="$4"
+  local observed
+  observed=$(run_case "$require_live" "$milestones")
+  if [ "$observed" = "$expected" ]; then
+    echo "  ✓ $label: REQUIRE_LIVE=$require_live milestones='$milestones' → rc=$observed"
+    PASS=$((PASS+1))
+  else
+    echo "  ✗ $label: REQUIRE_LIVE=$require_live milestones='$milestones' expected=$expected OBSERVED=$observed" >&2
+    FAIL=$((FAIL+1))
+  fi
+}
+
+echo "=== E2E_REQUIRE_LIVE fail-closed-on-skip guard proof ==="
+echo
+
+# DECISIVE (false-green trap): REQUIRE_LIVE=1 but NO lifecycle ran → exit 5.
+assert_rc "require-live, nothing ran → exit 5 (the false-green trap)" \
+  1 "" 5
+
+# REQUIRE_LIVE=1 with a partial lifecycle (provisioned but no A2A) → exit 5.
+assert_rc "require-live, partial lifecycle → exit 5" \
+  1 "provisioned tenant_online workspace_online" 5
+
+# REQUIRE_LIVE=1 with every required milestone → pass (rc=0).
+assert_rc "require-live, full lifecycle → pass" \
+  1 "provisioned tenant_online workspace_online a2a_roundtrip" 0
+
+# Idempotency: duplicate stamps don't break membership; full set still passes.
+assert_rc "require-live, duplicate stamps still pass" \
+  1 "provisioned provisioned tenant_online workspace_online a2a_roundtrip a2a_roundtrip" 0
+
+# Guard is a no-op when CI did not demand a live run: a non-live local run
+# with nothing stamped must NOT exit 5 (we don't break local/debug runs).
+assert_rc "no require-live, nothing ran → pass (guard is opt-in)" \
+  0 "" 0
+assert_rc "require-live unset-equivalent (0), partial → pass" \
+  0 "provisioned" 0
+
+# Extra unknown milestone is harmless as long as required set is present.
+assert_rc "require-live, extra milestone tolerated" \
+  1 "provisioned tenant_online workspace_online a2a_roundtrip extra_thing" 0
+
+echo
+echo "=== Results: $PASS passed, $FAIL failed ==="
+[ "$FAIL" -eq 0 ]
@@ -0,0 +1,114 @@
+#!/usr/bin/env bash
+# Fail-direction / load-bearing proof for the E2E_REQUIRE_LIVE zero-validated
+# gate in test_priority_runtimes_e2e.sh (the REQUIRED `E2E API Smoke Test`).
+#
+# WHY (harden/enforce-ci-gates-core-v2, PR #2286): the priority-runtimes E2E's
+# only historical exit gate was `[ "$FAIL" -eq 0 ]`. When every runtime SKIPs
+# because no live secret is present — exactly what the CI step did — PASS=0
+# FAIL=0 and the script exited 0 (GREEN) while validating ZERO runtimes. The
+# REQUIRED merge gate was therefore false-green: passing without exercising a
+# single runtime. The fix adds a VALIDATED counter and makes a zero-validated
+# run RED when E2E_REQUIRE_LIVE is set.
+#
+# That zero-validated→RED decision lives in evaluate_require_live_gate() in
+# test_priority_runtimes_e2e.sh. CI cannot prove it via a live arm — the CI
+# substrate can't provision ANY runtime end-to-end (MiniMax 422, mock org-
+# import create fails, claude-code needs a key CI lacks), so the live e2e-api
+# job does NOT force E2E_REQUIRE_LIVE (that would red the required gate for
+# everyone). This UNIT test is the regression coverage instead: it drives the
+# REAL evaluate_require_live_gate() function — not a copy — in isolation by
+# sourcing the script with E2E_PRIORITY_UNIT_SOURCE=1 (which stops before any
+# platform I/O), setting the counters, and asserting the gate's return code.
+#
+# Because it exercises the actual function, a future revert of the zero-
+# validated→RED logic in test_priority_runtimes_e2e.sh fails THIS test on
+# every PR — so the false-green can't silently come back.
+#
+# Runs entirely offline (no LLM, no network, no provisioning) — pure shell
+# logic — so it runs on every PR in the fast lane and locally via `bash`.
+set -uo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+GATE_SCRIPT="$SCRIPT_DIR/test_priority_runtimes_e2e.sh"
+
+if [ ! -f "$GATE_SCRIPT" ]; then
+  echo "FATAL: cannot find $GATE_SCRIPT" >&2
+  exit 2
+fi
+
+PASS=0
+FAIL=0
+
+# run_case <E2E_REQUIRE_LIVE value> <VALIDATED count> <FAIL count>
+# Sources the REAL test_priority_runtimes_e2e.sh under the unit source-guard
+# (E2E_PRIORITY_UNIT_SOURCE=1 → it returns right after defining the counters
+# and evaluate_require_live_gate(), before _lib.sh / the live pre-sweep curl),
+# sets the counters to the scenario, calls the real gate, and echoes the
+# return code. Each case runs in a fresh `bash -c` so set -e/-u inside the
+# sourced script can't leak between cases or kill this harness.
+run_case() {
+  local require_live="$1" validated="$2" failcount="$3"
+  local observed
+  E2E_PRIORITY_UNIT_SOURCE=1 \
+  E2E_REQUIRE_LIVE="$require_live" \
+  GATE_SCRIPT="$GATE_SCRIPT" \
+  VAL="$validated" \
+  FL="$failcount" \
+  bash -c '
+    set -uo pipefail
+    # shellcheck disable=SC1090
+    source "$GATE_SCRIPT" >&2  # returns at the source-guard; stdout goes to stderr so only the rc echo is captured
+    VALIDATED="$VAL"
+    FAIL="$FL"
+    evaluate_require_live_gate >/dev/null 2>&1
+    exit $?
+  '
+  observed=$?
+  echo "$observed"
+}
+
+assert_rc() {
+  local label="$1" require_live="$2" validated="$3" failcount="$4" expected="$5"
+  local observed
+  observed=$(run_case "$require_live" "$validated" "$failcount")
+  if [ "$observed" = "$expected" ]; then
+    echo "  ✓ $label: REQUIRE_LIVE=$require_live VALIDATED=$validated FAIL=$failcount → rc=$observed"
+    PASS=$((PASS + 1))
+  else
+    echo "  ✗ $label: REQUIRE_LIVE=$require_live VALIDATED=$validated FAIL=$failcount expected=$expected OBSERVED=$observed" >&2
+    FAIL=$((FAIL + 1))
+  fi
+}
+
+echo "=== E2E_REQUIRE_LIVE priority-runtimes zero-validated gate proof ==="
+echo "    (drives the REAL evaluate_require_live_gate from $GATE_SCRIPT)"
+echo
+
+# (a) DECISIVE false-green trap: REQUIRE_LIVE=1 + zero validated → RED (exit 1).
+assert_rc "require-live, zero validated → RED (the false-green trap)" \
+  1 0 0 1
+
+# (b) REQUIRE_LIVE=1 + at least one validated → GREEN (exit 0).
+assert_rc "require-live, one validated → GREEN" \
+  1 1 0 0
+assert_rc "require-live, several validated → GREEN" \
+  1 3 0 0
+
+# (c) REQUIRE_LIVE unset-equivalent (0) + zero validated → GREEN (loud skip).
+assert_rc "no require-live, zero validated → GREEN (dev-convenience loud skip)" \
+  0 0 0 0
+
+# REQUIRE_LIVE=true (string form) is also honoured by the gate.
+assert_rc "require-live='true', zero validated → RED" \
+  true 0 0 1
+
+# A real FAIL is always red, regardless of REQUIRE_LIVE / VALIDATED — the
+# zero-validated guard must not mask (nor be masked by) a genuine failure.
+assert_rc "real FAIL with validations, no require-live → RED" \
+  0 2 1 1
+assert_rc "real FAIL, zero validated, no require-live → RED" \
+  0 0 1 1
+
+echo
+echo "=== Results: $PASS passed, $FAIL failed ==="
+[ "$FAIL" -eq 0 ]
@@ -26,7 +26,26 @@
 #      the workspace stuck on 'online' indefinitely.)
 #
 # Hibernation is intentionally NOT covered here — it has its own timing
-# model (idle threshold) and warrants a separate harness.
+# model (idle threshold) and warrants a separate harness. (The
+# pause→resume + hibernate→wake transitions for PLATFORM-compute runtimes
+# are covered by test_staging_full_saas.sh step 10b.)
+#
+# BYO meta-runtime arms (kimi, kimi-cli) — added 2026-06-05:
+#   kimi and kimi-cli are BYO-compute meta-runtimes (isExternalLikeRuntime:
+#   runtime_registry.go:141-147) that go through the SAME external/poll
+#   provisioning path as `external` — create with external:true →
+#   awaiting_agent, register → online — but with their runtime LABEL
+#   PRESERVED (workspace.go:752-770 normalizeExternalRuntime keeps the
+#   specific label, does NOT coerce to generic "external", so the canvas
+#   shows the right runtime). They had ONLY validation/unit coverage and
+#   were NEVER provisioned→online in any e2e. Step 7b adds, for EACH of
+#   {kimi, kimi-cli}: create → assert awaiting_agent + label-preserved →
+#   register(poll) → assert online + label-preserved → A2A → assert the
+#   poll-mode {status:"queued"} envelope (a2a_proxy.go:462-477). The A2A
+#   arm proves the a2a proxy routes a BYO meta-runtime to the poll queue
+#   (200 + queued) rather than 404/500 — the meaningful round-trip for a
+#   workspace with no standing live agent. A real BYO-agent COMPLETION
+#   needs a standing kimi BYO cell (flagged for the CTO in the PR body).
 #
 # Required env (mirrors test_staging_full_saas.sh):
 #   MOLECULE_CP_URL          default: https://staging-api.moleculesai.app
@@ -40,9 +59,25 @@
 #   E2E_INTENTIONAL_FAILURE     1 → break a step on purpose to verify
 #                               the EXIT trap still tears down (mirrors
 #                               the full-saas harness's safety net).
+#   E2E_REQUIRE_LIVE            1 → fail-closed if the harness exits 0
+#                               WITHOUT having driven all four
+#                               awaiting_agent transitions. CI sets this
+#                               so a future skip / early-return can never
+#                               masquerade as a green run. Mirrors CP
+#                               serving-e2e SERVING_E2E_REQUIRE_LIVE.
+#   E2E_STALE_POLL_DEADLINE_SECS  default 240. Upper bound for the
+#                               heartbeat-staleness READINESS poll (step
+#                               6). Replaces the old fixed sleep+one-shot
+#                               assert that raced the sweep cadence.
+#   E2E_TRANSIENT_RETRIES      default 8. Bounded retries for register /
+#                               re-register against transient edge errors
+#                               (502/503/504 from Caddy during cold TLS /
+#                               agent boot). Mirrors the full-saas
+#                               cold-start retry loop — NOT a bare sleep.
 #
 # Exit codes: 0 happy, 1 generic, 2 missing env, 3 provision timeout,
-# 4 teardown leak.
+# 4 teardown leak, 5 REQUIRE_LIVE violation (clean exit with fewer than
+# all four awaiting_agent transitions proven).

 set -euo pipefail

@@ -51,6 +86,13 @@ ADMIN_TOKEN="${MOLECULE_ADMIN_TOKEN:?MOLECULE_ADMIN_TOKEN required — Railway s
 PROVISION_TIMEOUT_SECS="${E2E_PROVISION_TIMEOUT_SECS:-900}"
 RUN_ID_SUFFIX="${E2E_RUN_ID:-$(date +%H%M%S)-$$}"
 STALE_WAIT_SECS="${E2E_STALE_WAIT_SECS:-180}"
+# Readiness-poll deadline for the sweep transition (step 6). Must exceed
+# STALE_WAIT_SECS (the no-heartbeat window) by at least one sweep
+# interval so a slightly-late sweep tick is polled-for, not misread as a
+# stuck 'online'. 240 = 180s window + 60s sweep-cadence headroom.
+STALE_POLL_DEADLINE_SECS="${E2E_STALE_POLL_DEADLINE_SECS:-240}"
+TRANSIENT_RETRIES="${E2E_TRANSIENT_RETRIES:-8}"
+REQUIRE_LIVE="${E2E_REQUIRE_LIVE:-0}"

 SLUG="e2e-ext-$(date +%Y%m%d)-${RUN_ID_SUFFIX}"
 SLUG=$(echo "$SLUG" | tr '[:upper:]' '[:lower:]' | tr -cd 'a-z0-9-' | head -c 32)
@@ -59,6 +101,66 @@ log()  { echo "[$(date +%H:%M:%S)] $*"; }
 fail() { echo "[$(date +%H:%M:%S)] ❌ $*" >&2; exit 1; }
 ok()   { echo "[$(date +%H:%M:%S)] ✅ $*"; }

+# REQUIRE_LIVE bookkeeping: count the four awaiting_agent transitions the
+# test is contracted to prove. The EXIT trap fails-closed (exit 5) if the
+# script reaches a clean exit without all four — so a silent skip, an
+# early `return 0`, or a refactor that drops a step can never show green.
+TRANSITIONS_VERIFIED=0
+EXPECTED_TRANSITIONS=4
+require_transition() {  # $1 = human label
+  TRANSITIONS_VERIFIED=$((TRANSITIONS_VERIFIED + 1))
+  log "    [require-live] transition ${TRANSITIONS_VERIFIED}/${EXPECTED_TRANSITIONS} proven: $1"
+}
+
+# Redact credentials from any HTTP body (stdin → stdout) before logging:
+# "Bearer <tok>" / "token <tok>" pairs AND JSON "*token" string values.
+sanitize_http_body() {
+  sed -E -e 's/(Bearer|token)[[:space:]]+[A-Za-z0-9._-]+/\1 REDACTED/g' -e 's/("[A-Za-z0-9_]*[Tt]oken"[[:space:]]*:[[:space:]]*")[^"]*/\1REDACTED/g'
+}
+
+# Bounded retry-on-transient for POST /registry/register. The tenant edge
+# (Caddy) returns 502/503/504 with an identifiable body while TLS / the
+# workspace agent finishes cold-booting — a single shot here was the
+# un-named flake (a transient edge error misread as a register failure).
+# This mirrors the full-saas cold-start loop (test_staging_full_saas.sh
+# ~L780-816): retry ONLY on a transient TRANSPORT class (5xx + body
+# match), bounded by TRANSIENT_RETRIES, and FAIL CLOSED (non-zero) once
+# the budget is spent. It deliberately does NOT retry on a 4xx — that's a
+# real contract bug (e.g. wrong payload field) and must stay red.
+# Sets REGISTER_RESP (body + trailing "HTTP_CODE=NNN" line) on success;
+# returns non-zero (caller `fail`s) when the bounded budget is exhausted.
+register_with_retry() {  # $1 = step label, $2 = request body
+  local label="$1" body="$2"
+  local attempt code resp safe
+  for attempt in $(seq 1 "$TRANSIENT_RETRIES"); do
+    set +e
+    resp=$(curl -sS --max-time 30 -w "\nHTTP_CODE=%{http_code}" -X POST \
+      "$TENANT_URL/registry/register" \
+      -H "Authorization: Bearer $WS_AUTH_TOKEN" \
+      -H "X-Molecule-Org-Id: $ORG_ID" \
+      -H "Content-Type: application/json" \
+      -d "$body")
+    set -e
+    code=$(printf '%s' "$resp" | sed -n 's/^HTTP_CODE=//p' | tail -n1)
+    code=${code:-000}
+    if [ "$code" = "200" ]; then
+      REGISTER_RESP="$resp"
+      return 0
+    fi
+    safe=$(printf '%s' "$resp" | sanitize_http_body | head -c 300)
+    # Retry ONLY on a transient transport class; a 4xx is a real bug.
+    if echo "$code" | grep -Eq '^(502|503|504)$' \
+       && echo "$safe" | grep -Eqi 'Service Unavailable|Bad Gateway|Gateway Timeout|error code: 502|error code: 504|workspace agent unreachable|connection refused|no healthy upstream'; then
+      log "    ${label} transient $code attempt ${attempt}/${TRANSIENT_RETRIES}: $safe"
+      [ "$attempt" -lt "$TRANSIENT_RETRIES" ] && { sleep 10; continue; }
+    fi
+    # Non-transient (4xx, or unrecognized 5xx body): stop and fail closed.
+    REGISTER_RESP="$resp"
+    return 1
+  done
+  return 1
+}
+
 CURL_COMMON=(-sS --fail-with-body --max-time 30)

 # ─── cleanup trap (mirrors full-saas) ────────────────────────────────────
@@ -98,8 +200,19 @@ cleanup_org() {
  fi
  ok "Teardown clean — no orphan resources for $SLUG (${elapsed}s)"

+  # REQUIRE_LIVE fail-closed gate. Only meaningful on an OTHERWISE-CLEAN
+  # exit (entry_rc==0): a script that completed all steps but somehow did
+  # not register all four transitions (a skip, an early return, a dropped
+  # assertion in a refactor) must NOT report success. A non-zero entry_rc
+  # already carries its own failure semantics — don't mask it with 5.
+  if [ "$entry_rc" = "0" ] && [ "${REQUIRE_LIVE}" = "1" ] \
+     && [ "$TRANSITIONS_VERIFIED" -lt "$EXPECTED_TRANSITIONS" ]; then
+    echo "❌ REQUIRE_LIVE: exited 0 but only ${TRANSITIONS_VERIFIED}/${EXPECTED_TRANSITIONS} awaiting_agent transitions were proven — refusing to report green." >&2
+    exit 5
+  fi
+
  case "$entry_rc" in
-    0|1|2|3|4) ;;
+    0|1|2|3|4|5) ;;
    *) exit 1 ;;
  esac
 }
@@ -125,10 +238,17 @@ admin_call() {

 # ─── 1. Create org ──────────────────────────────────────────────────────
 log "1/8 Creating org $SLUG..."
+# admin_call inherits CURL_COMMON's --fail-with-body: a non-2xx makes curl
+# exit 22, which under `set -euo pipefail` would abort this bare command
+# substitution BEFORE the `fail "... missing 'id'"` handler below can print
+# the body. set +e / `|| true` keeps the 22 from tripping `set -e`; curl
+# still wrote the body, so CREATE_RESP holds it and the id-check surfaces why.
+set +e
 CREATE_RESP=$(admin_call POST /cp/admin/orgs \
  -d "{\"slug\":\"$SLUG\",\"name\":\"E2E ext $SLUG\",\"owner_user_id\":\"e2e-runner:$SLUG\"}")
-ORG_ID=$(echo "$CREATE_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('id',''))")
-[ -z "$ORG_ID" ] && fail "Org create response missing 'id'"
+set -e
+ORG_ID=$(echo "$CREATE_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || echo "")
+[ -z "$ORG_ID" ] && fail "Org create response missing 'id': $(printf '%s' "$CREATE_RESP" | sanitize_http_body 2>/dev/null || printf '%s' "$CREATE_RESP")"
 ok "Org created (id=$ORG_ID)"

 # ─── 2. Wait for tenant provisioning ────────────────────────────────────
@@ -221,8 +341,13 @@ tenant_call() {
 # on whatever the create handler set first (typically 'provisioning')
 # because the follow-up UPDATE failed the enum cast.
 log "4/8 Creating external workspace (no URL — exercises workspace.go:333)..."
+# tenant_call inherits CURL_COMMON's --fail-with-body: guard the same way as
+# the org create above so a non-2xx returns the body to the id/status checks
+# below instead of aborting opaquely on curl exit 22.
+set +e
 WS_CREATE_RESP=$(tenant_call POST /workspaces \
  -d '{"name":"ext-e2e","runtime":"external","external":true}')
+set -e

 WS_ID=$(echo "$WS_CREATE_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('id',''))")
 WS_RESP_STATUS=$(echo "$WS_CREATE_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))")
@@ -235,7 +360,7 @@ try:
 except Exception:
    print('')
 ")
-[ -z "$WS_ID" ] && fail "Workspace create missing id: $WS_CREATE_RESP"
+[ -z "$WS_ID" ] && fail "Workspace create missing id: $(printf '%s' "$WS_CREATE_RESP" | sanitize_http_body 2>/dev/null || printf '%s' "$WS_CREATE_RESP")"
 [ "$WS_RESP_STATUS" != "awaiting_agent" ] && fail "Expected response status=awaiting_agent, got $WS_RESP_STATUS"
 ok "Workspace created (id=$WS_ID, response status=awaiting_agent)"

@@ -248,6 +373,7 @@ GET_RESP=$(tenant_call GET "/workspaces/$WS_ID")
 DB_STATUS=$(echo "$GET_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))")
 [ "$DB_STATUS" != "awaiting_agent" ] && fail "DB row status=$DB_STATUS (expected awaiting_agent — migration 046 likely not applied)"
 ok "DB row stored as awaiting_agent (proof migration 046 applied)"
+require_transition "create: provisioning → awaiting_agent (DB-verified)"

 # ─── 5. Register the workspace (transitions to online) ──────────────────
 # Pre-fix this path was actually fine because it writes 'online', a value
@@ -277,20 +403,20 @@ log "5/8 Registering workspace via /registry/register..."
 #   url           — accepted but not dispatched-to in poll mode, so
 #                   example.invalid is a valid sentinel.
 REGISTER_BODY=$(printf '{"id":"%s","url":"https://example.invalid:443","delivery_mode":"poll","agent_card":{"name":"e2e-ext","skills":[{"id":"echo","name":"Echo"}]}}' "$WS_ID")
-# Disable --fail-with-body for this one call so a 4xx surfaces the response
-# body (the bare CURL_COMMON would `set -e`-kill before we could log it).
-REGISTER_RESP=$(curl -sS --max-time 30 -w "\nHTTP_CODE=%{http_code}" -X POST "$TENANT_URL/registry/register" \
-  -H "Authorization: Bearer $WS_AUTH_TOKEN" \
-  -H "X-Molecule-Org-Id: $ORG_ID" \
-  -H "Content-Type: application/json" \
-  -d "$REGISTER_BODY") || true
-log "    register response: $(echo "$REGISTER_RESP" | head -c 300)"
-echo "$REGISTER_RESP" | grep -q "HTTP_CODE=200" || fail "register returned non-200 — see body above"
+# Bounded retry-on-transient (see register_with_retry). The previous
+# single-shot here would `fail` on a cold-boot 502 from the tenant edge —
+# an un-named transient misread as a register break. The helper retries
+# ONLY that class and fails closed on a real 4xx or an exhausted budget.
+REGISTER_RESP=""
+register_with_retry "register" "$REGISTER_BODY" \
+  || fail "register returned non-200 after bounded retries — body: $(printf '%s' "$REGISTER_RESP" | sanitize_http_body | head -c 300)"
+log "    register response: $(echo "$REGISTER_RESP" | sanitize_http_body | head -c 300)"

 GET_RESP=$(tenant_call GET "/workspaces/$WS_ID")
 ONLINE_STATUS=$(echo "$GET_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))")
 [ "$ONLINE_STATUS" != "online" ] && fail "Expected online after register, got $ONLINE_STATUS"
 ok "Workspace transitioned to online"
+require_transition "register: awaiting_agent → online"

 # Confirm the register handler echoed back delivery_mode=poll. We read
 # this from the register RESPONSE, not the workspace GET response, because
@@ -310,38 +436,165 @@ fi
 # This is the SECOND silent-failure path (registry/healthsweep.go's
 # sweepStaleRemoteWorkspaces). Pre-migration-046 the heartbeat-staleness
 # UPDATE silently failed and the workspace stuck on 'online' forever
-# even though no agent was alive. We wait the full window + a sweep
-# interval and assert the row transitions back to 'awaiting_agent'.
-log "6/8 Waiting ${STALE_WAIT_SECS}s for heartbeat-staleness sweep (no heartbeat sent)..."
+# even though no agent was alive.
+#
+# FLAKE FIX (named: sweep-cadence race). The old code did a FIXED
+# `sleep $STALE_WAIT_SECS` then a SINGLE assert. The staleness sweep is a
+# periodic tick (REMOTE_LIVENESS_STALE_AFTER + a sweep interval); if the
+# tick that flips the row lands even one second after the fixed sleep, the
+# one-shot GET reads 'online' and the test fails — a real transition,
+# misread as a flake because the assert was racing the sweep cadence.
+# Replace with: sleep through the mandatory no-heartbeat window ONCE (the
+# sweep cannot fire before the window elapses, so polling earlier is
+# pointless), then READINESS-POLL for the awaiting_agent transition up to
+# STALE_POLL_DEADLINE_SECS, hard-failing with a clear message at the
+# deadline. Deterministic: a slow-but-working sweep passes; a genuinely
+# stuck 'online' still fails (now with how long we actually waited).
+log "6/8 Waiting ${STALE_WAIT_SECS}s no-heartbeat window, then polling for sweep (up to ${STALE_POLL_DEADLINE_SECS}s total)..."
+[ "$STALE_POLL_DEADLINE_SECS" -le "$STALE_WAIT_SECS" ] && \
+  fail "Misconfigured: STALE_POLL_DEADLINE_SECS ($STALE_POLL_DEADLINE_SECS) must exceed STALE_WAIT_SECS ($STALE_WAIT_SECS) by at least one sweep interval"
 sleep "$STALE_WAIT_SECS"

-GET_RESP=$(tenant_call GET "/workspaces/$WS_ID")
-STALE_STATUS=$(echo "$GET_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))")
-[ "$STALE_STATUS" != "awaiting_agent" ] && \
-  fail "After ${STALE_WAIT_SECS}s with no heartbeat, expected status=awaiting_agent (sweep transition), got $STALE_STATUS — migration 046 likely not applied OR sweep not running"
+STALE_DEADLINE=$(( $(date +%s) + (STALE_POLL_DEADLINE_SECS - STALE_WAIT_SECS) ))
+STALE_STATUS=""
+while true; do
+  GET_RESP=$(tenant_call GET "/workspaces/$WS_ID")
+  STALE_STATUS=$(echo "$GET_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))")
+  [ "$STALE_STATUS" = "awaiting_agent" ] && break
+  if [ "$(date +%s)" -gt "$STALE_DEADLINE" ]; then
+    fail "After ${STALE_POLL_DEADLINE_SECS}s with no heartbeat, status still '$STALE_STATUS' (expected awaiting_agent sweep transition) — migration 046 likely not applied OR sweep not running"
+  fi
+  sleep 10
+done
 ok "Heartbeat-staleness sweep transitioned online → awaiting_agent (proof healthsweep.go fix working)"
+require_transition "sweep: online → awaiting_agent (no heartbeat)"

 # ─── 7. Re-register and confirm we can come back online ─────────────────
 # This proves the awaiting_agent state is recoverable (re-registrable),
 # which is the whole point of using it instead of 'offline'.
 log "7/8 Re-registering after stale → confirming recovery to online..."
 # Same payload contract as step 5 (id + agent_card both required). See note
-# there for why workspace_id would 400.
-REREG_RESP=$(curl -sS --max-time 30 -w "\nHTTP_CODE=%{http_code}" -X POST "$TENANT_URL/registry/register" \
-  -H "Authorization: Bearer $WS_AUTH_TOKEN" \
-  -H "X-Molecule-Org-Id: $ORG_ID" \
-  -H "Content-Type: application/json" \
-  -d "$REGISTER_BODY") || true
-log "    re-register response: $(echo "$REREG_RESP" | head -c 300)"
-echo "$REREG_RESP" | grep -q "HTTP_CODE=200" || fail "re-register returned non-200 — see body above"
+# there for why workspace_id would 400. Same bounded retry-on-transient.
+REGISTER_RESP=""
+register_with_retry "re-register" "$REGISTER_BODY" \
+  || fail "re-register returned non-200 after bounded retries — body: $(printf '%s' "$REGISTER_RESP" | sanitize_http_body | head -c 300)"
+log "    re-register response: $(echo "$REGISTER_RESP" | sanitize_http_body | head -c 300)"

 GET_RESP=$(tenant_call GET "/workspaces/$WS_ID")
 RECOVERED_STATUS=$(echo "$GET_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))")
 [ "$RECOVERED_STATUS" != "online" ] && \
  fail "Expected re-register to return workspace to online, got $RECOVERED_STATUS"
 ok "Re-register succeeded — awaiting_agent → online (operator-recoverable)"
+require_transition "re-register: awaiting_agent → online (recovery)"
+
+# ─── 7b. BYO meta-runtime arms: kimi + kimi-cli ─────────────────────────
+# kimi and kimi-cli are BYO-compute meta-runtimes (isExternalLikeRuntime).
+# They share the external/poll provisioning path but PRESERVE their runtime
+# label (workspace.go normalizeExternalRuntime). They had no provision→online
+# e2e until now. For EACH: create(external:true, runtime=<rt>) → assert
+# awaiting_agent + label preserved → register(poll) → assert online + label
+# preserved → A2A → assert the poll-mode {status:"queued"} envelope.
+#
+# Why poll-mode {queued} is the A2A assertion (not a real completion): there
+# is no standing live BYO agent in staging, so the meaningful round-trip is
+# that the a2a proxy ROUTES a BYO meta-runtime to the poll queue (HTTP 200 +
+# {status:"queued", delivery_mode:"poll"}, a2a_proxy.go:462-477) instead of
+# 404/500. A real BYO-agent COMPLETION needs a standing kimi BYO cell — see
+# the CTO flag in the PR body.
+byo_meta_runtime_arm() {  # $1 = runtime label (kimi | kimi-cli)
+  local rt="$1"
+  local resp wid status auth get_resp db_status db_runtime reg_dm online_status body
+  log "    [$rt] create (external:true, runtime=$rt)..."
+  # tenant_call inherits --fail-with-body (curl exits 22 on a non-2xx); `|| true`
+  # keeps set -e from aborting so the checks below can report the sanitized body.
+  resp=$(tenant_call POST /workspaces \
+    -d "$(printf '{"name":"ext-%s-e2e","runtime":"%s","external":true}' "$rt" "$rt")") || true
+  wid=$(echo "$resp" | python3 -c "import json,sys; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || echo "")
+  status=$(echo "$resp" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))" 2>/dev/null || echo "")
+  auth=$(echo "$resp" | python3 -c "
+import json,sys
+try:
+    d=json.load(sys.stdin); conn=d.get('connection') or {}
+    print(conn.get('auth_token','') or d.get('auth_token',''))
+except Exception:
+    print('')
+")
+  [ -z "$wid" ] && fail "[$rt] create missing id: $(printf '%s' "$resp" | sanitize_http_body | head -c 300)"
+  [ "$status" = "awaiting_agent" ] || fail "[$rt] create status='$status' (expected awaiting_agent — external/poll path)"
+  [ -z "$auth" ] && fail "[$rt] create returned no workspace auth token — register impossible"
+
+  # Assert the runtime LABEL was preserved (NOT coerced to generic 'external').
+  get_resp=$(tenant_call GET "/workspaces/$wid")
+  db_status=$(echo "$get_resp" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))")
+  db_runtime=$(echo "$get_resp" | python3 -c "import json,sys; print(json.load(sys.stdin).get('runtime',''))")
+  [ "$db_status" = "awaiting_agent" ] || fail "[$rt] DB row status=$db_status (expected awaiting_agent)"
+  [ "$db_runtime" = "$rt" ] || fail "[$rt] runtime label coerced to '$db_runtime' (expected '$rt' — normalizeExternalRuntime must PRESERVE the BYO meta-runtime label, workspace.go:752-770)"
+  ok "    [$rt] create → awaiting_agent, runtime label preserved ✓"
+
+  # register(poll) → online. register_with_retry reads WS_AUTH_TOKEN from the
+  # caller's scope, so shadow it with a local (bash dynamic scoping) rather than
+  # clobbering the global workspace token. REGISTER_RESP is set by the helper.
+  local WS_AUTH_TOKEN="$auth"
+  body=$(printf '{"id":"%s","url":"https://example.invalid:443","delivery_mode":"poll","agent_card":{"name":"e2e-%s","skills":[{"id":"echo","name":"Echo"}]}}' "$wid" "$rt")
+  REGISTER_RESP=""
+  register_with_retry "[$rt] register" "$body" \
+    || fail "[$rt] register returned non-200 after bounded retries — body: $(printf '%s' "$REGISTER_RESP" | sanitize_http_body | head -c 300)"
+  online_status=$(tenant_call GET "/workspaces/$wid" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))")
+  [ "$online_status" = "online" ] || fail "[$rt] expected online after register, got $online_status"
+  reg_dm=$(echo "$REGISTER_RESP" | head -n1 | python3 -c "import json,sys; print(json.load(sys.stdin).get('delivery_mode',''))" 2>/dev/null || echo "")
+  [ "$reg_dm" = "poll" ] || fail "[$rt] register response delivery_mode='$reg_dm' (expected poll)"
+  ok "    [$rt] register → online (delivery_mode=poll) ✓"
+
+  # A2A → assert poll-mode {status:"queued"} envelope. Bounded retry on the
+  # transient cold-edge 5xx class; a 4xx/non-queued 2xx is a real bug.
+  local a2a_payload a2a_tmp a2a_code a2a_rc a2a_status a2a_dm attempt
+  a2a_payload=$(python3 -c "
+import json, uuid
+print(json.dumps({
+    'jsonrpc':'2.0','method':'message/send','id':'e2e-byo-1',
+    'params':{'message':{'role':'user','messageId':f'e2e-{uuid.uuid4().hex[:8]}',
+        'parts':[{'kind':'text','text':'BYO meta-runtime poll-route smoke. Respond: OK'}]}}
+}))
+")
+  a2a_tmp=$(mktemp -t byo_a2a.XXXXXX)
+  for attempt in $(seq 1 8); do
+    : >"$a2a_tmp"
+    set +e
+    a2a_code=$(curl -sS --max-time 60 -X POST "$TENANT_URL/workspaces/$wid/a2a" \
+      -H "Authorization: Bearer $TENANT_TOKEN" \
+      -H "X-Molecule-Org-Id: $ORG_ID" \
+      -H "Content-Type: application/json" \
+      -d "$a2a_payload" -o "$a2a_tmp" -w '%{http_code}' 2>/dev/null)
+    a2a_rc=$?
+    set -e
+    a2a_code=${a2a_code:-000}
+    if [ "$a2a_rc" = "0" ] && [ "$a2a_code" = "200" ]; then break; fi
+    if echo "$a2a_code" | grep -Eq '^(502|503|504)$' && [ "$attempt" -lt 8 ]; then
+      log "    [$rt] A2A transient $a2a_code attempt $attempt/8"; sleep 10; continue
+    fi
+    break
+  done
+  a2a_status=$(python3 -c "import json,sys; print(json.load(open(sys.argv[1])).get('status',''))" "$a2a_tmp" 2>/dev/null || echo "")
+  a2a_dm=$(python3 -c "import json,sys; print(json.load(open(sys.argv[1])).get('delivery_mode',''))" "$a2a_tmp" 2>/dev/null || echo "")
+  rm -f "$a2a_tmp"
+  [ "$a2a_rc" = "0" ] && [ "$a2a_code" = "200" ] \
+    || fail "[$rt] A2A POST failed (rc=$a2a_rc, http=$a2a_code) — a BYO meta-runtime poll-mode A2A must 200 with a queued envelope, not error"
+  [ "$a2a_status" = "queued" ] && [ "$a2a_dm" = "poll" ] \
+    || fail "[$rt] A2A returned status='$a2a_status' delivery_mode='$a2a_dm' (expected queued/poll — a2a proxy must route a BYO meta-runtime to the poll queue, a2a_proxy.go:462-477)"
+  ok "    [$rt] A2A → poll-mode queued envelope ✓ (provision→online→A2A proven for $rt)"
+}
+
+log "7b/8 BYO meta-runtime arms (kimi, kimi-cli) — provision→online→A2A..."
+byo_meta_runtime_arm "kimi"
+byo_meta_runtime_arm "kimi-cli"
+ok "BYO meta-runtime arms passed for kimi + kimi-cli"

 # ─── 8. Done — cleanup runs in the EXIT trap ───────────────────────────
+# REQUIRE_LIVE belt-and-braces: assert here too (in addition to the EXIT
+# trap) so the failure surfaces in step order, not only post-teardown.
+if [ "${REQUIRE_LIVE}" = "1" ] && [ "$TRANSITIONS_VERIFIED" -lt "$EXPECTED_TRANSITIONS" ]; then
+  fail "REQUIRE_LIVE: only ${TRANSITIONS_VERIFIED}/${EXPECTED_TRANSITIONS} transitions proven at end of run"
+fi
 log "8/8 All four awaiting_agent transitions verified."
 log "═══════════════════════════════════════════════════════════════════"
 ok "External-runtime E2E PASSED on $SLUG"
@@ -24,6 +24,19 @@
 #
 # Optional env:
 #   E2E_RUNTIME                  hermes (default) | claude-code | codex | openclaw
+#                                | seo-agent | google-adk
+#                                  - seo-agent: a claude-code-adapter template
+#                                    VARIANT (not a distinct registry runtime).
+#                                    Selected via the `template` field (config.yaml
+#                                    resolves runtime=claude-code); reuses the
+#                                    same MiniMax/claude-code key path. See the
+#                                    TEMPLATE derivation + SECRETS_JSON block.
+#                                  - google-adk: Gemini. The AI-Studio-keyed BYOK
+#                                    path (E2E_GOOGLE_API_KEY) is staging-
+#                                    exercisable here; the keyless Vertex PROD
+#                                    path needs WIF (see header note + the CTO
+#                                    flag in the PR body) and is selected via
+#                                    E2E_LLM_PATH=platform + a platform: model.
 #   E2E_PROVISION_TIMEOUT_SECS   default 900 (15 min cold EC2 budget)
 #   E2E_WORKSPACE_ONLINE_TIMEOUT_SECS  default 3600 (60 min — hermes
 #                                cold-boot worst-case + slack). Raised from
@@ -47,6 +60,27 @@
 #                                tear down cleanly (and exit 4 on leak).
 #                                Used by a dedicated sanity workflow
 #                                that verifies the safety net.
+#   E2E_LIFECYCLE                auto (default) | off
+#                                When auto + MODE=full, exercises the
+#                                pause→resume→online and hibernate→resume(wake)
+#                                state transitions on the provisioned parent
+#                                (step 10b). These are REAL transitions on the
+#                                live tenant (Pause stops the container + sets
+#                                status=paused; Resume re-provisions →
+#                                provisioning → online; Hibernate stops +
+#                                status=hibernated; the next A2A auto-wakes it).
+#                                Set `off` for a fast smoke that skips the
+#                                ~2x-reprovision cost. In smoke MODE it is
+#                                skipped regardless (no parent stability budget).
+#   E2E_REQUIRE_LIVE             1 → fail-closed-on-skip guard (CI sets this).
+#                                When set, the run MUST actually complete
+#                                ≥1 full provision→online→A2A cycle. A run
+#                                that reaches the end without having proven
+#                                a real round-trip (e.g. a future refactor
+#                                short-circuits a stage, or a skip path
+#                                swallows the lifecycle) exits 5 rather than
+#                                reporting a false green. Mirrors CP
+#                                serving-e2e's SERVING_E2E_REQUIRE_LIVE.
 #
 # Exit codes:
 #   0  happy path
@@ -54,6 +88,37 @@
 #   2  missing required env
 #   3  provisioning timed out
 #   4  teardown left orphan resources
+#   5  E2E_REQUIRE_LIVE set but the run validated no real lifecycle (no
+#      false-green-on-skip)
+#
+# ─────────────────────────────────────────────────────────────────────────
+# PROMOTION-READINESS (harden/e2e-staging-saas-failclosed):
+#   This harness is being hardened so `E2E Staging SaaS` + `E2E Staging
+#   Platform Boot` can become HARD merge-gates. continue-on-error is NOT
+#   flipped here — that promotion is the CTO's irreversible branch-protection
+#   call. What this branch makes fail-closed (was false-green / un-named
+#   flake before):
+#     • Provision/online waits are bounded readiness-POLLS, not fixed sleeps;
+#       each hard-fails with a named mechanism + last-seen signal on deadline,
+#       never a silent timeout (cp#245 boot-timeout class).
+#     • Peer-discovery (9b) asserts a real 2xx, not just "not 404" — a 5xx /
+#       000 / empty no longer reads as "reachable".
+#     • Activity-log (9b) is ASSERTED reachable (2xx + parseable), not
+#       logged-and-ignored behind `|| echo '[]'`.
+#     • Child activity provenance (10) is asserted (was soft-logged).
+#     • E2E_REQUIRE_LIVE=1 (CI) makes the run exit 5 if it reached the end
+#       without proving a real provision→online→A2A round-trip — no
+#       false-green-on-skip.
+#   STILL BLOCKS making it REQUIRED (must clear before the CTO flips
+#   continue-on-error→false in .gitea/workflows/e2e-staging-saas.yml):
+#     • De-flake window: N consecutive green runs on main for BOTH jobs
+#       (platform-boot shares the cp#245 boot surface — #2187 tracks its
+#       flip). This harness removes the harness-side flake mechanisms; the
+#       remaining surface is real-infra (EC2 cold boot, CF DNS) latency,
+#       already bounded by the readiness polls above.
+#     • Branch-protection required-context wiring is a repo-settings change,
+#       not a code change in this PR.
+# ─────────────────────────────────────────────────────────────────────────

 set -euo pipefail

@@ -90,6 +155,41 @@ log()  { echo "[$(date +%H:%M:%S)] $*"; }
 fail() { echo "[$(date +%H:%M:%S)] ❌ $*" >&2; exit 1; }
 ok()   { echo "[$(date +%H:%M:%S)] ✅ $*"; }

+# ─── fail-closed-on-skip live-lifecycle guard ───────────────────────────
+# E2E_REQUIRE_LIVE=1 (set by CI) asserts this run ACTUALLY exercised a full
+# provision→online→A2A cycle. Each load-bearing lifecycle stage stamps a
+# milestone via live_milestone(); at the very end, require_live_or_die()
+# checks every required milestone fired. Mechanism: without this, a future
+# refactor that short-circuits a stage — or a skip/early-return path that
+# swallows the lifecycle — would let the script reach its final `ok` and
+# report GREEN having validated nothing. Mirrors CP serving-e2e's
+# SERVING_E2E_REQUIRE_LIVE (skip-if-absent must be LOUD, never silent green).
+REQUIRE_LIVE="${E2E_REQUIRE_LIVE:-0}"
+LIVE_MILESTONES=""
+live_milestone() {
+  # Stamp milestone $1 at most once; LIVE_MILESTONES is a space-delimited token set.
+  local stamp="$1"
+  if [[ " $LIVE_MILESTONES " != *" $stamp "* ]]; then
+    LIVE_MILESTONES="$LIVE_MILESTONES $stamp"
+  fi
+}
+require_live_or_die() {
+  # Enforced only when REQUIRE_LIVE is "1" (from E2E_REQUIRE_LIVE); otherwise a no-op.
+  if [ "$REQUIRE_LIVE" != "1" ]; then return 0; fi
+  local needed="provisioned tenant_online workspace_online a2a_roundtrip"
+  local name absent=""
+  for name in $needed; do
+    if [[ " $LIVE_MILESTONES " != *" $name "* ]]; then
+      absent="$absent $name"
+    fi
+  done
+  # Any unstamped milestone means the lifecycle was not proven: report it and exit 5.
+  if [ -n "$absent" ]; then
+    echo "[$(date +%H:%M:%S)] ❌ E2E_REQUIRE_LIVE=1 but the run did NOT prove a full live lifecycle — missing milestone(s):${absent}. Reached:${LIVE_MILESTONES:-<none>}. This is a false-green-on-skip guard: a run that validates no real provision→online→A2A cycle MUST NOT report green." >&2
+    exit 5
+  fi
+}
+
 # Per-runtime model slug dispatch — see lib/model_slug.sh for the rationale.
 # Extracted so unit tests (tests/e2e/test_model_slug.sh) can pin every branch
 # without booting the full 11-step lifecycle.
@@ -197,7 +297,7 @@ cleanup_org() {
  # case statement, and opens a false-positive priority-high
  # "safety net broken" issue (#2159, 2026-04-27).
  case "$entry_rc" in
-    0|1|2|3|4) ;;          # contracted codes — let bash use entry_rc
+    0|1|2|3|4|5) ;;        # contracted codes — let bash use entry_rc
    *) exit 1 ;;            # anything else is a generic failure
  esac
 }
@@ -295,6 +395,7 @@ print('(no org row found for slug=$SLUG — DB drift?)')
  esac
 done
 ok "Tenant provisioning complete"
+live_milestone provisioned

 # Derive tenant domain from CP hostname so the same harness works in
 # both prod (api.moleculesai.app → moleculesai.app) and staging
@@ -351,6 +452,7 @@ while true; do
  sleep 5
 done
 ok "Tenant reachable at $TENANT_URL"
+live_milestone tenant_online

 # Sanity-test path: once the tenant is provisioned, poisoning the
 # tenant token proves the EXIT trap + leak assertion still fire.
@@ -515,6 +617,24 @@ print(json.dumps({
    'ANTHROPIC_API_KEY': k,
 }))
 ")
+elif [ -n "${E2E_GOOGLE_API_KEY:-}" ]; then
+  # google-adk AI-Studio BYOK path. The `google` provider entry
+  # (providers.yaml:401-413) reads GEMINI_API_KEY / GOOGLE_API_KEY and dials
+  # generativelanguage.googleapis.com — the tenant's OWN key, distinct from the
+  # keyless-Vertex PROD path (which routes through the CP proxy + server-side
+  # WIF and carries NO tenant credential). This branch exercises google-adk
+  # being PROVISIONED AT ALL on staging; the Vertex-specific WIF path is flagged
+  # for the CTO (needs extra provisioning) and is NOT reachable here. Inject
+  # under both env names the provider accepts so the adapter resolves regardless
+  # of which one it reads first.
+  SECRETS_JSON=$(python3 -c "
+import json, os
+k = os.environ['E2E_GOOGLE_API_KEY']
+print(json.dumps({
+    'GOOGLE_API_KEY': k,
+    'GEMINI_API_KEY': k,
+}))
+")
 elif [ -n "${E2E_OPENAI_API_KEY:-}" ]; then
  SECRETS_JSON=$(python3 -c "
 import json, os
@@ -534,22 +654,234 @@ fi
 MODEL_SLUG=$(pick_model_slug "$RUNTIME")
 log "    MODEL_SLUG=$MODEL_SLUG"

-log "5/11 Provisioning parent workspace (runtime=$RUNTIME)..."
+# ─── BYOK opt-in split (secret-write gate requires explicit byok) ───────
+# Every vendor-key arm above (MiniMax / Anthropic / Google / OpenAI-hermes)
+# writes one or more keys that workspace-server's secret-write gate —
+# rejectPlatformManagedDirectLLMBypassForWorkspace in
+# workspace-server/internal/handlers/secrets.go — STRIPS/BLOCKS while a
+# workspace's resolved billing mode is platform_managed (the org/CTO default).
+# The strip-list (secrets.go platformManagedDirectLLMBypassKeys) includes
+# MINIMAX_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY/_BASE_URL,
+# HERMES_CUSTOM_API_KEY/_BASE_URL, etc. A bare vendor key in the CREATE payload
+# does NOT auto-derive byok: at create time no auth-env is present yet, so the
+# resolver derives platform_managed and the write is rejected. The resolver's
+# org rung was retired (internal#718 P2-B) — ResolveLLMBillingMode now ignores
+# the org default — so the ONLY way to opt a workspace into byok is an explicit
+# per-workspace override via PUT /admin/workspaces/:id/llm-billing-mode.
+#
+# Real evidence — staging job 295385 (main f1558b54), AFTER #2311/#2312 made
+# bare `MiniMax-M2.7` registry-valid: parent-create passed model validation but
+# FAILED with
+#   {"error":"direct vendor key writes are blocked for platform-managed
+#    workspaces; ... or set this workspace's billing mode to 'byok' via
+#    /admin/workspaces/:id/llm-billing-mode","key":"MINIMAX_API_KEY"}
+# That 400 is INTENDED product behavior, not a product bug. The e2e must mirror
+# the real BYOK user flow: opt the workspace into byok FIRST, then write the key.
+#
+# Mechanism: per-workspace override (NOT org-default), because the org rung is
+# retired — an org-create billing field could not satisfy this gate even if
+# /cp/admin/orgs accepted one. So for any arm that ships strip-listed keys we:
+#   1. create the workspace WITHOUT those keys (create succeeds platform_managed),
+#   2. PUT billing-mode=byok on that workspace id (per-tenant admin token),
+#   3. write the deferred strip-listed keys (now allowed by the gate),
+# then continue. The #1994 byok-routing guard (8c) then sees a LEGITIMATELY
+# byok workspace (explicit override) and still validates real routing — NOT
+# masked.
+#
+# The PLATFORM path (E2E_LLM_PATH=platform) produces SECRETS_JSON='{}', so it
+# carries NO strip-listed key → CREATE_SECRETS_JSON stays '{}' and no opt-in
+# fires. It remains platform_managed (the moonshot/kimi NOT_CONFIGURED
+# regression guard) — deliberately untouched.
+#
+# Keep this strip-list BYTE-IN-SYNC with secrets.go platformManagedDirectLLMBypassKeys.
+BYOK_STRIP_KEYS="AI_GATEWAY_API_KEY ANTHROPIC_API_KEY ANTHROPIC_AUTH_TOKEN ARCEEAI_API_KEY CLAUDE_CODE_OAUTH_TOKEN CODEX_AUTH_JSON DASHSCOPE_API_KEY DEEPSEEK_API_KEY GEMINI_API_KEY GLM_API_KEY HERMES_CUSTOM_API_KEY HERMES_CUSTOM_BASE_URL HF_TOKEN KIMI_API_KEY KIMI_CN_API_KEY MINIMAX_API_KEY MINIMAX_CN_API_KEY NOUS_API_KEY OPENAI_API_KEY OPENAI_BASE_URL OPENROUTER_API_KEY XAI_API_KEY ZAI_API_KEY"
+# Split SECRETS_JSON into CREATE_SECRETS_JSON (gate-safe, written at create)
+# and DEFERRED_SECRETS_JSON (strip-listed keys, written AFTER byok opt-in).
+# Emit the two JSON blobs on SEPARATE LINES (not space-separated) — a value or
+# a json.dumps default separator contains spaces, which whitespace-`read` would
+# mangle. read -r line1 → CREATE, line2 → DEFERRED.
+{
+  read -r CREATE_SECRETS_JSON
+  read -r DEFERRED_SECRETS_JSON
+} < <(
+  BYOK_STRIP_KEYS="$BYOK_STRIP_KEYS" E2E_WS_SECRETS="$SECRETS_JSON" python3 -c "
+import json, os
+strip = set(os.environ['BYOK_STRIP_KEYS'].split())
+d = json.loads(os.environ['E2E_WS_SECRETS'] or '{}')
+create = {k: v for k, v in d.items() if k not in strip}
+deferred = {k: v for k, v in d.items() if k in strip}
+print(json.dumps(create))
+print(json.dumps(deferred))
+"
+)
+# Defensive: if the split somehow produced empty (read failure), treat as
+# no-deferred so we never PUT byok on a workspace that has no vendor key.
+[ -n "$DEFERRED_SECRETS_JSON" ] || DEFERRED_SECRETS_JSON='{}'
+[ -n "$CREATE_SECRETS_JSON" ] || CREATE_SECRETS_JSON='{}'
+if [ "$DEFERRED_SECRETS_JSON" != "{}" ]; then
+  log "    BYOK opt-in required — deferring vendor key(s) until after billing-mode=byok"
+fi
+
+# byok_opt_in_and_write_deferred <workspace_id>
+#   For the byok arms (DEFERRED_SECRETS_JSON non-empty): PUT billing-mode=byok
+#   on the workspace, then write each deferred strip-listed secret (now allowed
+#   by the secret-write gate). No-op for the platform/no-key path. See the
+#   BYOK-opt-in block above + secrets.go rejectPlatformManagedDirectLLMBypassForWorkspace.
+byok_opt_in_and_write_deferred() {
+  local _id="$1"
+  if [ "$DEFERRED_SECRETS_JSON" = "{}" ]; then
+    return 0
+  fi
+  # Explicit byok opt-in (per-workspace override).
+  local _bm_resp _bm_mode
+  set +e
+  _bm_resp=$(tenant_call PUT "/admin/workspaces/$_id/llm-billing-mode" \
+    -H "Content-Type: application/json" \
+    -d '{"mode":"byok"}' 2>/dev/null)
+  local _bm_rc=$?
+  set -e
+  if [ "$_bm_rc" != "0" ]; then
+    fail "byok opt-in: PUT /admin/workspaces/$_id/llm-billing-mode {mode:byok} failed (rc=$_bm_rc). Raw: $(printf '%s' "$_bm_resp" | sanitize_http_body)"
+  fi
+  _bm_mode=$(echo "$_bm_resp" | python3 -c "import json,sys; print(json.load(sys.stdin).get('resolved_mode',''))" 2>/dev/null || echo "")
+  [ "$_bm_mode" = "byok" ] || fail "byok opt-in: workspace $_id resolved_mode='$_bm_mode' after PUT mode=byok (want byok). Raw: $(printf '%s' "$_bm_resp" | sanitize_http_body)"
+
+  # Write each deferred strip-listed secret one-per-call (the Set endpoint
+  # takes {key,value}). The gate now passes because resolved=byok. Bodies are
+  # built in Python (env-only) so secret values never hit a command line.
+  local _keys _k _sec_body _sec_tmp _sec_code _sec_out
+  _keys=$(echo "$DEFERRED_SECRETS_JSON" | python3 -c "import json,sys; print('\n'.join(json.load(sys.stdin).keys()))")
+  while IFS= read -r _k; do
+    [ -n "$_k" ] || continue
+    _sec_body=$(BYOK_K="$_k" E2E_WS_DEFERRED="$DEFERRED_SECRETS_JSON" python3 -c "
+import json, os
+d = json.loads(os.environ['E2E_WS_DEFERRED'])
+print(json.dumps({'key': os.environ['BYOK_K'], 'value': d[os.environ['BYOK_K']]}))
+")
+    _sec_tmp=$(mktemp -t synth_byok_secret.XXXXXX)
+    _sec_code=$(printf '%s' "$_sec_body" | tenant_call POST "/workspaces/$_id/secrets" \
+      -H "Content-Type: application/json" \
+      -d @- \
+      -o "$_sec_tmp" -w '%{http_code}' 2>/dev/null) || _sec_code=${_sec_code:-000}  # fallback OUTSIDE $(): keep curl's -w status on a non-zero exit (an inner `|| echo 000` appended to it, e.g. "400000"); 000 only if nothing was captured
+    if [ "$_sec_code" != "200" ] && [ "$_sec_code" != "201" ] && [ "$_sec_code" != "204" ]; then
+      _sec_out=$(cat "$_sec_tmp" 2>/dev/null | sanitize_http_body)
+      rm -f "$_sec_tmp"
+      fail "byok vendor-key write: POST /workspaces/$_id/secrets ($_k) returned $_sec_code: $_sec_out — secret-write gate should allow it after the byok opt-in (secrets.go rejectPlatformManagedDirectLLMBypassForWorkspace)."
+    fi
+    rm -f "$_sec_tmp"
+  done <<< "$_keys"
+  ok "    $_id byok opt-in + deferred vendor key(s) written"
+}
+
+# ─── runtime → provision-selector resolution ────────────────────────────
+# Most runtimes are selected directly by the `runtime` field. seo-agent is
+# the exception: it is NOT a registry runtime (absent from manifest.json +
+# runtime_registry.go knownRuntimes) — it is a claude-code-adapter template
+# VARIANT selected by the `template` field. The ws-server Create handler reads
+# the template's config.yaml, which declares `runtime: claude-code`, and
+# resolves the concrete runtime from there (workspace.go:290-336). So for
+# seo-agent we send template="seo-agent" and OMIT runtime, letting the
+# template resolve it — sending an explicit runtime="seo-agent" would
+# RUNTIME_UNSUPPORTED-422 at workspace.go:374-384 because it is not in
+# knownRuntimes. PROVISION_TEMPLATE is "" for every real registry runtime.
+PROVISION_TEMPLATE=""
+case "$RUNTIME" in
+  seo-agent) PROVISION_TEMPLATE="seo-agent" ;;
+esac
+
+# build_create_payload <name> [parent_id] — print the workspace-create JSON body on stdout
+# (tier fixed at 2; model=$MODEL_SLUG; secrets=$CREATE_SECRETS_JSON). Inputs reach Python via env vars, so
+# nothing is shell-escaped; emits `template` XOR `runtime`, and `parent_id` only when it is non-empty.
+build_create_payload() {
+  local name="$1" parent_id="${2:-}"
+  E2E_WS_NAME="$name" \
+  E2E_WS_PARENT_ID="$parent_id" \
+  E2E_WS_RUNTIME="$RUNTIME" \
+  E2E_WS_TEMPLATE="$PROVISION_TEMPLATE" \
+  E2E_WS_MODEL="$MODEL_SLUG" \
+  E2E_WS_SECRETS="$CREATE_SECRETS_JSON" \
+  python3 -c "
+import json, os
+secrets = json.loads(os.environ['E2E_WS_SECRETS'] or '{}')
+payload = {
+    'name': os.environ['E2E_WS_NAME'],
+    'tier': 2,
+    'model': os.environ['E2E_WS_MODEL'],
+    'secrets': secrets,
+}
+tmpl = os.environ.get('E2E_WS_TEMPLATE', '')
+if tmpl:
+    # Template-selected variant (seo-agent): the template's config.yaml
+    # resolves runtime=claude-code server-side. Do NOT also send an explicit
+    # runtime — seo-agent is not a registry runtime and would 422.
+    payload['template'] = tmpl
+else:
+    payload['runtime'] = os.environ['E2E_WS_RUNTIME']
+pid = os.environ.get('E2E_WS_PARENT_ID', '')
+if pid:
+    payload['parent_id'] = pid
+print(json.dumps(payload))
+"
+}
+
+if [ -n "$PROVISION_TEMPLATE" ]; then
+  log "5/11 Provisioning parent workspace (runtime=$RUNTIME via template=$PROVISION_TEMPLATE → claude-code adapter)..."
+else
+  log "5/11 Provisioning parent workspace (runtime=$RUNTIME)..."
+fi
+# tenant_call inherits CURL_COMMON's --fail-with-body, so a non-2xx create
+# (e.g. the 422 RUNTIME_UNSUPPORTED below) makes curl exit 22. Capturing it
+# bare as $(tenant_call ...) propagates that 22 through the command
+# substitution and, under `set -euo pipefail`, ABORTS the whole script right
+# here — before the `fail "... Response: ..."` handler below can print the
+# body. The result was an opaque `curl: (22) ... error: 422` + teardown with
+# no body (run 220702, main f78fef4c, step "5/11 Provisioning parent
+# workspace"). Disabling errexit (set +e … set -e) around the call keeps the 22 from aborting; curl
+# still WROTE the body to stdout (that's what --fail-with-body does), so
+# PARENT_RESP holds the 422 JSON and the id-check below surfaces WHY.
+set +e
 PARENT_RESP=$(tenant_call POST /workspaces \
  -H "Content-Type: application/json" \
-  -d "{\"name\":\"E2E Parent\",\"runtime\":\"$RUNTIME\",\"tier\":2,\"model\":\"$MODEL_SLUG\",\"secrets\":$SECRETS_JSON}")
-PARENT_ID=$(echo "$PARENT_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin)['id'])")
+  -d "$(build_create_payload 'E2E Parent')")
+set -e
+# Surface the workspace-create error CLEARLY instead of dying on a Python
+# KeyError when the response has no 'id'. The load-bearing cases this names:
+#   - google-adk: RUNTIME_UNSUPPORTED 422 if google-adk is absent from the
+#     deployed manifest.json's workspace_templates (the Create-handler
+#     allowlist is manifest-derived — runtime_registry.go). google-adk is in
+#     providers.yaml + provisioner/registry.go + registry_gen but NOT (yet) in
+#     manifest.json, so it cannot be provisioned by `runtime` until the
+#     manifest gains it. Flagged for the CTO — this arm REDS until then.
+#   - seo-agent: an "invalid template" 400 if the seo-agent template isn't
+#     present in the tenant's configs/cache dir (template-cache refresh gap).
+PARENT_ID=$(echo "$PARENT_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || echo "")
+if [ -z "$PARENT_ID" ]; then
+  fail "Parent workspace create returned no 'id' (runtime=$RUNTIME, template=${PROVISION_TEMPLATE:-<none>}). Response: $(printf '%s' "$PARENT_RESP" | sanitize_http_body)"
+fi
 log "    PARENT_ID=$PARENT_ID"
+# BYOK arms only: opt the workspace into byok, then write the deferred vendor
+# key(s). No-op for the platform/no-key path. (See the BYOK opt-in block.)
+byok_opt_in_and_write_deferred "$PARENT_ID"

 # ─── 6. Provision child (full mode only) ────────────────────────────────
 CHILD_ID=""
 if [ "$MODE" = "full" ]; then
  log "6/11 Provisioning child workspace..."
+  # Same --fail-with-body / set -e abort guard as the parent create above:
+  # let a non-2xx return the body so the id-check below surfaces it instead
+  # of the script dying opaquely on curl exit 22.
+  set +e
  CHILD_RESP=$(tenant_call POST /workspaces \
    -H "Content-Type: application/json" \
-    -d "{\"name\":\"E2E Child\",\"runtime\":\"$RUNTIME\",\"tier\":2,\"model\":\"$MODEL_SLUG\",\"parent_id\":\"$PARENT_ID\",\"secrets\":$SECRETS_JSON}")
-  CHILD_ID=$(echo "$CHILD_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin)['id'])")
+    -d "$(build_create_payload 'E2E Child' "$PARENT_ID")")
+  set -e
+  CHILD_ID=$(echo "$CHILD_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || echo "")
+  if [ -z "$CHILD_ID" ]; then
+    fail "Child workspace create returned no 'id' (runtime=$RUNTIME, template=${PROVISION_TEMPLATE:-<none>}). Response: $(printf '%s' "$CHILD_RESP" | sanitize_http_body)"
+  fi
  log "    CHILD_ID=$CHILD_ID"
+  # Same BYOK opt-in as the parent — the child also carries the vendor key(s).
+  byok_opt_in_and_write_deferred "$CHILD_ID"
 else
  log "6/11 Canary mode — skipping child workspace"
 fi
@@ -570,6 +902,7 @@ fi
 WS_TO_CHECK=("$PARENT_ID")
 [ -n "$CHILD_ID" ] && WS_TO_CHECK+=("$CHILD_ID")
 wait_workspaces_online_routable "7/11 Waiting for workspace(s) to reach status=online (up to $((WORKSPACE_ONLINE_TIMEOUT_SECS/60)) min — hermes cold boot)..." "${WS_TO_CHECK[@]}"
+live_milestone workspace_online

 # ─── 7a. Real chat image upload/download round-trip ───────────────────
 # This deliberately uses the production workflow: tenant admin/session auth
@@ -671,6 +1004,12 @@ for wid in "${WS_TO_CHECK[@]}"; do
  else
    DIAG_FAIL=$(echo "$DIAG_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); print(d.get('first_failure','unknown'))" 2>/dev/null || echo "unknown")
    DIAG_DETAIL=$(echo "$DIAG_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); s=[x for x in d.get('steps',[]) if not x.get('ok')]; step=s[0] if s else {}; print(' — '.join(x for x in [step.get('error',''), step.get('detail','')] if x))" 2>/dev/null || echo "")
+    # #767: always emit the full diagnose JSON so operators see every step's
+    # Detail field even when the Python extraction above fails or the shape
+    # drifts. The burst is bracketed like steps 2 and 4 for grep-friendly CI.
+    log "── DIAGNOSTIC BURST (step 7b — terminal diagnose for $wid) ──"
+    echo "$DIAG_JSON" | python3 -m json.tool 2>/dev/null || echo "$DIAG_JSON"
+    log "── END DIAGNOSTIC ──"
    fail "Workspace $wid terminal diagnose failed at step '$DIAG_FAIL': $DIAG_DETAIL — check tenant SG has tcp/22 from the configured EIC endpoint SG, MOLECULE_EIC_ENDPOINT_SG_ID is set in Railway, and EIC endpoint health"
  fi
 done
@@ -886,7 +1225,7 @@ fi
 # identical on main's scheduled synthetic E2E and on PRs (so it is an
 # environmental backend regression, never PR-introduced).
 if echo "$AGENT_TEXT" | grep -qiF "message contained no text content"; then
-  fail "A2A — EMPTY COMPLETION (backend regression, NOT a platform/workspace-server bug). The configured model (MODEL_SLUG=${MODEL_SLUG:-?}) returned a 2xx completion with no text part; the runtime surfaced 'message contained no text content.'. Operator action: check the staging LLM backend / proxy for the canary model (the claude-code default is minimax:MiniMax-M2.7 since #2263; was bare MiniMax-M2 #2710) — empty assistant turns, not an auth/quota/boot fault. Raw: $AGENT_TEXT"
+  fail "A2A — EMPTY COMPLETION (backend regression, NOT a platform/workspace-server bug). The configured model (MODEL_SLUG=${MODEL_SLUG:-?}) returned a 2xx completion with no text part; the runtime surfaced 'message contained no text content.'. Operator action: check the staging LLM backend / proxy for the canary model (the claude-code MiniMax-BYOK default is the BARE registered id MiniMax-M2.7 — the colon minimax:MiniMax-M2.7 is UNREGISTERED on claude-code, internal#718) — empty assistant turns, not an auth/quota/boot fault. Raw: $AGENT_TEXT"
 fi
 # Generic catch-all — falls through if none of the known regressions hit.
 if echo "$AGENT_TEXT" | grep -qiE "error|exception"; then
@@ -981,6 +1320,11 @@ except Exception:
 " 2>/dev/null || echo "")
 # CORE GATE: contains PINEAPPLE (real round-trip) AND no error-as-text.
 a2a_assert_real_completion "$KA_TEXT" "PINEAPPLE" "A2A known-answer (parent, $RUNTIME/$MODEL_SLUG)"
+# Real, deterministic LLM round-trip proven — the load-bearing milestone for
+# the fail-closed-on-skip guard. Stamped AFTER a2a_assert_real_completion (not
+# after the looser PONG check) so the milestone means a verified completion,
+# not just a 2xx-with-text.
+live_milestone a2a_roundtrip

 # ─── 8c. byok-routing regression guard (#1994) ─────────────────────────
 # The parent was provisioned with the customer's OWN vendor key
@@ -1096,28 +1440,92 @@ print(json.dumps({
    'scope': 'LOCAL'
 }))
 ")
-  tenant_call POST "/workspaces/$PARENT_ID/memories" \
+  # SURFACE THE BODY (mirrors the step-9b / A2A pattern): the previous
+  # `>/dev/null || fail "memory POST failed"` discarded the response body
+  # that --fail-with-body deliberately preserves on a non-2xx, so a 500 from
+  # the workspace-server HMA path (e.g. "failed to store memory" /
+  # "failed to resolve writable namespaces", or a 503 "memory plugin is not
+  # configured") was reported as a bare "memory POST failed" — opaque, the
+  # same #2310-class blind spot. Route http_code into -w and body into -o,
+  # then fail with the sanitized status+body so the mechanism is visible.
+  MEM_POST_TMP=$(e2e_tmp /tmp/e2e_mem_post.XXXXXX)
+  set +e
+  MEM_POST_CODE=$(tenant_call POST "/workspaces/$PARENT_ID/memories" \
    -H "Content-Type: application/json" \
-    -d "$MEM_PAYLOAD" >/dev/null || fail "memory POST failed"
-  MEM_LIST=$(tenant_call GET "/workspaces/$PARENT_ID/memories?scope=LOCAL")
+    -d "$MEM_PAYLOAD" \
+    -o "$MEM_POST_TMP" -w "%{http_code}" 2>/dev/null)
+  MEM_POST_RC=$?
+  set -e
+  MEM_POST_CODE=${MEM_POST_CODE:-000}
+  if [ "$MEM_POST_RC" != "0" ] || [ "$MEM_POST_CODE" -lt 200 ] || [ "$MEM_POST_CODE" -ge 300 ]; then
+    MEM_POST_BODY=$(head -c 400 "$MEM_POST_TMP" 2>/dev/null | sanitize_http_body)
+    fail "memory POST /workspaces/$PARENT_ID/memories failed (curl_rc=$MEM_POST_RC, http=$MEM_POST_CODE): ${MEM_POST_BODY:-<empty body>}"
+  fi
+
+  # Same fail-closed surfacing for the read-back: a 5xx / network error here
+  # previously slipped through the bare `$(tenant_call ...)` capture and only
+  # showed up as "not readable" with an empty list.
+  MEM_LIST_TMP=$(e2e_tmp /tmp/e2e_mem_list.XXXXXX)
+  set +e
+  MEM_LIST_CODE=$(tenant_call GET "/workspaces/$PARENT_ID/memories?scope=LOCAL" \
+    -o "$MEM_LIST_TMP" -w "%{http_code}" 2>/dev/null)
+  MEM_LIST_RC=$?
+  set -e
+  MEM_LIST_CODE=${MEM_LIST_CODE:-000}
+  MEM_LIST=$(cat "$MEM_LIST_TMP" 2>/dev/null || echo "")
+  if [ "$MEM_LIST_RC" != "0" ] || [ "$MEM_LIST_CODE" -lt 200 ] || [ "$MEM_LIST_CODE" -ge 300 ]; then
+    fail "memory GET /workspaces/$PARENT_ID/memories failed (curl_rc=$MEM_LIST_RC, http=$MEM_LIST_CODE): $(printf '%s' "$MEM_LIST" | sanitize_http_body | head -c 400)"
+  fi
  if ! echo "$MEM_LIST" | grep -q "run $SLUG"; then
-    fail "HMA memory not readable after write. List: ${MEM_LIST:0:200}"
+    fail "HMA memory not readable after write (http=$MEM_LIST_CODE). List: $(printf '%s' "$MEM_LIST" | sanitize_http_body | head -c 200)"
  fi
  ok "HMA memory write+read roundtripped"

  log "9b.  Peer discovery + activity log smoke..."
+  # FAIL-CLOSED: assert a real 2xx, not merely "not 404". The previous
+  # `[ "$PEERS_CODE" = "404" ] && fail` only caught the route-missing case —
+  # a 5xx, 000 (connection failure), or empty capture ALL fell through to
+  # "reachable" (false-green: a broken-but-present route read as healthy).
+  # Mechanism: route the http_code into its own tempfile (no stderr capture,
+  # which the old `2>&1 | head -1` could pollute with a curl error line) and
+  # require 2xx explicitly.
+  PEERS_TMP=$(e2e_tmp /tmp/e2e_peers.XXXXXX)
  set +e
-  tenant_call GET "/registry/$PARENT_ID/peers" -o /dev/null -w "%{http_code}\n" 2>&1 | head -1 > /tmp/peers_code.txt
+  PEERS_CODE=$(tenant_call GET "/registry/$PARENT_ID/peers" \
+    -o "$PEERS_TMP" -w "%{http_code}" 2>/dev/null)
+  PEERS_RC=$?
  set -e
-  PEERS_CODE=$(cat /tmp/peers_code.txt)
-  [ "$PEERS_CODE" = "404" ] && fail "Peers endpoint missing (404) — route regression"
+  PEERS_CODE=${PEERS_CODE:-000}
+  if [ "$PEERS_CODE" = "404" ]; then
+    fail "Peers endpoint missing (404) — route regression. /registry/$PARENT_ID/peers"
+  fi
+  if [ "$PEERS_RC" != "0" ] || [ "$PEERS_CODE" -lt 200 ] || [ "$PEERS_CODE" -ge 300 ]; then
+    fail "Peers endpoint unhealthy (curl_rc=$PEERS_RC, http=$PEERS_CODE) — not a clean 2xx, so 'reachable' would be a false-green. Body: $(head -c 200 "$PEERS_TMP" 2>/dev/null | sanitize_http_body)"
+  fi
  ok "Peers endpoint reachable (HTTP $PEERS_CODE)"

-  ACTIVITY=$(tenant_call GET "/activity?workspace_id=$PARENT_ID&limit=5" 2>/dev/null || echo '[]')
-  ACTIVITY_COUNT=$(echo "$ACTIVITY" | python3 -c "import json,sys
-d=json.load(sys.stdin)
-print(len(d if isinstance(d, list) else d.get('events', [])))" 2>/dev/null || echo 0)
-  log "    Activity events observed: $ACTIVITY_COUNT"
+  # FAIL-CLOSED: the activity-log read was `|| echo '[]'` then the count was
+  # only LOGGED, never asserted — a 5xx / network failure silently became an
+  # empty list and the step exited 0 having validated nothing (false-green:
+  # "validated nothing" class). Assert the endpoint returns a 2xx and a
+  # parseable activity shape. We do NOT assert count>0 (the parent may
+  # legitimately have 0 events this early — that's a real, valid state), but
+  # we DO require the call to have actually succeeded and returned valid JSON.
+  ACTIVITY_TMP=$(e2e_tmp /tmp/e2e_activity.XXXXXX)
+  set +e
+  ACTIVITY_CODE=$(tenant_call GET "/activity?workspace_id=$PARENT_ID&limit=5" \
+    -o "$ACTIVITY_TMP" -w "%{http_code}" 2>/dev/null)
+  ACTIVITY_RC=$?
+  set -e
+  ACTIVITY_CODE=${ACTIVITY_CODE:-000}
+  if [ "$ACTIVITY_RC" != "0" ] || [ "$ACTIVITY_CODE" -lt 200 ] || [ "$ACTIVITY_CODE" -ge 300 ]; then
+    fail "Activity-log endpoint unhealthy (curl_rc=$ACTIVITY_RC, http=$ACTIVITY_CODE) — was previously swallowed by '|| echo []' and reported as 0 events (false-green). Body: $(head -c 200 "$ACTIVITY_TMP" 2>/dev/null | sanitize_http_body)"
+  fi
+  ACTIVITY_COUNT=$(python3 -c "import json,sys
+d=json.load(open(sys.argv[1]))
+print(len(d if isinstance(d, list) else d.get('events', [])))" "$ACTIVITY_TMP" 2>/dev/null) \
+    || fail "Activity-log returned HTTP $ACTIVITY_CODE but body was not parseable JSON (events array / {events:[...]}). Body: $(head -c 200 "$ACTIVITY_TMP" 2>/dev/null | sanitize_http_body)"
+  log "    Activity events observed: $ACTIVITY_COUNT (endpoint 2xx + parseable ✓)"

  # ─── 9c. Workspace KV memory Edit round-trip ─────────────────────────
  # Pins the Edit affordance added to the canvas Memory tab. The UI calls
@@ -1268,14 +1676,173 @@ except Exception:
  [ -z "$DELEG_TEXT" ] && fail "Delegation returned no text. Raw: ${DELEG_RESP:0:200}"
  ok "Delegation proxy works (child responded: \"${DELEG_TEXT:0:60}\")"

-  CHILD_ACT=$(tenant_call GET "/activity?workspace_id=$CHILD_ID&limit=20" 2>/dev/null || echo '[]')
-  if echo "$CHILD_ACT" | grep -q "$PARENT_ID"; then
+  # FAIL-CLOSED via bounded readiness-POLL (was soft-logged false-green).
+  # The activity pipeline is async, so an immediate single read can miss the
+  # parent reference — but "did not reference parent" was previously just
+  # LOGGED and the step passed regardless, so a genuinely broken provenance
+  # pipeline (parent never recorded as source) read as success. Mechanism:
+  # poll the child activity log for the parent id for a bounded window
+  # (E2E_CHILD_ACTIVITY_TIMEOUT_SECS, default 60s) — this is the real
+  # readiness signal (provenance row materialised), not a fixed sleep — and
+  # hard-fail with a named mechanism if it never appears.
+  # Bounded poll of the child's activity log for the parent id. The loop ends
+  # as soon as the id is seen, or once the deadline
+  # (E2E_CHILD_ACTIVITY_TIMEOUT_SECS, default 60s) has passed.
+  CHILD_ACT_DEADLINE=$(( $(date +%s) + ${E2E_CHILD_ACTIVITY_TIMEOUT_SECS:-60} ))
+  CHILD_ACT_SEEN=0
+  CHILD_ACT_LASTCODE="000"
+  # One scratch file for the whole poll — previously a new temp file was
+  # allocated on every iteration for what is a single logical read.
+  CHILD_ACT_TMP=$(e2e_tmp /tmp/e2e_child_act.XXXXXX)
+  while true; do
+    # Truncate first so a request that writes nothing can never leave the
+    # previous iteration's body behind to be re-read.
+    : >"$CHILD_ACT_TMP"
+    set +e
+    CHILD_ACT_CODE=$(tenant_call GET "/activity?workspace_id=$CHILD_ID&limit=20" \
+      -o "$CHILD_ACT_TMP" -w "%{http_code}" 2>/dev/null)
+    set -e
+    CHILD_ACT_LASTCODE=${CHILD_ACT_CODE:-000}
+    # -F/--: the parent id is a literal string, not a regex or an option.
+    if grep -qF -- "$PARENT_ID" "$CHILD_ACT_TMP" 2>/dev/null; then
+      CHILD_ACT_SEEN=1
+      break
+    fi
+    [ "$(date +%s)" -ge "$CHILD_ACT_DEADLINE" ] && break
+    sleep 5
+  done
+  if [ "$CHILD_ACT_SEEN" = "1" ]; then
    ok "Child activity log records parent as source"
  else
-    log "Child activity log did not reference parent (pipeline may be async)"
+    fail "Child activity log never referenced parent $PARENT_ID within ${E2E_CHILD_ACTIVITY_TIMEOUT_SECS:-60}s (last http=$CHILD_ACT_LASTCODE) — delegation-provenance pipeline regression (parent not recorded as source). Previously soft-logged → false-green."
  fi
 fi

+# ─── 10b. Pause/Resume + Hibernate/Resume lifecycle transitions ─────────
+# Exercise the REAL workspace lifecycle state machine on the provisioned
+# parent — the transitions that previously had only handler unit tests
+# (handlers_additional_test.go / hibernation_test.go) and NO real-infra
+# coverage. Each transition is asserted against the live DB-backed status the
+# GET /workspaces/:id endpoint returns, so a regression in the Pause/Resume/
+# Hibernate handlers (workspace_restart.go) or their CP stop/re-provision
+# wiring fails the gate instead of silently leaking an EC2 / wedging a tenant.
+#
+# Contract (workspace_restart.go):
+#   POST /pause     online → 'paused'  (container stopped, url cleared)  {"status":"paused"}
+#   POST /resume    paused → 'provisioning' → … → 'online' (re-provision) {"status":"provisioning"}
+#   POST /hibernate online → 'hibernating' → 'hibernated' (container stopped) {"status":"hibernated"}
+#   auto-wake       next A2A message/send on a hibernated ws → online
+#
+# Gated to full MODE (smoke has no parent-stability budget) + E2E_LIFECYCLE.
+# Runs LAST (after all read-only A2A/memory/peer checks) so the pause/stop
+# cycles don't disturb the earlier assertions. Skips are LOUD (logged), and
+# any broken transition hard-fails — never a silent pass.
+if [ "$MODE" = "full" ] && [ "${E2E_LIFECYCLE:-auto}" != "off" ]; then
+  log "10b/11 Lifecycle transitions: pause→resume→online, hibernate→resume(wake) on parent $PARENT_ID..."
+
+  lifecycle_status() {  # echoes the live workspace status
+    # GETs /workspaces/$PARENT_ID and prints the JSON body's 'status' field
+    # (a missing or null status prints as an empty line). If the pipeline
+    # fails — e.g. the body is not a JSON object — the `|| echo ""` fallback
+    # prints an empty line instead, so the function itself still succeeds.
+    tenant_call GET "/workspaces/$PARENT_ID" 2>/dev/null \
+      | python3 -c "import json,sys; print(json.load(sys.stdin).get('status') or '')" 2>/dev/null || echo ""
+  }
+  # Poll lifecycle_status until the parent reports the wanted status or the
+  # time budget runs out. Every observed status change is logged once.
+  # Returns 0 on reaching the target, 1 (after logging the last status seen)
+  # on timeout — callers decide whether that is fatal.
+  wait_status() {  # $1=target $2=timeout_secs $3=label
+    local want="$1" budget="$2" what="$3"
+    local stop_at seen prev=""
+    stop_at=$(( $(date +%s) + budget ))
+    while :; do
+      seen=$(lifecycle_status)
+      if [ "$seen" != "$prev" ]; then
+        log "    parent status → ${seen:-<empty>}"
+        prev="$seen"
+      fi
+      if [ "$seen" = "$want" ]; then
+        return 0
+      fi
+      if [ "$(date +%s)" -gt "$stop_at" ]; then
+        log "    [lifecycle] $what never reached '$want' within ${budget}s (last='$seen')"
+        return 1
+      fi
+      sleep 10
+    done
+  }
+
+  # ── pause → paused ──
+  PAUSE_RESP=$(tenant_call POST "/workspaces/$PARENT_ID/pause" 2>/dev/null || echo '{}')
+  PAUSE_STATUS=$(echo "$PAUSE_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))" 2>/dev/null || echo "")
+  [ "$PAUSE_STATUS" = "paused" ] || fail "Pause: POST /pause returned status='$PAUSE_STATUS' (expected 'paused'). Body: ${PAUSE_RESP:0:200}"
+  # Poll the DB-backed status — the response body could lie; the GET proves the row.
+  wait_status "paused" 120 "pause" || fail "Pause: workspace $PARENT_ID never settled at status=paused (DB row) — Pause handler / CP stop regression (workspace_restart.go Pause)."
+  ok "    pause → paused (DB-verified)"
+
+  # ── resume → provisioning → online ──
+  RESUME_RESP=$(tenant_call POST "/workspaces/$PARENT_ID/resume" 2>/dev/null || echo '{}')
+  RESUME_STATUS=$(echo "$RESUME_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))" 2>/dev/null || echo "")
+  [ "$RESUME_STATUS" = "provisioning" ] || fail "Resume: POST /resume returned status='$RESUME_STATUS' (expected 'provisioning'). Body: ${RESUME_RESP:0:200}"
+  # Resume re-provisions from the preserved config volume; reuse the same
+  # online+routable readiness boundary the initial boot used (no fresh EC2
+  # cold-start, but CP re-provision + heartbeat recovery can still take minutes).
+  wait_workspaces_online_routable "    Waiting for parent to return online after resume (up to $((WORKSPACE_ONLINE_TIMEOUT_SECS/60)) min)..." "$PARENT_ID"
+  ok "    resume → provisioning → online (DB-verified)"
+
+  # ── hibernate → hibernated ──
+  HIB_RESP=$(tenant_call POST "/workspaces/$PARENT_ID/hibernate?force=true" 2>/dev/null || echo '{}')
+  HIB_STATUS=$(echo "$HIB_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))" 2>/dev/null || echo "")
+  [ "$HIB_STATUS" = "hibernated" ] || fail "Hibernate: POST /hibernate?force=true returned status='$HIB_STATUS' (expected 'hibernated'). Body: ${HIB_RESP:0:200}"
+  # The handler runs the claim→stop→'hibernated' sequence; poll the DB row to
+  # confirm it landed on 'hibernated' (not stuck mid-'hibernating').
+  wait_status "hibernated" 120 "hibernate" || fail "Hibernate: workspace $PARENT_ID never settled at status=hibernated (DB row) — Hibernate handler / CP stop regression (workspace_restart.go HibernateWorkspace)."
+  ok "    hibernate → hibernated (DB-verified)"
+
+  # ── resume-from-hibernate via auto-wake on next A2A ──
+  # A hibernated workspace auto-wakes on the next incoming A2A message/send
+  # (no explicit /resume — Resume only handles status=paused). Send a wake
+  # A2A and assert the workspace returns to online. We accept transient cold
+  # 5xx during wake (same edge class the PONG probe tolerates) and poll the
+  # status to the online boundary rather than asserting on the single A2A code.
+  log "    Hibernate auto-wake: sending A2A to wake hibernated parent..."
+  WAKE_PAYLOAD=$(python3 -c "
+import json, uuid
+print(json.dumps({
+    'jsonrpc': '2.0',
+    'method': 'message/send',
+    'id': 'e2e-wake-1',
+    'params': {
+        'message': {
+            'role': 'user',
+            'messageId': f'e2e-wake-{uuid.uuid4().hex[:8]}',
+            'parts': [{'kind': 'text', 'text': 'This is the platform lifecycle smoke test waking a hibernated workspace. No tools or memory are needed — please respond with exactly the single token: WOKE'}]
+        }
+    }
+}))
+")
+  # Scratch file for the wake response body. Allocated through e2e_tmp — the
+  # helper the other scratch files in this script use — instead of a bare
+  # `mktemp -t`, so temp-file handling stays in one place.
+  WAKE_TMP=$(e2e_tmp /tmp/e2e_wake_a2a.XXXXXX)
+  # Upper bound on wake attempts; named once so the loop range, the retry
+  # guard and the log line cannot drift apart.
+  WAKE_MAX_ATTEMPTS=12
+  for WAKE_ATTEMPT in $(seq 1 "$WAKE_MAX_ATTEMPTS"); do
+    # Start each attempt with an empty body file.
+    : >"$WAKE_TMP"
+    set +e
+    WAKE_CODE=$(tenant_call POST "/workspaces/$PARENT_ID/a2a" \
+      --max-time 90 \
+      -H "Content-Type: application/json" \
+      -d "$WAKE_PAYLOAD" \
+      -o "$WAKE_TMP" -w '%{http_code}' 2>/dev/null)
+    WAKE_RC=$?
+    set -e
+    WAKE_CODE=${WAKE_CODE:-000}
+    # A clean 2xx ends the loop immediately.
+    if [ "$WAKE_RC" = "0" ] && [ "$WAKE_CODE" -ge 200 ] && [ "$WAKE_CODE" -lt 300 ]; then
+      break
+    fi
+    WAKE_SAFE_BODY=$(cat "$WAKE_TMP" 2>/dev/null | sanitize_http_body)
+    # Wake legitimately returns transient 5xx while the container restarts —
+    # retry that class only (bounded), never a 4xx.
+    if echo "$WAKE_CODE" | grep -Eq '^(502|503|504)$' && [ "$WAKE_ATTEMPT" -lt "$WAKE_MAX_ATTEMPTS" ]; then
+      log "    wake A2A cold/restart attempt $WAKE_ATTEMPT/$WAKE_MAX_ATTEMPTS returned $WAKE_CODE: ${WAKE_SAFE_BODY:0:120}"
+      sleep 15
+      continue
+    fi
+    # Anything else (4xx, other 5xx, transport failure, retries exhausted):
+    # stop sending and let the status poll that follows decide the outcome.
+    break
+  done
+  rm -f "$WAKE_TMP"
+  # The auto-wake contract is the STATUS transition (hibernated → online), not
+  # the A2A body content — assert the live DB row, the real readiness signal.
+  wait_status "online" "$WORKSPACE_ONLINE_TIMEOUT_SECS" "hibernate-wake" \
+    || fail "Hibernate auto-wake: parent $PARENT_ID never returned to status=online after a wake A2A (last A2A http=$WAKE_CODE) — auto-wake-on-message regression (a hibernated ws must re-provision on the next A2A)."
+  ok "    hibernate → online via auto-wake A2A (DB-verified)"
+  ok "Lifecycle transitions passed: pause→resume→online + hibernate→wake→online"
+else
+  log "10b/11 Lifecycle transitions skipped (MODE=$MODE, E2E_LIFECYCLE=${E2E_LIFECYCLE:-auto}) — pause/resume/hibernate only run in full mode with E2E_LIFECYCLE!=off."
+fi
+
 # ─── 11. Teardown runs via trap ────────────────────────────────────────
+# Fail-closed-on-skip: before declaring PASS, assert (when CI demanded a live
+# run) that every load-bearing lifecycle milestone actually fired. A run that
+# reaches here without provision→online→A2A having truly happened exits 5
+# instead of reporting green. Teardown still runs (EXIT trap) on that exit.
+require_live_or_die
 log "11/11 All checks passed. Teardown runs via EXIT trap."
 ok "═══ STAGING $MODE-SAAS E2E PASSED ═══"
@@ -203,6 +203,60 @@ def test_f1_job_missing_from_sentinel_needs(drift_module, tmp_path, monkeypatch)
    assert any("F1 —" in f and "test" in f for f in findings), findings


+def test_detect_drift_403_fails_closed(drift_module, tmp_path, monkeypatch):
+    """AUTH FAILURE on branch_protections (HTTP 401/403) → RAISE (fail
+    closed). The token can't read BP, so drift is UNVERIFIABLE; greening
+    the hourly cron here would let jobs↔protection drift go silently
+    undetected — exactly the regression class this sentinel exists to
+    catch. fix/core-ci-fail-closed.
+    """
+    ci = _write_ci_yaml(
+        tmp_path,
+        jobs={"build": {"runs-on": "ubuntu-latest"}},
+        sentinel_needs=["build"],
+    )
+    audit = _write_audit_yaml(tmp_path, ["ci / build (pull_request)"])
+    _patch_paths(drift_module, monkeypatch, ci, audit)
+
+    stub = _make_stub_api({
+        ("GET", "/repos/owner/repo/branch_protections/main"): (
+            drift_module.ApiError(
+                "GET /repos/owner/repo/branch_protections/main → HTTP 403: forbidden"
+            )
+        ),
+    })
+    monkeypatch.setattr(drift_module, "api", stub)
+    with pytest.raises(drift_module.ApiError):
+        drift_module.detect_drift("main")
+
+
+def test_detect_drift_404_skips_branch(drift_module, tmp_path, monkeypatch):
+    """Authenticated 404 (branch genuinely has no protection, e.g. staging
+    pre-rollout) → tolerated skip: return ([], debug) with
+    protection_contexts_skipped True. NOT a fail-open (real read of an
+    absent resource with a valid token)."""
+    ci = _write_ci_yaml(
+        tmp_path,
+        jobs={"build": {"runs-on": "ubuntu-latest"}},
+        sentinel_needs=["build"],
+    )
+    audit = _write_audit_yaml(tmp_path, ["ci / build (pull_request)"])
+    _patch_paths(drift_module, monkeypatch, ci, audit)
+
+    stub = _make_stub_api({
+        ("GET", "/repos/owner/repo/branch_protections/staging"): (
+            drift_module.ApiError(
+                "GET /repos/owner/repo/branch_protections/staging → HTTP 404: not found"
+            )
+        ),
+    })
+    monkeypatch.setattr(drift_module, "api", stub)
+    findings, debug = drift_module.detect_drift("staging")
+    assert findings == []
+    assert debug.get("protection_contexts_skipped") is True
+    assert debug.get("protection_http_status") == 404
+
+
 def test_f1b_sentinel_needs_typo(drift_module, tmp_path, monkeypatch):
    """F1b: sentinel.needs lists a job not present in ci.yml (typo).

@@ -34,9 +34,12 @@ Test classes (per `feedback_branch_count_before_approving`):
    together, not short-circuited.
  - test_bp_empty_lints_nothing            — BP has no contexts.
    Exit 0 cleanly.
-  - test_api_403_skips_gracefully          — branch_protections endpoint
-    403s (token-scope). Exit 0 with ::error::, do NOT red-X.
-  - test_api_404_skips_gracefully          — branch has no protection.
+  - test_api_403_fails_closed              — branch_protections endpoint
+    401/403s (auth failure). FAIL CLOSED (exit 2) with ::error::.
+  - test_api_transient_fails_closed        — transient/unexpected API
+    error. FAIL CLOSED (exit 2).
+  - test_api_404_skips_gracefully          — branch has no protection
+    (authenticated absent resource). Tolerated skip (exit 0 + warning).
    Exit 0 cleanly.
  - test_context_event_match_required      — BP context says `(push)` and
    workflow only emits on `pull_request`. That's NOT a match — the
@@ -247,9 +250,10 @@ def test_bp_empty_lints_nothing(envset, monkeypatch, capsys):


 # ---------------------------------------------------------------------------
-# API 403 — graceful-degrade.
+# API 403 — AUTH FAILURE → FAIL CLOSED (exit 2). This is a HARD gate on a
+# protected context; a token that can't read BP must NOT green the lint.
 # ---------------------------------------------------------------------------
-def test_api_403_skips_gracefully(envset, monkeypatch, capsys):
+def test_api_403_fails_closed(envset, monkeypatch, capsys):
    _write_wf(
        envset,
        "ci.yml",
@@ -259,13 +263,30 @@ def test_api_403_skips_gracefully(envset, monkeypatch, capsys):
    m = _import_lint()
    _stub_api(monkeypatch, m, ("forbidden", None))
    rc = m.run()
-    assert rc == 0
+    assert rc == 2
    err = capsys.readouterr().err
    assert "403" in err or "scope" in err.lower() or "token" in err.lower()


 # ---------------------------------------------------------------------------
-# API 404 — branch has no protection → clean exit.
+# API transient/unexpected error → FAIL CLOSED (exit 2).
+# ---------------------------------------------------------------------------
+def test_api_transient_fails_closed(envset, monkeypatch, capsys):
+    _write_wf(
+        envset,
+        "ci.yml",
+        "name: CI\non:\n  pull_request:\n    branches: [main]\njobs:\n"
+        "  j:\n    runs-on: x\n    steps:\n      - run: echo hi\n",
+    )
+    m = _import_lint()
+    _stub_api(monkeypatch, m, ("error", None))
+    rc = m.run()
+    assert rc == 2
+
+
+# ---------------------------------------------------------------------------
+# API 404 — authenticated absent resource (branch has no protection) →
+# tolerated graceful skip (exit 0 with ::warning::), NOT a fail-open.
 # ---------------------------------------------------------------------------
 def test_api_404_skips_gracefully(envset, monkeypatch, capsys):
    _write_wf(
@@ -47,7 +47,10 @@ Test classes (per `feedback_branch_count_before_approving`):
    (the OLD context name disappears; the NEW one needs validation).
  - test_unrelated_workflow_edit_is_not_new       — edit a comment in
    an existing emitter; no new context introduced; pass.
-  - test_api_403_skips_gracefully                 — BP read 403; exit 0
+  - test_api_403_fails_closed                     — BP read 401/403 auth
+    failure → FAIL CLOSED (exit 2) with stderr ::error::.
+  - test_api_transient_fails_closed               — transient → exit 2.
+  - test_api_404_skips_gracefully                 — authenticated 404 → exit 0
-    with stderr ::error::.
+    with stderr ::warning::.
  - test_directive_must_be_in_workflow_yml        — directive in PR
    body alone is NOT sufficient; the comment must live in the
@@ -392,9 +395,10 @@ def test_unrelated_workflow_edit_is_not_new(env, monkeypatch, capsys):


 # ---------------------------------------------------------------------------
-# BP API 403 → exit 0 with ::error::.
+# BP API 401/403 = AUTH FAILURE → FAIL CLOSED (exit 2). A new emission can't
+# be verified against BP if the token can't read BP — must not green.
 # ---------------------------------------------------------------------------
-def test_api_403_skips_gracefully(env, monkeypatch, capsys):
+def test_api_403_fails_closed(env, monkeypatch, capsys):
    m = _import_lint()
    _stub_git_and_api(
        monkeypatch,
@@ -404,11 +408,44 @@ def test_api_403_skips_gracefully(env, monkeypatch, capsys):
        bp_response=("forbidden", None),
    )
    rc = m.run()
-    assert rc == 0
+    assert rc == 2
    err = capsys.readouterr().err
    assert "403" in err or "scope" in err.lower() or "token" in err.lower()


+# ---------------------------------------------------------------------------
+# BP API transient/unexpected error → FAIL CLOSED (exit 2).
+# ---------------------------------------------------------------------------
+def test_api_transient_fails_closed(env, monkeypatch, capsys):
+    m = _import_lint()
+    _stub_git_and_api(
+        monkeypatch,
+        m,
+        base_files={".gitea/workflows/ci.yml": WF_CI_BASE},
+        head_files={".gitea/workflows/ci.yml": WF_CI_NEW_JOB},
+        bp_response=("error", None),
+    )
+    rc = m.run()
+    assert rc == 2
+
+
+# ---------------------------------------------------------------------------
+# BP API authenticated 404 (branch genuinely unprotected) → tolerated
+# graceful skip (exit 0 with ::warning::), NOT a fail-open.
+# ---------------------------------------------------------------------------
+def test_api_404_skips_gracefully(env, monkeypatch, capsys):
+    m = _import_lint()
+    _stub_git_and_api(
+        monkeypatch,
+        m,
+        base_files={".gitea/workflows/ci.yml": WF_CI_BASE},
+        head_files={".gitea/workflows/ci.yml": WF_CI_NEW_JOB},
+        bp_response=("not_found", None),
+    )
+    rc = m.run()
+    assert rc == 0
+
+
 # ---------------------------------------------------------------------------
 # Directive must be in the workflow YML, not PR body.
 # ---------------------------------------------------------------------------
@@ -527,15 +527,13 @@ def test_multi_required_one_bad_one_good_fails(
            assert "good.yml" not in ln


-def test_protection_403_treated_as_skip(lint_module, monkeypatch, capsys):
-    """If the token can't read branch_protections (HTTP 403), exit 0
-    with a clear ::error::-but-non-fatal note. Same scope-fallback shape
-    as ci-required-drift.py per the precedent.
-
-    Rationale: if the lint workflow itself can't read protection, the PR
-    can't make THIS state worse (a paths-filter PR was already addable
-    without the lint). Better to surface a token-scope problem loudly
-    than to red-X every PR until the token is fixed.
+def test_protection_403_fails_closed(lint_module, monkeypatch, capsys):
+    """AUTH FAILURE → FAIL CLOSED (exit 4). If the token can't read
+    branch_protections (HTTP 401/403), the lint CANNOT enumerate the
+    required-check set and therefore CANNOT verify the no-paths-filter
+    invariant. This is a HARD gate on a protected (same-repo PR) context,
+    so it MUST fail loud rather than green an unverifiable gate — fix the
+    token, not the lint.
    """
    stub = _make_stub_api({
        ("GET", "/repos/owner/repo/branch_protections/main"): (
@@ -546,7 +544,26 @@ def test_protection_403_treated_as_skip(lint_module, monkeypatch, capsys):
    })
    monkeypatch.setattr(lint_module, "api", stub)
    rc = lint_module.run()
-    assert rc == 0
+    assert rc == 4
    err = capsys.readouterr().err
    assert "::error::" in err
    assert "403" in err
+
+
+def test_protection_404_skips_gracefully(lint_module, monkeypatch, capsys):
+    """Authenticated 404 (branch genuinely has no protection) is the one
+    tolerated degradation: there are no required contexts to check.
+    Exit 0 with a ::warning:: — NOT a fail-open (this is a real read of an
+    absent resource with a valid token, not an auth failure)."""
+    stub = _make_stub_api({
+        ("GET", "/repos/owner/repo/branch_protections/main"): (
+            lint_module.ApiError(
+                "GET /repos/owner/repo/branch_protections/main → HTTP 404: not found"
+            )
+        ),
+    })
+    monkeypatch.setattr(lint_module, "api", stub)
+    rc = lint_module.run()
+    assert rc == 0
+    err = capsys.readouterr().err
+    assert "404" in err
@@ -2,10 +2,15 @@ package main

 import "testing"

-// TestResolveBindHost pins the precedence: BIND_ADDR explicit > dev-mode
-// fail-open default of 127.0.0.1 > production-shape empty (all interfaces).
+// TestResolveBindHost pins the precedence: BIND_ADDR explicit > local-dev
+// loopback default of 127.0.0.1 > production-shape empty (all interfaces).
 //
-// Mutation-test invariant: removing the IsDevModeFailOpen() branch makes
+// (harden/no-fail-open-auth) The loopback default is now keyed on
+// MOLECULE_ENV alone (IsLocalDevEnv), decoupled from ADMIN_TOKEN — a dev box
+// defaults to loopback even when it provisions an ADMIN_TOKEN. This is
+// defense-in-depth, not an auth lever; auth is fail-closed in every env.
+//
+// Mutation-test invariant: removing the IsLocalDevEnv() branch makes
 // "no_bindaddr_devmode_unset_admin" fail (returns "" instead of "127.0.0.1").
 // Removing the BIND_ADDR branch makes "explicit_bindaddr_*" cases fail.
 func TestResolveBindHost(t *testing.T) {
@@ -35,7 +40,10 @@ func TestResolveBindHost(t *testing.T) {
 			bindAddr:   "",
 			adminToken: "secret",
 			molEnv:     "dev",
-			want:       "", // ADMIN_TOKEN flips IsDevModeFailOpen to false → all interfaces
+			// harden/no-fail-open-auth: loopback default is keyed on
+			// MOLECULE_ENV alone now — a dev box defaults to loopback even
+			// with ADMIN_TOKEN provisioned (which dev-start.sh now does).
+			want: "127.0.0.1",
 		},
 		{
 			name:       "no_bindaddr_production_env",
@@ -474,12 +474,12 @@ func main() {

 	// HTTP server with graceful shutdown.
 	//
-	// Bind host: in dev-mode (no ADMIN_TOKEN, MOLECULE_ENV=dev|development)
-	// the AdminAuth chain fails open by design; pairing that with a wildcard
-	// bind would expose unauth /workspaces to any same-LAN peer. Default to
-	// loopback when fail-open is active. Operators who need LAN exposure set
-	// BIND_ADDR=0.0.0.0 explicitly. Production (ADMIN_TOKEN set) is unchanged.
-	// See molecule-core#7.
+	// Bind host: in local dev (MOLECULE_ENV=dev|development) default the
+	// listener to loopback as defense-in-depth — a dev box shouldn't be
+	// reachable from the LAN. This is NOT an auth lever (auth is fail-closed
+	// in every env now); it's strictly the safer default. Operators who need
+	// LAN exposure set BIND_ADDR=0.0.0.0 explicitly. Production binds all
+	// interfaces (existing shape). See molecule-core#7.
 	bindHost := resolveBindHost()
 	srv := &http.Server{
 		Addr:              fmt.Sprintf("%s:%s", bindHost, port),
@@ -489,7 +489,7 @@ func main() {

 	// Start server in goroutine
 	go func() {
-		log.Printf("Platform starting on %s:%s (dev-mode-fail-open=%v)", bindHost, port, middleware.IsDevModeFailOpen())
+		log.Printf("Platform starting on %s:%s (local-dev-env=%v)", bindHost, port, middleware.IsLocalDevEnv())
 		if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
 			log.Fatalf("Server failed: %v", err)
 		}
@@ -528,20 +528,20 @@ func envOr(key, fallback string) string {
 //
 // Precedence:
 //  1. BIND_ADDR — explicit operator override (any value, including "0.0.0.0").
-//  2. dev-mode fail-open active → "127.0.0.1" (loopback only).
+//  2. local dev (MOLECULE_ENV=dev|development) → "127.0.0.1" (loopback only).
 //  3. otherwise → "" (Go binds every interface; existing prod/self-host shape).
 //
-// Coupling the loopback default to middleware.IsDevModeFailOpen() means the
-// two safety levers — bind narrowness and auth strength — move together. A
-// production deploy (ADMIN_TOKEN set) keeps binding to all interfaces because
-// the auth chain is doing its job; a dev Mac (no ADMIN_TOKEN, MOLECULE_ENV=dev)
-// is reachable only via loopback because the auth chain is fail-open. See
-// molecule-core#7 for the original LAN exposure finding.
+// NOTE (harden/no-fail-open-auth): this is a defense-in-depth default, NOT an
+// auth lever. Auth is fail-closed in every environment now, so the loopback
+// default no longer compensates for a weak auth chain — it simply keeps a dev
+// box off the LAN by default. It is keyed on MOLECULE_ENV alone (decoupled
+// from ADMIN_TOKEN), because dev now provisions an ADMIN_TOKEN yet should
+// still default to loopback. See molecule-core#7 for the original LAN finding.
 func resolveBindHost() string {
 	if v := os.Getenv("BIND_ADDR"); v != "" {
 		return v
 	}
-	if middleware.IsDevModeFailOpen() {
+	if middleware.IsLocalDevEnv() {
 		return "127.0.0.1"
 	}
 	return ""
@@ -21,6 +21,27 @@ const (

 var slackHTTPClient = &http.Client{Timeout: slackHTTPTimeout}

+// slackWebhookAccepted reports whether u may be used as a Slack Incoming
+// Webhook send destination.
+//
+// A URL is accepted when it starts with slackWebhookPrefix. As a TEST SEAM for
+// the gating e2e (tests/e2e/test_channels_e2e.sh), a URL that starts with the
+// non-empty value of channelsTestWebhookBase() is accepted as well, so the
+// live Slack send path can be pointed at a local mock upstream and the mock
+// can assert it received the serialized {"text":...} payload. When that
+// helper returns "" only the slackWebhookPrefix match can pass.
+//
+// NOTE(review): the seam is a plain string-prefix match, so the configured
+// base should end on a URL boundary (for example a trailing "/"); otherwise
+// a base of "http://host:1" would also admit "http://host:10/...".
+func slackWebhookAccepted(u string) bool {
+	if strings.HasPrefix(u, slackWebhookPrefix) {
+		return true
+	}
+	testBase := channelsTestWebhookBase()
+	return testBase != "" && strings.HasPrefix(u, testBase)
+}
+
 // SlackAdapter implements ChannelAdapter for Slack Incoming Webhooks.
 //
 // Outbound messages are sent via Slack Incoming Webhooks (the simple,
@@ -98,7 +119,7 @@ func (s *SlackAdapter) ValidateConfig(config map[string]interface{}) error {
 			return fmt.Errorf("bot_token mode requires channel_id")
 		}
 	}
-	if webhookURL != "" && !strings.HasPrefix(webhookURL, slackWebhookPrefix) {
+	if webhookURL != "" && !slackWebhookAccepted(webhookURL) {
 		return fmt.Errorf("invalid Slack webhook URL")
 	}
 	return nil
@@ -197,7 +218,7 @@ func (s *SlackAdapter) sendWebhookMessage(ctx context.Context, config map[string
 	if webhookURL == "" {
 		return fmt.Errorf("webhook_url not configured")
 	}
-	if !strings.HasPrefix(webhookURL, slackWebhookPrefix) {
+	if !slackWebhookAccepted(webhookURL) {
 		return fmt.Errorf("invalid Slack webhook URL")
 	}

@@ -148,7 +148,18 @@ func (t *TelegramAdapter) DiscoverChats(ctx context.Context, botToken string) (*
 		return nil, errors.New("invalid bot token format")
 	}

-	bot, err := tgbotapi.NewBotAPI(botToken)
+	// TEST SEAM: when MOLECULE_CHANNELS_TEST_TELEGRAM_API_BASE is set (only in
+	// the gating channels e2e — never in prod/staging), build the bot client
+	// against a local mock API base instead of api.telegram.org so
+	// POST /channels/discover can be proven end-to-end. The format string is
+	// "<base>/bot%s/%s" (token, method), matching tgbotapi.APIEndpoint.
+	var bot *tgbotapi.BotAPI
+	var err error
+	if apiBase := channelsTestTelegramAPIBase(); apiBase != "" {
+		bot, err = tgbotapi.NewBotAPIWithAPIEndpoint(botToken, apiBase+"/bot%s/%s")
+	} else {
+		bot, err = tgbotapi.NewBotAPI(botToken)
+	}
 	if err != nil {
 		return nil, fmt.Errorf("invalid bot token: %w", err)
 	}
@@ -0,0 +1,47 @@
+package channels
+
+import "os"
+
+// Test seams for the GATING channels e2e (tests/e2e/test_channels_e2e.sh).
+//
+// Every adapter pins its outbound destination to the real vendor host
+// (hooks.slack.com, discord.com, api.telegram.org) in both ValidateConfig and
+// SendMessage. That host pin is correct for production, but it means a real
+// end-to-end test cannot point the LIVE send/discover path at a local mock
+// upstream — so today the outbound serialize+POST is only ever asserted by
+// unit tests that reconstruct the payload by hand (see lark_test.go's
+// "we can't change the prefix const" comment) and never proven through the
+// running platform.
+//
+// These two env-gated overrides close that gap WITHOUT changing any
+// production behaviour:
+//
+//   - MOLECULE_CHANNELS_TEST_WEBHOOK_BASE — when set, Slack Incoming Webhook
+//     URLs with this prefix are accepted as send destinations (in addition to
+//     the real hooks.slack.com host). Lets the e2e create a slack channel whose
+//     webhook_url points at a local httptest mock and assert the mock RECEIVED
+//     the serialized {"text":...} payload.
+//
+//   - MOLECULE_CHANNELS_TEST_TELEGRAM_API_BASE — when set, TelegramAdapter.
+//     DiscoverChats builds its bot client against this API base instead of
+//     api.telegram.org, so POST /channels/discover can be exercised against a
+//     mock that serves getMe/getUpdates and the e2e can assert the discovered
+//     chats round-trip.
+//
+// Both vars are NEVER set in any production or staging deploy. The helpers
+// return "" there, so the real vendor-host pins are the only thing that
+// passes — production behaviour is byte-for-byte unchanged. Reading os.Getenv
+// on each call (not caching) keeps the seam honest: a process that never sets
+// the var can never accidentally enable it.
+
+// channelsTestWebhookBase returns the test-only accepted webhook base prefix,
+// read from MOLECULE_CHANNELS_TEST_WEBHOOK_BASE on every call (no caching), or
+// "" when the variable is unset. See the file-level comment above.
+func channelsTestWebhookBase() string {
+	return os.Getenv("MOLECULE_CHANNELS_TEST_WEBHOOK_BASE")
+}
+
+// channelsTestTelegramAPIBase returns the test-only Telegram Bot API base URL,
+// read from MOLECULE_CHANNELS_TEST_TELEGRAM_API_BASE on every call, or "" when
+// the variable is unset. The value is the bare base, NOT a printf format
+// string: TelegramAdapter.DiscoverChats appends "/bot%s/%s" to it itself.
+func channelsTestTelegramAPIBase() string {
+	return os.Getenv("MOLECULE_CHANNELS_TEST_TELEGRAM_API_BASE")
+}
@@ -0,0 +1,177 @@
+package handlers
+
+// a2a_full_body_delivery_guard_test.go — regression guard for core#2175.
+//
+// core#2175 RCA: the long-believed "A2A truncation" was a MISDIAGNOSIS.
+// A2A message delivery preserves the FULL body on every agent-facing path.
+// Only HUMAN-facing DISPLAY previews are capped (activity title 80 runes,
+// broadcast 120, delegation summary 80, canvas response_preview 200 bytes).
+// Those caps live on display/broadcast fields, NOT on the bytes an agent
+// reads off the wire.
+//
+// This file locks in the correct behaviour so a FUTURE change cannot
+// silently reintroduce REAL truncation on the agent-facing delivery paths:
+//
+//   1. DequeueNext (a2a_queue.go) — the drain/read path does
+//      `SELECT ... body::text ...` and returns item.Body. The delivered
+//      body MUST equal the enqueued body byte-for-byte.
+//
+//   2. toolCheckTaskStatus (mcp_tools.go) — reads activity_logs.response_body
+//      and surfaces result["result"] = extractA2AText(responseBody). The
+//      returned text MUST be the COMPLETE response text, not a preview.
+//
+// Both bodies used here are WELL over 200 chars (> the largest preview cap,
+// canvas response_preview at 200 bytes) so a regression that wired any
+// display cap into a delivery path would fail loudly.
+//
+// Style: matches the sibling a2a_queue_test.go / mcp_tools_test.go — sqlmock,
+// no integration build tag. These paths are deterministically exercisable
+// against the mock because the truncation guard is about what the Go code
+// does with the row value, not about Postgres-side text handling. CI's
+// real-PG integration arm (a2a_*_integration tests) additionally exercises
+// the live `body::text` round-trip.
+
+import (
+	"context"
+	"database/sql"
+	"strings"
+	"testing"
+
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
+	"github.com/DATA-DOG/go-sqlmock"
+)
+
+// largeA2ABody builds a syntactically valid A2A JSON-RPC message body whose
+// embedded text part is `textLen` runes long, so the whole body comfortably
+// exceeds every human-facing preview cap (max 200 bytes).
+func largeA2ABody(textLen int) string {
+	longText := strings.Repeat("A", textLen)
+	return `{"jsonrpc":"2.0","method":"message/send","params":{"message":{"role":"user","messageId":"guard-2175","parts":[{"type":"text","text":"` + longText + `"}]}}}`
+}
+
+// TestDequeueNext_PreservesFullBody_NoTruncation is the guard for the queue
+// drain/read path. It asserts that the body returned from DequeueNext equals
+// the enqueued body byte-for-byte, even when far longer than any preview cap.
+func TestDequeueNext_PreservesFullBody_NoTruncation(t *testing.T) {
+	// 4000-char text part → total body well over the 200-byte canvas cap and
+	// every other display preview cap.
+	fullBody := largeA2ABody(4000)
+	if len(fullBody) <= 200 {
+		t.Fatalf("test setup error: body must exceed the largest preview cap (200); got %d", len(fullBody))
+	}
+
+	mockDB, mock, err := sqlmock.New(sqlmock.QueryMatcherOption(sqlmock.QueryMatcherEqual))
+	if err != nil {
+		t.Fatalf("failed to create sqlmock: %v", err)
+	}
+	prevDB := db.DB
+	db.DB = mockDB
+	t.Cleanup(func() { db.DB = prevDB; mockDB.Close() })
+
+	const wsID = "ws-guard-2175"
+	const itemID = "qid-guard-2175"
+
+	// DequeueNext runs BEGIN → SELECT ... body::text ... → UPDATE → COMMIT.
+	// The mocked SELECT returns the FULL body in the body column; the guard
+	// is that DequeueNext propagates it untouched into item.Body.
+	mock.ExpectBegin()
+	mock.ExpectQuery(
+		"SELECT id, workspace_id, caller_id, priority, body::text, method, attempts FROM a2a_queue WHERE workspace_id = $1 AND status = 'queued' AND (expires_at IS NULL OR expires_at > now()) ORDER BY priority DESC, enqueued_at ASC FOR UPDATE SKIP LOCKED LIMIT 1").
+		WithArgs(wsID).
+		WillReturnRows(sqlmock.NewRows([]string{
+			"id", "workspace_id", "caller_id", "priority", "body", "method", "attempts",
+		}).AddRow(
+			itemID, wsID, sql.NullString{Valid: false}, PriorityTask,
+			fullBody, sql.NullString{String: "message/send", Valid: true}, 0,
+		))
+	mock.ExpectExec(
+		"UPDATE a2a_queue SET status = 'dispatched', dispatched_at = now(), attempts = attempts + 1 WHERE id = $1").
+		WithArgs(itemID).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+	mock.ExpectCommit()
+
+	item, err := DequeueNext(context.Background(), wsID)
+	if err != nil {
+		t.Fatalf("DequeueNext returned error: %v", err)
+	}
+	if item == nil {
+		t.Fatal("DequeueNext returned nil item for a non-empty queue")
+	}
+
+	if got := string(item.Body); got != fullBody {
+		t.Errorf("delivered body was truncated/altered.\n  enqueued len=%d\n  delivered len=%d\n  REGRESSION: a delivery path must NOT apply a display preview cap (core#2175)",
+			len(fullBody), len(got))
+	}
+
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
+
+// TestToolCheckTaskStatus_ReturnsFullResponseBody_NoTruncation is the guard
+// for the check_task_status agent-facing read path. It asserts that the text
+// surfaced in result["result"] (via extractA2AText over response_body) is the
+// COMPLETE response text — never a preview-capped slice.
+func TestToolCheckTaskStatus_ReturnsFullResponseBody_NoTruncation(t *testing.T) {
+	// 3000-char response text, far above any preview cap.
+	fullText := strings.Repeat("B", 3000)
+	responseBody := `{"jsonrpc":"2.0","result":{"artifacts":[{"parts":[{"type":"text","text":"` + fullText + `"}]}]}}`
+
+	mockDB, mock, err := sqlmock.New()
+	if err != nil {
+		t.Fatalf("failed to create sqlmock: %v", err)
+	}
+	t.Cleanup(func() { mockDB.Close() })
+
+	h := &MCPHandler{database: mockDB}
+
+	const callerID = "ws-caller-2175"
+	const targetID = "ws-target-2175"
+	const taskID = "del-guard-2175"
+
+	mock.ExpectQuery(`SELECT status, error_detail, response_body`).
+		WithArgs(callerID, targetID, taskID).
+		WillReturnRows(sqlmock.NewRows([]string{"status", "error_detail", "response_body"}).
+			AddRow("completed", sql.NullString{Valid: false}, []byte(responseBody)))
+
+	out, err := h.toolCheckTaskStatus(context.Background(), callerID, map[string]interface{}{
+		"workspace_id": targetID,
+		"task_id":      taskID,
+	})
+	if err != nil {
+		t.Fatalf("toolCheckTaskStatus returned error: %v", err)
+	}
+
+	// The full text must appear in the serialized result. If a future change
+	// applied a preview cap (e.g. TruncateBytes(…, 200)) to the agent-facing
+	// result, this substring check would fail.
+	if !strings.Contains(out, fullText) {
+		t.Errorf("check_task_status result was truncated.\n  expected full %d-char response text in result\n  REGRESSION: the agent-facing check_task_status path must return the COMPLETE response_body, not a display preview (core#2175)",
+			len(fullText))
+	}
+
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
+
+// TestExtractA2AText_FullBodyNoCap is a focused unit-level guard on the
+// extractor itself: extractA2AText must return the entire text part with no
+// length cap, for both supported A2A response shapes.
+func TestExtractA2AText_FullBodyNoCap(t *testing.T) {
+	fullText := strings.Repeat("C", 2500)
+
+	cases := map[string]string{
+		"artifacts shape": `{"result":{"artifacts":[{"parts":[{"type":"text","text":"` + fullText + `"}]}]}}`,
+		"message shape":   `{"result":{"message":{"parts":[{"type":"text","text":"` + fullText + `"}]}}}`,
+	}
+	for name, body := range cases {
+		t.Run(name, func(t *testing.T) {
+			got := extractA2AText([]byte(body))
+			if got != fullText {
+				t.Errorf("extractA2AText capped/altered the text.\n  want len=%d\n  got  len=%d\n  REGRESSION: extractor must not truncate (core#2175)",
+					len(fullText), len(got))
+			}
+		})
+	}
+}
@@ -0,0 +1,141 @@
+package handlers
+
+// a2a_outbound_envelope_test.go — outbound A2A `message/send` envelope
+// CONTRACT gate (issue #2251).
+//
+// #2251: an outbound A2A envelope shipped without `role` and with text
+// parts keyed `type` instead of the v0.3-canonical `kind`. The receiver's
+// a2a-sdk v0.3 Pydantic validator silently rejected the message
+// post-dispatch — the sender saw a happy 200/202 while the brief was
+// dropped (the same invisible-rejection failure class as the v0.2→v0.3
+// content bug pinned by a2a_corpus_test.go, but on the SEND side).
+//
+// The inbound corpus replay (a2a_corpus_test.go) proves normalizeA2APayload
+// produces `parts[].kind` + a non-empty messageId, but it does NOT assert
+// `role`, and it only covers what we RECEIVE. Nothing pins what core
+// EMITS. This file pins the emit contract at the helper that builds the
+// parts (buildA2AMessageParts, used by both delegate_task and
+// delegate_task_async) and asserts the canonical Part key is `kind`.
+//
+// Part-object schema (A2A v0.3): every Part MUST carry a `kind`
+// discriminator ("text" | "file" | "data"); there is NO `type` key. A
+// text Part is {"kind":"text","text":"..."}. Emitting `type` makes the
+// v0.3 validator drop the Part.
+
+import (
+	"encoding/json"
+	"testing"
+)
+
+// TestBuildA2AMessageParts_TextPartUsesKindNotType pins the v0.3 Part
+// discriminator for the text part emitted on every outbound A2A
+// delegation. RED before #2251's fix (the helper emitted
+// {"type":"text",...}); the receiver's v0.3 Pydantic validator drops a
+// Part keyed `type`, silently losing the task text.
+func TestBuildA2AMessageParts_TextPartUsesKindNotType(t *testing.T) {
+	parts := buildA2AMessageParts("do the work", nil)
+	if len(parts) == 0 {
+		t.Fatal("buildA2AMessageParts returned no parts for a non-empty task")
+	}
+	text := parts[0]
+
+	if _, hasType := text["type"]; hasType {
+		t.Errorf("text part uses forbidden v0.2 key `type` %v — A2A v0.3 Parts discriminate on `kind`; `type` is dropped by the receiver's validator (#2251)", text)
+	}
+	kind, ok := text["kind"].(string)
+	if !ok {
+		t.Fatalf("text part missing string `kind` discriminator; got %v", text)
+	}
+	if kind != "text" {
+		t.Errorf("text part kind = %q, want \"text\"", kind)
+	}
+	if text["text"] != "do the work" {
+		t.Errorf("text part text = %v, want \"do the work\"", text["text"])
+	}
+}
+
+// TestBuildA2AMessageParts_FilePartUsesKind guards the file-attachment
+// Part the same way. The file-part branch was already correct (it used
+// `kind`), so this is a non-regression pin — it must STAY `kind` when the
+// text branch is fixed (a careless "make them consistent" edit could flip
+// both to the wrong key).
+func TestBuildA2AMessageParts_FilePartUsesKind(t *testing.T) {
+	atts := []AgentMessageAttachment{
+		{URI: "https://example.com/a.png", MimeType: "image/png", Name: "a.png"},
+	}
+	parts := buildA2AMessageParts("caption", atts)
+	if len(parts) < 2 {
+		t.Fatalf("expected text + file parts, got %d", len(parts))
+	}
+	file := parts[1]
+	if _, hasType := file["type"]; hasType {
+		t.Errorf("file part uses forbidden `type` key: %v", file)
+	}
+	if _, hasKind := file["kind"]; !hasKind {
+		t.Errorf("file part missing `kind` discriminator: %v", file)
+	}
+}
+
+// TestDelegationOutboundEnvelope_RoleAndKind pins the outbound envelope
+// contract — role + parts[].kind — around the canonical parts helper.
+// A v0.3 `message` MUST carry `role` ("user" for a delegation request)
+// and `parts` whose every entry discriminates on `kind`. This is the
+// shape the receiver's MessageSendParams validator accepts; an envelope
+// missing `role` or keyed `type` is silently rejected (#2251).
+//
+// NOTE: the envelope (role:"user" included) is assembled by this test from
+// the same primitives delegation.go / mcp_tools.go use, so only the parts
+// come from production code; the role check guards the JSON round-trip.
+func TestDelegationOutboundEnvelope_RoleAndKind(t *testing.T) {
+	envelope := map[string]interface{}{
+		"method": "message/send",
+		"params": map[string]interface{}{
+			"message": map[string]interface{}{
+				"role":      "user",
+				"messageId": "deleg-1",
+				"parts":     buildA2AMessageParts("do the work", nil),
+			},
+		},
+	}
+	raw, err := json.Marshal(envelope)
+	if err != nil {
+		t.Fatalf("marshal envelope: %v", err)
+	}
+	var parsed map[string]interface{}
+	if err := json.Unmarshal(raw, &parsed); err != nil {
+		t.Fatalf("unmarshal envelope: %v", err)
+	}
+
+	params, _ := parsed["params"].(map[string]interface{})
+	if params == nil {
+		t.Fatal("envelope missing params")
+	}
+	msg, _ := params["message"].(map[string]interface{})
+	if msg == nil {
+		t.Fatal("envelope missing params.message")
+	}
+
+	// role is mandatory on a v0.3 message — the receiver rejects without it.
+	role, hasRole := msg["role"].(string)
+	if !hasRole || role == "" {
+		t.Errorf("params.message missing non-empty `role` — v0.3 requires it; omitting it is the other half of #2251")
+	}
+
+	parts, _ := msg["parts"].([]interface{})
+	if len(parts) == 0 {
+		t.Fatal("params.message.parts is empty")
+	}
+	for i, p := range parts {
+		pm, _ := p.(map[string]interface{})
+		if pm == nil {
+			t.Errorf("part %d is not an object: %v", i, p)
+			continue
+		}
+		if _, hasType := pm["type"]; hasType {
+			t.Errorf("part %d uses forbidden `type` key (must be `kind`): %v", i, pm)
+		}
+		if _, hasKind := pm["kind"]; !hasKind {
+			t.Errorf("part %d missing `kind` discriminator: %v", i, pm)
+		}
+	}
+}
@@ -801,6 +801,18 @@ func normalizeA2APayload(body []byte) ([]byte, string, *proxyA2AError) {
 			if _, hasID := msg["messageId"]; !hasID {
 				msg["messageId"] = uuid.New().String()
 			}
+			// #2251: default params.message.role to "user" when absent.
+			// The downstream a2a-sdk v0.3 Pydantic validator marks role a
+			// REQUIRED field; a role-less envelope fails parse with
+			// "params.message.role Field required". The Go builders
+			// (mcp_tools/delegation/scheduler/channels) already set it, but
+			// raw external/canvas POSTs to ProxyA2A may omit it — making this
+			// the single canonical choke that guarantees a schema-valid role.
+			// Mirror the messageId default exactly: inject only when missing,
+			// never overwrite a caller-supplied role (e.g. "agent").
+			if _, hasRole := msg["role"]; !hasRole {
+				msg["role"] = "user"
+			}
 			_, hasParts := msg["parts"]
 			rawContent, hasContent := msg["content"]
 			if !hasParts {
@@ -832,6 +844,27 @@ func normalizeA2APayload(body []byte) ([]byte, string, *proxyA2AError) {
 					}
 				}
 			}
+			// #2251: wire hygiene — the A2A v0.3 Part discriminator is
+			// "kind", but some builders/clients emit the legacy "type" key
+			// (e.g. delegation.go). The v0.3 Pydantic validator keys on
+			// "kind"; a stray "type" leaves the Part untagged. Rename
+			// "type" → "kind" on every Part that lacks an explicit "kind"
+			// so the discriminator is always present on the wire.
+			if parts, ok := msg["parts"].([]interface{}); ok {
+				for _, p := range parts {
+					part, ok := p.(map[string]interface{})
+					if !ok {
+						continue
+					}
+					if _, hasKind := part["kind"]; hasKind {
+						continue
+					}
+					if t, hasType := part["type"]; hasType {
+						part["kind"] = t
+						delete(part, "type")
+					}
+				}
+			}
 		}
 	}

@@ -1514,6 +1514,142 @@ func TestNormalizeA2APayload_NoMessageNoCheck(t *testing.T) {
 	}
 }

+// --- #2251: role default + part-kind hygiene contract tests ---
+//
+// These assert normalizeA2APayload is the single canonical Go choke that
+// guarantees a schema-valid outbound message/send envelope: it injects a
+// default params.message.role="user" when the sender omitted role (the bug
+// that made delegate_task fail the peer's a2a Pydantic validator with
+// "params.message.role Field required" while reply_to_workspace worked), and
+// it renames the legacy Part discriminator "type"→"kind" for wire hygiene.
+
+// normMsg is a small helper that runs normalizeA2APayload and returns the
+// resolved params.message map, failing the test on any normalization error.
+func normMsg(t *testing.T, raw string) map[string]interface{} {
+	t.Helper()
+	out, _, perr := normalizeA2APayload([]byte(raw))
+	if perr != nil {
+		t.Fatalf("normalizeA2APayload returned error: %+v", perr)
+	}
+	var parsed map[string]interface{}
+	if err := json.Unmarshal(out, &parsed); err != nil {
+		t.Fatalf("output not valid JSON: %v", err)
+	}
+	params, ok := parsed["params"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("output missing params object: %s", string(out))
+	}
+	msg, ok := params["message"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("output missing params.message object: %s", string(out))
+	}
+	return msg
+}
+
+func TestNormalizeA2APayload_DefaultsRoleWhenMissing(t *testing.T) {
+	cases := []struct {
+		name string
+		raw  string
+	}{
+		{
+			name: "v0.3 parts, no role",
+			raw:  `{"method":"message/send","params":{"message":{"parts":[{"kind":"text","text":"hi"}]}}}`,
+		},
+		{
+			name: "v0.2 string content, no role",
+			raw:  `{"method":"message/send","params":{"message":{"content":"hi"}}}`,
+		},
+		{
+			name: "legacy type part, no role",
+			raw:  `{"method":"message/send","params":{"message":{"parts":[{"type":"text","text":"hi"}]}}}`,
+		},
+		{
+			name: "already wrapped jsonrpc, no role",
+			raw:  `{"jsonrpc":"2.0","id":"x","method":"message/send","params":{"message":{"parts":[{"kind":"text","text":"hi"}]}}}`,
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			msg := normMsg(t, tc.raw)
+			if msg["role"] != "user" {
+				t.Errorf("expected role defaulted to \"user\", got %v", msg["role"])
+			}
+			// Parts must remain valid (non-empty) after normalization.
+			parts, ok := msg["parts"].([]interface{})
+			if !ok || len(parts) == 0 {
+				t.Fatalf("expected non-empty parts after normalization, got %v", msg["parts"])
+			}
+			// Every part must carry the v0.3 "kind" discriminator.
+			for i, p := range parts {
+				part, ok := p.(map[string]interface{})
+				if !ok {
+					t.Fatalf("part %d is not an object: %v", i, p)
+				}
+				if _, hasKind := part["kind"]; !hasKind {
+					t.Errorf("part %d missing \"kind\" discriminator: %v", i, part)
+				}
+				if _, hasType := part["type"]; hasType {
+					t.Errorf("part %d still has legacy \"type\" key: %v", i, part)
+				}
+			}
+		})
+	}
+}
+
+func TestNormalizeA2APayload_PreservesExplicitRole(t *testing.T) {
+	// A caller-supplied role (e.g. "agent") must NOT be overwritten with "user".
+	msg := normMsg(t, `{"method":"message/send","params":{"message":{"role":"agent","parts":[{"kind":"text","text":"hi"}]}}}`)
+	if msg["role"] != "agent" {
+		t.Errorf("explicit role overwritten: expected \"agent\", got %v", msg["role"])
+	}
+}
+
+func TestNormalizeA2APayload_RenamesPartTypeToKind(t *testing.T) {
+	// Mirrors delegation.go's builder which emits {"type":"text",...}. After
+	// normalization the wire Part must be discriminated by "kind".
+	msg := normMsg(t, `{"method":"message/send","params":{"message":{"role":"user","parts":[{"type":"text","text":"a"},{"type":"file","uri":"workspace:/x"}]}}}`)
+	parts := msg["parts"].([]interface{})
+	if len(parts) != 2 {
+		t.Fatalf("expected 2 parts, got %d", len(parts))
+	}
+	wantKind := []string{"text", "file"}
+	for i, p := range parts {
+		part := p.(map[string]interface{})
+		if part["kind"] != wantKind[i] {
+			t.Errorf("part %d: expected kind=%q, got %v", i, wantKind[i], part["kind"])
+		}
+		if _, hasType := part["type"]; hasType {
+			t.Errorf("part %d still carries legacy \"type\": %v", i, part)
+		}
+	}
+}
+
+func TestNormalizeA2APayload_DoesNotClobberKindWithType(t *testing.T) {
+	// If a part has BOTH kind and type, kind wins and is left untouched.
+	msg := normMsg(t, `{"method":"message/send","params":{"message":{"role":"user","parts":[{"kind":"text","type":"ignored","text":"a"}]}}}`)
+	part := msg["parts"].([]interface{})[0].(map[string]interface{})
+	if part["kind"] != "text" {
+		t.Errorf("expected kind preserved as \"text\", got %v", part["kind"])
+	}
+}
+
+// TestNormalizeA2APayload_RoleDefault_ContractRegression documents the
+// pre-fix failure: without the role default, a role-less message/send body
+// emerged from normalization still missing params.message.role, which the
+// peer's a2a Pydantic validator rejects. This asserts the POST-fix invariant
+// (role present) directly; before the a2a_proxy.go change this assertion
+// fails (role is absent → msg["role"] == nil).
+func TestNormalizeA2APayload_RoleDefault_ContractRegression(t *testing.T) {
+	msg := normMsg(t, `{"method":"message/send","params":{"message":{"parts":[{"kind":"text","text":"delegate this"}]}}}`)
+	role, hasRole := msg["role"]
+	if !hasRole {
+		t.Fatal("REGRESSION (#2251): params.message.role absent after normalization — peer a2a validator will reject with 'role Field required'")
+	}
+	if role != "user" {
+		t.Errorf("expected default role \"user\", got %v", role)
+	}
+}
+
 // --- resolveAgentURL direct unit tests ---

 func TestResolveAgentURL_CacheHit(t *testing.T) {
@@ -9,6 +9,7 @@ import (
 	"log"
 	"net/http"
 	"os"
+	"sort"
 	"strings"
 	"time"

@@ -18,6 +19,7 @@ import (
 	dockerclient "github.com/docker/docker/client"
 	"github.com/gin-gonic/gin"

+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/provisioner"
 )

@@ -41,10 +43,53 @@ func NewWorkspaceImageService(docker *dockerclient.Client) *WorkspaceImageServic
 	return &WorkspaceImageService{docker: docker}
 }

-// AllRuntimes is the canonical list mirroring docs/workspace-runtime-package.md.
-// Update both when a new template is added.
-var AllRuntimes = []string{
-	"claude-code", "codex", "hermes", "openclaw",
+// AllRuntimes is the canonical set of workspace runtimes this tenant will
+// pull/recreate template images for. It is DERIVED from the same providers
+// manifest SSOT (internal/providers/providers.yaml `runtimes:` block, mirrored
+// from CP's providers.yaml) that the rest of the platform routes against —
+// NOT a second hand-maintained list.
+//
+// Why derive instead of hardcode (controlplane#578): the old hardcoded slice
+// here ({claude-code, codex, hermes, openclaw}) silently DRIFTED from CP, which
+// already accepts `google-adk` for pin-promote/redeploy. A google-adk pin would
+// be accepted CP-side, then this tenant's POST /admin/workspace-images/refresh
+// ?runtime=google-adk rejected it 400 ("unknown runtime"), so google-adk image
+// fixes never deployed. Deriving from the manifest makes the tenant allowlist
+// and the CP allowlist provably the same set — they can't drift again.
+
+// imageRefreshFallbackRuntimes is used ONLY if the embedded providers manifest
+// fails to load or declares no runtimes (which would be a build/CI failure
+// caught by the providers package's own tests, never a healthy prod). It
+// preserves the historical behavior — plus google-adk — so a manifest
+// regression can never take the refresh endpoint fully offline. Kept in
+// lockstep with the providers.yaml `runtimes:` keys; the drift guard in
+// admin_workspace_images_test.go asserts the two match.
+var imageRefreshFallbackRuntimes = []string{
+	"claude-code", "codex", "google-adk", "hermes", "openclaw",
+}
+
+// AllRuntimes is computed once at package init from the providers SSOT.
+var AllRuntimes = loadImageRefreshRuntimes()
+
+// loadImageRefreshRuntimes returns the sorted runtime names declared in the
+// providers manifest, falling back to a sorted copy of
+// imageRefreshFallbackRuntimes if the manifest fails to load or is empty.
+func loadImageRefreshRuntimes() []string {
+	m, err := providers.LoadManifest()
+	if err != nil || len(m.Runtimes) == 0 {
+		if err != nil {
+			log.Printf("workspace-images: providers.LoadManifest failed (%v); falling back to static runtime allowlist", err)
+		}
+		out := append([]string(nil), imageRefreshFallbackRuntimes...)
+		sort.Strings(out)
+		return out
+	}
+	out := make([]string, 0, len(m.Runtimes))
+	for rt := range m.Runtimes {
+		out = append(out, rt)
+	}
+	sort.Strings(out)
+	return out
 }

 // RefreshResult is the per-call outcome surfaced to HTTP callers AND logged
@@ -197,7 +242,7 @@ func (s *WorkspaceImageService) Refresh(ctx context.Context, runtimes []string,

 // AdminWorkspaceImagesHandler serves POST /admin/workspace-images/refresh.
 //
-//	?runtime=claude-code   (optional; default = all 8 templates)
+//	?runtime=claude-code   (optional; default = all runtimes in AllRuntimes)
 //	&recreate=true|false   (default true; false = pull only)
 //
 // Returns JSON {pulled: [...], failed: [...], recreated: [...]}
@@ -3,7 +3,14 @@ package handlers
 import (
 	"encoding/base64"
 	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"sort"
 	"testing"
+
+	"github.com/gin-gonic/gin"
+
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
 )

 func TestGHCRAuthHeader_NoEnvReturnsEmpty(t *testing.T) {
@@ -92,6 +99,119 @@ func TestGHCRAuthHeader_RespectsRegistryEnv(t *testing.T) {
 	}
 }

+// runtimeListContains is a tiny membership helper for the runtime-allowlist tests.
+func runtimeListContains(s []string, v string) bool {
+	for _, x := range s {
+		if x == v {
+			return true
+		}
+	}
+	return false
+}
+
+// TestAllRuntimes_IncludesGoogleADK is the direct regression for
+// controlplane#578: a google-adk pin promote/redeploy is accepted CP-side, so
+// the tenant image-refresh allowlist MUST also accept google-adk or the image
+// fix never deploys (tenant returned 400 "unknown runtime"). google-adk lives
+// in the providers SSOT, so the derived AllRuntimes must contain it.
+func TestAllRuntimes_IncludesGoogleADK(t *testing.T) {
+	if !runtimeListContains(AllRuntimes, "google-adk") {
+		t.Fatalf("AllRuntimes must include google-adk (controlplane#578 drift); got %v", AllRuntimes)
+	}
+}
+
+// TestAllRuntimes_MatchesProvidersSSOT is the drift guard. AllRuntimes is
+// derived from providers.LoadManifest().Runtimes — assert it equals exactly the
+// runtime keys the providers manifest (mirrored from CP's providers.yaml)
+// declares. Both sides read the same manifest, so manifest changes are picked
+// up automatically; this fails RED only if AllRuntimes stops matching it (e.g.
+// the static fallback kicked in or a hand-maintained list is reintroduced).
+func TestAllRuntimes_MatchesProvidersSSOT(t *testing.T) {
+	m, err := providers.LoadManifest()
+	if err != nil {
+		t.Fatalf("providers.LoadManifest: %v", err)
+	}
+	want := make([]string, 0, len(m.Runtimes))
+	for rt := range m.Runtimes {
+		want = append(want, rt)
+	}
+	sort.Strings(want)
+
+	got := append([]string(nil), AllRuntimes...)
+	sort.Strings(got)
+
+	if len(got) != len(want) {
+		t.Fatalf("AllRuntimes drift: got %v, want %v (providers SSOT)", got, want)
+	}
+	for i := range want {
+		if got[i] != want[i] {
+			t.Fatalf("AllRuntimes drift at %d: got %v, want %v (providers SSOT)", i, got, want)
+		}
+	}
+}
+
+// TestImageRefreshFallbackMatchesSSOT pins the static fallback (used only when
+// the embedded manifest fails to load) to the providers SSOT. If a runtime is
+// added to providers.yaml but not to imageRefreshFallbackRuntimes, this fails
+// RED — so a manifest-load failure can't silently drop a supported runtime.
+func TestImageRefreshFallbackMatchesSSOT(t *testing.T) {
+	m, err := providers.LoadManifest()
+	if err != nil {
+		t.Fatalf("providers.LoadManifest: %v", err)
+	}
+	want := make([]string, 0, len(m.Runtimes))
+	for rt := range m.Runtimes {
+		want = append(want, rt)
+	}
+	sort.Strings(want)
+
+	got := append([]string(nil), imageRefreshFallbackRuntimes...)
+	sort.Strings(got)
+
+	if len(got) != len(want) {
+		t.Fatalf("fallback drift: got %v, want %v (providers SSOT)", got, want)
+	}
+	for i := range want {
+		if got[i] != want[i] {
+			t.Fatalf("fallback drift at %d: got %v, want %v (providers SSOT)", i, got, want)
+		}
+	}
+}
+
+// TestRefresh_RejectsUnknownRuntime asserts a genuinely unknown runtime still
+// 400s (the guard isn't removed) AND that the 400 body lists google-adk in
+// known_runtimes (proving the allowlist now advertises it). This exercises the
+// gin handler's reject branch, which runs entirely before any Docker call.
+func TestRefresh_RejectsUnknownRuntime(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	// nil docker client is safe: the unknown-runtime branch returns 400
+	// before svc.Refresh (which is the only path that touches Docker).
+	h := &AdminWorkspaceImagesHandler{svc: &WorkspaceImageService{}}
+
+	r := gin.New()
+	r.POST("/admin/workspace-images/refresh", h.Refresh)
+
+	req := httptest.NewRequest(http.MethodPost, "/admin/workspace-images/refresh?runtime=not-a-real-runtime", nil)
+	rec := httptest.NewRecorder()
+	r.ServeHTTP(rec, req)
+
+	if rec.Code != http.StatusBadRequest {
+		t.Fatalf("unknown runtime: got status %d, want 400; body=%s", rec.Code, rec.Body.String())
+	}
+
+	var body struct {
+		Error         string   `json:"error"`
+		KnownRuntimes []string `json:"known_runtimes"`
+	}
+	if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
+		t.Fatalf("decode 400 body: %v (raw=%s)", err, rec.Body.String())
+	}
+	if !runtimeListContains(body.KnownRuntimes, "google-adk") {
+		t.Errorf("400 known_runtimes must advertise google-adk (controlplane#578); got %v", body.KnownRuntimes)
+	}
+}
+
 func TestGHCRAuthHeader_TrimsWhitespace(t *testing.T) {
 	t.Setenv("MOLECULE_IMAGE_REGISTRY", "")
 	// .env lines often have trailing newlines or accidental spaces. Without
@@ -73,6 +73,7 @@ func (h *ChannelHandler) List(c *gin.Context) {
 		var config map[string]interface{}
 		if err := json.Unmarshal(configJSON, &config); err != nil {
 			log.Printf("Channels: unmarshal config for channel %s: %v", id, err)
+			config = map[string]interface{}{}
 		}
 		// #319: decrypt sensitive fields first so the mask operates on
 		// plaintext (first-4 / last-4 of the real token, not the ciphertext
@@ -94,6 +95,7 @@ func (h *ChannelHandler) List(c *gin.Context) {
 		var allowed []string
 		if err := json.Unmarshal(allowedJSON, &allowed); err != nil {
 			log.Printf("Channels: unmarshal allowed_users for channel %s: %v", id, err)
+			allowed = []string{}
 		}

 		entry := map[string]interface{}{
@@ -540,9 +542,11 @@ func (h *ChannelHandler) Webhook(c *gin.Context) {
 		}
 		if err := json.Unmarshal(configJSON, &row.Config); err != nil {
 			log.Printf("Channels: unmarshal config for webhook row %s: %v", row.ID, err)
+			row.Config = map[string]interface{}{}
 		}
 		if err := json.Unmarshal(allowedJSON, &row.AllowedUsers); err != nil {
 			log.Printf("Channels: unmarshal allowed_users for webhook row %s: %v", row.ID, err)
+			row.AllowedUsers = []string{}
 		}
 		if err := channels.DecryptSensitiveFields(row.Config); err != nil {
 			log.Printf("Channels: decrypt webhook row %s: %v", row.ID, err)
@@ -116,6 +116,56 @@ func TestChannelHandler_List(t *testing.T) {
 	}
 }

+func TestChannelHandler_List_InvalidJSON_FallsBack(t *testing.T) {
+	mock := setupTestDB(t)
+	handler := NewChannelHandler(newTestChannelManager())
+
+	rows := sqlmock.NewRows([]string{
+		"id", "workspace_id", "channel_type", "channel_config", "enabled",
+		"allowed_users", "last_message_at", "message_count", "created_at", "updated_at",
+	}).AddRow(
+		"ch-bad", "ws-1", "telegram",
+		[]byte(`{not valid json`),
+		true, []byte(`[also not json`), nil, 0, nil, nil,
+	)
+	mock.ExpectQuery("SELECT .* FROM workspace_channels WHERE workspace_id").
+		WithArgs("ws-1").
+		WillReturnRows(rows)
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Request, _ = http.NewRequest("GET", "/workspaces/ws-1/channels", nil)
+	c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
+
+	handler.List(c)
+
+	if w.Code != 200 {
+		t.Errorf("expected 200, got %d", w.Code)
+	}
+
+	var result []map[string]interface{}
+	json.Unmarshal(w.Body.Bytes(), &result)
+	if len(result) != 1 {
+		t.Fatalf("expected 1 channel, got %d", len(result))
+	}
+
+	config, ok := result[0]["config"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("expected config to be a map, got %T", result[0]["config"])
+	}
+	if len(config) != 0 {
+		t.Errorf("expected empty config after unmarshal fallback, got %v", config)
+	}
+
+	allowed, ok := result[0]["allowed_users"].([]interface{})
+	if !ok {
+		t.Fatalf("expected allowed_users to be a slice, got %T", result[0]["allowed_users"])
+	}
+	if len(allowed) != 0 {
+		t.Errorf("expected empty allowed_users after unmarshal fallback, got %v", allowed)
+	}
+}
+
 // ==================== Create ====================

 func TestChannelHandler_Create_Success(t *testing.T) {
@@ -546,6 +596,41 @@ func TestChannelHandler_Webhook_UnknownType(t *testing.T) {
 	}
 }

+// TestChannelHandler_Webhook_InvalidJSON_FallsBack verifies that when the DB
+// row contains invalid JSON for channel_config or allowed_users, the webhook
+// handler logs the error and falls back to an empty map/slice rather than
+// leaving the fields nil (which would panic on downstream code that expects
+// concrete values). With empty config there is no chat_id match, so the
+// handler returns {"status":"no_channel"}.
+func TestChannelHandler_Webhook_InvalidJSON_FallsBack(t *testing.T) {
+	mock := setupTestDB(t)
+	handler := NewChannelHandler(newTestChannelManager())
+
+	mock.ExpectQuery(`SELECT id, workspace_id, channel_type, channel_config, enabled, allowed_users FROM workspace_channels WHERE channel_type = .* AND enabled = true`).
+		WithArgs("telegram").
+		WillReturnRows(sqlmock.NewRows([]string{
+			"id", "workspace_id", "channel_type", "channel_config", "enabled", "allowed_users",
+		}).AddRow("ch-bad", "ws-1", "telegram", []byte(`{bad json`), true, []byte(`[bad json`)))
+
+	body := `{"update_id":1,"message":{"message_id":1,"from":{"id":111,"is_bot":false,"first_name":"Test","username":"testuser"},"chat":{"id":-100123,"title":"Test Group","type":"supergroup"},"date":1700000000,"text":"hello"}}`
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Request = httptest.NewRequest(http.MethodPost, "/webhooks/telegram", strings.NewReader(body))
+	c.Request.Header.Set("Content-Type", "application/json")
+	c.Params = gin.Params{{Key: "type", Value: "telegram"}}
+
+	handler.Webhook(c)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
+	}
+	// Fail on an undecodable body explicitly; otherwise the status check
+	// below would report a misleading "got <nil>".
+	var resp map[string]interface{}
+	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("decode response body %q: %v", w.Body.String(), err)
+	}
+	if resp["status"] != "no_channel" {
+		t.Errorf("expected status 'no_channel', got %v", resp["status"])
+	}
+}
+
 // ==================== Discover ====================

 func TestChannelHandler_Discover_MissingToken(t *testing.T) {
@@ -68,6 +68,10 @@ func TestPeers_CrossTenant_OrgRootNotLeaked(t *testing.T) {

 	caller := "org-a-root" // parent_id IS NULL — an org root for tenant A

+	// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
+	// (Unordered match is set above, so this can be consumed at any point.)
+	seedDiscoveryGrandfather(mock, caller)
+
 	// parent_id lookup → NULL (caller is an org root)
 	mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
 		WithArgs(caller).
@@ -128,6 +132,9 @@ func TestPeers_SameOrg_SiblingsStillWork(t *testing.T) {
 	caller := "org-a-child-1"
 	parent := "org-a-root"

+	// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
+	seedDiscoveryGrandfather(mock, caller)
+
 	mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
 		WithArgs(caller).
 		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(parent))
@@ -179,8 +179,11 @@ func (h *DelegationHandler) Delegate(c *gin.Context) {
 			"message": map[string]interface{}{
 				"role":      "user",
 				"messageId": delegationID,
-				"parts":     []map[string]interface{}{{"type": "text", "text": body.Task}},
-				"metadata":  map[string]interface{}{"delegation_id": delegationID},
+				// A2A v0.3 Part discriminator is `kind`, NOT `type` (#2251) —
+				// a `type`-keyed Part is dropped by the receiver's v0.3
+				// validator, silently losing the delegated task.
+				"parts":    []map[string]interface{}{{"kind": "text", "text": body.Task}},
+				"metadata": map[string]interface{}{"delegation_id": delegationID},
 			},
 		},
 	})
@@ -422,28 +422,33 @@ func (h *DiscoveryHandler) CheckAccess(c *gin.Context) {
 // workspaces with tokens must present a matching Bearer, token binding
 // is strict (A's token cannot authenticate caller B).
 //
-// Fail-open on DB hiccups. Unlike secrets.Values (which returns plaintext
-// secrets and must fail closed), discovery only exposes peer URLs that
-// are already behind the existing `CanCommunicate` hierarchy check — a
-// momentary DB outage shouldn't take agent-to-agent discovery offline.
+// (harden/no-fail-open-auth) Fails CLOSED on DB error. This used to return nil
+// (allow) on a HasAnyLiveToken hiccup "because discovery only exposes peer URLs
+// already behind CanCommunicate" — but the CTO "nothing fail-open" directive is
+// absolute, and a request must never gain access because the auth datastore is
+// unreachable. A datastore error now writes 503 (availability tradeoff that
+// grants NO access) and returns a non-nil error; the caller already does
+// `if err != nil { return }` so the 503 body is what the client sees.
 func validateDiscoveryCaller(ctx context.Context, c *gin.Context, workspaceID string) error {
 	hasLive, err := wsauth.HasAnyLiveToken(ctx, db.DB, workspaceID)
 	if err != nil {
-		log.Printf("wsauth: discovery HasAnyLiveToken(%s) failed: %v — allowing request", workspaceID, err)
-		return nil
+		log.Printf("wsauth: discovery HasAnyLiveToken(%s): datastore lookup failed (returning 503): %v", workspaceID, err)
+		c.JSON(http.StatusServiceUnavailable, gin.H{
+			"error": "platform datastore unavailable — retry shortly",
+			"code":  "platform_unavailable",
+		})
+		return errors.New("auth datastore unavailable")
 	}
 	if !hasLive {
 		return nil // legacy / pre-upgrade
 	}
-	// Tier-1b dev-mode hatch — same escape hatch AdminAuth and
-	// WorkspaceAuth apply on a local Docker setup. Without this, the
-	// canvas Details tab can never load peers for a workspace that has
-	// registered its live token, producing the 401 the user sees.
-	// Gated by MOLECULE_ENV=development + empty ADMIN_TOKEN, so SaaS
-	// production stays strict.
-	if middleware.IsDevModeFailOpen() {
-		return nil
-	}
+	// (harden/no-fail-open-auth) The former dev-mode escape hatch that
+	// returned nil (allow) here when MOLECULE_ENV=development + ADMIN_TOKEN unset
+	// has been REMOVED. Discovery callers must present a verified CP
+	// session or a valid bearer in every environment. Local dev now
+	// authenticates the Canvas with a provisioned ADMIN_TOKEN /
+	// NEXT_PUBLIC_ADMIN_TOKEN (see scripts/dev-start.sh), so the Details
+	// tab loads peers with a real credential rather than via fail-open.

 	// Try session cookie auth first (SaaS canvas path).
 	// verifiedCPSession returns (valid, presented):
@@ -49,6 +49,10 @@ func TestDiscover_WorkspaceNotFound_WithCaller(t *testing.T) {
 	setupTestRedis(t)
 	handler := NewDiscoveryHandler()

+	// validateDiscoveryCaller probes HasAnyLiveToken(callerID) first;
+	// grandfather (count=0) so the bearer-less request is allowed through.
+	seedDiscoveryGrandfather(mock, "ws-caller")
+
 	// CanCommunicate will need DB lookups — both workspace name lookups
 	// For the access check: caller lookup succeeds, target lookup fails
 	mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
@@ -113,6 +117,9 @@ func TestPeers_WithParent(t *testing.T) {
 	setupTestRedis(t)
 	handler := NewDiscoveryHandler()

+	// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
+	seedDiscoveryGrandfather(mock, "ws-sibling-1")
+
 	// Expect parent_id lookup for the requesting workspace
 	mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
 		WithArgs("ws-sibling-1").
@@ -165,6 +172,9 @@ func TestPeers_NotFound(t *testing.T) {
 	setupTestRedis(t)
 	handler := NewDiscoveryHandler()

+	// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
+	seedDiscoveryGrandfather(mock, "ws-ghost")
+
 	// Workspace not found
 	mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
 		WithArgs("ws-ghost").
@@ -191,6 +201,11 @@ func TestPeers_DBError(t *testing.T) {
 	setupTestRedis(t)
 	handler := NewDiscoveryHandler()

+	// Auth probe grandfathers; this test targets a DB error on the
+	// *handler-body* parent_id query → 500 (distinct from the auth-probe
+	// DB error which now fails closed with 503).
+	seedDiscoveryGrandfather(mock, "ws-dberr")
+
 	mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
 		WithArgs("ws-dberr").
 		WillReturnError(sql.ErrConnDone)
@@ -216,6 +231,9 @@ func TestPeers_RootWorkspace_NoPeers(t *testing.T) {
 	setupTestRedis(t)
 	handler := NewDiscoveryHandler()

+	// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
+	seedDiscoveryGrandfather(mock, "ws-root-alone")
+
 	// Root workspace (parent_id is NULL)
 	mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
 		WithArgs("ws-root-alone").
@@ -270,6 +288,9 @@ func peersFilterFixture(t *testing.T) (*DiscoveryHandler, sqlmock.Sqlmock) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)

+	// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
+	seedDiscoveryGrandfather(mock, "ws-self")
+
 	mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
 		WithArgs("ws-self").
 		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow("ws-pm"))
@@ -927,13 +948,14 @@ func TestDiscoverHostPeer_Smoke_Success(t *testing.T) {
 	}
 }

-// ==================== Peers auth — dev-mode fail-open gate ====================
+// ==================== Peers auth — fail-CLOSED gate ====================
 //
-// validateDiscoveryCaller applies a Tier-1b dev-mode hatch so the canvas
-// user session (which holds no workspace-scoped bearer) can still load
-// the Details → PEERS list on a local Docker setup. The gate must pass
-// ONLY when MOLECULE_ENV is development AND ADMIN_TOKEN is empty.
-// These tests pin that contract against accidental polarity flips.
+// (harden/no-fail-open-auth) validateDiscoveryCaller USED to apply a
+// Tier-1b dev-mode hatch that let the bearer-less canvas session load the
+// Details → PEERS list when MOLECULE_ENV=development AND ADMIN_TOKEN empty.
+// That hatch has been REMOVED — discovery callers must present a verified
+// CP session or a valid bearer in every environment. These tests pin the
+// fail-closed contract against accidental re-introduction.

 // peersAuthFixtureHasLiveToken seeds the mock rows required for the
 // Peers handler to reach the auth branch: HasAnyLiveToken → true (a
@@ -946,10 +968,30 @@ func peersAuthFixtureHasLiveToken(mock sqlmock.Sqlmock, workspaceID string) {
 		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
 }

-func TestPeers_DevModeFailOpen_AllowsBearerlessRequest(t *testing.T) {
-	// Dev mode: MOLECULE_ENV=development AND ADMIN_TOKEN empty. Canvas
-	// sends no bearer token; validateDiscoveryCaller must return nil
-	// (allow) and the handler must proceed to return the peer list.
+// seedDiscoveryGrandfather registers the expectation for the FIRST query
+// validateDiscoveryCaller issues — the HasAnyLiveToken COUNT probe — and
+// answers it with 0 (legacy / pre-upgrade workspace), so a bearer-less
+// discovery request grandfathers through to the handler body under test.
+//
+// (harden/no-fail-open-auth) Handler-body tests did not need this before:
+// validateDiscoveryCaller returned nil (allow) when the probe ERRORED, so
+// the unmatched COUNT query erred and the fail-open swallowed it. The
+// DB-error path now fails CLOSED (503), hence the probe has to be seeded
+// explicitly. count=0 is the legitimate grandfather path (no live tokens
+// for this workspace yet), which is what those pre-existing tests intend.
+func seedDiscoveryGrandfather(mock sqlmock.Sqlmock, workspaceID string) {
+	noLiveTokens := sqlmock.NewRows([]string{"count"}).AddRow(0)
+
+	probe := mock.ExpectQuery("SELECT COUNT.+workspace_auth_tokens")
+	probe.WithArgs(workspaceID).WillReturnRows(noLiveTokens)
+}
+
+func TestPeers_DevMode_BearerlessRequest_FailsClosed(t *testing.T) {
+	// (harden/no-fail-open-auth) Exact old-hatch conditions:
+	// MOLECULE_ENV=development AND ADMIN_TOKEN empty, with a live token in
+	// the DB. The bearer-less canvas-style request must now 401 — the
+	// dev-mode hatch that returned nil (allow) here is gone. Local dev
+	// authenticates via a provisioned ADMIN_TOKEN (scripts/dev-start.sh).
 	t.Setenv("MOLECULE_ENV", "development")
 	t.Setenv("ADMIN_TOKEN", "")

@@ -957,22 +999,10 @@ func TestPeers_DevModeFailOpen_AllowsBearerlessRequest(t *testing.T) {
 	setupTestRedis(t)
 	handler := NewDiscoveryHandler()

+	// Only the HasAnyLiveToken probe runs; auth 401s before the peer
+	// queries, so no further expectations are seeded.
 	peersAuthFixtureHasLiveToken(mock, "ws-dev")

-	// Root workspace → children+parent queries still fire but the
-	// parent_id lookup comes first.
-	mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
-		WithArgs("ws-dev").
-		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
-	peerCols := []string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}
-	mock.ExpectQuery("SELECT w.id.+WHERE w.parent_id IS NULL AND w.id").
-		WithArgs("ws-dev").
-		WillReturnRows(sqlmock.NewRows(peerCols))
-	// #383 — children query gained explicit `w.id != $2` self-filter.
-	mock.ExpectQuery("SELECT w.id.+WHERE w.parent_id = \\$1 AND w.id != \\$2 AND w.status").
-		WithArgs("ws-dev", "ws-dev").
-		WillReturnRows(sqlmock.NewRows(peerCols))
-
 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
 	c.Params = gin.Params{{Key: "id", Value: "ws-dev"}}
@@ -980,8 +1010,8 @@ func TestPeers_DevModeFailOpen_AllowsBearerlessRequest(t *testing.T) {

 	handler.Peers(c)

-	if w.Code != http.StatusOK {
-		t.Fatalf("expected 200 under dev-mode hatch, got %d: %s", w.Code, w.Body.String())
+	if w.Code != http.StatusUnauthorized {
+		t.Fatalf("expected 401 (fail-closed) under old dev-mode hatch conditions, got %d: %s", w.Code, w.Body.String())
 	}
 }

@@ -1034,6 +1064,70 @@ func TestPeers_DevModeFailOpen_ClosedInProduction(t *testing.T) {
 	}
 }

+// TestPeers_AuthProbeDBError_FailsClosed guards the Peers endpoint against a
+// return of validateDiscoveryCaller's fail-open-on-DB-error branch
+// (harden/no-fail-open-auth). An ERROR from the HasAnyLiveToken auth probe
+// must not let the request through: the handler answers 503 (availability
+// tradeoff that grants NO access). Previously the function returned nil
+// (allow) on a DB hiccup and the request went on to the peer queries.
+//
+// Watch-it-fail: restore `if err != nil { log; return nil }` in
+// validateDiscoveryCaller → this flips 503→(200/handler path) and fails.
+func TestPeers_AuthProbeDBError_FailsClosed(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+	handler := NewDiscoveryHandler()
+
+	// Only the HasAnyLiveToken probe (the first query issued by
+	// validateDiscoveryCaller) is expected, and it errors. Nothing else is
+	// seeded because the 503 must be written before any peer-list query.
+	probe := mock.ExpectQuery("SELECT COUNT.+workspace_auth_tokens")
+	probe.WithArgs("ws-dberr-auth").WillReturnError(sql.ErrConnDone)
+
+	rec := httptest.NewRecorder()
+	ginCtx, _ := gin.CreateTestContext(rec)
+	ginCtx.Request = httptest.NewRequest("GET", "/registry/ws-dberr-auth/peers", nil)
+	ginCtx.Params = gin.Params{{Key: "id", Value: "ws-dberr-auth"}}
+
+	handler.Peers(ginCtx)
+
+	if got := rec.Code; got != http.StatusServiceUnavailable {
+		t.Fatalf("auth-probe DB error must fail CLOSED: expected 503, got %d: %s", got, rec.Body.String())
+	}
+	if unmet := mock.ExpectationsWereMet(); unmet != nil {
+		t.Errorf("unmet sqlmock expectations: %v", unmet)
+	}
+}
+
+// TestDiscover_AuthProbeDBError_FailsClosed covers the Discover endpoint in
+// the same way TestPeers_AuthProbeDBError_FailsClosed covers Peers: when
+// HasAnyLiveToken errors for the calling workspace, the discovery request
+// fails CLOSED with 503 (was: fail-open allow).
+func TestDiscover_AuthProbeDBError_FailsClosed(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+	handler := NewDiscoveryHandler()
+
+	probe := mock.ExpectQuery("SELECT COUNT.+workspace_auth_tokens")
+	probe.WithArgs("ws-caller").WillReturnError(sql.ErrConnDone)
+
+	rec := httptest.NewRecorder()
+	ginCtx, _ := gin.CreateTestContext(rec)
+	ginCtx.Request = httptest.NewRequest("GET", "/registry/discover/ws-target", nil)
+	ginCtx.Request.Header.Set("X-Workspace-ID", "ws-caller")
+	ginCtx.Params = gin.Params{{Key: "id", Value: "ws-target"}}
+
+	handler.Discover(ginCtx)
+
+	if got := rec.Code; got != http.StatusServiceUnavailable {
+		t.Fatalf("Discover auth-probe DB error must fail CLOSED: expected 503, got %d: %s", got, rec.Body.String())
+	}
+	if unmet := mock.ExpectationsWereMet(); unmet != nil {
+		t.Errorf("unmet sqlmock expectations: %v", unmet)
+	}
+}
+
 // ==================== Peers — #383 self never appears in result ====================

 // TestPeers_ExcludeSelf_DefenseInDepth verifies the final-line filter in
@@ -1056,6 +1150,9 @@ func TestPeers_ExcludeSelf_DefenseInDepth(t *testing.T) {

 	const selfID = "ws-xiaodong"

+	// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
+	seedDiscoveryGrandfather(mock, selfID)
+
 	// parent_id lookup — workspace has a parent.
 	mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
 		WithArgs(selfID).
@@ -551,6 +551,9 @@ func TestDiscover_AccessDenied(t *testing.T) {
 	setupTestRedis(t)
 	handler := NewDiscoveryHandler()

+	// validateDiscoveryCaller probes HasAnyLiveToken(callerID) first; grandfather.
+	seedDiscoveryGrandfather(mock, "ws-child-a")
+
 	// CanCommunicate: different parents → denied
 	mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
 		WithArgs("ws-child-a").
@@ -582,6 +585,9 @@ func TestDiscover_TargetOffline(t *testing.T) {
 	setupTestRedis(t)
 	handler := NewDiscoveryHandler()

+	// validateDiscoveryCaller probes HasAnyLiveToken(callerID) first; grandfather.
+	seedDiscoveryGrandfather(mock, "ws-caller")
+
 	// Share a parent so communication is allowed under post-#1955 rules
 	sharedParent := "ws-parent"
 	mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
@@ -373,6 +373,9 @@ func TestExtended_DiscoverWithCallerID(t *testing.T) {
 	setupTestRedis(t)
 	handler := NewDiscoveryHandler()

+	// validateDiscoveryCaller probes HasAnyLiveToken(callerID) first; grandfather.
+	seedDiscoveryGrandfather(mock, "ws-caller")
+
 	// CanCommunicate needs to look up both workspaces
 	// Share a parent so communication is allowed under post-#1955 rules
 	sharedParent := "ws-parent"
@@ -464,6 +467,9 @@ func TestExtended_Peers(t *testing.T) {
 	setupTestRedis(t)
 	handler := NewDiscoveryHandler()

+	// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
+	seedDiscoveryGrandfather(mock, "ws-peer")
+
 	// Expect parent_id lookup for requesting workspace (root-level, no parent)
 	mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
 		WithArgs("ws-peer").
@@ -508,6 +508,7 @@ func TestBuildProvisionerConfig_WorkspacePathFromPayload(t *testing.T) {
 		map[string][]byte{"config.yaml": []byte("name: test")},
 		models.CreateWorkspacePayload{Tier: 2, Runtime: "claude-code", WorkspaceDir: "/tmp/workspace", WorkspaceAccess: "read_write"},
 		map[string]string{"OPENAI_API_KEY": "sk-test"},
+		nil,
 		"/tmp/plugins",
 	)

@@ -192,7 +192,11 @@ func (h *MCPHandler) toolGetWorkspaceInfo(ctx context.Context, workspaceID strin
 // follow in the order provided, with kind derived from MIME type.
 func buildA2AMessageParts(task string, attachments []AgentMessageAttachment) []map[string]interface{} {
 	parts := []map[string]interface{}{
-		{"type": "text", "text": task},
+		// A2A v0.3 Part discriminator is `kind`, NOT `type` (#2251).
+		// The receiver's v0.3 Pydantic validator drops a Part keyed
+		// `type`, silently losing the task text — the file part below
+		// already uses `kind`, this is the matching fix for text.
+		{"kind": "text", "text": task},
 	}
 	for _, att := range attachments {
 		kind := kindFromMimeType(att.MimeType)
@@ -161,7 +161,7 @@ func (h *PluginsHandler) uninstallViaDocker(ctx context.Context, c *gin.Context,
 	// 1. Strip plugin's rule/fragment markers from CLAUDE.md (mirrors
 	//    AgentskillsAdaptor.uninstall lines 184-188). Best-effort: if
 	//    the user edited CLAUDE.md, our marker stays untouched.
-	h.stripPluginMarkersFromMemory(ctx, containerName, pluginName)
+	h.stripPluginMarkersFromMemory(ctx, workspaceID, containerName, pluginName)

 	// 2. Remove copied skill dirs declared in the plugin's plugin.yaml.
 	for _, skill := range skillNames {
@@ -171,9 +171,11 @@ func (h *PluginsHandler) uninstallViaDocker(ctx context.Context, c *gin.Context,
 			log.Printf("Plugin uninstall: skipping invalid skill name %q in %s: %v", skill, pluginName, err)
 			continue
 		}
-		_, _ = h.execAsRoot(ctx, containerName, []string{
+		if _, rmErr := h.execAsRoot(ctx, containerName, []string{
 			"rm", "-rf", "/configs/skills/" + skill,
-		})
+		}); rmErr != nil {
+			log.Printf("Plugin uninstall: failed to remove skill %s from %s: %v", skill, workspaceID, rmErr)
+		}
 	}

 	// 3. Delete the plugin directory itself (as root to handle file ownership).
@@ -393,7 +393,7 @@ func (h *PluginsHandler) readPluginSkillsFromContainer(ctx context.Context, cont
 // `# Plugin: <name> /` — mirrors AgentskillsAdaptor.uninstall's stripping
 // logic so install/uninstall are symmetric. Best-effort: a read or write
 // failure is logged and otherwise ignored, since the rest of uninstall must still succeed.
-func (h *PluginsHandler) stripPluginMarkersFromMemory(ctx context.Context, containerName, pluginName string) {
+func (h *PluginsHandler) stripPluginMarkersFromMemory(ctx context.Context, workspaceID, containerName, pluginName string) {
 	// Use sed via bash -c for atomic in-place delete: drop the marker line
 	// and the blank line that follows it (install adds a leading blank line
 	// before the marker via append_to_memory). Three sed passes mirror the
@@ -417,7 +417,9 @@ func (h *PluginsHandler) stripPluginMarkersFromMemory(ctx context.Context, conta
 		`awk 'BEGIN{skip=0; blanks=0} /^%s/{skip=1; blanks=0; next} skip==1 && /^[[:space:]]*$/{blanks++; if(blanks>=2){skip=0; print; next} next} /^# Plugin: /{if(skip==1)skip=0} skip==1{next} {print}' /configs/CLAUDE.md > /tmp/claude.new && mv /tmp/claude.new /configs/CLAUDE.md`,
 		regexpEscapeForAwk(marker),
 	)
-	_, _ = h.execAsRoot(ctx, containerName, []string{"bash", "-c", script})
+	if _, awkErr := h.execAsRoot(ctx, containerName, []string{"bash", "-c", script}); awkErr != nil {
+		log.Printf("Plugin uninstall: failed to strip markers from CLAUDE.md for %s in %s: %v", pluginName, workspaceID, awkErr)
+	}
 }

 // regexpEscapeForAwk escapes characters that have special meaning inside an
@@ -89,13 +89,16 @@ func TestSecurity_GetTemplates_NoAuth_Returns401(t *testing.T) {
 	}
 }

-// TestSecurity_GetTemplates_FreshInstall_FailsOpen verifies that GET /templates
-// still succeeds on a fresh install (zero enrolled workspaces → AdminAuth fail-open).
-// This is the regression check: the auth gate must not break new deployments.
-func TestSecurity_GetTemplates_FreshInstall_FailsOpen(t *testing.T) {
+// TestSecurity_GetTemplates_FreshInstall_FailsClosed pins the post-hardening
+// contract (harden/no-fail-open-auth): GET /templates on a fresh install (zero
+// enrolled workspaces, no ADMIN_TOKEN) now 401s with no bearer. The former
+// AdminAuth Tier-1 lazy-bootstrap fail-open (fresh install ⇒ 200) is gone — a
+// new deployment must provision ADMIN_TOKEN (dev does so via dev-start.sh).
+func TestSecurity_GetTemplates_FreshInstall_FailsClosed(t *testing.T) {
 	setupTestDB(t)
 	setupTestRedis(t)
 	t.Setenv("ADMIN_TOKEN", "")
+	t.Setenv("MOLECULE_ENV", "")
 	authDB, authMock := newFreshInstallAuthDB(t)

 	tmpDir := t.TempDir()
@@ -108,8 +111,8 @@ func TestSecurity_GetTemplates_FreshInstall_FailsOpen(t *testing.T) {
 	req, _ := http.NewRequest(http.MethodGet, "/templates", nil)
 	r.ServeHTTP(w, req)

-	if w.Code != http.StatusOK {
-		t.Errorf("#686 GET /templates fresh-install: want 200 (fail-open), got %d body=%s", w.Code, w.Body.String())
+	if w.Code != http.StatusUnauthorized {
+		t.Errorf("#686 GET /templates fresh-install fail-closed: want 401, got %d body=%s", w.Code, w.Body.String())
 	}
 	if err := authMock.ExpectationsWereMet(); err != nil {
 		t.Errorf("unmet auth mock expectations: %v", err)
@@ -148,12 +151,14 @@ func TestSecurity_GetOrgTemplates_NoAuth_Returns401(t *testing.T) {
 	}
 }

-// TestSecurity_GetOrgTemplates_FreshInstall_FailsOpen mirrors the /templates
-// regression check for /org/templates — fresh installs must still work.
-func TestSecurity_GetOrgTemplates_FreshInstall_FailsOpen(t *testing.T) {
+// TestSecurity_GetOrgTemplates_FreshInstall_FailsClosed mirrors the /templates
+// fail-closed check for /org/templates (harden/no-fail-open-auth): a fresh
+// install with no bearer / no ADMIN_TOKEN now 401s rather than fail-open.
+func TestSecurity_GetOrgTemplates_FreshInstall_FailsClosed(t *testing.T) {
 	setupTestDB(t)
 	setupTestRedis(t)
 	t.Setenv("ADMIN_TOKEN", "")
+	t.Setenv("MOLECULE_ENV", "")
 	authDB, authMock := newFreshInstallAuthDB(t)

 	tmpDir := t.TempDir()
@@ -167,8 +172,8 @@ func TestSecurity_GetOrgTemplates_FreshInstall_FailsOpen(t *testing.T) {
 	req, _ := http.NewRequest(http.MethodGet, "/org/templates", nil)
 	r.ServeHTTP(w, req)

-	if w.Code != http.StatusOK {
-		t.Errorf("#686 GET /org/templates fresh-install: want 200 (fail-open), got %d body=%s", w.Code, w.Body.String())
+	if w.Code != http.StatusUnauthorized {
+		t.Errorf("#686 GET /org/templates fresh-install fail-closed: want 401, got %d body=%s", w.Code, w.Body.String())
 	}
 	if err := authMock.ExpectationsWereMet(); err != nil {
 		t.Errorf("unmet auth mock expectations: %v", err)
@@ -243,6 +243,7 @@ func TestBuildProvisionerConfig_CopiesComputeSizingFromPayload(t *testing.T) {
 			},
 		},
 		nil,
+		nil,
 		t.TempDir(),
 	)

@@ -129,7 +129,7 @@ func (h *WorkspaceHandler) provisionWorkspaceOpts(workspaceID, templatePath stri
 							workspaceID, filepath.Base(runtimeTemplate))
 						templatePath = runtimeTemplate
 						// Rebuild cfg with the recovered template path so Start() sees it.
-						cfg = h.buildProvisionerConfig(ctx, workspaceID, templatePath, configFiles, payload, prepared.EnvVars, prepared.PluginsPath)
+						cfg = h.buildProvisionerConfig(ctx, workspaceID, templatePath, configFiles, payload, prepared.EnvVars, prepared.Config.WorkspaceSecretKeys, prepared.PluginsPath)
 						cfg.ResetClaudeSession = resetClaudeSession
 						recovered = true
 						break
@@ -281,6 +281,7 @@ func (h *WorkspaceHandler) buildProvisionerConfig(
 	configFiles map[string][]byte,
 	payload models.CreateWorkspacePayload,
 	envVars map[string]string,
+	workspaceSecretKeys map[string]struct{},
 	pluginsPath string,
 ) provisioner.WorkspaceConfig {
 	// Per-workspace workspace_dir takes priority over global WORKSPACE_DIR env var.
@@ -331,14 +332,20 @@ func (h *WorkspaceHandler) buildProvisionerConfig(
 		InstanceType:    payload.Compute.InstanceType,
 		DiskGB:          int32(payload.Compute.Volume.RootGB),
 		DataPersistence: payload.Compute.DataPersistence,
+		Provider:        payload.Compute.Provider,
 		Display: provisioner.WorkspaceDisplayConfig{
 			Mode:     payload.Compute.Display.Mode,
 			Width:    payload.Compute.Display.Width,
 			Height:   payload.Compute.Display.Height,
 			Protocol: payload.Compute.Display.Protocol,
 		},
-		EnvVars:     envVars,
-		PlatformURL: h.platformURL,
+		EnvVars: envVars,
+		// Forensic #145: positive provenance set so the SCM-write-token guard
+		// (cp_provisioner.Start) exempts a workspace-authored GITEA_TOKEN from
+		// the operator-bleed strip while still stripping global/persona-merged
+		// SCM tokens. Carried by both Docker- and CP-mode configs.
+		WorkspaceSecretKeys: workspaceSecretKeys,
+		PlatformURL:         h.platformURL,
 		// Image left empty — molecule-core's runtime_image_pins table (mig
 		// 047, dead reader removed by RFC internal#617 / task #335) was an
 		// aspirational SSOT that never received a writer. CP's
@@ -1233,9 +1240,18 @@ func firstNonEmptyEnv(names ...string) string {
 // stores — NOT the user's own scoped PAT they explicitly authorized via
 // the per-workspace Secrets tab.
 //
+// The third return value (workspaceKeys) is the POSITIVE counterpart: the
+// set of keys authored via the per-workspace `workspace_secrets` table
+// (user / org-admin set, authenticated as the workspace owner). It is the
+// provenance signal the forensic #145 SCM-write-token guard consults to
+// EXEMPT a workspace-scoped GITEA_TOKEN (the intended, legitimate delivery
+// channel for a reviewer agent) from the operator-bleed strip. A key set
+// in BOTH stores lands here (workspace overrides global) and is removed
+// from globalKeys, matching the precedence semantic below.
+//
 // The merged map preserves the existing precedence semantic (workspace
 // rows overwrite global rows on key collision); only the provenance side-
-// channel is new. Existing single-return callers can ignore globalKeys.
+// channels are new. Existing callers can ignore globalKeys / workspaceKeys.
 //
 // F1086 / #1206: the returned error string is the SAFE-CANNED message that
 // gets persisted to workspaces.last_sample_error AND broadcast as the
@@ -1243,9 +1259,10 @@ func firstNonEmptyEnv(names ...string) string {
 // the encryption version, the decrypt-error text) is logged here, never
 // returned to the caller, so it can't leak via the canvas event stream
 // (cf. TestProvisionWorkspace_NoInternalErrorsInBroadcast).
-func loadWorkspaceSecrets(ctx context.Context, workspaceID string) (map[string]string, map[string]struct{}, string) {
+func loadWorkspaceSecrets(ctx context.Context, workspaceID string) (map[string]string, map[string]struct{}, map[string]struct{}, string) {
 	envVars := map[string]string{}
 	globalKeys := map[string]struct{}{}
+	workspaceKeys := map[string]struct{}{}
 	globalRows, globalErr := db.DB.QueryContext(ctx,
 		`SELECT key, encrypted_value, encryption_version FROM global_secrets`)
 	if globalErr == nil {
@@ -1266,7 +1283,7 @@ func loadWorkspaceSecrets(ctx context.Context, workspaceID string) (map[string]s
 				decrypted, decErr := crypto.DecryptVersioned(v, ver)
 				if decErr != nil {
 					log.Printf("Provisioner: FATAL — failed to decrypt global secret %s (version=%d): %v — aborting provision of workspace %s", k, ver, decErr, workspaceID)
-					return nil, nil, "failed to decrypt global secret"
+					return nil, nil, nil, "failed to decrypt global secret"
 				}
 				envVars[k] = string(decrypted)
 				globalKeys[k] = struct{}{}
@@ -1300,7 +1317,7 @@ func loadWorkspaceSecrets(ctx context.Context, workspaceID string) (map[string]s
 				decrypted, decErr := crypto.DecryptVersioned(v, ver)
 				if decErr != nil {
 					log.Printf("Provisioner: FATAL — failed to decrypt workspace secret %s (version=%d) for %s: %v — aborting provision", k, ver, workspaceID, decErr)
-					return nil, nil, "failed to decrypt workspace secret"
+					return nil, nil, nil, "failed to decrypt workspace secret"
 				}
 				envVars[k] = string(decrypted)
 				// User-authored workspace_secrets value supersedes any
@@ -1309,13 +1326,19 @@ func loadWorkspaceSecrets(ctx context.Context, workspaceID string) (map[string]s
 				// re-set the value via the canvas Secrets tab, so it is
 				// no longer "the operator-store version."
 				delete(globalKeys, k)
+				// Positive provenance: record that this key was authored
+				// via workspace_secrets. The forensic #145 SCM-write-token
+				// guard exempts only keys in this set — a workspace-scoped
+				// GITEA_TOKEN is the intended delivery channel for that
+				// workspace's agent.
+				workspaceKeys[k] = struct{}{}
 			}
 		}
 		if err := wsRows.Err(); err != nil {
 			log.Printf("Provisioner: workspace_secrets rows.Err workspace=%s: %v", workspaceID, err)
 		}
 	}
-	return envVars, globalKeys, ""
+	return envVars, globalKeys, workspaceKeys, ""
 }

 // provisionWorkspaceCP provisions a workspace via the control plane API.
@@ -122,7 +122,7 @@ func (h *WorkspaceHandler) prepareProvisionContext(
 	payload models.CreateWorkspacePayload,
 	resetClaudeSession bool,
 ) (*preparedProvisionContext, *provisionAbort) {
-	envVars, globalSecretKeys, decryptErr := loadWorkspaceSecrets(ctx, workspaceID)
+	envVars, globalSecretKeys, workspaceSecretKeys, decryptErr := loadWorkspaceSecrets(ctx, workspaceID)
 	if decryptErr != "" {
 		return nil, &provisionAbort{Msg: decryptErr}
 	}
@@ -294,7 +294,7 @@ func (h *WorkspaceHandler) prepareProvisionContext(
 		return nil, abort
 	}

-	cfg := h.buildProvisionerConfig(ctx, workspaceID, templatePath, configFiles, payload, envVars, pluginsPath)
+	cfg := h.buildProvisionerConfig(ctx, workspaceID, templatePath, configFiles, payload, envVars, workspaceSecretKeys, pluginsPath)
 	cfg.ResetClaudeSession = resetClaudeSession

 	return &preparedProvisionContext{
@@ -845,6 +845,7 @@ func TestBuildProvisionerConfig_BasicFields(t *testing.T) {
 		map[string][]byte{"config.yaml": []byte("name: test")},
 		models.CreateWorkspacePayload{Tier: 1, Runtime: "claude-code"},
 		map[string]string{"API_KEY": "secret"},
+		nil,
 		pluginsPath,
 	)

@@ -893,6 +894,7 @@ func TestBuildProvisionerConfig_WorkspacePathFromEnv(t *testing.T) {
 		nil,
 		models.CreateWorkspacePayload{Tier: 2, Runtime: "claude-code"},
 		nil,
+		nil,
 		pluginsPath,
 	)

@@ -901,6 +903,71 @@ func TestBuildProvisionerConfig_WorkspacePathFromEnv(t *testing.T) {
 	}
 }

+// ==================== loadWorkspaceSecrets provenance (forensic #145) ====================
+
+// TestLoadWorkspaceSecrets_WorkspaceKeysProvenance pins the positive
+// provenance side-channel added for forensic #145: a key sourced from
+// workspace_secrets must land in the third return value (workspaceKeys),
+// while a key sourced only from global_secrets must NOT. A key present in
+// BOTH stores is treated as workspace-authored (workspace overrides global),
+// so it lands in workspaceKeys AND is removed from globalKeys.
+func TestLoadWorkspaceSecrets_WorkspaceKeysProvenance(t *testing.T) {
+	mock := setupTestDB(t)
+
+	// global_secrets: an operator-store GITEA_TOKEN (the bleed channel) and
+	// an OPERATOR_ONLY key that no workspace row re-sets.
+	globalRows := sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
+		AddRow("GITEA_TOKEN", []byte("operator-store-gitea"), 0).
+		AddRow("OPERATOR_ONLY", []byte("op-val"), 0)
+	mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`).
+		WillReturnRows(globalRows)
+
+	// workspace_secrets: the user/org-admin re-authors GITEA_TOKEN (override)
+	// and adds a workspace-only WS_ONLY key. encryption_version 0 = plaintext
+	// passthrough (crypto.DecryptVersioned).
+	wsRows := sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
+		AddRow("GITEA_TOKEN", []byte("workspace-authored-gitea"), 0).
+		AddRow("WS_ONLY", []byte("ws-val"), 0)
+	mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1`).
+		WithArgs("ws-prov").
+		WillReturnRows(wsRows)
+
+	envVars, globalKeys, workspaceKeys, errMsg := loadWorkspaceSecrets(context.Background(), "ws-prov")
+	if errMsg != "" {
+		t.Fatalf("loadWorkspaceSecrets returned error: %q", errMsg)
+	}
+
+	// Workspace override wins on value precedence.
+	if got := envVars["GITEA_TOKEN"]; got != "workspace-authored-gitea" {
+		t.Errorf("GITEA_TOKEN value = %q; want workspace-authored override", got)
+	}
+
+	// workspaceKeys: both workspace-sourced keys present.
+	if _, ok := workspaceKeys["GITEA_TOKEN"]; !ok {
+		t.Errorf("GITEA_TOKEN (re-authored via workspace_secrets) missing from workspaceKeys: %v", workspaceKeys)
+	}
+	if _, ok := workspaceKeys["WS_ONLY"]; !ok {
+		t.Errorf("WS_ONLY (workspace_secrets) missing from workspaceKeys: %v", workspaceKeys)
+	}
+	// OPERATOR_ONLY came only from global_secrets → NOT workspace-authored.
+	if _, ok := workspaceKeys["OPERATOR_ONLY"]; ok {
+		t.Errorf("OPERATOR_ONLY (global_secrets only) wrongly present in workspaceKeys: %v", workspaceKeys)
+	}
+
+	// globalKeys: GITEA_TOKEN's operator-bleed flag dropped by the override;
+	// OPERATOR_ONLY stays flagged.
+	if _, ok := globalKeys["GITEA_TOKEN"]; ok {
+		t.Errorf("GITEA_TOKEN should be removed from globalKeys after workspace override: %v", globalKeys)
+	}
+	if _, ok := globalKeys["OPERATOR_ONLY"]; !ok {
+		t.Errorf("OPERATOR_ONLY missing from globalKeys: %v", globalKeys)
+	}
+
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("sqlmock expectations not met: %v", err)
+	}
+}
+
 // ==================== issueAndInjectToken (issue #418) ====================

 // TestIssueAndInjectToken_HappyPath verifies that on a normal (re)provision the
@@ -5,61 +5,53 @@ import (
 	"strings"
 )

-// Dev-mode escape hatch — factored out of AdminAuth + WorkspaceAuth so a
-// future third caller (or a change to what "dev mode" means) touches one
-// place. Narrowing the exposed seam also makes it grep-able from security
-// reviews: every `isDevModeFailOpen()` call is an intentional fail-open.
+// Local-dev environment detection.
 //
-// Why the helper exists at all: on `go run ./cmd/server` the Canvas (at
-// localhost:3000) calls the platform (at localhost:8080) cross-port. Both
-// `isSameOriginCanvas` (Referer==Host) and the AdminAuth Tier-1 fail-open
-// (no tokens in DB) close the moment the user creates their first
-// workspace. Without this hatch the Canvas 401s on every /workspaces
-// enumeration and every /workspaces/:id/* read until the operator sets
-// `ADMIN_TOKEN` and rebuilds the Canvas bundle with a matching
-// `NEXT_PUBLIC_ADMIN_TOKEN`. That's too much friction for a local smoke
-// test — hence the hatch.
+// SECURITY (harden/no-fail-open-auth): this file used to export an auth
+// escape hatch — `isDevModeFailOpen()` — that let AdminAuth, WorkspaceAuth,
+// and the discovery handler serve admin/workspace-protected endpoints with
+// NO bearer token whenever `ADMIN_TOKEN` was unset and `MOLECULE_ENV` was a
+// dev value. The CTO directive is "nothing should be fail-open": auth is now
+// fail-CLOSED in every environment, dev included. The hatch is GONE.
 //
-// Why it's safe for SaaS: hosted tenants are provisioned with both
-// `ADMIN_TOKEN` (a random secret, checked by Tier-2 above) and
-// `MOLECULE_ENV=production`. Either one being set makes this helper
-// return false, so the fail-open branch is unreachable in production.
-// Real token minting goes through AdminAuth, so local development keeps a
-// narrow fail-open mode for browser/API smoke tests without an admin secret.
+// What remains here is a NON-security predicate, `isLocalDevEnv()`, that
+// reports ONLY whether `MOLECULE_ENV` names a local-dev environment. It does
+// NOT consult `ADMIN_TOKEN` and it does NOT influence authentication. It is
+// used for two convenience/defense-in-depth knobs that never grant access:
+//
+//   - ratelimit.go: relax the per-caller request bucket on a single-user
+//     local stack (a DoS knob, not a credential — relaxing it cannot expose
+//     any protected data).
+//   - cmd/server resolveBindHost(): default the HTTP listener to loopback
+//     (127.0.0.1) in local dev. This is strictly *safer* than binding all
+//     interfaces and is unrelated to whether a request is authenticated.
+//
+// Local dev now stays AUTHENTICATED, not open: scripts/dev-start.sh
+// provisions a deterministic `ADMIN_TOKEN` and hands the matching
+// `NEXT_PUBLIC_ADMIN_TOKEN` to the Canvas, so the browser sends a real
+// bearer. See scripts/dev-start.sh and canvas/src/lib/api.ts.

 // devModeEnvValues is the set of MOLECULE_ENV values that count as
-// "explicit dev mode". Production callers don't set any of these.
+// "explicit local dev". Production callers don't set any of these.
 // Case-insensitive compare via strings.ToLower below.
 var devModeEnvValues = map[string]struct{}{
 	"development": {},
 	"dev":         {},
 }

-// isDevModeFailOpen reports whether the AdminAuth / WorkspaceAuth
-// middleware should let a bearer-less request through despite live
-// workspace tokens existing in the DB.
-//
-// True only when BOTH:
-//   - `ADMIN_TOKEN` is empty (operator has not opted in to the #684
-//     closure), AND
-//   - `MOLECULE_ENV` is explicitly a dev value ("development" / "dev").
-//
-// Either condition failing returns false — that's the SaaS safety
-// guarantee. Tests: `devmode_test.go` covers every branch.
-func isDevModeFailOpen() bool {
-	if os.Getenv("ADMIN_TOKEN") != "" {
-		return false
-	}
+// isLocalDevEnv reports whether MOLECULE_ENV names a local-dev environment
+// ("development" / "dev"). It carries NO authentication semantics — callers
+// must never use it to bypass a credential check. It exists only for
+// dev-convenience / defense-in-depth knobs (rate-limit relaxation, loopback
+// bind default) that cannot expose protected data.
+func isLocalDevEnv() bool {
 	env := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_ENV")))
 	_, ok := devModeEnvValues[env]
 	return ok
 }

-// IsDevModeFailOpen exposes isDevModeFailOpen to packages outside the
-// middleware module (handlers, discovery, etc.) so they can apply the
-// same Tier-1b escape hatch their sibling AdminAuth / WorkspaceAuth
-// already do. Keep every call site audit-tagged so security review can
-// grep them.
-func IsDevModeFailOpen() bool {
-	return isDevModeFailOpen()
+// IsLocalDevEnv exposes isLocalDevEnv to code outside the middleware
+// package (cmd/server bind-host default). NON-security: see isLocalDevEnv.
+func IsLocalDevEnv() bool {
+	return isLocalDevEnv()
 }
@@ -4,74 +4,66 @@ import (
 	"testing"
 )

-// Unit tests for the isDevModeFailOpen predicate. The AdminAuth and
-// WorkspaceAuth middleware tests exercise the same helper indirectly via
-// HTTP, but a direct predicate test locks the pure-logic behaviour:
-// future callers can add themselves to `devmode.go` with confidence.
+// Unit tests for the isLocalDevEnv predicate.
+//
+// (harden/no-fail-open-auth) This predicate replaced the old
+// isDevModeFailOpen() auth escape hatch. It carries NO authentication
+// semantics and does NOT consult ADMIN_TOKEN — it reports ONLY whether
+// MOLECULE_ENV names a local-dev environment. It gates non-security knobs
+// (rate-limit relaxation, loopback bind default). The fail-CLOSED auth
+// behaviour is enforced by no_fail_open_test.go.

-func TestIsDevModeFailOpen_DevModeNoAdminToken_True(t *testing.T) {
+func TestIsLocalDevEnv_Development_True(t *testing.T) {
 	t.Setenv("MOLECULE_ENV", "development")
-	t.Setenv("ADMIN_TOKEN", "")
-	if !isDevModeFailOpen() {
-		t.Error("expected dev mode + no admin token to return true")
+	if !isLocalDevEnv() {
+		t.Error("expected MOLECULE_ENV=development to be local dev")
 	}
 }

-func TestIsDevModeFailOpen_DevModeShortAlias_True(t *testing.T) {
-	// "dev" is a valid alias for "development".
+func TestIsLocalDevEnv_ShortAlias_True(t *testing.T) {
 	t.Setenv("MOLECULE_ENV", "dev")
-	t.Setenv("ADMIN_TOKEN", "")
-	if !isDevModeFailOpen() {
-		t.Error("expected MOLECULE_ENV=dev to be treated as dev mode")
+	if !isLocalDevEnv() {
+		t.Error("expected MOLECULE_ENV=dev to be treated as local dev")
 	}
 }

-func TestIsDevModeFailOpen_AdminTokenSet_False(t *testing.T) {
-	// Setting ADMIN_TOKEN is the operator's explicit opt-in to the #684
-	// closure. Dev mode must NOT silently override that signal.
+func TestIsLocalDevEnv_IgnoresAdminToken(t *testing.T) {
+	// Decoupled from ADMIN_TOKEN: dev now provisions one, but the bind /
+	// rate-limit knobs must still treat the env as local dev. The predicate
+	// grants no access, so a set ADMIN_TOKEN has no reason to suppress it.
 	t.Setenv("MOLECULE_ENV", "development")
-	t.Setenv("ADMIN_TOKEN", "operator-explicitly-set-this")
-	if isDevModeFailOpen() {
-		t.Error("explicit ADMIN_TOKEN must suppress the dev-mode hatch")
+	t.Setenv("ADMIN_TOKEN", "operator-set-this")
+	if !isLocalDevEnv() {
+		t.Error("ADMIN_TOKEN must not affect isLocalDevEnv (env-only predicate)")
 	}
 }

-func TestIsDevModeFailOpen_Production_False(t *testing.T) {
-	// The SaaS-safety guarantee: production tenants always have
-	// MOLECULE_ENV=production, so the hatch is unreachable even if a
-	// misconfigured deployment also leaves ADMIN_TOKEN unset.
+func TestIsLocalDevEnv_Production_False(t *testing.T) {
 	t.Setenv("MOLECULE_ENV", "production")
-	t.Setenv("ADMIN_TOKEN", "")
-	if isDevModeFailOpen() {
-		t.Error("production must never hit the dev-mode fail-open branch")
+	if isLocalDevEnv() {
+		t.Error("production must not count as local dev")
 	}
 }

-func TestIsDevModeFailOpen_CaseInsensitive(t *testing.T) {
-	// Operators shouldn't have to remember exact casing for a dev-only
-	// convenience. "Development", "DEV", "  dev  " all count.
+func TestIsLocalDevEnv_CaseInsensitive(t *testing.T) {
 	cases := []string{"Development", "DEVELOPMENT", "Dev", "DEV", "  dev  "}
 	for _, env := range cases {
 		t.Run(env, func(t *testing.T) {
 			t.Setenv("MOLECULE_ENV", env)
-			t.Setenv("ADMIN_TOKEN", "")
-			if !isDevModeFailOpen() {
-				t.Errorf("MOLECULE_ENV=%q should count as dev mode", env)
+			if !isLocalDevEnv() {
+				t.Errorf("MOLECULE_ENV=%q should count as local dev", env)
 			}
 		})
 	}
 }

-func TestIsDevModeFailOpen_UnknownEnv_False(t *testing.T) {
-	// Arbitrary / unset MOLECULE_ENV values are NOT treated as dev mode.
-	// Keeps the fail-open branch narrow — no silent opt-in from a typo.
+func TestIsLocalDevEnv_UnknownEnv_False(t *testing.T) {
 	cases := []string{"", "staging", "local", "preview", "test", "devel"}
 	for _, env := range cases {
 		t.Run(env, func(t *testing.T) {
 			t.Setenv("MOLECULE_ENV", env)
-			t.Setenv("ADMIN_TOKEN", "")
-			if isDevModeFailOpen() {
-				t.Errorf("MOLECULE_ENV=%q must not enable fail-open", env)
+			if isLocalDevEnv() {
+				t.Errorf("MOLECULE_ENV=%q must not count as local dev", env)
 			}
 		})
 	}
@@ -0,0 +1,245 @@
+package middleware
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/DATA-DOG/go-sqlmock"
+	"github.com/gin-gonic/gin"
+)
+
+// no_fail_open_test.go is the regression gate for the CTO directive
+// "nothing should be fail-open" (branch harden/no-fail-open-auth).
+//
+// It asserts that AdminAuth and WorkspaceAuth fail CLOSED (401) under the
+// EXACT conditions that used to trigger the removed dev-mode fail-open hatch:
+//   - ADMIN_TOKEN unset, AND
+//   - MOLECULE_ENV is a dev value ("development" / "dev"), AND
+//   - any HasAnyLiveTokenGlobal state (0 = fresh install, 1 = post-workspace).
+//
+// To prove this is RED against the old behaviour: temporarily restore the
+// `if isDevModeFailOpen() { c.Next(); return }` short-circuit in
+// wsauth_middleware.go (and the Tier-1 `if adminSecret == "" { c.Next() }`
+// branch) — every sub-case below flips from 401 to 200 and fails. After the
+// hardening, all sub-cases are 401.
+
+// failOpenConditions enumerates the (MOLECULE_ENV, hasLiveTokens) combinations
+// that the removed hatch keyed on. ADMIN_TOKEN is always unset here — that was
+// a precondition of the old fail-open.
+var failOpenConditions = []struct {
+	name      string
+	molEnv    string
+	liveCount int
+}{
+	{"dev_alias_fresh_install", "dev", 0},
+	{"dev_alias_post_workspace", "dev", 1},
+	{"development_fresh_install", "development", 0},
+	{"development_post_workspace", "development", 1},
+}
+
+func TestAdminAuth_NoFailOpen_UnderOldHatchConditions(t *testing.T) {
+	for _, tc := range failOpenConditions {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Setenv("ADMIN_TOKEN", "")
+			t.Setenv("MOLECULE_ENV", tc.molEnv)
+			// Ensure no CP-session path can accidentally pass.
+			t.Setenv("CP_UPSTREAM_URL", "")
+
+			mockDB, mock, err := sqlmock.New()
+			if err != nil {
+				t.Fatalf("sqlmock.New: %v", err)
+			}
+			defer mockDB.Close()
+
+			// AdminAuth always probes HasAnyLiveTokenGlobal (for the 503-on-
+			// outage semantics), so it must be expected for both counts.
+			mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
+				WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(tc.liveCount))
+
+			r := gin.New()
+			r.GET("/admin/secrets", AdminAuth(mockDB), func(c *gin.Context) {
+				c.JSON(http.StatusOK, gin.H{"ok": true})
+			})
+
+			w := httptest.NewRecorder()
+			req, _ := http.NewRequest(http.MethodGet, "/admin/secrets", nil)
+			r.ServeHTTP(w, req)
+
+			if w.Code != http.StatusUnauthorized {
+				t.Errorf("AdminAuth must fail CLOSED under old hatch conditions "+
+					"(MOLECULE_ENV=%q, ADMIN_TOKEN unset, liveTokens=%d): expected 401, got %d: %s",
+					tc.molEnv, tc.liveCount, w.Code, w.Body.String())
+			}
+			if err := mock.ExpectationsWereMet(); err != nil {
+				t.Errorf("unmet sqlmock expectations: %v", err)
+			}
+		})
+	}
+}
+
+func TestWorkspaceAuth_NoFailOpen_UnderOldHatchConditions(t *testing.T) {
+	for _, tc := range failOpenConditions {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Setenv("ADMIN_TOKEN", "")
+			t.Setenv("MOLECULE_ENV", tc.molEnv)
+			t.Setenv("CP_UPSTREAM_URL", "")
+
+			// WorkspaceAuth 401s before any DB lookup when there is no
+			// bearer / cookie, so no queries are expected regardless of
+			// the nominal live-token count.
+			mockDB, _, err := sqlmock.New()
+			if err != nil {
+				t.Fatalf("sqlmock.New: %v", err)
+			}
+			defer mockDB.Close()
+
+			r := gin.New()
+			r.GET("/workspaces/:id/activity", WorkspaceAuth(mockDB), func(c *gin.Context) {
+				c.JSON(http.StatusOK, gin.H{"ok": true})
+			})
+
+			w := httptest.NewRecorder()
+			req, _ := http.NewRequest(http.MethodGet,
+				"/workspaces/00000000-0000-0000-0000-000000000000/activity", nil)
+			r.ServeHTTP(w, req)
+
+			if w.Code != http.StatusUnauthorized {
+				t.Errorf("WorkspaceAuth must fail CLOSED under old hatch conditions "+
+					"(MOLECULE_ENV=%q, ADMIN_TOKEN unset): expected 401, got %d: %s",
+					tc.molEnv, w.Code, w.Body.String())
+			}
+		})
+	}
+}
+
+// TestCanvasOrBearer_NoFailOpen_UnderOldHatchConditions is the regression gate
+// for the two fail-open branches removed from CanvasOrBearer
+// (harden/no-fail-open-auth, "nothing fail-open" pass 2):
+//
+//	(a) lazy-bootstrap pass: `if !hasLive { c.Next(); return }` — a zero-token
+//	    install used to pass EVERYTHING through. Now a bearer-less request on a
+//	    fresh install (HasAnyLiveTokenGlobal → 0) fails CLOSED with 401.
+//	(b) fail-open-on-DB-error: `if err != nil { log; c.Next(); return }` — a
+//	    HasAnyLiveTokenGlobal error used to ALLOW. Now it fails CLOSED with 503.
+//
+// Watch-it-fail: restore either short-circuit in CanvasOrBearer and the
+// matching sub-case flips (401→200 / 503→200) and fails.
+func TestCanvasOrBearer_NoFailOpen_UnderOldHatchConditions(t *testing.T) {
+	// (a) Fresh install (0 live tokens), no bearer, no ADMIN_TOKEN → 401.
+	t.Run("zero_token_install_no_bearer_fails_closed_401", func(t *testing.T) {
+		t.Setenv("ADMIN_TOKEN", "")
+		t.Setenv("CORS_ORIGINS", "")
+
+		mockDB, mock, err := sqlmock.New()
+		if err != nil {
+			t.Fatalf("sqlmock.New: %v", err)
+		}
+		defer mockDB.Close()
+
+		mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
+			WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
+
+		handlerCalled := false
+		r := gin.New()
+		r.PUT("/canvas/viewport", CanvasOrBearer(mockDB), func(c *gin.Context) {
+			handlerCalled = true
+			c.JSON(http.StatusOK, gin.H{"ok": true})
+		})
+
+		w := httptest.NewRecorder()
+		req, _ := http.NewRequest(http.MethodPut, "/canvas/viewport", nil)
+		r.ServeHTTP(w, req)
+
+		if w.Code != http.StatusUnauthorized {
+			t.Errorf("CanvasOrBearer lazy-bootstrap fail-open removed: zero-token install must 401, got %d: %s",
+				w.Code, w.Body.String())
+		}
+		if handlerCalled {
+			t.Error("handler reached on a fresh-install bearer-less request — lazy-bootstrap fail-open not removed")
+		}
+	})
+
+	// (b) Auth datastore error → 503 (NOT allow).
+	t.Run("db_error_fails_closed_503", func(t *testing.T) {
+		mockDB, mock, err := sqlmock.New()
+		if err != nil {
+			t.Fatalf("sqlmock.New: %v", err)
+		}
+		defer mockDB.Close()
+
+		mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
+			WillReturnError(http.ErrAbortHandler) // any non-nil error suffices
+
+		handlerCalled := false
+		r := gin.New()
+		r.PUT("/canvas/viewport", CanvasOrBearer(mockDB), func(c *gin.Context) {
+			handlerCalled = true
+			c.JSON(http.StatusOK, gin.H{"ok": true})
+		})
+
+		w := httptest.NewRecorder()
+		req, _ := http.NewRequest(http.MethodPut, "/canvas/viewport", nil)
+		r.ServeHTTP(w, req)
+
+		if w.Code != http.StatusServiceUnavailable {
+			t.Errorf("CanvasOrBearer DB-error fail-open removed: must 503, got %d: %s", w.Code, w.Body.String())
+		}
+		if handlerCalled {
+			t.Error("handler reached on a datastore-error request — DB-error fail-open not removed")
+		}
+	})
+}
+
+// TestNoFailOpenAuthHelperReexists is a source-guard: it asserts that no
+// fail-open auth helper (the removed isDevModeFailOpen / IsDevModeFailOpen)
+// has crept back into the middleware package as real code. The replacement
+// predicate is the NON-security isLocalDevEnv (bind / rate-limit only);
+// re-introducing the old fail-open identifier as a declaration or call is a
+// regression of the CTO directive.
+//
+// It matches the *invocation/declaration* form `isDevModeFailOpen(` and
+// strips everything after the first `//` on each line before matching, so
+// the historical references kept in doc comments (some written with the
+// parentheses) don't trip the guard. `/* */` comments are NOT stripped.
+func TestNoFailOpenAuthHelperReexists(t *testing.T) {
+	forbidden := []string{"isDevModeFailOpen(", "IsDevModeFailOpen("}
+
+	entries, err := os.ReadDir(".")
+	if err != nil {
+		t.Fatalf("ReadDir: %v", err)
+	}
+	for _, e := range entries {
+		name := e.Name()
+		if e.IsDir() || !strings.HasSuffix(name, ".go") {
+			continue
+		}
+		// Skip this guard file itself (it names the forbidden tokens on
+		// purpose, including inside a comment).
+		if name == "no_fail_open_test.go" {
+			continue
+		}
+		data, err := os.ReadFile(filepath.Clean(name))
+		if err != nil {
+			t.Fatalf("ReadFile %s: %v", name, err)
+		}
+		for i, line := range strings.Split(string(data), "\n") {
+			// Ignore single-line comments — historical mentions live there.
+			code := line
+			if idx := strings.Index(code, "//"); idx >= 0 {
+				code = code[:idx]
+			}
+			for _, f := range forbidden {
+				if strings.Contains(code, f) {
+					t.Errorf("%s:%d uses forbidden fail-open auth helper %q — "+
+						"the dev-mode fail-open hatch must stay removed (harden/no-fail-open-auth). "+
+						"Use isLocalDevEnv (NON-security) for dev-only knobs instead.",
+						name, i+1, strings.TrimSuffix(f, "("))
+				}
+			}
+		}
+	}
+}
@@ -102,15 +102,16 @@ func (rl *RateLimiter) keyFor(c *gin.Context) string {
 // the priority list and rationale.
 func (rl *RateLimiter) Middleware() gin.HandlerFunc {
 	return func(c *gin.Context) {
-		// Tier-1b dev-mode hatch — same gate as AdminAuth / WorkspaceAuth /
-		// discovery. On a local single-user Docker setup the 600-req/min
-		// bucket fills fast: a 15-workspace canvas + activity polling +
-		// approvals polling + A2A overlay + initial hydration all land in
-		// one bucket (whichever keyFor returns — typically the dev user's
-		// IP or shared admin token), so a minute of active use can trip
-		// 429 and blank the page. Gated by MOLECULE_ENV=development +
-		// empty ADMIN_TOKEN so SaaS production keeps the bucket.
-		if isDevModeFailOpen() {
+		// Local-dev rate-limit relaxation (NON-security; see devmode.go).
+		// On a local single-user stack the 600-req/min bucket fills fast:
+		// a 15-workspace canvas + activity polling + approvals polling +
+		// A2A overlay + initial hydration all land in one bucket, so a
+		// minute of active use can trip 429 and blank the page. This only
+		// relaxes a DoS knob — it grants no access and is unrelated to
+		// authentication (auth is fail-closed in every env). Gated solely
+		// by MOLECULE_ENV=dev/development so SaaS production keeps the
+		// bucket. Decoupled from ADMIN_TOKEN (dev now provisions one).
+		if isLocalDevEnv() {
 			c.Header("X-RateLimit-Limit", "unlimited")
 			c.Next()
 			return
@@ -120,12 +120,12 @@ func WorkspaceAuth(database *sql.DB) gin.HandlerFunc {
 				return
 			}
 		}
-		// Local-dev escape hatch — see devmode.go. Unreachable on SaaS
-		// (hosted tenants always have ADMIN_TOKEN + MOLECULE_ENV=production).
-		if isDevModeFailOpen() {
-			c.Next()
-			return
-		}
+		// No bearer, no verified CP session: fail CLOSED in EVERY
+		// environment (harden/no-fail-open-auth). The old local-dev
+		// escape hatch that let bearer-less requests through when
+		// ADMIN_TOKEN was unset + MOLECULE_ENV=dev has been removed —
+		// local dev now authenticates with a provisioned ADMIN_TOKEN
+		// (see scripts/dev-start.sh).
 		c.AbortWithStatusJSON(http.StatusUnauthorized, gin.H{"error": "missing workspace auth token"})
 	}
 }
@@ -133,11 +133,18 @@ func WorkspaceAuth(database *sql.DB) gin.HandlerFunc {
 // AdminAuth returns a Gin middleware for global/admin routes (e.g.
 // /settings/secrets, /admin/secrets) that have no per-workspace scope.
 //
+// FAIL-CLOSED in every environment (harden/no-fail-open-auth): there is no
+// bearer-less path through this middleware. A request reaches the handler
+// ONLY by presenting a valid credential (verified CP session cookie, org
+// token, ADMIN_TOKEN, or — deprecated — a live workspace token). The former
+// "Tier-1 lazy-bootstrap fail-open" (no live tokens + no ADMIN_TOKEN ⇒ pass)
+// has been removed: it let an attacker pre-empt the first user by POSTing
+// /org/import before any token was minted (C4 SaaS-launch finding). A fresh
+// install must set ADMIN_TOKEN to reach admin routes.
+//
 // # Credential tier (evaluated in order)
 //
-//  1. Lazy-bootstrap fail-open: if no live workspace token exists anywhere on
-//     the platform (fresh install / pre-Phase-30 upgrade), every request passes
-//     through so existing deployments keep working.
+//  1. Verified CP session cookie (SaaS canvas) — upstream-confirmed.
 //
 //  2. ADMIN_TOKEN env var (recommended, closes #684): when set, the bearer
 //     MUST equal this value exactly (constant-time comparison). Workspace
@@ -163,33 +170,17 @@ func AdminAuth(database *sql.DB) gin.HandlerFunc {
 		ctx := c.Request.Context()
 		adminSecret := os.Getenv("ADMIN_TOKEN")

-		hasLive, err := wsauth.HasAnyLiveTokenGlobal(ctx, database)
-		if err != nil {
+		// (harden/no-fail-open-auth) Both former fail-open branches have
+		// been REMOVED here:
+		//   - Tier-1 lazy-bootstrap (no live tokens + no ADMIN_TOKEN ⇒ pass)
+		//   - Tier-1b local-dev escape hatch (isDevModeFailOpen ⇒ pass)
+		// Admin auth is now fail-CLOSED in every environment. We still probe
+		// HasAnyLiveTokenGlobal so a datastore outage returns a structured
+		// 503 (not a silent pass), but its result no longer opens any path.
+		if _, err := wsauth.HasAnyLiveTokenGlobal(ctx, database); err != nil {
 			abortAuthLookupError(c, "AdminAuth: HasAnyLiveTokenGlobal", err)
 			return
 		}
-		if !hasLive {
-			// Tier 1: fail-open is ONLY safe when ADMIN_TOKEN is unset
-			// (self-hosted dev, pre-Phase-30 upgrade). Hosted SaaS always
-			// sets ADMIN_TOKEN at provision time, and C4 (SaaS-launch
-			// blocker) showed that without this guard an attacker can
-			// pre-empt the first user by POSTing /org/import before any
-			// token gets minted. When ADMIN_TOKEN is set we fall through
-			// into the same bearer-check path Tier-2 uses below.
-			if adminSecret == "" {
-				c.Next()
-				return
-			}
-		}
-
-		// Tier 1b: Local-dev escape hatch — see devmode.go. Lets the
-		// Canvas dashboard keep working after the first workspace token
-		// lands in the DB on `go run ./cmd/server`. Unreachable on SaaS
-		// (hosted tenants always have ADMIN_TOKEN + MOLECULE_ENV=production).
-		if isDevModeFailOpen() {
-			c.Next()
-			return
-		}

 		// SaaS-canvas path: when the request carries a WorkOS session
 		// cookie AND the CP confirms it's valid, accept without a
@@ -281,34 +272,46 @@ func cpSessionActor(cookieHeader string) string {
 // Accepts either:
 //
 //  1. A valid bearer token (same contract as AdminAuth) — covers molecli,
-//     agent-to-platform calls, and anyone using the API directly.
-//  2. A browser Origin header that matches CORS_ORIGINS (canvas itself).
-//     This is NOT a strict auth boundary — curl can forge Origin — but for
-//     cosmetic-only routes the trade-off is acceptable. Non-cosmetic routes
-//     MUST NOT use this middleware (see #194 review on why it would re-open
-//     #164 CRITICAL if applied to /bundles/import).
+//     agent-to-platform calls, the browser canvas (which now sends
+//     Authorization: Bearer $NEXT_PUBLIC_ADMIN_TOKEN on every platform
+//     call — see canvas/src/lib/api.ts platformAuthHeaders), and anyone
+//     using the API directly.
+//  2. A same-origin canvas request (Referer/Host match), but ONLY when the
+//     combined-tenant canvas proxy is active (CANVAS_PROXY_URL set). This is
+//     a real same-origin check the browser cannot forge cross-origin (see
+//     isSameOriginCanvas / IsVerifiedCanvasSession, #623/#194) — NOT the
+//     trivially-forgeable cross-origin Origin header. The forgeable
+//     CORS_ORIGINS Origin-match path was REMOVED under the CTO
+//     "nothing fail-open" directive (a no-bearer request passing purely on a
+//     spoofable Origin is effectively open even for a cosmetic route, and is
+//     no longer needed now that the canvas always sends a bearer).
 //
-// Lazy-bootstrap fail-open preserved: zero-token installs pass everything
-// through so fresh self-hosted / dev sessions aren't bricked.
+// Non-cosmetic routes MUST NOT use this middleware (see #194 review on why it
+// would re-open #164 CRITICAL if applied to /bundles/import).
+//
+// (harden/no-fail-open-auth) Two former fail-open branches are REMOVED:
+//   - DB-error on HasAnyLiveTokenGlobal used to `c.Next()` (allow); it now
+//     fails CLOSED with 503 (availability tradeoff that grants NO access).
+//   - The lazy-bootstrap pass (`!hasLive ⇒ c.Next()`) used to let a
+//     zero-token install through EVERYTHING; it is gone. Bootstrap is now via
+//     ADMIN_TOKEN (provisioned by scripts/dev-start.sh for local dev,
+//     operator/SaaS-set in production) — local mimics production.
 func CanvasOrBearer(database *sql.DB) gin.HandlerFunc {
 	return func(c *gin.Context) {
 		ctx := c.Request.Context()

-		hasLive, err := wsauth.HasAnyLiveTokenGlobal(ctx, database)
-		if err != nil {
-			log.Printf("wsauth: CanvasOrBearer HasAnyLiveTokenGlobal failed: %v — allowing request", err)
-			c.Next()
-			return
-		}
-		if !hasLive {
-			c.Next()
+		// Health-check the auth datastore; the live-token result itself is
+		// discarded and opens no path. A datastore error fails CLOSED (503) — an
+		// availability tradeoff that grants NO access (was: log + c.Next()).
+		if _, err := wsauth.HasAnyLiveTokenGlobal(ctx, database); err != nil {
+			abortAuthLookupError(c, "CanvasOrBearer: HasAnyLiveTokenGlobal", err)
 			return
 		}

 		// Path 1: bearer present → bearer MUST validate. Do not fall through
-		// to Origin on an invalid bearer — an attacker with a revoked /
-		// expired token + a matching Origin would otherwise bypass auth.
-		// Empty bearer → skip to Origin path (canvas never sends one).
+		// to the same-origin path on an invalid bearer — a caller holding a
+		// revoked / expired token would otherwise get a second chance there.
+		// Empty bearer → fall through to the same-origin canvas path.
 		if tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization")); tok != "" {
 			// Admin token accepted for canvas dashboard
 			adminSecret := os.Getenv("ADMIN_TOKEN")
@@ -324,13 +327,10 @@ func CanvasOrBearer(database *sql.DB) gin.HandlerFunc {
 			return
 		}

-		// Path 2: canvas origin match (cross-origin canvas).
-		if canvasOriginAllowed(c.GetHeader("Origin")) {
-			c.Next()
-			return
-		}
-
-		// Path 3: same-origin canvas (tenant image).
+		// Path 2: same-origin canvas (combined-tenant image). Gated behind
+		// canvasProxyActive (CANVAS_PROXY_URL) and a Referer/Host same-origin
+		// check that a browser cannot forge cross-origin — NOT the spoofable
+		// Origin header (that path was removed, see doc comment above).
 		if isSameOriginCanvas(c) {
 			c.Next()
 			return
@@ -340,30 +340,14 @@ func CanvasOrBearer(database *sql.DB) gin.HandlerFunc {
 	}
 }

-// canvasOriginAllowed returns true if origin matches any entry in the
-// CORS_ORIGINS env var (comma-separated) or the localhost defaults.
-// Exact-match only; no prefix or wildcard logic — that's handled by the
-// real CORS middleware upstream. The intent here is "did this request come
-// from the canvas page the user is already logged into?" — a binary check.
-func canvasOriginAllowed(origin string) bool {
-	if origin == "" {
-		return false
-	}
-	allowed := []string{"http://localhost:3000", "http://localhost:3001"}
-	if v := os.Getenv("CORS_ORIGINS"); v != "" {
-		for _, o := range strings.Split(v, ",") {
-			if o = strings.TrimSpace(o); o != "" {
-				allowed = append(allowed, o)
-			}
-		}
-	}
-	for _, a := range allowed {
-		if a == origin {
-			return true
-		}
-	}
-	return false
-}
+// (harden/no-fail-open-auth) canvasOriginAllowed was REMOVED. It matched a
+// request's (trivially forgeable, cross-origin) Origin header against
+// CORS_ORIGINS and was the basis of CanvasOrBearer's no-bearer Origin-match
+// pass — effectively open to any curl that sets a matching Origin. Under the
+// CTO "nothing fail-open" directive that path is gone; the canvas now always
+// sends a bearer (NEXT_PUBLIC_ADMIN_TOKEN), so nothing legitimate relied on it.
+// The CORS *response-header* allowlist is handled by the real CORS middleware
+// upstream, unaffected by this removal.

 // isSameOriginCanvas returns true when the request appears to come from the
 // canvas UI served by the same Go process (tenant image). In this topology,
@@ -143,11 +143,15 @@ func TestCanvasOrBearer_AdminTokenEnv_Passes(t *testing.T) {
 	}
 }

-// TestCanvasOrBearer_DBError_FailOpen pins the documented behavior on a
-// HasAnyLiveTokenGlobal failure. The middleware logs and falls open so a
-// flaky DB doesn't lock canvas users out of cosmetic routes. Hardcoded in
-// the comment block; this is a reminder if anyone changes that semantic.
-func TestCanvasOrBearer_DBError_FailOpen(t *testing.T) {
+// TestCanvasOrBearer_DBError_FailsClosed pins the removal of the
+// fail-open-on-DB-error branch (harden/no-fail-open-auth). A
+// HasAnyLiveTokenGlobal failure used to log + c.Next() (allow); it now fails
+// CLOSED with 503 — an availability tradeoff that grants NO access. The
+// handler must NOT be reached.
+//
+// Watch-it-fail: restore `if err != nil { log; c.Next(); return }` in
+// CanvasOrBearer → this flips 503→200 and fails.
+func TestCanvasOrBearer_DBError_FailsClosed(t *testing.T) {
 	mockDB, mock, err := sqlmock.New()
 	if err != nil {
 		t.Fatalf("sqlmock: %v", err)
@@ -156,8 +160,10 @@ func TestCanvasOrBearer_DBError_FailOpen(t *testing.T) {
 	mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
 		WillReturnError(http.ErrAbortHandler) // any non-nil error suffices

+	handlerCalled := false
 	r := gin.New()
 	r.PUT("/canvas/viewport", CanvasOrBearer(mockDB), func(c *gin.Context) {
+		handlerCalled = true
 		c.JSON(http.StatusOK, gin.H{"ok": true})
 	})

@@ -165,8 +171,11 @@ func TestCanvasOrBearer_DBError_FailOpen(t *testing.T) {
 	req, _ := http.NewRequest(http.MethodPut, "/canvas/viewport", nil)
 	r.ServeHTTP(w, req)

-	if w.Code != http.StatusOK {
-		t.Errorf("DB error fail-open: got %d, want 200 (%s)", w.Code, w.Body.String())
+	if w.Code != http.StatusServiceUnavailable {
+		t.Errorf("DB error must fail CLOSED: got %d, want 503 (%s)", w.Code, w.Body.String())
+	}
+	if handlerCalled {
+		t.Error("handler reached on a datastore-error request — DB-error fail-open not removed")
 	}
 }

@@ -339,15 +339,21 @@ func TestWorkspaceAuth_WrongWorkspace_Returns401(t *testing.T) {
-// TestAdminAuth_FailOpen_NoTokensGlobally — C10/C11: on a fresh install (no
-// live tokens anywhere) the middleware must let the request through so existing
-// deployments keep working during the Phase-30 rollout.
-func TestAdminAuth_FailOpen_NoTokensGlobally(t *testing.T) {
+// TestAdminAuth_FreshInstallNoTokens_FailsClosed pins the post-hardening
+// contract (harden/no-fail-open-auth): on a fresh install with NO live
+// tokens anywhere AND no ADMIN_TOKEN, a bearer-less admin request now 401s.
+// The former Tier-1 "lazy-bootstrap fail-open" (no tokens ⇒ 200) is GONE —
+// it let an attacker pre-empt the first user via /org/import (C4). A fresh
+// install must provision ADMIN_TOKEN to reach admin routes.
+func TestAdminAuth_FreshInstallNoTokens_FailsClosed(t *testing.T) {
 	t.Setenv("ADMIN_TOKEN", "")
+	t.Setenv("MOLECULE_ENV", "")
 	mockDB, mock, err := sqlmock.New()
 	if err != nil {
 		t.Fatalf("sqlmock.New: %v", err)
 	}
 	defer mockDB.Close()

-	// HasAnyLiveTokenGlobal returns 0 — fresh install.
+	// HasAnyLiveTokenGlobal returns 0 — fresh install. We still probe it
+	// (so a DB outage yields a structured 503), but the result no longer
+	// opens any path.
 	mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
 		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))

@@ -360,8 +369,8 @@ func TestAdminAuth_FailOpen_NoTokensGlobally(t *testing.T) {
 	req, _ := http.NewRequest(http.MethodGet, "/admin/secrets", nil)
 	r.ServeHTTP(w, req)

-	if w.Code != http.StatusOK {
-		t.Errorf("C10 fail-open (no global tokens): expected 200, got %d: %s", w.Code, w.Body.String())
+	if w.Code != http.StatusUnauthorized {
+		t.Errorf("fresh-install no-token fail-closed: expected 401, got %d: %s", w.Code, w.Body.String())
 	}
 	if err := mock.ExpectationsWereMet(); err != nil {
 		t.Errorf("unmet sqlmock expectations: %v", err)
@@ -831,18 +840,23 @@ func TestAdminAuth_Issue180_ApprovalsListing_NoBearer_Returns401(t *testing.T) {
 	}
 }

-// TestAdminAuth_Issue180_ApprovalsListing_FailOpen_NoTokens documents the
-// fail-open contract: on a fresh install (no tokens anywhere), the middleware
-// must not block the canvas from polling /approvals/pending.
-func TestAdminAuth_Issue180_ApprovalsListing_FailOpen_NoTokens(t *testing.T) {
+// TestAdminAuth_Issue180_ApprovalsListing_FreshInstall_FailsClosed pins the
+// post-hardening contract (harden/no-fail-open-auth): on a fresh install (no
+// tokens anywhere, no ADMIN_TOKEN), the canvas polling /approvals/pending with
+// no bearer now gets 401. The former #180 fail-open (200 on no-tokens) is gone
+// — local dev now provisions an ADMIN_TOKEN and the canvas authenticates with
+// it (scripts/dev-start.sh).
+func TestAdminAuth_Issue180_ApprovalsListing_FreshInstall_FailsClosed(t *testing.T) {
 	t.Setenv("ADMIN_TOKEN", "")
+	t.Setenv("MOLECULE_ENV", "")
 	mockDB, mock, err := sqlmock.New()
 	if err != nil {
 		t.Fatalf("sqlmock.New: %v", err)
 	}
 	defer mockDB.Close()

-	// HasAnyLiveTokenGlobal returns 0 — fresh install, no tokens yet.
+	// HasAnyLiveTokenGlobal returns 0 — fresh install, no tokens yet. Probed
+	// for the 503-on-outage semantics, but it opens no path now.
 	mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
 		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))

@@ -855,24 +869,21 @@ func TestAdminAuth_Issue180_ApprovalsListing_FailOpen_NoTokens(t *testing.T) {
 	req, _ := http.NewRequest(http.MethodGet, "/approvals/pending", nil)
 	r.ServeHTTP(w, req)

-	if w.Code != http.StatusOK {
-		t.Errorf("#180 fail-open (no tokens): expected 200, got %d: %s", w.Code, w.Body.String())
+	if w.Code != http.StatusUnauthorized {
+		t.Errorf("#180 fresh-install fail-closed: expected 401, got %d: %s", w.Code, w.Body.String())
 	}
 	if err := mock.ExpectationsWereMet(); err != nil {
 		t.Errorf("unmet sqlmock expectations: %v", err)
 	}
 }

-// TestWorkspaceAuth_DevModeEscapeHatch_NoBearer_FailsOpen documents the
-// local-dev escape hatch on WorkspaceAuth. On `go run ./cmd/server` +
-// `npm run dev`, Canvas at localhost:3000 calls the platform at
-// localhost:8080 cross-port, so isSameOriginCanvas's Host==Referer
-// check fails. Without this hatch the Canvas can't show per-workspace
-// activity/delegations.
-//
-// SaaS never fires this branch because tenant provisioning sets both
-// MOLECULE_ENV=production and ADMIN_TOKEN.
-func TestWorkspaceAuth_DevModeEscapeHatch_NoBearer_FailsOpen(t *testing.T) {
+// TestWorkspaceAuth_DevMode_NoBearer_FailsClosed pins the post-hardening
+// contract (harden/no-fail-open-auth): the former local-dev escape hatch on
+// WorkspaceAuth — which let a bearer-less request through when
+// MOLECULE_ENV=development and ADMIN_TOKEN unset — is GONE. Under exactly those
+// conditions the request now 401s. Local dev authenticates with a
+// provisioned ADMIN_TOKEN handed to the Canvas (scripts/dev-start.sh).
+func TestWorkspaceAuth_DevMode_NoBearer_FailsClosed(t *testing.T) {
 	t.Setenv("MOLECULE_ENV", "development")
 	t.Setenv("ADMIN_TOKEN", "")

@@ -882,7 +893,9 @@ func TestWorkspaceAuth_DevModeEscapeHatch_NoBearer_FailsOpen(t *testing.T) {
 	}
 	defer mockDB.Close()

-	// No DB queries expected — the hatch short-circuits before any lookup.
+	// No DB queries expected — WorkspaceAuth 401s before any lookup when
+	// there is no bearer / cookie. The hatch that used to short-circuit
+	// here no longer exists.

 	r := gin.New()
 	r.GET("/workspaces/:id/activity", WorkspaceAuth(mockDB), func(c *gin.Context) {
@@ -894,8 +907,8 @@ func TestWorkspaceAuth_DevModeEscapeHatch_NoBearer_FailsOpen(t *testing.T) {
 		"/workspaces/00000000-0000-0000-0000-000000000000/activity", nil)
 	r.ServeHTTP(w, req)

-	if w.Code != http.StatusOK {
-		t.Errorf("WorkspaceAuth dev-mode hatch: expected 200, got %d: %s", w.Code, w.Body.String())
+	if w.Code != http.StatusUnauthorized {
+		t.Errorf("WorkspaceAuth dev-mode fail-closed: expected 401, got %d: %s", w.Code, w.Body.String())
 	}
 }

@@ -957,15 +970,14 @@ func TestWorkspaceAuth_DevModeEscapeHatch_IgnoredWhenAdminTokenSet(t *testing.T)
 	}
 }

-// TestAdminAuth_DevModeEscapeHatch_FailsOpenWithHasLiveTokens documents the
-// Tier-1b dev-mode escape hatch. When the platform runs with MOLECULE_ENV=development
-// and ADMIN_TOKEN is unset, AdminAuth must stay fail-open even after workspace
-// tokens land in the DB. This keeps the Canvas dashboard usable in local dev
-// after the first workspace is created (PR #1871 — quickstart bugless).
-//
-// SaaS never hits this path because tenant provisioning sets both
-// ADMIN_TOKEN and MOLECULE_ENV=production.
-func TestAdminAuth_DevModeEscapeHatch_FailsOpenWithHasLiveTokens(t *testing.T) {
+// TestAdminAuth_DevMode_NoBearer_FailsClosed pins the post-hardening contract
+// (harden/no-fail-open-auth): the former Tier-1b dev-mode escape hatch — which
+// let AdminAuth pass a bearer-less request when MOLECULE_ENV=development and
+// ADMIN_TOKEN is unset, even with live tokens in the DB — is GONE. Under exactly
+// those conditions the request now 401s. Local dev authenticates with a provisioned
+// ADMIN_TOKEN handed to the Canvas as NEXT_PUBLIC_ADMIN_TOKEN
+// (scripts/dev-start.sh).
+func TestAdminAuth_DevMode_NoBearer_FailsClosed(t *testing.T) {
 	t.Setenv("MOLECULE_ENV", "development")
 	t.Setenv("ADMIN_TOKEN", "")

@@ -976,7 +988,7 @@ func TestAdminAuth_DevModeEscapeHatch_FailsOpenWithHasLiveTokens(t *testing.T) {
 	defer mockDB.Close()

 	// HasAnyLiveTokenGlobal returns 1 — tokens exist (post first-workspace).
-	// The Tier-1 fail-open branch WOULD close here. Tier-1b must still open.
+	// Probed for the 503-on-outage semantics, but it opens no path now.
 	mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
 		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))

@@ -989,8 +1001,8 @@ func TestAdminAuth_DevModeEscapeHatch_FailsOpenWithHasLiveTokens(t *testing.T) {
 	req, _ := http.NewRequest(http.MethodGet, "/workspaces", nil)
 	r.ServeHTTP(w, req)

-	if w.Code != http.StatusOK {
-		t.Errorf("dev-mode escape hatch: expected 200, got %d: %s", w.Code, w.Body.String())
+	if w.Code != http.StatusUnauthorized {
+		t.Errorf("dev-mode fail-closed: expected 401, got %d: %s", w.Code, w.Body.String())
 	}
 	if err := mock.ExpectationsWereMet(); err != nil {
 		t.Errorf("unmet sqlmock expectations: %v", err)
@@ -1104,7 +1116,16 @@ func TestAdminAuth_Issue120_PatchWorkspace_NoBearer_Returns401(t *testing.T) {
 // Accepts bearer or a matching Origin header. MUST NOT be used anywhere a
 // forged request would leak data or create resources.

-func TestCanvasOrBearer_NoTokens_FailOpen(t *testing.T) {
+// TestCanvasOrBearer_NoTokens_FailsClosed pins the removal of the
+// lazy-bootstrap fail-open (harden/no-fail-open-auth): a zero-token install
+// must NOT pass everything through. A bearer-less request on a fresh install
+// (HasAnyLiveTokenGlobal → 0) now 401s. Bootstrap is via ADMIN_TOKEN
+// (scripts/dev-start.sh provisions it for local dev; operator/SaaS sets it in
+// production) — not a zero-config fail-open.
+//
+// Watch-it-fail: restore `if !hasLive { c.Next(); return }` in CanvasOrBearer
+// → this flips 401→200 and fails.
+func TestCanvasOrBearer_NoTokens_FailsClosed(t *testing.T) {
 	mockDB, mock, err := sqlmock.New()
 	if err != nil {
 		t.Fatalf("sqlmock: %v", err)
@@ -1114,8 +1135,10 @@ func TestCanvasOrBearer_NoTokens_FailOpen(t *testing.T) {
 	mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
 		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))

+	handlerCalled := false
 	r := gin.New()
 	r.PUT("/canvas/viewport", CanvasOrBearer(mockDB), func(c *gin.Context) {
+		handlerCalled = true
 		c.JSON(http.StatusOK, gin.H{"ok": true})
 	})

@@ -1123,8 +1146,11 @@ func TestCanvasOrBearer_NoTokens_FailOpen(t *testing.T) {
 	req, _ := http.NewRequest(http.MethodPut, "/canvas/viewport", nil)
 	r.ServeHTTP(w, req)

-	if w.Code != http.StatusOK {
-		t.Errorf("bootstrap fail-open: got %d, want 200 (%s)", w.Code, w.Body.String())
+	if w.Code != http.StatusUnauthorized {
+		t.Errorf("zero-token install must fail CLOSED (lazy-bootstrap fail-open removed): got %d, want 401 (%s)", w.Code, w.Body.String())
+	}
+	if handlerCalled {
+		t.Error("handler reached on a fresh-install bearer-less request — lazy-bootstrap fail-open not removed")
 	}
 }

@@ -1195,7 +1221,16 @@ func TestCanvasOrBearer_TokensExist_WrongOrigin_Returns401(t *testing.T) {
 	}
 }

-func TestCanvasOrBearer_TokensExist_CanvasOrigin_Passes(t *testing.T) {
+// TestCanvasOrBearer_TokensExist_ForgeableOrigin_NoBearer_FailsClosed pins the
+// removal of the cross-origin Origin-match cosmetic path
+// (harden/no-fail-open-auth). A no-bearer request whose forgeable Origin header
+// matches CORS_ORIGINS used to pass; it now 401s. The canvas always sends a
+// bearer (NEXT_PUBLIC_ADMIN_TOKEN), so legitimate traffic is unaffected, and a
+// curl that forges Origin can no longer reach even a cosmetic route.
+//
+// Watch-it-fail: restore `if canvasOriginAllowed(c.GetHeader("Origin")) {
+// c.Next(); return }` in CanvasOrBearer → this flips 401→200 and fails.
+func TestCanvasOrBearer_TokensExist_ForgeableOrigin_NoBearer_FailsClosed(t *testing.T) {
 	mockDB, mock, err := sqlmock.New()
 	if err != nil {
 		t.Fatalf("sqlmock: %v", err)
@@ -1207,18 +1242,24 @@ func TestCanvasOrBearer_TokensExist_CanvasOrigin_Passes(t *testing.T) {

 	t.Setenv("CORS_ORIGINS", "https://acme.moleculesai.app,https://bob.moleculesai.app")

+	handlerCalled := false
 	r := gin.New()
 	r.PUT("/canvas/viewport", CanvasOrBearer(mockDB), func(c *gin.Context) {
+		handlerCalled = true
 		c.JSON(http.StatusOK, gin.H{"ok": true})
 	})

 	w := httptest.NewRecorder()
 	req, _ := http.NewRequest(http.MethodPut, "/canvas/viewport", nil)
+	// A matching-but-forgeable Origin with NO bearer must NOT pass anymore.
 	req.Header.Set("Origin", "https://acme.moleculesai.app")
 	r.ServeHTTP(w, req)

-	if w.Code != http.StatusOK {
-		t.Errorf("canvas origin: got %d, want 200 (%s)", w.Code, w.Body.String())
+	if w.Code != http.StatusUnauthorized {
+		t.Errorf("no-bearer request on a forgeable matching Origin must fail CLOSED (Origin-match path removed): got %d, want 401 (%s)", w.Code, w.Body.String())
+	}
+	if handlerCalled {
+		t.Error("handler reached on a no-bearer forgeable-Origin request — Origin-match fail-open not removed")
 	}
 }

@@ -1298,21 +1339,9 @@ func TestCanvasOrBearer_WrongOrigin_Blocked(t *testing.T) {
 	}
 }

-func TestCanvasOriginAllowed_EmptyOriginRejected(t *testing.T) {
-	if canvasOriginAllowed("") {
-		t.Error("empty Origin must not pass")
-	}
-}
-
-func TestCanvasOriginAllowed_LocalhostDefault(t *testing.T) {
-	t.Setenv("CORS_ORIGINS", "")
-	if !canvasOriginAllowed("http://localhost:3000") {
-		t.Error("localhost:3000 should be allowed by default")
-	}
-	if canvasOriginAllowed("http://evil.example.com") {
-		t.Error("random origin should not be allowed")
-	}
-}
+// (harden/no-fail-open-auth) TestCanvasOriginAllowed_* were REMOVED along with
+// the canvasOriginAllowed helper they exercised — the forgeable cross-origin
+// Origin-match cosmetic path no longer exists in CanvasOrBearer.

 // ── Issue #623 regression ─────────────────────────────────────────────────────
 // AdminAuth must NOT accept forged Origin headers. Any container on the Docker
--- a/Show More
+++ b/Show More