fix(observability): enrich server log on CommitMemory plugin error

The POST /workspaces/:id/memories handler returns a generic HTTP 500 'failed to store memory' when the underlying v2 memory plugin's CommitMemory call errors. The current log.Printf("Commit memory error (plugin): %v", err) emits only the error — operators have no workspace, scope, or namespace context to diagnose recurring incidents on main (continuous-synth E2E + HMA memory-commit both currently fail with this 500; the backend error is swallowed). Fix: enrich the server-side log line with workspaceID, the requested scope, the resolved v2 namespace, a structured err_class=<type> field (for log-aggregator filtering), and the quoted err text (%q escapes special chars and makes leading/trailing whitespace visible, which %v does not). Hard constraint (same discipline as the #2392 leak fix): the underlying err stays server-log-only. The HTTP response body is UNCHANGED — still 500 'failed to store memory' with no plugin error leaked to the client. No behavior change to the write path itself. The change is one log.Printf line + a 9-line comment explaining the no-leak discipline. The new log line is: log.Printf("Commit memory plugin error: workspace=%s scope=%s namespace=%s err_class=%T err=%q", workspaceID, body.Scope, nsName, err, err). This unblocks operator diagnosis of the memory-v2 backend without changing the client surface or weakening the server's error-disclosure posture.
feat(2403): remove SOP tier system — single uniform gate (CTO 2026-06-07) (#2407)
2026-06-08 03:27:25 +00:00 · 2026-06-07 23:17:10 +00:00 · 2026-06-07 23:16:37 +00:00 · 2026-06-07 23:04:32 +00:00 · 2026-06-07 22:48:20 +00:00 · 2026-06-07 15:43:11 -07:00
130 changed files with 6228 additions and 2499 deletions
@@ -31,7 +31,7 @@
 #
 # REQUIRED_CHECKS (legacy) is a newline-separated list used when the
 # JSON variable is not set. Declared in the workflow YAML rather than
-# fetched from /branch_protections (which needs admin scope — sop-tier-bot
+# fetched from /branch_protections (which needs admin scope — the audit bot
 # has read-only). Trade dynamism for simplicity: when the required-check
 # set changes, update both branch protection AND this env. Keeping them
 # in sync is less complexity than granting the audit bot admin perms on
@@ -54,32 +54,57 @@ API="https://${GITEA_HOST}/api/v1"
 AUTH="Authorization: token ${GITEA_TOKEN}"

 # 1. Fetch the PR. If not merged, no-op.
-PR=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}")
-MERGED=$(echo "$PR" | jq -r '.merged // false')
+# Fail-closed: verify HTTP 200 before parsing. A 401/403/404 means the token
+# is invalid or the PR is inaccessible — we must NOT silently treat that as
+# "not merged" and skip the audit.
+PR_TMP=$(mktemp)
+PR_HTTP=$(curl -sS -o "$PR_TMP" -w '%{http_code}' -H "$AUTH" \
+  "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}")
+PR=$(cat "$PR_TMP")
+rm -f "$PR_TMP"
+if [ "$PR_HTTP" != "200" ]; then
+  echo "::error::GET /pulls/${PR_NUMBER} returned HTTP ${PR_HTTP} — cannot evaluate merge state."
+  exit 1
+fi
+# FAIL-CLOSED: a 200 response with a missing/malformed `merged` field must
+# NOT be treated as "not merged" (that would silently skip the audit).
+# We verify both presence AND correct type for every field we consume.
+PR_SCHEMA_OK=$(echo "$PR" | jq -r '
+  (.merged | type == "boolean") and
+  (.merge_commit_sha | type == "string") and
+  (.merged_by | type == "object") and (.merged_by.login | type == "string") and
+  (.base | type == "object") and (.base.ref | type == "string") and
+  (.head | type == "object") and (.head.sha | type == "string")
+')
+if [ "$PR_SCHEMA_OK" != "true" ]; then
+  echo "::error::GET /pulls/${PR_NUMBER} returned HTTP 200 but one or more required fields are missing, null, or of wrong type — cannot evaluate force-merge."
+  exit 1
+fi
+MERGED=$(echo "$PR" | jq -r '.merged')
 if [ "$MERGED" != "true" ]; then
  echo "::notice::PR #${PR_NUMBER} closed without merge — no audit emission."
  exit 0
 fi

-# NOTE: no || true — with set -euo pipefail, jq parse failures (e.g. field
-# missing from API response) propagate as hard errors. Use jq's // operator
-# for graceful defaults instead of bash || true guards. This was re-added by
-# 8c343e3a ("fix(gitea): add || true guards to jq pipelines") — reverted
-# here because the guards mask silent failures that hide malformed API responses.
-MERGE_SHA=$(echo "$PR" | jq -r '.merge_commit_sha // empty')
-MERGED_BY=$(echo "$PR" | jq -r '.merged_by.login // "unknown"')
+MERGE_SHA=$(echo "$PR" | jq -r '.merge_commit_sha')
+MERGED_BY=$(echo "$PR" | jq -r '.merged_by.login')
 TITLE=$(echo "$PR" | jq -r '.title // ""')
-BASE_BRANCH=$(echo "$PR" | jq -r '.base.ref // "main"')
-HEAD_SHA=$(echo "$PR" | jq -r '.head.sha // empty')
-
-if [ -z "$MERGE_SHA" ]; then
-  echo "::warning::PR #${PR_NUMBER} merged=true but no merge_commit_sha — cannot evaluate force-merge."
-  exit 0
-fi
+BASE_BRANCH=$(echo "$PR" | jq -r '.base.ref')
+HEAD_SHA=$(echo "$PR" | jq -r '.head.sha')

 # 2. Required status checks — branch-aware JSON dict takes precedence.
 if [ -n "${REQUIRED_CHECKS_JSON:-}" ]; then
-  REQUIRED=$(echo "$REQUIRED_CHECKS_JSON" | jq -r --arg branch "$BASE_BRANCH" '.[$branch] // [] | .[]')
+  # FAIL-CLOSED: if REQUIRED_CHECKS_JSON is set, the branch entry must exist
+  # and be an array. A missing branch or non-array value means the config is
+  # malformed or drifted — we must NOT silently treat it as "no checks".
+  _RC_JSON_OK=$(echo "$REQUIRED_CHECKS_JSON" | jq -r --arg branch "$BASE_BRANCH" '
+    has($branch) and (.[$branch] | type == "array")
+  ')
+  if [ "$_RC_JSON_OK" != "true" ]; then
+    echo "::error::REQUIRED_CHECKS_JSON missing or non-array entry for branch '$BASE_BRANCH' — cannot evaluate required checks."
+    exit 1
+  fi
+  REQUIRED=$(echo "$REQUIRED_CHECKS_JSON" | jq -r --arg branch "$BASE_BRANCH" '.[$branch] | .[]')
 else
  REQUIRED="$REQUIRED_CHECKS"
 fi
@@ -91,12 +116,28 @@ fi
 # 3. Status-check state at the PR HEAD (where checks ran). The merge
 #    commit doesn't get its own checks; we evaluate the PR's last
 #    commit, which is what branch protection compared against.
-STATUS=$(curl -sS -H "$AUTH" \
+# Fail-closed: verify HTTP 200. A 401/403/404 means the status is
+# unreadable — we must NOT treat that as "no statuses" and skip checks.
+STATUS_TMP=$(mktemp)
+STATUS_HTTP=$(curl -sS -o "$STATUS_TMP" -w '%{http_code}' -H "$AUTH" \
  "${API}/repos/${OWNER}/${NAME}/commits/${HEAD_SHA}/status")
+STATUS=$(cat "$STATUS_TMP")
+rm -f "$STATUS_TMP"
+if [ "$STATUS_HTTP" != "200" ]; then
+  echo "::error::GET /commits/${HEAD_SHA}/status returned HTTP ${STATUS_HTTP} — cannot evaluate required checks."
+  exit 1
+fi
+# FAIL-CLOSED: a 200 status response missing the 'statuses' array, or with
+# 'statuses' set to a non-array type (null/string/object), must NOT be treated
+# as "no checks" — that would silently declare all checks green.
+if ! echo "$STATUS" | jq -e '(.statuses | type) == "array"' >/dev/null; then
+  echo "::error::GET /commits/${HEAD_SHA}/status returned HTTP 200 but 'statuses' is missing or not an array — cannot evaluate required checks."
+  exit 1
+fi
 declare -A CHECK_STATE
 while IFS=$'\t' read -r ctx state; do
  [ -n "$ctx" ] && CHECK_STATE[$ctx]="$state"
-done < <(echo "$STATUS" | jq -r '.statuses // [] | .[] | "\(.context)\t\(.status)"')
+done < <(echo "$STATUS" | jq -r '.statuses | .[] | "\(.context)\t\(.status)"')

 # 4. For each required check, was it green at merge? YAML block scalars
 #    (`|`) leave a trailing newline; skip blank/whitespace-only lines.
@@ -317,7 +317,33 @@ def required_checks_env(audit_doc: dict, branch: str) -> set[str]:
                f"::error::REQUIRED_CHECKS_JSON['{branch}'] is {type(branch_checks).__name__}, expected list\n"
            )
            sys.exit(3)
-        return {str(item).strip() for item in branch_checks if str(item).strip()}
+        # Fail-closed validation: every entry must be a unique, non-empty string.
+        # Reject null, int, dict, empty/whitespace, or duplicate entries loudly
+        # (exit 3) — they indicate a malformed manifest that drift-detect must
+        # not silently normalize away (that would hide config errors).
+        validated: set[str] = set()
+        for idx, item in enumerate(branch_checks):
+            if not isinstance(item, str):
+                sys.stderr.write(
+                    f"::error::REQUIRED_CHECKS_JSON['{branch}'][{idx}] is "
+                    f"{type(item).__name__} (value={item!r}), expected str\n"
+                )
+                sys.exit(3)
+            stripped = item.strip()
+            if not stripped:
+                sys.stderr.write(
+                    f"::error::REQUIRED_CHECKS_JSON['{branch}'][{idx}] is "
+                    f"empty/whitespace string\n"
+                )
+                sys.exit(3)
+            if stripped in validated:
+                sys.stderr.write(
+                    f"::error::REQUIRED_CHECKS_JSON['{branch}'] contains "
+                    f"duplicate context '{stripped}' at index {idx}\n"
+                )
+                sys.exit(3)
+            validated.add(stripped)
+        return validated

    # Legacy variant fallback.
    if found_legacy:
@@ -552,23 +578,34 @@ def find_open_issue(title: str) -> dict | None:
    hourly; failing one cycle loudly is strictly better than silently
    duplicating.

-    Gitea issue search returns at most page=50 per page; one page is
-    enough as long as `[ci-drift]` issues are a tiny minority. (See
-    follow-up issue for Link-header pagination.)
+    Paginates through all open issues (limit=50 per page) until the
+    title is found or the result set is exhausted. Previously only one
+    page was fetched, causing duplicate [ci-drift] issues when the
+    existing tracking issue fell beyond page 1.
    """
-    _, results = api(
-        "GET",
-        f"/repos/{OWNER}/{NAME}/issues",
-        query={"state": "open", "type": "issues", "limit": "50"},
-    )
-    if not isinstance(results, list):
-        raise ApiError(
-            f"issue search returned non-list body (got {type(results).__name__})"
+    page = 1
+    while True:
+        _, results = api(
+            "GET",
+            f"/repos/{OWNER}/{NAME}/issues",
+            query={
+                "state": "open",
+                "type": "issues",
+                "limit": "50",
+                "page": str(page),
+            },
        )
-    for issue in results:
-        if issue.get("title") == title:
-            return issue
-    return None
+        if not isinstance(results, list):
+            raise ApiError(
+                f"issue search returned non-list body (got {type(results).__name__})"
+            )
+        for issue in results:
+            if issue.get("title") == title:
+                return issue
+        # Fewer than limit results means last page reached.
+        if len(results) < 50:
+            return None
+        page += 1


 def render_body(branch: str, findings: list[str], debug: dict) -> str:
@@ -9,27 +9,43 @@ queue. This script provides the missing serialized policy in user space:
   candidate (REQUEST_CHANGES, mergeable!=True, insufficient genuine approvals,
   or red required CI) is SKIPPED so it cannot head-of-line block newer ready
   PRs; the scan continues to the next candidate.
-2. Refuse to act unless main's BP-required contexts are green.
+2. Refuse to act unless main's BP-required contexts are green. This is also
+   the serialized backstop for direct-merge (see below): after a direct merge,
+   main re-runs push CI and this gate PAUSES the queue if main goes red, so no
+   merge piles onto an unverified/red main (issue #2358).
 3. Refuse fork PRs; the queue may only mutate same-repo branches.
-4. If the PR branch does not contain current main, call Gitea's
-   /pulls/{n}/update endpoint and stop. CI must rerun on the updated head.
+4. DIRECT-MERGE when conflict-free (issue #2358). When Gitea reports the PR
+   conflict-free (mergeable is True) and the merge bar below is met, MERGE IT
+   DIRECTLY — even if its head does not contain current main. We do NOT call
+   /pulls/{n}/update first: branch protection does not require strict
+   up-to-date, so behind-main conflict-free PRs merge cleanly, and calling
+   /update would trigger Gitea dismiss_stale_approvals (dismissing the genuine
+   approvals and forcing a re-review every tick — the rebase-churn bottleneck).
+   The /update path is used ONLY when the PR is DEFINITIVELY not mergeable
+   (mergeable is literal False) AND its head lacks current main — refreshing the
+   branch may resolve a behind-main non-conflict; a real conflict returns HTTP
+   409 and the PR is HELD per #2352. mergeable=None/missing (Gitea STILL
+   COMPUTING conflict state) is a distinct fail-closed WAIT: never merged AND
+   never /update'd — calling /update during the compute window would dismiss the
+   PR's genuine approvals (dismiss_stale_approvals) and re-introduce the exact
+   rebase-churn this queue eliminates. None is re-checked next tick.
 5. Merge ONLY when, on the PR's CURRENT head sha:
     - >= REQUIRED_APPROVALS distinct GENUINE official APPROVED reviews from
       the recognised reviewer set (not stale, not dismissed, commit_id ==
       current head), AND
     - no open official REQUEST_CHANGES on the current head, AND
     - every BP-required status context is green, AND
-     - the PR is mergeable.
+     - the PR is mergeable (Gitea reports it conflict-free).

 Authoritative gates (fail-closed):
  - The REQUIRED status contexts come from BRANCH PROTECTION
-    (`status_check_contexts`), not a hand-maintained env list. If branch
-    protection cannot be enumerated, the queue HOLDS (does not merge blindly).
-  - NON-required reds (qa-review, security-review, sop-tier, sop-checklist
-    when not branch-required, E2E Chat, Staging SaaS, ci-arm64-advisory, any
+    (`status_check_contexts`) PLUS the hardcoded governance checks
+    (qa-review, security-review, sop-checklist). If branch protection
+    cannot be enumerated, the queue HOLDS (does not merge blindly).
+  - NON-required reds (E2E Chat, Staging SaaS, ci-arm64-advisory, any
    continue-on-error job) MUST NOT block. They are reported, never gating.
  - `force_merge=true` is used ONLY when the merge is blocked *solely* by
-    missing-but-non-required governance contexts (required are green + genuine
+    missing-but-non-required advisory contexts (required are green + genuine
    approvals present). It is NEVER used to bypass a failing REQUIRED context
    or missing approvals.

@@ -128,6 +144,15 @@ OPT_OUT_LABELS = {
    ).split(",")
    if name.strip()
 } | ({HOLD_LABEL} if HOLD_LABEL else set())
+# Governance checks that are ALWAYS required for every PR, regardless of
+# branch-protection configuration. These are the uniform-gate checks that
+# must pass before any PR can merge (SOP tier removal makes them mandatory
+# for all PRs, not just tier:medium/tier:high).
+GOVERNANCE_REQUIRED_CONTEXTS = [
+    "qa-review / approved (pull_request)",
+    "security-review / approved (pull_request)",
+    "sop-checklist / all-items-acked (pull_request)",
+]
 REQUIRED_CONTEXTS_RAW = _env(
    "REQUIRED_CONTEXTS",
    default=(
@@ -268,6 +293,34 @@ def api(
        return status, {"_raw": raw.decode("utf-8", errors="replace")}


+def api_paginated(
+    method: str,
+    path: str,
+    *,
+    query: dict[str, str] | None = None,
+    page_size: int = 50,
+) -> list[dict]:
+    """Fetch all pages of a paginated Gitea list endpoint.
+
+    Gitea paginates with `page` (1-indexed) and `limit`. We loop until a
+    page returns fewer than `page_size` items, indicating the end.
+    """
+    results: list[dict] = []
+    page = 1
+    while True:
+        page_query = dict(query or {})
+        page_query["page"] = str(page)
+        page_query["limit"] = str(page_size)
+        _, body = api(method, path, query=page_query)
+        if not isinstance(body, list):
+            raise ApiError(f"{path} paginated response not list")
+        results.extend(body)
+        if len(body) < page_size:
+            break
+        page += 1
+    return results
+
+
 def required_contexts(raw: str) -> list[str]:
    return [part.strip() for part in raw.split(",") if part.strip()]

@@ -293,40 +346,15 @@ def latest_statuses_by_context(statuses: list[dict]) -> dict[str, dict]:
    return latest


-def _is_tier_low_pending_ok(
-    latest_statuses: dict[str, dict],
-    context: str,
-    pr_labels: set[str],
-) -> bool:
-    """Return True if tier:low PR can tolerate sop-checklist pending state.
-
-    Per sop-checklist-config.yaml tier_failure_mode, tier:low uses soft-fail:
-    sop-checklist posts state=pending when acks are satisfied (missing
-    manager/ceo acks are informational only). The queue should accept
-    pending instead of waiting for success.
-    """
-    if "tier:low" not in pr_labels:
-        return False
-    if "sop-checklist" not in context:
-        return False
-    status = latest_statuses.get(context) or {}
-    return status_state(status) == "pending"
-
-
 def required_contexts_green(
    latest_statuses: dict[str, dict],
    contexts: list[str],
-    pr_labels: set[str] | None = None,
 ) -> tuple[bool, list[str]]:
    missing_or_bad: list[str] = []
    for context in contexts:
        status = latest_statuses.get(context)
        state = status_state(status or {})
        if state != "success":
-            if pr_labels and _is_tier_low_pending_ok(
-                latest_statuses, context, pr_labels
-            ):
-                continue  # tier:low soft-fail: accept pending sop-checklist
            missing_or_bad.append(f"{context}={state or 'missing'}")
    return not missing_or_bad, missing_or_bad

@@ -593,29 +621,32 @@ def evaluate_merge_readiness(
    approvers: set[str],
    request_changes: list[str],
    pr_has_current_base: bool,
-    mergeable: bool,
+    mergeable: bool | None,
    pr_labels: set[str] | None = None,
 ) -> MergeDecision:
    # 1) Main's push-required contexts must be green. Combined state can be
    #    "failure" due to non-blocking jobs (continue-on-error: true) that do
    #    not gate merges, so check the explicit required set, not combined.
+    #
+    #    This main-green gate is ALSO the serialized backstop that makes the
+    #    direct-merge (no update) path safe (issue #2358): after a direct merge
+    #    of a behind-main PR, main re-runs its push CI; if a semantic main-break
+    #    slips through (PR green standalone but broken when combined with newer
+    #    main), main's required contexts go red and this gate PAUSES the queue —
+    #    no further merge piles onto an unverified/red main until it is green.
    main_latest = latest_statuses_by_context(main_status.get("statuses") or [])
    main_ok, main_bad = required_contexts_green(main_latest, push_required_contexts())
    if not main_ok:
        return MergeDecision(False, "pause", "main required contexts not green: " + ", ".join(main_bad))

-    # 2) PR head must contain current main.
-    if not pr_has_current_base:
-        return MergeDecision(False, "update", "PR head does not contain current main")
-
-    # 3) No open official REQUEST_CHANGES on the current head.
+    # 2) No open official REQUEST_CHANGES on the current head.
    if request_changes:
        return MergeDecision(
            False, "wait",
            "open REQUEST_CHANGES on current head from: " + ", ".join(sorted(request_changes)),
        )

-    # 4) Enough distinct genuine official approvals on the current head.
+    # 3) Enough distinct genuine official approvals on the current head.
    if len(approvers) < required_approvals:
        return MergeDecision(
            False, "wait",
@@ -624,26 +655,63 @@ def evaluate_merge_readiness(
            f"need {required_approvals}",
        )

-    # 5) Every BRANCH-PROTECTION-REQUIRED status context must be green. This is
-    #    the authoritative status gate — NON-required reds (qa-review,
-    #    security-review, sop-tier/sop-checklist when not BP-required, E2E Chat,
-    #    Staging SaaS, ci-arm64-advisory, continue-on-error jobs) are NOT
+    # 4) Every REQUIRED status context must be green. This includes both
+    #    branch-protection-required contexts AND the hardcoded governance checks
+    #    (qa-review, security-review, sop-checklist). NON-required reds (E2E
+    #    Chat, Staging SaaS, ci-arm64-advisory, continue-on-error jobs) are NOT
    #    consulted here and must not block.
    latest = latest_statuses_by_context(pr_status.get("statuses") or [])
-    ok, missing_or_bad = required_contexts_green(latest, required_contexts, pr_labels)
+    ok, missing_or_bad = required_contexts_green(latest, required_contexts)
    if not ok:
        return MergeDecision(False, "wait", "required contexts not green: " + ", ".join(missing_or_bad))

-    # 6) Gitea must consider the PR mergeable (no conflicts).
-    if not mergeable:
-        return MergeDecision(False, "wait", "PR is not mergeable (conflicts)")
+    # 5) DIRECT-MERGE when conflict-free (issue #2358 — throughput fix).
+    #    If Gitea reports the PR conflict-free (mergeable is True), MERGE IT
+    #    DIRECTLY even if its head does not yet contain current main. Branch
+    #    protection does NOT require strict up-to-date, so a behind-main but
+    #    conflict-free PR merges cleanly. We deliberately do NOT call
+    #    /pulls/{n}/update first: update triggers Gitea dismiss_stale_approvals,
+    #    which would dismiss the PR's genuine approvals and force a full
+    #    re-review every tick — the rebase-churn bottleneck that collapsed
+    #    throughput to ~0/hr with dozens of mergeable PRs open.
+    #
+    #    The merge bar is UNCHANGED: we only reach here with main green +
+    #    >= required genuine approvals on the current head + no open
+    #    REQUEST_CHANGES + every BP-required context green. The trade-off is
+    #    that the PR's CI ran on a possibly-behind base, so a SEMANTIC main-break
+    #    is caught by POST-merge main CI (step 1's pause backstop) rather than
+    #    pre-merge. force_merge is used ONLY for missing-but-non-required
+    #    advisory reds (required are green + approvals genuine), never to
+    #    bypass a failing required context or an approval shortfall.
+    if mergeable is True:
+        force = _non_required_red_present(latest, required_contexts)
+        return MergeDecision(True, "merge", "ready", force=force)

-    # Ready. Use force_merge ONLY if the merge would otherwise be blocked by
-    # missing-but-non-required governance contexts. Required are green and
-    # approvals are genuine, so force only bypasses non-required reds — never a
-    # failing required context or missing approval.
-    force = _non_required_red_present(latest, required_contexts)
-    return MergeDecision(True, "merge", "ready", force=force)
+    # 6) NOT (yet) mergeable. TRI-STATE, fail-closed — never merge on an unknown.
+    #    We MUST distinguish "still computing" (None/missing) from a "definitive
+    #    conflict" (False); collapsing them would route a behind-main but
+    #    STILL-COMPUTING PR into the /update path, whose dismiss_stale_approvals
+    #    is the rebase-churn this change eliminates.
+    #
+    #    mergeable is None  → Gitea has NOT finished computing conflict state.
+    #    WAIT: do nothing this tick — never /update (would dismiss genuine
+    #    approvals during the compute window → churn), never merge. Re-check next
+    #    tick once Gitea reports a decisive True/False.
+    if mergeable is None:
+        return MergeDecision(
+            False, "wait",
+            "PR mergeability is still being computed (mergeable=None) — waiting",
+        )
+
+    # mergeable is False → DEFINITIVE not-mergeable. If the head also does not
+    #    contain current main, try the /update path to refresh the branch (this
+    #    may resolve a behind-main non-conflict; a real conflict returns HTTP 409
+    #    and process_once HOLDs the PR per #2352). If the head already contains
+    #    current main yet Gitea still reports not-mergeable, there is nothing the
+    #    queue can do (genuine conflict against current main) — WAIT.
+    if not pr_has_current_base:
+        return MergeDecision(False, "update", "PR not mergeable and head does not contain current main")
+    return MergeDecision(False, "wait", "PR is not mergeable (conflicts)")


 def get_branch_head(branch: str) -> str:
@@ -659,32 +727,23 @@ def get_combined_status(sha: str) -> dict:
    """Combined status + all individual statuses for `sha`.

    The /status endpoint caps the `statuses` array at 30 entries (Gitea
-    default page size), so we fetch the full list via /statuses with a
-    higher limit. The combined `state` still comes from /status.
+    default page size), so we fetch the full list via /statuses. The combined
+    `state` still comes from /status.

-    Fail-closed: the PRIMARY /status fetch must succeed. If it raises, the
-    error propagates so the caller skips this PR this tick (we never treat a
-    failed status fetch as green — dev-sop "no fail-open"). Only the SECONDARY
-    /statuses enrichment (which merely extends the per-context list beyond the
-    30-entry cap) is best-effort; if it fails we still have the combined set.
+    Fail-closed: BOTH the PRIMARY /status fetch AND the SECONDARY /statuses
+    enrichment must succeed. If either raises, the error propagates so the
+    caller skips this PR this tick (we never treat a failed status fetch as
+    green — dev-sop "no fail-open"). A paginated /statuses error must NOT
+    silently degrade to an incomplete status set.
    """
    _, combined = api("GET", f"/repos/{OWNER}/{NAME}/commits/{sha}/status")
    if not isinstance(combined, dict):
        raise ApiError(f"status for {sha} response not object")
    combined_statuses: list[dict] = combined.get("statuses") or []
-    try:
-        _, all_statuses_raw = api(
-            "GET",
-            f"/repos/{OWNER}/{NAME}/commits/{sha}/statuses",
-            query={"limit": "50"},
-        )
-        if isinstance(all_statuses_raw, list):
-            all_statuses: list[dict] = list(all_statuses_raw)
-        else:
-            all_statuses = []
-    except (ApiError, urllib.error.URLError, TimeoutError, OSError) as exc:
-        sys.stderr.write(f"::warning::could not fetch full statuses list for {sha[:8]}: {exc}\n")
-        all_statuses = []
+    all_statuses = api_paginated(
+        "GET",
+        f"/repos/{OWNER}/{NAME}/commits/{sha}/statuses",
+    )
    # Build latest per context: process combined (ascending→reverse=newest
    # first), then fill gaps from all_statuses (already newest-first).
    latest: dict[str, dict] = {}
@@ -701,19 +760,15 @@ def get_combined_status(sha: str) -> dict:


 def list_queued_issues() -> list[dict]:
-    _, body = api(
+    return api_paginated(
        "GET",
        f"/repos/{OWNER}/{NAME}/issues",
        query={
            "state": "open",
            "type": "pulls",
            "labels": QUEUE_LABEL,
-            "limit": "50",
        },
    )
-    if not isinstance(body, list):
-        raise ApiError("queued issues response not list")
-    return body


 def list_candidate_issues(*, auto_discover: bool) -> list[dict]:
@@ -727,18 +782,14 @@ def list_candidate_issues(*, auto_discover: bool) -> list[dict]:
    """
    if not auto_discover:
        return list_queued_issues()
-    _, body = api(
+    return api_paginated(
        "GET",
        f"/repos/{OWNER}/{NAME}/issues",
        query={
            "state": "open",
            "type": "pulls",
-            "limit": "50",
        },
    )
-    if not isinstance(body, list):
-        raise ApiError("candidate issues response not list")
-    return body


 def get_pull(pr_number: int) -> dict:
@@ -877,7 +928,9 @@ def process_once(*, dry_run: bool = False) -> int:
            f"unavailable (fail-closed): {exc}\n"
        )
        return 0
-    contexts = bp.required_contexts
+    # Uniform gate: governance checks are ALWAYS required, even if branch
+    # protection does not enumerate them. Deduplicate against BP list.
+    contexts = list(dict.fromkeys(bp.required_contexts + GOVERNANCE_REQUIRED_CONTEXTS))
    required_approvals = bp.required_approvals
    print(
        f"::notice::queue policy from branch protection: "
@@ -1064,12 +1117,20 @@ def _evaluate_candidate(
    # never treated as green).
    pr_status = get_combined_status(head_sha)
    pr_labels = label_names(pr)
-    # FAIL-CLOSED: Gitea returns mergeable=None (or omits the field) while it is
-    # still COMPUTING conflict state. Only the literal True is decisive proof the
-    # PR is conflict-free; None and False both mean "not (yet) mergeable". We must
-    # NOT autonomously merge on an unknown — treat anything but True as not-yet-
-    # mergeable so evaluate_merge_readiness returns a "wait" decision.
-    mergeable = pr.get("mergeable") is True
+    # FAIL-CLOSED, TRI-STATE: Gitea returns mergeable=None (or omits the field)
+    # while it is still COMPUTING conflict state, mergeable=False for a definitive
+    # conflict, and mergeable=True only when it has proven the PR conflict-free.
+    # We preserve all THREE states (do NOT collapse None/missing into False):
+    #   - True            → direct-merge eligible (step 5).
+    #   - None / missing  → still computing → WAIT (never merge, never update,
+    #                       never dismiss approvals); re-check next tick.
+    #   - False           → definitive conflict → the update/hold path (step 6).
+    # Collapsing None→False would route a behind-main but STILL-COMPUTING PR into
+    # the /update path, which triggers dismiss_stale_approvals — the exact
+    # rebase-churn this change eliminates. Normalize only to the literal True /
+    # False / None set (some Gitea versions omit the key entirely → None).
+    raw_mergeable = pr.get("mergeable")
+    mergeable: bool | None = raw_mergeable if isinstance(raw_mergeable, bool) else None

    reviews = get_pull_reviews(pr_number)
    approvers, request_changes = genuine_approvals(
@@ -1090,26 +1151,136 @@ def _evaluate_candidate(
    return decision, ctx


+@dataclasses.dataclass(frozen=True)
+class ReadinessEntry:
+    """One candidate PR's readiness; decision is None when ineligible or unverifiable."""
+
+    pr_number: int
+    decision: MergeDecision | None
+    reason: str
+
+
+def enumerate_readiness(*, dry_run: bool = False) -> list[ReadinessEntry]:
+    """Evaluate ALL candidates and return one ReadinessEntry per PR.
+
+    Unlike process_once, this does NOT stop at the first actionable
+    candidate. Required contexts are branch protection's list plus
+    GOVERNANCE_REQUIRED_CONTEXTS (deduplicated), the same policy the merge
+    path applies. Fail-closed: if branch protection cannot be fetched,
+    BranchProtectionUnavailable propagates (caller must handle).
+    """
+    bp = get_branch_protection(WATCH_BRANCH)
+    contexts = list(dict.fromkeys(bp.required_contexts + GOVERNANCE_REQUIRED_CONTEXTS))
+    required_approvals = bp.required_approvals
+
+    main_sha = get_branch_head(WATCH_BRANCH)
+    main_status = get_combined_status(main_sha)
+    main_latest = latest_statuses_by_context(main_status.get("statuses") or [])
+    main_ok, main_bad = required_contexts_green(main_latest, push_required_contexts())
+
+    candidates = choose_candidate_issues(
+        list_candidate_issues(auto_discover=AUTO_DISCOVER),
+        queue_label=QUEUE_LABEL,
+        opt_out_labels=OPT_OUT_LABELS,
+        auto_discover=AUTO_DISCOVER,
+    )
+
+    entries: list[ReadinessEntry] = []
+    for issue in candidates:
+        pr_number = int(issue["number"])
+        try:
+            decision, ctx = _evaluate_candidate(
+                issue,
+                main_sha=main_sha,
+                main_status=main_status,
+                required_contexts=contexts,
+                required_approvals=required_approvals,
+                dry_run=dry_run,
+            )
+        except ApiError as exc:
+            # Fail-closed per candidate: an unreadable PR is recorded as
+            # unverifiable, not skipped silently.
+            entries.append(
+                ReadinessEntry(
+                    pr_number=pr_number,
+                    decision=None,
+                    reason=f"unverifiable (API error: {exc})",
+                )
+            )
+            continue
+        if decision is None:
+            entries.append(
+                ReadinessEntry(
+                    pr_number=pr_number,
+                    decision=None,
+                    reason="not merge-eligible (opt-out/draft/fork/wrong-base)",
+                )
+            )
+            continue
+        entries.append(
+            ReadinessEntry(
+                pr_number=pr_number,
+                decision=decision,
+                reason=decision.reason,
+            )
+        )
+    return entries
+
+
+def print_post_batch_summary(entries: list[ReadinessEntry]) -> None:
+    """Print a structured summary of all candidates' readiness.
+
+    Output is wrapped in a ::group::/::endgroup:: pair: key=value count
+    lines first, then one line per PR with its state, action and reason.
+    """
+    ready = [e for e in entries if e.decision and e.decision.ready]
+    waiting = [e for e in entries if e.decision and not e.decision.ready]
+    ineligible = [e for e in entries if e.decision is None]
+
+    print("::group::merge-queue readiness summary")
+    print(f"total_candidates={len(entries)}")
+    print(f"ready={len(ready)}")
+    print(f"waiting={len(waiting)}")
+    print(f"ineligible/unverifiable={len(ineligible)}")
+    print("")
+    for e in entries:
+        state = "ready" if e.decision and e.decision.ready else (
+            "waiting" if e.decision else "ineligible"
+        )
+        action = e.decision.action if e.decision else "n/a"
+        print(f"PR #{e.pr_number}: state={state} action={action} reason={e.reason}")
+    print("::endgroup::")
+
+
 def main() -> int:
    parser = argparse.ArgumentParser()
    parser.add_argument("--dry-run", action="store_true")
+    parser.add_argument(
+        "--enumerate",
+        action="store_true",
+        help="Evaluate all candidates and print a readiness summary without merging.",
+    )
    args = parser.parse_args()
    _require_runtime_env()
    try:
+        if args.enumerate:
+            entries = enumerate_readiness(dry_run=args.dry_run)
+            print_post_batch_summary(entries)
+            return 0
        return process_once(dry_run=args.dry_run)
    except ApiError as exc:
-        # API errors (401/403/404/500) are transient for a queue tick —
-        # log and exit 0 so the workflow is not marked failed and the next
-        # tick can retry. Returning non-zero would permanently fail the
-        # workflow run, blocking future ticks.
+        # FAIL-CLOSED: API errors are not "transient success" — they mean
+        # the queue could not evaluate merge state. Returning 0 hides
+        # persistent infra issues (auth drift, endpoint outages) from
+        # operators. Return 1 so the cron job surfaces red and paging fires.
        sys.stderr.write(f"::error::queue API error: {exc}\n")
-        return 0
+        return 1
    except urllib.error.URLError as exc:
        sys.stderr.write(f"::error::queue network error: {exc}\n")
-        return 0
+        return 1
    except TimeoutError as exc:
        sys.stderr.write(f"::error::queue timeout: {exc}\n")
-        return 0
+        return 1


 if __name__ == "__main__":
@@ -165,7 +165,7 @@ def api(
 # Format: "<workflow_name> / <job_name_or_key> (<event>)"
 # Examples observed on molecule-core/main:
 #   "Secret scan / Scan diff for credential-shaped strings (pull_request)"
-#   "sop-tier-check / tier-check (pull_request)"
+#   "sop-checklist / all-items-acked (pull_request)"
 #
 # Split strategy: peel off the trailing ` (<event>)` first, then split
 # the leading `<workflow> / <rest>` on the FIRST ` / ` (workflow names
@@ -17,7 +17,7 @@ Rules (4 fatal + 1 fatal cross-file + 1 heuristic-warn):
     enumeration; task #81). Workflow registers, fires for 0 events.
  3. `name:` containing `/` — breaks the
     `<workflow> / <job> (<event>)` commit-status context convention;
-     downstream parsers (sop-tier-check, status-reaper) tokenize on `/`.
+     downstream parsers (sop-checklist, status-reaper) tokenize on `/`.
  4. `name:` collision across files — Gitea routes commit-status updates
     by `name` and behavior on collision is undefined (status-reaper
     rev1 fail-loud).
@@ -150,7 +150,7 @@ def check_name_with_slash(filename: str, doc: Any) -> list[str]:
            f"::error file={filename}::Rule 3 (FATAL): workflow `name: "
            f"{name!r}` contains `/`. The commit-status context convention "
            f"is `<workflow> / <job> (<event>)`; embedding `/` in the "
-            f"workflow name makes downstream parsers (sop-tier-check, "
+            f"workflow name makes downstream parsers (sop-checklist, "
            f"status-reaper) tokenize ambiguously. Rename to use `-` or "
            f"` ` instead."
        )
@@ -49,8 +49,7 @@ Daily scheduled run + workflow_dispatch:
  4. If orphans exist:
     - File or PATCH a `[ci-bp-drift]` issue (idempotency contract:
       search for exact title prefix, edit existing if open).
-     - Apply labels `tier:high` + `ci-bp-drift` (lookup IDs per
-       repo; per `feedback_tier_label_ids_are_per_repo`).
+     - Apply label `ci-bp-drift` (lookup ID per repo).
     - Exit 1.

  5. If no orphans:
@@ -82,7 +81,7 @@ Memory cross-links
 ------------------
  - internal#350 (the RFC that specs this lint)
  - feedback_phantom_required_check_after_gitea_migration
-  - feedback_tier_label_ids_are_per_repo
+  - feedback_label_ids_are_per_repo
  - reference_post_suspension_pipeline
 """
 from __future__ import annotations
@@ -359,7 +358,7 @@ def file_or_update_issue(
                existing = h
                break

-    label_ids = _ensure_labels(repo, ["ci-bp-drift", "tier:high"])
+    label_ids = _ensure_labels(repo, ["ci-bp-drift"])

    if existing:
        api(
@@ -305,9 +305,9 @@ def validate_tracker(
    if status == "error":
        sys.stderr.write(
            f"::error::issue {slug}#{num} fetch errored — treating as "
-            f"unverified, skipping this check.\n"
+            f"unverified, FAILING CLOSED (do not skip on outage).\n"
        )
-        return (True, "fetch-error — skipped")
+        return (False, f"{slug}#{num} fetch errored — cannot verify tracker")

    assert payload is not None
    state = payload.get("state", "")
@@ -546,16 +546,24 @@ def verify_flip(flip: dict, branch: str, n: int) -> dict:

    shas = recent_commits_on_branch(branch, n)
    if not shas:
-        result["warnings"].append(
-            f"no recent commits on {branch} (cannot verify flip)"
-        )
+        result["masked_runs"].append({
+            "sha": "",
+            "status": "unverified",
+            "target_url": "",
+            "samples": [f"no recent commits on {branch} — cannot verify flip"],
+        })
        return result

    for sha in shas:
        try:
            status_doc = combined_status(sha)
        except ApiError as e:
-            result["warnings"].append(f"combined-status for {sha}: {e}")
+            result["masked_runs"].append({
+                "sha": sha,
+                "status": "error",
+                "target_url": "",
+                "samples": [f"combined-status API error: {e}"],
+            })
            continue
        statuses = status_doc.get("statuses") or []
        # First entry matching the context name. Newest SHAs come
@@ -582,6 +590,17 @@ def verify_flip(flip: dict, branch: str, n: int) -> dict:
                        "target_url": target_url,
                        "samples": ["[log unavailable; status itself is " + state + "]"],
                    })
+                elif state == "success":
+                    # Fail-closed: unreadable log on a success status is a
+                    # potential Quirk #10 mask (continue-on-error hiding real
+                    # failures). We cannot verify it's clean, so treat as
+                    # masked rather than allowing the flip.
+                    result["masked_runs"].append({
+                        "sha": sha,
+                        "status": state,
+                        "target_url": target_url,
+                        "samples": ["[log unavailable; cannot verify status is genuine — treat as masked]"],
+                    })
                break
            samples = grep_fail_markers(log_text)
            if state in ("failure", "error"):
@@ -605,10 +624,12 @@ def verify_flip(flip: dict, branch: str, n: int) -> dict:
            break

    if result["checked_commits"] == 0:
-        result["warnings"].append(
-            f"no runs of {target_context!r} found in the last {n} commits on "
-            f"{branch} — cannot verify; allowing flip with warning"
-        )
+        result["masked_runs"].append({
+            "sha": "",
+            "status": "unverified",
+            "target_url": "",
+            "samples": [f"no runs of {target_context!r} found in the last {n} commits on {branch} — cannot verify flip"],
+        })
    return result


@@ -50,7 +50,7 @@ runtime contract enforcement lives in `_require_runtime_env()`.

 Run locally (dry-run, no API mutation):
    GITEA_TOKEN=... GITEA_HOST=git.moleculesai.app REPO=owner/repo \\
-      WATCH_BRANCH=main RED_LABEL=tier:high \\
+      WATCH_BRANCH=main RED_LABEL=ci-bp-drift \\
      python3 .gitea/scripts/main-red-watchdog.py --dry-run
 """
 from __future__ import annotations
@@ -81,7 +81,7 @@ GITEA_TOKEN = _env("GITEA_TOKEN")
 GITEA_HOST = _env("GITEA_HOST")
 REPO = _env("REPO")
 WATCH_BRANCH = _env("WATCH_BRANCH", default="main")
-RED_LABEL = _env("RED_LABEL", default="tier:high")
+RED_LABEL = _env("RED_LABEL", default="ci-bp-drift")

 OWNER, NAME = (REPO.split("/", 1) + [""])[:2] if REPO else ("", "")
 API = f"https://{GITEA_HOST}/api/v1" if GITEA_HOST else ""
@@ -11,7 +11,7 @@
 #
 # Flow:
 #   1. Load .gitea/sop-checklist-config.yaml (from BASE ref — trusted).
-#   2. GET /repos/{R}/pulls/{N}          — author, head.sha, tier label
+#   2. GET /repos/{R}/pulls/{N}          — author, head.sha, labels
 #   3. GET /repos/{R}/issues/{N}/comments — extract /sop-ack and /sop-revoke
 #   4. For each checklist item:
 #        a. Is the section marker present in PR body? (author answered)
@@ -665,8 +665,8 @@ def load_config(path: str) -> dict[str, Any]:
 def _load_config_minimal(path: str) -> dict[str, Any]:
    """Minimal YAML subset parser for our config shape.

-    Supports: top-level scalar:value, top-level map-of-map (e.g.
-    tier_failure_mode), top-level list of maps (items:), and within an
+    Supports: top-level scalar:value, top-level map-of-map,
+    top-level list of maps (items:), and within an
    item map: scalars + lists of scalars. Does NOT support nested lists,
    YAML anchors, multi-doc, or flow style.
    """
@@ -835,8 +835,7 @@ def render_status(

    state is "success" if every item has at least one valid ack
    (body section presence is informational only — peer-ack is the
-    real gate).  tier:low PRs receive state="success" (soft-fail — no
-    acks required); the description carries "[info tier:low]" prefix.
+    real gate).
    """
    n = len(items)
    fully_acked = [
@@ -863,35 +862,16 @@ def render_status(
    return state, " — ".join(desc_parts)


-def get_tier_mode(pr: dict[str, Any], cfg: dict[str, Any]) -> str:
-    """Read tier label, return 'hard' or 'soft' per cfg.tier_failure_mode."""
-    labels = pr.get("labels") or []
-    tier_labels = [label.get("name", "") for label in labels if (label.get("name", "") or "").startswith("tier:")]
-    mode_map = cfg.get("tier_failure_mode") or {}
-    default_mode = cfg.get("default_mode", "hard")
-    for tl in tier_labels:
-        if tl in mode_map:
-            return mode_map[tl]
-    return default_mode
-
-
 def is_high_risk(pr: dict[str, Any], cfg: dict[str, Any]) -> bool:
    """Return True when the PR is high-risk per RFC#450 Option C.

-    A PR is high-risk when ANY of:
-      - it carries the `tier:high` label (mechanically strictest tier), or
-      - it carries any label listed in cfg.high_risk_labels.
+    A PR is high-risk when it carries any label listed in cfg.high_risk_labels.

    High-risk PRs use `required_teams_high_risk` (when set on an item)
    instead of the default `required_teams`. Items without
    `required_teams_high_risk` are unaffected (the default applies).
-
-    Governance fix for internal#442 — closes the inconsistency between
-    sop-tier-check (tier-aware) and sop-checklist (was tier-blind).
    """
    label_set = {(label.get("name") or "") for label in (pr.get("labels") or [])}
-    if "tier:high" in label_set:
-        return True
    high_risk_labels = set(cfg.get("high_risk_labels") or [])
    return bool(label_set & high_risk_labels)

@@ -1169,13 +1149,6 @@ def main(argv: list[str] | None = None) -> int:
    body_state = {it["slug"]: section_marker_present(body, it["pr_section_marker"]) for it in items}

    state, description = render_status(items, ack_state, body_state)
-    mode = get_tier_mode(pr, cfg)
-    if mode == "soft":
-        # tier:low: acks are informational only — post success so BP gate passes.
-        # Description carries "[info tier:low]" prefix so reviewers know acks
-        # were not required (vs a tier:medium+ PR that truly passed all acks).
-        state = "success"
-        description = f"[info tier:low] {description}"
    if volume_skipped:
        # Above the comment-cap — we may have a partial view. Soft-pend
        # so neither BP nor the author gets stuck; surface the cap so
@@ -1189,7 +1162,7 @@ def main(argv: list[str] | None = None) -> int:
    # Diagnostics to job log.
    print(
        f"::notice::PR #{args.pr} author={author} head={head_sha[:7]} "
-        f"mode={mode} risk_class={'high' if high_risk else 'default'}"
+        f"risk_class={'high' if high_risk else 'default'}"
    )
    for it in items:
        slug = it["slug"]
@@ -1,427 +0,0 @@
-#!/usr/bin/env bash
-# sop-tier-check — verify a Gitea PR satisfies the §SOP-6 approval gate.
-#
-# Reads the PR's tier label, walks approving reviewers, and checks team
-# membership against the tier's approval expression. Passes only when
-# ALL clauses in the expression are satisfied by the set of approving
-# reviewers (AND-composition; internal#189).
-#
-# Expression syntax:
-#   "team-a"          — OR-set: any ONE of the comma-separated teams
-#   "team-a AND team-b" — AND: BOTH must each have ≥1 approver
-#   "(a,b,c)"         — OR-set wrapped in parens; same as "a,b,c"
-#
-# Example: "qa AND security AND (managers,ceo)" means:
-#   ≥1 approver in team "qa"  AND
-#   ≥1 approver in team "security"  AND
-#   ≥1 approver in team "managers" OR "ceo"
-#
-# Per the spec (internal#189), the hard gate here pairs with the
-# advisory gate of sop-conformance LLM-judge (internal#188): each
-# required-team click must reflect real verification (visible in review
-# body or A2A messages), not rubber-stamp APPROVE. Both gates together
-# close the "teammate clicks APPROVE without verifying" gap.
-#
-# Invoked from `.gitea/workflows/sop-tier-check.yml`. The workflow sets
-# the env vars below; this script does no IO outside of stdout/stderr +
-# the Gitea API.
-#
-# Required env:
-#   GITEA_TOKEN   — bot PAT with read:organization,read:user,
-#                   read:issue,read:repository scopes
-#   GITEA_HOST    — e.g. git.moleculesai.app
-#   REPO          — owner/name (from github.repository)
-#   PR_NUMBER     — int (from github.event.pull_request.number)
-#   PR_AUTHOR     — login (from github.event.pull_request.user.login)
-#
-# Optional:
-#   SOP_DEBUG=1        — print per-API-call diagnostic lines. Default: off.
-#   SOP_LEGACY_CHECK=1 — revert to OR-gate (≥1 approver from any eligible
-#                         team). Grace window for PRs in-flight when the
-#                         new AND-composition was deployed. Expires 2026-05-17
-#                         (7-day burn-in window; internal#189 Phase 1).
-#                         Set by workflow for PRs merged before the deploy.
-
-set -euo pipefail
-
-# Ensure jq is available. Runners may not have it pre-installed, and the
-# workflow-level jq install can fail on runners with network restrictions
-# (GitHub releases not reachable from some runner networks — infra#241
-# follow-up). This fallback is idempotent — no-op when jq is already on PATH.
-if ! command -v jq >/dev/null 2>&1; then
-  echo "::notice::jq not found on PATH — attempting install..."
-  _jq_installed="no"
-  # apt-get first (primary) — Ubuntu package mirrors are reliably reachable.
-  if apt-get update -qq && apt-get install -y -qq jq 2>/dev/null; then
-    echo "::notice::jq installed via apt-get: $(jq --version)"
-    _jq_installed="yes"
-  # GitHub binary as secondary fallback — may fail on restricted networks.
-  elif timeout 120 curl -sSL \
-    "https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux-amd64" \
-    -o /usr/local/bin/jq \
-    && chmod +x /usr/local/bin/jq; then
-    echo "::notice::jq binary downloaded: $(/usr/local/bin/jq --version)"
-    _jq_installed="yes"
-  fi
-  if ! command -v jq >/dev/null 2>&1; then
-    echo "::error::jq installation failed — apt-get and GitHub binary both failed."
-    echo "::error::sop-tier-check requires jq for all JSON API parsing."
-    exit 1
-  fi
-fi
-
-debug() {
-  if [ "${SOP_DEBUG:-}" = "1" ]; then
-    echo "  [debug] $*" >&2
-  fi
-}
-
-# Validate env
-: "${GITEA_TOKEN:?GITEA_TOKEN required}"
-: "${GITEA_HOST:?GITEA_HOST required}"
-: "${REPO:?REPO required (owner/name)}"
-: "${PR_NUMBER:?PR_NUMBER required}"
-: "${PR_AUTHOR:?PR_AUTHOR required}"
-
-OWNER="${REPO%%/*}"
-NAME="${REPO##*/}"
-API="https://${GITEA_HOST}/api/v1"
-AUTH="Authorization: token ${GITEA_TOKEN}"
-echo "::notice::tier-check start: repo=$OWNER/$NAME pr=$PR_NUMBER author=$PR_AUTHOR"
-
-# Sanity: token resolves to a user.
-# Use || true on the jq pipeline so that set -euo pipefail (line 45) does not
-# cause the script to exit prematurely when the token is empty/invalid — the
-# if check below handles that case gracefully. Without || true, a 401 from an
-# empty/invalid token causes jq to exit 1, triggering set -e and exiting the
-# entire script before the error can be logged.
-WHOAMI=$(curl -sS -H "$AUTH" "${API}/user" | jq -r '.login // ""') || true
-if [ -z "$WHOAMI" ]; then
-  echo "::error::GITEA_TOKEN cannot resolve a user via /api/v1/user — check the token scope and that the secret is wired correctly."
-  exit 1
-fi
-echo "::notice::token resolves to user: $WHOAMI"
-
-# 0.5 Read PR head SHA so we can reject stale approvals after head moves
-# (internal#816). Reviews carry the commit_id they were submitted against.
-HEAD_SHA=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}" | jq -r '.head.sha // ""') || true
-if [ -z "$HEAD_SHA" ]; then
-  echo "::error::Failed to fetch PR head SHA — token may be invalid."
-  exit 1
-fi
-debug "pr-head-sha=$HEAD_SHA"
-
-# 1. Read tier label. || true ensures set -euo pipefail does not abort the
-# script if curl or jq fails (e.g. 401 from empty token).
-LABELS=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/issues/${PR_NUMBER}/labels" | jq -r '.[].name') || true
-TIER=""
-for L in $LABELS; do
-  case "$L" in
-    tier:low|tier:medium|tier:high)
-      if [ -n "$TIER" ]; then
-        echo "::error::Multiple tier labels: $TIER + $L. Apply exactly one."
-        exit 1
-      fi
-      TIER="$L"
-    ;;
-  esac
-done
-if [ -z "$TIER" ]; then
-  echo "::error::PR has no tier:low|tier:medium|tier:high label. Apply one before merge."
-  exit 1
-fi
-debug "tier=$TIER"
-
-# 2. Tier → required team expression (AND-composition; internal#189)
-#
-# Expression syntax:
-#   clause-a AND clause-b AND ...   — ALL clauses must pass
-#   team-a,team-b,team-c            — OR-set: ≥1 approver in ANY of these teams
-#   (team-a,team-b)                 — same as team-a,team-b (parens optional)
-#
-# This map is the single source of truth. Update it when the team structure
-# or policy changes. Teams referenced here but absent in Gitea are treated
-# as unachievable (would always fail) — operators notice the clear error
-# and create the missing team.
-#
-# Current Gitea teams: ceo, engineers, managers
-# Future teams (create before removing "???" fallback): qa, security, security-audit
-declare -A TIER_EXPR=(
-  # tier:low — same as previous OR gate: any engineer, manager, or ceo.
-  ["tier:low"]="engineers,managers,ceo"
-
-  # tier:medium — AND of (managers) AND (engineers) AND (qa???,security???)
-  # The qa+security clause requires both teams to exist; when not yet
-  # created, the PR author is responsible for adding them before requesting
-  # approval on a tier:medium PR. Ops: create qa + security Gitea teams
-  # and update this map to remove the "???" markers (internal#189 follow-up).
-  ["tier:medium"]="managers AND engineers AND qa???,security???"
-
-  # tier:high — ceo only. The AND-composition adds no value for a
-  # single-team gate, but the framework is wired for consistency.
-  ["tier:high"]="ceo"
-)
-
-EXPR="${TIER_EXPR[$TIER]-}"
-if [ -z "$EXPR" ]; then
-  echo "::error::No expression defined for tier $TIER in TIER_EXPR map."
-  exit 1
-fi
-debug "expression=$EXPR"
-
-# 3. Legacy OR-gate override (7-day burn-in grace window; internal#189 Phase 1)
-if [ "${SOP_LEGACY_CHECK:-}" = "1" ]; then
-  LEGACY_ELIGIBLE=""
-  case "$TIER" in
-    tier:low)    LEGACY_ELIGIBLE="engineers managers ceo" ;;
-    tier:medium) LEGACY_ELIGIBLE="managers ceo" ;;
-    tier:high)   LEGACY_ELIGIBLE="ceo" ;;
-  esac
-  echo "::notice::SOP_LEGACY_CHECK=1 — using OR-gate ({$LEGACY_ELIGIBLE}) for this PR."
-  ELIGIBLE="$LEGACY_ELIGIBLE"
-fi
-
-# 4. Resolve all team names → IDs
-# /orgs/{org}/teams/{slug}/... endpoints don't exist on Gitea 1.22;
-# we use /teams/{id}.
-# set +e prevents set -e from aborting the script if curl fails (e.g. empty token).
-ORG_TEAMS_FILE=$(mktemp)
-trap 'rm -f "$ORG_TEAMS_FILE"' EXIT
-set +e
-HTTP_CODE=$(curl -sS -o "$ORG_TEAMS_FILE" -w '%{http_code}' -H "$AUTH" \
-  "${API}/orgs/${OWNER}/teams")
-_HTTP_EXIT=$?
-set -e
-debug "teams-list HTTP=$HTTP_CODE (curl exit=$_HTTP_EXIT) size=$(wc -c <"$ORG_TEAMS_FILE")"
-if [ "${SOP_DEBUG:-}" = "1" ]; then
-  echo "  [debug] teams-list body (first 300 chars):" >&2
-  head -c 300 "$ORG_TEAMS_FILE" >&2; echo >&2
-fi
-if [ "$_HTTP_EXIT" -ne 0 ] || [ "$HTTP_CODE" != "200" ]; then
-  echo "::error::GET /orgs/${OWNER}/teams failed (curl exit=$_HTTP_EXIT HTTP=$HTTP_CODE) — token may lack read:org scope or be invalid."
-  exit 1
-fi
-
-# Collect every team name that appears in the expression.
-# Bash word-splitting on $EXPR splits on spaces, so "AND" appears as a
-# token. We skip it explicitly.
-declare -A TEAM_ID
-_all_teams=""
-for _raw_clause in $EXPR; do
-  # Strip parens and split on comma.
-  _clause=${_raw_clause//[()]/}
-  for _t in $(echo "$_clause" | tr ',' '\n'); do
-    _t=$(echo "$_t" | tr -d '[:space:]')
-    [ -z "$_t" ] && continue
-    # Skip AND / OR operator tokens (bash word-split produced them from
-    # spaces in the expression string).
-    [ "$_t" = "AND" ] || [ "$_t" = "OR" ] && continue
-    # Skip if already in set.
-    case " $_all_teams " in
-      *" $_t "*) ;;  # already present
-      *) _all_teams="${_all_teams} $_t " ;;
-    esac
-  done
-done
-
-for _t in $_all_teams; do
-  _t=$(echo "$_t" | tr -d ' ')
-  [ -z "$_t" ] && continue
-  _id=$(jq -r --arg t "$_t" '.[] | select(.name==$t) | .id' <"$ORG_TEAMS_FILE" | head -1)
-  if [ -z "$_id" ] || [ "$_id" = "null" ]; then
-    # "??" suffix marks teams that don't exist yet (tier:medium qa/security).
-    # Treat as permanently failing clause; clear error message guides ops.
-    if [[ "$_t" == *"???" ]]; then
-      debug "team \"$_t\" not found (expected — pending team creation per internal#189)"
-      continue
-    fi
-    _visible=$(jq -r '.[]?.name? // empty' <"$ORG_TEAMS_FILE" 2>/dev/null | tr '\n' ' ')
-    echo "::error::Team \"$_t\" referenced in tier $TIER expression but not found in org $OWNER. Teams visible: $_visible"
-    exit 1
-  fi
-  TEAM_ID[$_t]="$_id"
-  debug "team-id: $_t → $_id"
-done
-
-# 5. Read approving reviewers. set +e disables set -e temporarily so that curl
-# failures (e.g. empty/invalid token → HTTP 401) do not abort the script before
-# set -e is restored immediately after.
-set +e
-REVIEWS=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}/reviews")
-_REVIEWS_EXIT=$?
-set -e
-if [ $_REVIEWS_EXIT -ne 0 ] || [ -z "$REVIEWS" ]; then
-  echo "::error::Failed to fetch reviews (curl exit=$_REVIEWS_EXIT) — token may be invalid or unreachable."
-  exit 1
-fi
-APPROVERS=$(echo "$REVIEWS" | jq -r --arg head_sha "$HEAD_SHA" '[.[] | select(.state=="APPROVED" and .commit_id == $head_sha) | .user.login] | unique | .[]') || true
-if [ -z "$APPROVERS" ]; then
-  echo "::error::No approving reviews on this PR. Set SOP_DEBUG=1 and re-run for diagnostics."
-  exit 1
-fi
-debug "approvers: $(echo "$APPROVERS" | tr '\n' ' ')"
-
-# 6. For each approver: skip self-review; probe team membership by id.
-# Build $APPROVER_TEAMS[<user>]=space-surrounded team names (e.g. " managers ").
-# Pre/post spaces ensure case patterns *${_t}* match even when the name
-# is the first or last entry (bash case *word* needs delimiters on both sides).
-#
-# FAIL-CLOSED AUTHORIZATION (security: SOP tier gate is an AUTHORIZATION gate).
-#
-# This used to fall back to /orgs/{org}/members/{user} whenever every team
-# probe failed and credit any org member as a member of EVERY queried team.
-# That was a privilege-escalation: org membership is NOT team membership, so
-# a 403/visibility/token-scope gap on the team probes silently promoted a
-# plain org member to satisfy tier:high (ceo). An inability-to-verify became
-# an authorization GRANT. The fallback is REMOVED — org membership must never
-# satisfy a team-gated tier.
-#
-# A team-membership probe has exactly three meaningful outcomes:
-#   200 / 204  → the user IS a member of that team       (credit it)
-#   404        → the user is definitively NOT a member    (no credit, verified)
-#   anything else (403 / 401 / 5xx / curl failure / non-numeric)
-#              → membership CANNOT be read                 (cannot-verify)
-#
-# Per the dev-sop fail-closed rule (inability-to-verify = failure, never a
-# pass — and here, never an authorization grant), a cannot-verify outcome on
-# ANY probe is a HARD infra failure: we publish a loud cannot-verify error and
-# exit non-zero. We do NOT proceed to evaluate the tier expression on a partial
-# / unverifiable membership picture, because doing so could let an unverifiable
-# approver's clause silently fail-or-pass on incomplete data. Fix the token
-# scope (read:organization) or the runner network — not the gate.
-declare -A APPROVER_TEAMS
-_verify_failed=""   # accumulates "<user>:<team>(HTTP <code>)" for probes we could not read
-for U in $APPROVERS; do
-  [ "$U" = "$PR_AUTHOR" ] && debug "skip self-review by $U" && continue
-  for T in "${!TEAM_ID[@]}"; do
-    ID="${TEAM_ID[$T]}"
-    set +e
-    CODE=$(curl -sS -o /dev/null -w '%{http_code}' -H "$AUTH" \
-      "${API}/teams/${ID}/members/${U}")
-    _curl_exit=$?
-    set -e
-    debug "probe: $U in team $T (id=$ID) → HTTP $CODE (curl exit=$_curl_exit)"
-    if [ "$_curl_exit" -ne 0 ]; then
-      # curl itself failed (DNS, connection refused, timeout) — unreachable.
-      _verify_failed="${_verify_failed}${_verify_failed:+, }${U}:${T}(curl exit ${_curl_exit})"
-      continue
-    fi
-    case "$CODE" in
-      200|204)
-        APPROVER_TEAMS[$U]="${APPROVER_TEAMS[$U]:- } ${APPROVER_TEAMS[$U]:+ }$T "
-        debug "$U qualifies for team $T"
-        ;;
-      404)
-        # Definitively not a member of this team — a verified negative.
-        debug "$U is NOT a member of team $T (verified 404)"
-        ;;
-      *)
-        # 403/401/5xx/etc — membership is unreadable. Do NOT treat as "not a
-        # member" and do NOT fall back to org membership. This is cannot-verify.
-        _verify_failed="${_verify_failed}${_verify_failed:+, }${U}:${T}(HTTP ${CODE})"
-        ;;
-    esac
-  done
-done
-
-# Fail-closed: if ANY membership probe could not be read, we cannot make an
-# authorization decision. Publish a loud cannot-verify / infra-failed status
-# and exit non-zero. Never grant the tier on unverifiable membership.
-if [ -n "$_verify_failed" ]; then
-  echo "::error::sop-tier-check CANNOT VERIFY team membership — gate FAILS CLOSED."
-  echo "::error::Unreadable membership probe(s): ${_verify_failed}"
-  echo "::error::A team-membership probe returned 403/401/5xx (or curl failed). The SOP tier gate is an authorization gate; an inability to verify team membership is treated as a FAILURE, never a pass. Org membership is NOT team membership and is never credited as a fallback."
-  echo "::error::Fix: ensure GITEA_TOKEN (SOP_TIER_CHECK_TOKEN) has read:organization scope and the Gitea API is reachable from the runner, then re-run. Do NOT relax this gate."
-  exit 1
-fi
-
-# 7. Evaluate the tier expression.
-#
-# legacy OR-gate: use the simplified loop from before internal#189.
-if [ -n "${LEGACY_ELIGIBLE:-}" ]; then
-  OK=""
-  for _u in "${!APPROVER_TEAMS[@]}"; do
-    for _t2 in $LEGACY_ELIGIBLE; do
-      case "${APPROVER_TEAMS[$_u]}" in
-        *${_t2}*)
-          echo "::notice::approver $_u is in team $_t2 (eligible for $TIER)"
-          OK="yes"
-          break
-        ;;
-      esac
-    done
-    [ -n "$OK" ] && break
-  done
-  if [ -z "$OK" ]; then
-    echo "::error::Tier $TIER requires approval from a non-author member of {$LEGACY_ELIGIBLE}. Set SOP_DEBUG=1 to see per-probe HTTP codes."
-    exit 1
-  fi
-  echo "::notice::sop-tier-check passed: $TIER (legacy OR-gate)"
-  exit 0
-fi
-
-# AND-gate: evaluate the expression clause by clause.
-# _passed_clauses and _failed_clauses accumulate for the status description.
-_passed_clauses=""
-_failed_clauses=""
-
-for _raw_clause in $EXPR; do
-  # Normalise: strip parens, replace commas with spaces so bash word-split
-  # can iterate the OR-set members. The previous form
-  #   _clause=$(echo ... | tr ',' '\n' | tr -d '[:space:]' | grep -v '^$')
-  # collapsed every member into one concatenated token because
-  # `tr -d '[:space:]'` strips the very newlines that just separated them
-  # ("engineers,managers,ceo" -> "engineersmanagersceo"), so the OR-clause
-  # only ever evaluated as a single nonsense team name and never matched
-  # APPROVER_TEAMS. Fixed in #229: leave the comma-separated members as
-  # space-separated tokens for `for _t in $_clause`.
-  _no_parens=${_raw_clause//[()]/}
-  _clause=${_no_parens//,/ }
-  _clause_passed="no"
-  _clause_names=""
-  for _t in $_clause; do
-    # Append (don't overwrite) team name to the human-readable accumulator.
-    # The previous form `_clause_names="${_clause_names:+, }${_t}"`
-    # rewrote the variable on every iteration, so the FAIL message only
-    # ever showed the LAST team. Fixed: prepend prior value before the
-    # comma-separator, then append the new team name.
-    _clause_names="${_clause_names}${_clause_names:+, }${_t}"
-    # Skip teams not yet in Gitea (qa??? / security??? placeholders).
-    [[ "$_t" == *"???" ]] && debug "clause \"$_t\": skipped (team pending creation)" && continue
-    [ -z "${TEAM_ID[$_t]:-}" ] && debug "clause \"$_t\": no ID resolved, skipping" && continue
-    for _u in "${!APPROVER_TEAMS[@]}"; do
-      # Note: APPROVER_TEAMS values are space-surrounded (e.g. " managers ").
-      # Pattern *${_t}* matches team name anywhere in the space-padded string.
-      case "${APPROVER_TEAMS[$_u]}" in
-        *${_t}*)
-          _clause_passed="yes"
-          debug "clause \"$_t\": satisfied by $_u"
-          break
-        ;;
-      esac
-    done
-  done
-
-  # Label for display: strip "???" from pending teams.
-  _label=$(echo "$_raw_clause" | tr -d '()' | tr ',' '/' | tr -d '[:space:]' | sed 's/???//g')
-
-  if [ "$_clause_passed" = "yes" ]; then
-    # Append (don't overwrite) — same accumulator bug as _clause_names above.
-    _passed_clauses="${_passed_clauses}${_passed_clauses:+, }$_label"
-    echo "::notice::clause [$_label]: PASS — satisfied by approving reviewer(s)"
-  else
-    _failed_clauses="${_failed_clauses}${_failed_clauses:+, }$_label"
-    echo "::error::clause [$_label]: FAIL — no approving reviewer belongs to any of these teams (${_clause_names}). Set SOP_DEBUG=1 to see per-team probe results."
-  fi
-done
-
-if [ -n "$_failed_clauses" ]; then
-  echo ""
-  echo "::error::sop-tier-check FAILED for $TIER."
-  echo "  Passed :${_passed_clauses}"
-  echo "  Missing:${_failed_clauses}"
-  echo "  All clauses must be satisfied. Each missing team needs an APPROVED review from one of its members."
-  exit 1
-fi
-
-echo "::notice::sop-tier-check PASSED: $TIER — all required clauses satisfied [${_passed_clauses}]"
@@ -1,199 +0,0 @@
-#!/usr/bin/env bash
-# sop-tier-refire — re-evaluate sop-tier-check and POST status to PR head SHA.
-#
-# Invoked from `.gitea/workflows/sop-tier-refire.yml` when a repo
-# MEMBER/OWNER/COLLABORATOR comments `/refire-tier-check` on a PR.
-#
-# Behavior:
-#
-# 1. Resolve PR head SHA + author from PR_NUMBER.
-# 2. Rate-limit: if the sop-tier-check context has been POSTed in the
-#    last 30 seconds, skip (prevents comment-spam status thrash).
-# 3. Invoke `.gitea/scripts/sop-tier-check.sh` with the same env the
-#    canonical workflow provides. This is DRY: we re-use the exact AND-
-#    composition gate logic, not a watered-down approving-count check.
-# 4. POST the resulting status (success on exit 0, failure on non-zero)
-#    to `/repos/.../statuses/{HEAD_SHA}` with context
-#    "sop-tier-check / tier-check (pull_request)" — the same context name
-#    branch protection requires.
-#
-# Required env (set by sop-tier-refire.yml):
-#   GITEA_TOKEN    — org-level SOP_TIER_CHECK_TOKEN (read:org/user/issue/repo)
-#   GITEA_HOST     — e.g. git.moleculesai.app
-#   REPO           — owner/name
-#   PR_NUMBER      — PR number from issue_comment payload
-#   COMMENT_AUTHOR — login of the commenter (logged for audit)
-#
-# Optional:
-#   SOP_DEBUG=1                — verbose per-API-call diagnostics
-#   SOP_REFIRE_RATE_LIMIT_SEC  — override the 30s rate-limit (default 30)
-#   SOP_REFIRE_DISABLE_RATE_LIMIT=1 — for tests; skips the rate-limit check
-
-set -euo pipefail
-
-debug() {
-  if [ "${SOP_DEBUG:-}" = "1" ]; then
-    echo "  [debug] $*" >&2
-  fi
-}
-
-: "${GITEA_TOKEN:?GITEA_TOKEN required}"
-: "${GITEA_HOST:?GITEA_HOST required}"
-: "${REPO:?REPO required (owner/name)}"
-: "${PR_NUMBER:?PR_NUMBER required}"
-: "${COMMENT_AUTHOR:=unknown}"
-
-OWNER="${REPO%%/*}"
-NAME="${REPO##*/}"
-API="https://${GITEA_HOST}/api/v1"
-AUTH="Authorization: token ${GITEA_TOKEN}"
-CONTEXT="sop-tier-check / tier-check (pull_request)"
-RATE_LIMIT_SEC="${SOP_REFIRE_RATE_LIMIT_SEC:-30}"
-
-echo "::notice::sop-tier-refire start: repo=$OWNER/$NAME pr=$PR_NUMBER commenter=$COMMENT_AUTHOR"
-
-# 1. Fetch PR details — need head.sha and user.login.
-PR_FILE=$(mktemp)
-trap 'rm -f "$PR_FILE"' EXIT
-PR_HTTP=$(curl -sS -o "$PR_FILE" -w '%{http_code}' -H "$AUTH" \
-  "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}")
-if [ "$PR_HTTP" != "200" ]; then
-  echo "::error::GET /pulls/$PR_NUMBER returned HTTP $PR_HTTP (body $(head -c 200 "$PR_FILE"))"
-  exit 1
-fi
-HEAD_SHA=$(jq -r '.head.sha' <"$PR_FILE")
-PR_AUTHOR=$(jq -r '.user.login' <"$PR_FILE")
-PR_STATE=$(jq -r '.state' <"$PR_FILE")
-if [ -z "$HEAD_SHA" ] || [ "$HEAD_SHA" = "null" ]; then
-  echo "::error::Could not resolve head.sha from PR #$PR_NUMBER response"
-  exit 1
-fi
-debug "head_sha=$HEAD_SHA pr_author=$PR_AUTHOR state=$PR_STATE"
-
-if [ "$PR_STATE" != "open" ]; then
-  echo "::notice::PR #$PR_NUMBER state is $PR_STATE; refire is a no-op on closed PRs."
-  exit 0
-fi
-
-# 2. Rate-limit: skip if our context was updated in the last $RATE_LIMIT_SEC.
-# Gitea statuses endpoint returns latest first; we check the most recent
-# entry for our context name.
-if [ "${SOP_REFIRE_DISABLE_RATE_LIMIT:-}" != "1" ]; then
-  STATUSES_FILE=$(mktemp)
-  trap 'rm -f "$PR_FILE" "$STATUSES_FILE"' EXIT
-  ST_HTTP=$(curl -sS -o "$STATUSES_FILE" -w '%{http_code}' -H "$AUTH" \
-    "${API}/repos/${OWNER}/${NAME}/statuses/${HEAD_SHA}?limit=50&sort=newest")
-  debug "statuses-list HTTP=$ST_HTTP"
-  if [ "$ST_HTTP" = "200" ]; then
-    LAST_UPDATED=$(jq -r --arg c "$CONTEXT" \
-      '[.[] | select(.context == $c)] | first | .updated_at // ""' \
-      <"$STATUSES_FILE")
-    if [ -n "$LAST_UPDATED" ] && [ "$LAST_UPDATED" != "null" ]; then
-      # Parse RFC3339 → epoch. Use python -c for portability (date(1) -d
-      # differs between BSD/GNU; the Gitea runner is Ubuntu so GNU date
-      # works, but we keep python for future container variance).
-      LAST_EPOCH=$(python3 -c "import sys,datetime;print(int(datetime.datetime.fromisoformat(sys.argv[1].replace('Z','+00:00')).timestamp()))" "$LAST_UPDATED" 2>/dev/null || echo "0")
-      NOW_EPOCH=$(date -u +%s)
-      AGE=$((NOW_EPOCH - LAST_EPOCH))
-      debug "last status update: $LAST_UPDATED ($AGE seconds ago)"
-      if [ "$AGE" -lt "$RATE_LIMIT_SEC" ] && [ "$AGE" -ge 0 ]; then
-        echo "::notice::sop-tier-refire rate-limited — last status update was ${AGE}s ago (<${RATE_LIMIT_SEC}s window). Try again shortly."
-        exit 0
-      fi
-    fi
-  fi
-fi
-
-# 3. Invoke sop-tier-check.sh with the env it expects.
-#
-# FAIL-CLOSED contract (was fail-open — fixed 2026-06-05,
-# fix/core-ci-fail-closed). The previous shape was:
-#     bash "$SCRIPT" || true
-#     TIER_EXIT=0          # <-- hardcoded success
-# which discarded the real verdict and ALWAYS POSTed
-# `state=success` for the REQUIRED context
-# `sop-tier-check / tier-check (pull_request)`. That meant ANY
-# collaborator could comment `/refire-tier-check` to forcibly green
-# the SOP-6 approval gate on the PR head SHA — a fail-open AND a
-# privilege bypass of branch protection. The canonical
-# pull_request_target workflow's conclusion publishes the same
-# context honestly (red on a real violation); the refire MUST mirror
-# THAT honesty, not a discarded exit code.
-#
-# We now capture the script's real exit code under `set +e` and POST
-# success ONLY when it actually exited 0. sop-tier-check.sh itself
-# fails closed on infra faults (no SOP_FAIL_OPEN in this refire env),
-# so a bad token / unreachable API / missing jq → non-zero → we POST
-# `state=failure`, never a false green.
-#
-# SOP_REFIRE_TIER_CHECK_SCRIPT env var lets tests substitute a mock —
-# sop-tier-check.sh uses bash 4+ associative arrays which trigger a known
-# bash 3.2 parser bug (`tier: unbound variable` from declare -A with
-# `set -u`). Linux Gitea runners ship bash 4/5 so production is fine;
-# the override exists so the bash 3.2 dev box can still exercise the
-# refire glue logic end-to-end.
-SCRIPT="${SOP_REFIRE_TIER_CHECK_SCRIPT:-$(dirname "$0")/sop-tier-check.sh}"
-if [ ! -f "$SCRIPT" ]; then
-  echo "::error::sop-tier-check.sh not found at $SCRIPT — refire requires the canonical script"
-  exit 1
-fi
-
-# Re-invoke. Pipe stdout/stderr through so the runner log shows the
-# tier-check decision inline. Capture the REAL exit code (set +e so a
-# non-zero verdict doesn't abort this script under set -e) — the POST
-# below keys off it, so a failed tier-check posts state=failure.
-set +e
-GITEA_TOKEN="$GITEA_TOKEN" \
-  GITEA_HOST="$GITEA_HOST" \
-  REPO="$REPO" \
-  PR_NUMBER="$PR_NUMBER" \
-  PR_AUTHOR="$PR_AUTHOR" \
-  SOP_DEBUG="${SOP_DEBUG:-0}" \
-  SOP_LEGACY_CHECK="${SOP_LEGACY_CHECK:-0}" \
-  bash "$SCRIPT"
-TIER_EXIT=$?
-set -e
-debug "sop-tier-check.sh exit=$TIER_EXIT"
-
-# 4. POST the resulting status.
-if [ "$TIER_EXIT" -eq 0 ]; then
-  STATE="success"
-  DESCRIPTION="Refired via /refire-tier-check by $COMMENT_AUTHOR"
-else
-  STATE="failure"
-  DESCRIPTION="Refired via /refire-tier-check; tier-check failed (see workflow log)"
-fi
-
-# Status target_url points at the runner log so a curious reviewer can
-# follow it back. SERVER_URL + RUN_ID + JOB_ID isn't trivially constructible
-# from the bash env on Gitea 1.22.6, so we point at the PR itself.
-TARGET_URL="https://${GITEA_HOST}/${OWNER}/${NAME}/pulls/${PR_NUMBER}"
-
-POST_BODY=$(jq -nc \
-  --arg state "$STATE" \
-  --arg context "$CONTEXT" \
-  --arg description "$DESCRIPTION" \
-  --arg target_url "$TARGET_URL" \
-  '{state:$state, context:$context, description:$description, target_url:$target_url}')
-
-POST_FILE=$(mktemp)
-trap 'rm -f "$PR_FILE" "${STATUSES_FILE:-}" "$POST_FILE"' EXIT
-POST_HTTP=$(curl -sS -o "$POST_FILE" -w '%{http_code}' \
-  -X POST -H "$AUTH" -H "Content-Type: application/json" \
-  -d "$POST_BODY" \
-  "${API}/repos/${OWNER}/${NAME}/statuses/${HEAD_SHA}")
-if [ "$POST_HTTP" != "200" ] && [ "$POST_HTTP" != "201" ]; then
-  echo "::error::POST /statuses/$HEAD_SHA returned HTTP $POST_HTTP (body $(head -c 200 "$POST_FILE"))"
-  exit 1
-fi
-
-echo "::notice::sop-tier-refire posted state=$STATE for context=\"$CONTEXT\" on sha=$HEAD_SHA"
-# Exit 0: the refire JOB succeeded — it re-evaluated the gate and posted
-# an HONEST status. The gate VERDICT is carried by the POSTed status
-# ($STATE), which is what branch protection reads; a failing tier-check
-# posts state=failure (red on the PR), so there is no fail-open. We do
-# NOT also exit non-zero on a failing verdict — that would double-signal
-# the same failure as both a red status AND a red refire job. The
-# fail-open that mattered (TIER_EXIT hardcoded to 0 → always state=success)
-# is fixed above by capturing the real exit code.
-exit 0
@@ -689,8 +689,8 @@ def reap_branch(
        shas = list_recent_commit_shas(branch, limit)
    except ApiError as e:
        print(
-            "::warning::status-reaper skipped this tick because the "
-            f"commit list could not be read after retries: {e}"
+            "::error::status-reaper cannot run: commit-list API failed "
+            f"after retries: {e}"
        )
        return {
            "scanned_shas": 0,
@@ -704,6 +704,7 @@ def reap_branch(
            "compensated_cancelled_push": 0,
            "preserved_pr_without_push_success": 0,
            "compensated_per_sha": {},
+            "sha_api_errors": 0,
            "skipped": True,
            "skip_reason": "commit-list-api-error",
        }
@@ -720,6 +721,7 @@ def reap_branch(
        "compensated_cancelled_push": 0,
        "preserved_pr_without_push_success": 0,
        "compensated_per_sha": {},
+        "sha_api_errors": 0,
    }

    for sha in shas:
@@ -731,8 +733,9 @@ def reap_branch(
        try:
            combined = get_combined_status(sha)
        except ApiError as e:
+            aggregate["sha_api_errors"] += 1
            print(
-                f"::warning::get_combined_status({sha[:10]}) failed; "
+                f"::error::get_combined_status({sha[:10]}) failed; "
                f"skipping this SHA: {e}"
            )
            continue
@@ -819,6 +822,14 @@ def main() -> int:
            sort_keys=True,
        )
    )
+    # Observability: infra-failure → red. If the commit list could not be
+    # read or any per-SHA status fetch failed, the tick is incomplete and
+    # must be observable as a failure (non-zero exit) so that the cron bot
+    # or the runner can surface an alert.
+    if counters.get("skipped"):
+        return 1
+    if counters.get("sha_api_errors", 0) > 0:
+        return 1
    return 0


@@ -1,28 +0,0 @@
-#!/usr/bin/env bash
-# Mock sop-tier-check.sh for sop-tier-refire tests.
-#
-# Exits 0 ("PASS") if $MOCK_TIER_RESULT == "pass", else exits 1.
-# This lets the refire tests cover the success + failure status-POST
-# paths without invoking the real sop-tier-check.sh (which uses bash 4+
-# associative arrays — known parser bug on macOS bash 3.2 dev box).
-
-set -euo pipefail
-
-case "${MOCK_TIER_RESULT:-pass}" in
-  pass)
-    echo "::notice::mock tier-check: PASS"
-    exit 0
-    ;;
-  fail_no_label)
-    echo "::error::mock tier-check: no tier label"
-    exit 1
-    ;;
-  fail_no_approvals)
-    echo "::error::mock tier-check: no approving reviews"
-    exit 1
-    ;;
-  *)
-    echo "::error::mock tier-check: unknown MOCK_TIER_RESULT=${MOCK_TIER_RESULT:-}"
-    exit 2
-    ;;
-esac
@@ -1,208 +0,0 @@
-#!/usr/bin/env python3
-"""Stub Gitea API for sop-tier-refire test scenarios.
-
-Reads $FIXTURE_STATE_DIR/scenario to decide what to return for each
-endpoint the sop-tier-refire.sh + sop-tier-check.sh scripts call.
-Captures every POST to /statuses/{sha} into posted_statuses.jsonl so
-the test can assert what the script tried to write.
-
-Scenarios:
-  T1_success         — tier:low + APPROVED by engineer → tier-check passes
-  T2_no_tier_label   — no tier label → tier-check exits 1 before POST
-  T3_no_approvals    — tier:low but zero approving reviews → exits 1
-  T4_closed          — PR state=closed → refire is a no-op
-  T5_rate_limited    — last status update 5 seconds ago → skip
-
-Usage:
-  FIXTURE_STATE_DIR=/tmp/x python3 _refire_fixture.py 8080
-"""
-
-import datetime
-import http.server
-import json
-import os
-import re
-import sys
-import urllib.parse
-
-
-STATE_DIR = os.environ["FIXTURE_STATE_DIR"]
-
-
-def scenario() -> str:
-    p = os.path.join(STATE_DIR, "scenario")
-    if not os.path.isfile(p):
-        return "T1_success"
-    with open(p, encoding="utf-8") as f:
-        return f.read().strip()
-
-
-def now_iso() -> str:
-    return datetime.datetime.now(datetime.timezone.utc).isoformat()
-
-
-def append_post(body: dict) -> None:
-    with open(os.path.join(STATE_DIR, "posted_statuses.jsonl"), "a") as f:
-        f.write(json.dumps(body) + "\n")
-
-
-def pr_payload() -> dict:
-    sc = scenario()
-    state = "closed" if sc == "T4_closed" else "open"
-    return {
-        "number": 999,
-        "state": state,
-        "head": {"sha": "deadbeef0000111122223333444455556666"},
-        "user": {"login": "feature-author"},
-    }
-
-
-def labels_payload() -> list:
-    sc = scenario()
-    if sc == "T2_no_tier_label":
-        return [{"name": "bug"}]
-    # All other scenarios use tier:low
-    return [{"name": "tier:low"}, {"name": "ci"}]
-
-
-def reviews_payload() -> list:
-    sc = scenario()
-    if sc == "T3_no_approvals":
-        return []
-    # All other scenarios have one APPROVED review by an engineer
-    return [
-        {
-            "state": "APPROVED",
-            "user": {"login": "reviewer-engineer"},
-        }
-    ]
-
-
-def teams_payload() -> list:
-    # Mirror the real molecule-ai org teams referenced in TIER_EXPR
-    return [
-        {"id": 5, "name": "ceo"},
-        {"id": 2, "name": "engineers"},
-        {"id": 6, "name": "managers"},
-    ]
-
-
-def statuses_payload() -> list:
-    sc = scenario()
-    if sc == "T5_rate_limited":
-        recent = (
-            datetime.datetime.now(datetime.timezone.utc)
-            - datetime.timedelta(seconds=5)
-        ).isoformat()
-        return [
-            {
-                "context": "sop-tier-check / tier-check (pull_request)",
-                "state": "failure",
-                "updated_at": recent,
-            }
-        ]
-    return []
-
-
-def user_payload() -> dict:
-    # Mirrors the WHOAMI probe in sop-tier-check.sh
-    return {"login": "sop-tier-bot-fixture"}
-
-
-class Handler(http.server.BaseHTTPRequestHandler):
-    # Quiet — keep stdout for explicit logs only.
-    def log_message(self, *args, **kwargs):  # noqa: D401
-        pass
-
-    def _json(self, code: int, body) -> None:
-        payload = json.dumps(body).encode()
-        self.send_response(code)
-        self.send_header("Content-Type", "application/json")
-        self.send_header("Content-Length", str(len(payload)))
-        self.end_headers()
-        self.wfile.write(payload)
-
-    def _empty(self, code: int) -> None:
-        self.send_response(code)
-        self.send_header("Content-Length", "0")
-        self.end_headers()
-
-    def do_GET(self):  # noqa: N802
-        u = urllib.parse.urlparse(self.path)
-        path = u.path
-
-        if path == "/_ping":
-            return self._json(200, {"ok": True})
-        if path == "/api/v1/user":
-            return self._json(200, user_payload())
-
-        # /api/v1/repos/{owner}/{name}/pulls/{n}
-        m = re.match(r"^/api/v1/repos/[^/]+/[^/]+/pulls/(\d+)$", path)
-        if m:
-            return self._json(200, pr_payload())
-
-        # /api/v1/repos/{owner}/{name}/issues/{n}/labels
-        if re.match(r"^/api/v1/repos/[^/]+/[^/]+/issues/\d+/labels$", path):
-            return self._json(200, labels_payload())
-
-        # /api/v1/repos/{owner}/{name}/pulls/{n}/reviews
-        if re.match(r"^/api/v1/repos/[^/]+/[^/]+/pulls/\d+/reviews$", path):
-            return self._json(200, reviews_payload())
-
-        # /api/v1/orgs/{owner}/teams
-        if re.match(r"^/api/v1/orgs/[^/]+/teams$", path):
-            return self._json(200, teams_payload())
-
-        # /api/v1/teams/{id}/members/{login} → 204 if user is an engineer
-        m = re.match(r"^/api/v1/teams/(\d+)/members/([^/]+)$", path)
-        if m:
-            team_id, login = m.group(1), m.group(2)
-            # In our fixture reviewer-engineer ∈ engineers (id=2)
-            if team_id == "2" and login == "reviewer-engineer":
-                return self._empty(204)
-            return self._empty(404)
-
-        # /api/v1/orgs/{owner}/members/{login} — fallback path used when
-        # team-member probes all 403. We don't need it for these tests.
-        if re.match(r"^/api/v1/orgs/[^/]+/members/[^/]+$", path):
-            return self._empty(404)
-
-        # /api/v1/repos/{owner}/{name}/statuses/{sha}
-        if re.match(r"^/api/v1/repos/[^/]+/[^/]+/statuses/[^/]+$", path):
-            return self._json(200, statuses_payload())
-
-        return self._json(404, {"path": path, "msg": "fixture: no route"})
-
-    def do_POST(self):  # noqa: N802
-        u = urllib.parse.urlparse(self.path)
-        path = u.path
-        length = int(self.headers.get("Content-Length") or 0)
-        raw = self.rfile.read(length) if length else b""
-        try:
-            body = json.loads(raw) if raw else {}
-        except Exception:
-            body = {"_raw": raw.decode(errors="replace")}
-
-        if re.match(r"^/api/v1/repos/[^/]+/[^/]+/statuses/[^/]+$", path):
-            append_post(body)
-            # Echo back something status-shaped — script only checks HTTP code.
-            return self._json(
-                201,
-                {
-                    "context": body.get("context"),
-                    "state": body.get("state"),
-                    "created_at": now_iso(),
-                },
-            )
-
-        return self._json(404, {"path": path, "msg": "fixture: no route"})
-
-
-def main():
-    port = int(sys.argv[1])
-    srv = http.server.ThreadingHTTPServer(("127.0.0.1", port), Handler)
-    srv.serve_forever()
-
-
-if __name__ == "__main__":
-    main()
@@ -0,0 +1,119 @@
+#!/usr/bin/env bash
+# test_audit_force_merge.sh — regression lock for audit-force-merge fail-closed
+# behavior. Verifies every schema validation path via direct jq filter tests.
+#
+# Usage: bash test_audit_force_merge.sh
+
+set -euo pipefail
+
+fail() { echo "FAIL: $*" >&2; exit 1; }
+pass() { echo "PASS: $*"; }
+
+[ -x "$(command -v jq)" ] || { echo "SKIP: jq not on PATH"; exit 0; }
+
+HEAD_SHA="deadbeef00000000000000000000000000000000"
+
+# Local copy of the schema-validation jq filter from audit-force-merge.sh (keep in sync).
+validate_pr_schema() {
+  jq -r '
+    (.merged | type == "boolean") and
+    (.merge_commit_sha | type == "string") and
+    (.merged_by | type == "object") and (.merged_by.login | type == "string") and
+    (.base | type == "object") and (.base.ref | type == "string") and
+    (.head | type == "object") and (.head.sha | type == "string")
+  '
+}
+
+validate_statuses_type() {
+  jq -r '(.statuses | type) == "array"'
+}
+
+# T1 — valid PR payload → true
+T1=$(echo '{"merged":true,"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}' | validate_pr_schema)
+[ "$T1" = "true" ] || fail "T1: valid payload should pass schema"
+pass "T1: valid payload passes schema"
+
+# T2 — merged=false (valid types) → true (schema is about types, not values)
+T2=$(echo '{"merged":false,"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}' | validate_pr_schema)
+[ "$T2" = "true" ] || fail "T2: merged=false with valid types should pass schema"
+pass "T2: merged=false with valid types passes schema"
+
+# T3 — missing merged field → false
+T3=$(echo '{"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}' | validate_pr_schema)
+[ "$T3" = "false" ] || fail "T3: missing merged should fail schema"
+pass "T3: missing merged fails schema"
+
+# T4 — merged is string "true" instead of boolean → false
+T4=$(echo '{"merged":"true","merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}' | validate_pr_schema)
+[ "$T4" = "false" ] || fail "T4: merged as string should fail schema"
+pass "T4: merged as string fails schema"
+
+# T5 — merge_commit_sha is null → false
+T5=$(echo '{"merged":true,"merge_commit_sha":null,"merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}' | validate_pr_schema)
+[ "$T5" = "false" ] || fail "T5: null merge_commit_sha should fail schema"
+pass "T5: null merge_commit_sha fails schema"
+
+# T6 — merged_by is null → false
+T6=$(echo '{"merged":true,"merge_commit_sha":"abc","merged_by":null,"base":{"ref":"main"},"head":{"sha":"def"}}' | validate_pr_schema)
+[ "$T6" = "false" ] || fail "T6: null merged_by should fail schema"
+pass "T6: null merged_by fails schema"
+
+# T7 — base.ref is number → false
+T7=$(echo '{"merged":true,"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":123},"head":{"sha":"def"}}' | validate_pr_schema)
+[ "$T7" = "false" ] || fail "T7: numeric base.ref should fail schema"
+pass "T7: numeric base.ref fails schema"
+
+# T8 — head is missing → false
+T8=$(echo '{"merged":true,"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"}}' | validate_pr_schema)
+[ "$T8" = "false" ] || fail "T8: missing head should fail schema"
+pass "T8: missing head fails schema"
+
+# T9 — statuses missing → false
+T9=$(echo '{}' | validate_statuses_type)
+[ "$T9" = "false" ] || fail "T9: missing statuses should fail type check"
+pass "T9: missing statuses fails type check"
+
+# T10 — statuses is string → false
+T10=$(echo '{"statuses":"unexpected"}' | validate_statuses_type)
+[ "$T10" = "false" ] || fail "T10: string statuses should fail type check"
+pass "T10: string statuses fails type check"
+
+# T11 — statuses is null → false
+T11=$(echo '{"statuses":null}' | validate_statuses_type)
+[ "$T11" = "false" ] || fail "T11: null statuses should fail type check"
+pass "T11: null statuses fails type check"
+
+# T12 — statuses is array → true
+T12=$(echo '{"statuses":[{"context":"c1","status":"success"}]}' | validate_statuses_type)
+[ "$T12" = "true" ] || fail "T12: array statuses should pass type check"
+pass "T12: array statuses passes type check"
+
+# T13 — empty array statuses → true
+T13=$(echo '{"statuses":[]}' | validate_statuses_type)
+[ "$T13" = "true" ] || fail "T13: empty array statuses should pass type check"
+pass "T13: empty array statuses passes type check"
+
+# T14-T16: REQUIRED_CHECKS_JSON branch entry validation
+validate_required_checks_json() {
+  local branch="$1"
+  local json="$2"
+  echo "$json" | jq -r --arg branch "$branch" 'has($branch) and (.[$branch] | type == "array")'
+}
+
+# T14 — branch exists and is array → true
+T14=$(validate_required_checks_json "main" '{"main":["CI / all-required"]}')
+[ "$T14" = "true" ] || fail "T14: existing array branch should pass"
+pass "T14: existing array branch passes"
+
+# T15 — branch missing → false
+T15=$(validate_required_checks_json "staging" '{"main":["CI / all-required"]}')
+[ "$T15" = "false" ] || fail "T15: missing branch should fail"
+pass "T15: missing branch fails"
+
+# T16 — branch entry is string instead of array → false
+T16=$(validate_required_checks_json "main" '{"main":"CI / all-required"}')
+[ "$T16" = "false" ] || fail "T16: string branch entry should fail"
+pass "T16: string branch entry fails"
+
+echo
+echo "ALL AUDIT-FORCE-MERGE CHECKS PASSED"
@@ -107,6 +107,36 @@ def test_required_checks_env_json_malformed_fails():
        raise AssertionError("expected SystemExit(3)")


+def test_required_checks_env_json_non_string_item_fails():
+    doc = _make_audit_doc_json({"main": ["ctx-a", 123, "ctx-b"]})
+    try:
+        drift.required_checks_env(doc, "main")
+    except SystemExit as exc:
+        assert exc.code == 3
+    else:
+        raise AssertionError("expected SystemExit(3)")
+
+
+def test_required_checks_env_json_empty_string_item_fails():
+    doc = _make_audit_doc_json({"main": ["ctx-a", "   ", "ctx-b"]})
+    try:
+        drift.required_checks_env(doc, "main")
+    except SystemExit as exc:
+        assert exc.code == 3
+    else:
+        raise AssertionError("expected SystemExit(3)")
+
+
+def test_required_checks_env_json_duplicate_context_fails():
+    doc = _make_audit_doc_json({"main": ["ctx-a", "ctx-b", "ctx-a"]})
+    try:
+        drift.required_checks_env(doc, "main")
+    except SystemExit as exc:
+        assert exc.code == 3
+    else:
+        raise AssertionError("expected SystemExit(3)")
+
+
 # ---------------------------------------------------------------------------
 # sentinel_needs
 # ---------------------------------------------------------------------------
@@ -2,6 +2,8 @@ import importlib.util
 import sys
 from pathlib import Path

+import pytest
+
 SCRIPT = Path(__file__).resolve().parents[1] / "gitea-merge-queue.py"
 spec = importlib.util.spec_from_file_location("gitea_merge_queue", SCRIPT)
 mq = importlib.util.module_from_spec(spec)
@@ -44,6 +46,34 @@ def test_required_contexts_green_rejects_missing_and_pending():
    ]


+def test_required_contexts_green_rejects_volume_skipped():
+    """volume-skipped pending is a partial view, not a genuine soft-fail.
+
+    Per sop-checklist.py:1179-1187, volume_skipped posts pending with a
+    '[volume-skipped]' prefix. The merge queue must NOT treat this as an
+    acceptable soft-fail — the gate did not finish evaluating.
+    """
+    latest = mq.latest_statuses_by_context([
+        {"context": "CI / all-required (pull_request)", "status": "success"},
+        {
+            "context": "sop-checklist / all-items-acked (pull_request)",
+            "status": "pending",
+            "description": "[volume-skipped] comment-cap=1000 hit; please file ...",
+        },
+    ])
+
+    ok, missing_or_bad = mq.required_contexts_green(
+        latest,
+        [
+            "CI / all-required (pull_request)",
+            "sop-checklist / all-items-acked (pull_request)",
+        ],
+    )
+
+    assert ok is False
+    assert "sop-checklist / all-items-acked (pull_request)=pending" in missing_or_bad
+
+
 def test_choose_next_pr_sorts_by_queue_label_timestamp_then_number():
    issues = [
        {
@@ -83,7 +113,13 @@ def test_pr_needs_update_when_base_sha_absent_from_commits():


 def _ready_kwargs(**overrides):
-    """Default kwargs for a fully-ready merge; override per test."""
+    """Default kwargs for a fully-ready merge; override per test.
+
+    Includes the uniform governance checks (qa-review, security-review,
+    sop-checklist) as required contexts and green statuses, matching the
+    behaviour of process_once which merges GOVERNANCE_REQUIRED_CONTEXTS
+    with branch-protection contexts.
+    """
    base = dict(
        main_status={
            "state": "success",
@@ -91,9 +127,19 @@ def _ready_kwargs(**overrides):
        },
        pr_status={
            "state": "success",
-            "statuses": [{"context": "CI / all-required (pull_request)", "status": "success"}],
+            "statuses": [
+                {"context": "CI / all-required (pull_request)", "status": "success"},
+                {"context": "qa-review / approved (pull_request)", "status": "success"},
+                {"context": "security-review / approved (pull_request)", "status": "success"},
+                {"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
+            ],
        },
-        required_contexts=["CI / all-required (pull_request)"],
+        required_contexts=[
+            "CI / all-required (pull_request)",
+            "qa-review / approved (pull_request)",
+            "security-review / approved (pull_request)",
+            "sop-checklist / all-items-acked (pull_request)",
+        ],
        required_approvals=2,
        approvers={"agent-reviewer-cr2", "agent-researcher"},
        request_changes=[],
@@ -112,13 +158,72 @@ def test_merge_decision_requires_main_green_pr_green_and_current_base():
    assert decision.force is False  # no non-required reds present


-def test_merge_decision_updates_stale_pr_before_merge():
-    decision = mq.evaluate_merge_readiness(**_ready_kwargs(pr_has_current_base=False))
+def test_behind_main_but_mergeable_pr_merges_directly():
+    """§SOP-22 (#2358): a behind-main but CONFLICT-FREE PR (mergeable is True)
+    merges DIRECTLY — no update step. Branch protection does not require strict
+    up-to-date, and calling /update would dismiss the genuine approvals
+    (dismiss_stale_approvals), forcing re-review every tick (the throughput
+    bottleneck). This replaces the old update-before-merge behavior."""
+    decision = mq.evaluate_merge_readiness(
+        **_ready_kwargs(pr_has_current_base=False, mergeable=True)
+    )
+
+    assert decision.ready is True
+    assert decision.action == "merge"
+
+
+def test_behind_main_and_not_mergeable_pr_updates():
+    """The /update path is reached ONLY when the PR is NOT mergeable AND its head
+    lacks current main — refreshing the branch may resolve a behind-main
+    non-conflict; a real conflict 409s and is held (#2352)."""
+    decision = mq.evaluate_merge_readiness(
+        **_ready_kwargs(pr_has_current_base=False, mergeable=False)
+    )

    assert decision.ready is False
    assert decision.action == "update"


+def test_current_base_but_not_mergeable_pr_waits():
+    """Up-to-date with main yet Gitea reports not-mergeable → genuine conflict
+    against current main (or still computing). The queue cannot act: WAIT,
+    never update (update would not help) and never merge (fail-closed)."""
+    decision = mq.evaluate_merge_readiness(
+        **_ready_kwargs(pr_has_current_base=True, mergeable=False)
+    )
+
+    assert decision.ready is False
+    assert decision.action == "wait"
+    assert "not mergeable" in decision.reason
+
+
+def test_behind_main_and_mergeable_none_waits_not_update():
+    """§SOP-22 (CR2 #2374) — the churn-residual fix. A BEHIND-MAIN PR whose
+    mergeability Gitea is STILL COMPUTING (mergeable is None) must WAIT, NOT take
+    the /update path. The old code collapsed None→False, so a behind-main +
+    None PR returned action="update" → /pulls/{n}/update → dismiss_stale_approvals
+    → the exact rebase-churn this change eliminates, fired during the compute
+    window. None and False are now DISTINCT: None waits, False updates."""
+    decision = mq.evaluate_merge_readiness(
+        **_ready_kwargs(pr_has_current_base=False, mergeable=None)
+    )
+
+    assert decision.ready is False
+    assert decision.action == "wait"  # NOT "update" — no churn during compute
+    assert "computed" in decision.reason
+
+
+def test_current_base_and_mergeable_none_waits():
+    """Up-to-date with main + mergeable None (still computing) → WAIT (unchanged
+    fail-closed; just confirming None is never merged regardless of base)."""
+    decision = mq.evaluate_merge_readiness(
+        **_ready_kwargs(pr_has_current_base=True, mergeable=None)
+    )
+
+    assert decision.ready is False
+    assert decision.action == "wait"
+
+
 def test_MergePermissionError_inherits_from_ApiError():
    assert issubclass(mq.MergePermissionError, mq.ApiError)

@@ -209,16 +314,35 @@ def test_merge_blocked_when_insufficient_genuine_approvals():
    assert "insufficient genuine approvals" in decision.reason


-def test_non_required_red_does_not_block_merge():
-    # Required (CI) green; non-required governance reds present → still merge,
-    # and force is set so force_merge bypasses ONLY those non-required reds.
+def test_governance_red_blocks_merge():
+    # Uniform gate: qa-review, security-review, sop-checklist are ALWAYS
+    # required. If any of them fail/pending, the PR is blocked.
    pr_status = {
-        "state": "failure",  # combined polluted by non-required reds
+        "state": "failure",
        "statuses": [
            {"context": "CI / all-required (pull_request)", "status": "success"},
            {"context": "qa-review / approved (pull_request)", "status": "failure"},
            {"context": "security-review / approved (pull_request)", "status": "pending"},
-            {"context": "sop-tier-check / tier-check (pull_request)", "status": "failure"},
+            {"context": "sop-checklist / all-items-acked (pull_request)", "status": "failure"},
+            {"context": "Staging SaaS / e2e (pull_request)", "status": "failure"},
+        ],
+    }
+    decision = mq.evaluate_merge_readiness(**_ready_kwargs(pr_status=pr_status))
+    assert decision.ready is False
+    assert decision.action == "wait"
+    assert "required contexts not green" in decision.reason
+
+
+def test_non_required_advisory_red_does_not_block_merge():
+    # Governance checks are green; only advisory non-required reds (Staging SaaS)
+    # are present → PR is still mergeable with force_merge bypassing the advisory.
+    pr_status = {
+        "state": "failure",  # combined polluted by advisory non-required reds
+        "statuses": [
+            {"context": "CI / all-required (pull_request)", "status": "success"},
+            {"context": "qa-review / approved (pull_request)", "status": "success"},
+            {"context": "security-review / approved (pull_request)", "status": "success"},
+            {"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
            {"context": "Staging SaaS / e2e (pull_request)", "status": "failure"},
        ],
    }
@@ -322,8 +446,14 @@ def test_process_once_holds_pr_on_permanent_merge_error(monkeypatch):
    monkeypatch.setattr(mq, "get_branch_head", lambda branch: main_sha)

    def fake_combined(sha):
-        ctx = "CI / all-required (push)" if sha == main_sha else "CI / all-required (pull_request)"
-        return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
+        if sha == main_sha:
+            return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
+        return {"state": "success", "statuses": [
+            {"context": "CI / all-required (pull_request)", "status": "success"},
+            {"context": "qa-review / approved (pull_request)", "status": "success"},
+            {"context": "security-review / approved (pull_request)", "status": "success"},
+            {"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
+        ]}
    monkeypatch.setattr(mq, "get_combined_status", fake_combined)

    monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: [
@@ -389,8 +519,14 @@ def _fully_ready_process_once_monkeypatch(monkeypatch, mergeable, calls):
    monkeypatch.setattr(mq, "get_branch_head", lambda branch: main_sha)

    def fake_combined(sha):
-        ctx = "CI / all-required (push)" if sha == main_sha else "CI / all-required (pull_request)"
-        return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
+        if sha == main_sha:
+            return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
+        return {"state": "success", "statuses": [
+            {"context": "CI / all-required (pull_request)", "status": "success"},
+            {"context": "qa-review / approved (pull_request)", "status": "success"},
+            {"context": "security-review / approved (pull_request)", "status": "success"},
+            {"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
+        ]}
    monkeypatch.setattr(mq, "get_combined_status", fake_combined)

    monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: [
@@ -475,6 +611,131 @@ def test_process_once_merges_when_mergeable_is_true(monkeypatch):
    assert calls["hold_label"] is None


+def test_process_once_behind_main_mergeable_none_waits_no_update(monkeypatch):
+    """§SOP-22 (CR2 #2374) — end-to-end guard for the churn residual. The PR
+    here is behind main (its commits omit main_sha) and Gitea has not yet
+    computed mergeability (mergeable=None). process_once must return 0 without
+    touching update_pull (which dismisses genuine approvals through
+    dismiss_stale_approvals), merge_pull, or the hold label. Collapsing None
+    to False used to send this case down the /update path, churning approvals
+    during the compute window; now nothing is updated and the PR is simply
+    re-examined on the next tick."""
+    recorded = {"merge_attempts": 0, "hold_label": None, "updated": False}
+    _fully_ready_process_once_monkeypatch(monkeypatch, mergeable=None, calls=recorded)
+    # Serve a commit list that lacks main_sha so the head counts as behind
+    # main — the combination an earlier current-base None test had masked.
+    stale_sha = "a" * 40
+    monkeypatch.setattr(mq, "get_pull_commits", lambda n: [{"sha": stale_sha}])
+
+    exit_code = mq.process_once(dry_run=False)
+
+    assert exit_code == 0
+    assert recorded["updated"] is False  # no /update, so approvals stay intact
+    assert recorded["merge_attempts"] == 0  # an unknown is never merged
+    assert recorded["hold_label"] is None  # transient state: retried, not held
+
+
+# --------------------------------------------------------------------------
+# §SOP-22: DIRECT-MERGE throughput fix (#2358). A conflict-free 2-genuine PR
+# merges WITHOUT a pre-merge /update call, so its approvals are NOT dismissed by
+# dismiss_stale_approvals. The merge bar (2-genuine-on-current-head +
+# BP-required green + mergeable + no RC + opt-out) is UNCHANGED; only the
+# unnecessary update-before-merge churn is removed. The /update path survives
+# for the genuine case it is needed (not-mergeable + behind-main), where a real
+# conflict 409s and is held per #2352. mergeable=None stays fail-closed.
+# --------------------------------------------------------------------------
+
+
+def test_process_once_merges_conflict_free_pr_without_update(monkeypatch):
+    """§SOP-22(a) — core throughput fix. A fully-approved PR with no conflicts
+    is merged with update_pull never invoked. Previously a head lacking
+    current main triggered /update first, which dismissed both genuine
+    approvals (dismiss_stale_approvals) and forced re-review on every tick.
+    Expect one recorded merge attempt and no recorded update."""
+    recorded = {"merge_attempts": 0, "hold_label": None, "updated": False}
+    _fully_ready_process_once_monkeypatch(monkeypatch, mergeable=True, calls=recorded)
+    # Serve commits without main_sha so the head is behind main: the old logic
+    # would have forced update_pull for that alone; the fix merges directly.
+    stale_sha = "a" * 40
+    monkeypatch.setattr(mq, "get_pull_commits", lambda n: [{"sha": stale_sha}])
+
+    exit_code = mq.process_once(dry_run=False)
+
+    assert exit_code == 0
+    assert recorded["merge_attempts"] == 1  # went straight to merge
+    assert recorded["updated"] is False  # update_pull untouched, approvals kept
+    assert recorded["hold_label"] is None
+
+
+def test_process_once_behind_main_conflict_free_merges_directly(monkeypatch):
+    """§SOP-22(b) — the behind-main, conflict-free case spelled out: the PR is
+    merged directly, since branch protection does not demand strict up-to-date."""
+    recorded = {"merge_attempts": 0, "hold_label": None, "updated": False}
+    _fully_ready_process_once_monkeypatch(monkeypatch, mergeable=True, calls=recorded)
+    stale_sha = "a" * 40
+    monkeypatch.setattr(mq, "get_pull_commits", lambda n: [{"sha": stale_sha}])
+
+    exit_code = mq.process_once(dry_run=False)
+
+    assert exit_code == 0
+    assert recorded["merge_attempts"] == 1
+    assert recorded["updated"] is False
+
+
+def test_process_once_pauses_when_main_not_green_no_direct_merge(monkeypatch):
+    """§SOP-22 backstop — the serialized safety that makes direct-merge safe:
+    when main's required push contexts are NOT green (e.g. a prior direct merge
+    introduced a semantic main-break caught by post-merge main CI), the queue
+    PAUSES — it does NOT merge the next PR onto an unverified/red main.
+
+    The PR-side stub reports every always-required context green (CI plus the
+    qa-review / security-review / sop-checklist governance checks), so a red
+    main is the only thing left that can explain the absence of a merge."""
+    calls = {"merge_attempts": 0, "hold_label": None, "updated": False}
+    _fully_ready_process_once_monkeypatch(monkeypatch, mergeable=True, calls=calls)
+    main_sha = "b" * 40  # assumed to equal the helper's main head — TODO confirm
+
+    def red_main_combined(sha):
+        if sha == main_sha:
+            return {"state": "failure",
+                    "statuses": [{"context": "CI / all-required (push)", "status": "failure"}]}
+        return {"state": "success", "statuses": [
+            {"context": "CI / all-required (pull_request)", "status": "success"},
+            {"context": "qa-review / approved (pull_request)", "status": "success"},
+            {"context": "security-review / approved (pull_request)", "status": "success"},
+            {"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
+        ]}
+    monkeypatch.setattr(mq, "get_combined_status", red_main_combined)
+
+    rc = mq.process_once(dry_run=False)
+
+    assert rc == 0
+    assert calls["merge_attempts"] == 0  # paused — no merge onto red main
+    assert calls["updated"] is False
+
+
+def test_direct_merge_bar_unchanged_behind_main(monkeypatch):
+    """§SOP-22(d) — the merge bar is UNCHANGED on the new direct-merge path. A
+    behind-main + conflict-free PR is still rejected (no merge) when a gate
+    fails. Three gates are exercised below: insufficient genuine approvals, a
+    red required context, and an open REQUEST_CHANGES. Direct-merge removes the
+    update churn, it does NOT weaken the bar — fail-closed on every gate.
+
+    NOTE(review): the opt-out-label gate is not asserted in this test."""
+    behind_main = dict(pr_has_current_base=False, mergeable=True)
+
+    # <2 genuine approvals → wait, not merge.
+    d = mq.evaluate_merge_readiness(
+        **_ready_kwargs(approvers={"agent-researcher"}, **behind_main)
+    )
+    assert d.action == "wait" and d.ready is False
+
+    # Red required context → wait, not merge. The governance contexts are
+    # reported green so the red CI context alone accounts for the verdict.
+    red_required = {"state": "failure", "statuses": [
+        {"context": "CI / all-required (pull_request)", "status": "failure"},
+        {"context": "qa-review / approved (pull_request)", "status": "success"},
+        {"context": "security-review / approved (pull_request)", "status": "success"},
+        {"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
+    ]}
+    d = mq.evaluate_merge_readiness(
+        **_ready_kwargs(pr_status=red_required, **behind_main)
+    )
+    assert d.action == "wait" and d.ready is False
+
+    # Open REQUEST_CHANGES on current head → wait, not merge.
+    d = mq.evaluate_merge_readiness(
+        **_ready_kwargs(request_changes=["agent-reviewer-cr2"], **behind_main)
+    )
+    assert d.action == "wait" and d.ready is False
+
+
 # --------------------------------------------------------------------------
 # Fix 3: status fetch is fail-closed (failed fetch != green)
 # --------------------------------------------------------------------------
@@ -532,6 +793,61 @@ def test_status_fetch_failure_is_fail_closed(monkeypatch):
    assert merged["called"] is False


+# --------------------------------------------------------------------------
+# Pagination: api_paginated loops pages and is fail-closed on page errors
+# --------------------------------------------------------------------------
+
+def test_api_paginated_loops_pages_until_partial(monkeypatch):
+    """api_paginated keeps requesting pages until one comes back short of page_size."""
+    seen = []
+
+    def fake_api(method, path, *, query=None, **kw):
+        params = query or {}
+        page = int(params.get("page", "1"))
+        seen.append((page, int(params.get("limit", "50"))))
+        # Page 1 is full (2 items), page 2 is short, anything later is empty.
+        canned = {
+            1: [{"number": 1}, {"number": 2}],
+            2: [{"number": 3}],
+        }
+        return 200, canned.get(page, [])
+
+    monkeypatch.setattr(mq, "api", fake_api)
+    results = mq.api_paginated("GET", "/repos/o/r/issues", page_size=2)
+    assert len(results) == 3
+    numbers = [item["number"] for item in results]
+    assert numbers == [1, 2, 3]
+    assert seen == [(1, 2), (2, 2)]
+
+
+def test_api_paginated_raises_on_non_list(monkeypatch):
+    """api_paginated raises ApiError when a page body is a dict, not a list."""
+    def dict_page(method, path, *, query=None, **kw):
+        return 200, {"message": "not found"}
+    monkeypatch.setattr(mq, "api", dict_page)
+
+    with pytest.raises(mq.ApiError):
+        mq.api_paginated("GET", "/repos/o/r/issues")
+
+
+def test_get_combined_status_propagates_paginated_statuses_error(monkeypatch):
+    """An ApiError raised while paging /statuses must surface to the caller
+    (fail-closed) instead of degrading to a partial status set."""
+    for attr, value in (("OWNER", "o"), ("NAME", "r")):
+        monkeypatch.setattr(mq, attr, value)
+
+    def fake_api(method, path, *, query=None, **kw):
+        if path.endswith("/statuses"):
+            raise mq.ApiError("GET /statuses -> HTTP 502")
+        if path.endswith("/status"):
+            return 200, {"state": "success", "statuses": [{"context": "c1", "status": "success", "id": 1}]}
+        raise mq.ApiError(f"unexpected {path}")
+
+    monkeypatch.setattr(mq, "api", fake_api)
+    with pytest.raises(mq.ApiError, match="GET /statuses"):
+        mq.get_combined_status("a" * 40)
+
+
 def test_process_once_holds_tick_when_branch_protection_unavailable(monkeypatch):
    """BP enumeration failure → HOLD the whole tick (no merge, rc 0)."""
    merged = {"called": False}
@@ -614,20 +930,30 @@ def _stale_pr_update_409_monkeypatch(monkeypatch, queued_issues, calls):
    monkeypatch.setattr(mq, "get_branch_head", lambda branch: main_sha)

    def fake_combined(sha):
-        ctx = "CI / all-required (push)" if sha == main_sha else "CI / all-required (pull_request)"
-        return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
+        if sha == main_sha:
+            return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
+        return {"state": "success", "statuses": [
+            {"context": "CI / all-required (pull_request)", "status": "success"},
+            {"context": "qa-review / approved (pull_request)", "status": "success"},
+            {"context": "security-review / approved (pull_request)", "status": "success"},
+            {"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
+        ]}
    monkeypatch.setattr(mq, "get_combined_status", fake_combined)

    # Scan-loop process_once enumerates candidates via list_candidate_issues.
    monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: queued_issues)
    monkeypatch.setattr(mq, "get_pull", lambda n: {
-        "state": "open", "number": n, "mergeable": True,
+        "state": "open", "number": n, "mergeable": False,
        "base": {"ref": "main", "repo_id": 1},
        "head": {"sha": head_sha, "repo_id": 1},
        "labels": [{"name": "merge-queue"}],
    })
-    # NOTE: commits do NOT contain main_sha → pr_has_current_base is False →
-    # decision.action == "update".
+    # NOTE: mergeable is False (real conflict) AND commits do NOT contain
+    # main_sha → pr_has_current_base is False → decision.action == "update".
+    # Under the #2358 direct-merge fix the update path is reached ONLY when the
+    # PR is NOT mergeable; a mergeable=True behind-main PR would merge directly,
+    # so this fixture sets mergeable=False to exercise the #2352 409-on-update
+    # hold path.
    monkeypatch.setattr(mq, "get_pull_commits", lambda n: [{"sha": head_sha}])
    monkeypatch.setattr(mq, "get_pull_reviews", lambda n: [
        {"state": "APPROVED", "user": {"login": "agent-researcher"},
@@ -879,8 +1205,16 @@ def _wire_ready_process_once(monkeypatch, *, issues, pr_payload, calls):
    monkeypatch.setattr(mq, "get_branch_head", lambda branch: main_sha)

    def fake_combined(sha):
-        ctx = "CI / all-required (push)" if sha == main_sha else "CI / all-required (pull_request)"
-        return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
+        if sha == main_sha:
+            return {"state": "success", "statuses": [
+                {"context": "CI / all-required (push)", "status": "success"},
+            ]}
+        return {"state": "success", "statuses": [
+            {"context": "CI / all-required (pull_request)", "status": "success"},
+            {"context": "qa-review / approved (pull_request)", "status": "success"},
+            {"context": "security-review / approved (pull_request)", "status": "success"},
+            {"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
+        ]}
    monkeypatch.setattr(mq, "get_combined_status", fake_combined)
    monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: issues)
    monkeypatch.setattr(mq, "get_pull", lambda n: dict(pr_payload, number=n))
@@ -1061,8 +1395,14 @@ def _wire_multi_candidate_process_once(monkeypatch, *, issues, pulls, reviews, c
    monkeypatch.setattr(mq, "get_branch_head", lambda branch: MAIN_SHA)

    def fake_combined(sha):
-        ctx = "CI / all-required (push)" if sha == MAIN_SHA else "CI / all-required (pull_request)"
-        return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
+        if sha == MAIN_SHA:
+            return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
+        return {"state": "success", "statuses": [
+            {"context": "CI / all-required (pull_request)", "status": "success"},
+            {"context": "qa-review / approved (pull_request)", "status": "success"},
+            {"context": "security-review / approved (pull_request)", "status": "success"},
+            {"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
+        ]}
    monkeypatch.setattr(mq, "get_combined_status", fake_combined)

    monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: issues)
@@ -1194,7 +1534,12 @@ def test_hol_unready_red_required_ci_is_skipped_for_ready_pr(monkeypatch):
                    "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
        state = "failure" if sha == red_head else "success"
        return {"state": state,
-                "statuses": [{"context": "CI / all-required (pull_request)", "status": state}]}
+                "statuses": [
+                    {"context": "CI / all-required (pull_request)", "status": state},
+                    {"context": "qa-review / approved (pull_request)", "status": "success"},
+                    {"context": "security-review / approved (pull_request)", "status": "success"},
+                    {"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
+                ]}
    monkeypatch.setattr(mq, "get_combined_status", fake_combined)

    rc = mq.process_once(dry_run=False)
@@ -1289,3 +1634,126 @@ def test_process_once_defensive_skip_when_pull_payload_opted_out(monkeypatch):

    assert rc == 0
    assert calls["merged"] is None
+
+
+# ---------------------------------------------------------------------------
+# readiness-enumeration + post-batch summary
+# ---------------------------------------------------------------------------
+
+def test_enumerate_readiness_evaluates_all_candidates(monkeypatch):
+    """Every candidate gets a readiness entry; enumeration does not stop at
+    the first actionable PR."""
+    old_head, new_head = "a" * 40, "c" * 40
+
+    # Both PRs are open, non-draft and same-repo; only head and mergeable differ.
+    def pull(head_sha, mergeable):
+        return {"state": "open", "mergeable": mergeable, "draft": False,
+                "base": {"ref": "main", "repo_id": 1},
+                "head": {"sha": head_sha, "repo_id": 1}, "labels": []}
+
+    _wire_multi_candidate_process_once(
+        monkeypatch,
+        issues=[
+            _issue(500, labels=[], created="2026-06-01T01:00:00Z"),
+            _issue(501, labels=[], created="2026-06-01T02:00:00Z"),
+        ],
+        pulls={500: pull(old_head, False), 501: pull(new_head, True)},
+        reviews={500: _two_approvals(old_head), 501: _two_approvals(new_head)},
+        calls={},
+    )
+
+    entries = mq.enumerate_readiness(dry_run=False)
+
+    assert len(entries) == 2
+    readiness = {entry.pr_number: entry.decision for entry in entries}
+    assert readiness[500] is not None
+    assert readiness[500].ready is False
+    assert readiness[501] is not None
+    assert readiness[501].ready is True
+
+
+def test_enumerate_readiness_includes_ineligible_pr(monkeypatch):
+    """A fork PR (head repo differs from base repo) is reported as ineligible
+    (decision=None) and the remaining candidate is still evaluated."""
+    head = "a" * 40
+
+    def pull(head_repo_id):
+        return {"state": "open", "mergeable": True, "draft": False,
+                "base": {"ref": "main", "repo_id": 1},
+                "head": {"sha": head, "repo_id": head_repo_id}, "labels": []}
+
+    _wire_multi_candidate_process_once(
+        monkeypatch,
+        issues=[
+            _issue(600, labels=[], created="2026-06-01T01:00:00Z"),
+            _issue(601, labels=[], created="2026-06-01T02:00:00Z"),
+        ],
+        pulls={600: pull(2), 601: pull(1)},  # 600's head is in another repo (fork)
+        reviews={600: _two_approvals(head), 601: _two_approvals(head)},
+        calls={},
+    )
+
+    entries = mq.enumerate_readiness(dry_run=False)
+
+    by_pr = {entry.pr_number: entry for entry in entries}
+    fork_entry, same_repo_entry = by_pr[600], by_pr[601]
+    assert fork_entry.decision is None
+    assert "not merge-eligible" in fork_entry.reason
+    assert same_repo_entry.decision is not None
+    assert same_repo_entry.decision.ready is True
+
+
+def test_enumerate_readiness_fail_closed_on_api_error(monkeypatch):
+    """A candidate whose get_pull raises ApiError is recorded as unverifiable
+    (decision=None); the other candidate is evaluated as usual."""
+    head = "a" * 40
+
+    def pull():
+        return {"state": "open", "mergeable": True, "draft": False,
+                "base": {"ref": "main", "repo_id": 1},
+                "head": {"sha": head, "repo_id": 1}, "labels": []}
+
+    _wire_multi_candidate_process_once(
+        monkeypatch,
+        issues=[
+            _issue(700, labels=[], created="2026-06-01T01:00:00Z"),
+            _issue(701, labels=[], created="2026-06-01T02:00:00Z"),
+        ],
+        pulls={700: pull(), 701: pull()},
+        reviews={700: _two_approvals(head), 701: _two_approvals(head)},
+        calls={},
+    )
+
+    # Delegate to the get_pull in effect after wiring, failing only for PR 700.
+    wired_get_pull = mq.get_pull
+    def flaky_get_pull(n):
+        if n != 700:
+            return wired_get_pull(n)
+        raise mq.ApiError("simulated API failure")
+    monkeypatch.setattr(mq, "get_pull", flaky_get_pull)
+
+    entries = mq.enumerate_readiness(dry_run=False)
+
+    by_pr = {entry.pr_number: entry for entry in entries}
+    assert by_pr[700].decision is None
+    assert "unverifiable" in by_pr[700].reason
+    assert by_pr[701].decision is not None
+    assert by_pr[701].decision.ready is True
+
+
+def test_print_post_batch_summary_counts_correctly(capsys):
+    """The summary prints per-state totals plus one state line per PR."""
+    ready = mq.MergeDecision(True, "merge", "ready")
+    waiting = mq.MergeDecision(False, "wait", "CI red")
+    mq.print_post_batch_summary([
+        mq.ReadinessEntry(pr_number=1, decision=ready, reason="ready"),
+        mq.ReadinessEntry(pr_number=2, decision=waiting, reason="CI red"),
+        mq.ReadinessEntry(pr_number=3, decision=None, reason="draft"),
+    ])
+    out = capsys.readouterr().out
+    expected_fragments = (
+        "total_candidates=3", "ready=1", "waiting=1", "ineligible/unverifiable=1",
+        "PR #1: state=ready", "PR #2: state=waiting", "PR #3: state=ineligible",
+    )
+    for fragment in expected_fragments:
+        assert fragment in out
@@ -320,10 +320,10 @@ class TestVerifyFlip(unittest.TestCase):
        self.assertEqual(len(verdict["fail_runs"]), 1)
        self.assertEqual(verdict["fail_runs"][0]["status"], "failure")

-    def test_unreadable_log_warns_not_blocks(self):
-        # Acceptance test #5: log fetch 404 (None) → warn, not block.
-        # Status is `success`, log is None — we can't tell, so we warn
-        # and allow.
+    def test_unreadable_log_on_success_blocks(self):
+        # Fail-closed: log fetch 404 (None) on a success status is a
+        # potential Quirk #10 mask — we cannot verify it's genuine, so
+        # we block the flip rather than allowing it.
        with mock.patch.object(lpfc, "recent_commits_on_branch", return_value=["sha1"]):
            with mock.patch.object(
                lpfc, "combined_status",
@@ -332,7 +332,8 @@ class TestVerifyFlip(unittest.TestCase):
                with mock.patch.object(lpfc, "fetch_log", return_value=None):
                    verdict = lpfc.verify_flip(FLIP_FIXTURE, "main", 5)
        self.assertEqual(verdict["fail_runs"], [])
-        self.assertEqual(verdict["masked_runs"], [])
+        self.assertEqual(len(verdict["masked_runs"]), 1)
+        self.assertIn("log unavailable", verdict["masked_runs"][0]["samples"][0])
        self.assertTrue(any("log unavailable" in w for w in verdict["warnings"]))

    def test_unreadable_log_with_failure_status_still_blocks(self):
@@ -349,9 +350,9 @@ class TestVerifyFlip(unittest.TestCase):
        self.assertEqual(len(verdict["fail_runs"]), 1)
        self.assertIn("log unavailable", verdict["fail_runs"][0]["samples"][0])

-    def test_zero_runs_history_warns_allows(self):
-        # No commits with a matching context — newly added workflow.
-        # Allow with warning.
+    def test_zero_runs_history_blocks(self):
+        # No commits with a matching context — cannot verify the flip.
+        # Fail-closed: treat as masked rather than allowing.
        with mock.patch.object(lpfc, "recent_commits_on_branch", return_value=["sha1", "sha2"]):
            with mock.patch.object(
                lpfc, "combined_status",
@@ -360,17 +361,32 @@ class TestVerifyFlip(unittest.TestCase):
                verdict = lpfc.verify_flip(FLIP_FIXTURE, "main", 5)
        self.assertEqual(verdict["checked_commits"], 0)
        self.assertEqual(verdict["fail_runs"], [])
-        self.assertEqual(verdict["masked_runs"], [])
-        self.assertTrue(any("no runs of" in w for w in verdict["warnings"]))
+        self.assertEqual(len(verdict["masked_runs"]), 1)
+        self.assertIn("cannot verify flip", verdict["masked_runs"][0]["samples"][0])

-    def test_zero_commits_warns_allows(self):
-        # Empty branch (newly created repo, e.g.). Allow with warning.
+    def test_zero_commits_blocks(self):
+        # Empty branch (newly created repo, e.g.). Fail-closed: block.
        with mock.patch.object(lpfc, "recent_commits_on_branch", return_value=[]):
            verdict = lpfc.verify_flip(FLIP_FIXTURE, "main", 5)
        self.assertEqual(verdict["checked_commits"], 0)
        self.assertEqual(verdict["fail_runs"], [])
-        self.assertEqual(verdict["masked_runs"], [])
-        self.assertTrue(any("no recent commits" in w for w in verdict["warnings"]))
+        self.assertEqual(len(verdict["masked_runs"]), 1)
+        self.assertIn("cannot verify flip", verdict["masked_runs"][0]["samples"][0])
+
+    def test_combined_status_api_error_blocks(self):
+        # Fail-closed: when combined_status raises ApiError the check history
+        # cannot be read, so the flip is unverifiable and must be blocked.
+        api_failure = lpfc.ApiError("GET /statuses/sha → HTTP 500")
+        commits = mock.patch.object(lpfc, "recent_commits_on_branch", return_value=["sha1"])
+        status = mock.patch.object(lpfc, "combined_status", side_effect=api_failure)
+        with commits, status:
+            verdict = lpfc.verify_flip(FLIP_FIXTURE, "main", 5)
+        self.assertEqual(verdict["checked_commits"], 0)
+        self.assertEqual(verdict["fail_runs"], [])
+        # Two masked entries: the ApiError itself, then the zero-checked-commits case.
+        masked = verdict["masked_runs"]
+        self.assertEqual(len(masked), 2)
+        self.assertIn("API error", masked[0]["samples"][0])


 # --------------------------------------------------------------------------
@@ -17,7 +17,7 @@ wd.REPO = "molecule-ai/molecule-core"
 wd.OWNER = "molecule-ai"
 wd.NAME = "molecule-core"
 wd.WATCH_BRANCH = "main"
-wd.RED_LABEL = "tier:high"
+wd.RED_LABEL = "ci-bp-drift"
 wd.API = "https://git.example.com/api/v1"


@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Anti-regression gate for #2403: fail if any SOP tier artifact reappears.
+# Fail-closed: a scan that cannot run (missing target, grep error) counts as
+# a failure rather than a silent pass.
+
+cd "$(dirname "$0")/../../.."
+
+fail=0
+
+# scan_absent MESSAGE GREP_ARG...
+# Runs `grep GREP_ARG...` with any hits sent to stderr, then maps its status:
+#   exit 0 (match)    -> regression: report MESSAGE and mark failure
+#   exit 1 (no match) -> clean
+#   anything else     -> the scan itself failed: mark failure
+scan_absent() {
+  local message=$1 rc=0
+  shift
+  grep "$@" >&2 || rc=$?
+  case "$rc" in
+    0) echo "FAIL: $message" >&2; fail=1 ;;
+    1) ;;
+    *) echo "FAIL: grep $* exited $rc; cannot verify" >&2; fail=1 ;;
+  esac
+}
+
+# 1+2. Deleted workflow and script files must stay deleted
+for f in \
+  .gitea/workflows/sop-tier-check.yml .gitea/workflows/sop-tier-refire.yml \
+  .gitea/scripts/sop-tier-check.sh .gitea/scripts/sop-tier-refire.sh; do
+  if [ -e "$f" ]; then
+    echo "FAIL: $f was re-added (must stay deleted per #2403)" >&2
+    fail=1
+  fi
+done
+
+# 3. No tier branching logic in gate_check.py
+scan_absent "tier branching reappeared in gate_check.py" \
+  -E '_get_pr_tier|TIER_AGENTS' tools/gate-check-v3/gate_check.py
+
+# 4. No _is_tier_low_pending_ok in merge queue
+scan_absent "tier soft-fail reappeared in gitea-merge-queue.py" \
+  '_is_tier_low_pending_ok' .gitea/scripts/gitea-merge-queue.py
+
+# 5. No sop-tier-check context references in workflow YAML
+scan_absent "sop-tier-check context reappeared in workflows" \
+  -r 'sop-tier-check' .gitea/workflows/
+
+if [ "$fail" -eq 1 ]; then
+  echo "TIER_REGRESSION_DETECTED" >&2
+  exit 1
+fi
+
+echo "PASS: no tier regression detected"
@@ -11,7 +11,7 @@
 #   - compute_ack_state (self-ack rejected, team probe applied, revoke
 #     invalidates own prior ack, peer's ack survives unrevoked)
 #   - render_status (state + description format)
-#   - get_tier_mode (label-driven, default fallback)
+#   - is_high_risk (label-driven, default fallback)
 #   - load_config (default config parses cleanly with both PyYAML and
 #     the bundled minimal parser)
 #
@@ -432,37 +432,6 @@ class TestRenderStatus(unittest.TestCase):
        self.assertIn("body-unfilled", desc)


-# ---------------------------------------------------------------------------
-# get_tier_mode
-# ---------------------------------------------------------------------------
-
-
-class TestGetTierMode(unittest.TestCase):
-    def setUp(self):
-        self.cfg = sop.load_config(CONFIG_PATH)
-
-    def test_tier_high_is_hard(self):
-        pr = {"labels": [{"name": "tier:high"}, {"name": "area:ci"}]}
-        self.assertEqual(sop.get_tier_mode(pr, self.cfg), "hard")
-
-    def test_tier_medium_is_hard(self):
-        pr = {"labels": [{"name": "tier:medium"}]}
-        self.assertEqual(sop.get_tier_mode(pr, self.cfg), "hard")
-
-    def test_tier_low_is_soft(self):
-        pr = {"labels": [{"name": "tier:low"}]}
-        self.assertEqual(sop.get_tier_mode(pr, self.cfg), "soft")
-
-    def test_no_tier_label_defaults_to_hard(self):
-        # Per feedback_fix_root_not_symptom — never silently lower the bar.
-        pr = {"labels": [{"name": "area:ci"}]}
-        self.assertEqual(sop.get_tier_mode(pr, self.cfg), "hard")
-
-    def test_no_labels_defaults_to_hard(self):
-        self.assertEqual(sop.get_tier_mode({"labels": []}, self.cfg), "hard")
-        self.assertEqual(sop.get_tier_mode({}, self.cfg), "hard")
-
-
 # ---------------------------------------------------------------------------
 # load_config
 # ---------------------------------------------------------------------------
@@ -487,13 +456,6 @@ class TestLoadConfig(unittest.TestCase):
            },
        )

-    def test_default_config_tier_mode_shape(self):
-        cfg = sop.load_config(CONFIG_PATH)
-        self.assertEqual(cfg["tier_failure_mode"]["tier:high"], "hard")
-        self.assertEqual(cfg["tier_failure_mode"]["tier:medium"], "hard")
-        self.assertEqual(cfg["tier_failure_mode"]["tier:low"], "soft")
-        self.assertEqual(cfg["default_mode"], "hard")
-
    def test_each_item_has_required_fields(self):
        cfg = sop.load_config(CONFIG_PATH)
        for it in cfg["items"]:
@@ -627,7 +589,7 @@ class TestComputeNaState(unittest.TestCase):
 class TestIsHighRisk(unittest.TestCase):
    """The high-risk predicate decides which required_teams list applies.

-    Predicate: tier:high label OR any label in cfg.high_risk_labels.
+    Predicate: any label in cfg.high_risk_labels.
    """

    def setUp(self):
@@ -637,23 +599,9 @@ class TestIsHighRisk(unittest.TestCase):
        pr = {"labels": []}
        self.assertFalse(sop.is_high_risk(pr, self.cfg))

-    def test_tier_high_is_high_risk(self):
-        pr = {"labels": [{"name": "tier:high"}]}
-        self.assertTrue(sop.is_high_risk(pr, self.cfg))
-
-    def test_tier_low_is_default_class(self):
-        pr = {"labels": [{"name": "tier:low"}]}
-        self.assertFalse(sop.is_high_risk(pr, self.cfg))
-
-    def test_tier_medium_is_default_class(self):
-        # tier:medium alone is NOT high-risk (Option C — medium routes
-        # to the wider engineers OR-set).
-        pr = {"labels": [{"name": "tier:medium"}]}
-        self.assertFalse(sop.is_high_risk(pr, self.cfg))
-
    def test_area_security_label_is_high_risk(self):
-        pr = {"labels": [{"name": "tier:medium"}, {"name": "area:security"}]}
-        self.assertTrue(sop.is_high_risk(pr, self.cfg))
+        pr = {"labels": [{"name": "area:security"}]}
+        self.assertTrue(sop.is_high_risk(pr, self.cfg))

    def test_area_schema_label_is_high_risk(self):
        pr = {"labels": [{"name": "area:schema"}]}
@@ -668,7 +615,7 @@ class TestIsHighRisk(unittest.TestCase):
        self.assertTrue(sop.is_high_risk(pr, self.cfg))

    def test_area_gate_meta_label_is_high_risk(self):
-        # Gate-meta = changes to sop-checklist/sop-tier-check itself.
+        # Gate-meta = changes to sop-checklist itself.
        pr = {"labels": [{"name": "area:gate-meta"}]}
        self.assertTrue(sop.is_high_risk(pr, self.cfg))

@@ -722,7 +669,7 @@ class TestRootCauseAckEligibilityWidened(unittest.TestCase):
    root-cause / no-backwards-compat for the default class.

    The dead-managers/ceo-persona-token gridlock is the symptom; the
-    root cause is that sop-checklist ignored tier-class. These tests
+    root cause is that sop-checklist ignored the risk class. These tests
    pin the new wider-default behavior so it can't regress silently.
    """

@@ -793,7 +740,7 @@ class TestHighRiskClassUsesElevatedListInConfig(unittest.TestCase):

    def test_root_cause_high_risk_elevated_to_ceo_only(self):
        items = _items_by_slug()
-        # tier:high alone makes the PR high-risk → root-cause needs ceo.
+        # area:schema alone makes the PR high-risk → root-cause needs ceo.
        self.assertEqual(
            sop.resolve_required_teams(items["root-cause"], high_risk=True),
            ["ceo"],
@@ -1,272 +0,0 @@
-#!/usr/bin/env bash
-# Security regression test for the SOP tier-gate AUTHORIZATION bypass.
-#
-# Bug (fixed in fix/sop-tier-authz-no-org-fallback):
-#   sop-tier-check.sh probed team membership at /teams/{id}/members/{user}.
-#   If EVERY team probe failed (e.g. 403 — token lacks read:organization, or
-#   any visibility/flakiness gap), it FELL BACK to /orgs/{org}/members/{user}
-#   and credited that org member as a member of EVERY queried team. The
-#   evaluator then treated those synthetic memberships as real, so a plain
-#   NON-CEO org member satisfied tier:high (ceo). A visibility/auth gap became
-#   a real highest-tier authorization PASS — privilege escalation.
-#
-# Fix (fail-closed authorization):
-#   - The org-member ⇒ "member of all teams" fallback is REMOVED. Org
-#     membership is never credited as team membership.
-#   - A team probe that returns anything other than 200/204 (member) or 404
-#     (verified non-member) is a CANNOT-VERIFY condition: the gate fails loud
-#     (exit 1) with a cannot-verify status and never grants the tier.
-#
-# Method: this is a true end-to-end test. It prepends a fake `curl` to PATH
-# that serves canned Gitea API responses keyed by URL, then runs the REAL
-# sop-tier-check.sh. The fake exercises the genuine probe→credit→evaluate
-# path — no logic is re-implemented in the test.
-
-set -euo pipefail
-
-THIS_DIR="$(cd "$(dirname "$0")" && pwd)"
-SCRIPT_DIR="$(cd "$THIS_DIR/.." && pwd)"
-SCRIPT="$SCRIPT_DIR/sop-tier-check.sh"
-
-command -v jq >/dev/null 2>&1 || { echo "::error::jq required but not found"; exit 1; }
-[ -f "$SCRIPT" ] || { echo "::error::sop-tier-check.sh not found at $SCRIPT — test must fail loudly if the script is absent"; exit 1; }
-
-# sop-tier-check.sh uses `declare -A` (associative arrays), which require
-# bash >= 4. CI runners (Ubuntu) ship bash 5; macOS ships 3.2. Resolve a
-# bash >= 4 to run the script under.
-pick_bash() {
-  local c
-  for c in bash /opt/homebrew/bin/bash /usr/local/bin/bash /bin/bash; do
-    local p; p="$(command -v "$c" 2>/dev/null || true)"
-    [ -n "$p" ] || continue
-    local maj; maj="$("$p" -c 'echo "${BASH_VERSINFO[0]}"' 2>/dev/null || echo 0)"
-    if [ "${maj:-0}" -ge 4 ]; then echo "$p"; return 0; fi
-  done
-  return 1
-}
-BASH4="$(pick_bash)" || { echo "::error::need bash >= 4 to run sop-tier-check.sh (associative arrays); none found"; exit 1; }
-echo "using bash: $BASH4 ($("$BASH4" -c 'echo $BASH_VERSION'))"
-
-PASS=0
-FAIL=0
-
-assert_eq() {
-  local label="$1" expected="$2" got="$3"
-  if [ "$expected" = "$got" ]; then
-    echo "  PASS  $label"
-    PASS=$((PASS + 1))
-  else
-    echo "  FAIL  $label"
-    echo "        expected: <$expected>"
-    echo "        got:      <$got>"
-    FAIL=$((FAIL + 1))
-  fi
-}
-
-assert_contains() {
-  local label="$1" haystack="$2" needle="$3"
-  if printf '%s' "$haystack" | grep -qF -- "$needle"; then
-    echo "  PASS  $label"
-    PASS=$((PASS + 1))
-  else
-    echo "  FAIL  $label (missing substring: <$needle>)"
-    FAIL=$((FAIL + 1))
-  fi
-}
-
-assert_not_contains() {
-  local label="$1" haystack="$2" needle="$3"
-  if printf '%s' "$haystack" | grep -qF -- "$needle"; then
-    echo "  FAIL  $label (unexpected substring present: <$needle>)"
-    FAIL=$((FAIL + 1))
-  else
-    echo "  PASS  $label"
-    PASS=$((PASS + 1))
-  fi
-}
-
-# ---------------------------------------------------------------------------
-# Fake-curl harness.
-#
-# The real script calls curl in two shapes:
-#   (a) body capture:   curl -sS -H AUTH URL                 -> prints JSON body
-#   (b) http-code:      curl -sS -o FILE -w '%{http_code}' -H AUTH URL
-#   (c) http-code only: curl -sS -o /dev/null -w '%{http_code}' -H AUTH URL
-#
-# Our fake reads the URL (last non-flag arg), looks up a response in fixture
-# files under $FIXDIR, and emits body and/or http-code accordingly.
-# ---------------------------------------------------------------------------
-
-make_harness() {
-  # $1 = scenario dir to populate with fixtures
-  local FIXDIR="$1"
-  local BIN="$FIXDIR/bin"
-  mkdir -p "$BIN"
-  cat > "$BIN/curl" <<'FAKE'
-#!/usr/bin/env bash
-# Fake curl for sop-tier-check authz tests. Looks up canned responses by URL.
-set -u
-FIXDIR="${SOP_TEST_FIXDIR:?SOP_TEST_FIXDIR unset}"
-
-url=""
-out=""
-want_code="no"
-prev=""
-for a in "$@"; do
-  case "$prev" in
-    -o) out="$a" ;;
-  esac
-  case "$a" in
-    http*://*) url="$a" ;;
-    '%{http_code}') want_code="yes" ;;
-  esac
-  # -w '%{http_code}' arrives as the value of the -w flag
-  if [ "$prev" = "-w" ] && [ "$a" = '%{http_code}' ]; then want_code="yes"; fi
-  prev="$a"
-done
-
-# Map URL -> fixture key (a filename-safe slug).
-# We only need the path after /api/v1.
-path="${url#*/api/v1}"
-slug="$(printf '%s' "$path" | tr '/?=&' '____')"
-
-body_file="$FIXDIR/body${slug}"
-code_file="$FIXDIR/code${slug}"
-
-# Emit body to -o target (or capture for stdout) when a body fixture exists.
-body=""
-if [ -f "$body_file" ]; then body="$(cat "$body_file")"; fi
-if [ -n "$out" ]; then
-  printf '%s' "$body" > "$out"
-else
-  printf '%s' "$body"
-fi
-
-# Emit http code when requested.
-if [ "$want_code" = "yes" ]; then
-  if [ -f "$code_file" ]; then
-    printf '%s' "$(cat "$code_file")"
-  else
-    printf '200'
-  fi
-fi
-exit 0
-FAKE
-  chmod +x "$BIN/curl"
-  echo "$BIN"
-}
-
-# Common fixtures shared by scenarios. $1 = FIXDIR, $2 = approver login,
-# $3 = tier label name (e.g. tier:high), $4 = teams JSON.
-seed_common() {
-  local FIXDIR="$1" approver="$2" tier="$3" teams_json="$4"
-  mkdir -p "$FIXDIR"
-  # /user -> whoami
-  printf '%s' '{"login":"sop-bot"}' > "$FIXDIR/body_user"
-  # PR head sha
-  printf '%s' '{"head":{"sha":"headsha1"}}' \
-    > "$FIXDIR/body_repos_molecule-ai_molecule-core_pulls_42"
-  # labels
-  printf '%s' "[{\"name\":\"$tier\"}]" \
-    > "$FIXDIR/body_repos_molecule-ai_molecule-core_issues_42_labels"
-  # org teams list
-  printf '%s' "$teams_json" > "$FIXDIR/body_orgs_molecule-ai_teams"
-  printf '%s' '200' > "$FIXDIR/code_orgs_molecule-ai_teams"
-  # reviews: one APPROVED on current head by $approver
-  printf '%s' "[{\"state\":\"APPROVED\",\"commit_id\":\"headsha1\",\"user\":{\"login\":\"$approver\"}}]" \
-    > "$FIXDIR/body_repos_molecule-ai_molecule-core_pulls_42_reviews"
-}
-
-run_script() {
-  # $1 = FIXDIR (must contain bin/curl). Returns combined stdout+stderr; sets RC.
-  local FIXDIR="$1"
-  local BIN="$FIXDIR/bin"
-  set +e
-  OUT=$(
-    SOP_TEST_FIXDIR="$FIXDIR" \
-    PATH="$BIN:$PATH" \
-    GITEA_TOKEN="faketoken" \
-    GITEA_HOST="git.moleculesai.app" \
-    REPO="molecule-ai/molecule-core" \
-    PR_NUMBER="42" \
-    PR_AUTHOR="pr-author" \
-    SOP_DEBUG="0" \
-    SOP_LEGACY_CHECK="0" \
-    "$BASH4" "$SCRIPT" 2>&1
-  )
-  RC=$?
-  set -e
-  printf '%s' "$OUT"
-  return $RC
-}
-
-TEAMS_JSON='[{"name":"ceo","id":10},{"name":"engineers","id":11},{"name":"managers","id":12}]'
-
-echo "=============================================================="
-echo "Scenario 1: tier:high, team probe 403 (cannot read), approver"
-echo "            is a plain org member but NOT in ceo team."
-echo "            EXPECT: tier NOT granted (fail-closed cannot-verify)."
-echo "=============================================================="
-S1="$(mktemp -d)"
-make_harness "$S1" >/dev/null
-seed_common "$S1" "org-only-bob" "tier:high" "$TEAMS_JSON"
-# Team membership probe for ceo (id=10) returns 403 — cannot read.
-printf '%s' '403' > "$S1/code_teams_10_members_org-only-bob"
-# The OLD bug path: org membership probe would 204 and synthetic-credit.
-printf '%s' '204' > "$S1/code_orgs_molecule-ai_members_org-only-bob"
-set +e
-OUT1="$(run_script "$S1")"; RC1=$?
-set -e
-echo "$OUT1" | sed 's/^/    /'
-echo "    (exit=$RC1)"
-assert_eq "S1 exit non-zero (tier NOT granted)" "1" "$([ "$RC1" -ne 0 ] && echo 1 || echo 0)"
-assert_not_contains "S1 did NOT print PASSED" "$OUT1" "sop-tier-check PASSED"
-assert_contains "S1 cannot-verify error surfaced" "$OUT1" "CANNOT VERIFY"
-assert_contains "S1 names the unreadable probe (403)" "$OUT1" "HTTP 403"
-rm -rf "$S1"
-
-echo
-echo "=============================================================="
-echo "Scenario 2: tier:high, genuine ceo team member (probe 204)."
-echo "            EXPECT: tier GRANTED."
-echo "=============================================================="
-S2="$(mktemp -d)"
-make_harness "$S2" >/dev/null
-seed_common "$S2" "real-ceo" "tier:high" "$TEAMS_JSON"
-printf '%s' '204' > "$S2/code_teams_10_members_real-ceo"   # ceo team: member
-set +e
-OUT2="$(run_script "$S2")"; RC2=$?
-set -e
-echo "$OUT2" | sed 's/^/    /'
-echo "    (exit=$RC2)"
-assert_eq "S2 exit zero (granted)" "0" "$RC2"
-assert_contains "S2 printed PASSED" "$OUT2" "sop-tier-check PASSED"
-rm -rf "$S2"
-
-echo
-echo "=============================================================="
-echo "Scenario 3: tier:high, approver is an org member but a VERIFIED"
-echo "            non-member of ceo (team probe 404). Org probe would"
-echo "            204 — must NEVER be synthetic-credited."
-echo "            EXPECT: tier NOT granted (clause FAIL), no fallback."
-echo "=============================================================="
-S3="$(mktemp -d)"
-make_harness "$S3" >/dev/null
-seed_common "$S3" "org-member-carol" "tier:high" "$TEAMS_JSON"
-printf '%s' '404' > "$S3/code_teams_10_members_org-member-carol"  # verified NOT in ceo
-printf '%s' '204' > "$S3/code_orgs_molecule-ai_members_org-member-carol" # org member (must be ignored)
-set +e
-OUT3="$(run_script "$S3")"; RC3=$?
-set -e
-echo "$OUT3" | sed 's/^/    /'
-echo "    (exit=$RC3)"
-assert_eq "S3 exit non-zero (tier NOT granted)" "1" "$([ "$RC3" -ne 0 ] && echo 1 || echo 0)"
-assert_not_contains "S3 did NOT print PASSED" "$OUT3" "sop-tier-check PASSED"
-assert_contains "S3 reported a real clause FAIL (not cannot-verify)" "$OUT3" "FAILED for tier:high"
-assert_not_contains "S3 did NOT cannot-verify (404 is a verified negative)" "$OUT3" "CANNOT VERIFY"
-rm -rf "$S3"
-
-echo
-echo "------"
-echo "PASS=$PASS FAIL=$FAIL"
-[ "$FAIL" -eq 0 ]
@@ -1,101 +0,0 @@
-#!/usr/bin/env bash
-# Regression test for #229 — sop-tier-check tier:low OR-clause splitter.
-#
-# Bug (PR #225 → still broken after PR #231):
-#   Line ~289 of sop-tier-check.sh used:
-#     _clause=$(echo "$_raw_clause" | tr -d '()' | tr ',' '\n' | tr -d '[:space:]' | grep -v '^$')
-#   `tr -d '[:space:]'` strips the newlines that `tr ',' '\n'` just
-#   inserted, collapsing "engineers,managers,ceo" into a single token
-#   "engineersmanagersceo". The for-loop then iterates ONCE on a name
-#   that matches no team, so every tier:low PR fails:
-#     ::error::clause [engineers/managers/ceo]: FAIL — no approving
-#     reviewer belongs to any of these teamsengineersmanagersceo
-#   (note also: missing separators in the error string is bug #2 —
-#    `_clause_names` used "${var:+, }$x" which OVERWRITES per iteration).
-#
-# Fix shape (this PR):
-#   _no_parens=${_raw_clause//[()]/}
-#   _clause=${_no_parens//,/ }    # comma -> space, bash word-split iterates
-#   _clause_names="${_clause_names}${_clause_names:+, }${_t}"  # APPEND, not overwrite
-#
-# This test extracts the splitter logic and asserts it produces the right
-# token list for each of the three tier expressions live in the script.
-
-set -euo pipefail
-
-PASS=0
-FAIL=0
-
-assert_eq() {
-  local label="$1"
-  local expected="$2"
-  local got="$3"
-  if [ "$expected" = "$got" ]; then
-    echo "  PASS  $label"
-    PASS=$((PASS + 1))
-  else
-    echo "  FAIL  $label"
-    echo "        expected: <$expected>"
-    echo "        got:      <$got>"
-    FAIL=$((FAIL + 1))
-  fi
-}
-
-# ----- Splitter under test (mirrors the fixed sop-tier-check.sh block) -----
-split_clause() {
-  local raw="$1"
-  local no_parens=${raw//[()]/}
-  local clause=${no_parens//,/ }
-  local out=""
-  for _t in $clause; do
-    out="${out}${out:+|}$_t"
-  done
-  echo "$out"
-}
-
-echo "test: tier:low OR-clause splits to 3 tokens"
-assert_eq "tier:low" "engineers|managers|ceo" "$(split_clause "engineers,managers,ceo")"
-
-echo "test: tier:medium AND-expression — bash word-split on \$EXPR yields 5 tokens"
-EXPR="managers AND engineers AND qa???,security???"
-out=""
-for _raw in $EXPR; do
-  out="${out}${out:+ ; }$(split_clause "$_raw")"
-done
-assert_eq "tier:medium" "managers ; AND ; engineers ; AND ; qa???|security???" "$out"
-
-echo "test: tier:high single-team OR-clause"
-assert_eq "tier:high" "ceo" "$(split_clause "ceo")"
-
-echo "test: paren-wrapped OR-set unwraps + splits"
-assert_eq "paren OR" "managers|ceo" "$(split_clause "(managers,ceo)")"
-
-# ----- _clause_names accumulator (was overwriting per iteration) -----
-acc=""
-for t in engineers managers ceo; do
-  acc="${acc}${acc:+, }${t}"
-done
-assert_eq "_clause_names append" "engineers, managers, ceo" "$acc"
-
-# ----- _failed_clauses / _passed_clauses accumulator across raw clauses -----
-acc=""
-for c in clauseA clauseB clauseC; do
-  acc="${acc}${acc:+, }${c}"
-done
-assert_eq "_failed_clauses append" "clauseA, clauseB, clauseC" "$acc"
-
-# ----- End-to-end OR-gate: simulate APPROVER_TEAMS[core-lead]=' managers ' -----
-# The script's case pattern is *${_t}* with a space-padded value.
-APPROVER_TEAMS_VAL=" managers "
-matched=""
-for _t in $(split_clause "engineers,managers,ceo" | tr '|' ' '); do
-  case "$APPROVER_TEAMS_VAL" in
-    *${_t}*) matched="$_t"; break ;;
-  esac
-done
-assert_eq "OR-gate matches managers" "managers" "$matched"
-
-echo
-echo "------"
-echo "PASS=$PASS FAIL=$FAIL"
-[ "$FAIL" -eq 0 ]
@@ -1,66 +0,0 @@
-#!/usr/bin/env bash
-# Regression test for internal#816 — sop-tier-check must ignore APPROVED
-# reviews that were submitted against an old PR head SHA.
-#
-# Bug: the script collected approvers with
-#   jq '[.[] | select(.state=="APPROVED") | .user.login]'
-# without filtering on .commit_id == HEAD_SHA. After a PR head moved,
-# stale approvals looked valid to the tier gate.
-#
-# Fix: the jq filter now includes
-#   select(.state=="APPROVED" and .commit_id == $head_sha)
-# where $head_sha is the current PR head fetched from the API.
-
-set -euo pipefail
-
-# jq may not be on PATH in all environments (e.g. dev containers).
-PATH="/tmp/bin:$PATH"
-command -v jq >/dev/null 2>&1 || { echo "::error::jq required but not found"; exit 1; }
-
-PASS=0
-FAIL=0
-
-assert_eq() {
-  local label="$1"
-  local expected="$2"
-  local got="$3"
-  if [ "$expected" = "$got" ]; then
-    echo "  PASS  $label"
-    PASS=$((PASS + 1))
-  else
-    echo "  FAIL  $label"
-    echo "        expected: <$expected>"
-    echo "        got:      <$got>"
-    FAIL=$((FAIL + 1))
-  fi
-}
-
-# Sample reviews matching the shape from Gitea API
-REVIEWS_JSON='[
-  {"state":"APPROVED","commit_id":"abc123","user":{"login":"bob"}},
-  {"state":"APPROVED","commit_id":"old456","user":{"login":"alice"}},
-  {"state":"COMMENT","commit_id":"abc123","user":{"login":"carol"}},
-  {"state":"APPROVED","commit_id":"abc123","user":{"login":"dave"}},
-  {"state":"REQUEST_CHANGES","commit_id":"abc123","user":{"login":"eve"}}
-]'
-
-echo "test: jq filter keeps only APPROVED on current head"
-GOT=$(echo "$REVIEWS_JSON" | jq -r --arg head_sha "abc123" \
-  '[.[] | select(.state=="APPROVED" and .commit_id == $head_sha) | .user.login] | unique | .[]')
-assert_eq "current-head approvers" "bob dave" "$(echo "$GOT" | tr '\n' ' ' | sed 's/ $//')"
-
-echo "test: jq filter with all-stale reviews yields empty"
-GOT=$(echo "$REVIEWS_JSON" | jq -r --arg head_sha "new789" \
-  '[.[] | select(.state=="APPROVED" and .commit_id == $head_sha) | .user.login] | unique | .[]')
-assert_eq "all-stale yields empty" "" "$GOT"
-
-echo "test: jq filter handles null commit_id gracefully"
-NULL_JSON='[{"state":"APPROVED","commit_id":null,"user":{"login":"mallory"}}]'
-GOT=$(echo "$NULL_JSON" | jq -r --arg head_sha "abc123" \
-  '[.[] | select(.state=="APPROVED" and .commit_id == $head_sha) | .user.login] | unique | .[]')
-assert_eq "null commit_id excluded" "" "$GOT"
-
-echo
-echo "------"
-echo "PASS=$PASS FAIL=$FAIL"
-[ "$FAIL" -eq 0 ]
@@ -1,304 +0,0 @@
-#!/usr/bin/env bash
-# Tests for sop-tier-refire.{yml,sh} — internal#292.
-#
-# Behavior matrix:
-#
-#   T1: PR open + APPROVED via tier:low → script invokes sop-tier-check
-#       and POSTs status=success.
-#   T2: PR open + missing tier label → sop-tier-check exits non-zero;
-#       refire still POSTs status=success, matching the canonical
-#       pull_request_target workflow's fail-open job conclusion.
-#   T3: PR open + tier:low but NO approving reviews → sop-tier-check
-#       exits non-zero; refire still POSTs status=success for the same reason.
-#   T4: PR CLOSED → refire exits 0 with no status POST (no-op on closed).
-#   T5: Rate-limit — recent status update within 30s → refire skips,
-#       no new POST.
-#   T6 (yaml-lint): workflow `if:` expression contains author_association
-#       gate + slash-command-trigger gate + PR-not-issue gate.
-#   T7 (yaml-lint): workflow file is parseable YAML.
-#
-# Tests T1-T5 run the real script against a local-fixture HTTP server
-# (python http.server with a stub handler — `tests/_refire_fixture.py`)
-# so the script's Gitea API calls hit the fixture, not the real Gitea.
-#
-# Tests T6/T7 are pure YAML checks against the workflow file.
-#
-# Hostile-self-review (per feedback_assert_exact_not_substring):
-# this test MUST FAIL if the workflow or script is absent. Verified by
-# running the test before the files exist (covered in the PR body).
-
-set -euo pipefail
-
-THIS_DIR="$(cd "$(dirname "$0")" && pwd)"
-SCRIPT_DIR="$(cd "$THIS_DIR/.." && pwd)"
-WORKFLOW_DIR="$(cd "$THIS_DIR/../../workflows" && pwd)"
-WORKFLOW="$WORKFLOW_DIR/sop-tier-refire.yml"
-DISPATCH_WORKFLOW="$WORKFLOW_DIR/sop-checklist.yml"
-SCRIPT="$SCRIPT_DIR/sop-tier-refire.sh"
-
-PASS=0
-FAIL=0
-FAILED_TESTS=""
-
-assert_eq() {
-  local label="$1"
-  local expected="$2"
-  local got="$3"
-  if [ "$expected" = "$got" ]; then
-    echo "  PASS  $label"
-    PASS=$((PASS + 1))
-  else
-    echo "  FAIL  $label"
-    echo "        expected: <$expected>"
-    echo "        got:      <$got>"
-    FAIL=$((FAIL + 1))
-    FAILED_TESTS="${FAILED_TESTS} ${label}"
-  fi
-}
-
-assert_contains() {
-  local label="$1"
-  local needle="$2"
-  local haystack="$3"
-  if printf '%s' "$haystack" | grep -qF "$needle"; then
-    echo "  PASS  $label"
-    PASS=$((PASS + 1))
-  else
-    echo "  FAIL  $label"
-    echo "        needle:    <$needle>"
-    echo "        haystack:  <$(printf '%s' "$haystack" | head -c 400)>"
-    FAIL=$((FAIL + 1))
-    FAILED_TESTS="${FAILED_TESTS} ${label}"
-  fi
-}
-
-assert_file_exists() {
-  local label="$1"
-  local path="$2"
-  if [ -f "$path" ]; then
-    echo "  PASS  $label"
-    PASS=$((PASS + 1))
-  else
-    echo "  FAIL  $label (not found: $path)"
-    FAIL=$((FAIL + 1))
-    FAILED_TESTS="${FAILED_TESTS} ${label}"
-  fi
-}
-
-# Existence (foundation — every other test depends on these)
-echo
-echo "== existence =="
-assert_file_exists "workflow file exists"  "$WORKFLOW"
-assert_file_exists "SSOT dispatcher workflow file exists" "$DISPATCH_WORKFLOW"
-assert_file_exists "script file exists"    "$SCRIPT"
-if [ "$FAIL" -gt 0 ]; then
-  echo
-  echo "------"
-  echo "PASS=$PASS FAIL=$FAIL (existence)"
-  echo "Cannot proceed without these files."
-  exit 1
-fi
-
-# T6 / T7 — workflow YAML structure
-echo
-echo "== T6/T7 workflow yaml =="
-
-# YAML parseability
-PARSE_OUT=$(python3 -c 'import sys,yaml;yaml.safe_load(open(sys.argv[1]).read());print("ok")' "$WORKFLOW" 2>&1 || true)
-assert_eq "T7 workflow parses as YAML" "ok" "$PARSE_OUT"
-
-# The old per-workflow issue_comment listener caused queue storms because
-# Gitea queues jobs before evaluating job-level `if:`. The script remains,
-# but comment-triggered refires route through the single dispatcher.
-WORKFLOW_CONTENT=$(cat "$WORKFLOW")
-if printf '%s' "$WORKFLOW_CONTENT" | grep -q '^  issue_comment:'; then
-  echo "  FAIL  T6a manual fallback workflow must not listen on issue_comment"
-  FAIL=$((FAIL + 1))
-  FAILED_TESTS="${FAILED_TESTS} T6a"
-else
-  echo "  PASS  T6a manual fallback workflow does not listen on issue_comment"
-  PASS=$((PASS + 1))
-fi
-assert_contains "T6b workflow exposes workflow_dispatch" \
-  "workflow_dispatch" "$WORKFLOW_CONTENT"
-assert_contains "T6c workflow documents unsupported manual inputs" \
-  "workflow_dispatch inputs" "$WORKFLOW_CONTENT"
-# Does NOT check out PR HEAD (security)
-if grep -q 'ref: \${{ github.event.pull_request.head' "$WORKFLOW"; then
-  echo "  FAIL  T6d workflow MUST NOT check out PR head (security)"
-  FAIL=$((FAIL + 1))
-  FAILED_TESTS="${FAILED_TESTS} T6d"
-else
-  echo "  PASS  T6d workflow does not check out PR head"
-  PASS=$((PASS + 1))
-fi
-
-DISPATCH_PARSE_OUT=$(python3 -c 'import sys,yaml;yaml.safe_load(open(sys.argv[1]).read());print("ok")' "$DISPATCH_WORKFLOW" 2>&1 || true)
-assert_eq "T6e SSOT dispatcher workflow parses as YAML" "ok" "$DISPATCH_PARSE_OUT"
-DISPATCH_CONTENT=$(cat "$DISPATCH_WORKFLOW")
-assert_contains "T6f SSOT dispatcher listens on issue_comment" \
-  "issue_comment" "$DISPATCH_CONTENT"
-assert_contains "T6g SSOT dispatcher handles /qa-recheck" \
-  "/qa-recheck" "$DISPATCH_CONTENT"
-assert_contains "T6h SSOT dispatcher handles /security-recheck" \
-  "/security-recheck" "$DISPATCH_CONTENT"
-assert_contains "T6i SSOT dispatcher handles /refire-tier-check" \
-  "/refire-tier-check" "$DISPATCH_CONTENT"
-
-# T1-T5 — script behavior against a local Gitea-fixture
-echo
-echo "== T1-T5 script behavior (vs local fixture) =="
-
-# Spin up the fixture HTTP server.
-FIXTURE_DIR=$(mktemp -d)
-trap 'rm -rf "$FIXTURE_DIR"; [ -n "${FIX_PID:-}" ] && kill "$FIX_PID" 2>/dev/null || true' EXIT
-FIXTURE_PY="$THIS_DIR/_refire_fixture.py"
-if [ ! -f "$FIXTURE_PY" ]; then
-  echo "::error::fixture server $FIXTURE_PY missing"
-  exit 1
-fi
-
-FIX_LOG="$FIXTURE_DIR/fixture.log"
-FIX_STATE_DIR="$FIXTURE_DIR/state"
-mkdir -p "$FIX_STATE_DIR"
-
-# Find an unused port.
-FIX_PORT=$(python3 -c 'import socket;s=socket.socket();s.bind(("127.0.0.1",0));print(s.getsockname()[1]);s.close()')
-
-FIXTURE_STATE_DIR="$FIX_STATE_DIR" python3 "$FIXTURE_PY" "$FIX_PORT" \
-  >"$FIX_LOG" 2>&1 &
-FIX_PID=$!
-
-# Wait for fixture readiness.
-for _ in $(seq 1 50); do
-  if curl -fsS "http://127.0.0.1:${FIX_PORT}/_ping" >/dev/null 2>&1; then
-    break
-  fi
-  sleep 0.1
-done
-if ! curl -fsS "http://127.0.0.1:${FIX_PORT}/_ping" >/dev/null 2>&1; then
-  echo "::error::fixture server failed to start. Log:"
-  cat "$FIX_LOG"
-  exit 1
-fi
-
-# Helper: set fixture state for a scenario, then run the script.
-# tier_result is one of: pass | fail_no_label | fail_no_approvals.
-# The refire script's tier-check invocation is mocked because the real
-# sop-tier-check.sh uses bash 4+ associative arrays — incompatible with
-# the macOS bash 3.2 dev shell. Linux Gitea runners use bash 4/5 so
-# production runs the real script. The mock exercises the success +
-# failure branches of refire's status-POST glue.
-run_scenario() {
-  local scenario="$1"
-  local tier_result="${2:-pass}"
-  echo "$scenario" >"$FIX_STATE_DIR/scenario"
-  : >"$FIX_STATE_DIR/posted_statuses.jsonl"  # clear status log
-
-  local out
-  set +e
-  out=$(
-    PATH="$FIXTURE_DIR/bin:$PATH" \
-    GITEA_TOKEN="fixture-token" \
-    GITEA_HOST="fixture.local" \
-    REPO="molecule-ai/molecule-core" \
-    PR_NUMBER="999" \
-    COMMENT_AUTHOR="test-runner" \
-    SOP_REFIRE_DISABLE_RATE_LIMIT="1" \
-    SOP_REFIRE_TIER_CHECK_SCRIPT="$THIS_DIR/_mock_tier_check.sh" \
-    MOCK_TIER_RESULT="$tier_result" \
-    FIXTURE_PORT="$FIX_PORT" \
-    bash "$SCRIPT" 2>&1
-  )
-  local rc=$?
-  set -e
-  echo "$out" >"$FIX_STATE_DIR/last_run.log"
-  echo "$rc" >"$FIX_STATE_DIR/last_rc"
-}
-
-# Install a curl shim that rewrites https://fixture.local → http://127.0.0.1:$PORT
-# Use bash prefix-strip (${var#prefix}) — it sidesteps the `/` delimiter
-# confusion of ${var/pattern/replacement}.
-mkdir -p "$FIXTURE_DIR/bin"
-cat >"$FIXTURE_DIR/bin/curl" <<SHIM
-#!/usr/bin/env bash
-# Test shim: rewrite https://fixture.local/* -> http://127.0.0.1:${FIX_PORT}/*
-# The fixture doesn't authenticate; -H Authorization passes through harmlessly.
-new_args=()
-for a in "\$@"; do
-  if [[ "\$a" == https://fixture.local/* ]]; then
-    rest="\${a#https://fixture.local}"
-    a="http://127.0.0.1:${FIX_PORT}\${rest}"
-  fi
-  new_args+=("\$a")
-done
-exec /usr/bin/curl "\${new_args[@]}"
-SHIM
-chmod +x "$FIXTURE_DIR/bin/curl"
-
-# T1: tier:low + 1 APPROVED + author is in engineers team → success
-run_scenario "T1_success" "pass"
-RC=$(cat "$FIX_STATE_DIR/last_rc")
-POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
-assert_eq "T1 exit code 0 (success)" "0" "$RC"
-assert_contains "T1 POSTed state=success" '"state": "success"' "$POSTED"
-assert_contains "T1 POST context is sop-tier-check / tier-check" \
-  '"context": "sop-tier-check / tier-check (pull_request)"' "$POSTED"
-assert_contains "T1 description names commenter" "test-runner" "$POSTED"
-
-# T2: missing tier label → tier-check fails internally (mock exits 1).
-# FAIL-CLOSED contract (fix/core-ci-fail-closed): refire now captures the
-# REAL exit code and POSTs state=failure — it does NOT forge a green on
-# the required context. The refire job itself still exits 0 (it succeeded
-# at posting an honest failure status).
-run_scenario "T2_no_tier_label" "fail_no_label"
-RC=$(cat "$FIX_STATE_DIR/last_rc")
-POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
-assert_eq "T2 exit code 0 (posted an honest status)" "0" "$RC"
-assert_contains "T2 POSTed state=failure (no forged green)" '"state": "failure"' "$POSTED"
-
-# T3: tier:low present but ZERO approving reviews → internal tier check
-# fails (mock exits 1). Refire POSTs state=failure, never a false green.
-run_scenario "T3_no_approvals" "fail_no_approvals"
-RC=$(cat "$FIX_STATE_DIR/last_rc")
-POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
-assert_eq "T3 exit code 0 (posted an honest status)" "0" "$RC"
-assert_contains "T3 POSTed state=failure (no forged green)" '"state": "failure"' "$POSTED"
-
-# T4: closed PR — refire is a no-op (no POST, exit 0)
-run_scenario "T4_closed" "pass"
-RC=$(cat "$FIX_STATE_DIR/last_rc")
-POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
-assert_eq "T4 closed PR exits 0" "0" "$RC"
-assert_eq "T4 closed PR posts no status" "" "$POSTED"
-
-# T5: rate-limit — disable the env override and let scenario set a
-# recent statuses entry. Re-enable rate-limit for this scenario by NOT
-# passing SOP_REFIRE_DISABLE_RATE_LIMIT.
-echo "T5_rate_limited" >"$FIX_STATE_DIR/scenario"
-: >"$FIX_STATE_DIR/posted_statuses.jsonl"
-set +e
-T5_OUT=$(
-  PATH="$FIXTURE_DIR/bin:$PATH" \
-  GITEA_TOKEN="fixture-token" \
-  GITEA_HOST="fixture.local" \
-  REPO="molecule-ai/molecule-core" \
-  PR_NUMBER="999" \
-  COMMENT_AUTHOR="test-runner" \
-  FIXTURE_PORT="$FIX_PORT" \
-  bash "$SCRIPT" 2>&1
-)
-T5_RC=$?
-set -e
-POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
-assert_eq "T5 rate-limited exits 0" "0" "$T5_RC"
-assert_contains "T5 rate-limited log says skipped" "rate-limited" "$T5_OUT"
-assert_eq "T5 rate-limited posts no status" "" "$POSTED"
-
-echo
-echo "------"
-echo "PASS=$PASS FAIL=$FAIL"
-if [ "$FAIL" -gt 0 ]; then
-  echo "Failed:$FAILED_TESTS"
-fi
-[ "$FAIL" -eq 0 ]
@@ -0,0 +1,474 @@
+import importlib.util
+import json
+import pathlib
+import urllib.error
+
+
+ROOT = pathlib.Path(__file__).resolve().parents[1]
+SCRIPT = ROOT / "umbrella-reaper.py"
+
+
+def load_reaper():
+    """Load umbrella-reaper.py as a fresh module and pin fixture configuration.
+
+    The script's filename contains a hyphen, so it is loaded from its path
+    via importlib rather than with a plain ``import`` statement. Executing
+    the module runs its top-level code; afterwards the API / credential /
+    repo globals are overwritten with fixture values.
+    """
+    spec = importlib.util.spec_from_file_location("umbrella_reaper", SCRIPT)
+    mod = importlib.util.module_from_spec(spec)
+    assert spec.loader is not None
+    spec.loader.exec_module(mod)
+    # Overwrite module globals directly so no real host or token is used.
+    mod.API = "https://git.example.test/api/v1"
+    mod.GITEA_TOKEN = "fixture-token"
+    mod.GITEA_HOST = "git.example.test"
+    mod.REPO = "owner/repo"
+    return mod
+
+
+class FakeResponse:
+    """Context-manager stand-in for an HTTP response carrying a JSON payload.
+
+    NOTE(review): no test in this file appears to instantiate it — confirm
+    whether it is still needed.
+    """
+
+    # Fixed HTTP status reported by every fake response.
+    status = 200
+
+    def __init__(self, payload):
+        self.payload = payload
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc, tb):
+        # Returning False lets exceptions raised inside the `with` body propagate.
+        return False
+
+    def read(self):
+        # Serialise the payload to UTF-8 encoded JSON bytes.
+        return json.dumps(self.payload).encode("utf-8")
+
+
+def _pr_fixture(number: int, sha: str) -> dict:
+    """Build the smallest PR payload the reaper reads: a number and a head SHA."""
+    head = {"sha": sha}
+    return {"number": number, "head": head}
+
+
+def _status_entry(context: str, state: str) -> dict:
+    """Build one commit-status entry; the state is stored under the "status" key."""
+    entry = {"context": context}
+    entry["status"] = state
+    return entry
+
+
+def test_process_pr_compensates_when_all_sub_jobs_success(monkeypatch):
+    """Umbrella=failure with every required sub-job green posts exactly one
+    compensating status on the PR's head SHA under the umbrella context."""
+    mod = load_reaper()
+    posted = []
+
+    def fake_post_status(sha, context, description):
+        posted.append((sha, context, description))
+
+    monkeypatch.setattr(mod, "post_status", fake_post_status)
+    monkeypatch.setattr(
+        mod,
+        "REQUIRED_SUB_JOBS",
+        [
+            "CI / Detect changes (pull_request)",
+            "CI / Platform (Go) (pull_request)",
+        ],
+    )
+
+    pr = _pr_fixture(1, "abc123")
+
+    def fake_combined_status(sha):
+        return {
+            "statuses": [
+                _status_entry("CI / all-required (pull_request)", "failure"),
+                _status_entry("CI / Detect changes (pull_request)", "success"),
+                _status_entry("CI / Platform (Go) (pull_request)", "success"),
+            ]
+        }
+
+    monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)
+
+    ok = mod.process_pr(pr)
+    assert ok is True
+    assert len(posted) == 1
+    # Each recorded tuple is (sha, context, description).
+    assert posted[0][0] == "abc123"
+    assert posted[0][1] == "CI / all-required (pull_request)"
+    assert "Compensating status" in posted[0][2]
+
+
+def test_process_pr_skips_when_umbrella_missing(monkeypatch):
+    """No umbrella context on the SHA means nothing to compensate: no POST, tick OK."""
+    mod = load_reaper()
+    recorded = []
+
+    def record_post(*args, **kwargs):
+        recorded.append(args)
+
+    def statuses_without_umbrella(sha):
+        only_sub_job = _status_entry("CI / Platform (Go) (pull_request)", "success")
+        return {"statuses": [only_sub_job]}
+
+    monkeypatch.setattr(mod, "post_status", record_post)
+    monkeypatch.setattr(mod, "REQUIRED_SUB_JOBS", ["CI / Platform (Go) (pull_request)"])
+    monkeypatch.setattr(mod, "get_combined_status", statuses_without_umbrella)
+
+    result = mod.process_pr(_pr_fixture(2, "def456"))
+
+    assert result is True
+    assert recorded == []
+
+
+def test_process_pr_skips_when_sub_job_pending(monkeypatch):
+    """A required sub-job still "pending" must block compensation: the tick
+    succeeds but nothing is posted."""
+    mod = load_reaper()
+    posted = []
+    monkeypatch.setattr(mod, "post_status", lambda *a, **k: posted.append(a))
+    monkeypatch.setattr(
+        mod,
+        "REQUIRED_SUB_JOBS",
+        [
+            "CI / Detect changes (pull_request)",
+            "CI / Platform (Go) (pull_request)",
+        ],
+    )
+
+    pr = _pr_fixture(3, "ghi789")
+
+    def fake_combined_status(sha):
+        return {
+            "statuses": [
+                _status_entry("CI / all-required (pull_request)", "failure"),
+                _status_entry("CI / Detect changes (pull_request)", "success"),
+                _status_entry("CI / Platform (Go) (pull_request)", "pending"),
+            ]
+        }
+
+    monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)
+
+    ok = mod.process_pr(pr)
+    assert ok is True
+    assert posted == []
+
+
+def test_process_pr_skips_when_sub_job_failure(monkeypatch):
+    """A genuinely failed required sub-job means the umbrella failure is
+    honest: the tick succeeds and no compensating status is posted."""
+    mod = load_reaper()
+    posted = []
+    monkeypatch.setattr(mod, "post_status", lambda *a, **k: posted.append(a))
+    monkeypatch.setattr(
+        mod,
+        "REQUIRED_SUB_JOBS",
+        [
+            "CI / Detect changes (pull_request)",
+            "CI / Platform (Go) (pull_request)",
+        ],
+    )
+
+    pr = _pr_fixture(4, "jkl012")
+
+    def fake_combined_status(sha):
+        return {
+            "statuses": [
+                _status_entry("CI / all-required (pull_request)", "failure"),
+                _status_entry("CI / Detect changes (pull_request)", "success"),
+                _status_entry("CI / Platform (Go) (pull_request)", "failure"),
+            ]
+        }
+
+    monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)
+
+    ok = mod.process_pr(pr)
+    assert ok is True
+    assert posted == []
+
+
+def test_process_pr_returns_false_on_post_failure(monkeypatch):
+    """When compensation is warranted but post_status raises ApiError,
+    process_pr reports the failure by returning False."""
+    mod = load_reaper()
+
+    def fake_post_status(sha, context, description):
+        raise mod.ApiError("POST /statuses/abc123 -> HTTP 500: simulated failure")
+
+    monkeypatch.setattr(mod, "post_status", fake_post_status)
+    monkeypatch.setattr(
+        mod,
+        "REQUIRED_SUB_JOBS",
+        [
+            "CI / Detect changes (pull_request)",
+            "CI / Platform (Go) (pull_request)",
+        ],
+    )
+
+    pr = _pr_fixture(5, "abc123")
+
+    def fake_combined_status(sha):
+        return {
+            "statuses": [
+                _status_entry("CI / all-required (pull_request)", "failure"),
+                _status_entry("CI / Detect changes (pull_request)", "success"),
+                _status_entry("CI / Platform (Go) (pull_request)", "success"),
+            ]
+        }
+
+    monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)
+
+    ok = mod.process_pr(pr)
+    assert ok is False
+
+
+def test_main_exits_nonzero_when_any_post_fails(monkeypatch):
+    """Two PRs both qualify for compensation; the second POST fails, so
+    main() must return 1 even though the first POST succeeded."""
+    mod = load_reaper()
+
+    # main() checks os.environ for these via _require_runtime_env().
+    monkeypatch.setenv("GITEA_TOKEN", "fixture-token")
+    monkeypatch.setenv("GITEA_HOST", "git.example.test")
+    monkeypatch.setenv("REPO", "owner/repo")
+
+    monkeypatch.setattr(
+        mod,
+        "REQUIRED_SUB_JOBS",
+        [
+            "CI / Detect changes (pull_request)",
+            "CI / Platform (Go) (pull_request)",
+        ],
+    )
+    monkeypatch.setattr(
+        mod,
+        "list_open_prs",
+        lambda limit: [
+            _pr_fixture(1, "abc123"),
+            _pr_fixture(2, "def456"),
+        ],
+    )
+
+    # Mutable counter so the nested fake can track how many POSTs were attempted.
+    calls = {"n": 0}
+
+    def fake_combined_status(sha):
+        return {
+            "statuses": [
+                _status_entry("CI / all-required (pull_request)", "failure"),
+                _status_entry("CI / Detect changes (pull_request)", "success"),
+                _status_entry("CI / Platform (Go) (pull_request)", "success"),
+            ]
+        }
+
+    monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)
+
+    def fake_post_status(sha, context, description):
+        calls["n"] += 1
+        # Only the second POST fails.
+        if calls["n"] == 2:
+            raise mod.ApiError("simulated failure")
+
+    monkeypatch.setattr(mod, "post_status", fake_post_status)
+
+    exit_code = mod.main()
+    assert exit_code == 1
+
+
+def test_main_exits_zero_when_all_posts_succeed(monkeypatch):
+    """A single PR that qualifies for compensation and whose POST succeeds
+    yields exit code 0 from main()."""
+    mod = load_reaper()
+
+    # main() checks os.environ for these via _require_runtime_env().
+    monkeypatch.setenv("GITEA_TOKEN", "fixture-token")
+    monkeypatch.setenv("GITEA_HOST", "git.example.test")
+    monkeypatch.setenv("REPO", "owner/repo")
+
+    monkeypatch.setattr(
+        mod,
+        "REQUIRED_SUB_JOBS",
+        [
+            "CI / Detect changes (pull_request)",
+            "CI / Platform (Go) (pull_request)",
+        ],
+    )
+    monkeypatch.setattr(
+        mod,
+        "list_open_prs",
+        lambda limit: [_pr_fixture(1, "abc123")],
+    )
+
+    def fake_combined_status(sha):
+        return {
+            "statuses": [
+                _status_entry("CI / all-required (pull_request)", "failure"),
+                _status_entry("CI / Detect changes (pull_request)", "success"),
+                _status_entry("CI / Platform (Go) (pull_request)", "success"),
+            ]
+        }
+
+    monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)
+    # A POST stub that never raises.
+    monkeypatch.setattr(mod, "post_status", lambda *a, **k: None)
+
+    exit_code = mod.main()
+    assert exit_code == 0
+
+
+def test_dry_run_does_not_post(monkeypatch):
+    """With DRY_RUN enabled, the real post_status must return before calling
+    api(), so no POST to a /statuses/ path may be recorded."""
+    mod = load_reaper()
+    api_calls = []
+
+    def fake_api(method, path, *, body=None, query=None, expect_json=True):
+        api_calls.append((method, path, body))
+        return 200, {"ok": True}
+
+    # post_status itself is left unpatched; only the HTTP layer is faked.
+    monkeypatch.setattr(mod, "api", fake_api)
+    monkeypatch.setattr(
+        mod,
+        "REQUIRED_SUB_JOBS",
+        [
+            "CI / Detect changes (pull_request)",
+            "CI / Platform (Go) (pull_request)",
+        ],
+    )
+
+    pr = _pr_fixture(6, "mno345")
+
+    def fake_combined_status(sha):
+        return {
+            "statuses": [
+                _status_entry("CI / all-required (pull_request)", "failure"),
+                _status_entry("CI / Detect changes (pull_request)", "success"),
+                _status_entry("CI / Platform (Go) (pull_request)", "success"),
+            ]
+        }
+
+    monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)
+    monkeypatch.setattr(mod, "DRY_RUN", True)
+
+    ok = mod.process_pr(pr)
+    assert ok is True
+    # DRY_RUN should prevent the POST /statuses call
+    assert not any(
+        method == "POST" and "/statuses/" in path for method, path, _ in api_calls
+    )
+
+
+def test_duplicate_contexts_use_latest_state(monkeypatch):
+    """When a sub-job context appears twice in the statuses list, the entry
+    that comes later in the list decides: pending-then-success counts as
+    success, so one compensating status is posted."""
+    mod = load_reaper()
+    posted = []
+    monkeypatch.setattr(mod, "post_status", lambda *a, **k: posted.append(a))
+    monkeypatch.setattr(
+        mod,
+        "REQUIRED_SUB_JOBS",
+        [
+            "CI / Detect changes (pull_request)",
+        ],
+    )
+
+    pr = _pr_fixture(7, "pqr678")
+
+    def fake_combined_status(sha):
+        return {
+            "statuses": [
+                _status_entry("CI / all-required (pull_request)", "failure"),
+                # duplicate: first pending, then success — the loop overwrites
+                _status_entry("CI / Detect changes (pull_request)", "pending"),
+                _status_entry("CI / Detect changes (pull_request)", "success"),
+            ]
+        }
+
+    monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)
+
+    ok = mod.process_pr(pr)
+    assert ok is True
+    assert len(posted) == 1
+
+
+def test_load_required_sub_jobs_from_ci_yml_pull_request_event():
+    """With the default pull_request umbrella, every derived context carries
+    the " (pull_request)" suffix and the known sub-jobs are present.
+
+    The workflows path is relative, so it is resolved against the current
+    working directory of the test run.
+    """
+    reaper = load_reaper()
+    contexts = reaper._load_required_sub_jobs_from_ci_yml(".gitea/workflows")
+    wrong_suffix = [c for c in contexts if not c.endswith(" (pull_request)")]
+    assert not wrong_suffix
+    for expected in (
+        "CI / Detect changes (pull_request)",
+        "CI / Python Lint & Test (pull_request)",
+    ):
+        assert expected in contexts
+
+
+def test_load_required_sub_jobs_from_ci_yml_push_event(monkeypatch):
+    """Switching the umbrella context to the push event switches the suffix
+    of every derived sub-job context to " (push)"."""
+    reaper = load_reaper()
+    monkeypatch.setattr(reaper, "UMBRELLA_CONTEXT", "CI / all-required (push)")
+    contexts = reaper._load_required_sub_jobs_from_ci_yml(".gitea/workflows")
+    wrong_suffix = [c for c in contexts if not c.endswith(" (push)")]
+    assert not wrong_suffix
+    assert "CI / Detect changes (push)" in contexts
+
+
+def test_list_open_prs_paginates(monkeypatch):
+    """list_open_prs keeps requesting pages until one comes back short.
+
+    With limit=2 the fake serves a full first page (2 PRs) and a short
+    second page (1 PR), so exactly two requests are expected and all three
+    PRs come back in order.
+    """
+    mod = load_reaper()
+    calls = []
+
+    def fake_api(method, path, *, body=None, query=None, expect_json=True):
+        calls.append(query)
+        # The requested page size must be forwarded unchanged on every page.
+        assert query["limit"] == "2"
+        page = int(query.get("page", 1))
+        if page == 1:
+            return 200, [{"number": 1}, {"number": 2}]
+        if page == 2:
+            return 200, [{"number": 3}]
+        return 200, []
+
+    monkeypatch.setattr(mod, "api", fake_api)
+    prs = mod.list_open_prs(limit=2)
+    assert len(prs) == 3
+    assert prs[0]["number"] == 1
+    assert prs[2]["number"] == 3
+    # The short second page ends pagination; no third request is made.
+    assert len(calls) == 2
+    assert calls[0]["page"] == "1"
+    assert calls[1]["page"] == "2"
+
+
+def test_process_pr_returns_false_on_status_fetch_failure(monkeypatch):
+    """An ApiError while reading the combined status makes process_pr report failure."""
+    mod = load_reaper()
+
+    def status_outage(sha):
+        raise mod.ApiError("GET /statuses/abc123 -> HTTP 500: simulated outage")
+
+    monkeypatch.setattr(mod, "get_combined_status", status_outage)
+    monkeypatch.setattr(mod, "REQUIRED_SUB_JOBS", ["CI / Detect changes (pull_request)"])
+
+    outcome = mod.process_pr(_pr_fixture(8, "abc123"))
+    assert outcome is False
+
+
+def test_process_pr_returns_false_on_missing_statuses_array(monkeypatch):
+    """A combined-status object without a 'statuses' list is treated as a failure."""
+    mod = load_reaper()
+
+    def status_without_array(sha):
+        # Deliberately omits the 'statuses' key.
+        return dict(state="success")
+
+    monkeypatch.setattr(mod, "get_combined_status", status_without_array)
+    monkeypatch.setattr(mod, "REQUIRED_SUB_JOBS", ["CI / Detect changes (pull_request)"])
+
+    outcome = mod.process_pr(_pr_fixture(9, "def456"))
+    assert outcome is False
+
+
+def test_main_exits_nonzero_when_any_status_read_fails(monkeypatch):
+    """The first PR is compensated successfully, the second PR's status read
+    raises ApiError; main() must still return 1."""
+    mod = load_reaper()
+
+    # main() checks os.environ for these via _require_runtime_env().
+    monkeypatch.setenv("GITEA_TOKEN", "fixture-token")
+    monkeypatch.setenv("GITEA_HOST", "git.example.test")
+    monkeypatch.setenv("REPO", "owner/repo")
+
+    monkeypatch.setattr(
+        mod,
+        "REQUIRED_SUB_JOBS",
+        [
+            "CI / Detect changes (pull_request)",
+            "CI / Platform (Go) (pull_request)",
+        ],
+    )
+    monkeypatch.setattr(
+        mod,
+        "list_open_prs",
+        lambda limit: [
+            _pr_fixture(1, "abc123"),
+            _pr_fixture(2, "def456"),
+        ],
+    )
+
+    def fake_combined_status(sha):
+        # Only the first PR's SHA gets a readable status; any other SHA fails.
+        if sha == "abc123":
+            return {
+                "statuses": [
+                    _status_entry("CI / all-required (pull_request)", "failure"),
+                    _status_entry("CI / Detect changes (pull_request)", "success"),
+                    _status_entry("CI / Platform (Go) (pull_request)", "success"),
+                ]
+            }
+        raise mod.ApiError("simulated status fetch failure")
+
+    monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)
+    monkeypatch.setattr(mod, "post_status", lambda *a, **k: None)
+
+    exit_code = mod.main()
+    assert exit_code == 1
@@ -0,0 +1,360 @@
+#!/usr/bin/env python3
+"""umbrella-reaper — auto-recovery for stale CI umbrella statuses on PRs.
+
+Tracking: molecule-core#1780.
+
+Sibling to status-reaper.py (default-branch push-suffix compensation),
+but scoped to pull_request umbrellas instead of main-branch contexts.
+
+What this script does, per `.gitea/workflows/umbrella-reaper.yml` invocation:
+
+  1. List open PRs via GET /repos/{o}/{r}/pulls?state=open&limit={N}.
+  2. For EACH PR:
+     - GET combined commit status for PR head SHA.
+     - Look for the umbrella context (default: "CI / all-required (pull_request)").
+     - If umbrella state is "failure":
+         - Verify ALL required sub-job contexts are "success".
+         - If yes → POST compensating success to /statuses/{sha} with the
+           same umbrella context and an honest description.
+         - If any required sub-job is NOT success → skip (umbrella correctly
+           reflects reality; do NOT lie).
+     - If umbrella state is "success" or "pending" → skip.
+  3. Exit 0 if every PR was processed cleanly, or 1 if any status read or
+     compensating POST failed. Re-running is idempotent — Gitea de-dups by context.
+
+What it does NOT do:
+  - Touch non-umbrella contexts.
+  - Compensate when ANY required sub-job is missing, pending, failure, or
+    cancelled. Only the "all sub-jobs green, umbrella stale" race.
+  - Merge PRs. It only posts a status; branch protection still requires
+    human approval.
+  - Run on closed PRs.
+
+Halt conditions:
+  - Missing required env vars → exit 1 with ::error:: message.
+  - API 5xx on PR list → fail-loud (can't assess state).
+  - API error on an individual PR's status → ::error:: + continue to next PR;
+    the run then exits 1 once all PRs have been scanned.
+"""
+from __future__ import annotations
+
+import json
+import os
+import re
+import sys
+import urllib.error
+import urllib.parse
+import urllib.request
+from pathlib import Path
+from typing import Any
+
+
+def _load_required_sub_jobs_from_ci_yml(workflows_dir: str) -> list[str]:
+    """Derive the required sub-job status contexts from ci.yml's all-required job.
+
+    Reads ``<workflows_dir>/ci.yml``, takes the first step of the
+    ``all-required`` job that has a non-empty ``run`` block, and extracts the
+    sub-job names from it. Two shapes of that run block are supported:
+      1. Legacy Python f-string list (pre-2026-06-01):
+         f"CI / Detect changes ({event})"
+      2. Current shell-script shape (post-2026-06-01 scheduler fix):
+         check "Detect changes"        "$CHANGES_RESULT"
+
+    The event suffix appended to every context is the trailing parenthesised
+    part of UMBRELLA_CONTEXT, e.g. "(pull_request)" or "(push)";
+    "(pull_request)" is used when UMBRELLA_CONTEXT has no such suffix.
+
+    Args:
+        workflows_dir: Directory that contains ci.yml.
+
+    Returns:
+        Context strings such as "CI / Detect changes (pull_request)".
+
+    Raises:
+        RuntimeError: if ci.yml is missing, is not a YAML mapping, has no
+            all-required job, has no run block, or the run block matches
+            neither supported shape.
+    """
+    ci_path = Path(workflows_dir) / "ci.yml"
+    if not ci_path.exists():
+        raise RuntimeError(f"ci.yml not found at {ci_path}")
+
+    # PyYAML is installed by the workflow (same as status-reaper.py).
+    import yaml
+
+    with ci_path.open() as f:
+        doc = yaml.safe_load(f)
+
+    # An empty or non-mapping ci.yml would otherwise surface as an
+    # AttributeError instead of the documented RuntimeError.
+    if not isinstance(doc, dict):
+        raise RuntimeError("ci.yml is not a YAML mapping")
+
+    jobs = doc.get("jobs")
+    all_required = jobs.get("all-required") if isinstance(jobs, dict) else None
+    if not isinstance(all_required, dict):
+        raise RuntimeError("ci.yml missing 'all-required' job")
+
+    # Use the first step that actually carries a run script.
+    run_block = ""
+    for step in all_required.get("steps") or []:
+        if isinstance(step, dict):
+            run_text = step.get("run", "")
+            if run_text:
+                run_block = run_text
+                break
+
+    if not run_block:
+        raise RuntimeError("all-required job missing run block")
+
+    # Event suffix, parentheses included, taken from the umbrella context we
+    # are watching. The parentheses must be kept for every event name because
+    # both substitutions below insert `suffix` verbatim.
+    suffix_match = re.search(r" (\([^)]+\))$", UMBRELLA_CONTEXT)
+    suffix = suffix_match.group(1) if suffix_match else "(pull_request)"
+
+    # Try legacy f-string format first.
+    if "({event})" in run_block:
+        templates = re.findall(r'f["\'](.*?\(\{event\}\))["\']', run_block)
+        if templates:
+            return [tpl.replace("({event})", suffix) for tpl in templates]
+
+    # Try current shell-script format: check "Name" "$RESULT"
+    names = re.findall(r'check\s+"([^"]+)"', run_block)
+    if names:
+        return [f"CI / {name} {suffix}" for name in names]
+
+    raise RuntimeError("unable to derive required sub-jobs from all-required run block")
+
+
+# --------------------------------------------------------------------------
+# Environment
+# --------------------------------------------------------------------------
+def _env(key: str, *, default: str = "") -> str:
+    """Return environment variable `key`, or `default` when it is not set."""
+    value = os.environ.get(key)
+    return default if value is None else value
+
+
+# Connection settings, read once when the module is loaded.
+GITEA_TOKEN = _env("GITEA_TOKEN")
+GITEA_HOST = _env("GITEA_HOST")
+REPO = _env("REPO")
+# Enabled by "1", "true" or "yes" (case-insensitive); post_status then only
+# prints the payload instead of sending it.
+DRY_RUN = _env("DRY_RUN", default="").lower() in ("1", "true", "yes")
+
+# The umbrella context to watch. Must match the branch-protection name
+# exactly (Gitea de-dups by context string).
+UMBRELLA_CONTEXT = _env("UMBRELLA_CONTEXT", default="CI / all-required (pull_request)")
+
+# Required sub-job contexts. The umbrella is only compensated when ALL of
+# these are "success" on the same SHA. Order does not matter.
+#
+# Derive from ci.yml at runtime to prevent drift (CR2 blocker #1).
+# The env var REQUIRED_SUB_JOBS overrides derivation for emergency
+# tuning or local testing.
+_REQUIRED_SUB_JOBS_OVERRIDE = _env("REQUIRED_SUB_JOBS")
+if _REQUIRED_SUB_JOBS_OVERRIDE:
+    # Override format: contexts separated by ";"; blank segments are dropped.
+    REQUIRED_SUB_JOBS = [
+        ctx.strip()
+        for ctx in _REQUIRED_SUB_JOBS_OVERRIDE.split(";")
+        if ctx.strip()
+    ]
+else:
+    try:
+        # Relative path: resolved against the process's working directory.
+        REQUIRED_SUB_JOBS = _load_required_sub_jobs_from_ci_yml(".gitea/workflows")
+    except Exception as exc:
+        # Any derivation failure aborts at import time with exit code 1.
+        sys.stderr.write(
+            f"::error::Failed to derive REQUIRED_SUB_JOBS from ci.yml: {exc}\n"
+        )
+        sys.exit(1)
+
+# Split "owner/name"; padding with "" keeps the unpack safe when REPO has no "/".
+OWNER, NAME = (REPO.split("/", 1) + [""])[:2] if REPO else ("", "")
+# Base URL for every request made by api(); empty when GITEA_HOST is unset.
+API = f"https://{GITEA_HOST}/api/v1" if GITEA_HOST else ""
+# Page size main() passes to list_open_prs.
+PR_LIMIT = int(_env("PR_LIMIT", default="50"))
+
+
+def _require_runtime_env() -> None:
+    """Exit with status 1 at the first required env var that is unset or empty."""
+    required = ("GITEA_TOKEN", "GITEA_HOST", "REPO")
+    first_missing = next((name for name in required if not os.environ.get(name)), None)
+    if first_missing is not None:
+        sys.stderr.write(f"::error::missing required env var: {first_missing}\n")
+        sys.exit(1)
+
+
+# --------------------------------------------------------------------------
+# Tiny HTTP helper
+# --------------------------------------------------------------------------
+class ApiError(RuntimeError):
+    """Raised when a Gitea API call fails or returns a response of the wrong shape."""
+    pass
+
+
+def api(
+    method: str,
+    path: str,
+    *,
+    body: dict | None = None,
+    query: dict[str, str] | None = None,
+    expect_json: bool = True,
+) -> tuple[int, Any]:
+    """Send one token-authenticated request to the Gitea API and decode the reply.
+
+    Args:
+        method: HTTP method, e.g. "GET" or "POST".
+        path: API path appended to the module-level API base URL.
+        body: Optional JSON-serialisable request body.
+        query: Optional query parameters, URL-encoded onto the path.
+        expect_json: When true, a non-JSON 2xx body is an error; when false
+            it is returned as {"_raw": <text>}.
+
+    Returns:
+        (status, parsed) where parsed is the decoded JSON, or None for an
+        empty body.
+
+    Raises:
+        ApiError: on a non-2xx status, on a transport failure (connection
+            error, timeout), or on a non-JSON body while expect_json is true.
+    """
+    url = f"{API}{path}"
+    if query:
+        url = f"{url}?{urllib.parse.urlencode(query)}"
+    data = None
+    headers = {
+        "Authorization": f"token {GITEA_TOKEN}",
+        "Accept": "application/json",
+    }
+    if body is not None:
+        data = json.dumps(body).encode("utf-8")
+        headers["Content-Type"] = "application/json"
+    req = urllib.request.Request(url, method=method, data=data, headers=headers)
+    try:
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            raw = resp.read()
+            status = resp.status
+    except urllib.error.HTTPError as e:
+        # HTTP-level errors still carry a status and body; handled below.
+        raw = e.read()
+        status = e.code
+    except OSError as e:
+        # URLError and socket timeouts are OSError subclasses. Convert them so
+        # callers that handle ApiError per PR are not bypassed by a network
+        # blip. This clause must stay after HTTPError, which is also an OSError.
+        raise ApiError(f"{method} {path} -> transport error: {e}") from e
+
+    if not (200 <= status < 300):
+        # Keep the error short: only the first 500 bytes of the body.
+        snippet = raw[:500].decode("utf-8", errors="replace") if raw else ""
+        raise ApiError(f"{method} {path} -> HTTP {status}: {snippet}")
+
+    if not raw:
+        return status, None
+    try:
+        return status, json.loads(raw)
+    except json.JSONDecodeError as e:
+        if expect_json:
+            raise ApiError(
+                f"{method} {path} -> HTTP {status} but body is not JSON: {e}"
+            ) from e
+        return status, {"_raw": raw.decode("utf-8", errors="replace")}
+
+
+# --------------------------------------------------------------------------
+# Gitea reads / writes
+# --------------------------------------------------------------------------
+def list_open_prs(limit: int = 50) -> list[dict]:
+    """Collect every open PR, requesting `limit` items per page.
+
+    Paging stops at the first empty page or the first page shorter than
+    `limit`. A page that is not a JSON array raises ApiError (fail closed).
+    """
+    collected: list[dict] = []
+    page_no = 0
+    while True:
+        page_no += 1
+        params = {"state": "open", "limit": str(limit), "page": str(page_no)}
+        _, payload = api("GET", f"/repos/{OWNER}/{NAME}/pulls", query=params)
+        if not isinstance(payload, list):
+            raise ApiError(f"PR list page {page_no} response is not a JSON array")
+        if not payload:
+            return collected
+        collected.extend(payload)
+        if len(payload) < limit:
+            return collected
+
+
+def get_combined_status(sha: str) -> dict:
+    """Fetch the combined commit status for `sha`; raise ApiError unless it is a JSON object."""
+    endpoint = f"/repos/{OWNER}/{NAME}/commits/{sha}/status"
+    payload = api("GET", endpoint)[1]
+    if isinstance(payload, dict):
+        return payload
+    raise ApiError(f"status for {sha} response is not a JSON object")
+
+
+def post_status(sha: str, context: str, description: str) -> None:
+    """Post a "success" status for `context` on `sha`, or only print it when DRY_RUN is set."""
+    payload = dict(context=context, state="success", description=description)
+    if not DRY_RUN:
+        api("POST", f"/repos/{OWNER}/{NAME}/statuses/{sha}", body=payload)
+        return
+    print(f"[DRY-RUN] Would POST /statuses/{sha}: {json.dumps(payload)}")
+
+
+# --------------------------------------------------------------------------
+# Core logic
+# --------------------------------------------------------------------------
+def _entry_state(s: dict) -> str:
+    """Return the entry's "status" if truthy, else its "state" if truthy, else ""."""
+    for key in ("status", "state"):
+        value = s.get(key)
+        if value:
+            return value
+    return ""
+
+
+def process_pr(pr: dict) -> bool:
+    """Inspect one open PR and post a compensating umbrella status if warranted.
+
+    A compensating "success" is posted only when the umbrella context is in
+    state "failure" while every context in REQUIRED_SUB_JOBS is present and
+    "success" on the PR's head SHA. Every other combination is a no-op skip.
+
+    Args:
+        pr: PR object from the pulls listing; only "number" and "head.sha"
+            are read.
+
+    Returns:
+        True when the PR was handled (compensated, or deliberately skipped —
+        including a PR without a head SHA). False when the combined status
+        could not be fetched, lacks a "statuses" list, or the compensating
+        POST failed.
+    """
+    num = pr.get("number")
+    # `head` may be absent or null in a malformed payload; both mean "no SHA".
+    head = pr.get("head")
+    sha = head.get("sha") if isinstance(head, dict) else None
+    if not sha:
+        print(f"::warning::PR #{num}: missing head.sha; skipping")
+        return True
+
+    try:
+        status = get_combined_status(sha)
+    except ApiError as e:
+        print(f"::error::PR #{num}: status fetch failed: {e}")
+        return False
+
+    statuses = status.get("statuses")
+    if not isinstance(statuses, list):
+        print(f"::error::PR #{num}: combined status missing 'statuses' array")
+        return False
+    umbrella_entry = None
+    subjob_states: dict[str, str] = {}
+
+    # Later entries for the same context overwrite earlier ones.
+    for s in statuses:
+        if not isinstance(s, dict):
+            continue
+        ctx = s.get("context", "")
+        state = _entry_state(s)
+        if ctx == UMBRELLA_CONTEXT:
+            umbrella_entry = s
+        if ctx in REQUIRED_SUB_JOBS:
+            subjob_states[ctx] = state
+
+    if umbrella_entry is None:
+        print(f"::notice::PR #{num}: no umbrella context '{UMBRELLA_CONTEXT}'; skipping")
+        return True
+
+    umbrella_state = _entry_state(umbrella_entry)
+    if umbrella_state != "failure":
+        print(f"::notice::PR #{num}: umbrella is '{umbrella_state}'; skipping")
+        return True
+
+    # Verify ALL required sub-jobs are present and success
+    missing = [ctx for ctx in REQUIRED_SUB_JOBS if ctx not in subjob_states]
+    if missing:
+        print(
+            f"::notice::PR #{num}: umbrella=failure, but missing sub-jobs: {missing}; "
+            "skipping (sub-jobs may still be running)"
+        )
+        return True
+
+    not_success = [ctx for ctx in REQUIRED_SUB_JOBS if subjob_states[ctx] != "success"]
+    if not_success:
+        print(
+            f"::notice::PR #{num}: umbrella=failure, but sub-jobs not all success: "
+            f"{[(ctx, subjob_states[ctx]) for ctx in not_success]}; skipping"
+        )
+        return True
+
+    # All checks pass — post compensating status
+    desc = (
+        "Compensating status: all required sub-jobs verified success; "
+        "umbrella stale due to commit-status propagation race. "
+        f"Auto-posted by umbrella-reaper for PR #{num}."
+    )
+    try:
+        post_status(sha, UMBRELLA_CONTEXT, desc)
+        print(f"::notice::PR #{num}: posted compensating success for {UMBRELLA_CONTEXT}")
+        return True
+    except ApiError as e:
+        print(f"::error::PR #{num}: failed to post compensating status: {e}")
+        return False
+
+
+def main() -> int:
+    """Scan all open PRs once and return the process exit code.
+
+    Returns:
+        1 when REQUIRED_SUB_JOBS is empty or when process_pr reported a
+        failure for at least one PR; 0 otherwise. Exits early (via
+        _require_runtime_env) when a required env var is missing.
+    """
+    _require_runtime_env()
+
+    # Drift guard: ci.yml derivation already happened at module load, but
+    # we sanity-check it is non-empty so the loop below doesn't trivially
+    # no-op because of a parse bug.
+    if not REQUIRED_SUB_JOBS:
+        sys.stderr.write("::error::REQUIRED_SUB_JOBS is empty; bailing out\n")
+        return 1
+
+    prs = list_open_prs(limit=PR_LIMIT)
+    print(f"::notice::Scanning {len(prs)} open PRs for stale umbrella statuses")
+    # Counts every PR for which process_pr returned False; a failure on one
+    # PR does not stop the scan of the remaining PRs.
+    failed = 0
+    for pr in prs:
+        if not process_pr(pr):
+            failed += 1
+    print(f"::notice::umbrella-reaper complete (failed POSTs={failed})")
+    return 1 if failed else 0
+
+
+# Script entry point: main()'s return value becomes the process exit code.
+if __name__ == "__main__":
+    sys.exit(main())
@@ -55,38 +55,22 @@

 version: 1

-# Tier-aware failure mode (RFC#351 open question 2):
-#   For tier:high — hard-fail (status `failure`, blocks merge via BP).
-#   For tier:medium — hard-fail (same as high; medium is non-trivial).
-#   For tier:low — soft-fail (status `pending` with `acked: N/M` in the
-#                  description). BP can choose to require the context
-#                  or not for low-tier PRs.
-# If no tier label is present, default to medium (hard-fail) — every PR
-# should have a tier label per sop-tier-check, and absence indicates
-# a missing-tier defect we should surface, not silently lower the bar.
-tier_failure_mode:
-  "tier:high": hard
-  "tier:medium": hard
-  "tier:low": soft
-default_mode: hard  # used when no tier:* label is present
+# Uniform hard-fail mode (CTO 2026-06-07):
+#   Every PR uses the same gate — no tier branching.
+#   Missing acks → status `failure`, blocks merge via branch protection.

 # High-risk class (RFC#450 Option C, governance-fix for internal#442).
 #
-# A PR is "high-risk" when ANY of the listed labels are applied OR when
-# the PR has `tier:high` (mechanically the strictest existing tier).
+# A PR is "high-risk" when ANY of the listed labels are applied.
 # High-risk items use `required_teams_high_risk` (when present on the
 # item); non-high-risk items use the default `required_teams`.
 #
-# This closes the inconsistency that the SOP charter already mandates
-# `tier:high → ceo only` for the sibling `sop-tier-check` gate; the
-# sop-checklist's `root-cause` and `no-backwards-compat` items now
-# follow the same risk-classed two-eyes shape:
-#   - Default class (tier:low/medium, not high-risk): a non-author
-#     engineers/managers/ceo ack satisfies the item — 25+ live
-#     identities, no dependency on a dead/inactive senior persona
-#     token.
-#   - High-risk class (tier:high OR any high_risk_label): still
-#     requires a non-author ceo ack (durable human team).
+# Risk-classed two-eyes shape:
+#   - Default class (not high-risk): a non-author engineers/managers/ceo
+#     ack satisfies the item — 25+ live identities, no dependency on a
+#     dead/inactive senior persona token.
+#   - High-risk class (any high_risk_label): still requires a non-author
+#     ceo ack (durable human team).
 #
 # Tightening: add labels to high_risk_labels.
 # Loosening: remove labels.
@@ -13,14 +13,14 @@
 # the structured JSON shape is forward-compatible.
 #
 # Logic in `.gitea/scripts/audit-force-merge.sh` per the same script-
-# extract pattern as sop-tier-check.
+# extract pattern as sop-checklist.

 name: audit-force-merge

 # pull_request_target loads from the base branch — same security model
-# as sop-tier-check. Without this, an attacker could rewrite the
+# as sop-checklist. Without this, an attacker could rewrite the
 # workflow on a PR and skip the audit emission for their own
-# force-merge. See `.gitea/workflows/sop-tier-check.yml` for the full
+# force-merge. See `.gitea/workflows/sop-checklist.yml` for the full
 # rationale.
 on:
  pull_request_target:
@@ -41,7 +41,7 @@ jobs:
          ref: ${{ github.event.pull_request.base.sha }}
      - name: Detect force-merge + emit audit event
        env:
-          # Same org-level secret the sop-tier-check workflow uses.
+          # Same org-level secret the sop-checklist workflow uses.
          GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
          GITEA_HOST: git.moleculesai.app
          REPO: ${{ github.repository }}
@@ -54,7 +54,7 @@ jobs:
          # required checks) for each branch listed here.
          #
          # Declared here rather than fetched from /branch_protections
-          # because that endpoint requires admin write — sop-tier-bot is
+          # because that endpoint requires admin write — sop-checklist-bot is
          # read-only by design (least-privilege).
          REQUIRED_CHECKS_JSON: |
            {
@@ -34,6 +34,8 @@ jobs:
  check:
    name: Block forbidden paths
    runs-on: ubuntu-latest
+    # Hard gate — detected internal-path leaks fail the workflow.
+    # continue-on-error removed per directive (fail-open → fail-closed).
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
@@ -0,0 +1,165 @@
+name: boot-to-registration-e2e (advisory)
+
+# cp#455 — Minimal-cell boot-to-registration e2e.
+# CTO directive 14eb4f07: "build the minimal claude-code+kimi cell,
+# it should now go GREEN since the fix is live."
+#
+# Stage 1 of 5-stage rollout. Reuses the dispatch-only EC2
+# provisioning path from test_staging_full_saas.sh but reduced to
+# the minimum boot-to-registration surface:
+#
+#   1. Provision request accepted; workspace transitions to booting/running
+#   2. Controlplane receives /registry/register for that workspace_id
+#   3. JSON-RPC/completion route returns successful minimal response
+#   4. Teardown terminates workspace even on failure (trap)
+#
+# Advisory (non-blocking) per Researcher Stage 2 design — RED on
+# current main is expected pre-cp#469-cluster. After cp#477 deploy
+# (888efceb) + PR #2167 merge, cell should turn GREEN. THAT green
+# is the cluster-proof signal.
+#
+# Cost controls (mandatory):
+#   - SPOT instances (tagged run_id/workspace_id for cost attribution)
+#   - Fast teardown (~3-5 min wall-clock) even on assertion failure
+#   - Structured per-cell results JSON (runtime/provider/model/
+#     billing_mode/workspace_id/register_status/completion_status/
+#     teardown_status/elapsed_seconds)
+#
+# Inputs:
+#   runtime        : default claude-code
+#   billing_mode   : default platform_managed (the cp#469-cluster path)
+#   provider       : default platform (vs direct-to-provider)
+#   model          : default moonshot/kimi-k2.6 (CTO-specified)
+#
+# PR target: molecule-core (this file). Companion harness extension
+# (test_minimal_boot_cell.sh) lives in tests/e2e/ alongside
+# test_staging_full_saas.sh — same repo, same branch.
+#
+# Note: cp#455 was originally spec'd to live in molecule-controlplane
+# (`.gitea/workflows/` path), but molecule-core's CI is the home for
+# tenant-boot e2e tests in this stage. Stage 2 may move the path.
+
+on:
+  workflow_dispatch:
+  # Note: Gitea 1.22.6 does not support workflow_dispatch.inputs
+  # (feedback_gitea_workflow_dispatch_inputs_unsupported). Defaults
+  # are hardcoded in the job env below. Stage 2 can add matrix/
+  # param support once the Gitea version supports it.
+
+# Advisory: no cron schedule, manual dispatch only. Branch protection
+# doesn't require this — RED on main is expected pre-cp#469-cluster
+# deploy, GREEN signals the cluster is live.
+permissions:
+  contents: read
+  # No issue-write; failures surface as red runs in workflow history.
+
+concurrency:
+  group: boot-to-registration-e2e
+  cancel-in-progress: false
+
+jobs:
+  # bp-exempt: advisory e2e — non-gating, manual dispatch only (cp#455 Stage 1)
+  minimal-cell:
+    name: Minimal cell (claude-code + platform + moonshot/kimi-k2.6)
+    runs-on: ubuntu-latest
+    # Bounded at 12 min. Wall-clock budget breakdown:
+    #   - cold EC2 provision: ~3-4 min (SPOT)
+    #   - /registry/register wait: ~30s
+    #   - completion call: ~10s
+    #   - teardown: ~30-60s
+    #   - tail headroom: ~6-7 min
+    timeout-minutes: 12
+    env:
+      # Hardcoded defaults — Gitea 1.22.6 does not support workflow_dispatch.inputs
+      # (feedback_gitea_workflow_dispatch_inputs_unsupported). Stage 2 can add
+      # matrix/param support once the Gitea version supports it.
+      E2E_RUNTIME: claude-code
+      E2E_BILLING_MODE: platform_managed
+      E2E_PROVIDER: platform
+      E2E_MODEL: moonshot/kimi-k2.6
+      E2E_RUN_ID: cp455-${{ github.run_id }}
+      E2E_PROVISION_TIMEOUT_SECS: '300' # 5 min — fast teardown budget
+      MOLECULE_CP_URL: ${{ vars.STAGING_CP_URL || 'https://staging-api.moleculesai.app' }}
+      MOLECULE_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Verify required secrets present
+        run: |
+          if [ -z "${MOLECULE_ADMIN_TOKEN:-}" ]; then
+            echo "::error::CP_STAGING_ADMIN_API_TOKEN secret missing — minimal-cell e2e cannot run"
+            echo "::error::Set it at Settings → Secrets and Variables → Actions; pull from staging-CP's CP_ADMIN_API_TOKEN env in Railway."
+            exit 1
+          fi
+
+      - name: Install required tools
+        run: |
+          for cmd in jq curl python3; do
+            command -v "$cmd" >/dev/null 2>&1 || {
+              echo "::error::required tool '$cmd' not on PATH — runner image regression?"
+              exit 1
+            }
+          done
+
+      - name: Run minimal-cell boot-to-registration harness
+        # The harness script handles its own teardown via EXIT trap;
+        # even on assertion failure (provision timeout, register
+        # timeout, completion failure), the workspace is deprovisioned
+        # and a leak is reported. Exit code propagates from the script.
+        # Structured per-cell results are emitted to ${GITHUB_STEP_SUMMARY}
+        # so operators see pass/fail per assertion without scrolling.
+        run: |
+          bash tests/e2e/test_minimal_boot_cell.sh
+
+      - name: Emit structured per-cell results
+        if: always()
+        # Always run (even on failure) so the structured results are
+        # visible in the workflow summary. The script writes a JSON
+        # file at /tmp/cell-result.json; this step renders it as a
+        # job summary.
+        run: |
+          if [ -f /tmp/cell-result.json ]; then
+            echo "## Minimal-cell results" >> "$GITHUB_STEP_SUMMARY"
+            echo "" >> "$GITHUB_STEP_SUMMARY"
+            echo '```json' >> "$GITHUB_STEP_SUMMARY"
+            cat /tmp/cell-result.json >> "$GITHUB_STEP_SUMMARY"
+            echo "" >> "$GITHUB_STEP_SUMMARY"
+            echo '```' >> "$GITHUB_STEP_SUMMARY"
+          else
+            echo "## Minimal-cell results: NO_RESULT_FILE" >> "$GITHUB_STEP_SUMMARY"
+            echo "" >> "$GITHUB_STEP_SUMMARY"
+            echo "Harness did not produce /tmp/cell-result.json — likely crashed before trap fired." >> "$GITHUB_STEP_SUMMARY"
+          fi
+
+      - name: Failure summary
+        if: failure()
+        run: |
+          {
+            echo "## cp#455 minimal-cell FAILED"
+            echo ""
+            echo "**Run ID:** ${{ github.run_id }}"
+            echo "**Runtime:** ${E2E_RUNTIME}"
+            echo "**Billing mode:** ${E2E_BILLING_MODE}"
+            echo "**Provider:** ${E2E_PROVIDER}"
+            echo "**Model:** ${E2E_MODEL}"
+            echo "**Slug:** ${E2E_RUN_ID}"
+            echo ""
+            echo "### What this means"
+            echo ""
+            echo "The minimal claude-code+kimi cell did not pass all 4 assertions:"
+            echo "1. Provision request accepted; workspace transitions to booting/running"
+            echo "2. Controlplane receives /registry/register for that workspace_id"
+            echo "3. JSON-RPC/completion route returns successful minimal response"
+            echo "4. Teardown terminates workspace even on failure (trap)"
+            echo ""
+            echo "RED is expected pre-cp#469-cluster. After cp#477 deploy (888efceb) + PR #2167 merge,"
+            echo "this should turn GREEN. Persistent RED after both merge = cluster bug, not e2e bug."
+            echo ""
+            echo "### Next steps"
+            echo ""
+            echo "1. Check the harness output above for the assertion that failed"
+            echo "2. If assertion 1 fails: provision path broken — check CP admin API + EC2 quota"
+            echo "3. If assertion 2 fails: /registry/register path broken — check workspace-server boot"
+            echo "4. If assertion 3 fails: LLM proxy / completion path broken — check cp#469 cluster"
+            echo "5. If assertion 4 fails: teardown trap broken — leak risk, fix immediately"
+          } >> "$GITHUB_STEP_SUMMARY"
@@ -12,7 +12,7 @@
 # (SHA 0adf2098) per RFC internal#219 Phase 2b+c — replicate repo-by-repo.
 #
 # When any pair diverges, a `[ci-drift]` issue is opened or updated
-# (idempotent by title) and labelled `tier:high`. This is the
+# (idempotent by title) and labelled `ci-bp-drift`. This is the
 # auto-detection that closes the regression class identified in
 # RFC §1 finding 3 (protection only listed 2 of 6 real jobs for
 # ~weeks, undetected) and §6 (audit env drifts silently from
@@ -106,7 +106,7 @@ jobs:
          AUDIT_WORKFLOW_PATH: '.gitea/workflows/audit-force-merge.yml'
          # Path to the CI workflow with the sentinel + the jobs.
          CI_WORKFLOW_PATH: '.gitea/workflows/ci.yml'
-          # Issue label applied on file/update. `tier:high` exists in
+          # Issue label applied on file/update. `ci-bp-drift` exists in
          # the molecule-core label set (verified 2026-05-11, label id 9).
-          DRIFT_LABEL: 'tier:high'
+          DRIFT_LABEL: 'ci-bp-drift'
        run: python3 .gitea/scripts/ci-required-drift.py
@@ -418,10 +418,9 @@ jobs:
    # a manual action that determinism made obsolete.
    name: Canvas Deploy Status
    runs-on: docker-host
-    # Job-level `if:` so ci-required-drift.py's ci_job_names() detects this as
-    # github.ref-gated and skips it from the required-context F1 set (mc#1982).
+    # Per-step no-op (not job-level `if:`) so the job reaches SUCCESS on PRs
+    # instead of skipped — skipped poisons the PR combined status (internal#817).
    # Step-level exit 0 handles the "not a canvas main push" case.
-    if: ${{ github.ref == 'refs/heads/main' || github.ref == 'refs/heads/staging' }}
    needs: [changes, canvas-build]
    steps:
      - name: Record canvas ordered-deploy status
@@ -500,7 +499,7 @@ jobs:
    # `CI / all-required (pull_request)` per issue #1473.
    #
    # Closes the failure mode where status_check_contexts on molecule-core/main
-    # only listed `Secret scan` + `sop-tier-check` (the 2 meta-gates), so real
+    # only listed `Secret scan` + `sop-checklist` (the 2 meta-gates), so real
    # `Platform (Go)` / `Canvas (Next.js)` / `Python Lint & Test` / `Shellcheck`
    # red silently merged through. See internal#286 for the three concrete
    # tonight-of-2026-05-11 incidents that prompted the emergency bump.
@@ -533,9 +532,8 @@ jobs:
    # The `needs:` list MUST stay in lockstep with ci-required-drift.py's
    # F1 check (`ci_job_names()` = every job MINUS the sentinel MINUS jobs
    # whose `if:` gates on github.event_name/github.ref). canvas-deploy-
-    # reminder is event-gated (`if: github.ref == refs/heads/{main,staging}`)
-    # so it is intentionally EXCLUDED — it skips on PRs and a `needs:` on a
-    # skipped job would never let the sentinel run. If a new always-running
+    # status is per-step-gated (not job-level `if:`) so it reaches SUCCESS
+    # on PRs and is included here — internal#817. If a new always-running
    # CI job is added, add it here too or ci-required-drift F1 will flag it.
    #
    # Stays on the dedicated `ci-meta` lane (no docker work, so the
@@ -549,6 +547,7 @@ jobs:
      - canvas-build
      - shellcheck
      - python-lint
+      - canvas-deploy-status
    continue-on-error: false
    runs-on: ci-meta
    timeout-minutes: 5
@@ -567,6 +566,7 @@ jobs:
          CANVAS_RESULT: ${{ needs.canvas-build.result }}
          SHELLCHECK_RESULT: ${{ needs.shellcheck.result }}
          PYTHON_LINT_RESULT: ${{ needs.python-lint.result }}
+          CANVAS_DEPLOY_RESULT: ${{ needs.canvas-deploy-status.result }}
        run: |
          set -euo pipefail
          fail=0
@@ -588,6 +588,7 @@ jobs:
          check "Canvas (Next.js)"      "$CANVAS_RESULT"
          check "Shellcheck (E2E scripts)" "$SHELLCHECK_RESULT"
          check "Python Lint & Test"    "$PYTHON_LINT_RESULT"
+          check "Canvas Deploy Status"  "$CANVAS_DEPLOY_RESULT"
          if [ "$fail" -ne 0 ]; then
            echo "::error::all-required: one or more aggregated CI jobs did not succeed"
            exit 1
@@ -131,9 +131,9 @@ jobs:
      # on the per-runtime default ("sonnet" → routes to direct
      # Anthropic, defeats the cost saving). Operators can override
      # via workflow_dispatch by setting a different E2E_MODEL_SLUG
-      # input if they need to exercise a specific model. MiniMax-M2 is the
-      # stable staging MiniMax path used by the full-SaaS smoke.
-      E2E_MODEL_SLUG: ${{ github.event.inputs.model_slug || 'MiniMax-M2' }}
+      # input if they need to exercise a specific model. MiniMax-M2.7 is the
+      # stable staging MiniMax path used by the full-SaaS smoke (#1997).
+      E2E_MODEL_SLUG: ${{ github.event.inputs.model_slug || 'MiniMax-M2.7' }}
      # Bound to 10 min so a stuck provision fails the run instead of
      # holding up the next cron firing. 15-min default in the script
      # is for the on-PR full lifecycle where we have more headroom.
@@ -250,6 +250,20 @@ jobs:
          echo "CANVAS_PORT=${CANVAS_PORT}" >> "$GITHUB_ENV"
          echo "Canvas host port: ${CANVAS_PORT}"

+      - name: Set deterministic admin token
+        if: needs.detect-changes.outputs.chat == 'true'
+        run: |
+          # PR #2291 made auth fail-closed everywhere (no dev-mode escape).
+          # The platform server requires ADMIN_TOKEN; the canvas requires the
+          # matching NEXT_PUBLIC_ADMIN_TOKEN or every API call 401s.
+          # We set a deterministic per-run value so the ephemeral platform
+          # and canvas are paired correctly.
+          E2E_ADMIN_TOKEN="e2e-chat-admin-${{ github.run_id }}-${{ github.run_attempt }}"
+          echo "ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
+          echo "MOLECULE_ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
+          echo "NEXT_PUBLIC_ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
+          echo "Admin token configured for e2e-chat platform + canvas."
+
      - name: Start platform (background)
        if: needs.detect-changes.outputs.chat == 'true'
        working-directory: workspace-server
@@ -73,7 +73,7 @@ jobs:
          # NOTE: REQUIRED_CONTEXTS is no longer the authoritative PR gate. The
          # queue now reads the required status contexts from BRANCH PROTECTION
          # (status_check_contexts) so non-required governance reds (qa-review,
-          # security-review, sop-tier, sop-checklist when not branch-required,
+          # security-review, sop-checklist when not branch-required,
          # E2E Chat, Staging SaaS, ci-arm64-advisory) cannot block a merge.
          # If branch protection cannot be enumerated the queue HOLDS
          # (fail-closed). REQUIRED_APPROVALS below is only a fallback used when
@@ -61,11 +61,9 @@ name: Lint pre-flip continue-on-error
 # feedback_no_shared_persona_token_use.
 #
 # Phase contract (RFC internal#219 §1 ladder):
-#   - This workflow lands at `continue-on-error: true` (Phase 3 —
-#     surface defects without blocking). Follow-up PR flips it to
-#     `false` ONLY after this workflow's own recent runs on `main`
-#     are confirmed clean — exactly the discipline the workflow
-#     itself enforces. Eat your own dogfood.
+#   - Flipped to `continue-on-error: false` after Researcher live-verified
+#     clean runs. The script's own 35 pytest tests pass and recent PR
+#     history shows no masked regressions — the gate is now enforcing.

 on:
  pull_request:
@@ -97,10 +95,9 @@ jobs:
    name: Verify continue-on-error flips have run-log proof
    runs-on: ubuntu-latest
    timeout-minutes: 8
-    # Phase 3 (RFC internal#219 §1): surface broken flips without blocking
-    # the PR yet. Follow-up flips this to `false` once the workflow itself
-    # has clean recent runs on main. mc#1982 interim — remove when CoE→false.
-    continue-on-error: true  # mc#1982
+    # Fail-closed: the lint script is verified clean (35/35 tests pass,
+    # Researcher live-check confirmed). Masking removed per mc#1982 close-out.
+    continue-on-error: false
    steps:
      - name: Check out PR head (full history for base-SHA access)
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
@@ -19,7 +19,7 @@
 # Forward-compat scope:
 #   Today (2026-05-11) molecule-core/main protects 3 contexts:
 #     - "Secret scan / Scan diff for credential-shaped strings (pull_request)"
-#     - "sop-tier-check / tier-check (pull_request)"
+#     - "sop-checklist / all-items-acked (pull_request)"
 #     - "CI / all-required (pull_request)"
 #   Per RFC#324 Step 2 the required-list expands to ~5 contexts
 #   (qa-review, security-review added). Each new required context's
@@ -40,6 +40,7 @@ env:
  GITHUB_SERVER_URL: https://git.moleculesai.app

 jobs:
+  # bp-exempt: informational lint enforcing docker-host/publish pin convention (internal#512), not a merge gate
  lint-docker-host-pin:
    name: Lint docker-host pin on docker-touching workflows
    runs-on: docker-host
@@ -16,7 +16,7 @@ name: Lint workflow YAML (Gitea-1.22.6-hostile shapes)
 #
 # Empirical history this hardens against:
 #   - status-reaper rev1 caught rule-4 (name-collision) class
-#   - sop-tier-refire DOA'd on rule-2 (workflow_run partial)
+#   - sop-tier-refire (since removed) DOA'd on rule-2 (workflow_run partial)
 #   - #319 bootstrap-paradox (chained-defect class, related)
 #   - internal#329 dispatcher race (adjacent)
 #   - 2026-05-11 publish-runtime: rule-1, 24h PyPI freeze
@@ -95,10 +95,10 @@ jobs:
          # included here — staging green is a separate gate
          # (`feedback_staging_e2e_merge_gate`).
          WATCH_BRANCH: 'main'
-          # Issue label applied on file/open. `tier:high` exists in the
+          # Issue label applied on file/open. `ci-bp-drift` exists in the
          # molecule-core label set (verified 2026-05-11, label id 9).
          # Rationale for high: main red blocks the promotion train and
          # poisons every PR's auto-rebase base; treat as a fire even
          # if intermittent.
-          RED_LABEL: 'tier:high'
+          RED_LABEL: 'ci-bp-drift'
        run: python3 .gitea/scripts/main-red-watchdog.py
@@ -12,9 +12,9 @@
 #     - `pull_request_review` types: [submitted]
 #         → re-evaluate when a team member submits an APPROVE review so
 #           the gate flips immediately (no wait for the next push or
-#           slash-command). Verified live: sop-tier-check.yml uses this
+#           slash-command). Verified live: sop-checklist.yml uses this
 #           same event and provably fires (produces
-#           `sop-tier-check / tier-check (pull_request_review)` contexts).
+#           `sop-checklist / all-items-acked (pull_request_review)` contexts).
 #           The job-level `if:` guard checks
 #           `github.event.review.state == 'APPROVED' || 'approved'` so
 #           only APPROVE reviews run the evaluator; COMMENT and
@@ -53,7 +53,7 @@
 #
 #   We MUST NOT use `github.event.comment.author_association` (the
 #   field doesn't exist on Gitea 1.22.6 webhook payload — this was
-#   sop-tier-refire's defect #1).
+#   the since-removed sop-tier-refire workflow's defect #1).
 #
 # A4 (no PR-head checkout under pull_request_target):
 #   We check out the BASE ref explicitly so the review-check.sh script is
@@ -73,7 +73,7 @@
 # also not in qa/security teams → also 403.
 #
 # Resolution: a dedicated `RFC_324_TEAM_READ_TOKEN` secret, owned by an
-# identity that IS in both `qa` and `security` teams (Owners-tier
+# identity that IS in both `qa` and `security` teams (Owners-level
 # claude-ceo-assistant, or a new service-bot added to both teams).
 # Provisioning of this secret is tracked as a follow-up issue (filed by
 # core-devops at PR open).
@@ -10,8 +10,8 @@
 # A1-α addendum (internal#760): review-event trigger added so the security
 # gate flips immediately when a team member submits an APPROVE review.
 # Uses `pull_request_review` types: [submitted] — verified live via
-# sop-tier-check.yml which provably fires this event (produces
-# `sop-tier-check / tier-check (pull_request_review)` contexts).
+# sop-checklist.yml which provably fires this event (produces
+# `sop-checklist / all-items-acked (pull_request_review)` contexts).
 # The job-level `if:` guard checks
 # `github.event.review.state == 'APPROVED' || 'approved'` so only APPROVE
 # reviews run the evaluator; COMMENT and REQUEST_CHANGES are skipped at
@@ -14,10 +14,10 @@
 # Fix (PR #1345 / issue #1280):
 #   - ONE workflow, ONE issue_comment:[created] subscription (no edited/deleted)
 #   - all-items-acked job: pull_request_target OR sop slash-command comments
-#   - review-refire job: qa/security/tier refire slash commands
+#   - review-refire job: qa/security refire slash commands
 #   → ~50% reduction in comment-triggered runner occupancy vs pre-fix.
 #
-# Trust boundary (mirrors RFC#324 §A4 + sop-tier-check security note):
+# Trust boundary (mirrors RFC#324 §A4 + sop-checklist security note):
 #   `pull_request_target` (not `pull_request`) — workflow def is loaded
 #   from BASE branch, so a PR cannot rewrite this workflow to exfiltrate
 #   the token. The `actions/checkout` step pins `ref: base.sha` so the
@@ -34,14 +34,6 @@
 #     via a repo secret `SOP_CHECKLIST_GATE_TOKEN`. Provisioning of that
 #     secret is a follow-up authorization step (separate from this PR).
 #
-# Failure mode: tier-aware (RFC#351 open question 2):
-#   - tier:high   → state=failure (hard-fail; BP blocks merge)
-#   - tier:medium → state=failure (hard-fail; same)
-#   - tier:low    → state=pending (soft-fail; BP can choose to require
-#                    this context or skip for low-tier PRs)
-#   - missing/no-tier → state=failure (default-mode: hard — never lower
-#                    the bar per feedback_fix_root_not_symptom)
-#
 # Slash-command contract (RFC#351 v1 + §A1.1-style notes from RFC#324):
 #
 #   /sop-ack <slug-or-numeric-alias> [optional note]
@@ -61,7 +53,7 @@
 #       — declare a gate (qa-review, security-review) N/A.
 #       — see sop-checklist-config.yaml n/a_gates section.
 #
-#   /qa-recheck /security-recheck /refire-tier-check
+#   /qa-recheck /security-recheck
 #       — refire the corresponding status check on the PR head.
 #
 # The eval is read-only + idempotent (read PR + comments + team
@@ -149,7 +141,6 @@ jobs:
          {
            echo "run_qa=false"
            echo "run_security=false"
-            echo "run_tier=false"
          } >> "$GITHUB_OUTPUT"
          first_line=$(printf '%s\n' "$COMMENT_BODY" | sed -n '1p')
          case "$first_line" in
@@ -159,9 +150,6 @@ jobs:
            /security-recheck*)
              echo "run_security=true" >> "$GITHUB_OUTPUT"
              ;;
-            /refire-tier-check*)
-              echo "run_tier=true" >> "$GITHUB_OUTPUT"
-              ;;
            *)
              echo "::notice::no supported review refire slash command; no-op"
              ;;
@@ -170,8 +158,7 @@ jobs:
      - name: Check out BASE ref for trusted scripts
        if: |
          steps.classify.outputs.run_qa == 'true' ||
-          steps.classify.outputs.run_security == 'true' ||
-          steps.classify.outputs.run_tier == 'true'
+          steps.classify.outputs.run_security == 'true'
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          ref: ${{ github.event.repository.default_branch }}
@@ -213,13 +200,3 @@ jobs:
        run: |
          set -euo pipefail
          .gitea/scripts/review-refire-status.sh
-
-      - name: Refire sop-tier-check status
-        if: steps.classify.outputs.run_tier == 'true'
-        env:
-          GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
-          GITEA_HOST: git.moleculesai.app
-          REPO: ${{ github.repository }}
-          PR_NUMBER: ${{ github.event.issue.number }}
-          SOP_DEBUG: '0'
-        run: bash .gitea/scripts/sop-tier-refire.sh
@@ -1,162 +0,0 @@
-# sop-tier-check — canonical Gitea Actions workflow for §SOP-6 enforcement.
-#
-# Logic lives in `.gitea/scripts/sop-tier-check.sh` (extracted 2026-05-09
-# from the previous inline-bash version). The script is the single source
-# of truth; this workflow file just sets env + invokes it.
-#
-# Copy BOTH files (`.gitea/workflows/sop-tier-check.yml` +
-# `.gitea/scripts/sop-tier-check.sh`) into any repo that wants the
-# §SOP-6 PR gate enforced. Pair with branch protection on the protected
-# branch:
-#   required_status_checks:    ["sop-tier-check / tier-check (pull_request)"]
-#   required_approving_reviews: 1
-#   approving_review_teams:    ["ceo", "managers", "engineers"]
-#
-# Tier → required-team expression (internal#189 AND-composition):
-#   tier:low    → engineers,managers,ceo        (OR: any one suffices)
-#   tier:medium → managers AND engineers AND qa???,security???  (AND: all required)
-#   tier:high   → ceo                           (OR: single team, wired for AND)
-#
-# "???" = teams not yet created in Gitea. When qa + security teams are
-# added, update TIER_EXPR["tier:medium"] in the script to remove the
-# markers. PRs already in-flight when qa/security are created continue
-# to work because their authors explicitly requested those reviews.
-#
-# Force-merge: Owners-team override remains available out-of-band via
-# the Gitea merge API; force-merge writes `incident.force_merge` to
-# `structure_events` per §Persistent structured logging gate (Phase 3).
-#
-# Environment variables:
-#   SOP_DEBUG=1          — per-API-call diagnostic lines. Default: off.
-#   SOP_LEGACY_CHECK=1   — revert to OR-gate for this run. Intended for
-#                           emergency use only; burn-in window closed
-#                           2026-05-17 (internal#189 Phase 1).
-#
-# BURN-IN CLOSED 2026-05-17 (internal#189 Phase 1): The 7-day burn-in
-# window closed. As of 2026-06-04 the residual masks left behind by the
-# burn-in are removed for real (the comment previously claimed this while
-# the masks still persisted — that was stale):
-#   - continue-on-error: true on the jq-install step (redundant; the step
-#     already exits 0) and on the tier-check step (the burn-in mask).
-#   - the `|| true` after the sop-tier-check.sh invocation, which masked
-#     real tier-gate verdicts.
-# AND-composition is now fully enforced and the tier-check step can
-# honestly red CI on a real SOP-6 violation.
-#
-# SOP_FAIL_OPEN REMOVED 2026-06-05 (fix/core-ci-fail-closed): this is a
-# REQUIRED branch-protected gate on `pull_request_target` (always
-# same-repo, secrets always present — no fork/advisory split). Failing
-# open on a token/network/jq fault greened the SOP-6 approval gate
-# WITHOUT verifying approvals — a fail-open on a required context. The
-# gate now FAILS CLOSED on infra faults too: fix the token/runner, not
-# the gate. If you ever need to temporarily re-introduce a mask, file a
-# tracker and follow the mc#1982 protocol.
-
-name: sop-tier-check
-
-# SECURITY: triggers MUST use `pull_request_target`, not `pull_request`.
-# `pull_request_target` loads the workflow definition from the BASE
-# branch (i.e. `main`), not the PR's HEAD. With `pull_request`, anyone
-# with write access to a feature branch could rewrite this file in
-# their PR to dump SOP_TIER_CHECK_TOKEN (org-read scope) to logs and
-# exfiltrate it. Verified 2026-05-09 against Gitea 1.22.6 —
-# `pull_request_target` (added in Gitea 1.21 via go-gitea/gitea#25229)
-# is the documented mitigation.
-#
-# This workflow does NOT call `actions/checkout` of PR HEAD code, so no
-# untrusted code is ever executed in the runner — we only HTTP-call the
-# Gitea API. If a future change adds a checkout step, it MUST pin to
-# `${{ github.event.pull_request.base.sha }}` (NOT `head.sha`) to keep
-# the trust boundary.
-on:
-  pull_request_target:
-    types: [opened, edited, synchronize, reopened, labeled, unlabeled]
-  pull_request_review:
-    types: [submitted, dismissed, edited]
-
-concurrency:
-  group: ${{ github.repository }}-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  tier-check:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      pull-requests: read
-      secrets: read
-    steps:
-      - name: Check out base branch (for the script)
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          # Pin to base.sha — pull_request_target's protection only
-          # works if we never check out PR HEAD. Same SHA the workflow
-          # itself was loaded from.
-          ref: ${{ github.event.pull_request.base.sha }}
-      - name: Install jq
-        # Gitea Actions runners (ubuntu-latest label) do not bundle jq.
-        # The sop-tier-check script uses jq for all JSON API parsing.
-        # Install jq before the script runs so sop-tier-check can pass.
-        #
-        # Method: apt-get first (reliable for Ubuntu runners with internet
-        # access to package mirrors). Falls back to GitHub binary download.
-        # GitHub releases may be unreachable from some runner networks
-        # (infra#241 follow-up: GitHub timeout after 3s on 5.78.80.188
-        # runners). The sop-tier-check script has its own fallback as a
-        # third line of defense, and this step's final command
-        # (`jq --version ... || echo`) already exits 0 unconditionally — so
-        # the step cannot fail the job on its own.
-        # continue-on-error REMOVED 2026-06-04 (mc#1982 directive: root-fix
-        # and remove, do not renew). It was redundant masking, not a gate.
-        run: |
-          # apt-get is the primary method — Ubuntu package mirrors are reliably
-          # reachable from runner containers. GitHub releases may be blocked
-          # or slow on some networks (infra#241 follow-up).
-          if apt-get update -qq && apt-get install -y -qq jq; then
-            echo "::notice::jq installed via apt-get: $(jq --version)"
-          elif timeout 120 curl -sSL \
-            "https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux-amd64" \
-            -o /usr/local/bin/jq && chmod +x /usr/local/bin/jq; then
-            echo "::notice::jq binary downloaded: $(/usr/local/bin/jq --version)"
-          else
-            echo "::warning::jq install failed — apt-get and GitHub download both failed."
-          fi
-          jq --version 2>/dev/null || echo "::notice::jq not yet available — script fallback will retry"
-
-      - name: Verify tier label + reviewer team membership
-        # continue-on-error REMOVED 2026-06-04 (expired internal#189 Phase 1
-        # burn-in, window closed 2026-05-17; mc#1982 directive: root-fix and
-        # remove, do not renew). SOP_FAIL_OPEN REMOVED 2026-06-05
-        # (fix/core-ci-fail-closed): the gate now fails CLOSED on infra
-        # faults too (see the env block below), not just on a real verdict.
-        env:
-          GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
-          GITEA_HOST: git.moleculesai.app
-          REPO: ${{ github.repository }}
-          PR_NUMBER: ${{ github.event.pull_request.number }}
-          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
-          SOP_DEBUG: '0'
-          SOP_LEGACY_CHECK: '0'
-          # SOP_FAIL_OPEN REMOVED 2026-06-05 (fix/core-ci-fail-closed).
-          #
-          # This is the REQUIRED branch-protected gate
-          # `sop-tier-check / tier-check (pull_request)`. It runs on
-          # `pull_request_target`, which ALWAYS executes from the base
-          # branch WITH secrets present — there is NO fork/advisory split
-          # and no legitimate "secrets genuinely absent" degradation here.
-          #
-          # SOP_FAIL_OPEN=1 made the script `exit 0` on an empty/invalid
-          # token, an unreachable Gitea API, or missing jq — i.e. an AUTH
-          # FAILURE or unreachable-dependency would green the SOP-6
-          # approval gate WITHOUT verifying that the required teams
-          # actually approved. That is a fail-open on a required gate: a
-          # mis-wired or under-scoped SOP_TIER_CHECK_TOKEN would let any PR
-          # merge past the approval requirement.
-          #
-          # Removing the env unsets it → `${SOP_FAIL_OPEN:-}` is empty in
-          # sop-tier-check.sh → every guarded `exit 0` branch instead falls
-          # through to `exit 1`. Infra faults (bad token / API down / no
-          # jq) now FAIL CLOSED with a loud `::error::`, exactly like a real
-          # SOP-6 violation. Fix the token/runner, not the gate.
-        run: |
-          bash .gitea/scripts/sop-tier-check.sh
@@ -1,52 +0,0 @@
-# sop-tier-refire — manual fallback for sop-tier-check refire.
-#
-# Closes internal#292. Gitea 1.22.6 doesn't refire workflows on the
-# `pull_request_review` event (go-gitea/gitea#33700); the `sop-tier-check`
-# workflow's review-event subscription is silently dead. The result:
-# PRs that get their approving review AFTER the tier-check ran on open/
-# synchronize keep their failing status check forever, and the only way
-# to merge is the admin force-merge path (audited via `audit-force-merge`
-# but the audit trail keeps growing; see `feedback_never_admin_merge_bypass`).
-#
-# Comment-triggered refires now live in `review-refire-comments.yml`. Gitea
-# queues issue_comment workflows before evaluating job-level `if:`, so having
-# qa-review, security-review, sop-checklist, and sop-tier-refire all subscribe
-# to every comment caused queue storms on SOP-heavy PRs. This workflow is a
-# non-automatic breadcrumb only; Gitea 1.22.6 does not support
-# workflow_dispatch inputs, so real refires must use `/refire-tier-check`.
-#
-# SECURITY MODEL:
-#
-# 1. `pull_request` exists on the issue (issue_comment fires on issues
-#    AND PRs; we only want PRs).
-# 2. `comment.author_association` must be MEMBER/OWNER/COLLABORATOR.
-#    Per the internal#292 core-security review (review#1066 ask): anyone
-#    can comment, but only repo collaborators+ can flip the status.
-#    Without this gate, a drive-by commenter on a public-issue-tracker
-#    surface could trigger a status flip.
-# 3. Comment body must contain `/refire-tier-check` — a slash-command-
-#    shaped trigger (not just any comment word). Prevents accidental
-#    triggering from prose like "we should refire tests" in a review.
-# 4. This workflow does NOT check out PR HEAD code. Like sop-tier-check,
-#    it only HTTP-calls the Gitea API. Trust boundary preserved.
-#
-# Note: `issue_comment` fires from the BASE branch's workflow file. There
-# is no `pull_request_target` equivalent to set; the trigger inherently
-# loads the workflow from the default branch.
-#
-# Rate-limit: a 1s pre-sleep + a "skip if status posted in last 30s"
-# guard prevents comment-spam from thrashing the status. See the script.
-
-name: sop-tier-check refire (manual)
-
-on:
-  workflow_dispatch:
-
-jobs:
-  refire:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Explain supported refire path
-        run: |
-          echo "::error::Gitea 1.22.6 does not support workflow_dispatch inputs here; comment /refire-tier-check on the PR instead."
-          exit 1
@@ -112,9 +112,9 @@ jobs:
      E2E_RUNTIME: claude-code
      # Pin the smoke to a specific MiniMax model rather than relying
      # on the per-runtime default (which could resolve to "sonnet" →
-      # direct Anthropic and defeat the cost saving). MiniMax-M2 is the
-      # stable staging MiniMax path used by the full-SaaS smoke.
-      E2E_MODEL_SLUG: MiniMax-M2
+      # direct Anthropic and defeat the cost saving). MiniMax-M2.7 is the
+      # stable staging MiniMax path used by the full-SaaS smoke (#1997).
+      E2E_MODEL_SLUG: MiniMax-M2.7
      E2E_RUN_ID: "smoke-${{ github.run_id }}"
      # Debug-only: when an operator dispatches with keep_on_failure=true,
      # the smoke script's E2E_KEEP_ORG=1 path skips teardown so the
@@ -34,8 +34,10 @@ name: Sweep stale Cloudflare DNS records
 # scripts/ops/test_sweep_cf_decide.py (#2027) cover the rule
 # classifier.
 #
-# Secrets: CF_API_TOKEN, CF_ZONE_ID, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
-# are confirmed existing per issue #425 §425 audit. CP_ADMIN_API_TOKEN and
+# Secrets: CF_API_TOKEN (preferred CI-scoped name) or CLOUDFLARE_API_TOKEN
+# (operator-host canonical name) is accepted — the workflow falls back
+# automatically. Same for CF_ZONE_ID / CLOUDFLARE_ZONE_ID. The CF_* and AWS_*
+# secrets are confirmed per issue #425 §425 audit. CP_ADMIN_API_TOKEN and
 # CP_STAGING_ADMIN_API_TOKEN are unconfirmed — if missing, the verify step
 # (schedule → hard-fail, dispatch → soft-skip) surfaces it clearly.

@@ -79,8 +81,8 @@ jobs:
    # each individually capped at 10s by the script's curl -m flag.
    timeout-minutes: 3
    env:
-      CF_API_TOKEN: ${{ secrets.CF_API_TOKEN }}
-      CF_ZONE_ID: ${{ secrets.CF_ZONE_ID }}
+      CF_API_TOKEN: ${{ secrets.CF_API_TOKEN || secrets.CLOUDFLARE_API_TOKEN }}
+      CF_ZONE_ID: ${{ secrets.CF_ZONE_ID || secrets.CLOUDFLARE_ZONE_ID }}
      CP_ADMIN_API_TOKEN: ${{ secrets.CP_ADMIN_API_TOKEN }}
      CP_STAGING_ADMIN_API_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -129,6 +131,7 @@ jobs:
            fi
            echo "::error::sweep cannot run — required secrets missing: ${missing[*]}"
            echo "::error::set them at Settings → Secrets and Variables → Actions, or disable this workflow."
+            echo "::error::Cloudflare secrets accept either the CI-scoped name (CF_API_TOKEN / CF_ZONE_ID) or the operator-host canonical name (CLOUDFLARE_API_TOKEN / CLOUDFLARE_ZONE_ID)."
            echo "::error::a silent skip masked an active CF DNS leak (152/200 zone records) caught only by a manual audit on 2026-04-28; this gate exists to make the gap visible."
            exit 1
          fi
@@ -29,10 +29,12 @@ name: Sweep stale Cloudflare Tunnels
 # the DNS sweep's 50% because tenant-shaped tunnels are mostly
 # orphans by design) refuses to nuke past the threshold.
 #
-# Secrets: CF_API_TOKEN, CF_ACCOUNT_ID are confirmed existing per
-# issue #425 §425 audit. CP_ADMIN_API_TOKEN and CP_STAGING_ADMIN_API_TOKEN
-# are unconfirmed — if missing, the verify step (schedule → hard-fail,
-# dispatch → soft-skip) surfaces it clearly.
+# Secrets: CF_API_TOKEN (preferred CI-scoped name) or CLOUDFLARE_API_TOKEN
+# (operator-host canonical name) is accepted — the workflow falls back
+# automatically. Same for CF_ACCOUNT_ID / CLOUDFLARE_ACCOUNT_ID. The CF_*
+# secrets are confirmed per issue #425 §425 audit. CP_ADMIN_API_TOKEN and
+# CP_STAGING_ADMIN_API_TOKEN are unconfirmed — if missing, the verify step
+# (schedule → hard-fail, dispatch → soft-skip) surfaces it clearly.

 on:
  schedule:
@@ -74,8 +76,8 @@ jobs:
    # the sweep-cf-orphans companion job).
    timeout-minutes: 30
    env:
-      CF_API_TOKEN: ${{ secrets.CF_API_TOKEN }}
-      CF_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID }}
+      CF_API_TOKEN: ${{ secrets.CF_API_TOKEN || secrets.CLOUDFLARE_API_TOKEN }}
+      CF_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID || secrets.CLOUDFLARE_ACCOUNT_ID }}
      CP_ADMIN_API_TOKEN: ${{ secrets.CP_ADMIN_API_TOKEN }}
      CP_STAGING_ADMIN_API_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
      MAX_DELETE_PCT: ${{ github.event.inputs.max_delete_pct || '90' }}
@@ -0,0 +1,67 @@
+# umbrella-reaper — auto-recovery for stale CI umbrella statuses on open PRs.
+#
+# Tracking: molecule-core#1780.
+#
+# Problem: when `CI / all-required (pull_request)` reports failure due to
+# a propagation/timing race despite all required sub-jobs being success,
+# branch protection blocks the merge. Operators currently recover manually
+# per docs/runbooks/ci-umbrella-stale-compensating-status.md.
+#
+# This workflow automates that recovery: it scans open PRs and posts a
+# compensating success status when the umbrella is stale but all sub-jobs
+# are verified green.
+#
+# Trust boundary: the script only reads PR lists + statuses and POSTs to
+# /statuses/{sha}. It never checks out PR HEAD code. The Gitea token has
+# write:repository scope for statuses only.
+#
+# Sibling: .gitea/workflows/status-reaper.yml (default-branch push-suffix
+# compensation). Same persona provisioning model.
+
+name: umbrella-reaper
+
+# IMPORTANT — Schedule moved to operator-config:
+#   /etc/cron.d/molecule-core-umbrella-reaper ->
+#   /usr/local/bin/molecule-core-cron-bot.sh umbrella-reaper
+#
+# This keeps the compensation cadence but stops a maintenance bot from
+# consuming Gitea Actions runner slots during PR merge waves.
+# Gitea 1.22.6 parser quirk per
+# `feedback_gitea_workflow_dispatch_inputs_unsupported`: do NOT add an
+# `inputs:` block here. Gitea 1.22.6 rejects the whole workflow as
+# "unknown on type" when `workflow_dispatch.inputs.X` is present.
+on:
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+# NOTE: NO `concurrency:` block is intentional — same reasoning as
+# status-reaper.yml. Gitea 1.22.6 doesn't honor cancel-in-progress for
+# queued ticks; the POST is idempotent so concurrent ticks are safe.
+
+jobs:
+  reap:
+    runs-on: ubuntu-latest
+    timeout-minutes: 8
+    steps:
+      - name: Check out repo at default-branch HEAD
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
+        with:
+          ref: ${{ github.event.repository.default_branch }}
+
+      - name: Set up Python
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065
+        with:
+          python-version: '3.12'
+
+      - name: Install PyYAML
+        run: python -m pip install --quiet 'PyYAML==6.0.2'
+
+      - name: Compensate stale PR umbrella statuses
+        env:
+          GITEA_TOKEN: ${{ secrets.UMBRELLA_REAPER_TOKEN }}
+          GITEA_HOST: git.moleculesai.app
+          REPO: ${{ github.repository }}
+          PR_LIMIT: "50"
+        run: python3 .gitea/scripts/umbrella-reaper.py
@@ -26,7 +26,7 @@ name: verify-providers-gen
 #   * It is intentionally absent from ci.yml's job set so the ci-required-drift
 #     sentinel (jobs ↔ branch-protection ↔ audit-env) does NOT fire on it, and
 #     from branch protection (turning it into a hard merge gate has blast radius
-#     — operator GO required, same pattern as sop-tier-check / verify-providers-gen
+#     — operator GO required, same pattern as sop-checklist / verify-providers-gen
 #     on controlplane). Promote it into branch protection in a follow-up once
 #     P2 has soaked.
 # Until then it behaves like secret-scan / block-internal-paths: a standalone
@@ -27,9 +27,13 @@ export async function seedWorkspace(echoURL: string): Promise<SeededWorkspace> {
  // 1. Create external workspace pointing at the in-process echo runtime.
  const runId = Math.random().toString(36).slice(2, 8);
  const wsName = `Chat E2E Agent ${runId}`;
+  const adminToken = process.env.E2E_ADMIN_TOKEN ?? process.env.ADMIN_TOKEN;
  const createRes = await fetch(`${PLATFORM_URL}/workspaces`, {
    method: "POST",
-    headers: { "Content-Type": "application/json" },
+    headers: {
+      "Content-Type": "application/json",
+      ...(adminToken ? { Authorization: `Bearer ${adminToken}` } : {}),
+    },
    body: JSON.stringify({
      name: wsName,
      tier: 1,
@@ -234,30 +234,44 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
    "Authorization": `Bearer ${tenantToken}`,
    "X-Molecule-Org-Id": orgID,
  };
-  const ws = await jsonFetch(`${tenantURL}/workspaces`, {
-    method: "POST",
-    headers: tenantAuth,
-    body: JSON.stringify({
-      name: "E2E Canvas Test",
-      runtime: "hermes",
-      tier: 2,
-      // Provider-registry SSOT (internal#718) registers ONLY Kimi models for
-      // the hermes runtime — `moonshot/kimi-k2.6` is the platform-managed
-      // entry (workspace-server/internal/providers/providers.yaml, hermes ->
-      // platform). The old `gpt-4o` was never a registered hermes model and
-      // now 422s UNREGISTERED_MODEL_FOR_RUNTIME (core#2225). This workspace
-      // defaults closed to platform_managed (see the boot-shape note below),
-      // so a platform-namespaced model id is the registry-correct choice.
-      model: "moonshot/kimi-k2.6",
-    }),
-  });
-  if (ws.status >= 400 || !ws.body?.id) {
-    throw new Error(`Workspace create ${ws.status}: ${JSON.stringify(ws.body)}`);
+  // Retry workspace creation on transient 5xx / timeout — staging CP can
+  // return 502/503/504 under load and a single-shot failure kills the
+  // entire E2E run. 3 attempts with exponential backoff between them (3s,
+  // then 6s) add at most ~9s of waiting, well inside the 20-min envelope.
+  let workspaceId = "";
+  for (let attempt = 1; attempt <= 3; attempt++) {
+    const ws = await jsonFetch(`${tenantURL}/workspaces`, {
+      method: "POST",
+      headers: tenantAuth,
+      body: JSON.stringify({
+        name: "E2E Canvas Test",
+        runtime: "hermes",
+        tier: 2,
+        // Provider-registry SSOT (internal#718) registers ONLY Kimi models for
+        // the hermes runtime — `moonshot/kimi-k2.6` is the platform-managed
+        // entry (workspace-server/internal/providers/providers.yaml, hermes ->
+        // platform). The old `gpt-4o` was never a registered hermes model and
+        // now 422s UNREGISTERED_MODEL_FOR_RUNTIME (core#2225). This workspace
+        // defaults closed to platform_managed (see the boot-shape note below),
+        // so a platform-namespaced model id is the registry-correct choice.
+        model: "moonshot/kimi-k2.6",
+      }),
+    });
+    if (ws.status >= 200 && ws.status < 300 && ws.body?.id) {
+      workspaceId = ws.body.id as string;
+      break;
+    }
+    const isTransient = ws.status >= 500 || ws.status === 0;
+    if (!isTransient || attempt === 3) {
+      throw new Error(`Workspace create ${ws.status} (attempt ${attempt}): ${JSON.stringify(ws.body)}`);
+    }
+    const backoff = 3000 * Math.pow(2, attempt - 1);
+    console.log(`[staging-setup] Workspace create transient ${ws.status}, retrying in ${backoff}ms...`);
+    await new Promise((r) => setTimeout(r, backoff));
  }
-  const workspaceId = ws.body.id as string;
  console.log(`[staging-setup] Workspace created: ${workspaceId}`);

-  // 6. Wait for workspace RENDERABLE.
+  // 6. Wait for workspace online (or the tolerated pre-start `failed` shape).
  //
  // This harness exists to verify the canvas *tab UI* renders (staging-
  // tabs.spec.ts: open each of the 13 workspace-panel tabs, assert no hard
@@ -266,6 +280,16 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
  // it needs is a workspace ROW that the canvas lists so the node renders
  // and the side-panel tabs open. A fully-`online` agent is NOT required.
  //
+  // Hermes cold-boot takes 10-13 min on slow apt days (apt + uv + hermes
+  // install + npm browser-tools). The controlplane bootstrap-watcher
+  // deadline fires at 5 min and sets status=failed prematurely; heartbeat
+  // then transitions failed → online after install.sh finishes. The ONLY
+  // failed shape we tolerate is the pre-start credential-abort
+  // (uptime_seconds=0, no last_sample_error) — the agent never ran. Real
+  // boot regressions (image pull error, panic, PYTHONPATH, etc.) still
+  // hard-throw immediately so triage gets detail without waiting for a
+  // polling timeout. See test_staging_full_saas.sh step 7/11 and issue #2632.
+  //
  // That distinction became load-bearing on 2026-06-03: workspace-server
  // #2162 (fix(provision): platform-managed workspace must fail-closed when
  // CP proxy env absent) made a platform_managed workspace ABORT AT BOOT
@@ -287,8 +311,10 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
  // the node + tabs render, proceed. We do NOT mask a real boot regression:
  // any `failed` carrying a last_sample_error, OR a non-zero uptime (the
  // agent started then crashed — image pull, panic, PYTHONPATH, etc.),
-  // still hard-throws. Genuine *infra* provision failure is already caught
-  // loud one step earlier at the org level (instance_status === "failed").
+  // still hard-throws immediately so triage gets boot_stage / last_error /
+  // image fields without waiting for a polling timeout.
+  // Genuine *infra* provision failure is already caught loud one step
+  // earlier at the org level (instance_status === "failed").
  await waitFor<boolean>(
    async () => {
      const r = await jsonFetch(`${tenantURL}/workspaces/${workspaceId}`, {
@@ -315,13 +341,7 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
          );
          return true;
        }
-        // last_sample_error is often empty when the failure happens before
-        // the agent emits a sample (e.g. boot crash, image pull error,
-        // missing PYTHONPATH, OpenAI quota at startup). Dumping the full
-        // body gives triage the boot_stage / last_error / image fields it
-        // needs without a second probe. Otherwise this propagates as a
-        // bare "Workspace failed: " — the exact useless message that
-        // sent #2632 to the issue tracker.
+        // Real boot regression — hard-throw immediately with full detail.
        const detail = sampleErr
          ? sampleErr
          : `(no last_sample_error) full body: ${JSON.stringify(r.body)}`;
@@ -333,7 +353,7 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
    10_000,
    "workspace online",
  );
-  console.log(`[staging-setup] Workspace renderable`);
+  console.log(`[staging-setup] Workspace online`);

  // 7. Hand state off to tests + teardown — overwrite the slug-only
  // bootstrap state with the full state spec tests need.
@@ -370,7 +370,7 @@ test.describe("staging canvas tabs", () => {

    // The tablist appears once the side panel mounts. Condition-based
    // wait — no fixed delay.
-    const tablist = page.locator('[role="tablist"]');
+    const tablist = page.getByRole("tablist", { name: "Workspace panel tabs" });
    await expect(
      tablist,
      "side panel tablist never appeared after clicking the workspace node",
@@ -179,7 +179,6 @@ function Shell({
          <p className="mt-2 text-ink-mid">
            Each org is an isolated Molecule workspace.
          </p>
-          <DataResidencyNotice />
          <div className="mt-8">{children}</div>
        </div>
      </TermsGate>
@@ -220,25 +219,6 @@ function AccountBar({ session }: { session: Session }) {
    </div>
  );
 }
-
-// DataResidencyNotice surfaces where workspace data lives so EU-based
-// signups can make an informed choice (GDPR Art. 13 disclosure
-// requirement). Plain text, no icon — the goal is clarity, not
-// decoration. A future EU region selector can replace this with a
-// region dropdown.
-function DataResidencyNotice() {
-  return (
-    <p className="mt-3 rounded border border-line bg-surface-sunken/60 px-3 py-2 text-xs text-ink-mid">
-      Workspaces run in AWS us-east-2 (Ohio, United States). EU region support is on the roadmap — reach out to
-      {" "}
-      <a href="mailto:support@moleculesai.app" className="underline">
-        support@moleculesai.app
-      </a>
-      {" "}if you need data residency in another region today.
-    </p>
-  );
-}
-
 function OrgRow({ org }: { org: Org }) {
  return (
    <li className="rounded-lg border border-line bg-surface-sunken p-4">
@@ -172,7 +172,7 @@ export function ContextMenu() {
    const nodeId = contextMenu.nodeId;
    closeContextMenu();
    try {
-      await api.post(`/workspaces/${nodeId}/pause`, {});
+      await api.post(`/workspaces/${nodeId}/pause?cascade=true`, {});
      updateNodeData(nodeId, { status: "paused" });
    } catch (e) {
      showToast("Pause failed", "error");
@@ -184,7 +184,7 @@ export function ContextMenu() {
    const nodeId = contextMenu.nodeId;
    closeContextMenu();
    try {
-      await api.post(`/workspaces/${nodeId}/resume`, {});
+      await api.post(`/workspaces/${nodeId}/resume?cascade=true`, {});
      updateNodeData(nodeId, { status: "provisioning" });
    } catch (e) {
      showToast("Resume failed", "error");
@@ -60,6 +60,16 @@ const BASE_RUNTIME_TEMPLATE_IDS = new Set(["claude-code-default", "codex", "goog
 const DEFAULT_HEADLESS_INSTANCE_TYPE = "t3.medium";
 const DEFAULT_HEADLESS_ROOT_GB = 30;
 const DEFAULT_DISPLAY_INSTANCE_TYPE = "t3.xlarge";
+
+// Per-workspace cloud/compute backend (multi-provider RFC). "aws" is the default
+// EC2 path; "gcp"/"hetzner" route to the matching CP WorkspaceProvisioner. A
+// workspace whose cloud differs from its tenant's is reached over a per-workspace
+// Cloudflare tunnel (runtime#95). Distinct from the LLM/model provider.
+const CLOUD_PROVIDER_OPTIONS = [
+  { value: "aws", label: "AWS (default)" },
+  { value: "gcp", label: "GCP" },
+  { value: "hetzner", label: "Hetzner" },
+];
 const DEFAULT_DISPLAY_ROOT_GB = 80;

 export function CreateWorkspaceButton() {
@@ -77,6 +87,10 @@ export function CreateWorkspaceButton() {
  const [displayInstanceType, setDisplayInstanceType] = useState(DEFAULT_DISPLAY_INSTANCE_TYPE);
  const [displayRootGB, setDisplayRootGB] = useState(String(DEFAULT_DISPLAY_ROOT_GB));
  const [displayResolution, setDisplayResolution] = useState("1920x1080");
+  // Cloud/compute backend for the workspace box (multi-provider, per-workspace).
+  // "aws" default; "gcp"/"hetzner" route to the matching CP WorkspaceProvisioner
+  // (a non-tenant-cloud box is reached over a per-workspace tunnel, runtime#95).
+  const [cloudProvider, setCloudProvider] = useState("aws");
  // Templates fetched from /api/templates — drives the dynamic provider
  // filter below. Same data source ConfigTab uses (PR #2454). When the
  // selected template declares `runtime_config.providers` in its
@@ -266,6 +280,7 @@ export function CreateWorkspaceButton() {
    setDisplayInstanceType(DEFAULT_DISPLAY_INSTANCE_TYPE);
    setDisplayRootGB(String(DEFAULT_DISPLAY_ROOT_GB));
    setDisplayResolution("1920x1080");
+    setCloudProvider("aws");
    setExternalRuntime("external");
    setLLMSelection({ providerId: "", model: "", envVars: [] });
    setLLMSecret("");
@@ -355,11 +370,16 @@ export function CreateWorkspaceButton() {
                      width: Number.isFinite(displayWidth) ? displayWidth : 1920,
                      height: Number.isFinite(displayHeight) ? displayHeight : 1080,
                    },
+                    // Only meaningful when CP provisions the box (SaaS), where
+                    // the picker is shown. Omit on self-hosted so the payload is
+                    // unchanged there.
+                    ...(isSaaS ? { provider: cloudProvider } : {}),
                  }
                : {
                    instance_type: DEFAULT_HEADLESS_INSTANCE_TYPE,
                    volume: { root_gb: DEFAULT_HEADLESS_ROOT_GB },
                    display: { mode: "none" },
+                    ...(isSaaS ? { provider: cloudProvider } : {}),
                  },
            }
          : {}),
@@ -599,6 +619,26 @@ export function CreateWorkspaceButton() {
                <div className="mb-2 text-[11px] font-medium text-ink-mid">
                  Container Config
                </div>
+                {/* Cloud provider — only meaningful when CP provisions the box
+                    (SaaS). A non-tenant-cloud workspace is reached over a
+                    per-workspace Cloudflare tunnel (runtime#95). */}
+                {isSaaS && (
+                  <label htmlFor="workspace-cloud-provider" className="mb-3 grid gap-1">
+                    <span className="text-xs font-medium text-ink">Cloud provider</span>
+                    <select
+                      id="workspace-cloud-provider"
+                      value={cloudProvider}
+                      onChange={(e) => setCloudProvider(e.target.value)}
+                      className="w-full bg-surface-card/60 border border-line/50 rounded-lg px-3 py-2 text-sm text-ink focus:outline-none focus:border-accent/60 focus:ring-1 focus:ring-accent/20 transition-colors"
+                    >
+                      {CLOUD_PROVIDER_OPTIONS.map((p) => (
+                        <option key={p.value} value={p.value}>
+                          {p.label}
+                        </option>
+                      ))}
+                    </select>
+                  </label>
+                )}
                <label className="flex items-center justify-between gap-3">
                  <span className="text-xs font-medium text-ink">Display</span>
                  <input
@@ -12,6 +12,7 @@ import {
  ProviderModelSelector,
  buildProviderCatalog,
  findProviderForModel,
+  isPlatformManagedProvider,
  type SelectorValue,
 } from "./ProviderModelSelector";

@@ -267,10 +268,21 @@ function ProviderPickerModal({
    setSelectorValue(initial);
  }, [open, initial]);

+  // #2248: filter out provisioner-injected internal tokens for platform-managed
+  // providers so the user can't clobber them. Memoized so the array reference is
+  // stable across renders and does not churn the entries useEffect.
+  const userEditableEnvVars = useMemo(() => {
+    const selectedProvider = catalog.find((p) => p.id === selectorValue.providerId);
+    const isPlatformManaged = selectedProvider ? isPlatformManagedProvider(selectedProvider) : false;
+    return isPlatformManaged
+      ? selectorValue.envVars.filter((k) => k !== "MOLECULE_LLM_USAGE_TOKEN")
+      : selectorValue.envVars;
+  }, [catalog, selectorValue.providerId, selectorValue.envVars]);
+
  useEffect(() => {
    if (!open) return;
    setEntries(
-      selectorValue.envVars.map((key) => ({
+      userEditableEnvVars.map((key) => ({
        key,
        value: "",
        // Pre-mark as saved when the key is already in the configured
@@ -283,7 +295,7 @@ function ProviderPickerModal({
    );
    setOptionalEntries(
      optionalKeys
-        .filter((key) => !selectorValue.envVars.includes(key))
+        .filter((key) => !userEditableEnvVars.includes(key))
        .map((key) => ({
          key,
          value: "",
@@ -292,7 +304,7 @@ function ProviderPickerModal({
          error: null,
        })),
    );
-  }, [open, selectorValue.envVars, configuredKeys, optionalKeys]);
+  }, [open, userEditableEnvVars, configuredKeys, optionalKeys]);

  useEffect(() => {
    if (!open) return;
@@ -91,6 +91,7 @@ export interface RegistryModel {
  name?: string;
  provider?: string;
  billing_mode?: "platform_managed" | "byok";
+  required_env?: string[];
 }

 export interface SelectorValue {
@@ -385,7 +385,7 @@ describe("ContextMenu — item actions", () => {
    render(<ContextMenu />);
    fireEvent.click(screen.getByRole("menuitem", { name: /pause/i }));
    await act(async () => { /* flush */ });
-    expect(mockPost).toHaveBeenCalledWith("/workspaces/n1/pause", {});
+    expect(mockPost).toHaveBeenCalledWith("/workspaces/n1/pause?cascade=true", {});
    expect(mockStoreState.updateNodeData).toHaveBeenCalledWith("n1", { status: "paused" });
  });

@@ -395,7 +395,7 @@ describe("ContextMenu — item actions", () => {
    render(<ContextMenu />);
    fireEvent.click(screen.getByRole("menuitem", { name: /resume/i }));
    await act(async () => { /* flush */ });
-    expect(mockPost).toHaveBeenCalledWith("/workspaces/n1/resume", {});
+    expect(mockPost).toHaveBeenCalledWith("/workspaces/n1/resume?cascade=true", {});
  });
 });

@@ -0,0 +1,84 @@
+// @vitest-environment jsdom
+//
+// SaaS-mode coverage for the per-workspace cloud-provider picker. The main
+// CreateWorkspaceDialog.test.tsx runs non-SaaS (the picker is hidden and the
+// payload omits `provider`); this file forces SaaS by mocking isSaaSTenant so
+// the picker renders and the selected provider flows into compute.provider.
+import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
+import { render, screen, fireEvent, waitFor, cleanup } from "@testing-library/react";
+import { CreateWorkspaceButton } from "../CreateWorkspaceDialog";
+
+vi.mock("@/lib/api", () => ({
+  api: { get: vi.fn(), post: vi.fn() },
+}));
+
+// Force SaaS so the Cloud provider picker is shown and the payload carries it.
+vi.mock("@/lib/tenant", async (importOriginal) => ({
+  ...(await importOriginal<typeof import("@/lib/tenant")>()),
+  isSaaSTenant: () => true,
+}));
+
+import { api } from "@/lib/api";
+
+const mockGet = vi.mocked(api.get);
+const mockPost = vi.mocked(api.post);
+
+const SAMPLE_TEMPLATES = [
+  {
+    id: "claude-code-default",
+    name: "Claude Code Agent",
+    runtime: "claude-code",
+    model: "moonshot/kimi-k2.6",
+    providers: ["platform", "minimax"],
+    models: [{ id: "moonshot/kimi-k2.6", name: "Kimi K2.6", provider: "platform", required_env: [] }],
+  },
+];
+
+beforeEach(() => {
+  vi.clearAllMocks();
+  mockGet.mockImplementation(async (url: string) => {
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    if (url === "/templates") return SAMPLE_TEMPLATES as any;
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    return [] as any;
+  });
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  mockPost.mockResolvedValue({} as any);
+});
+
+afterEach(() => cleanup());
+
+async function openDialog() {
+  render(<CreateWorkspaceButton />);
+  const btn = screen.getAllByRole("button").find((b) => b.textContent?.includes("New Workspace"));
+  fireEvent.click(btn!);
+  await waitFor(() => expect(screen.getByText("Create Workspace")).toBeTruthy());
+}
+
+describe("CreateWorkspaceDialog — cloud provider (SaaS)", () => {
+  it("shows the Cloud provider picker, defaulting to AWS", async () => {
+    await openDialog();
+    const select = screen.getByLabelText("Cloud provider") as HTMLSelectElement;
+    expect(select).toBeTruthy();
+    expect(select.value).toBe("aws");
+  });
+
+  it("defaults compute.provider to aws when the picker is untouched", async () => {
+    await openDialog();
+    fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), { target: { value: "AWS Agent" } });
+    fireEvent.click(screen.getAllByRole("button").find((b) => b.textContent === "Create")!);
+    await waitFor(() => expect(mockPost).toHaveBeenCalled());
+    const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
+    expect(body.compute).toMatchObject({ provider: "aws" });
+  });
+
+  it("threads the selected cloud provider into compute.provider", async () => {
+    await openDialog();
+    fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), { target: { value: "GCP Agent" } });
+    fireEvent.change(screen.getByLabelText("Cloud provider"), { target: { value: "gcp" } });
+    fireEvent.click(screen.getAllByRole("button").find((b) => b.textContent === "Create")!);
+    await waitFor(() => expect(mockPost).toHaveBeenCalled());
+    const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
+    expect(body.compute).toMatchObject({ provider: "gcp" });
+  });
+});
@@ -0,0 +1,175 @@
+// @vitest-environment jsdom
+/**
+ * Regression tests for #2248 — platform-managed provider credential suppression.
+ *
+ * Covers:
+ *  - MOLECULE_LLM_USAGE_TOKEN is hidden when the selected provider is platform-managed
+ *  - MOLECULE_LLM_USAGE_TOKEN is still shown for BYOK providers
+ *  - Render-churn smoke check for the useMemo guard (wrapper renders at most twice on mount)
+ */
+import { describe, it, expect, vi, afterEach } from "vitest";
+import { render, screen, fireEvent, cleanup, waitFor, act } from "@testing-library/react";
+import { MissingKeysModal } from "../MissingKeysModal";
+import type { ModelSpec, ProviderChoice } from "@/lib/deploy-preflight";
+
+vi.mock("@/lib/api", () => ({
+  api: { get: vi.fn(), put: vi.fn() },
+}));
+
+vi.mock("@/lib/deploy-preflight", async () => {
+  const actual = await vi.importActual<typeof import("@/lib/deploy-preflight")>(
+    "@/lib/deploy-preflight",
+  );
+  return actual;
+});
+
+const PLATFORM_MANAGED_MODELS: ModelSpec[] = [
+  { id: "platform-claude", provider: "platform", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
+];
+
+const BYOK_MODELS: ModelSpec[] = [
+  { id: "byok-claude", provider: "anthropic", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
+];
+
+/**
+ * Returns a two-entry provider list: the Anthropic entry under test, tagged
+ * with `billingMode`, followed by an OpenAI entry that carries no billing mode.
+ */
+function makeProviders(billingMode: "platform_managed" | "byok"): ProviderChoice[] {
+  const primary =
+    billingMode === "platform_managed"
+      ? {
+          id: "platform|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN",
+          label: "Platform Anthropic",
+          envVars: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
+          billingMode,
+        }
+      : {
+          id: "anthropic|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN",
+          label: "BYOK Anthropic",
+          envVars: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
+          billingMode,
+        };
+  // Need ≥2 providers so MissingKeysModal enters picker mode (pickerMode = providers.length > 1).
+  const filler = { id: "openai|OPENAI_API_KEY", label: "OpenAI", envVars: ["OPENAI_API_KEY"] };
+  return [primary, filler];
+}
+
+describe("ProviderPickerModal — platform-managed suppression (#2248)", () => {
+  afterEach(() => cleanup());
+
+  it("hides MOLECULE_LLM_USAGE_TOKEN when provider is platform-managed", () => {
+    render(
+      <MissingKeysModal
+        open
+        missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
+        providers={makeProviders("platform_managed")}
+        models={PLATFORM_MANAGED_MODELS}
+        runtime="claude-code"
+        onKeysAdded={vi.fn()}
+        onCancel={vi.fn()}
+      />,
+    );
+    // Only ANTHROPIC_API_KEY should be rendered; MOLECULE_LLM_USAGE_TOKEN suppressed
+    expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
+    expect(screen.queryByText("MOLECULE_LLM_USAGE_TOKEN")).toBeNull();
+  });
+
+  it("shows MOLECULE_LLM_USAGE_TOKEN when provider is BYOK", () => {
+    render(
+      <MissingKeysModal
+        open
+        missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
+        providers={makeProviders("byok")}
+        models={BYOK_MODELS}
+        runtime="claude-code"
+        onKeysAdded={vi.fn()}
+        onCancel={vi.fn()}
+      />,
+    );
+    // Both keys visible for BYOK
+    expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
+    expect(screen.getByText("MOLECULE_LLM_USAGE_TOKEN")).toBeTruthy();
+  });
+
+  it("does not churn renders when the modal is open and platform-managed", () => {
+    let renderCount = 0;
+
+    function RenderSpy({ children }: { children: React.ReactNode }) {
+      renderCount++;
+      return <>{children}</>;
+    }
+
+    render(
+      <RenderSpy>
+        <MissingKeysModal
+          open
+          missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
+          providers={makeProviders("platform_managed")}
+          models={PLATFORM_MANAGED_MODELS}
+          runtime="claude-code"
+          onKeysAdded={vi.fn()}
+          onCancel={vi.fn()}
+        />
+      </RenderSpy>,
+    );
+
+    const countAfterInitial = renderCount;
+
+    // NOTE(review): the next assertion compares renderCount with a copy taken on
+    // the previous statement, so it cannot fail. RenderSpy is also the modal's
+    // parent, so it presumably would not re-render on modal-internal state
+    // updates — confirm whether churn inside MissingKeysModal is detectable here.
+    expect(renderCount).toBe(countAfterInitial);
+    expect(renderCount).toBeLessThanOrEqual(2); // StrictMode double-render ceiling
+  });
+
+  it("updates suppression correctly when switching from BYOK to platform-managed", async () => {
+    const providers: ProviderChoice[] = [
+      {
+        id: "anthropic|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN",
+        label: "BYOK Anthropic",
+        envVars: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
+        billingMode: "byok",
+      },
+      {
+        id: "platform|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN",
+        label: "Platform Anthropic",
+        envVars: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
+        billingMode: "platform_managed",
+      },
+      {
+        id: "openai|OPENAI_API_KEY",
+        label: "OpenAI",
+        envVars: ["OPENAI_API_KEY"],
+      },
+    ];
+
+    const models: ModelSpec[] = [
+      { id: "byok-claude", provider: "anthropic", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
+      { id: "platform-claude", provider: "platform", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
+    ];
+
+    render(
+      <MissingKeysModal
+        open
+        missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
+        providers={providers}
+        models={models}
+        runtime="claude-code"
+        onKeysAdded={vi.fn()}
+        onCancel={vi.fn()}
+      />,
+    );
+
+    // Default selection is providers[0] (BYOK) — both keys visible
+    expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
+    expect(screen.getByText("MOLECULE_LLM_USAGE_TOKEN")).toBeTruthy();
+
+    // Switch to platform-managed provider
+    const providerSelect = screen.getByTestId("provider-select") as HTMLSelectElement;
+    act(() => {
+      fireEvent.change(providerSelect, {
+        target: { value: "platform|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN" },
+      });
+    });
+
+    // Poll the condition that actually changes: the token row must disappear.
+    await waitFor(() => {
+      expect(screen.queryByText("MOLECULE_LLM_USAGE_TOKEN")).toBeNull();
+    });
+    expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
+  });
+});
@@ -13,6 +13,7 @@ import {
  buildProviderCatalog,
  buildProviderCatalogFromRegistry,
  findProviderForModel,
+  isPlatformManagedProvider,
  type SelectorValue,
  type ProviderEntry,
  type RegistryProvider,
@@ -682,6 +683,9 @@ export function ConfigTab({ workspaceId }: Props) {
            name: m.name,
            // carry the derived provider so the selector buckets correctly
            ...(m.provider ? { provider: m.provider } : {}),
+            // carry required_env so wasTemplateDriven can detect
+            // template-driven env lists for registry-backed runtimes
+            ...(m.required_env ? { required_env: m.required_env } : {}),
          }))
        : availableModels,
    [registryBacked, selectedRuntime?.registryModels, availableModels],
@@ -1017,6 +1021,15 @@ export function ConfigTab({ workspaceId }: Props) {
                  // top-level model. required_env follows the selected
                  // provider's envVars when the existing required_env
                  // was template-driven (don't clobber user-typed envs).
+                  //
+                  // #2248: suppress provisioner-injected internal tokens
+                  // (MOLECULE_LLM_USAGE_TOKEN) for platform-managed providers
+                  // so the user can't clobber them.
+                  const selectedEntry = providerCatalog.find((p) => p.id === next.providerId);
+                  const isPlatformManaged = selectedEntry ? isPlatformManagedProvider(selectedEntry) : false;
+                  const filteredEnvVars = isPlatformManaged
+                    ? next.envVars.filter((k) => k !== "MOLECULE_LLM_USAGE_TOKEN")
+                    : next.envVars;
                  setConfig((prev) => {
                    const v = next.model;
                    const prevModelId = prev.runtime_config?.model || prev.model || "";
@@ -1029,8 +1042,8 @@ export function ConfigTab({ workspaceId }: Props) {
                          prevRequired.every((e, i) => e === prevSpec.required_env![i])
                        : false);
                    const nextRequired =
-                      next.envVars.length > 0 && wasTemplateDriven
-                        ? next.envVars
+                      wasTemplateDriven
+                        ? filteredEnvVars
                        : prevRequired;
                    if (prev.runtime) {
                      return {
@@ -1038,7 +1051,7 @@ export function ConfigTab({ workspaceId }: Props) {
                        runtime_config: {
                          ...prev.runtime_config,
                          model: v,
-                          ...(next.envVars.length > 0 && wasTemplateDriven
+                          ...(wasTemplateDriven
                            ? { required_env: nextRequired }
                            : {}),
                        },
@@ -38,8 +38,16 @@ const DATA_PERSISTENCE_OPTIONS = ["", "persist", "ephemeral"];
 const dataPersistenceLabel = (v: string): string =>
  v === "persist" ? "Always keep (persist)" : v === "ephemeral" ? "Don't keep (ephemeral)" : "Auto";

+// Cloud/compute backend display name. The provider is chosen at create time and
+// is NOT editable here (a cloud change requires a recreate), so it renders as a
+// read-only badge — but it must be preserved across Save (the compute payload is
+// rebuilt below). Unset → AWS (legacy rows); unknown values are shown verbatim.
+const cloudProviderLabel = (v: string | undefined): string =>
+  v === "gcp" ? "GCP" : v === "hetzner" ? "Hetzner" : !v || v === "aws" ? "AWS" : v;
+
 export function ContainerConfigTab({ workspaceId, data }: Props) {
  const runtime = data.runtime;
+  const provider = data.compute?.provider; // read-only; set at create time
  const instanceType = data.compute?.instance_type;
  const rootGB = data.compute?.volume?.root_gb;
  const displayMode = data.compute?.display?.mode;
@@ -94,6 +102,10 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
            : { mode: "none" },
          // internal#734: omit when "auto" so the wire/default behavior is unchanged.
          ...(form.dataPersistence ? { data_persistence: form.dataPersistence } : {}),
+          // Preserve the create-time cloud provider — it's not editable here, but
+          // this PATCH rebuilds the whole compute object, so omitting it would
+          // wipe the persisted provider (and mislead the badge after a Save).
+          ...(provider ? { provider } : {}),
        };

        const resp = await api.patch<{ needs_restart?: boolean }>(`/workspaces/${workspaceId}`, {
@@ -126,7 +138,18 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
    <div className="p-4 space-y-4">
      <section className="rounded-lg border border-line/50 bg-surface-card/40 p-4">
        <div className="mb-3 flex items-center justify-between gap-3">
-          <h3 className="text-sm font-semibold text-ink">Container Config</h3>
+          <div className="flex items-center gap-2">
+            <h3 className="text-sm font-semibold text-ink">Container Config</h3>
+            {/* Read-only cloud-provider badge — which cloud this workspace's box
+                runs on (AWS/GCP/Hetzner). Defaults to AWS when unset (legacy
+                rows). Set at create time in the Create Workspace dialog. */}
+            <span
+              title="Cloud provider for this workspace's compute (set at create time)"
+              className="rounded-full border border-line/60 bg-surface-sunken px-2 py-0.5 font-mono text-[10px] uppercase tracking-wide text-ink-mid"
+            >
+              {cloudProviderLabel(provider)}
+            </span>
+          </div>
          {data.needsRestart && <span className="text-[11px] text-warm">Restart required</span>}
        </div>

@@ -0,0 +1,229 @@
+// @vitest-environment jsdom
+//
+// Regression tests for #2248 — platform-managed provider credential suppression
+// in ConfigTab.
+//
+// Covers:
+//  - required_env is cleared to [] when switching to a platform-managed provider
+//    whose only declared env var is MOLECULE_LLM_USAGE_TOKEN (single-token case).
+//  - required_env keeps every declared key, MOLECULE_LLM_USAGE_TOKEN included, for BYOK providers.
+
+import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
+import { render, screen, cleanup, waitFor, fireEvent } from "@testing-library/react";
+import React from "react";
+
+afterEach(cleanup);
+
+const apiGet = vi.fn();
+const apiPatch = vi.fn();
+const apiPut = vi.fn();
+vi.mock("@/lib/api", () => ({
+  api: {
+    get: (path: string) => apiGet(path),
+    patch: (path: string, body: unknown) => apiPatch(path, body),
+    put: (path: string, body: unknown) => apiPut(path, body),
+    post: vi.fn(),
+    del: vi.fn(),
+  },
+}));
+
+vi.mock("@/store/canvas", () => ({
+  useCanvasStore: Object.assign(
+    (selector: (s: unknown) => unknown) =>
+      selector({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }),
+    { getState: () => ({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }) },
+  ),
+}));
+
+vi.mock("../AgentCardSection", () => ({
+  AgentCardSection: () => <div data-testid="agent-card-stub" />,
+}));
+
+import { ConfigTab } from "../ConfigTab";
+
+// Installs the apiGet mock for workspace "ws-test": each known path resolves to
+// a canned payload built from `opts`; any other path rejects with an
+// "unmocked api.get" error. Passing configYamlContent: null makes the
+// config.yaml read reject with "not found".
+function wireApi(opts: {
+  workspaceRuntime?: string;
+  workspaceModel?: string;
+  configYamlContent?: string | null;
+  templates?: Array<{
+    id: string;
+    name?: string;
+    runtime?: string;
+    models?: unknown[];
+    registry_backed?: boolean;
+    registry_providers?: unknown[];
+    registry_models?: unknown[];
+  }>;
+}) {
+  apiGet.mockImplementation((path: string) => {
+    switch (path) {
+      case `/workspaces/ws-test`:
+        return Promise.resolve({ runtime: opts.workspaceRuntime ?? "" });
+      case `/workspaces/ws-test/model`:
+        return Promise.resolve({ model: opts.workspaceModel ?? "" });
+      case `/workspaces/ws-test/files/config.yaml`:
+        return opts.configYamlContent === null
+          ? Promise.reject(new Error("not found"))
+          : Promise.resolve({ content: opts.configYamlContent ?? "" });
+      case "/templates":
+        return Promise.resolve(opts.templates ?? []);
+      default:
+        return Promise.reject(new Error(`unmocked api.get: ${path}`));
+    }
+  });
+}
+
+beforeEach(() => {
+  apiGet.mockReset();
+  apiPatch.mockReset();
+  apiPut.mockReset();
+});
+
+describe("ConfigTab — platform-managed credential suppression (#2248)", () => {
+  it("clears required_env to [] when switching to a single-token platform-managed provider", async () => {
+    // Setup: workspace currently has a BYOK provider selected with both keys.
+    // The user switches to a platform-managed provider whose ONLY auth_env
+    // is MOLECULE_LLM_USAGE_TOKEN. After filtering, envVars becomes [];
+    // wasTemplateDriven must still overwrite required_env with [] so the
+    // old MOLECULE_LLM_USAGE_TOKEN requirement does not linger.
+    wireApi({
+      workspaceRuntime: "claude-code",
+      workspaceModel: "byok-sonnet",
+      configYamlContent: [
+        "runtime: claude-code",
+        "runtime_config:",
+        "  model: byok-sonnet",
+        "  required_env:",
+        "    - ANTHROPIC_API_KEY",
+        "    - MOLECULE_LLM_USAGE_TOKEN",
+      ].join("\n"),
+      templates: [
+        {
+          id: "t-claude-code",
+          name: "Claude Code",
+          runtime: "claude-code",
+          models: [],
+          registry_backed: true,
+          registry_providers: [
+            {
+              name: "anthropic",
+              display_name: "BYOK Anthropic",
+              auth_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
+              billing_mode: "byok",
+            },
+            {
+              name: "platform",
+              display_name: "Platform Anthropic",
+              auth_env: ["MOLECULE_LLM_USAGE_TOKEN"],
+              billing_mode: "platform_managed",
+            },
+          ],
+          registry_models: [
+            { id: "byok-sonnet", provider: "anthropic", billing_mode: "byok", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
+            { id: "platform-sonnet", provider: "platform", billing_mode: "platform_managed", required_env: ["MOLECULE_LLM_USAGE_TOKEN"] },
+          ],
+        },
+      ],
+    });
+
+    apiPut.mockResolvedValue({});
+    apiPatch.mockResolvedValue({});
+
+    render(<ConfigTab workspaceId="ws-test" />);
+
+    // Wait for the provider dropdown to populate.
+    const providerSelect = (await waitFor(() =>
+      screen.getByTestId("provider-select"),
+    )) as HTMLSelectElement;
+
+    // Switch from BYOK to platform-managed provider.
+    const platformOption = Array.from(providerSelect.options).find((o) =>
+      o.text.includes("Platform"),
+    );
+    expect(platformOption).toBeTruthy();
+    fireEvent.change(providerSelect, { target: { value: platformOption!.value } });
+
+    // Save & Restart.
+    fireEvent.click(screen.getByRole("button", { name: /save & restart/i }));
+
+    await waitFor(() => {
+      expect(apiPut).toHaveBeenCalledWith(
+        "/workspaces/ws-test/files/config.yaml",
+        expect.objectContaining({
+          content: expect.not.stringContaining("ANTHROPIC_API_KEY"),
+        }),
+      );
+    });
+
+    // Verify the specific put call no longer carries the suppressed token.
+    const putCall = apiPut.mock.calls.find(
+      ([path]) => path === "/workspaces/ws-test/files/config.yaml",
+    );
+    expect(putCall?.[1].content).not.toContain("MOLECULE_LLM_USAGE_TOKEN");
+  });
+
+  it("preserves non-internal tokens for BYOK providers", async () => {
+    wireApi({
+      workspaceRuntime: "claude-code",
+      workspaceModel: "byok-sonnet",
+      configYamlContent: [
+        "runtime: claude-code",
+        "runtime_config:",
+        "  model: byok-sonnet",
+        "  required_env:",
+        "    - ANTHROPIC_API_KEY",
+        "    - MOLECULE_LLM_USAGE_TOKEN",
+      ].join("\n"),
+      templates: [
+        {
+          id: "t-claude-code",
+          name: "Claude Code",
+          runtime: "claude-code",
+          models: [],
+          registry_backed: true,
+          registry_providers: [
+            {
+              name: "anthropic",
+              display_name: "BYOK Anthropic",
+              auth_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
+              billing_mode: "byok",
+            },
+          ],
+          registry_models: [
+            { id: "byok-sonnet", provider: "anthropic", billing_mode: "byok" },
+          ],
+        },
+      ],
+    });
+
+    apiPut.mockResolvedValue({});
+    apiPatch.mockResolvedValue({});
+
+    render(<ConfigTab workspaceId="ws-test" />);
+
+    // Wait for load.
+    await waitFor(() =>
+      screen.getByRole("button", { name: /save & restart/i }),
+    );
+
+    // Click Save without changing provider — BYOK should keep both keys.
+    fireEvent.click(screen.getByRole("button", { name: /save & restart/i }));
+
+    await waitFor(() => {
+      expect(apiPut).toHaveBeenCalledWith(
+        "/workspaces/ws-test/files/config.yaml",
+        expect.objectContaining({
+          content: expect.stringContaining("required_env:"),
+        }),
+      );
+    });
+
+    const putCall = apiPut.mock.calls.find(
+      ([path]) => path === "/workspaces/ws-test/files/config.yaml",
+    );
+    expect(putCall?.[1].content).toContain("ANTHROPIC_API_KEY");
+    expect(putCall?.[1].content).toContain("MOLECULE_LLM_USAGE_TOKEN");
+  });
+});
@@ -324,7 +324,7 @@ export const useCanvasStore = create<CanvasState>((set, get) => ({
  batchPause: async () => {
    const ids = Array.from(get().selectedNodeIds);
    const results = await Promise.allSettled(
-      ids.map((id) => api.post(`/workspaces/${id}/pause`))
+      ids.map((id) => api.post(`/workspaces/${id}/pause?cascade=true`))
    );
    const failed: string[] = [];
    results.forEach((r, i) => {
@@ -371,6 +371,12 @@ export interface WorkspaceCompute {
  // internal#734: per-workspace durable-data choice. "persist" | "ephemeral" |
  // undefined (auto). Controls whether the data volume survives recreate.
  data_persistence?: string;
+  // Cloud/compute backend for this workspace box (multi-provider, per-workspace):
+  // "aws" (default EC2) | "gcp" | "hetzner". Distinct from the LLM/model provider.
+  // Set at create time; routed by CP to the matching WorkspaceProvisioner. A
+  // workspace whose provider differs from its tenant's cloud is reached over a
+  // per-workspace Cloudflare tunnel (runtime#95).
+  provider?: string;
 }

 let socket: ReconnectingSocket | null = null;
@@ -2,7 +2,7 @@

 **Status:** living document — update when you ship a feature that touches one backend.
 **Owner:** workspace-server + controlplane teams.
-**Last audit:** 2026-05-07 (plugin install/uninstall closed for EC2 backend via EIC SSH push to the bind-mounted `/configs/plugins/<name>/`, mirroring the Files API PR #1702 pattern).
+**Last audit:** 2026-05-31 (Claude agent — drift risk #6 verified resolved; nil guards present, contract tests run without Skip).

 ## Why this exists

@@ -93,12 +93,12 @@ For "do we have any backend?", use `HasProvisioner()`, never bare `h.provisioner
 3. **Restart divergence on runtime changes.** Docker re-reads `/configs/config.yaml` from the container before stop, so a changed `runtime:` survives a restart even if the DB isn't synced. EC2 trusts the DB only. If you change the runtime via the Config tab and the handler races the restart, Docker will land on the new runtime, EC2 will land on the old one. **Fix path:** make the Config-tab save explicitly flush to DB before kicking off a restart, not deferred.
 4. **Console-output asymmetry.** Users debugging a stuck workspace on Docker see `docker logs`; on EC2 they see `GetConsoleOutput`. The two outputs look nothing alike. **Fix path:** expose a unified `GET /workspaces/:id/boot-log` that proxies to whichever backend serves the data. Already partly there via `cp_provisioner.Console`.
 5. **Template script drift.** `install.sh` and `start.sh` in each template repo do the same high-level work (install hermes-agent, write .env, write config.yaml, start gateway) but must be kept byte-level consistent on the provider-key forwarding block. Easy to forget. Enforced now by `tools/check-template-parity.sh` (see below) — run it in each template repo's CI.
-6. **Both backends panic when underlying client is nil.** ✅ **Resolved** (`fix/provisioner-nil-guards-1813`). `Provisioner.{Stop,IsRunning}` and `CPProvisioner.{Stop,IsRunning}` now guard against nil clients with `ErrNoBackend`, so the contract-test runner executes scenarios against zero-valued backends without panic.
+6. ~~**Both backends panic when underlying client is nil.**~~ **RESOLVED** — nil guards landed in `Provisioner` (`Start`, `Stop`, `IsRunning`, `ExecRead`, `RemoveVolume`, `VolumeHasFile`, `WriteAuthTokenToVolume`) and `CPProvisioner` (`Stop`, `IsRunning`), all returning `ErrNoBackend`. Contract tests (`TestDockerBackend_Contract`, `TestCPProvisionerBackend_Contract`, `TestZeroValuedBackends_NoPanic`) run in CI without `t.Skip`.

 ## Enforcement

 - **`tools/check-template-parity.sh`** (this repo) — ensures `install.sh` and `start.sh` in a template repo forward identical sets of provider keys. Wire into each template repo's CI as `bash $MONOREPO/tools/check-template-parity.sh install.sh start.sh`.
- **Contract tests** — `workspace-server/internal/provisioner/backend_contract_test.go` defines the behaviors every `provisioner.Provisioner` implementation must satisfy. Fails compile when a method drifts between `Docker` and `CPProvisioner`. Scenario-level runs execute against zero-valued backends since drift risk #6 was resolved (`fix/provisioner-nil-guards-1813`).
+- **Contract tests** — `workspace-server/internal/provisioner/backend_contract_test.go` defines the behaviors every `provisioner.Provisioner` implementation must satisfy. Fails compile when a method drifts between `Docker` and `CPProvisioner`. Scenario-level runs (`TestDockerBackend_Contract`, `TestCPProvisionerBackend_Contract`, `TestZeroValuedBackends_NoPanic`) execute in CI — drift risk #6 resolved.
 - **Source-level dispatcher pins** — `workspace_provision_auto_test.go` enforces the SoT pattern documented above:
  - `TestNoCallSiteCallsDirectProvisionerExceptAuto` — no handler calls `.provisionWorkspace(` or `.provisionWorkspaceCP(` directly outside the dispatcher's allowlist.
  - `TestNoCallSiteCallsBareStop` — no handler calls `.provisioner.Stop(` or `.cpProv.Stop(` directly outside the dispatcher's allowlist (strips Go comments before substring match so archaeology in code comments doesn't trip the gate).
@@ -0,0 +1,293 @@
+# RFC: Org-level Platform Agent — a tenant-resident concierge
+
+**Perspective:** CTO + Backend Engineer + DevOps
+**Status:** Draft — pre-implementation, **CTO sign-off required before any implementation PR**
+**Scope:** `molecule-core` (workspace-server), `molecule-controlplane`, workspace runtime, `molecule-app`
+**This document is the single source of truth (SSOT) for the feature.** Code, OpenAPI, the platform
+MCP, and end-user docs reconcile to this RFC — not to each other.
+
+---
+
+## 1. Summary
+
+Today a Molecule tenant is a control/router box: one EC2 runs the `workspace-server`
+(`molecule-tenant` container) + Postgres + Redis, and **each workspace is its own separate EC2**
+running a runtime image that joins the tenant's A2A mesh. A2A has exactly two participant kinds:
+**workspaces** (agents) and the **user** (the canvas, modeled implicitly as `activity_logs.source_id
+IS NULL`). A user who wants to *do* anything must drive individual workspaces directly — create them,
+assign agents, wire channels/schedules/secrets — i.e. they must carry a lot of platform knowledge.
+
+This RFC introduces a **platform agent**: an always-on org-level agent that
+
+1. runs as a **container on the tenant EC2** itself (beside `molecule-tenant`),
+2. natively holds the **platform-management MCP** (the org-admin tool surface) so it can do anything
+   in the org,
+3. joins A2A as a **first-class third participant** (`kind='platform'`) that sits at the org root, and
+4. becomes the **user's default chat target** — a concierge the user talks to like a chatbot, which
+   then orchestrates the org on their behalf.
+
+Destructive actions the concierge triggers are **human-approved** through the existing approvals
+subsystem.
+
+## 2. Motivation
+
+- **Lower the knowledge floor.** "Spin up an SEO team and have them publish weekly" should be a
+  sentence, not a sequence of workspace/agent/schedule/secret operations.
+- **One front door.** A single conversational entry point that *is* the org, instead of N per-workspace
+  chats the user has to coordinate.
+- **Reuse, don't rebuild.** The agent runtime, A2A mesh, the 87-tool platform MCP, and the approvals
+  subsystem already exist. This feature is mostly *composition* plus one honest new participant kind.
+
+## 3. Goals / Non-Goals
+
+**Goals**
+- A per-tenant platform agent, provisioned automatically, that controls the org via the platform MCP.
+- A first-class `platform` participant in A2A with correct routing and tenant isolation.
+- Server-side approval gating for destructive org operations.
+- Parity with normal workspaces for runtime/model/provider/billing (no special-casing).
+
+**Non-Goals (this RFC)**
+- Replacing the canvas. The canvas remains the advanced/power-user surface.
+- Multi-concierge / per-team concierges. Exactly **one** platform agent per org.
+- A new scoped-down token system for the MCP (tracked separately; see §10 Open Questions).
+
+## 4. Current-state ground truth (verified, with references)
+
+- **Topology.** Tenant EC2 runs `molecule-tenant` (workspace-server) + Postgres + Redis;
+  `controlplane/internal/provisioner/ec2.go:buildTenantUserDataSM()` `docker run`s it with
+  `--network host`, `PORT=8080`. Each **workspace is its own EC2** (`ec2.go:ProvisionWorkspace`).
+- **No `org_id` column.** An "org" is the `parent_id IS NULL` subtree root;
+  `workspace-server/internal/handlers/org_scope.go` resolves it with a recursive CTE (`orgRootID`) and
+  `sameOrg()` compares two workspaces' resolved roots for tenant isolation (#1953/OFFSEC-015).
+- **A2A authorization is hierarchy-based.** `workspace-server/internal/registry/access.go:CanCommunicate`
+  permits self / siblings / ancestor↔descendant. Root-level rows are "siblings" but every routing path
+  is additionally gated by `sameOrg()`.
+- **No participant-kind discriminator.** `workspaces.role` is a free-form string; the user is implicit
+  (`activity_logs.source_id IS NULL`). `migrations/001_workspaces.sql`.
+- **Runtime injects MCP servers** in the claude-code executor's `mcp_servers` dict — today exactly one
+  entry, `"a2a"` (`molecule-ai-workspace-template-claude-code/claude_sdk_executor.py`,
+  `molecule_runtime/claude_sdk_executor.py`). The agent self-registers via `POST /registry/register`
+  (`molecule_runtime/main.py`) and is identified by `WORKSPACE_ID` + `X-Molecule-Org-Id`.
+- **Platform MCP** (`molecule-mcp-server`, stdio Node) authenticates purely from env
+  (`MOLECULE_API_KEY` = org-admin token, `MOLECULE_API_URL`, `MOLECULE_ORG_ID`; `src/api.ts`), is a
+  thin proxy over the tenant REST/A2A API (`chat_with_agent` → `POST /workspaces/:id/a2a`,
+  `async_delegate` → `/delegate`), and has **zero embeddability blockers**.
+- **Billing** is a per-workspace resolver — `ResolveLLMBillingModeDerived`
+  (`workspace-server/internal/handlers/workspace_provision.go`, `llm_billing_mode.go`), defaulting
+  closed to `platform_managed`; `byok` runs on the tenant's own provider key (see
+  `docs/architecture/byok-fail-closed-billing.md`).
+- **Approvals** exist: `migrations/007_approvals.sql`, `internal/handlers/approvals.go`,
+  `EventApprovalRequested`, decide route `POST /workspaces/:id/approvals/:approvalId/decide`.
+
+## 5. Design
+
+### 5.1 The platform agent IS the org root
+
+Because `sameOrg()` resolves each workspace to its topmost `parent_id IS NULL` root, a platform agent
+added as a *second* root would resolve to a *different* root than the existing team and be **blocked**
+by `sameOrg`. Therefore the platform agent **becomes the single org root**, and the org's existing
+root is **re-parented under it**. Consequences:
+
+- `orgRootID(any workspace) == platform-agent-id`; `sameOrg(platform, any in-org ws) == true`.
+- The platform agent reaches every workspace (and is reachable) via the **existing**
+  ancestor↔descendant rules — **no `CanCommunicate` change**, and tenant isolation is unchanged.
+
+This is the honest realization of "a third participant above workspace and user": the concierge is
+literally the org.
+
+### 5.2 `kind` discriminator (the only new marker)
+
+Add a single column `workspaces.kind TEXT NOT NULL DEFAULT 'workspace'`, constrained to
+`('workspace','platform')`. It is the **only** marker of the platform agent — we do **not** also
+encode identity in `role`/`tier` (those stay descriptive). The enum is defined once: the migration
+`CHECK` and the Go constants `KindWorkspace`/`KindPlatform` (+ one `IsValidKind`) are kept in lockstep.
+
+Invariants (handler-enforced, since there is no `org_id` column on which to declare a pure-SQL unique constraint):
+- `kind='platform' ⇒ parent_id IS NULL`.
+- A row may be `kind='platform'` only if it is its own org root (`orgRootID(self) == self`), giving
+  "exactly one platform agent per org". Guard the check+write in a tx with `FOR UPDATE` on the root.
+
+### 5.3 Identity & registration
+
+- **ID** = derived `uuidv5(org-namespace, "platform-agent")` — reproducible, no stored-vs-derived
+  drift, lowercase so it satisfies the runtime's `WORKSPACE_ID` validator.
+- CP **pre-seeds** the `workspaces` row (`kind='platform'`, `parent_id=NULL`, `tier=0`) before the
+  agent boots; the agent self-registers (`POST /registry/register`) into that row. `Register` accepts
+  an optional `kind` and reconciles it, enforcing the §5.2 invariants.
+
+### 5.4 Default-target resolver
+
+New `GET /registry/platform-agent` (handler `internal/handlers/platform_agent.go`): resolve the
+caller's `orgRootID()` and return it iff `kind='platform'`. This is the server hook the dashboard
+targets by default; no change to `ProxyA2A`. **Authored in the OpenAPI SSOT first**; MCP/CLI/docs
+derive from it.
+
+### 5.5 Runtime: two MCPs, config-driven
+
+Make the runtime's `mcp_servers` **config-driven** rather than hardcoded:
+- `molecule_runtime/config.py`: add `extra_mcp_servers: list[dict]` to `WorkspaceConfig`, read
+  `raw.get("mcp_servers", [])`.
+- Both executors merge `extra_mcp_servers` into the `mcp_servers` dict after the always-on `"a2a"`
+  entry (the template `claude_sdk_executor.py` is the live one; the runtime-package copy is the
+  fallback).
+
+The platform agent's `config.yaml` then declares:
+
+```yaml
+runtime: claude-code
+model: sonnet            # default; user-switchable model AND provider via providers.yaml
+a2a:
+  port: 8090             # avoid the workspace default 8000 under host networking
+mcp_servers:
+  - name: platform
+    command: node
+    args: ["/opt/molecule-mcp-server/dist/index.js"]
+```
+
+The `platform` MCP reads `MOLECULE_API_KEY`/`MOLECULE_API_URL`/`MOLECULE_ORG_ID` from the container
+env (passed through to the stdio child) — no per-server `env` block needed.
+
+### 5.6 Hosting & provisioning (tenant EC2 container)
+
+In `ec2.go:buildTenantUserDataSM()` add a `start_platform_agent` stage **after** `wait_platform_health`
+(the agent registers against `localhost:8080` on boot):
+
+```bash
+docker run -d --restart=always --name molecule-platform-agent --network host \
+  -v /data/platform-agent/configs:/configs \
+  -e WORKSPACE_ID=<platform-uuid> -e WORKSPACE_CONFIG_PATH=/configs \
+  -e PLATFORM_URL=http://localhost:8080 \
+  -e MOLECULE_API_URL=http://localhost:8080 -e MOLECULE_API_KEY=$ADMIN_TOKEN -e MOLECULE_ORG_ID=<orgID> \
+  -e ANTHROPIC_AUTH_TOKEN=$ADMIN_TOKEN -e MOLECULE_LLM_ANTHROPIC_BASE_URL=$MOLECULE_LLM_ANTHROPIC_BASE_URL \
+  <platform-agent-image>
+```
+
+- The org `admin_token` is already on the box (Secrets Manager `molecule/tenant/{orgID}`).
+- `--restart=always` provides Docker-level supervision (matches `molecule-tenant`).
+- Mirror the block into the redeploy path (`buildRedeployScript`) so existing tenants backfill it.
+
+### 5.7 Image
+
+A **dedicated `molecule-platform-agent` image**: `FROM workspace-template-claude-code`, `COPY` the
+prebuilt `molecule-mcp-server/dist` + `node_modules` into `/opt/molecule-mcp-server`, and **pin Node
+20** (the slim base ships Node 18; the MCP expects ≥20). A dedicated image keeps the org-admin MCP
+**out of** ordinary workspace images (security hygiene) and lets us set concierge defaults without
+touching the workspace template. `molecule-ci` publishes it.
+
+### 5.8 Approval gate (server-side trust boundary)
+
+The MCP is a *client* of the tenant handlers, so enforcement lives in the **handlers**, not the MCP.
+
+- `internal/approvals/policy.go` (new): one auditable map of gated actions —
+  `delete_workspace`, `deprovision`, `secret_write`, `org_token_mint`.
+- `requireApproval(ctx, workspaceID, action, contextHash)` reuses the existing approvals
+  INSERT/broadcast/escalate. If an `approved`+unconsumed row matches → consume it → proceed. Else
+  create a `pending` row, broadcast `EventApprovalRequested`, and return **HTTP 202
+  `{approval_id, status:"pending"}`** instead of executing. The human decides via the existing decide
+  route; the agent retries and the gate now passes.
+- Add `approval_requests.consumed_at` (single-use) and optional `request_hash` (dedupe identical
+  pending requests).
+- **Escalation:** the platform agent's `parent_id` is NULL, so platform-originated approvals escalate
+  to the **user** (canvas notify), not a parent.
+- The 202 response shape is authored in the **OpenAPI SSOT**.
+
+### 5.9 Billing & model/provider parity
+
+The platform agent is a `workspaces` row, so it inherits the one billing resolver and the
+`providers.yaml` runtime matrix unchanged:
+- **Default `platform_managed`** (metered CP proxy, billed to org credits) — the env wiring in §5.6.
+- **`byok`** = flip `/admin/workspaces/:id/llm-billing-mode` + supply the org's `ANTHROPIC_API_KEY`
+  secret (workspace or global). Exposed as a provisioning flag so a tenant can choose at create time.
+- Model **and provider** are switchable (Claude, Kimi-for-coding, …) via the same dashboard
+  model-switcher any workspace uses.
+
+### 5.10 UX (summary; detailed in app RFC / Phase 5)
+
+The **dashboard** (`molecule-app`) becomes the primary entry: a concierge chat (default-targeting the
+§5.4 resolver) plus a live org overview, with pending approvals surfaced inline. The **canvas** stays
+for advanced users. First UI version is produced in Claude Design and iterated before build.
+
+## 6. SSOT mapping (derive, don't fork)
+
+| Concern | Single source of truth | This RFC's rule |
+|---|---|---|
+| "The org" | `orgRootID()`/`sameOrg()` (`org_scope.go`) | platform agent *becomes* the root; no `org_id` column |
+| Platform marker | `workspaces.kind` | `kind` only; never also `role`/`tier` |
+| Model/provider | `providers.yaml` runtime matrix | concierge switches via the same registry |
+| LLM billing | `ResolveLLMBillingModeDerived` | inherits the one resolver; no new path |
+| Config/secrets delivery | tenant Secrets Manager bundle (`seedWorkspaceConfigSecret`) | no new S3 prefix / second store |
+| Management API | OpenAPI spec | new endpoints authored there first; MCP/CLI/docs derive |
+| Gated actions | `internal/approvals/policy.go` | one map |
+| Platform-agent id | `uuidv5(org, "platform-agent")` | derived, never stored separately |
+
+## 7. Security & blast radius
+
+The concierge holds the org **admin token** (full tenant-root, self-minting) and is driven by
+end-user chat. Mitigations:
+- **Approval gate (§5.8)** must ship *with* the agent going user-facing, not after. Until then the
+  agent is operator-only.
+- **Tenant isolation** is unchanged — every reach path still passes `sameOrg()`.
+- **MCP not in workspace images** (dedicated image, §5.7); the admin token lives only in the
+  platform-agent container env on the tenant box.
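+- **Token rotation:** the MCP reads env once at spawn, and the token is fixed in the container env by
+  `docker run -e` (§5.6), which a plain `docker restart` preserves → rotation = re-create the
+  `molecule-platform-agent` container with the new token, e.g. via the redeploy path (runbook item).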
+- Future: a scoped-down org token (no delete/billing/member) — see §10.
+
+## 8. Migration & rollout
+
+Phase ordering is the rollout contract:
+- **Phase 0** (schema) ships and bakes before anything writes `kind`. Backward-compatible: every
+  existing row defaults to `kind='workspace'`; the `CHECK` is added `NOT VALID` then validated.
+- **Phase 1 re-parenting backfill** is the one real watch-item. **Before** running it, audit whether
+  any org-scoped table keys off the *root workspace id* (e.g. `org_api_tokens`, `org_plugin_allowlist`)
+  versus the CP org UUID. If they reference the root workspace id, re-parenting changes "the root" and
+  those refs must migrate too. The backfill is per-org, idempotent, and reversible.
+- New orgs get the platform agent from first boot; existing orgs backfill via `/admin/tenants
+  redeploy` + a one-time re-parent migration.
+
+## 9. Implementation phases
+
+0. **Schema + model** (`molecule-core`): `kind` column + `approval_requests.consumed_at`; model field +
+   constants; `Register` accepts/validates `kind` with invariants.
+1. **Platform-as-root + resolver** (`molecule-core` + CP): CP pre-seeds the platform row and creates
+   teams under it; per-org re-parent backfill (after the §8 audit); `GET /registry/platform-agent`.
+2. **Config-driven two-MCP runtime** (runtime + claude-code template).
+3. **Image + tenant provisioning** (CP + image + `molecule-ci`): dedicated image; `start_platform_agent`
+   in user-data + redeploy; config via the tenant Secrets Manager bundle; billing knob.
+4. **Approval gate** (`molecule-core`): policy map + `requireApproval` at destructive handlers; OpenAPI
+   202 shape.
+5. **Dashboard concierge UX** (`molecule-app`): design-first, then build against the resolver.
+6. **Cleanup**: exclude the platform agent from billable counts; canvas visibility; rotation runbook.
+
+## 10. Open questions
+
+- **Scoped-down token.** Should the concierge hold a reduced-scope token (no delete/billing/member)
+  instead of full admin + an approval gate? The token-scope system does not exist yet (`orgtoken`
+  TODO). Recommendation: ship admin-token + approval gate now; add scope-down as a follow-up.
+- **Re-parenting vs. wrapper.** If product later wants a platform agent that is *not* the topological
+  root, a `CanCommunicateWithKind` wrapper (guarded by `sameOrg`) is the alternative. Deferred —
+  platform-as-root is lower-risk and needs zero access-control change.
+- **Canvas visibility** of the root concierge node (hide vs. show as the org anchor).
+
+## 11. Verification (end-to-end on a staging tenant)
+
+1. **Schema:** Phase-0 migrations applied; existing workspaces report `kind='workspace'`; `go test
+   ./...` + `-tags=integration` green.
+2. **Provision:** redeploy a staging tenant; `docker ps` shows `molecule-platform-agent` healthy; its
+   logs show a successful `/registry/register`.
+3. **Identity:** the platform row is `kind='platform'`, `parent_id IS NULL`; the former root now has
+   `parent_id = <platform id>`; `GET /registry/platform-agent` returns it.
+4. **Reach:** chat with the platform agent → it calls `list_workspaces` then `create_workspace` via the
+   platform MCP and reports back via `send_message_to_user`.
+5. **Isolation:** it reaches every workspace in its org and **cannot** reach another tenant's
+   workspace.
+6. **Approval gate:** `delete_workspace` → HTTP 202 pending + approval event; decide-approve →
+   completes; a second delete with the same approval is rejected (consumed).
+7. Drive a real concierge flow ("spin up a PM + engineer to build X") and watch the delegation/activity
+   ledger.
+
+---
+
+*Derived from a read-only multi-agent source audit of `molecule-core`, `molecule-controlplane`,
+`molecule-ai-workspace-runtime`, `molecule-ai-workspace-template-claude-code`, and
+`molecule-mcp-server`. No secret values recorded.*
@@ -19,7 +19,10 @@
 #
 # Env vars required:
 #   CF_API_TOKEN        — Cloudflare token with zone:dns:edit
+#     (falls back to CLOUDFLARE_API_TOKEN if CF_API_TOKEN is unset;
+#      the workflow YAML maps both secret names into CF_API_TOKEN)
 #   CF_ZONE_ID          — the zone (moleculesai.app)
+#     (falls back to CLOUDFLARE_ZONE_ID if CF_ZONE_ID is unset)
 #   CP_ADMIN_API_TOKEN — CP admin bearer for api.moleculesai.app
 #   CP_STAGING_ADMIN_API_TOKEN — CP admin bearer for staging-api.moleculesai.app
 #   AWS_*               — standard AWS creds (default region us-east-2)
@@ -56,6 +59,12 @@ need() {
    exit 1
  fi
 }
+# Fallback: operator-host canonical names → CI-scoped names.
+# The workflow YAML already maps both, but direct script invocation
+# (e.g. local ops) may only have the canonical names set.
+CF_API_TOKEN="${CF_API_TOKEN:-${CLOUDFLARE_API_TOKEN:-}}"
+CF_ZONE_ID="${CF_ZONE_ID:-${CLOUDFLARE_ZONE_ID:-}}"
+
 need CF_API_TOKEN
 need CF_ZONE_ID
 need CP_ADMIN_API_TOKEN
@@ -121,7 +130,7 @@ if not payload.get("success", False) or not isinstance(payload.get("result"), li
    print(f"ERROR: Cloudflare DNS list failed: {detail}", file=sys.stderr)
    raise SystemExit(1)
 '; then
-  log "Cloudflare DNS list failed; verify CF_API_TOKEN has Zone:DNS:Edit and CF_ZONE_ID is the moleculesai.app zone."
+  log "Cloudflare DNS list failed; verify CF_API_TOKEN (or CLOUDFLARE_API_TOKEN) has Zone:DNS:Edit and CF_ZONE_ID (or CLOUDFLARE_ZONE_ID) is the moleculesai.app zone."
  exit 1
 fi
 TOTAL_CF=$(echo "$CF_JSON" | python3 -c "import json,sys; print(len(json.load(sys.stdin)['result']))")
@@ -29,8 +29,11 @@
 #                          account:cloudflare_tunnel:edit scope.
 #                          (Same secret as sweep-cf-orphans, but the
 #                          token must include the tunnel scope.)
+#     (falls back to CLOUDFLARE_API_TOKEN if CF_API_TOKEN is unset;
+#      the workflow YAML maps both secret names into CF_API_TOKEN)
 #   CF_ACCOUNT_ID       — the account that owns the tunnels (visible
 #                          in dash.cloudflare.com URL path)
+#     (falls back to CLOUDFLARE_ACCOUNT_ID if CF_ACCOUNT_ID is unset)
 #   CP_ADMIN_API_TOKEN — CP admin bearer for api.moleculesai.app
 #   CP_STAGING_ADMIN_API_TOKEN — CP admin bearer for staging-api.moleculesai.app
 #
@@ -70,6 +73,12 @@ need() {
    exit 1
  fi
 }
+# Fallback: operator-host canonical names → CI-scoped names.
+# The workflow YAML already maps both, but direct script invocation
+# (e.g. local ops) may only have the canonical names set.
+CF_API_TOKEN="${CF_API_TOKEN:-${CLOUDFLARE_API_TOKEN:-}}"
+CF_ACCOUNT_ID="${CF_ACCOUNT_ID:-${CLOUDFLARE_ACCOUNT_ID:-}}"
+
 need CF_API_TOKEN
 need CF_ACCOUNT_ID
 need CP_ADMIN_API_TOKEN
@@ -0,0 +1,299 @@
+#!/usr/bin/env bash
+# cp#455 — Minimal-cell boot-to-registration harness.
+# CTO directive 14eb4f07: "build the minimal claude-code+kimi cell,
+# it should now go GREEN since the fix is live."
+#
+# Stage 1 of 5-stage rollout. Reduced to the minimum boot-to-
+# registration surface so each cell run is ~3-5 min wall-clock.
+#
+# Four assertions (per Researcher Task #79 spec):
+#   1. Provision request accepted; workspace transitions to booting/running
+#   2. Controlplane receives /registry/register for that workspace_id
+#   3. JSON-RPC/completion route returns successful minimal response
+#   4. Teardown terminates workspace even on failure (trap)
+#
+# Cost controls (mandatory):
+#   - SPOT instances (via the dispatch-only EC2 provisioning path;
+#     we don't set instance type — that's the provisioner's call)
+#   - Fast teardown ~3-5 min wall-clock
+#   - Structured per-cell results JSON output
+#
+# Auth model (mirrors test_staging_full_saas.sh):
+#   Single MOLECULE_ADMIN_TOKEN drives everything.
+#     - POST /cp/admin/orgs to provision
+#     - GET  /cp/admin/orgs/:slug/admin-token for per-tenant token
+#     - DELETE /cp/admin/tenants/:slug for teardown
+#   Per-tenant admin token drives tenant API calls (workspaces,
+#   /registry/register, JSON-RPC completion).
+#
+# Required env:
+#   MOLECULE_CP_URL        default: https://staging-api.moleculesai.app
+#   MOLECULE_ADMIN_TOKEN   CP admin bearer
+#
+# Optional env (passed from workflow_dispatch inputs):
+#   E2E_RUNTIME            default claude-code
+#   E2E_BILLING_MODE       default platform_managed
+#   E2E_PROVIDER           default platform
+#   E2E_MODEL              default moonshot/kimi-k2.6
+#   E2E_RUN_ID             Slug suffix; CI: cp455-${GITHUB_RUN_ID}
+#   E2E_PROVISION_TIMEOUT_SECS  default 300 (5 min — fast teardown budget)
+#   E2E_KEEP_ORG           1 → skip teardown (debugging only)
+#
+# Exit codes:
+#   0  happy path
+#   1  generic failure
+#   2  missing required env
+#   3  provisioning timed out (assertion 1)
+#   4  register timeout (assertion 2)
+#   5  completion failure (assertion 3)
+#   6  teardown left orphan (assertion 4)
+
+set -uo pipefail
+
+CP_URL="${MOLECULE_CP_URL:-https://staging-api.moleculesai.app}"
+ADMIN_TOKEN="${MOLECULE_ADMIN_TOKEN:?MOLECULE_ADMIN_TOKEN required — Railway staging CP_ADMIN_API_TOKEN}"
+RUNTIME="${E2E_RUNTIME:-claude-code}"
+BILLING_MODE="${E2E_BILLING_MODE:-platform_managed}"
+PROVIDER="${E2E_PROVIDER:-platform}"
+MODEL="${E2E_MODEL:-moonshot/kimi-k2.6}"
+PROVISION_TIMEOUT_SECS="${E2E_PROVISION_TIMEOUT_SECS:-300}"
+KEEP_ORG="${E2E_KEEP_ORG:-}"
+RUN_ID_SUFFIX="${E2E_RUN_ID:-$(date +%H%M%S)-$$}"
+SLUG="cp455-${RUNTIME}-${RUN_ID_SUFFIX}"
+WORKSPACE_ID=""
+TENANT_TOKEN=""
+RESULT_JSON="/tmp/cell-result.json"
+PROVISION_START_EPOCH=""
+PROVISION_END_EPOCH=""
+REGISTER_STATUS="not_attempted"
+COMPLETION_STATUS="not_attempted"
+TEARDOWN_STATUS="not_attempted"
+EXIT_CODE=0
+
+# Structured per-cell results writer. Emits JSON with all 4
+# assertion statuses + elapsed timing. Called from EXIT trap so
+# results are captured even on early failure.
+write_result() {
+  local elapsed="${1:-0}"
+  cat > "${RESULT_JSON}" <<JSON
+{
+  "runtime": "${RUNTIME}",
+  "billing_mode": "${BILLING_MODE}",
+  "provider": "${PROVIDER}",
+  "model": "${MODEL}",
+  "workspace_id": "${WORKSPACE_ID}",
+  "register_status": "${REGISTER_STATUS}",
+  "completion_status": "${COMPLETION_STATUS}",
+  "teardown_status": "${TEARDOWN_STATUS}",
+  "elapsed_seconds": ${elapsed},
+  "exit_code": ${EXIT_CODE},
+  "ts": "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+}
+JSON
+}
+
+# EXIT trap handler — always runs. Captures the script's exit status,
+# deletes the tenant unless E2E_KEEP_ORG is set, writes ${RESULT_JSON},
+# and re-exits with the final status (bumped to 6 when an otherwise
+# green run fails teardown).
+on_exit() {
+  local exit_code=$?
+  EXIT_CODE=${exit_code}
+  local now
+  now=$(date +%s)
+  local elapsed=0
+  # PROVISION_START_EPOCH is still empty if we exit before provisioning
+  # starts; stderr of the numeric test is silenced in case it is not an
+  # integer.
+  if [ -n "${PROVISION_START_EPOCH:-}" ] && [ "${PROVISION_START_EPOCH}" -gt 0 ] 2>/dev/null; then
+    elapsed=$(( now - PROVISION_START_EPOCH ))
+  fi
+
+  # Assertion 4: teardown terminates workspace even on failure. Keyed on
+  # SLUG alone so the tenant is deleted even when no workspace_id was
+  # ever obtained.
+  if [ -z "${KEEP_ORG}" ] && [ -n "${SLUG:-}" ]; then
+    echo "::group::Teardown (trap)"
+    echo "DELETE ${CP_URL}/cp/admin/tenants/${SLUG}"
+    local teardown_http_code
+    # curl prints "000" through -w on transport failure, so the fallback
+    # must only fill in an EMPTY value; `|| echo "000"` would append a
+    # second "000" and produce "000000".
+    teardown_http_code=$(curl -sS -o /dev/null -w '%{http_code}' \
+      -X DELETE \
+      -H "Authorization: Bearer ${ADMIN_TOKEN}" \
+      --max-time 60 \
+      "${CP_URL}/cp/admin/tenants/${SLUG}") || true
+    teardown_http_code="${teardown_http_code:-000}"
+    # 404 counts as success: there is nothing left to delete.
+    if [ "${teardown_http_code}" = "200" ] || [ "${teardown_http_code}" = "204" ] || [ "${teardown_http_code}" = "404" ]; then
+      TEARDOWN_STATUS="ok"
+      echo "Teardown OK (HTTP ${teardown_http_code})"
+    else
+      TEARDOWN_STATUS="leak_risk_http_${teardown_http_code}"
+      echo "::error::Teardown returned HTTP ${teardown_http_code} — orphan risk"
+      # Bump exit code to 6 only if teardown is the sole failure source;
+      # an earlier assertion's exit code takes precedence.
+      if [ "${EXIT_CODE}" -eq 0 ]; then
+        EXIT_CODE=6
+      fi
+    fi
+    echo "::endgroup::"
+  else
+    TEARDOWN_STATUS="skipped_keep_org"
+  fi
+
+  write_result "${elapsed}"
+  echo "Structured results written to ${RESULT_JSON}"
+  cat "${RESULT_JSON}"
+  exit "${EXIT_CODE}"
+}
+trap on_exit EXIT
+# Signals are converted to a normal exit so the EXIT trap above still runs.
+trap 'echo "::error::Script aborted on signal"; exit 130' INT TERM
+
+PROVISION_START_EPOCH=$(date +%s)
+
+# Assertion 1: Provision request accepted; workspace transitions to
+# booting/running.
+echo "::group::Assertion 1: Provision"
+echo "POST ${CP_URL}/cp/admin/orgs  slug=${SLUG}  runtime=${RUNTIME}  billing_mode=${BILLING_MODE}  provider=${PROVIDER}  model=${MODEL}"
+PROVISION_HTTP_CODE=$(curl -sS -o /tmp/provision-resp.json -w '%{http_code}' \
+  -X POST \
+  -H "Authorization: Bearer ${ADMIN_TOKEN}" \
+  -H "Content-Type: application/json" \
+  --max-time 30 \
+  -d "$(cat <<JSON
+{
+  "slug": "${SLUG}",
+  "runtime": "${RUNTIME}",
+  "billing_mode": "${BILLING_MODE}",
+  "provider": "${PROVIDER}",
+  "model": "${MODEL}",
+  "tier": "spot",
+  "tags": {
+    "cp455_minimal_cell": "1",
+    "run_id": "${RUN_ID_SUFFIX}"
+  }
+}
+JSON
+)" \
+  "${CP_URL}/cp/admin/orgs" || echo "000")
+echo "HTTP ${PROVISION_HTTP_CODE}"
+if [ "${PROVISION_HTTP_CODE}" != "202" ] && [ "${PROVISION_HTTP_CODE}" != "200" ]; then
+  echo "::error::Provision failed (HTTP ${PROVISION_HTTP_CODE})"
+  cat /tmp/provision-resp.json 2>/dev/null || true
+  EXIT_CODE=1
+  exit "${EXIT_CODE}"
+fi
+echo "::endgroup::"
+
+# Wait for org to reach running + retrieve per-tenant token. Bounded
+# at PROVISION_TIMEOUT_SECS. We poll the admin token endpoint; once
+# the org is up, the endpoint returns 200 with the token, and the
+# workspace_id is in the same response or in a follow-up /orgs/:slug
+# call. The loop ends only when BOTH the token and the workspace_id
+# have been obtained.
+echo "::group::Wait for org to be ready (max ${PROVISION_TIMEOUT_SECS}s)"
+WAIT_START=$(date +%s)
+WAIT_DEADLINE=$(( WAIT_START + PROVISION_TIMEOUT_SECS ))
+TENANT_TOKEN=""
+while [ "$(date +%s)" -lt "${WAIT_DEADLINE}" ]; do
+  # curl prints "000" via -w on transport failure; default only an
+  # empty capture so the code is never doubled to "000000".
+  TOKEN_HTTP_CODE=$(curl -sS -o /tmp/token-resp.json -w '%{http_code}' \
+    -H "Authorization: Bearer ${ADMIN_TOKEN}" \
+    --max-time 10 \
+    "${CP_URL}/cp/admin/orgs/${SLUG}/admin-token") || true
+  TOKEN_HTTP_CODE="${TOKEN_HTTP_CODE:-000}"
+  if [ "${TOKEN_HTTP_CODE}" = "200" ]; then
+    TENANT_TOKEN=$(jq -r '.admin_token // .token // empty' /tmp/token-resp.json 2>/dev/null || echo "")
+    if [ -n "${TENANT_TOKEN}" ]; then
+      WORKSPACE_ID=$(jq -r '.workspace_id // .default_workspace_id // empty' /tmp/token-resp.json 2>/dev/null || echo "")
+      if [ -z "${WORKSPACE_ID}" ]; then
+        # Fallback: fetch the org record by slug. Bounded with --max-time
+        # so a stalled connection cannot outlive the provisioning budget.
+        WORKSPACE_ID=$(curl -sS -H "Authorization: Bearer ${ADMIN_TOKEN}" \
+          --max-time 10 \
+          "${CP_URL}/cp/admin/orgs/${SLUG}" | jq -r '.workspace_id // .default_workspace_id // empty' 2>/dev/null || echo "")
+      fi
+      if [ -n "${WORKSPACE_ID}" ]; then
+        PROVISION_END_EPOCH=$(date +%s)
+        echo "Org ready in $(( PROVISION_END_EPOCH - WAIT_START ))s — workspace_id=${WORKSPACE_ID}"
+        break
+      fi
+    fi
+  fi
+  sleep 5
+done
+if [ -z "${TENANT_TOKEN}" ] || [ -z "${WORKSPACE_ID}" ]; then
+  echo "::error::Provision timed out (org never reached running within ${PROVISION_TIMEOUT_SECS}s)"
+  EXIT_CODE=3
+  exit "${EXIT_CODE}"
+fi
+echo "::endgroup::"
+
+# Assertion 2: Controlplane receives /registry/register for that
+# workspace_id. The harness doesn't POST to /registry/register
+# directly — that's the workspace-server's own job on boot. We
+# verify the registration was received by polling the registry
+# endpoint (or by checking that a /workspaces/:id call returns
+# the expected fields).
+echo "::group::Assertion 2: /registry/register for workspace_id=${WORKSPACE_ID}"
+REGISTER_DEADLINE=$(( $(date +%s) + 60 ))
+while [ "$(date +%s)" -lt "${REGISTER_DEADLINE}" ]; do
+  REG_HTTP_CODE=$(curl -sS -o /tmp/reg-resp.json -w '%{http_code}' \
+    -H "Authorization: Bearer ${TENANT_TOKEN}" \
+    --max-time 10 \
+    "${CP_URL}/cp/registry/workspaces/${WORKSPACE_ID}" || echo "000")
+  if [ "${REG_HTTP_CODE}" = "200" ]; then
+    REGISTERED=$(jq -r '.registered // .workspace_id // empty' /tmp/reg-resp.json 2>/dev/null || echo "")
+    if [ -n "${REGISTERED}" ]; then
+      REGISTER_STATUS="ok"
+      echo "Registry confirms workspace_id=${WORKSPACE_ID} registered"
+      break
+    fi
+  fi
+  sleep 3
+done
+if [ "${REGISTER_STATUS}" != "ok" ]; then
+  echo "::error::Registry did not confirm registration within 60s"
+  cat /tmp/reg-resp.json 2>/dev/null || true
+  EXIT_CODE=4
+  exit "${EXIT_CODE}"
+fi
+echo "::endgroup::"
+
+# Assertion 3: JSON-RPC/completion route returns successful minimal
+# response. One minimal completion call — keep payload small.
+# Success requires HTTP 200, no `.error` member and a non-null `.result`.
+echo "::group::Assertion 3: JSON-RPC completion"
+# curl prints "000" via -w on transport failure; default only an empty
+# capture so the echoed code is never doubled to "000000".
+COMPLETION_HTTP_CODE=$(curl -sS -o /tmp/completion-resp.json -w '%{http_code}' \
+  -X POST \
+  -H "Authorization: Bearer ${TENANT_TOKEN}" \
+  -H "Content-Type: application/json" \
+  --max-time 30 \
+  -d "$(cat <<JSON
+{
+  "jsonrpc": "2.0",
+  "id": 1,
+  "method": "completion",
+  "params": {
+    "workspace_id": "${WORKSPACE_ID}",
+    "model": "${MODEL}",
+    "messages": [{"role": "user", "content": "ping"}],
+    "max_tokens": 1
+  }
+}
+JSON
+)" \
+  "${CP_URL}/cp/rpc") || true
+COMPLETION_HTTP_CODE="${COMPLETION_HTTP_CODE:-000}"
+echo "HTTP ${COMPLETION_HTTP_CODE}"
+if [ "${COMPLETION_HTTP_CODE}" != "200" ]; then
+  echo "::error::Completion failed (HTTP ${COMPLETION_HTTP_CODE})"
+  cat /tmp/completion-resp.json 2>/dev/null || true
+  EXIT_CODE=5
+  exit "${EXIT_CODE}"
+fi
+# Verify JSON-RPC 2.0 success envelope
+RPC_ERROR=$(jq -r '.error // empty' /tmp/completion-resp.json 2>/dev/null || echo "")
+if [ -n "${RPC_ERROR}" ]; then
+  echo "::error::Completion returned JSON-RPC error: ${RPC_ERROR}"
+  cat /tmp/completion-resp.json 2>/dev/null || true
+  EXIT_CODE=5
+  exit "${EXIT_CODE}"
+fi
+RPC_RESULT=$(jq -r '.result // empty' /tmp/completion-resp.json 2>/dev/null || echo "")
+if [ -z "${RPC_RESULT}" ] || [ "${RPC_RESULT}" = "null" ]; then
+  echo "::error::Completion response missing result field"
+  cat /tmp/completion-resp.json 2>/dev/null || true
+  EXIT_CODE=5
+  exit "${EXIT_CODE}"
+fi
+COMPLETION_STATUS="ok"
+echo "Completion OK"
+echo "::endgroup::"
+
+# Teardown (assertion 4) has not run yet at this point — it executes in
+# the EXIT trap — so only assertions 1-3 can be reported as passed here.
+echo "Assertions 1-3 passed for ${SLUG} (workspace_id=${WORKSPACE_ID}); assertion 4 (teardown) runs in the EXIT trap"
@@ -53,7 +53,9 @@
 #   PV_RUNTIMES            space list; default "hermes openclaw claude-code"
 #   E2E_PROVISION_TIMEOUT_SECS  default 1800 (hermes/openclaw cold EC2 budget)
 #   E2E_MINIMAX_API_KEY / E2E_ANTHROPIC_API_KEY / E2E_OPENAI_API_KEY
-#                          LLM provider key injected so the runtime can boot
+#                          DEPRECATED for this script — platform-managed models
+#                          use the CP LLM proxy; direct vendor keys are blocked
+#                          by PR #2291. Kept in workflow env for other E2Es.
 #   PV_TOKEN_DIAGNOSTIC_ONLY
 #                          1 -> stop after create/token acquisition. Useful
 #                          to classify Hermes-only vs shared auth-route issues.
@@ -222,17 +224,14 @@ else
 fi

 # ─── 4. Provision the parent + one sibling per runtime under test ──────
-# Inject the LLM provider key so each runtime can authenticate at boot.
-# Priority: MiniMax → direct-Anthropic → OpenAI (mirrors
-# test_staging_full_saas.sh's secrets-injection chain).
+# Platform-managed models: Molecule owns billing via the CP LLM proxy, so
+# the workspace needs NO tenant key. PR #2291 blocks direct vendor key writes
+# (ANTHROPIC_API_KEY, ANTHROPIC_AUTH_TOKEN, MINIMAX_API_KEY, etc.) for
+# platform-managed workspaces. We intentionally keep SECRETS_JSON empty so a
+# stray E2E_*_API_KEY in the runner env cannot silently convert this into a
+# BYOK run and mask the platform-managed path (mirrors
+# test_staging_full_saas.sh's E2E_LLM_PATH=platform branch).
 SECRETS_JSON='{}'
-if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
-  SECRETS_JSON=$(python3 -c "import json,os;k=os.environ['E2E_MINIMAX_API_KEY'];print(json.dumps({'ANTHROPIC_BASE_URL':'https://api.minimax.io/anthropic','ANTHROPIC_AUTH_TOKEN':k,'MINIMAX_API_KEY':k}))")
-elif [ -n "${E2E_ANTHROPIC_API_KEY:-}" ]; then
-  SECRETS_JSON=$(python3 -c "import json,os;k=os.environ['E2E_ANTHROPIC_API_KEY'];print(json.dumps({'ANTHROPIC_API_KEY':k}))")
-elif [ -n "${E2E_OPENAI_API_KEY:-}" ]; then
-  SECRETS_JSON=$(python3 -c "import json,os;k=os.environ['E2E_OPENAI_API_KEY'];print(json.dumps({'OPENAI_API_KEY':k,'OPENAI_BASE_URL':'https://api.openai.com/v1','MODEL_PROVIDER':'openai:gpt-4o','HERMES_INFERENCE_PROVIDER':'custom','HERMES_CUSTOM_BASE_URL':'https://api.openai.com/v1','HERMES_CUSTOM_API_KEY':k,'HERMES_CUSTOM_API_MODE':'chat_completions'}))")
-fi

 # Workspace-create now enforces the MODEL_REQUIRED contract: there is NO
 # platform-side default model for a runtime (feedback_workspace_model_required_
@@ -55,7 +55,7 @@ def drift_module():
        "SENTINEL_JOB": "all-required",
        "AUDIT_WORKFLOW_PATH": ".gitea/workflows/audit-force-merge.yml",
        "CI_WORKFLOW_PATH": ".gitea/workflows/ci.yml",
-        "DRIFT_LABEL": "tier:high",
+        "DRIFT_LABEL": "ci-bp-drift",
    }
    with mock.patch.dict(os.environ, env, clear=False):
        spec = importlib.util.spec_from_file_location(
@@ -584,6 +584,54 @@ def test_find_open_issue_raises_on_transient_error(drift_module, monkeypatch):
        drift_module.find_open_issue("[ci-drift] foo")


+# --------------------------------------------------------------------------
+# Pagination: search beyond page 1 so an existing issue on any page is found
+# --------------------------------------------------------------------------
+def test_find_open_issue_paginates_to_page_2(drift_module, monkeypatch):
+    """Issue exists on page 2 → paginate and find it."""
+    target = {"number": 99, "title": "[ci-drift] foo"}
+    filler = [{"number": i, "title": f"other-{i}"} for i in range(1, 51)]
+
+    class PaginatedStub:
+        def __init__(self):
+            self.calls = []
+
+        def __call__(self, method, path, *, body=None, query=None, expect_json=True):
+            self.calls.append((method, path, body, query))
+            page = int((query or {}).get("page", "1"))
+            if page == 1:
+                return 200, filler
+            if page == 2:
+                return 200, [target]
+            return 200, []
+
+    stub = PaginatedStub()
+    monkeypatch.setattr(drift_module, "api", stub)
+    assert drift_module.find_open_issue("[ci-drift] foo") == target
+    assert len(stub.calls) == 2
+
+
+def test_find_open_issue_stops_at_last_page(drift_module, monkeypatch):
+    """No match across pages → stop when a page has <50 results."""
+    filler = [{"number": i, "title": f"other-{i}"} for i in range(1, 51)]
+
+    class PaginatedStub:
+        def __init__(self):
+            self.calls = []
+
+        def __call__(self, method, path, *, body=None, query=None, expect_json=True):
+            self.calls.append((method, path, body, query))
+            page = int((query or {}).get("page", "1"))
+            if page == 1:
+                return 200, filler
+            return 200, []
+
+    stub = PaginatedStub()
+    monkeypatch.setattr(drift_module, "api", stub)
+    assert drift_module.find_open_issue("[ci-drift] foo") is None
+    assert len(stub.calls) == 2
+
+
 # --------------------------------------------------------------------------
 # Idempotent path: existing issue is PATCHed, NOT duplicated
 # --------------------------------------------------------------------------
@@ -617,7 +665,7 @@ def test_file_or_update_posts_new_issue_when_none_exists(drift_module, monkeypat
    stub = _make_stub_api({
        ("GET", "/repos/owner/repo/issues"): (200, []),
        ("POST", "/repos/owner/repo/issues"): (201, {"number": 99}),
-        ("GET", "/repos/owner/repo/labels"): (200, [{"id": 10, "name": "tier:high"}]),
+        ("GET", "/repos/owner/repo/labels"): (200, [{"id": 10, "name": "ci-bp-drift"}]),
        ("POST", "/repos/owner/repo/issues/99/labels"): (200, []),
    })
    monkeypatch.setattr(drift_module, "api", stub)
@@ -127,7 +127,7 @@ def _stub_api(monkeypatch, lint_mod, bp_response, issue_search_response=None, po
            posted_record.setdefault("patches", []).append({"path": path, "body": body})
            return ("ok", {"number": 9001})
        if "/labels" in path:
-            return ("ok", [{"id": 10, "name": "ci-bp-drift"}, {"id": 9, "name": "tier:high"}])
+            return ("ok", [{"id": 10, "name": "ci-bp-drift"}, {"id": 9, "name": "ci-bp-drift"}])
        return ("ok", {})

    monkeypatch.setattr(lint_mod, "api", fake_api)
@@ -427,13 +427,13 @@ def test_required_workflow_with_paths_ignore_fails(
    """Same defect class for `paths-ignore` — exit 1, named."""
    _write_workflow(
        lint_module.WORKFLOWS_DIR,
-        "sop-tier-check.yml",
-        "name: sop-tier-check\n"
+        "sop-checklist.yml",
+        "name: sop-checklist\n"
        "on:\n"
        "  pull_request_target:\n"
        "    paths-ignore: ['docs/**']\n"
        "jobs:\n"
-        "  tier-check:\n"
+        "  all-items-acked:\n"
        "    runs-on: ubuntu-latest\n",
    )
    stub = _make_stub_api({
@@ -441,7 +441,7 @@ def test_required_workflow_with_paths_ignore_fails(
            200,
            {
                "status_check_contexts": [
-                    "sop-tier-check / tier-check (pull_request_target)"
+                    "sop-checklist / all-items-acked (pull_request_target)"
                ]
            },
        ),
@@ -450,7 +450,7 @@ def test_required_workflow_with_paths_ignore_fails(
    rc = lint_module.run()
    assert rc == 1
    out = capsys.readouterr().out
-    assert "sop-tier-check.yml" in out
+    assert "sop-checklist.yml" in out
    assert "paths-ignore" in out


@@ -78,7 +78,7 @@ def wd_module():
        "GITEA_HOST": "git.example.test",
        "REPO": "owner/repo",
        "WATCH_BRANCH": "main",
-        "RED_LABEL": "tier:high",
+        "RED_LABEL": "ci-bp-drift",
    }
    with mock.patch.dict(os.environ, env, clear=False):
        spec = importlib.util.spec_from_file_location(
@@ -463,7 +463,7 @@ def test_red_detected_opens_issue(wd_module, monkeypatch):
        ("GET", "/repos/owner/repo/issues"): (200, []),  # no existing issue
        ("POST", "/repos/owner/repo/issues"): (201, {"number": 555}),
        ("GET", "/repos/owner/repo/labels"): (
-            200, [{"id": 9, "name": "tier:high"}],
+            200, [{"id": 9, "name": "ci-bp-drift"}],
        ),
        ("POST", "/repos/owner/repo/issues/555/labels"): (200, []),
    })
@@ -1063,7 +1063,7 @@ def test_head_recheck_files_when_still_red_after_settling(
        if method == "GET" and path == "/repos/owner/repo/issues":
            return (200, [])
        if method == "GET" and path == "/repos/owner/repo/labels":
-            return (200, [{"id": 9, "name": "tier:high"}])
+            return (200, [{"id": 9, "name": "ci-bp-drift"}])
        if method == "POST" and path == "/repos/owner/repo/issues":
            post_filed["value"] = True
            return (201, {"number": 999})
@@ -1050,12 +1050,13 @@ def test_reap_continues_on_per_sha_apierror(sr_module, monkeypatch, capsys):


 def test_main_soft_skips_when_commit_listing_times_out(sr_module, monkeypatch, capsys):
-    """A transient outage while listing recent commits should not paint main red.
+    """A transient outage while listing recent commits fails the tick visibly.

    Per-SHA status read failures are already isolated inside `reap_branch`.
    The real 2026-05-14 failure was earlier: `/commits?sha=main&limit=30`
    timed out after all retries, aborting the tick. The next 5-minute tick can
-    retry safely, so `main()` should emit an observable warning and return 0.
+    retry safely, but the tick itself must be observable as red (exit 1 + error
+    annotation) so the cron bot alerts on persistent infra issues.
    """

    monkeypatch.setattr(sr_module, "scan_workflows", lambda _: {"workflow-without-push": False})
@@ -1068,9 +1069,9 @@ def test_main_soft_skips_when_commit_listing_times_out(sr_module, monkeypatch, c
    monkeypatch.setattr(sr_module, "list_recent_commit_shas", fake_list_recent_commit_shas)
    monkeypatch.setattr(sys, "argv", ["status-reaper.py"])

-    assert sr_module.main() == 0
+    assert sr_module.main() == 1
    captured = capsys.readouterr()
-    assert "::warning::status-reaper skipped this tick" in captured.out
+    assert "::error::status-reaper cannot run" in captured.out
    assert '"skipped": true' in captured.out
    assert '"skip_reason": "commit-list-api-error"' in captured.out

@@ -35,7 +35,7 @@ GITEA_TOKEN = os.environ.get("GITEA_TOKEN", os.environ.get("GITHUB_TOKEN", ""))
 API_BASE = f"https://{GITEA_HOST}/api/v1"

 # Timeout in seconds for all HTTP calls. Defence-in-depth: ensures a missing or
-# invalid SOP_TIER_CHECK_TOKEN causes a fast (~15 s) failure rather than an
+# invalid GITEA_TOKEN causes a fast (~15 s) failure rather than an
 # indefinite hang. The real fix is provisioning the token; this caps worst-case
 # wall-clock on a broken/unreachable Gitea host.
 DEFAULT_TIMEOUT = 15
@@ -116,45 +116,27 @@ LOGIN_ALIASES = {
    "infra-sre": "core-devops",
 }

-# SOP-6 tier → required agent groups
-# tier:low    → engineers,managers,ceo (OR: any one suffices)
-# tier:medium → managers AND engineers AND qa,security (AND)
-# tier:high   → ceo (OR, but single)
-# "?" = teams not yet created; treated as optional for MVP
-TIER_AGENTS = {
-    "tier:low":    {"managers": "core-lead", "engineers": "core-devops", "ceo": "ceo"},
-    "tier:medium": {"managers": "core-lead", "engineers": "core-devops", "qa": "core-qa", "security": "core-security"},
-    "tier:high":   {"ceo": "ceo"},
-}
-
 POSITIVE_VERDICTS = {"APPROVED", "N/A", "ACK"}

-
-def _get_pr_tier(pr_number: int, repo: str) -> str:
-    """Get the PR's tier label."""
-    owner, name = repo.split("/", 1)
-    try:
-        pr = api_get(f"/repos/{owner}/{name}/pulls/{pr_number}")
-        for label in pr.get("labels", []):
-            name_l = label.get("name", "")
-            if name_l in TIER_AGENTS:
-                return name_l
-    except GiteaError:
-        pass
-    return "tier:low"  # Default for untagged PRs
+# Uniform required-agent set (SOP-6 tier removal, CTO 2026-06-07).
+# ALL of the following must give a positive verdict (AND gate, strict).
+REQUIRED_AGENTS = {
+    "managers": "core-lead",
+    "engineers": "core-devops",
+    "qa": "core-qa",
+    "security": "core-security",
+}


 def signal_1_comment_scan(pr_number: int, repo: str) -> dict:
    """
    Scan issue + PR comments AND reviews for agent-tag policy gates.
-    Matches tag AND author. Filters to tier-relevant agents.
+    Matches tag AND author. All REQUIRED_AGENTS must positively ACK.
    Returns: {signal, results, verdict}
    """
    owner, name = repo.split("/", 1)

-    # Get tier label to determine relevant agents
-    tier = _get_pr_tier(pr_number, repo)
-    relevant_roles = TIER_AGENTS.get(tier, TIER_AGENTS["tier:low"])
+    relevant_roles = REQUIRED_AGENTS

    # Build reverse map: login -> (group, agent_key)
    login_to_group = {}
@@ -221,35 +203,22 @@ def signal_1_comment_scan(pr_number: int, repo: str) -> dict:
        latest = max(matches, key=lambda x: x["created_at"], default=None) if matches else None
        findings[agent_key] = {
            "group": group,
-            "tier": tier,
            "found": latest,
            "verdict": latest["verdict"] if latest else "MISSING",
        }

-    # Compute gate verdict using tier-specific logic:
-    # - tier:low / tier:high (OR gate): ANY positive = CLEAR, ANY negative = BLOCKED
-    # - tier:medium (AND gate): ALL must be positive = CLEAR, ANY negative = BLOCKED
+    # Uniform AND gate: ALL required agents must be positive.
    verdicts = [f["verdict"] for f in findings.values()]
    if not verdicts:
        gate_verdict = "N/A"
-    elif tier in ("tier:low", "tier:high"):
-        # OR gate: one positive is enough
-        if any(v in POSITIVE_VERDICTS for v in verdicts):
-            gate_verdict = "CLEAR"
-        elif any(v in ("BLOCKED", "CHANGES_REQUESTED", "COMMENT") for v in verdicts):
-            gate_verdict = "BLOCKED"
-        else:
-            gate_verdict = "INCOMPLETE"
+    elif all(v in POSITIVE_VERDICTS for v in verdicts):
+        gate_verdict = "CLEAR"
+    elif any(v in ("BLOCKED", "CHANGES_REQUESTED", "COMMENT") for v in verdicts):
+        gate_verdict = "BLOCKED"
    else:
-        # AND gate (tier:medium): all must be positive
-        if all(v in POSITIVE_VERDICTS for v in verdicts):
-            gate_verdict = "CLEAR"
-        elif any(v in ("BLOCKED", "CHANGES_REQUESTED", "COMMENT") for v in verdicts):
-            gate_verdict = "BLOCKED"
-        else:
-            gate_verdict = "INCOMPLETE"
+        gate_verdict = "INCOMPLETE"

-    return {"signal": "agent_tag_comments", "results": findings, "verdict": gate_verdict, "tier": tier}
+    return {"signal": "agent_tag_comments", "results": findings, "verdict": gate_verdict}


 # ── Signal 2: REQUEST_CHANGES reviews state machine ────────────────────────────
@@ -504,6 +473,7 @@ def signal_6_ci(pr_number: int, repo: str, branch: str | None = None, pr_data: d

    failing_required = []
    passing_required = []
+    pending_required = []
    for ctx in required_checks:
        state = check_statuses.get(ctx, "null")
        if state == "failure":
@@ -511,7 +481,7 @@ def signal_6_ci(pr_number: int, repo: str, branch: str | None = None, pr_data: d
        elif state in ("success", "neutral"):
            passing_required.append(ctx)
        else:
-            passing_required.append(f"{ctx} (pending)")
+            pending_required.append(ctx)

    # NOTE: do NOT use ci_state (combined_state) as a fallback verdict driver.
    # The combined_state is computed over ALL statuses including this
@@ -519,12 +489,14 @@ def signal_6_ci(pr_number: int, repo: str, branch: str | None = None, pr_data: d
    # self-referential loop: gate-check posts failure → combined_state
    # becomes failure → script re-blocks → posts failure again.
    # The check_statuses dict already excludes gate-check (Bug-1 fix from
-    # PR #547). Use failing_required as the sole CI gate; if no required
-    # checks are defined on the branch, return CLEAR rather than re-using
-    # the combined_state which includes our own status.
+    # PR #547).
+    #
+    # Fail-closed: any required check that is missing, pending, or failing
+    # blocks the gate. Only return CLEAR when every required check is
+    # explicitly success/neutral.
    if failing_required:
        verdict = "CI_FAIL"
-    elif ci_state == "pending":
+    elif pending_required:
        verdict = "CI_PENDING"
    else:
        verdict = "CLEAR"
@@ -535,6 +507,7 @@ def signal_6_ci(pr_number: int, repo: str, branch: str | None = None, pr_data: d
        "required_checks": required_checks,
        "failing_required": failing_required,
        "passing_required": passing_required,
+        "pending_required": pending_required,
        "all_check_statuses": check_statuses,
        "verdict": verdict,
    }
@@ -39,11 +39,11 @@ def test_signal_1_infra_sre_login_alias_resolved_to_core_devops(monkeypatch):
    mod = load_gate_check()

    def fake_api_get(path):
-        # PR 900 has tier:low label
+        # PR 900 has area:ci label
        if path == "/repos/molecule-ai/molecule-core/pulls/900":
            return {
                "number": 900,
-                "labels": [{"name": "tier:low"}],
+                "labels": [{"name": "area:ci"}],
            }
        raise AssertionError(f"unexpected api_get: {path}")

@@ -59,7 +59,25 @@ def test_signal_1_infra_sre_login_alias_resolved_to_core_devops(monkeypatch):
                    "user": {"login": "infra-sre"},
                    "state": "APPROVED",
                    "submitted_at": "2026-05-13T10:00:00Z",
-                }
+                },
+                {
+                    "id": 2,
+                    "user": {"login": "core-lead"},
+                    "state": "APPROVED",
+                    "submitted_at": "2026-05-13T10:00:01Z",
+                },
+                {
+                    "id": 3,
+                    "user": {"login": "core-qa"},
+                    "state": "APPROVED",
+                    "submitted_at": "2026-05-13T10:00:02Z",
+                },
+                {
+                    "id": 4,
+                    "user": {"login": "core-security"},
+                    "state": "APPROVED",
+                    "submitted_at": "2026-05-13T10:00:03Z",
+                },
            ]
        raise AssertionError(f"unexpected api_list: {path}")

@@ -85,7 +103,7 @@ def test_signal_1_null_user_in_review_does_not_crash(monkeypatch):
        if path == "/repos/molecule-ai/molecule-core/pulls/901":
            return {
                "number": 901,
-                "labels": [{"name": "tier:low"}],
+                "labels": [{"name": "area:ci"}],
            }
        raise AssertionError(f"unexpected api_get: {path}")

@@ -108,6 +126,24 @@ def test_signal_1_null_user_in_review_does_not_crash(monkeypatch):
                    "state": "APPROVED",
                    "submitted_at": "2026-05-13T10:01:00Z",
                },
+                {
+                    "id": 3,
+                    "user": {"login": "core-lead"},
+                    "state": "APPROVED",
+                    "submitted_at": "2026-05-13T10:01:01Z",
+                },
+                {
+                    "id": 4,
+                    "user": {"login": "core-qa"},
+                    "state": "APPROVED",
+                    "submitted_at": "2026-05-13T10:01:02Z",
+                },
+                {
+                    "id": 5,
+                    "user": {"login": "core-security"},
+                    "state": "APPROVED",
+                    "submitted_at": "2026-05-13T10:01:03Z",
+                },
            ]
        raise AssertionError(f"unexpected api_list: {path}")

@@ -116,7 +152,7 @@ def test_signal_1_null_user_in_review_does_not_crash(monkeypatch):

    result = mod.signal_1_comment_scan(901, "molecule-ai/molecule-core")

-    # Should not crash; the valid review from core-devops still satisfies engineers gate
+    # Should not crash; all required gates clear
    assert result["verdict"] == "CLEAR"
    assert result["results"]["core-devops"]["verdict"] == "APPROVED"

@@ -351,8 +351,17 @@ func main() {
 	// (true, err) on any transient error, so a CP blip never flips a healthy
 	// workspace.
 	if cpProv != nil {
+		// Guard against double-reprovision thrash (internal#544): the restart
+		// debounce window must cover the reconciler interval so a workspace
+		// flipped offline by one reconcile tick isn't immediately reprovisioned
+		// again by the next tick before the debounce drops it. If the debounce
+		// window ever drops below the interval, the coupling silently breaks.
+		reconcileInterval := 60 * time.Second
+		if handlers.RestartDebounceWindow < reconcileInterval {
+			log.Fatalf("RestartDebounceWindow (%s) must be >= CP instance reconciler interval (%s) to prevent double-reprovision thrash (internal#544)", handlers.RestartDebounceWindow, reconcileInterval)
+		}
 		go supervised.RunWithRecover(ctx, "cp-instance-reconciler", func(c context.Context) {
-			registry.StartCPInstanceReconciler(c, cpProv, onWorkspaceOffline, 60*time.Second)
+			registry.StartCPInstanceReconciler(c, cpProv, onWorkspaceOffline, reconcileInterval)
 		})
 	}

@@ -0,0 +1,39 @@
+// Package approvals holds the single source of truth for which destructive
+// org operations require a human approval before they execute.
+//
+// (RFC docs/design/rfc-platform-agent.md — Phase 4)
+//
+// The org-level platform agent is driven by end-user chat and holds an org-admin
+// token, so destructive/irreversible operations it can trigger are gated: the
+// handler creates a pending approval and returns it instead of executing, and a
+// human decides via the existing approvals subsystem. Keeping the gated-action
+// list in ONE map makes the blast-radius boundary auditable in a single place —
+// a handler not listed here behaves exactly as before.
+package approvals
+
+// Action is the canonical identifier of a gated destructive operation. The same
+// string is stored in approval_requests.action so the gate can match a pending/
+// approved request to the operation being retried.
+type Action string
+
+const (
+	ActionDeleteWorkspace Action = "delete_workspace"
+	ActionDeprovision     Action = "deprovision_workspace"
+	ActionSecretWrite     Action = "secret_write"
+	ActionOrgTokenMint    Action = "org_token_mint"
+)
+
+// gated is the set of actions that require a human approval. Add an entry here
+// (and gate the corresponding handler with requireApproval) to expand the
+// boundary; remove one to drop a gate. This is the only place the policy lives.
+var gated = map[Action]bool{
+	ActionDeleteWorkspace: true,
+	ActionDeprovision:     true,
+	ActionSecretWrite:     true,
+	ActionOrgTokenMint:    true,
+}
+
+// IsGated reports whether action requires a human approval before executing.
+func IsGated(action Action) bool {
+	return gated[action]
+}
@@ -271,6 +271,11 @@ func (m *Manager) Reload(ctx context.Context) {
 		ch.Config["_channel_id"] = ch.ID

 		go func(a ChannelAdapter, c ChannelRow, pCtx context.Context) {
+			defer func() {
+				if r := recover(); r != nil {
+					log.Printf("PANIC recovered in channel polling goroutine: %v", r)
+				}
+			}()
 			if err := a.StartPolling(pCtx, c.Config, m.onInboundMessage); err != nil {
 				log.Printf("Channels: polling error for %s/%s: %v", c.ChannelType, truncID(c.ID), err)
 			}
@@ -354,6 +359,11 @@ func (m *Manager) HandleInbound(ctx context.Context, ch ChannelRow, msg *Inbound
 			typingCtx, typingCancel := context.WithCancel(fireCtx)
 			defer typingCancel()
 			go func() {
+				defer func() {
+					if r := recover(); r != nil {
+						log.Printf("PANIC recovered in typing indicator goroutine: %v", r)
+					}
+				}()
 				typer.SendTyping(ch.Config, msg.ChatID)
 				ticker := time.NewTicker(4 * time.Second)
 				defer ticker.Stop()
@@ -142,7 +142,7 @@ func ghcrAuthHeader() string {
 		log.Printf("workspace-images: failed to marshal GHCR auth: %v", err)
 		return ""
 	}
-	return base64.URLEncoding.EncodeToString(js)
+	return base64.StdEncoding.EncodeToString(js)
 }

 // Refresh pulls the requested runtimes' template images from GHCR and (if
@@ -47,9 +47,9 @@ func TestGHCRAuthHeader_EncodesDockerEnginePayload(t *testing.T) {
 	if got == "" {
 		t.Fatal("expected non-empty auth header")
 	}
-	raw, err := base64.URLEncoding.DecodeString(got)
+	raw, err := base64.StdEncoding.DecodeString(got)
 	if err != nil {
-		t.Fatalf("auth header is not valid base64-url: %v", err)
+		t.Fatalf("auth header is not valid base64: %v", err)
 	}
 	var payload map[string]string
 	if err := json.Unmarshal(raw, &payload); err != nil {
@@ -80,9 +80,9 @@ func TestGHCRAuthHeader_RespectsRegistryEnv(t *testing.T) {
 	if got == "" {
 		t.Fatal("expected non-empty auth header")
 	}
-	raw, err := base64.URLEncoding.DecodeString(got)
+	raw, err := base64.StdEncoding.DecodeString(got)
 	if err != nil {
-		t.Fatalf("auth header is not valid base64-url: %v", err)
+		t.Fatalf("auth header is not valid base64: %v", err)
 	}
 	var payload map[string]string
 	if err := json.Unmarshal(raw, &payload); err != nil {
@@ -220,7 +220,7 @@ func TestGHCRAuthHeader_TrimsWhitespace(t *testing.T) {
 	t.Setenv("GHCR_USER", "  alice  ")
 	t.Setenv("GHCR_TOKEN", "\tfake-tok-value\n")
 	got := ghcrAuthHeader()
-	raw, _ := base64.URLEncoding.DecodeString(got)
+	raw, _ := base64.StdEncoding.DecodeString(got)
 	var payload map[string]string
 	_ = json.Unmarshal(raw, &payload)
 	if payload["username"] != "alice" {
@@ -0,0 +1,196 @@
+package handlers
+
+// approval_gate.go — server-side gate for destructive org operations.
+// (RFC docs/design/rfc-platform-agent.md — Phase 4)
+//
+// requireApproval is the choke point a destructive handler calls before
+// executing. It is the trust boundary: the platform-management MCP is a CLIENT
+// of these handlers, so enforcing here (not in the MCP) means anything holding
+// an org-admin token still goes through the gate. The flow:
+//
+//   - if a matching APPROVED + unconsumed approval exists, consume it (single-
+//     use) and let the operation proceed;
+//   - otherwise create (or reuse) a PENDING approval, broadcast it to the canvas
+//     (and escalate to the parent if any), and the handler returns HTTP 202 so a
+//     human can decide. The agent retries after approval and the gate passes.
+//
+// Matching is by (workspace_id, action, request_hash) where request_hash is a
+// stable digest of the operation + its context, so a retried op reuses its own
+// request instead of flooding the table, and an approval for "delete ws A"
+// cannot be replayed to "delete ws B".
+
+import (
+	"context"
+	"crypto/sha256"
+	"database/sql"
+	"encoding/hex"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"log"
+	"net/http"
+	"os"
+
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/approvals"
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/events"
+	"github.com/gin-gonic/gin"
+)
+
+// approvalRequestHash is a stable digest of the gated operation. Go's
+// json.Marshal sorts map keys, so the same context always hashes the same.
+func approvalRequestHash(workspaceID, action string, contextMap map[string]interface{}) string {
+	cj, err := json.Marshal(contextMap)
+	if err != nil || cj == nil {
+		cj = []byte("{}")
+	}
+	sum := sha256.Sum256([]byte(workspaceID + "\x00" + action + "\x00" + string(cj)))
+	return hex.EncodeToString(sum[:])
+}
+
+// requireApproval returns (approved=true, consumedID) when a matching approval
+// exists and was just consumed; otherwise it creates/reuses a pending approval
+// and returns (false, pendingID). A non-nil error is a server error.
+func requireApproval(ctx context.Context, b events.EventEmitter, workspaceID string, action approvals.Action, reason string, contextMap map[string]interface{}) (bool, string, error) {
+	hash := approvalRequestHash(workspaceID, string(action), contextMap)
+
+	// 1. Atomically consume an approved + unconsumed request, if one exists.
+	//    The conditional UPDATE ... RETURNING makes consumption race-safe: two
+	//    concurrent destructive calls cannot both consume the same approval.
+	var consumedID string
+	err := db.DB.QueryRowContext(ctx, `
+		UPDATE approval_requests SET consumed_at = now()
+		WHERE id = (
+			SELECT id FROM approval_requests
+			WHERE workspace_id = $1 AND action = $2 AND request_hash = $3
+			  AND status = 'approved' AND consumed_at IS NULL
+			ORDER BY decided_at DESC NULLS LAST
+			LIMIT 1
+			FOR UPDATE SKIP LOCKED
+		)
+		RETURNING id
+	`, workspaceID, string(action), hash).Scan(&consumedID)
+	if err == nil {
+		return true, consumedID, nil
+	}
+	if !errors.Is(err, sql.ErrNoRows) {
+		return false, "", fmt.Errorf("consume approval: %w", err)
+	}
+
+	// 2. No usable approval — create a pending one, or reuse an existing pending
+	//    request for the same operation so retries don't flood the table.
+	cj, mErr := json.Marshal(contextMap)
+	if mErr != nil || cj == nil {
+		cj = []byte("{}")
+	}
+	var approvalID string
+	err = db.DB.QueryRowContext(ctx, `
+		WITH existing AS (
+			SELECT id FROM approval_requests
+			WHERE workspace_id = $1 AND action = $2 AND request_hash = $3 AND status = 'pending'
+			LIMIT 1
+		), ins AS (
+			INSERT INTO approval_requests (workspace_id, action, reason, context, request_hash)
+			SELECT $1, $2, $4, $5::jsonb, $3
+			WHERE NOT EXISTS (SELECT 1 FROM existing)
+			RETURNING id
+		)
+		SELECT id FROM ins UNION ALL SELECT id FROM existing LIMIT 1
+	`, workspaceID, string(action), hash, reason, string(cj)).Scan(&approvalID)
+	if err != nil {
+		return false, "", fmt.Errorf("create approval: %w", err)
+	}
+
+	// Broadcast to the canvas (the user-facing signal). For a platform agent the
+	// parent_id is NULL, so the requested-event on its own workspace IS the user
+	// prompt; ordinary workspaces also escalate to their parent.
+	//
+	// b may be nil: stateless handlers (e.g. org-token mint — OrgTokenHandler is
+	// an empty struct with no broadcaster) still gate; they just can't push a
+	// live canvas event. The pending approval row is persisted regardless, so
+	// the request is never lost — only the notification is skipped.
+	if b != nil {
+		if bErr := b.RecordAndBroadcast(ctx, string(events.EventApprovalRequested), workspaceID, map[string]interface{}{
+			"approval_id": approvalID,
+			"action":      string(action),
+			"reason":      reason,
+		}); bErr != nil {
+			log.Printf("approval_gate: broadcast requested failed (ws=%s): %v", workspaceID, bErr)
+		}
+	}
+	var parentID *string
+	if pErr := db.DB.QueryRowContext(ctx, `SELECT parent_id FROM workspaces WHERE id = $1`, workspaceID).Scan(&parentID); pErr != nil {
+		log.Printf("approval_gate: parent lookup failed (ws=%s): %v", workspaceID, pErr)
+	}
+	if parentID != nil && b != nil {
+		if bErr := b.RecordAndBroadcast(ctx, string(events.EventApprovalEscalated), *parentID, map[string]interface{}{
+			"approval_id":       approvalID,
+			"from_workspace_id": workspaceID,
+			"action":            string(action),
+			"reason":            reason,
+		}); bErr != nil {
+			log.Printf("approval_gate: broadcast escalated failed (ws=%s): %v", workspaceID, bErr)
+		}
+	}
+	return false, approvalID, nil
+}
+
+// gateDestructive runs requireApproval for a gated action. It returns false
+// (caller must stop) after writing 202 for a pending approval or 500 on a gate
+// error, and true when the action is ungated, exempt, or consumed an approval.
+func gateDestructive(c *gin.Context, b events.EventEmitter, workspaceID string, action approvals.Action, reason string, contextMap map[string]interface{}) bool {
+	if !approvals.IsGated(action) {
+		return true
+	}
+	// Scope (RFC platform-agent Phase 4b). Wiring is a one-liner in each
+	// destructive handler; the activation policy lives here, centrally, so it is
+	// uniform and testable:
+	//   - default-OFF rollout flag, so the wiring is inert until an operator
+	//     enables it (mirrors the 3a/3c default-off design and protects existing
+	//     org-token automation from a surprise async-approval behaviour change);
+	//   - only callers holding an ORG token are gated. The platform agent runs
+	//     with MOLECULE_API_KEY=<org-admin token>, so the auth middleware sets
+	//     org_token_id. Ordinary workspace-token agents and human CP-session
+	//     operators (cp_session_actor — the approvers themselves) are NOT gated,
+	//     so normal operation is byte-identical. This realises the file-header
+	//     trust boundary ("anything holding an org-admin token still goes
+	//     through the gate") without gating everyone.
+	if !destructiveGateEnabled() || !callerHoldsOrgToken(c) {
+		return true
+	}
+	approved, approvalID, err := requireApproval(c.Request.Context(), b, workspaceID, action, reason, contextMap)
+	if err != nil {
+		log.Printf("gateDestructive: %v (ws=%s action=%s)", err, workspaceID, action)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "approval gate failed"})
+		return false
+	}
+	if !approved {
+		c.JSON(http.StatusAccepted, gin.H{
+			"status":      "pending_approval",
+			"approval_id": approvalID,
+			"action":      string(action),
+			"reason":      reason,
+		})
+		return false
+	}
+	return true
+}
+
+// destructiveGateEnabled is the default-off rollout flag for the org-level
+// destructive-op approval gate. Inert until an operator sets
+// MOLECULE_PLATFORM_APPROVAL_GATE=1 (or "true") — typically when the platform
+// agent is deployed to the org. Keeps 4b's wiring shipped-but-dormant, matching
+// the platform-agent feature's default-off posture (3a/3c).
+func destructiveGateEnabled() bool {
+	v := os.Getenv("MOLECULE_PLATFORM_APPROVAL_GATE")
+	return v == "1" || v == "true"
+}
+
+// callerHoldsOrgToken reports whether the request authenticated with an org
+// token (the auth middleware sets org_token_id, see middleware/wsauth_middleware.go).
+// The platform agent uses an org-admin token; ordinary workspace-token agents
+// and human CP sessions do not, so they bypass the gate entirely.
+func callerHoldsOrgToken(c *gin.Context) bool {
+	_, ok := c.Get("org_token_id")
+	return ok
+}
@@ -0,0 +1,137 @@
+//go:build integration
+// +build integration
+
+// approval_gate_integration_test.go — REAL Postgres gate for requireApproval.
+//
+// Run with:
+//
+//	INTEGRATION_DB_URL="postgres://postgres:test@localhost:55432/molecule?sslmode=disable" \
+//	  go test -tags=integration ./internal/handlers/ -run Integration_RequireApproval -v
+//
+// Why this is NOT a sqlmock test
+// ------------------------------
+// The whole gate is about row state across calls: a pending request is created
+// once and reused (dedup), an approval is consumed exactly once (single-use via
+// the conditional UPDATE ... RETURNING), and a different operation context hashes
+// to a different request. sqlmock returns whatever the stub says; only a real
+// Postgres proves the consume-once semantics and the partial-index lookup.
+
+package handlers
+
+import (
+	"context"
+	"database/sql"
+	"testing"
+
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/approvals"
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
+	"github.com/google/uuid"
+	_ "github.com/lib/pq"
+)
+
+// TestIntegration_RequireApproval_GateCycle drives requireApproval through a
+// full gate cycle against a real Postgres: pending creation, dedup of a repeat
+// request, single-use consumption of an approval, replay rejection, and
+// isolation between operation contexts.
+func TestIntegration_RequireApproval_GateCycle(t *testing.T) {
+	url := requireIntegrationDBURL(t)
+	conn, err := sql.Open("postgres", url)
+	if err != nil {
+		t.Fatalf("open: %v", err)
+	}
+	// Register the close before Ping so the handle is released even when the
+	// ping below aborts the test.
+	t.Cleanup(func() { conn.Close() })
+	if err := conn.Ping(); err != nil {
+		t.Fatalf("ping: %v", err)
+	}
+
+	// requireApproval + the broadcaster's structure_events write use the db.DB
+	// global; point it at the integration DB and restore afterwards.
+	prev := db.DB
+	db.DB = conn
+	t.Cleanup(func() { db.DB = prev })
+	setupTestRedis(t) // broadcaster publishes to db.RDB; miniredis backs it
+
+	ctx := context.Background()
+	b := newTestBroadcaster()
+
+	wsID := uuid.New().String()
+	t.Cleanup(func() {
+		_, _ = conn.ExecContext(ctx, `DELETE FROM approval_requests WHERE workspace_id = $1`, wsID)
+		_, _ = conn.ExecContext(ctx, `DELETE FROM workspaces WHERE id = $1`, wsID)
+	})
+	// A root workspace (parent_id NULL) — like the platform agent, it has no
+	// parent, so the gate's escalation target is the user/canvas. (This branch
+	// is off main and has no kind column; the gate is kind-agnostic.)
+	if _, err := conn.ExecContext(ctx, `
+		INSERT INTO workspaces (id, name, tier, status, runtime, parent_id)
+		VALUES ($1, 'Org Concierge', 0, 'online', 'claude-code', NULL)`, wsID); err != nil {
+		t.Fatalf("seed root workspace: %v", err)
+	}
+
+	action := approvals.ActionDeleteWorkspace
+	ctxA := map[string]interface{}{"target": "ws-A"}
+
+	// 1. First call → no approval yet → pending created.
+	ok, id1, err := requireApproval(ctx, b, wsID, action, "delete ws-A", ctxA)
+	if err != nil {
+		t.Fatalf("call 1: %v", err)
+	}
+	if ok {
+		t.Fatal("call 1: approved=true, want false (no approval exists yet)")
+	}
+
+	// 2. Same operation again → must REUSE the same pending row (dedup), not flood.
+	ok, id2, err := requireApproval(ctx, b, wsID, action, "delete ws-A", ctxA)
+	if err != nil {
+		t.Fatalf("call 2: %v", err)
+	}
+	if ok || id2 != id1 {
+		t.Fatalf("call 2: ok=%v id2=%s, want false and id2==id1(%s) (dedup)", ok, id2, id1)
+	}
+	var nPending int
+	if err := conn.QueryRowContext(ctx,
+		`SELECT count(*) FROM approval_requests WHERE workspace_id=$1 AND status='pending'`, wsID).Scan(&nPending); err != nil {
+		t.Fatalf("count pending: %v", err)
+	}
+	if nPending != 1 {
+		t.Fatalf("pending rows = %d, want 1 (dedup must not flood)", nPending)
+	}
+
+	// 3. A human approves it (simulating the Decide handler).
+	if _, err := conn.ExecContext(ctx,
+		`UPDATE approval_requests SET status='approved', decided_by='human', decided_at=now() WHERE id=$1`, id1); err != nil {
+		t.Fatalf("approve: %v", err)
+	}
+
+	// 4. Now the gate consumes the approval and lets the op proceed.
+	ok, consumedID, err := requireApproval(ctx, b, wsID, action, "delete ws-A", ctxA)
+	if err != nil {
+		t.Fatalf("call 4: %v", err)
+	}
+	if !ok || consumedID != id1 {
+		t.Fatalf("call 4: ok=%v consumedID=%s, want true and id1(%s)", ok, consumedID, id1)
+	}
+
+	// 5. Single-use: the SAME approval cannot be replayed — the next call is
+	//    pending again (a fresh request), not approved.
+	ok, id5, err := requireApproval(ctx, b, wsID, action, "delete ws-A", ctxA)
+	if err != nil {
+		t.Fatalf("call 5: %v", err)
+	}
+	if ok {
+		t.Fatal("call 5: approved=true — a consumed approval was replayed")
+	}
+	if id5 == id1 {
+		t.Fatal("call 5: reused the consumed request id; want a new pending request")
+	}
+
+	// 6. Context isolation: an approval for ws-A must not authorize ws-B.
+	//    Approve the ws-A request, then a ws-B op must still be pending.
+	if _, err := conn.ExecContext(ctx,
+		`UPDATE approval_requests SET status='approved', decided_at=now() WHERE id=$1`, id5); err != nil {
+		t.Fatalf("approve id5: %v", err)
+	}
+	ok, _, err = requireApproval(ctx, b, wsID, action, "delete ws-B", map[string]interface{}{"target": "ws-B"})
+	if err != nil {
+		t.Fatalf("call 6: %v", err)
+	}
+	if ok {
+		t.Fatal("call 6: ws-B proceeded on a ws-A approval — context isolation broken")
+	}
+}
@@ -0,0 +1,76 @@
+package handlers
+
+// Phase 4b — unit coverage for the gate's activation SCOPE: the default-off
+// rollout flag + org-token-only targeting. These exercise the short-circuit
+// paths that return "proceed" BEFORE requireApproval, so they need no DB. The
+// full flag-on + org-token + gated → 202 path is covered by the real-Postgres
+// approval_gate_integration_test.go.
+
+import (
+	"net/http/httptest"
+	"os"
+	"testing"
+
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/approvals"
+	"github.com/gin-gonic/gin"
+)
+
+// TestDestructiveGateEnabled_DefaultOff pins the rollout flag's contract: the
+// gate is off when the variable is unset, on for exactly "1" or "true", and
+// off for "0".
+func TestDestructiveGateEnabled_DefaultOff(t *testing.T) {
+	// t.Setenv runs first so the variable's pre-test value is restored on
+	// cleanup; os.Unsetenv alone would leak the mutation into later tests.
+	t.Setenv("MOLECULE_PLATFORM_APPROVAL_GATE", "")
+	os.Unsetenv("MOLECULE_PLATFORM_APPROVAL_GATE")
+	if destructiveGateEnabled() {
+		t.Fatal("gate must be OFF by default (no env)")
+	}
+	for _, v := range []string{"1", "true"} {
+		t.Setenv("MOLECULE_PLATFORM_APPROVAL_GATE", v)
+		if !destructiveGateEnabled() {
+			t.Errorf("%q must enable the gate", v)
+		}
+	}
+	t.Setenv("MOLECULE_PLATFORM_APPROVAL_GATE", "0")
+	if destructiveGateEnabled() {
+		t.Error(`"0" must keep the gate off`)
+	}
+}
+
+// TestCallerHoldsOrgToken checks both sides of the org-token probe: a fresh
+// gin context without the org_token_id key reports false, and the same context
+// reports true once the key is set.
+func TestCallerHoldsOrgToken(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	ginCtx, _ := gin.CreateTestContext(httptest.NewRecorder())
+
+	if held := callerHoldsOrgToken(ginCtx); held {
+		t.Error("no org_token_id in context → must be false (workspace/CP caller)")
+	}
+
+	ginCtx.Set("org_token_id", "tok-abc")
+	if held := callerHoldsOrgToken(ginCtx); !held {
+		t.Error("org_token_id set → must be true (platform-agent / org-admin caller)")
+	}
+}
+
+// TestGateDestructive_ScopeShortCircuits verifies gateDestructive returns true
+// (proceed, no 202, no DB touch) whenever the scope excludes the call:
+// non-gated action, flag off, or non-org-token caller. The broadcaster is nil
+// in every case because these paths must return before it is used.
+func TestGateDestructive_ScopeShortCircuits(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	newCtx := func(orgToken bool) *gin.Context {
+		c, _ := gin.CreateTestContext(httptest.NewRecorder())
+		c.Request = httptest.NewRequest("DELETE", "/x", nil)
+		if orgToken {
+			c.Set("org_token_id", "tok")
+		}
+		return c
+	}
+
+	// flag OFF (default) + org-token + gated action → proceed.
+	// t.Setenv runs first so the variable's pre-test value is restored on
+	// cleanup; os.Unsetenv alone would leak the mutation into later tests.
+	t.Setenv("MOLECULE_PLATFORM_APPROVAL_GATE", "")
+	os.Unsetenv("MOLECULE_PLATFORM_APPROVAL_GATE")
+	if !gateDestructive(newCtx(true), nil, "ws", approvals.ActionDeleteWorkspace, "r", nil) {
+		t.Error("flag off must proceed (gate dormant)")
+	}
+
+	// flag ON + NO org token (workspace agent / human CP session) → proceed.
+	t.Setenv("MOLECULE_PLATFORM_APPROVAL_GATE", "1")
+	if !gateDestructive(newCtx(false), nil, "ws", approvals.ActionDeleteWorkspace, "r", nil) {
+		t.Error("non-org-token caller must proceed (normal operation unchanged)")
+	}
+
+	// flag ON + org token + NON-gated action → proceed (IsGated short-circuit).
+	if !gateDestructive(newCtx(true), nil, "ws", approvals.Action("not_a_gated_action"), "r", nil) {
+		t.Error("non-gated action must proceed")
+	}
+}
@@ -0,0 +1,46 @@
+package handlers
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/approvals"
+	"github.com/gin-gonic/gin"
+)
+
+// TestGateDestructive_NonGatedPassesThrough checks that an action outside the
+// policy map bypasses the gate: gateDestructive reports "proceed" and leaves
+// the recorder at its default status (no 202, nothing written), so handlers
+// for such actions behave exactly as before.
+func TestGateDestructive_NonGatedPassesThrough(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	rec := httptest.NewRecorder()
+	ginCtx, _ := gin.CreateTestContext(rec)
+	ginCtx.Request = httptest.NewRequest("POST", "/x", nil)
+
+	ungated := approvals.Action("not_a_gated_action")
+	if proceed := gateDestructive(ginCtx, newTestBroadcaster(), "ws-1", ungated, "noop", nil); !proceed {
+		t.Fatalf("non-gated action must proceed, got proceed=false (status %d)", rec.Code)
+	}
+
+	// The recorder still shows the CreateTestContext default when nothing was written.
+	if status := rec.Code; status != http.StatusOK {
+		t.Errorf("non-gated action wrote a response (status %d), want none", status)
+	}
+}
+
+// TestApprovalRequestHash_StableAndContextSensitive pins the two hash
+// properties the gate relies on: equal contexts (regardless of map literal key
+// order) hash identically, and a context naming a different target hashes
+// differently, so an approval can't be replayed onto a different target.
+func TestApprovalRequestHash_StableAndContextSensitive(t *testing.T) {
+	targetA := map[string]interface{}{"target": "A", "n": 1}
+	targetAReordered := map[string]interface{}{"n": 1, "target": "A"}
+	targetB := map[string]interface{}{"target": "B", "n": 1}
+
+	first := approvalRequestHash("ws", "delete_workspace", targetA)
+	repeat := approvalRequestHash("ws", "delete_workspace", targetAReordered)
+	other := approvalRequestHash("ws", "delete_workspace", targetB)
+
+	if first != repeat {
+		t.Errorf("hash not stable across equal contexts: %s vs %s", first, repeat)
+	}
+	if first == other {
+		t.Errorf("hash not context-sensitive: target A and B collided (%s)", first)
+	}
+}
@@ -173,20 +173,8 @@ func (h *DelegationHandler) Delegate(c *gin.Context) {
 	// check_task_status returned status='queued' forever even after a
 	// real reply landed). messageId mirrors delegation_id so the
 	// platform's idempotency-key extraction also keys off the same id.
-	a2aBody, marshalErr := json.Marshal(map[string]interface{}{
-		"method": "message/send",
-		"params": map[string]interface{}{
-			"message": map[string]interface{}{
-				"role":      "user",
-				"messageId": delegationID,
-				// A2A v0.3 Part discriminator is `kind`, NOT `type` (#2251) —
-				// a `type`-keyed Part is dropped by the receiver's v0.3
-				// validator, silently losing the delegated task.
-				"parts":    []map[string]interface{}{{"kind": "text", "text": body.Task}},
-				"metadata": map[string]interface{}{"delegation_id": delegationID},
-			},
-		},
-	})
+	// Build A2A payload via helper so contract tests can assert the envelope shape.
+	a2aBody, marshalErr := buildDelegateA2ABody(delegationID, body.Task)
 	if marshalErr != nil {
 		log.Printf("Delegation %s: json.Marshal a2aBody failed: %v", delegationID, marshalErr)
 	}
@@ -374,6 +362,27 @@ func insertDelegationRow(ctx context.Context, c *gin.Context, sourceID string, b
 	return insertTrackingUnavailable
 }

+// buildDelegateA2ABody serialises the A2A "message/send" envelope used for a
+// delegation. The message carries role="user", messageId and
+// metadata.delegation_id both set to delegationID, and a single text part
+// holding task. It is a pure function so unit tests can assert the envelope
+// contract without standing up HTTP or DB.
+func buildDelegateA2ABody(delegationID, task string) ([]byte, error) {
+	// A2A v0.3 Part discriminator is `kind`, NOT `type` (#2251) — a
+	// `type`-keyed Part is dropped by the receiver's v0.3 validator, silently
+	// losing the delegated task.
+	textPart := map[string]interface{}{"kind": "text", "text": task}
+
+	message := map[string]interface{}{
+		"role":      "user",
+		"messageId": delegationID,
+		"parts":     []map[string]interface{}{textPart},
+		"metadata":  map[string]interface{}{"delegation_id": delegationID},
+	}
+
+	envelope := map[string]interface{}{
+		"method": "message/send",
+		"params": map[string]interface{}{"message": message},
+	}
+	return json.Marshal(envelope)
+}
+
 // executeDelegation runs in a goroutine — sends A2A and stores the result.
 // Updates delegation status through: pending → dispatched → received → completed/failed
 // delegationRetryDelay is the pause between the first failed proxy attempt
@@ -1762,3 +1762,74 @@ func TestListDelegations_LedgerFailedIncludesErrorDetail(t *testing.T) {
 		t.Errorf("unmet sqlmock expectations: %v", err)
 	}
 }
+
+// ---------- buildDelegateA2ABody: schema-valid SendMessageRequest ----------
+
+// TestBuildDelegateA2ABody_SchemaValidSendMessageRequest decodes the bytes
+// produced by buildDelegateA2ABody and checks, field by field, the envelope
+// contract requested by issue #2251: method, role="user", messageId, a leading
+// text part keyed by `kind`, and metadata.delegation_id.
+func TestBuildDelegateA2ABody_SchemaValidSendMessageRequest(t *testing.T) {
+	const (
+		wantID   = "del-2251-test"
+		wantTask = "write a contract test"
+	)
+
+	raw, buildErr := buildDelegateA2ABody(wantID, wantTask)
+	if buildErr != nil {
+		t.Fatalf("buildDelegateA2ABody failed: %v", buildErr)
+	}
+
+	var decoded map[string]interface{}
+	if decodeErr := json.Unmarshal(raw, &decoded); decodeErr != nil {
+		t.Fatalf("body is not valid JSON: %v", decodeErr)
+	}
+
+	// Envelope level: method, then the params → message nesting.
+	if method := decoded["method"]; method != "message/send" {
+		t.Errorf("method = %v, want message/send", method)
+	}
+	rpcParams, isMap := decoded["params"].(map[string]interface{})
+	if !isMap {
+		t.Fatalf("params missing or not a map: %T", decoded["params"])
+	}
+	message, isMap := rpcParams["message"].(map[string]interface{})
+	if !isMap {
+		t.Fatalf("message missing or not a map: %T", rpcParams["message"])
+	}
+
+	// Message level: role is required (issue #2251) and messageId mirrors the
+	// delegation id.
+	if role := message["role"]; role != "user" {
+		t.Errorf("message.role = %v, want \"user\"", role)
+	}
+	if gotID := message["messageId"]; gotID != wantID {
+		t.Errorf("message.messageId = %v, want %s", gotID, wantID)
+	}
+
+	// Parts: a non-empty list whose first entry is the text part. The A2A v0.3
+	// Part discriminator is `kind`, NOT `type` (#2251).
+	partList, isList := message["parts"].([]interface{})
+	if !isList || len(partList) == 0 {
+		t.Fatalf("message.parts missing or empty: %T", message["parts"])
+	}
+	leading, isMap := partList[0].(map[string]interface{})
+	if !isMap {
+		t.Fatalf("first part is not a map: %T", partList[0])
+	}
+	if kind := leading["kind"]; kind != "text" {
+		t.Errorf("first part kind = %v, want text", kind)
+	}
+	if text := leading["text"]; text != wantTask {
+		t.Errorf("first part text = %v, want %q", text, wantTask)
+	}
+
+	// Metadata: delegation_id must match the delegation id as well.
+	metadata, isMap := message["metadata"].(map[string]interface{})
+	if !isMap {
+		t.Fatalf("metadata missing or not a map: %T", message["metadata"])
+	}
+	if gotID := metadata["delegation_id"]; gotID != wantID {
+		t.Errorf("metadata.delegation_id = %v, want %s", gotID, wantID)
+	}
+}
@@ -337,7 +337,7 @@ func TestRegister_ProvisionerURLPreserved(t *testing.T) {
 		WillReturnError(sql.ErrNoRows)

 	mock.ExpectExec("INSERT INTO workspaces").
-		WithArgs("ws-prov", "ws-prov", "http://localhost:8000", `{"name":"agent"}`, "push").
+		WithArgs("ws-prov", "ws-prov", "http://localhost:8000", `{"name":"agent"}`, "push", "").
 		WillReturnResult(sqlmock.NewResult(0, 1))

 	// DB returns provisioner URL (127.0.0.1) — should take precedence over agent-reported URL
@@ -450,6 +450,98 @@ func TestHeartbeat_DegradedRecovery(t *testing.T) {
 	}
 }

+// TestHeartbeat_ErrorRateDegrade_Guarded verifies the error_rate degrade path
+// carries the `AND status = 'online'` guard, preventing a racing heartbeat
+// from flipping a concurrently-removed workspace back to degraded.
+func TestHeartbeat_ErrorRateDegrade_Guarded(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+	broadcaster := newTestBroadcaster()
+	handler := NewRegistryHandler(broadcaster)
+
+	// Heartbeat bookkeeping expected ahead of the status transition: a
+	// current_task read, then the metrics UPDATE. The first five UPDATE args
+	// mirror the request body below; the trailing "" is presumably the
+	// current_task value — TODO confirm against the handler.
+	mock.ExpectQuery("SELECT COALESCE\\(current_task").
+		WithArgs("ws-degrade-guard").
+		WillReturnRows(sqlmock.NewRows([]string{"current_task"}).AddRow(""))
+	mock.ExpectExec("UPDATE workspaces SET").
+		WithArgs("ws-degrade-guard", 0.6, "", 1, 100, "").
+		WillReturnResult(sqlmock.NewResult(0, 1))
+
+	// Stale read: heartbeat started before CascadeDelete set status='removed'
+	mock.ExpectQuery("SELECT status FROM workspaces WHERE id =").
+		WithArgs("ws-degrade-guard").
+		WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow("online"))
+
+	// Guarded UPDATE returns 0 rows because row is actually 'removed'
+	mock.ExpectExec("UPDATE workspaces SET status =.*AND status = 'online'").
+		WithArgs(models.StatusDegraded, "ws-degrade-guard").
+		WillReturnResult(sqlmock.NewResult(0, 0))
+
+	// Broadcast still fires (existing behaviour) — mock it so sqlmock passes
+	mock.ExpectExec("INSERT INTO structure_events").
+		WillReturnResult(sqlmock.NewResult(0, 1))
+
+	// error_rate 0.6 is presumably above the handler's degrade threshold —
+	// TODO confirm; the expectations above assume the degrade branch runs.
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	body := `{"workspace_id":"ws-degrade-guard","error_rate":0.6,"sample_error":"","active_tasks":1,"uptime_seconds":100}`
+	c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body))
+	c.Request.Header.Set("Content-Type", "application/json")
+
+	handler.Heartbeat(c)
+
+	// The handler must still answer 200 even though the guarded UPDATE matched
+	// no rows, and every expectation declared above must have been consumed.
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet expectations: %v", err)
+	}
+}
+
+// TestHeartbeat_DegradedRecovery_Guarded verifies the degraded→online recovery
+// path carries the `AND status = 'degraded'` guard, preventing a racing
+// heartbeat from flipping a concurrently-removed workspace back to online.
+func TestHeartbeat_DegradedRecovery_Guarded(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+	broadcaster := newTestBroadcaster()
+	handler := NewRegistryHandler(broadcaster)
+
+	// Heartbeat bookkeeping expected ahead of the status transition: a
+	// current_task read, then the metrics UPDATE. The first five UPDATE args
+	// mirror the request body below; the trailing "" is presumably the
+	// current_task value — TODO confirm against the handler.
+	mock.ExpectQuery("SELECT COALESCE\\(current_task").
+		WithArgs("ws-recover-guard").
+		WillReturnRows(sqlmock.NewRows([]string{"current_task"}).AddRow(""))
+	mock.ExpectExec("UPDATE workspaces SET").
+		WithArgs("ws-recover-guard", 0.05, "", 1, 100, "").
+		WillReturnResult(sqlmock.NewResult(0, 1))
+
+	// Stale read: heartbeat started before CascadeDelete set status='removed'
+	mock.ExpectQuery("SELECT status FROM workspaces WHERE id =").
+		WithArgs("ws-recover-guard").
+		WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow("degraded"))
+
+	// Guarded UPDATE returns 0 rows because row is actually 'removed'
+	mock.ExpectExec("UPDATE workspaces SET status =.*AND status = 'degraded'").
+		WithArgs(models.StatusOnline, "ws-recover-guard").
+		WillReturnResult(sqlmock.NewResult(0, 0))
+
+	// Broadcast still fires (existing behaviour) — mock it so sqlmock passes
+	mock.ExpectExec("INSERT INTO structure_events").
+		WillReturnResult(sqlmock.NewResult(0, 1))
+
+	// error_rate 0.05 is presumably below the handler's recovery threshold —
+	// TODO confirm; the expectations above assume the recovery branch runs.
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	body := `{"workspace_id":"ws-recover-guard","error_rate":0.05,"sample_error":"","active_tasks":1,"uptime_seconds":100}`
+	c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body))
+	c.Request.Header.Set("Content-Type", "application/json")
+
+	handler.Heartbeat(c)
+
+	// The handler must still answer 200 even though the guarded UPDATE matched
+	// no rows, and every expectation declared above must have been consumed.
+	if w.Code != http.StatusOK {
+		t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet expectations: %v", err)
+	}
+}
+
 // ---------- a2a_proxy.go: Workspace has no URL (503 with status) ----------

 func TestProxyA2A_WorkspaceNoURL(t *testing.T) {
@@ -180,7 +180,7 @@ func TestRegisterHandler(t *testing.T) {

 	// Expect the upsert INSERT ... ON CONFLICT
 	mock.ExpectExec("INSERT INTO workspaces").
-		WithArgs("ws-123", "ws-123", "http://localhost:8000", `{"name":"test"}`, "push").
+		WithArgs("ws-123", "ws-123", "http://localhost:8000", `{"name":"test"}`, "push", "").
 		WillReturnResult(sqlmock.NewResult(0, 1))

 	// Expect the SELECT url query (for cache URL logic)
@@ -0,0 +1,122 @@
+//go:build integration
+// +build integration
+
+// kind_platform_root_integration_test.go — REAL Postgres gate for the
+// platform-agent participant kind (RFC docs/design/rfc-platform-agent.md).
+//
+// Run with:
+//
+//	INTEGRATION_DB_URL="postgres://postgres:test@localhost:55432/molecule?sslmode=disable" \
+//	  go test -tags=integration ./internal/handlers/ -run Integration_PlatformKind -v
+//
+// CI: piggybacks on the handlers-postgres-integration workflow (path filter
+// includes workspace-server/internal/handlers/** and migrations/**).
+//
+// Why this is NOT a sqlmock test
+// ------------------------------
+// The invariant "a platform agent must be the org root (parent_id IS NULL),
+// which structurally also means at most one platform agent per org" is enforced
+// by the workspaces_platform_root_check CHECK constraint in migration
+// 20260606000000_workspaces_kind. sqlmock cannot execute DDL or evaluate a CHECK
+// constraint, so only a real Postgres can prove the constraint actually rejects
+// a non-root platform agent and accepts a root one. The Register handler's
+// isPlatformRootViolation()/409 path depends on this constraint firing.
+
+package handlers
+
+import (
+	"context"
+	"database/sql"
+	"fmt"
+	"strings"
+	"testing"
+
+	"github.com/google/uuid"
+	_ "github.com/lib/pq"
+)
+
+// integrationDB_PlatformKind opens a Postgres handle on the URL returned by
+// requireIntegrationDBURL, verifies it is reachable with Ping, and returns it.
+// The handle is closed automatically via t.Cleanup; open or ping failures
+// abort the calling test.
+func integrationDB_PlatformKind(t *testing.T) *sql.DB {
+	t.Helper()
+	url := requireIntegrationDBURL(t)
+	conn, err := sql.Open("postgres", url)
+	if err != nil {
+		t.Fatalf("open: %v", err)
+	}
+	// Register the close before Ping so the handle is released even when the
+	// ping below aborts the test.
+	t.Cleanup(func() { conn.Close() })
+	if err := conn.Ping(); err != nil {
+		t.Fatalf("ping: %v", err)
+	}
+	return conn
+}
+
+// TestIntegration_PlatformKind_RootAllowed_NonRootRejected proves the three
+// guarantees of the kind column against a real Postgres:
+//
+//  1. a fresh workspace defaults to kind='workspace';
+//  2. a root row (parent_id IS NULL) may be kind='platform';
+//  3. a non-root row (parent_id set) may NOT be kind='platform' — the
+//     workspaces_platform_root_check constraint rejects it (23514).
+func TestIntegration_PlatformKind_RootAllowed_NonRootRejected(t *testing.T) {
+	conn := integrationDB_PlatformKind(t)
+	ctx := context.Background()
+
+	// Every row this test creates is named with a per-run random prefix, so the
+	// LIKE-based cleanup below only matches rows seeded by this run.
+	prefix := fmt.Sprintf("itest-kind-%s", uuid.New().String()[:8])
+	cleanup := func() {
+		if _, err := conn.ExecContext(ctx,
+			`DELETE FROM workspaces WHERE name LIKE $1`, prefix+"%"); err != nil {
+			t.Logf("cleanup (non-fatal): %v", err)
+		}
+	}
+	t.Cleanup(cleanup)
+	cleanup() // pre-test hygiene in the shared integration DB
+
+	rootID := uuid.New().String()
+	childID := uuid.New().String()
+
+	// 1. Default kind is 'workspace' when the column is omitted on INSERT.
+	if _, err := conn.ExecContext(ctx, `
+		INSERT INTO workspaces (id, name, tier, runtime, status, parent_id)
+		VALUES ($1, $2, 2, 'claude-code', 'online', NULL)
+	`, rootID, prefix+"-root"); err != nil {
+		t.Fatalf("seed root: %v", err)
+	}
+	var gotKind string
+	if err := conn.QueryRowContext(ctx,
+		`SELECT kind FROM workspaces WHERE id = $1`, rootID).Scan(&gotKind); err != nil {
+		t.Fatalf("read kind: %v", err)
+	}
+	if gotKind != "workspace" {
+		t.Fatalf("default kind = %q, want \"workspace\"", gotKind)
+	}
+
+	// 2. The root row may become a platform agent.
+	if _, err := conn.ExecContext(ctx,
+		`UPDATE workspaces SET kind = 'platform' WHERE id = $1`, rootID); err != nil {
+		t.Fatalf("promote root to platform: unexpected error: %v", err)
+	}
+
+	// A child of the platform root (an ordinary workspace) inserts fine.
+	if _, err := conn.ExecContext(ctx, `
+		INSERT INTO workspaces (id, name, tier, runtime, status, parent_id)
+		VALUES ($1, $2, 2, 'claude-code', 'online', $3)
+	`, childID, prefix+"-child", rootID); err != nil {
+		t.Fatalf("seed child: %v", err)
+	}
+
+	// 3. The non-root child may NOT be a platform agent — the CHECK rejects it.
+	//    The constraint is identified by its name appearing in the error text.
+	_, err := conn.ExecContext(ctx,
+		`UPDATE workspaces SET kind = 'platform' WHERE id = $1`, childID)
+	if err == nil {
+		t.Fatalf("non-root child accepted kind='platform' — constraint did not fire")
+	}
+	if !strings.Contains(err.Error(), "workspaces_platform_root_check") {
+		t.Fatalf("non-root platform rejection wanted workspaces_platform_root_check, got: %v", err)
+	}
+
+	// And the unknown-kind value is rejected by workspaces_kind_check.
+	_, err = conn.ExecContext(ctx,
+		`UPDATE workspaces SET kind = 'bogus' WHERE id = $1`, rootID)
+	if err == nil || !strings.Contains(err.Error(), "workspaces_kind_check") {
+		t.Fatalf("unknown kind wanted workspaces_kind_check rejection, got: %v", err)
+	}
+}
@@ -63,7 +63,11 @@ var (
 	providerRegistryErr      error
 )

-func providerRegistry() (*providers.Manifest, error) {
+// providerRegistry loads the embedded providers manifest once and caches it.
+// Defined as a variable (not a named function) so tests can swap in a mock
+// without restarting the process — required for fail-closed coverage of the
+// registry-unavailable path (workspace_provision_derive_test.go).
+var providerRegistry = func() (*providers.Manifest, error) {
 	providerRegistryOnce.Do(func() {
 		providerRegistryManifest, providerRegistryErr = providers.LoadManifest()
 		if providerRegistryErr != nil {
@@ -54,6 +54,55 @@ func mcpPost(t *testing.T, h *MCPHandler, workspaceID string, body interface{})
 	return w
 }

+// assertA2ASendMessageSchema fails the test unless body decodes to the A2A
+// message/send envelope that delegate_task must always produce (issue #2251
+// contract test): jsonrpc "2.0", method "message/send", params.message.role
+// "user", a non-empty string messageId, and a non-empty parts list whose first
+// part is a text part carrying wantTask. Structural problems (invalid JSON,
+// missing maps or parts) stop the test immediately; field mismatches are
+// reported and checking continues.
+func assertA2ASendMessageSchema(t *testing.T, body []byte, wantTask string) {
+	t.Helper()
+	var envelope map[string]interface{}
+	if err := json.Unmarshal(body, &envelope); err != nil {
+		t.Fatalf("A2A body is not valid JSON: %v", err)
+	}
+	if envelope["jsonrpc"] != "2.0" {
+		t.Errorf("jsonrpc = %v, want 2.0", envelope["jsonrpc"])
+	}
+	if envelope["method"] != "message/send" {
+		t.Errorf("method = %v, want message/send", envelope["method"])
+	}
+
+	params, ok := envelope["params"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("params missing or not a map: %T", envelope["params"])
+	}
+	msg, ok := params["message"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("message missing or not a map: %T", params["message"])
+	}
+
+	if msg["role"] != "user" {
+		t.Errorf("message.role = %v, want \"user\"", msg["role"])
+	}
+	// A missing messageId decodes to nil, which a bare == "" comparison would
+	// silently accept, so require a non-empty string explicitly.
+	if id, ok := msg["messageId"].(string); !ok || id == "" {
+		t.Errorf("message.messageId missing or empty: %v", msg["messageId"])
+	}
+
+	parts, ok := msg["parts"].([]interface{})
+	if !ok || len(parts) == 0 {
+		t.Fatalf("message.parts missing or empty: %T", msg["parts"])
+	}
+	firstPart, ok := parts[0].(map[string]interface{})
+	if !ok {
+		t.Fatalf("first part is not a map: %T", parts[0])
+	}
+	// A2A v0.3 Part discriminator is `kind`, NOT `type` (#2251)
+	if firstPart["kind"] != "text" {
+		t.Errorf("first part kind = %v, want text", firstPart["kind"])
+	}
+	if firstPart["text"] != wantTask {
+		t.Errorf("first part text = %v, want %q", firstPart["text"], wantTask)
+	}
+}
+
 func expectCanCommunicateSiblings(mock sqlmock.Sqlmock, callerID, targetID, parentID string) {
 	mock.ExpectQuery(`SELECT id, parent_id FROM workspaces WHERE id = \$1`).
 		WithArgs(callerID).
@@ -209,9 +258,7 @@ func TestMCPHandler_DelegateTask_RoutesThroughPlatformA2AProxy(t *testing.T) {
 		if !logActivity {
 			t.Fatal("delegate_task should log through platform A2A proxy")
 		}
-		if !strings.Contains(string(body), "do work") {
-			t.Fatalf("A2A body missing task text: %s", string(body))
-		}
+		assertA2ASendMessageSchema(t, body, "do work")
 		return 200, []byte(`{"result":{"message":{"parts":[{"text":"done"}]}}}`), nil
 	}

@@ -244,7 +291,10 @@ func TestMCPHandler_DelegateTaskAsync_RoutesThroughPlatformA2AProxy(t *testing.T
 		WithArgs(callerID, callerID, targetID, "Delegating to "+targetID, sqlmock.AnyArg(), "pending").
 		WillReturnResult(sqlmock.NewResult(1, 1))
 	mock.ExpectExec(`UPDATE activity_logs`).
-		WithArgs("dispatched", "", callerID, sqlmock.AnyArg()).
+		WithArgs("queued", "", callerID, sqlmock.AnyArg()).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+	mock.ExpectExec(`UPDATE activity_logs`).
+		WithArgs("delivered", "", callerID, sqlmock.AnyArg()).
 		WillReturnResult(sqlmock.NewResult(0, 1))

 	called := make(chan struct{}, 1)
@@ -252,9 +302,7 @@ func TestMCPHandler_DelegateTaskAsync_RoutesThroughPlatformA2AProxy(t *testing.T
 		if workspaceID != targetID || proxyCallerID != callerID {
 			t.Fatalf("unexpected proxy route target=%q caller=%q", workspaceID, proxyCallerID)
 		}
-		if !strings.Contains(string(body), "async work") {
-			t.Fatalf("A2A body missing task text: %s", string(body))
-		}
+		assertA2ASendMessageSchema(t, body, "async work")
 		called <- struct{}{}
 		return 200, []byte(`{"result":{"message":{"parts":[{"text":"accepted"}]}}}`), nil
 	}
@@ -266,7 +314,7 @@ func TestMCPHandler_DelegateTaskAsync_RoutesThroughPlatformA2AProxy(t *testing.T
 	if err != nil {
 		t.Fatalf("delegate_task_async returned error: %v", err)
 	}
-	if !strings.Contains(out, `"status":"dispatched"`) {
+	if !strings.Contains(out, `"status":"queued"`) {
 		t.Fatalf("delegate_task_async response = %s", out)
 	}
 	waitGlobalAsyncForTest()
@@ -304,10 +352,8 @@ func TestMCPHandler_DelegateTask_WithAttachments(t *testing.T) {
 		if workspaceID != targetID || proxyCallerID != callerID {
 			t.Fatalf("unexpected proxy route target=%q caller=%q", workspaceID, proxyCallerID)
 		}
+		assertA2ASendMessageSchema(t, body, "review this video")
 		bodyStr := string(body)
-		if !strings.Contains(bodyStr, `"text":"review this video"`) {
-			t.Fatalf("A2A body missing task text: %s", bodyStr)
-		}
 		if !strings.Contains(bodyStr, `"kind":"video"`) {
 			t.Fatalf("A2A body missing video attachment kind: %s", bodyStr)
 		}
@@ -354,7 +400,10 @@ func TestMCPHandler_DelegateTaskAsync_WithAttachments(t *testing.T) {
 		WithArgs(callerID, callerID, targetID, "Delegating to "+targetID, sqlmock.AnyArg(), "pending").
 		WillReturnResult(sqlmock.NewResult(1, 1))
 	mock.ExpectExec(`UPDATE activity_logs`).
-		WithArgs("dispatched", "", callerID, sqlmock.AnyArg()).
+		WithArgs("queued", "", callerID, sqlmock.AnyArg()).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+	mock.ExpectExec(`UPDATE activity_logs`).
+		WithArgs("delivered", "", callerID, sqlmock.AnyArg()).
 		WillReturnResult(sqlmock.NewResult(0, 1))

 	called := make(chan []byte, 1)
@@ -380,12 +429,13 @@ func TestMCPHandler_DelegateTaskAsync_WithAttachments(t *testing.T) {
 	if err != nil {
 		t.Fatalf("delegate_task_async returned error: %v", err)
 	}
-	if !strings.Contains(out, `"status":"dispatched"`) {
+	if !strings.Contains(out, `"status":"queued"`) {
 		t.Fatalf("delegate_task_async response = %s", out)
 	}
 	waitGlobalAsyncForTest()
 	select {
 	case body := <-called:
+		assertA2ASendMessageSchema(t, body, "async work with image")
 		bodyStr := string(body)
 		if !strings.Contains(bodyStr, `"kind":"image"`) {
 			t.Fatalf("A2A body missing image attachment kind: %s", bodyStr)
@@ -411,7 +461,10 @@ func TestMCPHandler_DelegateTaskAsync_MarshalFailureDoesNotCallProxy(t *testing.
 		WithArgs(callerID, callerID, targetID, "Delegating to "+targetID, sqlmock.AnyArg(), "pending").
 		WillReturnResult(sqlmock.NewResult(1, 1))
 	mock.ExpectExec(`UPDATE activity_logs`).
-		WithArgs("dispatched", "", callerID, sqlmock.AnyArg()).
+		WithArgs("queued", "", callerID, sqlmock.AnyArg()).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+	mock.ExpectExec(`UPDATE activity_logs`).
+		WithArgs("failed", sqlmock.AnyArg(), callerID, sqlmock.AnyArg()).
 		WillReturnResult(sqlmock.NewResult(0, 1))

 	// Force the (otherwise near-impossible) marshal failure for the A2A body.
@@ -434,7 +487,7 @@ func TestMCPHandler_DelegateTaskAsync_MarshalFailureDoesNotCallProxy(t *testing.
 	if err != nil {
 		t.Fatalf("delegate_task_async returned error: %v", err)
 	}
-	if !strings.Contains(out, `"status":"dispatched"`) {
+	if !strings.Contains(out, `"status":"queued"`) {
 		t.Fatalf("delegate_task_async response = %s", out)
 	}

@@ -286,12 +286,12 @@ func (h *MCPHandler) toolDelegateTaskAsync(ctx context.Context, callerID string,
 	delegationID := uuid.New().String()

 	// Issue #158: write delegation row so canvas Agent Comms tab shows the task text.
-	// Insert with 'dispatched' status since the goroutine won't update it.
+	// Insert with 'queued' status; goroutine updates to delivered or failed.
 	if err := insertMCPDelegationRow(ctx, h.database, callerID, targetID, delegationID, task); err != nil {
 		log.Printf("MCP delegate_task_async: failed to record delegation row: %v", err)
 		// Non-fatal: still fire the A2A call.
 	} else {
-		updateMCPDelegationStatus(ctx, h.database, callerID, delegationID, "dispatched", "")
+		updateMCPDelegationStatus(ctx, h.database, callerID, delegationID, "queued", "")
 	}

 	// Fire and forget in a detached goroutine. Use a background context so
@@ -321,21 +321,28 @@ func (h *MCPHandler) toolDelegateTaskAsync(ctx context.Context, callerID string,
 			log.Printf("toolDelegateTask %s: json.Marshal a2aBody failed: %v", delegationID, marshalErr)
 			// Bail out: proceeding would call proxyA2ARequest with a
 			// nil/empty body, dispatching a malformed A2A request.
+			updateMCPDelegationStatus(bgCtx, h.database, callerID, delegationID, "failed", fmt.Sprintf("marshal_error: %v", marshalErr))
 			return
 		}

 		status, _, err := h.proxyA2ARequest(bgCtx, targetID, a2aBody, callerID, true)
 		if err != nil || status < 200 || status >= 300 {
+			var errorDetail string
 			if err != nil {
 				log.Printf("MCPHandler.delegate_task_async: A2A proxy to %s: %v", targetID, err)
+				errorDetail = fmt.Sprintf("target_offline: %v", err)
 			} else {
 				log.Printf("MCPHandler.delegate_task_async: A2A proxy to %s returned status %d", targetID, status)
+				errorDetail = fmt.Sprintf("http_status: %d", status)
 			}
+			updateMCPDelegationStatus(bgCtx, h.database, callerID, delegationID, "failed", errorDetail)
 			return
 		}
+
+		updateMCPDelegationStatus(bgCtx, h.database, callerID, delegationID, "delivered", "")
 	})

-	return fmt.Sprintf(`{"task_id":%q,"status":"dispatched","target_id":%q}`, delegationID, targetID), nil
+	return fmt.Sprintf(`{"task_id":%q,"status":"queued","target_id":%q}`, delegationID, targetID), nil
 }

 func (h *MCPHandler) toolCheckTaskStatus(ctx context.Context, callerID string, args map[string]interface{}) (string, error) {
@@ -226,17 +226,27 @@ func (h *MemoriesHandler) Commit(c *gin.Context) {
 		Source: contract.MemorySourceUser,
 	})
 	if err != nil {
-		log.Printf("Commit memory error (plugin): %v", err)
-		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to store memory"})
-		return
-	}
-	memoryID := resp.ID
+			// Server-side log ONLY. The client response below is the generic
+			// 500 — the underlying plugin error must NOT leak to the HTTP
+			// response body (clients have no business seeing the memory
+			// plugin's internal error class, message, or stack; the same
+			// discipline as the #2392 leak fix). We include enough context
+			// here for an operator to diagnose the failure from the server
+			// log: workspace id, requested scope, the resolved v2 namespace,
+			// the concrete Go error type (for log-aggregator filtering via
+			// `err_class=...`), and the %q-quoted error message (escapes control
+			// chars and delimits trailing whitespace, which %v would print raw).
+			log.Printf(
+				"Commit memory plugin error: workspace=%s scope=%s namespace=%s err_class=%T err=%q",
+				workspaceID, body.Scope, nsName, err, err,
+			)
+			c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to store memory"})
+			return
+		}
+		memoryID := resp.ID

-	// #767 Audit: write a GLOBAL memory audit log entry for forensic replay.
-	// Records a SHA-256 hash of the content — never plaintext — so the audit
-	// trail can prove what was written without leaking sensitive values.
-	// Failure is non-fatal: a logging error must not roll back a successful write.
-	if body.Scope == "GLOBAL" {
+		// #767 Audit: write a GLOBAL memory audit log entry for forensic replay.
+		// Records a SHA-256 hash of the content — never plaintext — in the audit log.
 		// Hash the sanitised content so the audit trail reflects what was
 		// actually persisted (not the raw, potentially secret-bearing input).
 		sum := sha256.Sum256([]byte(content))
@@ -177,7 +177,7 @@ func isEnvIdentPart(c byte) bool {
 	return isEnvIdentStart(c) || (c >= '0' && c <= '9')
 }

-// loadWorkspaceEnv reads the org root .env and the workspace-specific .env
+// loadWorkspaceEnv reads the org root .env and the workspace-specific .env files
 // (workspace overrides org root). Used by both secret injection and channel
 // config expansion.
 //
--- a/Show More
+++ b/Show More