docs(openapi): apply Five-Axis review fixes to management spec

Verified each against the authoritative handler source (molecule-core workspace-server + molecule-controlplane) before editing: 1. tenantAdminToken: http/bearer -> apiKey header X-Molecule-Admin-Token. authenticateTenant (controlplane workspace_provision.go) reads that header, NOT Authorization, and derives org from the token (SELECT org_id ... WHERE admin_token=$1). Removed orgRoutingHeaderId from the DELETE /api/v1/workspaces/{workspace_id} security — no X-Molecule-Org-Id is read on deprovision. 2. ProvisionStatus.stage: added `failed` (emitted by orgs.go on failed/deprovisioning/deprovisioned). Existing launching/installing/ starting/configuring_https/ready all confirmed emitted by orgs_progress.go + estimateBootProgress — none trimmed. 3. GET /workspaces/{id}: set security: [] — router.go registers it outside every auth group (intentionally open for canvas-node self- polling). Dropped the now-inapplicable 401. 4. Multi-period budget shape: added `budget_limits` (canonical) + legacy `budget_limit` to PatchBudgetRequest, and `periods` (+ PeriodBudget) to BudgetResponse, matching budget.go budgetResponse/PatchBudget. 5. GET tenant llm-billing-mode already modeled (handler serves GET+PUT) — no change needed; verified. 6. Added prune=true destructive note (only literal "true" permanently deletes, internal#734) and the CP-admin /api/v1/admin/workspaces/{id}/llm-billing-mode GET+PUT pair (cpAdminBearer, requires ?org_slug=). redocly lint clean under both recommended and recommended-strict. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
docs(openapi): add OpenAPI 3.1 management spec (SSOT) + README
2026-05-31 22:29:07 -07:00 · 2026-05-31 20:40:36 -07:00 · 2026-05-31 23:50:53 +00:00 · 2026-05-31 16:05:55 -07:00 · 2026-05-31 19:52:21 +00:00 · 2026-05-31 12:38:13 -07:00
190 changed files with 19056 additions and 3637 deletions
@@ -385,8 +385,12 @@ def detect_drift(branch: str) -> tuple[list[str], dict]:
    contexts = set(protection.get("status_check_contexts") or [])

    # ----- F1: job exists in CI but not under sentinel.needs -----
+    # Post-#1766 contract: the sentinel may deliberately have no `needs:`
+    # and instead poll path-relevant statuses dynamically. In that case
+    # F1 is a false positive — skip it. F1b (typos in existing needs)
+    # is naturally skipped when needs is empty.
    missing_from_needs = sorted(jobs - needs)
-    if missing_from_needs:
+    if missing_from_needs and needs:
        findings.append(
            "F1 — jobs in ci.yml NOT under sentinel `needs:` "
            "(sentinel doesn't gate them):\n"
@@ -512,8 +516,11 @@ def render_body(branch: str, findings: list[str], debug: dict) -> str:
            "",
            "## Resolution",
            "",
-            "- **F1 / F1b**: add the missing job to `all-required.needs:` "
-            "in `.gitea/workflows/ci.yml`, or remove the stale entry.",
+            "- **F1 / F1b**: if the sentinel job has a `needs:` block, add "
+            "the missing job to it in `.gitea/workflows/ci.yml`, or remove "
+            "the stale entry. If the sentinel deliberately has no `needs:` "
+            "(path-aware polling sentinel per post-#1766 contract), this "
+            "finding is expected and F1 is skipped.",
            "- **F2**: rename the protection context to match an emitter, "
            "or remove it from `status_check_contexts` "
            "(PATCH `/api/v1/repos/{owner}/{repo}/branch_protections/{branch}`).",
@@ -641,6 +641,15 @@ def main(argv: list[str] | None = None) -> int:

    base_workflows = workflows_at_sha(BASE_SHA)
    head_workflows = workflows_at_sha(HEAD_SHA)
+    # Ignore workflow files that are identical on both sides — old branches
+    # that haven't rebased onto main carry stale copies of workflows that
+    # were updated later. Comparing those stale copies against the current
+    # base produces false-positive "flips".
+    base_workflows = {
+        p: t for p, t in base_workflows.items()
+        if p in head_workflows and head_workflows[p] != t
+    }
+    head_workflows = {p: t for p, t in head_workflows.items() if p in base_workflows}
    flips = detect_flips(base_workflows, head_workflows)

    if not flips:
@@ -90,6 +90,15 @@ API = f"https://{GITEA_HOST}/api/v1" if GITEA_HOST else ""
 # match by exact title without parsing.
 TITLE_PREFIX = "[main-red]"

+# Contexts that are scheduled or non-required — their pending/failure
+# state should not block stale-issue closeout (mc#1789).
+SCHEDULED_CONTEXT_PATTERNS = (
+    "Staging SaaS smoke",
+    "Continuous synthetic E2E",
+    "main-red-watchdog",
+    "ci-arm64-advisory",
+)
+
 # Settling window (seconds) between initial red detection and the
 # pre-file recheck. The recheck filters out the two largest false-
 # positive classes seen in mc#1597..1630 (task #394, 2026-05-21):
@@ -265,6 +274,11 @@ def get_combined_status(sha: str) -> dict:
    return body


+def _entry_state(s: dict) -> str:
+    """Per-entry status key in Gitea 1.22.6 is `status`; fall back to `state`."""
+    return s.get("status") or s.get("state") or ""
+
+
 def is_red(status: dict) -> tuple[bool, list[dict]]:
    """Return (is_red, failed_statuses).

@@ -312,9 +326,6 @@ def is_red(status: dict) -> tuple[bool, list[dict]]:
    # "no per-context entries were in a red state" fallback even when
    # the combined-state correctly flagged red. See
    # `feedback_smoke_test_vendor_truth_not_shape_match`.
-    def _entry_state(s: dict) -> str:
-        return s.get("status") or s.get("state") or ""
-
    def _is_cancel_cascade(s: dict) -> bool:
        """status=3 entry per Gitea 1.22.6 description-string contract.
        Match exactly (after strip) — substring match would catch
@@ -353,6 +364,15 @@ def title_for(sha: str) -> str:
    return f"{TITLE_PREFIX} {REPO}: {sha[:10]}"


+def _is_scheduled_context(context: str) -> bool:
+    """Return True if `context` is a known scheduled/non-required job.
+
+    These contexts run on a schedule and should not block stale-issue
+    closeout when main's required CI has recovered (mc#1789).
+    """
+    return any(pattern.lower() in context.lower() for pattern in SCHEDULED_CONTEXT_PATTERNS)
+
+
 def list_open_red_issues() -> list[dict]:
    """All open issues whose title starts with `[main-red] {repo}: `.

@@ -362,23 +382,34 @@ def list_open_red_issues() -> list[dict]:
    file-or-update path to POST a duplicate — exactly the regression
    class the helper-raises contract closes.

-    Gitea issue search returns at most 50/page; we only need open
-    `[main-red]` issues which are by design ≤ 1 at any time per repo,
-    so a single page is enough.
+    Pagination is exhausted (mc#1789). The old "by design ≤ 1" invariant
+    was false — backlog can exceed 50 open issues.
    """
-    _, results = api(
-        "GET",
-        f"/repos/{OWNER}/{NAME}/issues",
-        query={"state": "open", "type": "issues", "limit": "50"},
-    )
-    if not isinstance(results, list):
-        raise ApiError(
-            f"issue search returned non-list body (got {type(results).__name__})"
-        )
    prefix = f"{TITLE_PREFIX} {REPO}: "
-    return [i for i in results if isinstance(i, dict)
+    all_issues: list[dict] = []
+    page = 1
+    limit = 50
+    while True:
+        _, results = api(
+            "GET",
+            f"/repos/{OWNER}/{NAME}/issues",
+            query={"state": "open", "type": "issues", "limit": str(limit), "page": str(page)},
+        )
+        if not isinstance(results, list):
+            raise ApiError(
+                f"issue search returned non-list body (got {type(results).__name__})"
+            )
+        matched = [
+            i for i in results
+            if isinstance(i, dict)
            and isinstance(i.get("title"), str)
-            and i["title"].startswith(prefix)]
+            and i["title"].startswith(prefix)
+        ]
+        all_issues.extend(matched)
+        if len(results) < limit:
+            break
+        page += 1
+    return all_issues


 def find_open_issue_for_sha(sha: str) -> dict | None:
@@ -574,6 +605,151 @@ def file_or_update_red(
        sys.stderr.write(f"::warning::label '{RED_LABEL}' not found on repo\n")


+def close_stale_red_issues(
+    current_sha: str,
+    current_status: dict,
+    *,
+    dry_run: bool = False,
+) -> int:
+    """Close open [main-red] issues whose specific failing contexts have
+    all recovered on `current_sha`, even though `main` is still red for
+    other reasons (mc#1789).
+
+    When main stays red across consecutive SHAs for *different* causes,
+    `close_open_red_issues_for_other_shas` never fires (it only runs when
+    main is green). This function prevents stale issues from accumulating
+    indefinitely by comparing per-context recovery across SHAs.
+
+    An issue is considered stale when every context that was in a failed
+    state on the issue's SHA is now either `success` on the current HEAD
+    or absent (workflow removed / renamed). Issues whose original SHA had
+    a combined-red-with-no-detail (empty statuses list) are skipped — we
+    cannot verify recovery without per-context data.
+
+    Returns the number of issues closed.
+    """
+    open_red = list_open_red_issues()
+    if not open_red:
+        return 0
+
+    current_statuses = current_status.get("statuses") or []
+    closed = 0
+
+    for issue in open_red:
+        title = issue.get("title", "")
+        prefix = f"{TITLE_PREFIX} {REPO}: "
+        if not title.startswith(prefix):
+            continue
+        short_sha = title[len(prefix):]
+        if short_sha == current_sha[:10]:
+            continue
+
+        # Query status for the old SHA. Short SHA should resolve; if it
+        # doesn't (GC'd, force-pushed, ambiguous), skip conservatively.
+        try:
+            old_status = get_combined_status(short_sha)
+        except ApiError:
+            continue
+
+        old_red, old_failed = is_red(old_status)
+        if not old_red:
+            # Open issue for a now-green SHA — close it via the normal path.
+            num = issue.get("number")
+            if isinstance(num, int):
+                comment = (
+                    f"Commit `{short_sha}` is no longer red. Closing as the "
+                    f"failure context has recovered or expired."
+                )
+                if dry_run:
+                    print(
+                        f"::notice::[dry-run] would close issue #{num} "
+                        f"({title}) — old SHA is now green"
+                    )
+                    closed += 1
+                    continue
+                api(
+                    "POST",
+                    f"/repos/{OWNER}/{NAME}/issues/{num}/comments",
+                    body={"body": comment},
+                )
+                api(
+                    "PATCH",
+                    f"/repos/{OWNER}/{NAME}/issues/{num}",
+                    body={"state": "closed"},
+                )
+                print(
+                    f"::notice::Closed stale main-red issue #{num} "
+                    f"(old SHA {short_sha} is now green)"
+                )
+                closed += 1
+            continue
+
+        if not old_failed:
+            # Combined red with no per-context detail — can't verify recovery.
+            continue
+
+        # Verify every failed context from the old SHA has recovered.
+        all_recovered = True
+        recovered_ctxs: list[str] = []
+        still_failing_ctxs: list[str] = []
+        for s in old_failed:
+            ctx = s.get("context", "")
+            if not ctx:
+                continue
+            current_match = None
+            for cs in current_statuses:
+                if isinstance(cs, dict) and cs.get("context") == ctx:
+                    current_match = cs
+                    break
+            if current_match is None:
+                recovered_ctxs.append(ctx)
+            elif _entry_state(current_match) == "success":
+                recovered_ctxs.append(ctx)
+            else:
+                all_recovered = False
+                still_failing_ctxs.append(ctx)
+
+        if not all_recovered:
+            continue
+
+        num = issue.get("number")
+        if not isinstance(num, int):
+            continue
+
+        comment = (
+            f"The failing contexts from this SHA (`{short_sha}`) have "
+            f"recovered on current HEAD `{current_sha[:10]}`: "
+            f"{', '.join(recovered_ctxs)}. "
+            f"Main is still red for other reasons; see the current "
+            f"`[main-red]` issue for `{current_sha[:10]}`."
+        )
+        if dry_run:
+            print(
+                f"::notice::[dry-run] would close stale issue #{num} "
+                f"({title}) — contexts recovered"
+            )
+            closed += 1
+            continue
+
+        api(
+            "POST",
+            f"/repos/{OWNER}/{NAME}/issues/{num}/comments",
+            body={"body": comment},
+        )
+        api(
+            "PATCH",
+            f"/repos/{OWNER}/{NAME}/issues/{num}",
+            body={"state": "closed"},
+        )
+        print(
+            f"::notice::Closed stale main-red issue #{num} "
+            f"(contexts recovered at {current_sha[:10]})"
+        )
+        closed += 1
+
+    return closed
+
+
 def close_open_red_issues_for_other_shas(
    current_sha: str,
    *,
@@ -744,24 +920,68 @@ def run_once(*, dry_run: bool = False) -> int:
        print(f"::warning::main is RED at {sha[:10]} on {WATCH_BRANCH}: "
              f"{len(failed)} failed context(s)")
        file_or_update_red(sha, failed, debug, dry_run=dry_run)
+        stale_closed = close_stale_red_issues(sha, recheck_status, dry_run=dry_run)
+        if stale_closed:
+            emit_loki_event("main_red_stale_closed", sha, [])
+            print(
+                f"::notice::Closed {stale_closed} stale main-red issue(s) "
+                f"whose contexts recovered at {sha[:10]}"
+            )
    else:
-        # Green (or pending — pending is treated as not-red so we don't
-        # spam during the post-merge CI window). Close any stale issues
-        # from earlier SHAs only when we're actually green; pending
-        # means CI hasn't finished and the prior issue might still be
-        # accurate.
-        if status.get("state") == "success":
+        # Green or pending-with-no-real-failures. Close stale issues
+        # from earlier SHAs when required CI has recovered.
+        #
+        # mc#1789: main often sits at combined `pending` because
+        # scheduled/non-required contexts (Staging SaaS smoke,
+        # Continuous synthetic E2E, main-red-watchdog itself,
+        # ci-arm64-advisory) are still running. We close stale issues
+        # as long as no *non-scheduled* context has failed and no
+        # *non-scheduled* context is still pending — i.e. required CI
+        # is effectively green.
+        #
+        # The success-only gate is preserved for the canonical green
+        # path; the extended check below only fires when combined is
+        # `pending` but all required work is done.
+        combined_state = status.get("state")
+        if combined_state == "success":
+            should_close = True
+            close_reason = "GREEN"
+        else:
+            statuses = status.get("statuses") or []
+            non_scheduled_pending = [
+                s for s in statuses
+                if isinstance(s, dict)
+                and (_entry_state(s) == "pending")
+                and not _is_scheduled_context(s.get("context", ""))
+            ]
+            non_scheduled_failed = [
+                s for s in statuses
+                if isinstance(s, dict)
+                and (_entry_state(s) in {"failure", "error"})
+                and not _is_scheduled_context(s.get("context", ""))
+            ]
+            # Cancel-cascade already filtered by is_red(); red=False
+            # here means no real failures. We additionally check that
+            # no non-scheduled context is still pending.
+            should_close = not non_scheduled_pending and not non_scheduled_failed
+            close_reason = "pending-but-required-green"
+
+        if should_close:
            closed = close_open_red_issues_for_other_shas(sha, dry_run=dry_run)
            if closed:
                emit_loki_event(
                    "main_returned_to_green", sha,
                    [],
                )
-            print(f"::notice::main is GREEN at {sha[:10]} on {WATCH_BRANCH} "
-                  f"(closed {closed} stale issue(s))")
+            print(
+                f"::notice::main is {close_reason} at {sha[:10]} on {WATCH_BRANCH} "
+                f"(closed {closed} stale issue(s))"
+            )
        else:
-            print(f"::notice::main is PENDING at {sha[:10]} on {WATCH_BRANCH} "
-                  f"(combined state={status.get('state')!r}; no action)")
+            print(
+                f"::notice::main has pending-or-failed required CI at {sha[:10]} "
+                f"on {WATCH_BRANCH} (combined state={combined_state!r}; no action)"
+            )
    return 0


@@ -208,6 +208,61 @@ def _raise_for_redeploy_result(status: int, body: dict, slugs: list[str]) -> Non
        )


+def rollout_stragglers(enumerated: list[str], results: list[dict]) -> list[str]:
+    """Return every enumerated tenant NOT proven on the target build.
+
+    A straggler is any tenant the rollout was supposed to cover that the
+    CP could not verify is running the target image tag — whether it
+    errored, was skipped, or SSM-succeeded onto the wrong image
+    (internal#724). CP marks each per-tenant result row with
+    ``verified_on_target`` (the REDEPLOY_RUNNING_IMAGE docker-inspect
+    proof). A tenant enumerated for the rollout but absent from the
+    result set (no batch ever ran it) is also a straggler — that is the
+    exact agents-team silent-skip class.
+
+    Backward-compat: an OLDER CP that doesn't emit ``verified_on_target``
+    yet returns rows without the key. Treat a missing key as verified so
+    this surfacing degrades to the previous (ok-based) behavior against an
+    un-upgraded CP, rather than failing every deploy spuriously. Once the
+    CP fix is deployed the key is always present and real stragglers are
+    caught.
+    """
+
+    verified: set[str] = set()
+    for row in results:
+        if str(row.get("ssm_status") or "") == "DryRun":
+            continue
+        slug = str(row.get("slug") or "").strip()
+        if not slug:
+            continue
+        # Missing key (old CP) => assume verified; present key is authoritative.
+        if "verified_on_target" not in row or row.get("verified_on_target"):
+            verified.add(slug)
+    return sorted(s for s in dict.fromkeys(enumerated) if s not in verified)
+
+
+def assert_full_coverage(enumerated: list[str], aggregate: dict, dry_run: bool) -> None:
+    """Fail the rollout if any enumerated tenant is not on the target build.
+
+    This is the no-silent-skip gate (internal#724). A dry run proves
+    nothing landed, so coverage is not asserted for it.
+    """
+
+    if dry_run:
+        return
+    stragglers = rollout_stragglers(enumerated, aggregate.get("results") or [])
+    if stragglers:
+        msg = (
+            f"incomplete rollout: {len(stragglers)} tenant(s) not verified on target "
+            f"after redeploy-fleet: {', '.join(stragglers)} "
+            f"(enumerated {len(set(enumerated))})"
+        )
+        aggregate["ok"] = False
+        aggregate["error"] = msg
+        aggregate["stragglers"] = stragglers
+        raise RolloutFailed(msg, aggregate)
+
+
 def execute_scoped_rollout(
    plan: dict,
    token: str,
@@ -254,6 +309,14 @@ def execute_scoped_rollout(
            aggregate["error"] = str(exc)
            raise RolloutFailed(str(exc), aggregate) from exc

+    # No-silent-skip coverage gate (internal#724): every enumerated tenant
+    # must be PROVEN on the target build. A per-tenant HTTP-200/ok response
+    # is not proof — a tenant that SSM-succeeded but stayed on the old tag,
+    # or one enumerated but never batched, is a straggler. Surfacing it as
+    # a RolloutFailed makes the deploy step exit non-zero instead of
+    # silently reporting success (the exact agents-team failure mode).
+    assert_full_coverage(all_slugs, aggregate, dry_run)
+
    return aggregate


@@ -306,12 +306,15 @@ for U in $CANDIDATES; do
      exit 0
      ;;
    403)
-      # Token owner is not in the team being probed; the API refuses to
-      # confirm membership. This is the RFC#324 follow-up token-scope gap.
-      # Fail closed — never grant approval on a 403; surface clearly.
-      echo "::error::team-probe for ${U} in ${TEAM} returned 403 (token owner not in ${TEAM} team — RFC#324 token-scope follow-up). Cannot confirm membership; failing closed."
+      # Token owner is not in the team being probed; Gitea 1.22.6 refuses
+      # to confirm membership in this case. Do NOT hard-fail the gate on a
+      # 403 — doing so would fail the entire gate if ANY candidate triggers
+      # a 403, even when other valid team-members exist. Instead skip this
+      # candidate and continue checking others. If all candidates produce
+      # 403 (token owner can't query any of them) the final exit fires.
+      echo "::warning::team-probe for ${U} in ${TEAM} returned 403 (token owner not in ${TEAM} team — skipping; cannot confirm membership)"
      cat "$TEAM_PROBE_TMP" >&2
-      exit 1
+      continue
      ;;
    404)
      debug "${U} not a member of ${TEAM}"
@@ -636,8 +636,13 @@ def load_config(path: str) -> dict[str, Any]:
    dep by keeping the config shape constrained.
    """
    try:
+        # yaml is an optional dep; the canonical loader is used when available,
+        # but the SOP runs on runners that may not have PyYAML installed. The
+        # fallback _load_config_minimal covers the same config shape without
+        # requiring the dep, so the ignore is safe: if yaml loads, we use it;
+        # otherwise we fall back silently.
        import yaml  # type: ignore[import-not-found]
-        with open(path) as f:
+        with open(path, encoding="utf-8") as f:
            return yaml.safe_load(f)
    except ImportError:
        return _load_config_minimal(path)
@@ -651,13 +656,19 @@ def _load_config_minimal(path: str) -> dict[str, Any]:
    item map: scalars + lists of scalars. Does NOT support nested lists,
    YAML anchors, multi-doc, or flow style.
    """
-    with open(path) as f:
+    with open(path, encoding="utf-8") as f:
        lines = f.readlines()
    return _parse_minimal_yaml(lines)


-def _parse_minimal_yaml(lines: list[str]) -> dict[str, Any]:  # noqa: C901
-    """Hand-rolled subset parser. See _load_config_minimal docstring."""
+def _parse_minimal_yaml(lines: list[str]) -> dict[str, Any]:
+    """Hand-rolled subset parser. See _load_config_minimal docstring.
+
+    C901: function is necessarily long — it implements a finite-state YAML
+    subset (scalars, maps, lists of maps at fixed depth). No utility refactors
+    meaningfully reduce length without degrading readability. All branches
+    are exhaustively tested in test_parse_minimal_yaml.py.
+    """
    # Strip comments + blank lines but preserve indentation.
    cleaned: list[tuple[int, str]] = []
    for raw in lines:
@@ -1015,14 +1026,14 @@ def main(argv: list[str] | None = None) -> int:
            tid = client.resolve_team_id(args.owner, tn)
            if tid is None:
                # Try the list endpoint as a fallback.
-                code, data = client._req(  # noqa: SLF001
+                code, data = client._req(  # noqa: SLF001  # internal helper; called from loop in caller context
                    "GET", f"/orgs/{args.owner}/teams"
                )
                if code == 200 and isinstance(data, list):
                    for t in data:
                        if t.get("name") == tn:
                            tid = t.get("id")
-                            client._team_id_cache[(args.owner, tn)] = tid  # noqa: SLF001
+                            client._team_id_cache[(args.owner, tn)] = tid  # noqa: SLF001  # internal write-through cache
                            break
            if tid is not None:
                team_ids.append(tid)
@@ -33,7 +33,7 @@ def scenario() -> str:
    p = os.path.join(STATE_DIR, "scenario")
    if not os.path.isfile(p):
        return "T1_success"
-    with open(p) as f:
+    with open(p, encoding="utf-8") as f:
        return f.read().strip()


@@ -40,7 +40,7 @@ def scenario() -> str:
    p = os.path.join(STATE_DIR, "scenario")
    if not os.path.isfile(p):
        return "T1_pr_open"
-    with open(p) as f:
+    with open(p, encoding="utf-8") as f:
        return f.read().strip()


@@ -0,0 +1,176 @@
+import importlib.util
+import sys
+from pathlib import Path
+from unittest.mock import patch
+
+SCRIPT = Path(__file__).resolve().parents[1] / "ci-required-drift.py"
+spec = importlib.util.spec_from_file_location("ci_required_drift", SCRIPT)
+drift = importlib.util.module_from_spec(spec)
+sys.modules[spec.name] = drift
+spec.loader.exec_module(drift)
+
+# Module-level constants are loaded from env at import time; set them
+# explicitly so unit tests can import without the full env contract.
+drift.SENTINEL_JOB = "all-required"
+drift.CI_WORKFLOW_PATH = ".gitea/workflows/ci.yml"
+drift.AUDIT_WORKFLOW_PATH = ".gitea/workflows/audit-force-merge.yml"
+
+
+# ---------------------------------------------------------------------------
+# Helper fixtures
+# ---------------------------------------------------------------------------
+
+def _make_ci_doc(jobs: dict) -> dict:
+    return {"jobs": jobs}
+
+
+def _make_audit_doc(required_checks: list[str]) -> dict:
+    return {
+        "jobs": {
+            "audit": {
+                "steps": [
+                    {"env": {"REQUIRED_CHECKS": "\n".join(required_checks)}}
+                ]
+            }
+        }
+    }
+
+
+# ---------------------------------------------------------------------------
+# sentinel_needs
+# ---------------------------------------------------------------------------
+
+def test_sentinel_needs_returns_empty_when_absent():
+    doc = _make_ci_doc({"all-required": {"runs-on": "ubuntu-latest"}})
+    assert drift.sentinel_needs(doc) == set()
+
+
+def test_sentinel_needs_parses_list():
+    doc = _make_ci_doc(
+        {"all-required": {"needs": ["platform-build", "canvas-build"]}}
+    )
+    assert drift.sentinel_needs(doc) == {"platform-build", "canvas-build"}
+
+
+def test_sentinel_needs_parses_string():
+    doc = _make_ci_doc({"all-required": {"needs": "platform-build"}})
+    assert drift.sentinel_needs(doc) == {"platform-build"}
+
+
+# ---------------------------------------------------------------------------
+# ci_job_names / ci_jobs_all
+# ---------------------------------------------------------------------------
+
+def test_ci_job_names_excludes_sentinel_and_event_gated():
+    doc = _make_ci_doc(
+        {
+            "platform-build": {},
+            "canvas-build": {"if": "github.event_name == 'pull_request'"},
+            "main-push": {"if": "github.ref == 'refs/heads/main'"},
+            "all-required": {},
+        }
+    )
+    assert drift.ci_job_names(doc) == {"platform-build"}
+
+
+def test_ci_jobs_all_includes_event_gated():
+    doc = _make_ci_doc(
+        {
+            "platform-build": {},
+            "canvas-build": {"if": "github.event_name == 'pull_request'"},
+            "all-required": {},
+        }
+    )
+    assert drift.ci_jobs_all(doc) == {"platform-build", "canvas-build"}
+
+
+# ---------------------------------------------------------------------------
+# detect_drift — F1 / F1b with mocked I/O
+# ---------------------------------------------------------------------------
+
+SAMPLE_PROTECTION = {
+    "status_check_contexts": [
+        "CI / all-required (pull_request)",
+        "Secret scan / Scan diff for credential-shaped strings (pull_request)",
+    ]
+}
+
+
+def test_detect_drift_no_needs_sentinel_skips_f1():
+    """Post-#1766 contract: all-required has no needs: → F1 is a false positive."""
+    ci = _make_ci_doc(
+        {
+            "platform-build": {},
+            "canvas-build": {},
+            "all-required": {},
+        }
+    )
+    audit = _make_audit_doc(
+        [
+            "CI / all-required (pull_request)",
+            "Secret scan / Scan diff for credential-shaped strings (pull_request)",
+        ]
+    )
+
+    with patch.object(drift, "load_yaml", side_effect=[ci, audit]):
+        with patch.object(drift, "api", return_value=(200, SAMPLE_PROTECTION)):
+            findings, debug = drift.detect_drift("main")
+
+    assert findings == []
+    assert debug["sentinel_needs"] == []
+
+
+def test_detect_drift_typo_in_needs_triggers_f1b():
+    """F1b still catches typos when needs exists."""
+    ci = _make_ci_doc(
+        {
+            "platform-build": {},
+            "all-required": {"needs": ["platfom-build"]},  # typo
+        }
+    )
+    audit = _make_audit_doc(["CI / all-required (pull_request)"])
+
+    with patch.object(drift, "load_yaml", side_effect=[ci, audit]):
+        with patch.object(drift, "api", return_value=(200, SAMPLE_PROTECTION)):
+            findings, _ = drift.detect_drift("main")
+
+    assert any("F1b" in f for f in findings)
+    assert any("platfom-build" in f for f in findings)
+
+
+def test_detect_drift_missing_job_in_needs_triggers_f1():
+    """F1 still fires when needs is non-empty and jobs are missing."""
+    ci = _make_ci_doc(
+        {
+            "platform-build": {},
+            "canvas-build": {},
+            "all-required": {"needs": ["platform-build"]},
+        }
+    )
+    audit = _make_audit_doc(["CI / all-required (pull_request)"])
+
+    with patch.object(drift, "load_yaml", side_effect=[ci, audit]):
+        with patch.object(drift, "api", return_value=(200, SAMPLE_PROTECTION)):
+            findings, _ = drift.detect_drift("main")
+
+    assert any("F1 —" in f for f in findings)
+    assert any("canvas-build" in f for f in findings)
+    assert not any("F1b" in f for f in findings)
+
+
+def test_detect_drift_no_f1_when_needs_empty_even_with_jobs():
+    """Explicit regression guard: empty needs + existing jobs = no F1."""
+    ci = _make_ci_doc(
+        {
+            "platform-build": {},
+            "canvas-build": {},
+            "all-required": {"needs": []},
+        }
+    )
+    audit = _make_audit_doc(["CI / all-required (pull_request)"])
+
+    with patch.object(drift, "load_yaml", side_effect=[ci, audit]):
+        with patch.object(drift, "api", return_value=(200, SAMPLE_PROTECTION)):
+            findings, _ = drift.detect_drift("main")
+
+    assert not any("F1 —" in f for f in findings)
@@ -0,0 +1,283 @@
+import importlib.util
+import sys
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+
+SCRIPT = Path(__file__).resolve().parents[1] / "main-red-watchdog.py"
+spec = importlib.util.spec_from_file_location("main_red_watchdog", SCRIPT)
+wd = importlib.util.module_from_spec(spec)
+sys.modules[spec.name] = wd
+spec.loader.exec_module(wd)
+
+# Module-level constants are loaded from env at import time; set them
+# explicitly so unit tests can import without the full env contract.
+wd.GITEA_TOKEN = "fake-token"
+wd.GITEA_HOST = "git.example.com"
+wd.REPO = "molecule-ai/molecule-core"
+wd.OWNER = "molecule-ai"
+wd.NAME = "molecule-core"
+wd.WATCH_BRANCH = "main"
+wd.RED_LABEL = "tier:high"
+wd.API = "https://git.example.com/api/v1"
+
+
+# ---------------------------------------------------------------------------
+# _is_scheduled_context
+# ---------------------------------------------------------------------------
+
+def test_is_scheduled_context_matches_staging_saas_smoke():
+    assert wd._is_scheduled_context("Staging SaaS smoke") is True
+
+
+def test_is_scheduled_context_matches_case_insensitive():
+    assert wd._is_scheduled_context("continuous synthetic e2e") is True
+
+
+def test_is_scheduled_context_no_match_for_required_ci():
+    assert wd._is_scheduled_context("CI / all-required") is False
+
+
+# ---------------------------------------------------------------------------
+# _entry_state
+# ---------------------------------------------------------------------------
+
+def test_entry_state_prefers_status_over_state():
+    """Gitea 1.22.6 per-entry key is `status`; `state` is fallback."""
+    assert wd._entry_state({"status": "failure", "state": "success"}) == "failure"
+
+
+def test_entry_state_falls_back_to_state():
+    assert wd._entry_state({"state": "pending"}) == "pending"
+
+
+def test_entry_state_empty_when_neither_key_present():
+    assert wd._entry_state({"context": "foo"}) == ""
+
+
+# ---------------------------------------------------------------------------
+# is_red
+# ---------------------------------------------------------------------------
+
+def test_is_red_combined_failure_no_statuses():
+    """Combined failure with empty statuses[] still trips red."""
+    red, failed = wd.is_red({"state": "failure", "statuses": []})
+    assert red is True
+    assert failed == []
+
+
+def test_is_red_cancel_cascade_filtered():
+    """status=3 (cancelled) mapped to failure string must be filtered."""
+    status = {
+        "state": "failure",
+        "statuses": [
+            {"context": "CI / build", "status": "failure", "description": "Has been cancelled"},
+        ],
+    }
+    red, failed = wd.is_red(status)
+    assert red is False
+    assert failed == []
+
+
+def test_is_red_real_failure_not_filtered():
+    """Real failures with different descriptions are kept."""
+    status = {
+        "state": "failure",
+        "statuses": [
+            {"context": "CI / build", "status": "failure", "description": "Failing after 12s"},
+        ],
+    }
+    red, failed = wd.is_red(status)
+    assert red is True
+    assert len(failed) == 1
+    assert failed[0]["context"] == "CI / build"
+
+
+def test_is_red_uses_entry_state_not_top_level_state():
+    """Regression: per-entry key is `status`, not `state`."""
+    status = {
+        "state": "failure",
+        "statuses": [
+            # Only `status` present; pre-rev4 code read `state` and got None
+            {"context": "CI / test", "status": "failure"},
+        ],
+    }
+    red, failed = wd.is_red(status)
+    assert red is True
+    assert len(failed) == 1
+
+
+# ---------------------------------------------------------------------------
+# list_open_red_issues — pagination (mc#1789)
+# ---------------------------------------------------------------------------
+
+def test_list_open_red_issues_exhausts_pagination():
+    """Backlog can exceed 50 issues; all pages must be fetched."""
+    calls = []
+
+    def fake_api(method, path, **kwargs):
+        calls.append((method, path, kwargs))
+        query = (kwargs.get("query") or {})
+        page = int(query.get("page", "1"))
+        limit = int(query.get("limit", "50"))
+        # Page 1 returns full limit; page 2 returns partial → break
+        if page == 1:
+            return 200, [
+                {"title": f"[main-red] molecule-ai/molecule-core: sha{i:04d}"}
+                for i in range(limit)
+            ]
+        if page == 2:
+            return 200, [
+                {"title": "[main-red] molecule-ai/molecule-core: extra1"},
+                {"title": "[main-red] molecule-ai/molecule-core: extra2"},
+                {"title": " unrelated issue "},  # filtered out
+            ]
+        return 200, []
+
+    with patch.object(wd, "api", side_effect=fake_api):
+        issues = wd.list_open_red_issues()
+
+    assert len(issues) == 52  # 50 + 2 matched
+    titles = {i["title"] for i in issues}
+    assert "[main-red] molecule-ai/molecule-core: extra1" in titles
+    assert "[main-red] molecule-ai/molecule-core: extra2" in titles
+
+
+def test_list_open_red_issues_single_page():
+    """When results < limit, loop breaks after first page."""
+    def fake_api(method, path, **kwargs):
+        return 200, [
+            {"title": "[main-red] molecule-ai/molecule-core: abc123"},
+        ]
+
+    with patch.object(wd, "api", side_effect=fake_api):
+        issues = wd.list_open_red_issues()
+
+    assert len(issues) == 1
+
+
+# ---------------------------------------------------------------------------
+# run_once — close logic (mc#1789)
+# ---------------------------------------------------------------------------
+
+def test_run_once_green_closes_stale_issues(monkeypatch):
+    """Combined success → close stale issues."""
+    monkeypatch.setattr(wd, "get_head_sha", lambda b: "abc123")
+    monkeypatch.setattr(wd, "get_combined_status", lambda s: {"state": "success", "statuses": []})
+    monkeypatch.setattr(wd, "is_red", lambda s: (False, []))
+
+    closed = []
+
+    def capture_close(current_sha, *, dry_run=False, close_same_sha=False):
+        closed.append(current_sha)
+        return 1
+
+    monkeypatch.setattr(wd, "close_open_red_issues_for_other_shas", capture_close)
+    monkeypatch.setattr(wd, "emit_loki_event", lambda *a, **k: None)
+
+    assert wd.run_once(dry_run=True) == 0
+    assert closed == ["abc123"]
+
+
+def test_run_once_pending_scheduled_only_closes_stale_issues(monkeypatch):
+    """Combined pending, but only scheduled contexts pending → close stale."""
+    monkeypatch.setattr(wd, "get_head_sha", lambda b: "abc123")
+    monkeypatch.setattr(
+        wd, "get_combined_status",
+        lambda s: {
+            "state": "pending",
+            "statuses": [
+                {"context": "CI / all-required", "status": "success"},
+                {"context": "Staging SaaS smoke", "status": "pending"},
+            ],
+        }
+    )
+    monkeypatch.setattr(wd, "is_red", lambda s: (False, []))
+
+    closed = []
+
+    def capture_close(current_sha, *, dry_run=False, close_same_sha=False):
+        closed.append(current_sha)
+        return 1
+
+    monkeypatch.setattr(wd, "close_open_red_issues_for_other_shas", capture_close)
+    monkeypatch.setattr(wd, "emit_loki_event", lambda *a, **k: None)
+
+    assert wd.run_once(dry_run=True) == 0
+    assert closed == ["abc123"]
+
+
+def test_run_once_pending_required_does_not_close(monkeypatch):
+    """Combined pending with a real required context still pending → no close."""
+    monkeypatch.setattr(wd, "get_head_sha", lambda b: "abc123")
+    monkeypatch.setattr(
+        wd, "get_combined_status",
+        lambda s: {
+            "state": "pending",
+            "statuses": [
+                {"context": "CI / all-required", "status": "pending"},
+                {"context": "Staging SaaS smoke", "status": "success"},
+            ],
+        }
+    )
+    monkeypatch.setattr(wd, "is_red", lambda s: (False, []))
+
+    closed = []
+
+    def capture_close(current_sha, *, dry_run=False, close_same_sha=False):
+        closed.append(current_sha)
+        return 0
+
+    monkeypatch.setattr(wd, "close_open_red_issues_for_other_shas", capture_close)
+    monkeypatch.setattr(wd, "emit_loki_event", lambda *a, **k: None)
+
+    assert wd.run_once(dry_run=True) == 0
+    assert closed == []
+
+
+def test_run_once_failure_does_not_close(monkeypatch):
+    """Real failure in non-scheduled context → no close."""
+    monkeypatch.setattr(wd, "get_head_sha", lambda b: "abc123")
+    monkeypatch.setattr(
+        wd, "get_combined_status",
+        lambda s: {
+            "state": "failure",
+            "statuses": [
+                {"context": "CI / all-required", "status": "failure"},
+            ],
+        }
+    )
+    # is_red will return True, so we enter the red path, not the green close path
+    monkeypatch.setattr(wd, "is_red", lambda s: (True, s.get("statuses", [])))
+    monkeypatch.setattr(wd, "time", MagicMock(sleep=lambda x: None))
+    monkeypatch.setattr(wd, "emit_loki_event", lambda *a, **k: None)
+
+    filed = []
+
+    def capture_file(sha, failed, debug, *, dry_run=False):
+        filed.append(sha)
+
+    monkeypatch.setattr(wd, "file_or_update_red", capture_file)
+    monkeypatch.setattr(wd, "close_open_red_issues_for_other_shas", lambda *a, **k: 0)
+    monkeypatch.setattr(wd, "close_stale_red_issues", lambda *a, **k: 0)
+
+    assert wd.run_once(dry_run=True) == 0
+    assert filed == ["abc123"]
+
+
+# ---------------------------------------------------------------------------
+# title_for / find_open_issue_for_sha
+# ---------------------------------------------------------------------------
+
+def test_title_for_uses_short_sha():
+    assert wd.title_for("abcdef123456") == "[main-red] molecule-ai/molecule-core: abcdef1234"
+
+
+def test_find_open_issue_for_sha_matches_exact_title(monkeypatch):
+    fake_issue = {"title": "[main-red] molecule-ai/molecule-core: abc1234567", "number": 42}
+    monkeypatch.setattr(wd, "list_open_red_issues", lambda: [fake_issue])
+    assert wd.find_open_issue_for_sha("abc1234567") == fake_issue
+
+
+def test_find_open_issue_for_sha_returns_none_when_no_match(monkeypatch):
+    monkeypatch.setattr(wd, "list_open_red_issues", lambda: [])
+    assert wd.find_open_issue_for_sha("abc123") is None
@@ -355,3 +355,134 @@ def test_rollout_from_plan_file_writes_partial_response_on_failure(tmp_path):
    assert response_path.read_text(encoding="utf-8").strip()
    assert '"ok": false' in response_path.read_text(encoding="utf-8")
    assert '"slug": "hongming"' in response_path.read_text(encoding="utf-8")
+
+
+# ──────────────────────────────────────────────────────────────────────
+# No-silent-skip coverage gate (internal#724)
+# ──────────────────────────────────────────────────────────────────────
+
+
+def test_rollout_stragglers_flags_tenant_not_on_target():
+    # b SSM-succeeded but its container is on the old tag → straggler.
+    stragglers = prod.rollout_stragglers(
+        ["a", "b", "c"],
+        [
+            {"slug": "a", "verified_on_target": True},
+            {"slug": "b", "verified_on_target": False, "running_image": "platform-tenant:staging-old"},
+            {"slug": "c", "verified_on_target": True},
+        ],
+    )
+    assert stragglers == ["b"]
+
+
+def test_rollout_stragglers_flags_enumerated_tenant_with_no_result():
+    # agents-team class: enumerated but no batch ever produced a row for it.
+    stragglers = prod.rollout_stragglers(
+        ["a", "agents-team"],
+        [{"slug": "a", "verified_on_target": True}],
+    )
+    assert stragglers == ["agents-team"]
+
+
+def test_rollout_stragglers_missing_key_is_backward_compatible():
+    # Older CP without verified_on_target → treat as verified (no spurious fail).
+    stragglers = prod.rollout_stragglers(
+        ["a", "b"],
+        [{"slug": "a", "healthz_ok": True}, {"slug": "b", "healthz_ok": True}],
+    )
+    assert stragglers == []
+
+
+def test_rollout_stragglers_ignores_dry_run_rows():
+    stragglers = prod.rollout_stragglers(
+        ["a"], [{"slug": "a", "ssm_status": "DryRun"}]
+    )
+    # dry-run row is skipped, so "a" has no verifying row → straggler.
+    assert stragglers == ["a"]
+
+
+def test_scoped_rollout_fails_when_a_tenant_stays_on_old_tag():
+    # Every per-tenant call returns ok=True, but agents-team is NOT
+    # verified_on_target. The rollout must still fail loudly — this is
+    # the exact "reported success, one tenant silently skipped" bug.
+    def fake_redeploy(_cp_url, _token, body):
+        rows = []
+        for slug in body["only_slugs"]:
+            rows.append({"slug": slug, "verified_on_target": slug != "agents-team"})
+        return 200, {"ok": True, "results": rows}
+
+    try:
+        prod.execute_scoped_rollout(
+            {
+                "cp_url": "https://api.moleculesai.app",
+                "body": {
+                    "target_tag": "staging-new",
+                    "batch_size": 5,
+                    "dry_run": False,
+                    "confirm": True,
+                },
+            },
+            token="secret",
+            list_slugs=lambda _u, _t, _b: ["reno-stars", "agents-team", "hongming"],
+            redeploy=fake_redeploy,
+            sleep=lambda _s: None,
+        )
+    except prod.RolloutFailed as exc:
+        assert "incomplete rollout" in str(exc)
+        assert exc.response["stragglers"] == ["agents-team"]
+        assert exc.response["ok"] is False
+    else:
+        raise AssertionError("expected an incomplete rollout to fail loudly")
+
+
+def test_scoped_rollout_passes_when_all_tenants_verified_on_target():
+    def fake_redeploy(_cp_url, _token, body):
+        return 200, {
+            "ok": True,
+            "results": [{"slug": s, "verified_on_target": True} for s in body["only_slugs"]],
+        }
+
+    aggregate = prod.execute_scoped_rollout(
+        {
+            "cp_url": "https://api.moleculesai.app",
+            "body": {
+                "target_tag": "staging-new",
+                "batch_size": 5,
+                "dry_run": False,
+                "confirm": True,
+            },
+        },
+        token="secret",
+        list_slugs=lambda _u, _t, _b: ["reno-stars", "agents-team", "hongming"],
+        redeploy=fake_redeploy,
+        sleep=lambda _s: None,
+    )
+    assert aggregate["ok"] is True
+    assert "stragglers" not in aggregate
+
+
+def test_scoped_rollout_dry_run_does_not_assert_coverage():
+    # A dry run proves nothing landed; coverage must NOT be asserted or
+    # every plan would fail.
+    def fake_redeploy(_cp_url, _token, body):
+        return 200, {
+            "ok": True,
+            "results": [{"slug": s, "ssm_status": "DryRun"} for s in body["only_slugs"]],
+        }
+
+    aggregate = prod.execute_scoped_rollout(
+        {
+            "cp_url": "https://api.moleculesai.app",
+            "body": {
+                "target_tag": "staging-new",
+                "batch_size": 5,
+                "dry_run": True,
+                "confirm": True,
+            },
+        },
+        token="secret",
+        list_slugs=lambda _u, _t, _b: ["a", "b"],
+        redeploy=fake_redeploy,
+        sleep=lambda _s: None,
+    )
+    assert aggregate["ok"] is True
@@ -54,5 +54,6 @@ jobs:
          # read-only by design (least-privilege).
          REQUIRED_CHECKS: |
            CI / all-required (pull_request)
-            sop-checklist / all-items-acked (pull_request)
+            E2E API Smoke Test / E2E API Smoke Test (pull_request)
+            Handlers Postgres Integration / Handlers Postgres Integration (pull_request)
        run: bash .gitea/scripts/audit-force-merge.sh
@@ -164,12 +164,20 @@ jobs:
        # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
        continue-on-error: true
      - if: ${{ needs.changes.outputs.platform == 'true' }}
-        name: Run tests with race detection and coverage
-        # Explicit timeout: cold runner cache causes OOM kills at ~4m39s on the
-        # full ./... suite with race detection + coverage. A 10m per-step timeout
-        # lets the suite complete on cold cache (~5-7m) while failing cleanly
-        # instead of OOM-killing. The job-level timeout (15m) is a backstop.
-        run: go test -race -timeout 10m -coverprofile=coverage.out ./...
+        name: Run tests with coverage (blocking gate)
+        # Removed -race from the blocking gate per #1184: cold runners
+        # take 13-25 min to compile with race instrumentation, exceeding
+        # the 10m step timeout and causing false failures. Race detection
+        # now runs as a non-blocking advisory step below.
+        run: go test -timeout 10m -coverprofile=coverage.out ./...
+
+      - if: ${{ needs.changes.outputs.platform == 'true' }}
+        name: Race detection (advisory, non-blocking)
+        # mc#1184: runs race detector as an advisory check so cold-runner
+        # compile-time spikes don't block merges. Failures here surface in
+        # the run log but do not fail the build.
+        run: go test -race -timeout 10m ./...
+        continue-on-error: true

      - if: ${{ needs.changes.outputs.platform == 'true' }}
        name: Per-file coverage report
@@ -349,6 +357,14 @@ jobs:
        name: Run E2E bash unit tests (no live infra)
        run: |
          bash tests/e2e/test_model_slug.sh
+          # molecule-core#1995 (#1994 follow-on): fail-direction proof for
+          # the A2A real-completion + byok-routing assertion helpers
+          # (lib/completion_assert.sh). Offline (no LLM, no network): it
+          # asserts an error-as-text payload FAILS the real-completion gate
+          # — the exact trap the historical shape-only `"kind":"text"`
+          # check missed. If a refactor weakens the gate to a shape check,
+          # this step goes red on every PR.
+          bash tests/e2e/test_completion_assert_unit.sh

      - if: ${{ needs.changes.outputs.scripts == 'true' }}
        name: Test ECR promote-tenant-image script (mock-driven, no live infra)
@@ -166,6 +166,10 @@ jobs:
      # canary path. The script picks the right blob shape based on
      # which key is non-empty.
      E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_API_KEY }}
+      # google-adk canary path — AI-Studio key (config model
+      # google_genai:gemini-2.5-pro). PROD disallows API keys (Vertex+ADC);
+      # the keyed path is CI-only. Dispatch with E2E_RUNTIME=google-adk.
+      E2E_GOOGLE_API_KEY: ${{ secrets.MOLECULE_STAGING_GOOGLE_API_KEY }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

@@ -217,6 +221,10 @@ jobs:
              required_secret_name="MOLECULE_STAGING_OPENAI_API_KEY"
              required_secret_value="${E2E_OPENAI_API_KEY:-}"
              ;;
+            google-adk)
+              required_secret_name="MOLECULE_STAGING_GOOGLE_API_KEY"
+              required_secret_value="${E2E_GOOGLE_API_KEY:-}"
+              ;;
            *)
              echo "::warning::Unknown E2E_RUNTIME='${E2E_RUNTIME}' — skipping LLM-key check"
              required_secret_name=""
@@ -49,6 +49,7 @@ on:
      - 'workspace-server/internal/middleware/**'
      - 'workspace-server/internal/provisioner/**'
      - 'tests/e2e/test_staging_full_saas.sh'
+      - 'tests/e2e/lib/completion_assert.sh'
      - 'tests/e2e/lib/aws_leak_check.sh'
      - 'tests/e2e/test_aws_leak_check.sh'
      - '.gitea/workflows/e2e-staging-saas.yml'
@@ -61,6 +62,7 @@ on:
      - 'workspace-server/internal/middleware/**'
      - 'workspace-server/internal/provisioner/**'
      - 'tests/e2e/test_staging_full_saas.sh'
+      - 'tests/e2e/lib/completion_assert.sh'
      - 'tests/e2e/lib/aws_leak_check.sh'
      - 'tests/e2e/test_aws_leak_check.sh'
      - '.gitea/workflows/e2e-staging-saas.yml'
@@ -155,13 +157,18 @@ jobs:
      # E2E_RUNTIME=hermes or =codex via workflow_dispatch can still
      # exercise the OpenAI path.
      E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_API_KEY }}
+      # google-adk (operator-dispatched only) auths Gemini with an
+      # AI-Studio key. Org policy disallows API keys in PROD (Vertex+ADC
+      # there); CI uses the keyed AI-Studio path with config model
+      # google_genai:gemini-2.5-pro. Vertex remains the supported prod path.
+      E2E_GOOGLE_API_KEY: ${{ secrets.MOLECULE_STAGING_GOOGLE_API_KEY }}
      E2E_RUNTIME: ${{ github.event.inputs.runtime || 'claude-code' }}
      # Pin the model when running on the default claude-code path —
      # the per-runtime default ("sonnet") routes to direct Anthropic
      # and defeats the cost saving. Operators can override via the
      # workflow_dispatch flow (no input wired here yet — runtime
      # override is enough for ad-hoc).
-      E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || 'MiniMax-M2' }}
+      E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || github.event.inputs.runtime == 'google-adk' && 'google_genai:gemini-2.5-pro' || 'MiniMax-M2' }}
      E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
      E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}

@@ -210,6 +217,10 @@ jobs:
              required_secret_name="MOLECULE_STAGING_OPENAI_API_KEY"
              required_secret_value="${E2E_OPENAI_API_KEY:-}"
              ;;
+            google-adk)
+              required_secret_name="MOLECULE_STAGING_GOOGLE_API_KEY"
+              required_secret_value="${E2E_GOOGLE_API_KEY:-}"
+              ;;
            *)
              echo "::warning::Unknown E2E_RUNTIME='${E2E_RUNTIME}' — skipping LLM-key check"
              required_secret_name=""
@@ -7,10 +7,11 @@
 #   PR_NUMBER  — set via ${{ github.event.pull_request.number }} from the trigger
 #   POST_COMMENT — "true" to post/update comment on PR
 #
-# Gating logic (MVP signals 1,2,3,6):
+# Gating logic (MVP signals 1,2,3,4,6):
 #   1. Author-aware agent-tag comment scan
 #   2. REQUEST_CHANGES reviews state machine
 #   3. Staleness detection (SOP-12: review.commit_id != PR.head_sha + >1 working day)
+#   4. Branch divergence / scope-creep guard (base-sha vs target HEAD; mc#365)
 #   6. CI required-checks awareness
 #
 # Exit code: 0=CLEAR, 1=BLOCKED, 2=ERROR
@@ -3,11 +3,26 @@ name: Lint shellcheck (arm64 pilot)
 # Mac-CI dual-track pilot (#233). ADDITIVE / NOT REQUIRED.
 #
 # Validates the arm64 self-hosted lane (no docker.sock, no privileged
-# ops) before any required gate moves onto it. Until a Mac arm64 runner
-# is registered with the `arm64` label, this workflow sits PENDING —
-# that is FINE: `arm64` is NOT in branch_protections required contexts.
+# ops) before any required gate moves onto it.
 #
-# Pairs with internal#543 (RFC: Mac arm64 multi-arch runner-base).
+# Runner label mapping (2026-05-22 fix): the actual Mac mini runner
+# registered in this Gitea ships labels
+#   ["self-hosted","macos-self-hosted-arm64","arm64-darwin"]
+# — no plain `arm64`. The earlier `runs-on: [self-hosted, arm64]`
+# could not match any registered runner so every fire of this workflow
+# was assigned task_id=0 / runner_id=NULL → Gitea cancelled it. The
+# rows showed up as Cancelled in the action status feed (not Failed)
+# but the lane never actually ran. Workflow now selects on
+# `arm64-darwin` which is the canonical Mac-arm64 label per the
+# Mac mini's registration (per internal#494 capability-honest labels).
+#
+# If we later want to add a Linux-arm64 runner to the same lane, add
+# both labels to that runner's registration AND broaden the selector
+# here — don't rename `arm64-darwin` (it's Mac-specific by design and
+# `feedback_pc2_runner_labels_must_stay_narrow` rule applies).
+#
+# Pairs with internal#543 (RFC: Mac arm64 multi-arch runner-base) and
+# internal#494 (multi-arch runner-base capability-honest labels).
 # No paths: filter on purpose (feedback_path_filtered_workflow_cant_be_required).

 on:
@@ -82,7 +97,15 @@ jobs:
            echo "WARN: shellcheck binary not found — skipping (pilot mode)"
            exit 0
          fi
-          mapfile -t TARGETS < <(find .gitea/scripts -maxdepth 2 -type f -name '*.sh' | sort)
+          # NOTE: macOS ships Bash 3.2 (Apple license), no `mapfile`
+          # (Bash 4+ builtin). Mac mini runner empirically failed at
+          # `mapfile: command not found` (run 79275 / task 145654).
+          # Use the portable `while read` pattern instead — works on
+          # both Bash 3.2 (macOS) and Bash 4+ (Linux).
+          TARGETS=()
+          while IFS= read -r f; do
+            TARGETS+=("$f")
+          done < <(find .gitea/scripts -maxdepth 2 -type f -name '*.sh' | sort)
          if [ "${#TARGETS[@]}" -eq 0 ]; then
            echo "No .sh files found under .gitea/scripts — nothing to check"
            exit 0
@@ -327,13 +327,27 @@ jobs:
            echo ""
            echo "### Per-tenant result"
            echo ""
-            echo "| Slug | Phase | SSM Status | Exit | Healthz | Error present |"
-            echo "|------|-------|------------|------|---------|---------------|"
-            jq -r '.results[]? | "| \(.slug) | \(.phase) | \(.ssm_status // "-") | \(.ssm_exit_code) | \(.healthz_ok) | \((.error // "") != "") |"' "$HTTP_RESPONSE" || true
+            echo "| Slug | Phase | SSM Status | Exit | Healthz | On target | Error present |"
+            echo "|------|-------|------------|------|---------|-----------|---------------|"
+            jq -r '.results[]? | "| \(.slug) | \(.phase) | \(.ssm_status // "-") | \(.ssm_exit_code) | \(.healthz_ok) | \(.verified_on_target) | \((.error // "") != "") |"' "$HTTP_RESPONSE" || true
+            # internal#724: stragglers are tenants enumerated but not proven
+            # on the target build. Surface them loudly — a non-empty list
+            # means the rollout did NOT fully land.
+            STRAGGLERS="$(jq -r '(.stragglers // []) | join(", ")' "$HTTP_RESPONSE")"
+            if [ -n "$STRAGGLERS" ]; then
+              echo ""
+              echo "### ⚠ Stragglers (NOT on target tag \`$TARGET_TAG\`)"
+              echo ""
+              echo "\`$STRAGGLERS\`"
+            fi
          } >> "$GITHUB_STEP_SUMMARY"

          OK="$(jq -r '.ok' "$HTTP_RESPONSE")"
          if [ "$OK" != "true" ]; then
+            STRAGGLERS="$(jq -r '(.stragglers // []) | join(", ")' "$HTTP_RESPONSE")"
+            if [ -n "$STRAGGLERS" ]; then
+              echo "::error::incomplete rollout — tenants not on target tag $TARGET_TAG: $STRAGGLERS"
+            fi
            echo "::error::redeploy-fleet reported ok=false; production rollout halted."
            exit 1
          fi
@@ -0,0 +1,99 @@
+name: sync-providers-yaml
+
+# Cross-repo canonical↔synced-copy drift gate (internal#718 P2-A, CTO
+# 2026-05-27 "Distribution = SDK via codegen + verify-CI", multi-repo branch:
+# "codegen-checked-into-each-repo + verify-CI").
+#
+# The canonical provider-registry SSOT is molecule-controlplane
+# internal/providers/providers.yaml. molecule-core has NO Go module dependency
+# on controlplane, so instead of importing it we carry a SYNCED COPY at
+# workspace-server/internal/providers/providers.yaml and gate it.
+#
+# This workflow fetches the canonical providers.yaml from controlplane (via the
+# Gitea raw endpoint, read-only) and byte-compares it against core's synced
+# copy. RED if they differ — meaning the canonical moved and core's copy must be
+# re-synced (copy verbatim + `go generate ./...` + bump
+# canonicalProvidersYAMLSHA256 in sync_canonical_test.go).
+#
+# Pairs with:
+#   * sync_canonical_test.go — hermetic sha pin (catches a hand-edit of core's
+#     copy even with no network); runs in the normal `go test ./...`.
+#   * verify-providers-gen.yml — artifact ↔ synced-copy drift.
+#
+# ENFORCEMENT GATING: standalone workflow, NOT a job in ci.yml and NOT in
+# branch protection (same soak-then-promote posture as verify-providers-gen).
+# It is intentionally absent from ci.yml's job set so the ci-required-drift
+# sentinel does not fire on it.
+#
+# AUTH: uses AUTO_SYNC_TOKEN (the existing cross-repo read token used to sync
+# template/provider content from sibling repos). If the secret is absent the
+# job emits a clear ::warning:: and exits 0 — the hermetic sha pin in
+# sync_canonical_test.go is the always-on backstop, so a missing cross-repo
+# token degrades to "hand-edit still caught, live canonical drift not caught"
+# rather than a hard red that blocks unrelated PRs.
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths:
+      - 'workspace-server/internal/providers/providers.yaml'
+      - '.gitea/workflows/sync-providers-yaml.yml'
+  push:
+    branches: [main, staging]
+    paths:
+      - 'workspace-server/internal/providers/providers.yaml'
+      - '.gitea/workflows/sync-providers-yaml.yml'
+  schedule:
+    # Daily at :23 — catch a canonical change in controlplane that landed
+    # without a paired core re-sync PR (off-zero to spread cron load).
+    - cron: '23 4 * * *'
+  workflow_dispatch:
+
+env:
+  GITHUB_SERVER_URL: https://git.moleculesai.app
+
+permissions:
+  contents: read
+
+concurrency:
+  group: sync-providers-yaml-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  compare:
+    name: Compare synced providers.yaml against controlplane canonical
+    runs-on: ubuntu-latest
+    timeout-minutes: 6
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Fetch canonical providers.yaml from controlplane and byte-compare
+        env:
+          AUTO_SYNC_TOKEN: ${{ secrets.AUTO_SYNC_TOKEN }}
+          API_ROOT: ${{ github.server_url }}/api/v1
+        run: |
+          set -euo pipefail
+          if [ -z "${AUTO_SYNC_TOKEN:-}" ]; then
+            echo "::warning::AUTO_SYNC_TOKEN secret missing — skipping the live cross-repo compare."
+            echo "The hermetic sha pin (sync_canonical_test.go) still gates hand-edits of core's copy."
+            echo "Provision AUTO_SYNC_TOKEN (read scope on molecule-controlplane) to enable live canonical-drift detection."
+            exit 0
+          fi
+          CANON_URL="${API_ROOT}/repos/molecule-ai/molecule-controlplane/raw/internal/providers/providers.yaml?ref=main"
+          # Use the /raw endpoint: it returns the file bytes directly. (The
+          # /contents endpoint ignores Accept: application/vnd.gitea.raw on
+          # Gitea 1.22.6 and returns the JSON+base64 envelope, which made this
+          # diff a permanent false RED.)
+          curl -fsS \
+            -H "Authorization: token ${AUTO_SYNC_TOKEN}" \
+            "${CANON_URL}" -o /tmp/canonical-providers.yaml
+          LOCAL=workspace-server/internal/providers/providers.yaml
+          if diff -u /tmp/canonical-providers.yaml "$LOCAL"; then
+            echo "OK — core's synced providers.yaml is byte-identical to the controlplane canonical."
+          else
+            echo "::error::core's synced providers.yaml DRIFTED from the controlplane canonical (SSOT)."
+            echo "Re-sync: copy controlplane internal/providers/providers.yaml verbatim over"
+            echo "  $LOCAL, run 'go generate ./...' in workspace-server/, and bump"
+            echo "  canonicalProvidersYAMLSHA256 in internal/providers/sync_canonical_test.go."
+            exit 1
+          fi
@@ -0,0 +1,89 @@
+name: verify-providers-gen
+
+# Provider-registry SSOT enforcement gate — molecule-core side (internal#718
+# P2-A, CTO 2026-05-27 "Distribution = SDK via codegen + verify-CI").
+#
+# The canonical schema SSOT is molecule-controlplane
+# internal/providers/providers.yaml. molecule-core carries a SYNCED COPY at
+# workspace-server/internal/providers/providers.yaml (kept in sync by the
+# companion sync-providers-yaml.yml gate), and cmd/gen-providers emits the
+# checked-in Go projection workspace-server/internal/providers/gen/registry_gen.go.
+#
+# This workflow regenerates the artifact into the working tree and fails RED if
+# it differs from what is committed — catching BOTH:
+#   * a providers.yaml (synced-copy) change that wasn't followed by `go generate ./...`, and
+#   * a hand-edit of the generated artifact (it carries a DO NOT EDIT header).
+#
+# It is the molecule-core mirror of molecule-controlplane's verify-providers-gen
+# workflow. Together with sync-providers-yaml (canonical↔synced-copy drift) it
+# closes the codegen-checked-into-each-repo + verify-CI loop the RFC mandates.
+#
+# ENFORCEMENT GATING (deliberate, per dev-SOP "implementation gating"):
+# this is a STANDALONE workflow, NOT a job inside ci.yml, and is NOT yet in any
+# branch-protection status_check_contexts. Rationale (identical to the CP P0
+# rollout):
+#   * It runs + reports RED on every PR/push immediately (visible signal).
+#   * It is intentionally absent from ci.yml's job set so the ci-required-drift
+#     sentinel (jobs ↔ branch-protection ↔ audit-env) does NOT fire on it, and
+#     from branch protection (turning it into a hard merge gate has blast radius
+#     — operator GO required, same pattern as sop-tier-check / verify-providers-gen
+#     on controlplane). Promote it into branch protection in a follow-up once
+#     P2 has soaked.
+# Until then it behaves like secret-scan / block-internal-paths: a standalone
+# advisory-to-hard gate the author is expected to keep green.
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+  push:
+    branches: [main, staging]
+
+env:
+  GITHUB_SERVER_URL: https://git.moleculesai.app
+
+permissions:
+  contents: read
+
+concurrency:
+  group: verify-providers-gen-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  verify:
+    name: Regenerate providers artifact and fail on drift
+    runs-on: ubuntu-latest
+    timeout-minutes: 8
+    defaults:
+      run:
+        working-directory: workspace-server
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
+        with:
+          go-version: 'stable'
+          cache: true
+          cache-dependency-path: workspace-server/go.sum
+
+      - name: Verify generated artifact is in sync with providers.yaml
+        run: |
+          set -euo pipefail
+          # -check regenerates in memory and byte-compares against the
+          # checked-in artifact; exit 1 (RED) on any drift. This is the
+          # single source of the gate's verdict — the same code path
+          # `go test ./cmd/gen-providers` exercises.
+          go run ./cmd/gen-providers -check
+
+      - name: Belt-and-braces — regenerate in place and assert clean tree
+        run: |
+          set -euo pipefail
+          # Independent confirmation that does not trust the -check path:
+          # actually write the artifact and assert git sees no change. If
+          # this and the step above ever disagree, the gate is suspect.
+          go generate ./...
+          if ! git diff --quiet -- internal/providers/gen/registry_gen.go; then
+            echo "::error::workspace-server/internal/providers/gen/registry_gen.go drifted from providers.yaml."
+            echo "Run 'go generate ./...' (or 'go run ./cmd/gen-providers') in workspace-server/ and commit the result."
+            git --no-pager diff -- internal/providers/gen/registry_gen.go | head -80
+            exit 1
+          fi
+          echo "OK — generated providers artifact is in sync with the schema SSOT."
@@ -34,60 +34,20 @@ interface TemplateSpec {
  providers?: string[];
 }

-interface HermesProvider {
-  id: string;
-  label: string;
-  envVar: string;
-  defaultModel: string;
-  models: string[];
-}
-
-const DEFAULT_LLM_MODELS: SelectorModel[] = [
-  { id: "moonshot/kimi-k2.6", name: "Kimi K2.6", provider: "platform", required_env: [] },
-  { id: "MiniMax-M2.7", name: "MiniMax M2.7", required_env: ["MINIMAX_API_KEY"] },
-  { id: "kimi-k2-turbo-preview", name: "Kimi K2 Turbo Preview", required_env: ["KIMI_API_KEY"] },
-  { id: "claude-sonnet-4-6", name: "Claude Sonnet 4.6", required_env: ["ANTHROPIC_API_KEY"] },
-  { id: "sonnet", name: "Claude Sonnet", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] },
-];
-const DEFAULT_PLATFORM_MODEL = DEFAULT_LLM_MODELS[0];
 const DEFAULT_RUNTIME = "claude-code";
 const RUNTIME_OPTIONS = [
  { value: "claude-code", label: "Claude Code" },
  { value: "codex", label: "OpenAI Codex CLI" },
+  { value: "google-adk", label: "Google ADK" },
  { value: "hermes", label: "Hermes" },
  { value: "openclaw", label: "OpenClaw" },
 ];
-const BASE_RUNTIME_TEMPLATE_IDS = new Set(["claude-code-default", "codex", "hermes", "openclaw"]);
+const BASE_RUNTIME_TEMPLATE_IDS = new Set(["claude-code-default", "codex", "google-adk", "hermes", "openclaw"]);
 const DEFAULT_HEADLESS_INSTANCE_TYPE = "t3.medium";
 const DEFAULT_HEADLESS_ROOT_GB = 30;
 const DEFAULT_DISPLAY_INSTANCE_TYPE = "t3.xlarge";
 const DEFAULT_DISPLAY_ROOT_GB = 80;

-// All providers supported by Hermes runtime via providers.resolve_provider().
-// `defaultModel` is the slug injected into the workspace provision request
-// when the user picks this provider — template-hermes's derive-provider.sh
-// maps the prefix back to the provider name at install time, so this is
-// the canonical handshake. `models` are additional suggestions surfaced in
-// the datalist so the user can pick a different size without typing the
-// whole slug.
-export const HERMES_PROVIDERS: HermesProvider[] = [
-  { id: "anthropic",  label: "Anthropic (Claude)",    envVar: "ANTHROPIC_API_KEY",  defaultModel: "anthropic/claude-sonnet-4-5",   models: ["anthropic/claude-opus-4-5", "anthropic/claude-sonnet-4-5", "anthropic/claude-haiku-4-5"] },
-  { id: "openai",     label: "OpenAI",                envVar: "OPENAI_API_KEY",     defaultModel: "openai/gpt-4o",                 models: ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/o3-mini"] },
-  { id: "openrouter", label: "OpenRouter",            envVar: "OPENROUTER_API_KEY", defaultModel: "openrouter/auto",               models: ["openrouter/auto", "openrouter/anthropic/claude-sonnet-4", "openrouter/meta-llama/llama-3.3-70b"] },
-  { id: "xai",        label: "xAI (Grok)",            envVar: "XAI_API_KEY",        defaultModel: "xai/grok-4",                    models: ["xai/grok-4", "xai/grok-4-mini"] },
-  { id: "gemini",     label: "Google Gemini",         envVar: "GEMINI_API_KEY",     defaultModel: "gemini/gemini-2.5-pro",         models: ["gemini/gemini-2.5-pro", "gemini/gemini-2.5-flash"] },
-  { id: "qwen",       label: "Qwen (Alibaba)",        envVar: "QWEN_API_KEY",       defaultModel: "alibaba/qwen3-max",             models: ["alibaba/qwen3-max", "alibaba/qwen3-coder"] },
-  { id: "glm",        label: "GLM (Zhipu AI)",        envVar: "GLM_API_KEY",        defaultModel: "zai/glm-4.6",                   models: ["zai/glm-4.6", "zai/glm-4.5-air"] },
-  { id: "kimi",       label: "Kimi (Moonshot)",       envVar: "KIMI_API_KEY",       defaultModel: "kimi-coding/kimi-k2",           models: ["kimi-coding/kimi-k2", "kimi-coding/kimi-k1.5"] },
-  { id: "minimax",    label: "MiniMax",               envVar: "MINIMAX_API_KEY",    defaultModel: "minimax/MiniMax-M2.7",          models: ["minimax/MiniMax-M2.7", "minimax/MiniMax-M2.7-highspeed", "minimax/MiniMax-M1"] },
-  { id: "deepseek",   label: "DeepSeek",              envVar: "DEEPSEEK_API_KEY",   defaultModel: "deepseek/deepseek-chat",        models: ["deepseek/deepseek-chat", "deepseek/deepseek-reasoner"] },
-  { id: "groq",       label: "Groq",                  envVar: "GROQ_API_KEY",       defaultModel: "openrouter/groq/llama-3.3-70b", models: ["openrouter/groq/llama-3.3-70b"] },
-  { id: "mistral",    label: "Mistral",               envVar: "MISTRAL_API_KEY",    defaultModel: "openrouter/mistralai/mistral-large", models: ["openrouter/mistralai/mistral-large"] },
-  { id: "together",   label: "Together AI",           envVar: "TOGETHER_API_KEY",   defaultModel: "openrouter/meta-llama/llama-3.3-70b", models: ["openrouter/meta-llama/llama-3.3-70b"] },
-  { id: "fireworks",  label: "Fireworks AI",          envVar: "FIREWORKS_API_KEY",  defaultModel: "openrouter/meta-llama/llama-3.3-70b", models: ["openrouter/meta-llama/llama-3.3-70b"] },
-  { id: "hermes",     label: "Hermes / Nous (legacy)", envVar: "HERMES_API_KEY",    defaultModel: "nousresearch/Hermes-3-Llama-3.1-405B", models: ["nousresearch/Hermes-3-Llama-3.1-405B", "nousresearch/Hermes-4-14B"] },
-];
-
 export function CreateWorkspaceButton() {
  const [open, setOpen] = useState(false);
  const [name, setName] = useState("");
@@ -107,32 +67,20 @@ export function CreateWorkspaceButton() {
  // filter below. Same data source ConfigTab uses (PR #2454). When the
  // selected template declares `runtime_config.providers` in its
  // config.yaml, the modal surfaces only those providers in the
-  // <select>. Empty/missing list falls back to the full HERMES_PROVIDERS
-  // catalog so older templates without the field keep working.
+  // <select>. Provider/model options are derived from template models.
  const [templateSpecs, setTemplateSpecs] = useState<TemplateSpec[]>([]);
  // External-runtime path: skip docker provision, mint a workspace_auth_token,
  // and surface the connection snippet in a modal after create. When
-  // isExternal is true the template / model / hermes-provider fields are
-  // hidden (they're meaningless for BYO-compute agents).
+  // isExternal is true the template and model fields are hidden (they're
+  // meaningless for BYO-compute agents).
  const [isExternal, setIsExternal] = useState(false);
  const [externalRuntime, setExternalRuntime] = useState("external");
  const [externalConnection, setExternalConnection] =
    useState<ExternalConnectionInfo | null>(null);

-  // Hermes-specific state
-  const [hermesProvider, setHermesProvider] = useState("anthropic");
-  const [hermesApiKey, setHermesApiKey] = useState("");
-  // Model slug is sent to CP as `model` and plumbed to the workspace EC2
-  // as HERMES_DEFAULT_MODEL env var. template-hermes's derive-provider.sh
-  // reads the prefix (`minimax/…`, `anthropic/…`) to set
-  // HERMES_INFERENCE_PROVIDER at install time. Missing model → provider
-  // falls back to "auto" and hermes picks its compiled-in default
-  // (Anthropic), which 401s if the user's key is for a different
-  // provider. Hence: require model when template=hermes.
-  const [hermesModel, setHermesModel] = useState("");
  const [llmSelection, setLLMSelection] = useState<SelectorValue>({
-    providerId: "platform|",
-    model: "moonshot/kimi-k2.6",
+    providerId: "",
+    model: "",
    envVars: [],
  });
  const [llmSecret, setLLMSecret] = useState("");
@@ -194,10 +142,7 @@ export function CreateWorkspaceButton() {
  const handleRuntimeChange = useCallback((nextRuntime: string) => {
    setRuntime(nextRuntime);
    setTemplate("");
-    setHermesProvider("anthropic");
-    setHermesApiKey("");
-    setHermesModel("");
-    setLLMSelection({ providerId: "platform|", model: DEFAULT_PLATFORM_MODEL.id, envVars: [] });
+    setLLMSelection({ providerId: "", model: "", envVars: [] });
    setLLMSecret("");
  }, []);

@@ -209,9 +154,12 @@ export function CreateWorkspaceButton() {
    return templateSpecs.find((s) => s.id === template) ?? null;
  }, [template, templateSpecs]);
  const selectedRuntimeTemplateSpec = useMemo<TemplateSpec | null>(() => (
-    templateSpecs.find((s) => s.id === runtime && BASE_RUNTIME_TEMPLATE_IDS.has(s.id)) ?? null
+    templateSpecs.find((s) => {
+      if (!BASE_RUNTIME_TEMPLATE_IDS.has(s.id)) return false;
+      const specRuntime = (s.runtime ?? s.id).trim().toLowerCase();
+      return s.id === runtime || specRuntime === runtime;
+    }) ?? null
  ), [runtime, templateSpecs]);
-  const isHermes = runtime === "hermes";
  const visibleTemplateSpecs = useMemo(
    () => templateSpecs.filter((spec) => {
      if (BASE_RUNTIME_TEMPLATE_IDS.has(spec.id)) return false;
@@ -222,28 +170,11 @@ export function CreateWorkspaceButton() {
  );
  const llmModels = useMemo(
    () => {
-      if (!selectedTemplateSpec?.models?.length) return DEFAULT_LLM_MODELS;
-      if (isHermes) {
-        return selectedTemplateSpec.models;
-      }
-      if (selectedTemplateSpec.models.some((model) => model.provider === "platform")) {
-        return selectedTemplateSpec.models;
-      }
-      const templateDefault = selectedTemplateSpec.model?.trim();
-      const defaultModelSpec = templateDefault
-        ? selectedTemplateSpec.models.find((model) => model.id === templateDefault)
-        : undefined;
-      return [
-        {
-          id: templateDefault || DEFAULT_PLATFORM_MODEL.id,
-          name: defaultModelSpec?.name ?? DEFAULT_PLATFORM_MODEL.name,
-          provider: "platform",
-          required_env: [],
-        },
-        ...selectedTemplateSpec.models,
-      ];
+      const sourceSpec = selectedTemplateSpec ?? selectedRuntimeTemplateSpec;
+      if (!sourceSpec?.models?.length) return [];
+      return sourceSpec.models;
    },
-    [isHermes, selectedTemplateSpec],
+    [selectedRuntimeTemplateSpec, selectedTemplateSpec],
  );
  const llmCatalog = useMemo(() => buildProviderCatalog(llmModels), [llmModels]);
  const selectedLLMProvider = useMemo(
@@ -251,67 +182,22 @@ export function CreateWorkspaceButton() {
    [llmCatalog, llmSelection.providerId],
  );

-  // Filter HERMES_PROVIDERS by what the template declares it supports.
-  // Empty/missing declared list → fall back to the full catalog so
-  // templates that haven't migrated to the explicit `providers:` field
-  // (and self-hosted setups without /templates) keep working unchanged.
-  const availableProviders = useMemo<HermesProvider[]>(() => {
-    const declared = selectedTemplateSpec?.providers ?? selectedRuntimeTemplateSpec?.providers;
-    if (!declared || declared.length === 0) return HERMES_PROVIDERS;
-    const allowed = new Set(declared.map((p) => p.toLowerCase()));
-    const filtered = HERMES_PROVIDERS.filter((p) => allowed.has(p.id.toLowerCase()));
-    // Defensive: if the template's declared list doesn't match anything
-    // in our static catalog (e.g. brand-new provider id we don't have
-    // metadata for yet), fall back to the full list rather than render
-    // an empty <select>. Better to over-show than to lock the user out.
-    return filtered.length > 0 ? filtered : HERMES_PROVIDERS;
-  }, [selectedRuntimeTemplateSpec, selectedTemplateSpec]);
-
-  // If the currently-selected provider is filtered out by a template
-  // change, snap back to the first available. Without this, the
-  // hermesProvider state could refer to a provider not in the dropdown
-  // — confusing UI + the API key field's envVar would be wrong.
  useEffect(() => {
-    if (!isHermes) return;
-    if (availableProviders.length === 0) return;
-    if (!availableProviders.some((p) => p.id === hermesProvider)) {
-      setHermesProvider(availableProviders[0].id);
-    }
-    // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [availableProviders, isHermes]);
-
-  useEffect(() => {
-    if (isHermes || llmCatalog.length === 0) return;
-    const templateDefault = selectedTemplateSpec?.model?.trim();
-    const matched = templateDefault ? findProviderForModel(llmCatalog, templateDefault) : null;
-    const next = matched ?? llmCatalog[0];
+    if (llmCatalog.length === 0) return;
+    const sourceDefault = (selectedTemplateSpec ?? selectedRuntimeTemplateSpec)?.model?.trim();
+    const platformProvider = llmCatalog.find((p) => p.vendor === "platform");
+    const matched = sourceDefault ? findProviderForModel(llmCatalog, sourceDefault) : null;
+    const next = platformProvider ?? matched ?? llmCatalog[0];
+    const defaultModel = next.models.find((model) => model.id === sourceDefault)?.id
+      ?? next.models[0]?.id
+      ?? "";
    setLLMSelection({
      providerId: next.id,
-      model: matched && templateDefault
-        ? templateDefault
-        : next.wildcard
-          ? ""
-          : next.models[0]?.id ?? "",
+      model: next.wildcard ? "" : defaultModel,
      envVars: next.envVars,
    });
    setLLMSecret("");
-  }, [isHermes, llmCatalog, selectedTemplateSpec?.model]);
-
-  // Auto-fill hermesModel with the provider's defaultModel whenever the
-  // provider changes, but only if the user hasn't already typed their own
-  // slug. Prevents the empty-model → "auto" → Anthropic-default 401 trap.
-  useEffect(() => {
-    if (!isHermes) return;
-    const p = HERMES_PROVIDERS.find((x) => x.id === hermesProvider);
-    if (!p) return;
-    // Replace model only if current value matches another provider's
-    // default (user hasn't customized it) OR is empty.
-    const isUntouched =
-      hermesModel === "" ||
-      HERMES_PROVIDERS.some((x) => x.defaultModel === hermesModel);
-    if (isUntouched) setHermesModel(p.defaultModel);
-    // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [hermesProvider, isHermes]);
+  }, [llmCatalog, selectedRuntimeTemplateSpec, selectedTemplateSpec]);

  // Reset form and load workspaces whenever dialog opens
  useEffect(() => {
@@ -328,11 +214,8 @@ export function CreateWorkspaceButton() {
    setDisplayInstanceType(DEFAULT_DISPLAY_INSTANCE_TYPE);
    setDisplayRootGB(String(DEFAULT_DISPLAY_ROOT_GB));
    setDisplayResolution("1920x1080");
-    setHermesProvider("anthropic");
    setExternalRuntime("external");
-    setHermesApiKey("");
-    setHermesModel("");
-    setLLMSelection({ providerId: "platform|", model: "moonshot/kimi-k2.6", envVars: [] });
+    setLLMSelection({ providerId: "", model: "", envVars: [] });
    setLLMSecret("");
    api
      .get<WorkspaceOption[]>("/workspaces")
@@ -341,7 +224,7 @@ export function CreateWorkspaceButton() {
    api
      .get<TemplateSpec[]>("/templates")
      .then((rows) => setTemplateSpecs(Array.isArray(rows) ? rows : []))
-      .catch(() => { /* keep empty — HERMES_PROVIDERS fallback below */ });
+      .catch(() => { /* keep empty; create stays blocked until the catalog loads */ });
    // defaultTier is stable for the session (derived from window.location),
    // safe to omit from deps.
    // eslint-disable-next-line react-hooks/exhaustive-deps
@@ -352,29 +235,18 @@ export function CreateWorkspaceButton() {
      setError("Name is required");
      return;
    }
-    if (isHermes && !hermesApiKey.trim()) {
-      setError("API key is required for Hermes workspaces");
-      return;
-    }
-    if (isHermes && !hermesModel.trim()) {
-      setError("Model is required for Hermes workspaces — provider routing depends on the model slug prefix");
-      return;
-    }
-    if (!isExternal && !isHermes && !llmSelection.model.trim()) {
+    if (!isExternal && !llmSelection.model.trim()) {
      setError("Model is required");
      return;
    }
-    if (!isExternal && !isHermes && selectedLLMProvider?.envVars.length && !llmSecret.trim()) {
+    if (!isExternal && selectedLLMProvider?.envVars.length && !llmSecret.trim()) {
      setError("Provider credential is required");
      return;
    }
    setCreating(true);
    setError(null);

-    const provider = isHermes
-      ? HERMES_PROVIDERS.find((p) => p.id === hermesProvider)
-      : undefined;
-    const nativeProvider = !isHermes ? selectedLLMProvider : undefined;
+    const nativeProvider = selectedLLMProvider;

    try {
      const parsedBudget = budgetLimit.trim()
@@ -398,7 +270,7 @@ export function CreateWorkspaceButton() {
        tier,
        parent_id: parentId || undefined,
        budget_limit: parsedBudget,
-        ...(!isExternal && !isHermes && nativeProvider
+        ...(!isExternal && nativeProvider
          ? {
              model: llmSelection.model.trim(),
              llm_provider: nativeProvider.vendor,
@@ -432,12 +304,6 @@ export function CreateWorkspaceButton() {
        // no container provisioning, token minted, connection payload
        // returned in the response for the modal below.
        ...(isExternal ? { runtime: externalRuntime } : { runtime }),
-        ...(!isExternal && isHermes && provider
-          ? {
-              secrets: { [provider.envVar]: hermesApiKey.trim() },
-              model: hermesModel.trim(),
-            }
-          : {}),
      });
      // External path: keep the create dialog open just long enough to
      // hand control to the connect modal, then close. The connect
@@ -588,7 +454,7 @@ export function CreateWorkspaceButton() {
              </div>
            )}

-            {!isExternal && !isHermes && selectedLLMProvider && (
+            {!isExternal && selectedLLMProvider && (
              <div className="rounded-lg border border-line/50 bg-surface-card/40 p-3 space-y-3">
                <div className="text-[11px] font-medium text-ink-mid">
                  LLM
@@ -744,100 +610,6 @@ export function CreateWorkspaceButton() {
            </div>
          </div>

-          {/* Hermes provider configuration — shown only for the Hermes runtime. */}
-          {isHermes && (
-            <div
-              className="mt-4 rounded-xl border border-violet-700/40 bg-violet-950/20 p-4 space-y-3"
-              data-testid="hermes-provider-section"
-            >
-              <p className="text-[11px] font-semibold text-violet-400 uppercase tracking-wide">
-                Hermes Provider
-              </p>
-              <p className="text-[11px] text-ink-mid -mt-1">
-                Choose the AI provider and paste your API key. The key is
-                stored as an encrypted workspace secret.
-              </p>
-
-              <div>
-                <label
-                  htmlFor="hermes-provider-select"
-                  className="text-[11px] text-ink-mid block mb-1"
-                >
-                  Provider
-                </label>
-                <select
-                  id="hermes-provider-select"
-                  value={hermesProvider}
-                  onChange={(e) => setHermesProvider(e.target.value)}
-                  aria-label="Hermes provider"
-                  className="w-full bg-surface-card/60 border border-line/50 rounded-lg px-3 py-2 text-sm text-ink focus:outline-none focus:border-violet-500/60 focus:ring-1 focus:ring-violet-500/20 transition-colors"
-                >
-                  {availableProviders.map((p) => (
-                    <option key={p.id} value={p.id}>
-                      {p.label}
-                    </option>
-                  ))}
-                </select>
-              </div>
-
-              <div>
-                <label
-                  htmlFor="hermes-api-key-input"
-                  className="text-[11px] text-ink-mid block mb-1"
-                >
-                  API Key{" "}
-                  <span aria-hidden="true" className="text-bad">
-                    *
-                  </span>
-                  <span className="sr-only"> (required)</span>
-                </label>
-                <input
-                  id="hermes-api-key-input"
-                  type="password"
-                  value={hermesApiKey}
-                  onChange={(e) => setHermesApiKey(e.target.value)}
-                  placeholder="sk-…"
-                  aria-label="Hermes API key"
-                  autoComplete="off"
-                  className="w-full bg-surface-card/60 border border-line/50 rounded-lg px-3 py-2 text-sm text-ink placeholder-ink-soft focus:outline-none focus:border-violet-500/60 focus:ring-1 focus:ring-violet-500/20 transition-colors font-mono"
-                />
-              </div>
-
-              <div>
-                <label
-                  htmlFor="hermes-model-input"
-                  className="text-[11px] text-ink-mid block mb-1"
-                >
-                  Model{" "}
-                  <span aria-hidden="true" className="text-bad">
-                    *
-                  </span>
-                  <span className="sr-only"> (required)</span>
-                </label>
-                <input
-                  id="hermes-model-input"
-                  type="text"
-                  value={hermesModel}
-                  onChange={(e) => setHermesModel(e.target.value)}
-                  placeholder="e.g. minimax/MiniMax-M2.7"
-                  aria-label="Hermes model slug"
-                  autoComplete="off"
-                  spellCheck={false}
-                  list="hermes-model-suggestions"
-                  className="w-full bg-surface-card/60 border border-line/50 rounded-lg px-3 py-2 text-sm text-ink placeholder-ink-soft focus:outline-none focus:border-violet-500/60 focus:ring-1 focus:ring-violet-500/20 transition-colors font-mono"
-                />
-                <datalist id="hermes-model-suggestions">
-                  {HERMES_PROVIDERS.find((p) => p.id === hermesProvider)?.models.map(
-                    (m) => <option key={m} value={m} />,
-                  )}
-                </datalist>
-                <p className="text-[10px] text-ink-mid mt-1">
-                  Slug determines which provider hermes routes to at install time.
-                </p>
-              </div>
-            </div>
-          )}
-
          {error && (
            <div
              role="alert"
@@ -49,6 +49,33 @@ export interface ProviderEntry {
  wildcard: boolean;
  /** Optional tooltip text (rendered as native title=). */
  tooltip?: string;
+  /** Billing mode the DERIVED provider implies, when this entry came from the
+   *  registry-backed payload (internal#718 P3): "platform_managed" | "byok".
+   *  Undefined for entries built by the legacy inferVendor heuristic. */
+  billingMode?: "platform_managed" | "byok";
+}
+
+/** RegistryProvider mirrors one entry of GET /templates `registry_providers`
+ *  (workspace-server registryProviderView): the registry's native provider for
+ *  a runtime, with its display label, auth-env NAMES, and billing mode. This is
+ *  the SSOT the dropdown labels come from — the canvas drops VENDOR_LABELS for
+ *  registry-backed runtimes (internal#718 P3, retire-list #4). */
+export interface RegistryProvider {
+  name: string;
+  display_name?: string;
+  auth_env?: string[];
+  billing_mode?: "platform_managed" | "byok";
+  deprecated?: boolean;
+}
+
+/** RegistryModel mirrors one entry of GET /templates `registry_models`: a
+ *  native model id annotated with its DERIVED provider (registry name) and the
+ *  billing_mode that provider implies. */
+export interface RegistryModel {
+  id: string;
+  name?: string;
+  provider?: string;
+  billing_mode?: "platform_managed" | "byok";
 }

 export interface SelectorValue {
@@ -68,6 +95,13 @@ interface Props {
  models: SelectorModel[];
  value: SelectorValue;
  onChange: (next: SelectorValue) => void;
+  /** Optional pre-built provider catalog. When provided, the selector uses it
+   *  verbatim instead of re-inferring one from `models` via
+   *  buildProviderCatalog — the registry-backed path (internal#718 P3), where
+   *  the parent builds the catalog from the registry-served providers/models
+   *  so dropdown labels + billing come from the provider-registry SSOT rather
+   *  than the inferVendor heuristic. Omitted = legacy heuristic over `models`. */
+  catalog?: ProviderEntry[];
  /** Display variant. "grid" = label+control side-by-side (used in ConfigTab
   *  Runtime section). "stack" = vertical (used in MissingKeysModal). */
  variant?: "grid" | "stack";
@@ -251,6 +285,66 @@ export function buildProviderCatalog(models: SelectorModel[]): ProviderEntry[] {
  return Array.from(buckets.values());
 }

+/** Build the provider catalog from a REGISTRY-BACKED GET /templates payload
+ *  (registry_providers + registry_models) — internal#718 P3, retire-list #4.
+ *
+ *  Unlike buildProviderCatalog (which RE-INFERS vendor from model-id prefixes
+ *  + env via inferVendor/VENDOR_LABELS/BARE_VENDOR_PATTERNS), this trusts the
+ *  registry: each model carries its DERIVED `provider` (a registry provider
+ *  name) and the dropdown label/billing/auth come from the matching
+ *  `registry_providers` entry. The canvas can render no provider/model the
+ *  registry did not serve ("only registered selectable"), and the billing-mode
+ *  shown reflects the derived provider rather than a hardcoded rule.
+ *
+ *  A provider with no served model is omitted (no empty buckets). Models whose
+ *  `provider` doesn't match a registry_providers entry still get a bucket
+ *  keyed by the raw provider name (defensive — should not happen for a
+ *  well-formed registry payload), so a model is never silently dropped. */
+export function buildProviderCatalogFromRegistry(
+  registryProviders: RegistryProvider[],
+  registryModels: RegistryModel[],
+): ProviderEntry[] {
+  const byName = new Map<string, RegistryProvider>();
+  for (const p of registryProviders) byName.set(p.name, p);
+
+  // Bucket models by their derived provider name, preserving registry order.
+  const buckets = new Map<string, ProviderEntry>();
+  for (const m of registryModels) {
+    const vendor = (m.provider ?? "").trim();
+    if (!vendor) continue; // un-annotated registry model — skip from the
+    // provider cascade (selectable elsewhere via free-text); it has no
+    // derived provider to bucket under.
+    const meta = byName.get(vendor);
+    const wildcard = m.id.includes("*");
+    let entry = buckets.get(vendor);
+    if (!entry) {
+      entry = {
+        id: `registry|${vendor}`,
+        vendor,
+        label: meta?.display_name || vendor,
+        envVars: meta?.auth_env ?? [],
+        models: [],
+        wildcard,
+        billingMode: meta?.billing_mode ?? m.billing_mode,
+        tooltip: VENDOR_TOOLTIPS[vendor],
+      };
+      buckets.set(vendor, entry);
+    }
+    entry.models.push({ id: m.id, name: m.name, provider: vendor });
+    entry.wildcard = entry.wildcard || wildcard;
+  }
+
+  // Decorate label with model-count when ≥2 concrete models share the bucket,
+  // matching buildProviderCatalog's UX.
+  for (const e of buckets.values()) {
+    if (!e.wildcard && e.models.length > 1) {
+      e.label = `${e.label} (${e.models.length} models)`;
+    }
+  }
+
+  return Array.from(buckets.values());
+}
+
 /** Find the provider entry that contains a given model id. Used by
 *  callers to back-derive the provider when only the model is known
 *  (e.g. ConfigTab loading from saved state). */
@@ -283,6 +377,7 @@ export function ProviderModelSelector({
  models,
  value,
  onChange,
+  catalog: catalogProp,
  variant = "stack",
  allowCustomModelEscape = false,
  disabled = false,
@@ -293,7 +388,12 @@ export function ProviderModelSelector({
  const providerSelectId = `${baseId}-provider`;
  const modelSelectId = `${baseId}-model`;

-  const catalog = useMemo(() => buildProviderCatalog(models), [models]);
+  // Registry-backed path (internal#718 P3): use the parent-supplied catalog
+  // verbatim; otherwise re-infer one from `models` via the legacy heuristic.
+  const catalog = useMemo(
+    () => catalogProp ?? buildProviderCatalog(models),
+    [catalogProp, models],
+  );
  const selected = useMemo(
    () => catalog.find((p) => p.id === value.providerId) ?? null,
    [catalog, value.providerId],
@@ -1,411 +1,82 @@
 // @vitest-environment jsdom
 /**
- * Tests for BudgetSection (issue #541).
+ * Focused tests for BudgetSection's PER-PERIOD progress-bar math + aria (#49).
 *
- * Covers:
- *  - Loading state
- *  - Stats row: used / limit, "Unlimited" when null
- *  - Progress bar: correct percentage, capped at 100%, absent when no limit
- *  - Budget remaining text
- *  - Input pre-fill (existing limit / blank when null)
- *  - Save: PATCH with number, PATCH with null (blank input)
- *  - 402 on GET → exceeded banner, no fetch-error text
- *  - 402 on PATCH → exceeded banner
- *  - Non-402 fetch error → error text
- *  - Non-402 save error → save error alert
- *  - Section header and subheading
- *  - Fetch error does not show stats
+ * Behavioral coverage (loading, save, 402 banners, USD formatting, legacy
+ * back-compat) lives in tabs/__tests__/BudgetSection.test.tsx — this file
+ * deliberately covers only the per-period progress percentage + aria-valuenow
+ * + the over-budget colouring, which that suite doesn't assert in detail. Kept
+ * separate to avoid duplicating the behavioral suite (one component, no
+ * parallel/identical suites).
 */
 import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
-import {
-  render,
-  screen,
-  fireEvent,
-  waitFor,
-  cleanup,
-  act,
-} from "@testing-library/react";
-
-// ── Mock api ──────────────────────────────────────────────────────────────────
+import { render, screen, waitFor, cleanup } from "@testing-library/react";

 vi.mock("@/lib/api", () => ({
-  api: {
-    get: vi.fn(),
-    patch: vi.fn(),
-  },
+  api: { get: vi.fn(), patch: vi.fn() },
 }));

 import { api } from "@/lib/api";
 import { BudgetSection } from "../tabs/BudgetSection";

 const mockGet = vi.mocked(api.get);
-const mockPatch = vi.mocked(api.patch);

-// ── Helpers ───────────────────────────────────────────────────────────────────
+type P = { limit: number | null; spend: number; remaining: number | null };

-function budgetResponse(overrides: Partial<{
-  budget_limit: number | null;
-  budget_used: number;
-  budget_remaining: number | null;
-}> = {}) {
+// Build a periods response where the named period has the given limit/spend.
+function withMonthly(limit: number | null, spend: number) {
+  const blank: P = { limit: null, spend: 0, remaining: null };
+  const monthly: P = { limit, spend, remaining: limit == null ? null : limit - spend };
  return {
-    budget_limit: 1000,
-    budget_used: 250,
-    budget_remaining: 750,
-    ...overrides,
+    periods: { hourly: blank, daily: blank, weekly: blank, monthly },
+    budget_limit: limit,
+    monthly_spend: spend,
+    budget_remaining: monthly.remaining,
  };
 }

-function make402Error(): Error {
-  return new Error("API GET /workspaces/ws-1/budget: 402 Payment Required");
-}
+beforeEach(() => vi.clearAllMocks());
+afterEach(() => cleanup());

-function make402PatchError(): Error {
-  return new Error("API PATCH /workspaces/ws-1/budget: 402 Payment Required");
-}
-
-function makeGenericError(msg = "network timeout"): Error {
-  return new Error(`API GET /workspaces/ws-1/budget: 500 ${msg}`);
-}
-
-beforeEach(() => {
-  vi.clearAllMocks();
-});
-
-afterEach(() => {
-  cleanup();
-});
-
-// ── Rendering helpers ─────────────────────────────────────────────────────────
-
-async function renderLoaded(budgetData = budgetResponse()) {
+async function renderLoaded(data: unknown) {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  mockGet.mockResolvedValueOnce(budgetData as any);
+  mockGet.mockResolvedValueOnce(data as any);
  render(<BudgetSection workspaceId="ws-1" />);
-  // Wait for loading to finish
  await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
 }

-// ── Loading state ─────────────────────────────────────────────────────────────
-
-describe("BudgetSection — loading state", () => {
-  it("shows loading indicator while fetch is in flight", () => {
-    // Never resolve
-    mockGet.mockReturnValue(new Promise(() => {}));
-    render(<BudgetSection workspaceId="ws-1" />);
-    expect(screen.getByTestId("budget-loading")).toBeTruthy();
-    expect(screen.getByText("Loading…")).toBeTruthy();
+describe("BudgetSection — per-period progress bar", () => {
+  it("renders the bar for a limited period and omits it for an unlimited one", async () => {
+    await renderLoaded(withMonthly(1000, 250));
+    expect(screen.getByTestId("budget-monthly-fill")).toBeTruthy();
+    expect(screen.queryByTestId("budget-hourly-fill")).toBeNull(); // hourly unlimited
  });

-  it("hides loading indicator after fetch resolves", async () => {
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    mockGet.mockResolvedValueOnce(budgetResponse() as any);
-    render(<BudgetSection workspaceId="ws-1" />);
-    await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
-  });
-});
-
-// ── Section header ────────────────────────────────────────────────────────────
-
-describe("BudgetSection — header and subheading", () => {
-  it("renders 'Budget' as the section heading", async () => {
-    await renderLoaded();
-    expect(screen.getByText("Budget")).toBeTruthy();
-  });
-
-  it("renders the subheading 'Limit total message credits for this workspace'", async () => {
-    await renderLoaded();
-    expect(
-      screen.getByText("Limit total message credits for this workspace")
-    ).toBeTruthy();
-  });
-
-  it("renders 'Budget limit (credits)' label for the input", async () => {
-    await renderLoaded();
-    expect(screen.getByText("Budget limit (credits)")).toBeTruthy();
-  });
-});
-
-// ── Stats row ─────────────────────────────────────────────────────────────────
-
-describe("BudgetSection — stats row", () => {
-  it("shows budget_used in the stats row", async () => {
-    await renderLoaded(budgetResponse({ budget_used: 350, budget_limit: 1000 }));
-    expect(screen.getByTestId("budget-used-value").textContent).toBe("350");
-  });
-
-  it("shows budget_limit in the stats row", async () => {
-    await renderLoaded(budgetResponse({ budget_used: 100, budget_limit: 500 }));
-    expect(screen.getByTestId("budget-limit-value").textContent).toBe("500");
-  });
-
-  it("shows 'Unlimited' when budget_limit is null", async () => {
-    await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
-    expect(screen.getByTestId("budget-limit-value").textContent).toBe("Unlimited");
-  });
-
-  it("shows budget_remaining when present", async () => {
-    await renderLoaded(budgetResponse({ budget_remaining: 750 }));
-    expect(screen.getByTestId("budget-remaining").textContent).toContain("750");
-    expect(screen.getByTestId("budget-remaining").textContent).toContain("credits remaining");
-  });
-
-  it("hides budget_remaining row when null", async () => {
-    await renderLoaded(budgetResponse({ budget_remaining: null }));
-    expect(screen.queryByTestId("budget-remaining")).toBeNull();
-  });
-
-  it("does not crash when budget_used is missing from the response", async () => {
-    // Backend for a provisioning-stuck workspace may return a partial
-    // shape. Regression: previously this threw
-    // "Cannot read properties of undefined (reading 'toLocaleString')"
-    // and crashed the whole Details tab.
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    await renderLoaded({ budget_limit: 1000, budget_remaining: null } as any);
-    expect(screen.getByTestId("budget-used-value").textContent).toBe("0");
-  });
-});
-
-// ── Progress bar ──────────────────────────────────────────────────────────────
-
-describe("BudgetSection — progress bar", () => {
-  it("renders the progress bar when budget_limit is set", async () => {
-    await renderLoaded(budgetResponse({ budget_used: 250, budget_limit: 1000 }));
-    expect(screen.getByRole("progressbar")).toBeTruthy();
-  });
-
-  it("does NOT render progress bar when budget_limit is null", async () => {
-    await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
-    expect(screen.queryByRole("progressbar")).toBeNull();
-  });
-
-  it("fills to the correct percentage (25%)", async () => {
-    await renderLoaded(budgetResponse({ budget_used: 250, budget_limit: 1000 }));
-    const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
-    expect(fill.style.width).toBe("25%");
-  });
-
-  it("fills to the correct percentage (50%)", async () => {
-    await renderLoaded(budgetResponse({ budget_used: 500, budget_limit: 1000 }));
-    const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
-    expect(fill.style.width).toBe("50%");
-  });
-
-  it("caps fill at 100% when budget_used exceeds budget_limit", async () => {
-    await renderLoaded(budgetResponse({ budget_used: 1500, budget_limit: 1000 }));
-    const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
-    expect(fill.style.width).toBe("100%");
-  });
-
-  it("progress bar has aria-valuenow equal to the calculated percentage", async () => {
-    await renderLoaded(budgetResponse({ budget_used: 300, budget_limit: 1000 }));
-    const bar = screen.getByRole("progressbar");
-    expect(bar.getAttribute("aria-valuenow")).toBe("30");
-  });
-
-  it("shows 0% progress bar when budget_used is absent from the response", async () => {
-    // Regression: budget_used is optional (provisioning-stuck workspaces return
-    // partial shapes). Without the `?? 0` guard the progressPct calculation
-    // throws a TypeScript strict-null error and the build fails.
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    await renderLoaded({ budget_limit: 1000, budget_remaining: null } as any);
-    const bar = screen.getByRole("progressbar");
-    expect(bar.getAttribute("aria-valuenow")).toBe("0");
-    const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
-    expect(fill.style.width).toBe("0%");
-  });
-});
-
-// ── Input pre-fill ────────────────────────────────────────────────────────────
-
-describe("BudgetSection — input pre-fill", () => {
-  it("pre-fills input with existing budget_limit", async () => {
-    await renderLoaded(budgetResponse({ budget_limit: 500 }));
-    const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
-    expect(input.value).toBe("500");
-  });
-
-  it("leaves input empty when budget_limit is null", async () => {
-    await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
-    const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
-    expect(input.value).toBe("");
-  });
-});
-
-// ── Save — PATCH calls ────────────────────────────────────────────────────────
-
-describe("BudgetSection — save", () => {
-  it("calls PATCH /workspaces/:id/budget with budget_limit as integer", async () => {
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: 800 }) as any);
-    await renderLoaded(budgetResponse({ budget_limit: 1000 }));
-
-    fireEvent.change(screen.getByTestId("budget-limit-input"), {
-      target: { value: "800" },
-    });
-    fireEvent.click(screen.getByTestId("budget-save-btn"));
-
-    await waitFor(() => expect(mockPatch).toHaveBeenCalled());
-    expect(mockPatch.mock.calls[0][0]).toBe("/workspaces/ws-1/budget");
-    const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
-    expect(body.budget_limit).toBe(800);
-  });
-
-  it("sends budget_limit: 0 (not null) when input is '0' — zero-credit budget", async () => {
-    // Regression for QA bug report: `parseInt("0") || null` would yield null.
-    // The correct form `raw !== "" ? parseInt(raw, 10) : null` must return 0.
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: 0, budget_used: 0, budget_remaining: 0 }) as any);
-    await renderLoaded(budgetResponse({ budget_limit: 1000 }));
-
-    fireEvent.change(screen.getByTestId("budget-limit-input"), {
-      target: { value: "0" },
-    });
-    fireEvent.click(screen.getByTestId("budget-save-btn"));
-
-    await waitFor(() => expect(mockPatch).toHaveBeenCalled());
-    const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
-    expect(body.budget_limit).toBe(0);
-    expect(body.budget_limit).not.toBeNull();
-  });
-
-  it("sends budget_limit: null when input is blank", async () => {
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: null, budget_remaining: null }) as any);
-    await renderLoaded(budgetResponse({ budget_limit: 1000 }));
-
-    fireEvent.change(screen.getByTestId("budget-limit-input"), {
-      target: { value: "" },
-    });
-    fireEvent.click(screen.getByTestId("budget-save-btn"));
-
-    await waitFor(() => expect(mockPatch).toHaveBeenCalled());
-    const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
-    expect(body.budget_limit).toBeNull();
-  });
-
-  it("updates displayed stats after successful save", async () => {
-    const updated = budgetResponse({ budget_limit: 2000, budget_used: 500, budget_remaining: 1500 });
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    mockPatch.mockResolvedValueOnce(updated as any);
-    await renderLoaded(budgetResponse({ budget_limit: 1000, budget_used: 250 }));
-
-    fireEvent.change(screen.getByTestId("budget-limit-input"), {
-      target: { value: "2000" },
-    });
-    fireEvent.click(screen.getByTestId("budget-save-btn"));
-
-    await waitFor(() =>
-      expect(screen.getByTestId("budget-limit-value").textContent).toBe("2,000")
-    );
-  });
-
-  it("shows save error message on non-402 PATCH failure", async () => {
-    mockPatch.mockRejectedValueOnce(
-      new Error("API PATCH /workspaces/ws-1/budget: 500 server error")
-    );
-    await renderLoaded();
-
-    fireEvent.click(screen.getByTestId("budget-save-btn"));
-
-    await waitFor(() =>
-      expect(screen.getByTestId("budget-save-error")).toBeTruthy()
-    );
-    expect(screen.getByTestId("budget-save-error").textContent).toContain("500");
-  });
-});
-
-// ── 402 handling ──────────────────────────────────────────────────────────────
-
-describe("BudgetSection — 402 handling", () => {
-  it("shows exceeded banner when GET returns 402", async () => {
-    mockGet.mockRejectedValueOnce(make402Error());
-    render(<BudgetSection workspaceId="ws-1" />);
-
-    await waitFor(() =>
-      expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
-    );
-    expect(screen.getByText("Budget exceeded — messages blocked")).toBeTruthy();
-  });
-
-  it("does NOT show fetch error text when GET returns 402 (only banner)", async () => {
-    mockGet.mockRejectedValueOnce(make402Error());
-    render(<BudgetSection workspaceId="ws-1" />);
-
-    await waitFor(() =>
-      expect(screen.queryByTestId("budget-loading")).toBeNull()
-    );
-    expect(screen.queryByTestId("budget-fetch-error")).toBeNull();
-    expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
-  });
-
-  it("shows exceeded banner when PATCH returns 402", async () => {
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    mockGet.mockResolvedValueOnce(budgetResponse() as any);
-    mockPatch.mockRejectedValueOnce(make402PatchError());
-    render(<BudgetSection workspaceId="ws-1" />);
-    await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
-
-    fireEvent.click(screen.getByTestId("budget-save-btn"));
-
-    await waitFor(() =>
-      expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
-    );
-    // Should NOT also show the save-error alert
-    expect(screen.queryByTestId("budget-save-error")).toBeNull();
-  });
-
-  it("clears exceeded banner after a successful save", async () => {
-    mockGet.mockRejectedValueOnce(make402Error());
-    render(<BudgetSection workspaceId="ws-1" />);
-    await waitFor(() =>
-      expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
-    );
-
-    // Now a successful PATCH (limit was raised)
-    const updated = budgetResponse({ budget_limit: 5000, budget_used: 250, budget_remaining: 4750 });
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    mockPatch.mockResolvedValueOnce(updated as any);
-
-    await act(async () => {
-      fireEvent.change(screen.getByTestId("budget-limit-input"), {
-        target: { value: "5000" },
-      });
-      fireEvent.click(screen.getByTestId("budget-save-btn"));
-    });
-
-    await waitFor(() =>
-      expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull()
-    );
-  });
-});
-
-// ── Non-402 fetch error ───────────────────────────────────────────────────────
-
-describe("BudgetSection — non-402 fetch errors", () => {
-  it("shows fetch error text on non-402 GET failure", async () => {
-    mockGet.mockRejectedValueOnce(makeGenericError("internal server error"));
-    render(<BudgetSection workspaceId="ws-1" />);
-
-    await waitFor(() =>
-      expect(screen.getByTestId("budget-fetch-error")).toBeTruthy()
-    );
-    expect(screen.getByTestId("budget-fetch-error").textContent).toContain("500");
-  });
-
-  it("does NOT show stats row on fetch error", async () => {
-    mockGet.mockRejectedValueOnce(makeGenericError());
-    render(<BudgetSection workspaceId="ws-1" />);
-
-    await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
-    expect(screen.queryByTestId("budget-stats-row")).toBeNull();
-  });
-
-  it("does NOT show exceeded banner on non-402 fetch error", async () => {
-    mockGet.mockRejectedValueOnce(makeGenericError());
-    render(<BudgetSection workspaceId="ws-1" />);
-
-    await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
-    expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull();
+  it("fills to 25%", async () => {
+    await renderLoaded(withMonthly(1000, 250));
+    expect((screen.getByTestId("budget-monthly-fill") as HTMLElement).style.width).toBe("25%");
+  });
+
+  it("fills to 50%", async () => {
+    await renderLoaded(withMonthly(1000, 500));
+    expect((screen.getByTestId("budget-monthly-fill") as HTMLElement).style.width).toBe("50%");
+  });
+
+  it("caps fill at 100% when spend exceeds limit", async () => {
+    await renderLoaded(withMonthly(1000, 4000));
+    expect((screen.getByTestId("budget-monthly-fill") as HTMLElement).style.width).toBe("100%");
+  });
+
+  it("sets aria-valuenow to the computed percentage on the progressbar", async () => {
+    await renderLoaded(withMonthly(1000, 250));
+    const bars = screen.getAllByRole("progressbar");
+    // the monthly bar is the only one rendered (others unlimited)
+    expect(bars).toHaveLength(1);
+    expect(bars[0].getAttribute("aria-valuenow")).toBe("25");
+  });
+
+  it("shows a 0% bar when spend is 0 against a set limit", async () => {
+    await renderLoaded(withMonthly(1000, 0));
+    expect((screen.getByTestId("budget-monthly-fill") as HTMLElement).style.width).toBe("0%");
  });
 });
@@ -1,7 +1,7 @@
 // @vitest-environment jsdom
 import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
 import { render, screen, fireEvent, waitFor, cleanup } from "@testing-library/react";
-import { CreateWorkspaceButton, HERMES_PROVIDERS } from "../CreateWorkspaceDialog";
+import { CreateWorkspaceButton } from "../CreateWorkspaceDialog";

 vi.mock("@/lib/api", () => ({
  api: {
@@ -21,6 +21,22 @@ const SAMPLE_WORKSPACES = [
 ];

 const SAMPLE_TEMPLATES = [
+  {
+    id: "claude-code-default",
+    name: "Claude Code Agent",
+    runtime: "claude-code",
+    model: "moonshot/kimi-k2.6",
+    providers: ["platform", "minimax", "kimi-coding", "anthropic", "anthropic-oauth"],
+    models: [
+      { id: "moonshot/kimi-k2.6", name: "Kimi K2.6", provider: "platform", required_env: [] },
+      { id: "MiniMax-M2.7", name: "MiniMax M2.7", required_env: ["MINIMAX_API_KEY"] },
+      { id: "kimi-k2-turbo-preview", name: "Kimi K2 Turbo Preview", required_env: ["KIMI_API_KEY"] },
+      { id: "claude-sonnet-4-6", name: "Claude Sonnet 4.6", required_env: ["ANTHROPIC_API_KEY"] },
+      { id: "sonnet", name: "Claude Sonnet", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] },
+      { id: "opus", name: "Claude Opus", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] },
+      { id: "haiku", name: "Claude Haiku", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] },
+    ],
+  },
  {
    id: "seo-agent",
    name: "SEO Agent",
@@ -33,9 +49,22 @@ const SAMPLE_TEMPLATES = [
      { id: "kimi-k2-turbo-preview", name: "Kimi K2 Turbo Preview", required_env: ["KIMI_API_KEY"] },
      { id: "claude-sonnet-4-6", name: "Claude Sonnet 4.6", required_env: ["ANTHROPIC_API_KEY"] },
      { id: "sonnet", name: "Claude Sonnet", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] },
+      { id: "opus", name: "Claude Opus", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] },
+      { id: "haiku", name: "Claude Haiku", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] },
+    ],
+  },
+  {
+    id: "hermes",
+    name: "Hermes",
+    runtime: "hermes",
+    model: "openai/gpt-4o",
+    providers: ["openai", "anthropic", "platform"],
+    models: [
+      { id: "openai/gpt-4o", name: "GPT-4o", required_env: ["OPENAI_API_KEY"] },
+      { id: "anthropic/claude-sonnet-4-5", name: "Claude Sonnet 4.5", required_env: ["ANTHROPIC_API_KEY"] },
+      { id: "moonshot/kimi-k2.6", name: "Kimi K2.6", provider: "platform", required_env: [] },
    ],
  },
-  { id: "hermes", name: "Hermes", runtime: "hermes" },
 ];

 beforeEach(() => {
@@ -184,6 +213,7 @@ describe("CreateWorkspaceDialog", () => {
    expect(runtimeTexts).toEqual([
      "Claude Code",
      "OpenAI Codex CLI",
+      "Google ADK",
      "Hermes",
      "OpenClaw",
    ]);
@@ -269,6 +299,9 @@ describe("CreateWorkspaceDialog", () => {
    fireEvent.change(document.querySelector("[data-testid='provider-select']") as HTMLSelectElement, {
      target: { value: "anthropic-oauth|CLAUDE_CODE_OAUTH_TOKEN" },
    });
+    fireEvent.change(document.querySelector("[data-testid='model-select']") as HTMLSelectElement, {
+      target: { value: "sonnet" },
+    });
    fireEvent.change(document.getElementById("llm-secret-input") as HTMLInputElement, {
      target: { value: "oauth-token" },
    });
@@ -283,6 +316,18 @@ describe("CreateWorkspaceDialog", () => {
    expect(body.secrets).toEqual({ CLAUDE_CODE_OAUTH_TOKEN: "oauth-token" });
  });

+  it("lists all Claude Code subscription aliases for blank workspaces", async () => {
+    await openDialog();
+
+    fireEvent.change(document.querySelector("[data-testid='provider-select']") as HTMLSelectElement, {
+      target: { value: "anthropic-oauth|CLAUDE_CODE_OAUTH_TOKEN" },
+    });
+
+    const modelSelect = document.querySelector("[data-testid='model-select']") as HTMLSelectElement;
+    const optionValues = Array.from(modelSelect.options).map((option) => option.value);
+    expect(optionValues).toEqual(expect.arrayContaining(["sonnet", "opus", "haiku"]));
+  });
+
  it("renders gracefully when GET /workspaces fails", async () => {
    mockGet.mockRejectedValueOnce(new Error("Network error"));
    await openDialog();
@@ -297,226 +342,103 @@ describe("CreateWorkspaceDialog", () => {
 });

 // ---------------------------------------------------------------------------
-// Hermes provider picker tests
+// Dynamic runtime provider picker tests
 // ---------------------------------------------------------------------------

-describe("CreateWorkspaceDialog — Hermes provider picker", () => {
-  it("does NOT show hermes provider section for non-hermes templates", async () => {
+describe("CreateWorkspaceDialog — dynamic runtime provider picker", () => {
+  it("does not render the old Hermes-only provider section", async () => {
    await openDialog();
-    await setTemplate("seo-agent");
+    await setRuntime("hermes");
    expect(document.querySelector("[data-testid='hermes-provider-section']")).toBeNull();
  });

-  it("shows hermes provider section when runtime is 'hermes'", async () => {
+  it("derives Hermes provider and model options from the /templates runtime row", async () => {
    await openDialog();
    await setRuntime("hermes");
-    await waitFor(() =>
-      expect(document.querySelector("[data-testid='hermes-provider-section']")).toBeTruthy()
-    );
+
+    const providerSelect = document.querySelector("[data-testid='provider-select']") as HTMLSelectElement;
+    await waitFor(() => expect(providerSelect.options.length).toBe(4));
+
+    const providerValues = Array.from(providerSelect.options).map((option) => option.value);
+    expect(providerValues).toEqual(expect.arrayContaining([
+      "platform|",
+      "openai|OPENAI_API_KEY",
+      "anthropic|ANTHROPIC_API_KEY",
+    ]));
+    expect(providerValues).not.toContain("gemini|GEMINI_API_KEY");
  });

-  it("shows hermes provider section for the Hermes runtime preset", async () => {
+  it("uses the template-declared default provider/model for Hermes", async () => {
    await openDialog();
    await setRuntime("hermes");
-    await waitFor(() =>
-      expect(document.querySelector("[data-testid='hermes-provider-section']")).toBeTruthy()
-    );
+
+    await waitFor(() => {
+      const providerSelect = document.querySelector("[data-testid='provider-select']") as HTMLSelectElement;
+      expect(providerSelect.value).toBe("platform|");
+    });
+    const modelSelect = document.querySelector("[data-testid='model-select']") as HTMLSelectElement;
+    expect(modelSelect.value).toBe("moonshot/kimi-k2.6");
  });

-  it("hermes provider dropdown defaults to 'anthropic'", async () => {
+  it("prompts for the provider credential required by the selected Hermes model", async () => {
    await openDialog();
    await setRuntime("hermes");
-    await waitFor(() =>
-      expect(document.querySelector("[data-testid='hermes-provider-section']")).toBeTruthy()
-    );
-    const providerSelect = document.getElementById("hermes-provider-select") as HTMLSelectElement;
-    expect(providerSelect).toBeTruthy();
-    expect(providerSelect.value).toBe("anthropic");
-  });

-  it("hermes provider dropdown lists all 15 providers", async () => {
-    await openDialog();
-    await setRuntime("hermes");
-    await waitFor(() =>
-      expect(document.querySelector("[data-testid='hermes-provider-section']")).toBeTruthy()
-    );
-    const providerSelect = document.getElementById("hermes-provider-select") as HTMLSelectElement;
-    expect(providerSelect.options.length).toBe(HERMES_PROVIDERS.length);
-    const ids = Array.from(providerSelect.options).map((o) => o.value);
-    expect(ids).toContain("anthropic");
-    expect(ids).toContain("openai");
-    expect(ids).toContain("gemini");
-    expect(ids).toContain("deepseek");
-    expect(ids).toContain("hermes");
-  });
-
-  // Pins the dynamic-providers behavior: when the matched template's
-  // /templates row declares `providers`, the dropdown filters to that
-  // subset instead of showing the full HERMES_PROVIDERS catalog. Same
-  // data source ConfigTab uses (PR #2454) — keeps the modal and the
-  // settings tab honest about which providers a template supports.
-  it("hermes provider dropdown filters to template-declared providers when /templates ships them", async () => {
-    // Per-URL mock: /workspaces returns the existing fixture, /templates
-    // returns a hermes row that only allows anthropic + minimax + openai.
-    mockGet.mockImplementation(async (url: string) => {
-      if (url === "/templates") {
-        return [
-          { id: "hermes", name: "Hermes", runtime: "hermes", providers: ["anthropic", "minimax", "openai"] },
-        // eslint-disable-next-line @typescript-eslint/no-explicit-any
-        ] as any;
-      }
-      // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      return SAMPLE_WORKSPACES as any;
+    fireEvent.change(document.querySelector("[data-testid='provider-select']") as HTMLSelectElement, {
+      target: { value: "openai|OPENAI_API_KEY" },
    });

-    await openDialog();
-    await setRuntime("hermes");
-    await waitFor(() =>
-      expect(document.querySelector("[data-testid='hermes-provider-section']")).toBeTruthy()
-    );
-    const providerSelect = document.getElementById("hermes-provider-select") as HTMLSelectElement;
-    // Filtered list arrives async after /templates fetch resolves —
-    // keep waiting until the dropdown shrinks below the full catalog.
-    await waitFor(() => expect(providerSelect.options.length).toBe(3));
-    const ids = Array.from(providerSelect.options).map((o) => o.value);
-    expect(ids).toEqual(expect.arrayContaining(["anthropic", "minimax", "openai"]));
-    expect(ids).not.toContain("gemini");
-    expect(ids).not.toContain("deepseek");
-  });
-
-  // Back-compat: a template that hasn't migrated to runtime_config.providers
-  // (older templates, self-hosted setups without /templates server) keeps
-  // showing the full provider catalog. Operators picking from those
-  // templates can't be locked out of providers we know hermes supports.
-  it("hermes provider dropdown falls back to all providers when template declares no providers list", async () => {
-    mockGet.mockImplementation(async (url: string) => {
-      if (url === "/templates") {
-        // No `providers` field — empty/missing → fall back to full catalog.
-        // eslint-disable-next-line @typescript-eslint/no-explicit-any
-        return [{ id: "hermes", name: "Hermes", runtime: "hermes" }] as any;
-      }
-      // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      return SAMPLE_WORKSPACES as any;
-    });
-
-    await openDialog();
-    await setRuntime("hermes");
-    await waitFor(() =>
-      expect(document.querySelector("[data-testid='hermes-provider-section']")).toBeTruthy()
-    );
-    const providerSelect = document.getElementById("hermes-provider-select") as HTMLSelectElement;
-    expect(providerSelect.options.length).toBe(HERMES_PROVIDERS.length);
-  });
-
-  // Defensive: a template's declared list with NO matches against our
-  // static catalog (e.g. a brand-new provider id we don't have label/
-  // envVar metadata for yet) must not render an empty <select> — the
-  // operator can't pick a provider, the form locks. Component falls
-  // back to the full catalog so the user can still proceed.
-  it("hermes provider dropdown falls back to all providers when template declares only unknown providers", async () => {
-    mockGet.mockImplementation(async (url: string) => {
-      if (url === "/templates") {
-        return [
-          { id: "hermes", name: "Hermes", runtime: "hermes", providers: ["totally-new-provider-2030"] },
-        // eslint-disable-next-line @typescript-eslint/no-explicit-any
-        ] as any;
-      }
-      // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      return SAMPLE_WORKSPACES as any;
-    });
-
-    await openDialog();
-    await setRuntime("hermes");
-    await waitFor(() =>
-      expect(document.querySelector("[data-testid='hermes-provider-section']")).toBeTruthy()
-    );
-    const providerSelect = document.getElementById("hermes-provider-select") as HTMLSelectElement;
-    // Stays at full catalog length — no flapping to 0 then back.
-    expect(providerSelect.options.length).toBe(HERMES_PROVIDERS.length);
-  });
-
-  it("hermes API key field is a password input (masked)", async () => {
-    await openDialog();
-    await setRuntime("hermes");
-    await waitFor(() =>
-      expect(document.querySelector("[data-testid='hermes-provider-section']")).toBeTruthy()
-    );
-    const keyInput = document.getElementById("hermes-api-key-input") as HTMLInputElement;
+    const keyInput = document.getElementById("llm-secret-input") as HTMLInputElement;
    expect(keyInput).toBeTruthy();
    expect(keyInput.type).toBe("password");
  });

-  it("shows an error if hermes template is set but API key is empty on submit", async () => {
+  it("shows an error if the selected runtime provider requires a credential", async () => {
    await openDialog();
    fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
      target: { value: "Hermes Agent" },
    });
    await setRuntime("hermes");
-    await waitFor(() =>
-      expect(document.querySelector("[data-testid='hermes-provider-section']")).toBeTruthy()
-    );
+    fireEvent.change(document.querySelector("[data-testid='provider-select']") as HTMLSelectElement, {
+      target: { value: "openai|OPENAI_API_KEY" },
+    });

-    // Submit without API key
    const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
    fireEvent.click(createBtn!);

    await waitFor(() => {
      const alert = screen.getByRole("alert");
-      expect(alert.textContent).toContain("API key");
+      expect(alert.textContent).toContain("Provider credential");
    });
    expect(mockPost).not.toHaveBeenCalled();
  });

-  it("includes secrets in POST body with correct env var for selected provider", async () => {
-    await openDialog();
-    fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
-      target: { value: "Hermes Agent" },
-    });
-    await setRuntime("hermes");
-    await waitFor(() =>
-      expect(document.querySelector("[data-testid='hermes-provider-section']")).toBeTruthy()
-    );
-
-    // Fill in the API key
-    const keyInput = document.getElementById("hermes-api-key-input") as HTMLInputElement;
-    fireEvent.change(keyInput, { target: { value: "sk-test-anthropic-key" } });
-
-    const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
-    fireEvent.click(createBtn!);
-
-    await waitFor(() => expect(mockPost).toHaveBeenCalled());
-    const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
-    expect(body.secrets).toEqual({ ANTHROPIC_API_KEY: "sk-test-anthropic-key" });
-    expect(body.runtime).toBe("hermes");
-    expect(body.template).toBeUndefined();
-  });
-
-  it("uses the correct env var when a non-default provider is selected", async () => {
+  it("includes runtime-derived provider/model/secrets in POST body", async () => {
    await openDialog();
    fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
      target: { value: "Hermes OpenAI" },
    });
    await setRuntime("hermes");
-    await waitFor(() =>
-      expect(document.querySelector("[data-testid='hermes-provider-section']")).toBeTruthy()
-    );
-
-    // Switch to openai
-    const providerSelect = document.getElementById("hermes-provider-select") as HTMLSelectElement;
-    fireEvent.change(providerSelect, { target: { value: "openai" } });
-
-    const keyInput = document.getElementById("hermes-api-key-input") as HTMLInputElement;
-    fireEvent.change(keyInput, { target: { value: "sk-openai-test" } });
+    fireEvent.change(document.querySelector("[data-testid='provider-select']") as HTMLSelectElement, {
+      target: { value: "openai|OPENAI_API_KEY" },
+    });
+    fireEvent.change(document.getElementById("llm-secret-input") as HTMLInputElement, {
+      target: { value: "sk-openai-test" },
+    });

    const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
    fireEvent.click(createBtn!);

    await waitFor(() => expect(mockPost).toHaveBeenCalled());
    const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
+    expect(body.runtime).toBe("hermes");
+    expect(body.template).toBeUndefined();
+    expect(body.model).toBe("openai/gpt-4o");
+    expect(body.llm_provider).toBe("openai");
    expect(body.secrets).toEqual({ OPENAI_API_KEY: "sk-openai-test" });
  });

-  it("does NOT include secrets field when template is not hermes", async () => {
+  it("does NOT include secrets field when provider is platform-managed", async () => {
    await openDialog();
    fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
      target: { value: "Normal Agent" },
@@ -530,20 +452,6 @@ describe("CreateWorkspaceDialog — Hermes provider picker", () => {
    const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
    expect(body.secrets).toBeUndefined();
  });
-
-  it("hides hermes section and resets state when template is cleared", async () => {
-    await openDialog();
-    await setRuntime("hermes");
-    await waitFor(() =>
-      expect(document.querySelector("[data-testid='hermes-provider-section']")).toBeTruthy()
-    );
-
-    // Switch back to a non-Hermes runtime.
-    await setRuntime("claude-code");
-    await waitFor(() =>
-      expect(document.querySelector("[data-testid='hermes-provider-section']")).toBeNull()
-    );
-  });
 });

 // ---------------------------------------------------------------------------
@@ -0,0 +1,110 @@
+// @vitest-environment jsdom
+//
+// internal#718 P3 (retire-list #4) — when GET /templates serves a
+// registry-backed selectable list (registry_providers + registry_models with
+// display_name / billing_mode / derived provider), the canvas builds the
+// provider catalog FROM that registry data instead of re-inferring vendor
+// from model-id prefixes (VENDOR_LABELS / BARE_VENDOR_PATTERNS / inferVendor).
+// The heuristic path stays only as the fallback for non-registry runtimes /
+// older backends.
+
+import { describe, it, expect } from "vitest";
+import {
+  buildProviderCatalogFromRegistry,
+  type RegistryProvider,
+  type RegistryModel,
+} from "../ProviderModelSelector";
+
+// Mirrors the registry-served claude-code payload from GET /templates
+// (registry_providers / registry_models). display_name + billing_mode come
+// from the registry, NOT from the canvas VENDOR_LABELS map.
+const CLAUDE_CODE_REGISTRY_PROVIDERS: RegistryProvider[] = [
+  {
+    name: "anthropic-oauth",
+    display_name: "Claude Code subscription",
+    auth_env: ["CLAUDE_CODE_OAUTH_TOKEN"],
+    billing_mode: "byok",
+  },
+  {
+    name: "anthropic-api",
+    display_name: "Anthropic API",
+    auth_env: ["ANTHROPIC_API_KEY"],
+    billing_mode: "byok",
+  },
+  {
+    name: "platform",
+    display_name: "Platform",
+    auth_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
+    billing_mode: "platform_managed",
+  },
+];
+
+const CLAUDE_CODE_REGISTRY_MODELS: RegistryModel[] = [
+  { id: "sonnet", provider: "anthropic-oauth", billing_mode: "byok" },
+  { id: "opus", provider: "anthropic-oauth", billing_mode: "byok" },
+  { id: "claude-opus-4-7", provider: "anthropic-api", billing_mode: "byok" },
+  { id: "anthropic/claude-opus-4-7", provider: "platform", billing_mode: "platform_managed" },
+];
+
+describe("buildProviderCatalogFromRegistry", () => {
+  it("buckets models by their DERIVED registry provider, not by inferred vendor", () => {
+    const catalog = buildProviderCatalogFromRegistry(
+      CLAUDE_CODE_REGISTRY_PROVIDERS,
+      CLAUDE_CODE_REGISTRY_MODELS,
+    );
+
+    const byVendor = new Map(catalog.map((p) => [p.vendor, p]));
+    // anthropic-oauth bucket holds the two OAuth-derived models.
+    const oauth = byVendor.get("anthropic-oauth");
+    expect(oauth).toBeDefined();
+    expect(oauth!.models.map((m) => m.id).sort()).toEqual(["opus", "sonnet"]);
+    // platform bucket holds the platform-namespaced model.
+    const platform = byVendor.get("platform");
+    expect(platform).toBeDefined();
+    expect(platform!.models.map((m) => m.id)).toEqual(["anthropic/claude-opus-4-7"]);
+  });
+
+  it("labels providers from the registry display_name, not VENDOR_LABELS", () => {
+    const catalog = buildProviderCatalogFromRegistry(
+      CLAUDE_CODE_REGISTRY_PROVIDERS,
+      CLAUDE_CODE_REGISTRY_MODELS,
+    );
+    const oauth = catalog.find((p) => p.vendor === "anthropic-oauth");
+    // Registry display_name "Claude Code subscription" (decorated with the
+    // model count by the catalog builder is acceptable; assert it carries the
+    // registry label, not an inferred one).
+    expect(oauth!.label).toContain("Claude Code subscription");
+  });
+
+  it("carries the registry billing_mode per provider", () => {
+    const catalog = buildProviderCatalogFromRegistry(
+      CLAUDE_CODE_REGISTRY_PROVIDERS,
+      CLAUDE_CODE_REGISTRY_MODELS,
+    );
+    expect(catalog.find((p) => p.vendor === "anthropic-oauth")!.billingMode).toBe("byok");
+    expect(catalog.find((p) => p.vendor === "platform")!.billingMode).toBe("platform_managed");
+  });
+
+  it("surfaces the registry auth_env on the provider entry", () => {
+    const catalog = buildProviderCatalogFromRegistry(
+      CLAUDE_CODE_REGISTRY_PROVIDERS,
+      CLAUDE_CODE_REGISTRY_MODELS,
+    );
+    expect(catalog.find((p) => p.vendor === "anthropic-oauth")!.envVars).toEqual([
+      "CLAUDE_CODE_OAUTH_TOKEN",
+    ]);
+  });
+
+  it("only includes providers that actually have at least one served model", () => {
+    // anthropic-api is a registry provider but has no model in this slice →
+    // it should not appear as an empty bucket.
+    const models: RegistryModel[] = [
+      { id: "sonnet", provider: "anthropic-oauth", billing_mode: "byok" },
+    ];
+    const catalog = buildProviderCatalogFromRegistry(
+      CLAUDE_CODE_REGISTRY_PROVIDERS,
+      models,
+    );
+    expect(catalog.map((p) => p.vendor)).toEqual(["anthropic-oauth"]);
+  });
+});
@@ -131,7 +131,7 @@ export function OrgTokensTab() {
        <button
          onClick={handleCreate}
          disabled={creating}
-          className="px-3 py-1.5 bg-accent-strong/20 hover:bg-accent-strong/30 border border-accent/30 rounded-lg text-[11px] text-accent font-medium transition-colors disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-1.5"
+          className="px-3 py-1.5 bg-accent-strong/20 hover:bg-accent-strong/30 border border-accent/30 rounded-lg text-[11px] text-accent font-medium transition-colors disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-1.5 focus:outline-none focus-visible:ring-2 focus-visible:ring-accent focus-visible:ring-offset-1"
        >
          {creating ? (
            <>
@@ -175,7 +175,7 @@ export function OrgTokensTab() {
      )}

      {error && (
-        <div className="px-3 py-2 bg-red-950/40 border border-red-800/50 rounded-lg text-[10px] text-bad">
+        <div role="alert" aria-live="assertive" className="px-3 py-2 bg-red-950/40 border border-red-800/50 rounded-lg text-[10px] text-bad">
          {error}
        </div>
      )}
@@ -152,7 +152,7 @@ export function SecretRow({ secret, workspaceId }: SecretRowProps) {
            className="secret-row__action-btn"
            title="Edit"
          >
-            ✏
+            <span aria-hidden="true">✏</span>
          </button>
          <button
            type="button"
@@ -161,7 +161,7 @@ export function SecretRow({ secret, workspaceId }: SecretRowProps) {
            className="secret-row__action-btn secret-row__action-btn--delete"
            title="Delete"
          >
-            🗑
+            <span aria-hidden="true">🗑</span>
          </button>
        </div>
      </div>
@@ -121,7 +121,7 @@ function WorkspaceTokensTab({ workspaceId }: TokensTabProps) {
        <button
          onClick={handleCreate}
          disabled={creating}
-          className="px-3 py-1.5 bg-accent-strong/20 hover:bg-accent-strong/30 border border-accent/30 rounded-lg text-[11px] text-accent font-medium transition-colors disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-1.5"
+          className="px-3 py-1.5 bg-accent-strong/20 hover:bg-accent-strong/30 border border-accent/30 rounded-lg text-[11px] text-accent font-medium transition-colors disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-1.5 focus:outline-none focus-visible:ring-2 focus-visible:ring-accent focus-visible:ring-offset-1"
        >
          {creating ? <><Spinner size="sm" /> Creating...</> : '+ New Token'}
        </button>
@@ -155,7 +155,7 @@ function WorkspaceTokensTab({ workspaceId }: TokensTabProps) {
      )}

      {error && (
-        <div className="px-3 py-2 bg-red-950/40 border border-red-800/50 rounded-lg text-[10px] text-bad">
+        <div role="alert" aria-live="assertive" className="px-3 py-2 bg-red-950/40 border border-red-800/50 rounded-lg text-[10px] text-bad">
          {error}
        </div>
      )}
@@ -7,10 +7,28 @@ import { api } from "@/lib/api";
 // Types
 // ---------------------------------------------------------------------------

+// Period keys MUST match the server SSOT (workspace-server budget_periods.go).
+type BudgetPeriod = "hourly" | "daily" | "weekly" | "monthly";
+
+const PERIODS: { key: BudgetPeriod; label: string }[] = [
+  { key: "hourly", label: "Hourly" },
+  { key: "daily", label: "Daily" },
+  { key: "weekly", label: "Weekly" },
+  { key: "monthly", label: "Monthly" },
+];
+
+interface PeriodBudget {
+  limit: number | null; // USD cents; null = no limit
+  spend: number; // rolling-window spend, USD cents
+  remaining: number | null; // null when no limit
+}
+
 interface BudgetData {
-  budget_limit: number | null;
-  budget_used?: number; // optional — provisioning-stuck workspaces return partial shapes
-  budget_remaining: number | null;
+  periods?: Partial<Record<BudgetPeriod, PeriodBudget>>;
+  // legacy fields (pre-multi-period server) — tolerated for back-compat
+  budget_limit?: number | null;
+  monthly_spend?: number;
+  budget_remaining?: number | null;
 }

 interface Props {
@@ -26,31 +44,71 @@ function isApiError402(e: unknown): boolean {
  return e instanceof Error && /: 402( |$)/.test(e.message);
 }

+/** USD cents → "$X.XX". */
+function fmtUSD(cents: number): string {
+  return `$${(cents / 100).toLocaleString(undefined, { minimumFractionDigits: 2, maximumFractionDigits: 2 })}`;
+}
+
+/** Normalize the server payload (multi-period or legacy) into a period map. */
+function periodsFrom(data: BudgetData | null): Record<BudgetPeriod, PeriodBudget> {
+  const base: Record<BudgetPeriod, PeriodBudget> = {
+    hourly: { limit: null, spend: 0, remaining: null },
+    daily: { limit: null, spend: 0, remaining: null },
+    weekly: { limit: null, spend: 0, remaining: null },
+    monthly: { limit: null, spend: 0, remaining: null },
+  };
+  if (!data) return base;
+  if (data.periods) {
+    for (const { key } of PERIODS) {
+      const p = data.periods[key];
+      if (p) base[key] = { limit: p.limit ?? null, spend: p.spend ?? 0, remaining: p.remaining ?? null };
+    }
+    return base;
+  }
+  // legacy: map the single monthly limit/spend
+  base.monthly = {
+    limit: data.budget_limit ?? null,
+    spend: data.monthly_spend ?? 0,
+    remaining: data.budget_remaining ?? null,
+  };
+  return base;
+}
+
 // ---------------------------------------------------------------------------
 // Component
 // ---------------------------------------------------------------------------

 /**
- * BudgetSection — dedicated "Budget" section in the workspace details panel.
- *
- * - Fetches GET /workspaces/:id/budget on mount for live usage stats
- * - Shows a progress bar (budget_used / budget_limit, blue-500, capped 100%)
- * - Allows updating budget_limit via PATCH /workspaces/:id/budget
- * - Shows a 402-specific "Budget exceeded" amber banner for any blocked state
+ * BudgetSection — per-workspace LLM budget, four independent rolling windows
+ * (hourly / daily / weekly / monthly). Each period has its own ceiling (USD);
+ * spend is the rolling-window LLM cost. Crossing ANY period blocks new work
+ * (server returns 402). Sends PATCH {budget_limits:{period:cents|null}}.
 */
 export function BudgetSection({ workspaceId }: Props) {
  const [budget, setBudget] = useState<BudgetData | null>(null);
  const [loading, setLoading] = useState(true);
  const [fetchError, setFetchError] = useState<string | null>(null);

-  const [limitInput, setLimitInput] = useState("");
+  // One input per period, in USD cents (string for controlled inputs).
+  const [limitInputs, setLimitInputs] = useState<Record<BudgetPeriod, string>>({
+    hourly: "",
+    daily: "",
+    weekly: "",
+    monthly: "",
+  });
  const [saving, setSaving] = useState(false);
  const [saveError, setSaveError] = useState<string | null>(null);
-
-  /** True when a 402 has been seen from any API call in this section. */
  const [budgetExceeded, setBudgetExceeded] = useState(false);

-  // ── Fetch current budget data ─────────────────────────────────────────────
+  const syncInputs = useCallback((data: BudgetData | null) => {
+    const p = periodsFrom(data);
+    setLimitInputs({
+      hourly: p.hourly.limit != null ? String(p.hourly.limit) : "",
+      daily: p.daily.limit != null ? String(p.daily.limit) : "",
+      weekly: p.weekly.limit != null ? String(p.weekly.limit) : "",
+      monthly: p.monthly.limit != null ? String(p.monthly.limit) : "",
+    });
+  }, []);

  const loadBudget = useCallback(async () => {
    setLoading(true);
@@ -58,7 +116,7 @@ export function BudgetSection({ workspaceId }: Props) {
    try {
      const data = await api.get<BudgetData>(`/workspaces/${workspaceId}/budget`);
      setBudget(data);
-      setLimitInput(data.budget_limit != null ? String(data.budget_limit) : "");
+      syncInputs(data);
    } catch (e) {
      if (isApiError402(e)) {
        setBudgetExceeded(true);
@@ -68,29 +126,30 @@ export function BudgetSection({ workspaceId }: Props) {
    } finally {
      setLoading(false);
    }
-  }, [workspaceId]);
+  }, [workspaceId, syncInputs]);

  useEffect(() => {
    loadBudget();
  }, [loadBudget]);

-  // ── Save handler ──────────────────────────────────────────────────────────
-
  const handleSave = async () => {
    setSaving(true);
    setSaveError(null);
-    const raw = limitInput.trim();
-    // Use explicit empty-string check (not falsy check) so that a
-    // user-entered "0" is sent as budget_limit: 0, not null (unlimited).
-    const parsedLimit = raw !== "" ? parseInt(raw, 10) : null;
-
+    // Build the per-period map: blank → null (clear); a number → that ceiling.
+    const budget_limits: Record<BudgetPeriod, number | null> = {
+      hourly: null,
+      daily: null,
+      weekly: null,
+      monthly: null,
+    };
+    for (const { key } of PERIODS) {
+      const raw = limitInputs[key].trim();
+      budget_limits[key] = raw !== "" ? parseInt(raw, 10) : null;
+    }
    try {
-      const updated = await api.patch<BudgetData>(`/workspaces/${workspaceId}/budget`, {
-        budget_limit: parsedLimit,
-      });
+      const updated = await api.patch<BudgetData>(`/workspaces/${workspaceId}/budget`, { budget_limits });
      setBudget(updated);
-      setLimitInput(updated.budget_limit != null ? String(updated.budget_limit) : "");
-      // Clear exceeded state if the save succeeded (limit was raised or removed)
+      syncInputs(updated);
      setBudgetExceeded(false);
    } catch (e) {
      if (isApiError402(e)) {
@@ -103,24 +162,15 @@ export function BudgetSection({ workspaceId }: Props) {
    }
  };

-  // ── Progress calculation ──────────────────────────────────────────────────
-
-  const progressPct =
-    budget && budget.budget_limit != null && budget.budget_limit > 0
-      ? Math.min(100, Math.round(((budget.budget_used ?? 0) / budget.budget_limit) * 100))
-      : 0;
-
-  // ── Render ────────────────────────────────────────────────────────────────
+  const periods = periodsFrom(budget);

  return (
    <div className="space-y-3" data-testid="budget-section">
      {/* Section header */}
      <div>
-        <h3 className="text-xs font-semibold text-ink-mid uppercase tracking-wider">
-          Budget
-        </h3>
+        <h3 className="text-xs font-semibold text-ink-mid uppercase tracking-wider">Budget</h3>
        <p className="text-[11px] text-ink-mid mt-0.5">
-          Limit total message credits for this workspace
+          Cap LLM spend for this workspace per period — crossing any limit pauses new work
        </p>
      </div>

@@ -131,32 +181,14 @@ export function BudgetSection({ workspaceId }: Props) {
          data-testid="budget-exceeded-banner"
          className="flex items-center gap-2 px-3 py-2 rounded-lg bg-surface border border-amber-700/50 text-warm text-xs font-medium"
        >
-          <svg
-            width="13"
-            height="13"
-            viewBox="0 0 13 13"
-            fill="none"
-            aria-hidden="true"
-            className="shrink-0"
-          >
-            <path
-              d="M6.5 1.5L11.5 10.5H1.5L6.5 1.5Z"
-              stroke="currentColor"
-              strokeWidth="1.4"
-              strokeLinejoin="round"
-            />
-            <path
-              d="M6.5 5.5V7.5M6.5 9.5h.01"
-              stroke="currentColor"
-              strokeWidth="1.4"
-              strokeLinecap="round"
-            />
+          <svg width="13" height="13" viewBox="0 0 13 13" fill="none" aria-hidden="true" className="shrink-0">
+            <path d="M6.5 1.5L11.5 10.5H1.5L6.5 1.5Z" stroke="currentColor" strokeWidth="1.4" strokeLinejoin="round" />
+            <path d="M6.5 5.5V7.5M6.5 9.5h.01" stroke="currentColor" strokeWidth="1.4" strokeLinecap="round" />
          </svg>
-          Budget exceeded — messages blocked
+          Budget exceeded — new work paused
        </div>
      )}

-      {/* Usage stats */}
      {loading ? (
        <p className="text-xs text-ink-mid" data-testid="budget-loading">
          Loading…
@@ -165,89 +197,78 @@ export function BudgetSection({ workspaceId }: Props) {
        <p className="text-xs text-bad" data-testid="budget-fetch-error">
          {fetchError}
        </p>
-      ) : budget ? (
-        <div className="space-y-2">
-          {/* Stats row */}
-          <div className="flex items-baseline justify-between" data-testid="budget-stats-row">
-            <span className="text-xs text-ink-mid">Credits used</span>
-            <span className="text-xs font-mono text-ink-mid">
-              <span data-testid="budget-used-value">{(budget.budget_used ?? 0).toLocaleString()}</span>
-              <span className="text-ink-mid mx-1">/</span>
-              <span data-testid="budget-limit-value">
-                {budget.budget_limit != null
-                  ? budget.budget_limit.toLocaleString()
-                  : "Unlimited"}
-              </span>
-            </span>
-          </div>
+      ) : (
+        <div className="space-y-3">
+          {PERIODS.map(({ key, label }) => {
+            const p = periods[key];
+            const pct =
+              p.limit != null && p.limit > 0 ? Math.min(100, Math.round((p.spend / p.limit) * 100)) : 0;
+            const over = p.limit != null && p.spend >= p.limit;
+            return (
+              <div key={key} className="space-y-1" data-testid={`budget-period-${key}`}>
+                <div className="flex items-baseline justify-between">
+                  <label htmlFor={`budget-${key}-${workspaceId}`} className="text-xs text-ink-mid">
+                    {label}
+                  </label>
+                  <span className="text-[11px] font-mono text-ink-mid">
+                    <span data-testid={`budget-${key}-spend`}>{fmtUSD(p.spend)}</span>
+                    <span className="mx-1">/</span>
+                    <span data-testid={`budget-${key}-limit`}>{p.limit != null ? fmtUSD(p.limit) : "∞"}</span>
+                  </span>
+                </div>
+                {p.limit != null && (
+                  <div
+                    role="progressbar"
+                    aria-label={`${label} budget usage`}
+                    aria-valuenow={pct}
+                    aria-valuemin={0}
+                    aria-valuemax={100}
+                    className="h-1.5 w-full rounded-full bg-surface-card overflow-hidden"
+                  >
+                    <div
+                      data-testid={`budget-${key}-fill`}
+                      className={`h-full rounded-full transition-all duration-300 ${over ? "bg-bad" : "bg-accent"}`}
+                      style={{ width: `${pct}%` }}
+                    />
+                  </div>
+                )}
+                <input
+                  id={`budget-${key}-${workspaceId}`}
+                  type="number"
+                  min="0"
+                  step="1"
+                  value={limitInputs[key]}
+                  onChange={(e) => setLimitInputs((s) => ({ ...s, [key]: e.target.value }))}
+                  placeholder="USD cents — blank for unlimited"
+                  data-testid={`budget-${key}-input`}
+                  className="w-full bg-surface-card border border-line rounded-lg px-3 py-1.5 text-xs text-ink-mid placeholder-zinc-500 focus:outline-none focus:border-accent focus:ring-1 focus:ring-accent/30 transition-colors"
+                />
+              </div>
+            );
+          })}

-          {/* Progress bar (only when limit is set) */}
-          {budget.budget_limit != null && (
+          <p className="text-[11px] text-ink-mid">Limits are USD cents (e.g. 500 = $5.00). Blank = unlimited.</p>
+
+          {saveError && (
            <div
-              role="progressbar"
-              aria-label="Budget usage"
-              aria-valuenow={progressPct}
-              aria-valuemin={0}
-              aria-valuemax={100}
-              className="h-1.5 w-full rounded-full bg-surface-card overflow-hidden"
+              role="alert"
+              data-testid="budget-save-error"
+              className="px-3 py-1.5 rounded-lg bg-red-950/40 border border-red-800/50 text-xs text-bad"
            >
-              <div
-                data-testid="budget-progress-fill"
-                className="h-full rounded-full bg-accent transition-all duration-300"
-                style={{ width: `${progressPct}%` }}
-              />
+              {saveError}
            </div>
          )}

-          {/* Remaining credits */}
-          {budget.budget_remaining != null && (
-            <p className="text-[11px] text-ink-mid" data-testid="budget-remaining">
-              {budget.budget_remaining.toLocaleString()} credits remaining
-            </p>
-          )}
-        </div>
-      ) : null}
-
-      {/* Input + Save */}
-      <div className="space-y-1.5 pt-1">
-        <label
-          htmlFor={`budget-limit-input-${workspaceId}`}
-          className="text-[11px] text-ink-mid block"
-        >
-          Budget limit (credits)
-        </label>
-        <input
-          id={`budget-limit-input-${workspaceId}`}
-          type="number"
-          min="0"
-          step="1"
-          value={limitInput}
-          onChange={(e) => setLimitInput(e.target.value)}
-          placeholder="e.g. 1000 — blank for unlimited"
-          data-testid="budget-limit-input"
-          className="w-full bg-surface-card border border-line rounded-lg px-3 py-2 text-sm text-ink-mid placeholder-zinc-500 focus:outline-none focus:border-accent focus:ring-1 focus:ring-accent/30 transition-colors"
-        />
-        <p className="text-xs text-ink-mid">Leave blank for unlimited</p>
-
-        {saveError && (
-          <div
-            role="alert"
-            data-testid="budget-save-error"
-            className="px-3 py-1.5 rounded-lg bg-red-950/40 border border-red-800/50 text-xs text-bad"
+          <button
+            onClick={handleSave}
+            disabled={saving}
+            data-testid="budget-save-btn"
+            className="px-4 py-1.5 bg-accent-strong hover:bg-accent active:bg-accent-strong rounded-lg text-xs font-medium text-white disabled:opacity-50 transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent focus-visible:ring-offset-1 focus-visible:ring-offset-zinc-900"
          >
-            {saveError}
-          </div>
-        )}
-
-        <button
-          onClick={handleSave}
-          disabled={saving}
-          data-testid="budget-save-btn"
-          className="px-4 py-1.5 bg-accent-strong hover:bg-accent active:bg-accent-strong rounded-lg text-xs font-medium text-white disabled:opacity-50 transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent focus-visible:ring-offset-1 focus-visible:ring-offset-zinc-900"
-        >
-          {saving ? "Saving…" : "Save"}
-        </button>
-      </div>
+            {saving ? "Saving…" : "Save"}
+          </button>
+        </div>
+      )}
    </div>
  );
 }
@@ -6,12 +6,17 @@ import { useCanvasStore } from "@/store/canvas";
 import { type ConfigData, DEFAULT_CONFIG, TextInput, NumberInput, Toggle, TagList, Section } from "./config/form-inputs";
 import { parseYaml, toYaml } from "./config/yaml-utils";
 import { SecretsSection } from "./config/secrets-section";
+import { LLMBillingSection } from "./config/llm-billing-section";
 import { ExternalConnectionSection } from "./ExternalConnectionSection";
 import {
  ProviderModelSelector,
  buildProviderCatalog,
+  buildProviderCatalogFromRegistry,
  findProviderForModel,
  type SelectorValue,
+  type ProviderEntry,
+  type RegistryProvider,
+  type RegistryModel,
 } from "../ProviderModelSelector";
 import { isExternalLikeRuntime } from "@/lib/externalRuntimes";

@@ -257,6 +262,17 @@ interface RuntimeOption {
  // canvas falls back to deriving unique vendor prefixes from
  // models[].id (still adapter-driven, just inferred).
  providers: string[];
+  // registryBacked / registryProviders / registryModels come from the
+  // registry-served GET /templates fields (internal#718 P3). When
+  // registryBacked is true, the selectable provider+model list is built from
+  // the registry (registryProviders/registryModels) — display labels +
+  // billing mode + derived provider come from the provider-registry SSOT, not
+  // the canvas VENDOR_LABELS / billingModeForProvider vocabularies. When
+  // false (non-registry runtime / older backend), the canvas falls back to
+  // the template-served models[] + its inferVendor heuristic.
+  registryBacked: boolean;
+  registryProviders: RegistryProvider[];
+  registryModels: RegistryModel[];
 }

 // deriveProvidersFromModels — when a template doesn't ship an explicit
@@ -287,6 +303,66 @@ export function deriveProvidersFromModels(models: ModelSpec[]): string[] {
  return out;
 }

+// billingModeForProvider — maps a selected PROVIDER (vendor key) to the
+// LLM billing_mode it implies (internal#703 Gap 2).
+//
+// Today, picking a non-Platform provider in the Config tab writes the
+// credential env (CLAUDE_CODE_OAUTH_TOKEN / vendor key) but leaves
+// llm_billing_mode at its resolved default (`platform_managed`). The CP
+// tenant_config endpoint then keeps injecting the platform proxy base
+// URLs, so the OAuth token / vendor key is never actually used — BYOK
+// silently no-ops (the live SEO-Agent symptom in #703). The workspace-
+// server even hard-blocks vendor-key writes on platform_managed
+// workspaces (secrets.go:87), pointing the user at this exact billing-
+// mode switch. Wiring the provider change to also set billing_mode is
+// the UI half that makes BYOK take (the CP/workspace-server backend half
+// is being fixed in parallel — internal#703 Gap 1).
+//
+// Mapping:
+//   - "platform" (the Platform-managed proxy) OR "" (no explicit
+//     provider override → inherit, defaults to platform) → "platform_managed".
+//   - any other vendor key ("anthropic-oauth" = Claude Code subscription
+//     OAuth, "anthropic" = Anthropic API key, "minimax", "openrouter",
+//     etc.) → "byok".
+//
+// Returns the billing_mode string the PUT body should carry. The valid
+// set is fixed by workspace-server's recognizer (platform_managed | byok
+// | disabled); "disabled" is never auto-selected by a provider choice —
+// it's an explicit operator action via the LLM Billing section.
+export type LLMBillingMode = "platform_managed" | "byok";
+
+export function billingModeForProvider(provider: string): LLMBillingMode {
+  const v = provider.trim().toLowerCase();
+  if (v === "" || v === "platform") return "platform_managed";
+  return "byok";
+}
+
+// billingModeForSelectedProvider — internal#718 P3 (retire-list #5): the
+// billing mode the Config tab shows/sends for the selected PROVIDER, sourced
+// from the registry-served catalog when available rather than the hardcoded
+// billingModeForProvider rule.
+//
+// When the runtime is registry-backed, GET /templates serves each provider's
+// DERIVED billing_mode (platform_managed for the closed platform provider,
+// byok otherwise) on the ProviderEntry. We read it off the catalog so the UI
+// reflects the registry SSOT — the same predicate billing/credential emission
+// keys off the derived provider.
+//
+// Falls back to billingModeForProvider when: no catalog (non-registry runtime
+// / older backend), or the provider string isn't carried by the catalog
+// (e.g. a stale saved value). The fallback keeps the legacy behavior intact
+// for everything the registry doesn't yet speak to.
+export function billingModeForSelectedProvider(
+  provider: string,
+  catalog?: ProviderEntry[],
+): LLMBillingMode {
+  if (catalog && catalog.length > 0) {
+    const entry = catalog.find((p) => p.vendor === provider.trim());
+    if (entry?.billingMode) return entry.billingMode;
+  }
+  return billingModeForProvider(provider);
+}
+
 // Fallback used when /templates can't be fetched (offline, older backend).
 // Keep in sync with manifest.json workspace_templates as a defensive default.
 // Model + env suggestions only flow when the backend is reachable.
@@ -301,13 +377,20 @@ export function deriveProvidersFromModels(models: ModelSpec[]): string[] {
 // config.yaml` on the container is a separate runtime-internal file,
 // not this one.
 const RUNTIMES_WITH_OWN_CONFIG = new Set<string>(["external", "kimi", "kimi-cli", "openclaw"]);
-const SUPPORTED_RUNTIME_VALUES = new Set(["claude-code", "codex", "openclaw", "hermes"]);
+// The runtime picker is SSOT-driven: options come from GET /templates,
+// which workspace-server already gates to the manifest.json maintained set
+// (loadRuntimesFromManifest). A hand-maintained frontend allowlist silently
+// dropped runtimes the backend added (google-adk shipped in manifest but was
+// filtered out, so its workspaces rendered the wrong default option). A
+// template may still opt OUT of the picker via `displayable: false` on its
+// /templates row. See project_canvas_runtime_dropdown_ssot_fix.

 const FALLBACK_RUNTIME_OPTIONS: RuntimeOption[] = [
-  { value: "claude-code", label: "Claude Code", models: [], providers: [] },
-  { value: "codex", label: "Codex", models: [], providers: [] },
-  { value: "openclaw", label: "OpenClaw", models: [], providers: [] },
-  { value: "hermes", label: "Hermes", models: [], providers: [] },
+  { value: "claude-code", label: "Claude Code", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
+  { value: "codex", label: "Codex", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
+  { value: "google-adk", label: "Google ADK", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
+  { value: "openclaw", label: "OpenClaw", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
+  { value: "hermes", label: "Hermes", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
 ];

 export function ConfigTab({ workspaceId }: Props) {
@@ -320,15 +403,24 @@ export function ConfigTab({ workspaceId }: Props) {
  const [rawMode, setRawMode] = useState(false);
  const [rawDraft, setRawDraft] = useState("");
  const [runtimeOptions, setRuntimeOptions] = useState<RuntimeOption[]>(FALLBACK_RUNTIME_OPTIONS);
-  // Provider override (Option B PR-5): stored separately from config.yaml
-  // because the value lives in workspace_secrets (encrypted), not in the
-  // platform-managed config.yaml. The two endpoints are GET/PUT
-  // /workspaces/:id/provider on workspace-server (handlers/secrets.go).
-  // Empty = "auto-derive from model slug prefix" — pre-Option-B behavior
-  // and what most users want. Setting to a non-empty value writes
-  // LLM_PROVIDER into workspace_secrets and triggers an auto-restart so
-  // the workspace boots with the new provider in env (and via CP user-
-  // data, written into /configs/config.yaml on next provision too).
+  // internal#718 P4 closure: the explicit provider override
+  // (LLM_PROVIDER workspace_secret, surfaced via GET/PUT
+  // /workspaces/:id/provider) has been RETIRED. The provider is
+  // derived at every decision point from (runtime, model) via the
+  // registry — no stored row remains. The `provider` / `originalProvider`
+  // state and the provider dropdown survive in this component for
+  // backwards-compat (display only) but are no longer persisted:
+  //   - loadConfig no longer GETs /workspaces/:id/provider (the
+  //     endpoint returns 410 Gone). The state initializes to ""
+  //     and stays there.
+  //   - handleSave no longer PUTs /workspaces/:id/provider.
+  //   - The dropdown still updates the local `provider` state so the
+  //     user can preview the derived value; the value never leaves
+  //     the browser.
+  // This is the canvas-side complement to the backend retirement of
+  // SetProvider/GetProvider/setProviderSecret. Older canvases that
+  // still call PUT /provider hit the 410 Gone with a structured
+  // PROVIDER_ENDPOINT_RETIRED code — loud failure, no silent miss.
  const [provider, setProvider] = useState("");
  const [originalProvider, setOriginalProvider] = useState("");
  // Track the model the form first rendered, so handleSave can detect
@@ -379,26 +471,23 @@ export function ConfigTab({ workspaceId }: Props) {
    //
    // See GH #1894 for the workspace-row-as-source-of-truth rationale
    // that motivated splitting from a single config.yaml read.
-    const [wsRes, modelRes, providerRes] = await Promise.all([
+    // internal#718 P4 closure: the GET /workspaces/:id/provider leg is
+    // RETIRED — the endpoint returns 410 Gone. Provider is now derived
+    // from (runtime, model) via the registry; no stored value exists
+    // to load. Always seed the local state to "" so the dropdown
+    // initializes to "auto-derive".
+    const [wsRes, modelRes] = await Promise.all([
      api.get<{ runtime?: string; tier?: number }>(`/workspaces/${workspaceId}`)
        .catch(() => ({} as { runtime?: string; tier?: number })),
      api.get<{ model?: string }>(`/workspaces/${workspaceId}/model`)
        .catch(() => ({} as { model?: string })),
-      api.get<{ provider?: string }>(`/workspaces/${workspaceId}/provider`)
-        .catch(() => null),
    ]);
    const wsMetadataRuntime = (wsRes.runtime || "").trim();
    const wsMetadataModel = (modelRes.model || "").trim();
    const wsMetadataTier: number | null =
      typeof wsRes.tier === "number" ? wsRes.tier : null;
-    if (providerRes !== null) {
-      const loadedProvider = (providerRes.provider || "").trim();
-      setProvider(loadedProvider);
-      setOriginalProvider(loadedProvider);
-    } else {
-      setProvider("");
-      setOriginalProvider("");
-    }
+    setProvider("");
+    setOriginalProvider("");
    // originalModel is set further down once the YAML has been parsed —
    // we want it to reflect what the form ACTUALLY rendered, which may
    // be the YAML's runtime_config.model fallback when MODEL_PROVIDER
@@ -492,20 +581,49 @@ export function ConfigTab({ workspaceId }: Props) {

  useEffect(() => {
    let cancelled = false;
-    api.get<Array<{ id: string; name?: string; runtime?: string; models?: ModelSpec[]; providers?: string[] }>>("/templates")
+    api.get<Array<{
+      id: string;
+      name?: string;
+      runtime?: string;
+      models?: ModelSpec[];
+      providers?: string[];
+      // internal#718 P3 registry-served fields (additive; absent on older
+      // backends and for non-registry runtimes).
+      registry_backed?: boolean;
+      registry_providers?: RegistryProvider[];
+      registry_models?: RegistryModel[];
+      displayable?: boolean;
+    }>>("/templates")
      .then((rows) => {
        if (cancelled || !Array.isArray(rows)) return;
        const byRuntime = new Map<string, RuntimeOption>();
        for (const r of rows) {
          const v = (r.runtime || "").trim();
-          if (!SUPPORTED_RUNTIME_VALUES.has(v)) continue;
+          if (!v) continue;
+          // Honor an explicit opt-out; absent/true means show it.
+          if (r.displayable === false) continue;
          // Last template wins if two templates share a runtime — rare, and the
          // one with the richer models list is probably newer.
          const existing = byRuntime.get(v);
          const models = Array.isArray(r.models) ? r.models : [];
          const providers = Array.isArray(r.providers) ? r.providers : [];
-          if (!existing || models.length > existing.models.length) {
-            byRuntime.set(v, { value: v, label: r.name || v, models, providers });
+          const registryProviders = Array.isArray(r.registry_providers) ? r.registry_providers : [];
+          const registryModels = Array.isArray(r.registry_models) ? r.registry_models : [];
+          const registryBacked = r.registry_backed === true && registryModels.length > 0;
+          // Prefer the richer payload: a registry-backed entry, then more
+          // template models. Keeps the "last/richer template wins" intent.
+          const score = (o: RuntimeOption) => (o.registryBacked ? 1000 : 0) + o.models.length;
+          const candidate: RuntimeOption = {
+            value: v,
+            label: r.name || v,
+            models,
+            providers,
+            registryBacked,
+            registryProviders,
+            registryModels,
+          };
+          if (!existing || score(candidate) > score(existing)) {
+            byRuntime.set(v, candidate);
          }
        }
        if (byRuntime.size > 0) setRuntimeOptions(Array.from(byRuntime.values()));
@@ -516,7 +634,13 @@ export function ConfigTab({ workspaceId }: Props) {

  // Models + env hints for the currently-selected runtime.
  const selectedRuntime = runtimeOptions.find((o) => o.value === (config.runtime || "")) ?? null;
-  const availableModels: ModelSpec[] = selectedRuntime?.models ?? [];
+  // Memoised so its identity is stable across renders — it feeds several
+  // useMemo dependency arrays below (registry/legacy catalog, selector models)
+  // and a fresh `[]` literal each render would defeat their memoisation.
+  const availableModels: ModelSpec[] = useMemo(
+    () => selectedRuntime?.models ?? [],
+    [selectedRuntime?.models],
+  );
  // Provider suggestions for the legacy free-text input fallback (used
  // when /templates returned no models for this runtime, e.g. hermes
  // workspaces). Prefer the runtime's declarative providers list,
@@ -530,9 +654,37 @@ export function ConfigTab({ workspaceId }: Props) {

  // Vendor-aware catalog shared with the selector. Memoised so the
  // catalog identity is stable across renders (selector relies on it).
+  //
+  // internal#718 P3: when the runtime is registry-backed, build the catalog
+  // FROM the registry-served providers/models (display labels + billing +
+  // derived provider from the provider-registry SSOT) instead of re-inferring
+  // vendor from model-id prefixes. Falls back to the inferVendor heuristic
+  // for non-registry runtimes / older backends.
+  const registryBacked = selectedRuntime?.registryBacked ?? false;
  const providerCatalog = useMemo(
-    () => buildProviderCatalog(availableModels),
-    [availableModels],
+    () =>
+      registryBacked
+        ? buildProviderCatalogFromRegistry(
+            selectedRuntime?.registryProviders ?? [],
+            selectedRuntime?.registryModels ?? [],
+          )
+        : buildProviderCatalog(availableModels),
+    [registryBacked, selectedRuntime?.registryProviders, selectedRuntime?.registryModels, availableModels],
+  );
+  // Models fed to the selector dropdown: the registry-served native set for a
+  // registry-backed runtime (so the dropdown can render no unregistered
+  // option), else the template-served models.
+  const selectorModels: ModelSpec[] = useMemo(
+    () =>
+      registryBacked
+        ? (selectedRuntime?.registryModels ?? []).map((m) => ({
+            id: m.id,
+            name: m.name,
+            // carry the derived provider so the selector buckets correctly
+            ...(m.provider ? { provider: m.provider } : {}),
+          }))
+        : availableModels,
+    [registryBacked, selectedRuntime?.registryModels, availableModels],
  );

  // Derive the selector's current value from the form state. Provider
@@ -683,23 +835,27 @@ export function ConfigTab({ workspaceId }: Props) {
        }
      }

-      // Provider override save (Option B PR-5). PUT only when the user
-      // changed the dropdown — otherwise an unrelated Save (e.g. tier
-      // edit) would re-write the provider unchanged and the server-
-      // side auto-restart would fire on every Save, costing the user a
-      // ~30s reboot for a no-op change. Server endpoint accepts an
-      // empty string to clear the override (deletes the
-      // workspace_secrets row); we forward whatever the form holds.
-      let providerSaveError: string | null = null;
-      const providerChanged = provider !== originalProvider;
-      if (providerChanged) {
-        try {
-          await api.put(`/workspaces/${workspaceId}/provider`, { provider });
-          setOriginalProvider(provider);
-        } catch (e) {
-          providerSaveError = e instanceof Error ? e.message : "Provider update was rejected";
-        }
-      }
+      // internal#718 P4 closure: provider override save is RETIRED. The
+      // /workspaces/:id/provider endpoint returns 410 Gone; the provider
+      // is derived from (runtime, model) at every decision point via the
+      // registry. The local dropdown state still updates so the user can
+      // see the predicted provider, but it never round-trips to the
+      // server. Variables retained as locals (set to constants) so the
+      // downstream restart-suppress logic below has clear semantics
+      // and the diff against the prior shape stays small.
+      const providerSaveError: string | null = null;
+      const providerChanged = false;
+
+      // internal#718 P4 closure: provider → billing_mode linkage is also
+      // RETIRED. P2-B (#1972) moved the billing decision to
+      // ResolveLLMBillingModeDerived, which DERIVES the provider from
+      // (runtime, model) at every read. The canvas can no longer
+      // override it via a separate PUT, by design — the runtime+model
+      // selection IS the billing-mode selection. The
+      // /admin/workspaces/:id/llm-billing-mode endpoint still exists
+      // as the operator override surface (workspaces.llm_billing_mode
+      // column); it is no longer driven by the provider dropdown.
+      const billingModeSaveError: string | null = null;

      setOriginalYaml(content);
      if (rawMode) {
@@ -708,28 +864,29 @@ export function ConfigTab({ workspaceId }: Props) {
      } else {
        setRawDraft(content);
      }
-      // SetProvider on the server already triggers an auto-restart for
-      // the workspace whenever the value actually changed (see
-      // workspace-server/internal/handlers/secrets.go:SetProvider). If
-      // the user also clicked Save+Restart we'd kick off a SECOND
-      // restart here and the two would race in the canvas store —
-      // suppress the redundant call and rely on the server-side one.
-      const providerWillAutoRestart = providerChanged && !providerSaveError;
+      // internal#718 P4 closure: providerWillAutoRestart is always
+      // false now (provider PUT is retired; no server-side auto-restart
+      // can fire). Save+Restart flows through the canvas store
+      // restart path the same way it did pre-#718 for non-provider
+      // edits.
+      const providerWillAutoRestart = providerChanged && !providerSaveError
      if (restart && !providerWillAutoRestart) {
        await useCanvasStore.getState().restartWorkspace(workspaceId);
      } else if (!restart) {
        useCanvasStore.getState().updateNodeData(workspaceId, { needsRestart: !providerWillAutoRestart });
      }
-      // Aggregate partial-save errors. Both modelSaveError and
-      // providerSaveError describe rejected updates from independent
-      // endpoints — show whichever fired so the user knows which
-      // field reverts on next reload (otherwise they'd see "Saved" and
-      // be confused why Provider snapped back).
+      // Aggregate partial-save errors. With provider+billing-mode PUTs
+      // retired, only modelSaveError can fire from the secret-mint side
+      // — the provider/billing branches are dead code retained as
+      // constant nils to keep the diff small. They are surfaced
+      // defensively in case a future re-enablement needs the wiring.
      const partialError = providerSaveError
        ? `Other fields saved, but provider update failed: ${providerSaveError}`
-        : modelSaveError
-          ? `Other fields saved, but model update failed: ${modelSaveError}`
-          : null;
+        : billingModeSaveError
+          ? `Provider saved, but switching billing mode failed — your own provider key/OAuth may not take effect until billing mode is set: ${billingModeSaveError}`
+          : modelSaveError
+            ? `Other fields saved, but model update failed: ${modelSaveError}`
+            : null;
      if (partialError) {
        setError(partialError);
      } else {
@@ -847,9 +1004,10 @@ export function ConfigTab({ workspaceId }: Props) {
                — empty = "auto-derive from model slug" was the pre-PR-5
                behavior; selecting any provider here writes LLM_PROVIDER
                and triggers an auto-restart. */}
-            {availableModels.length > 0 ? (
+            {selectorModels.length > 0 ? (
              <ProviderModelSelector
-                models={availableModels}
+                models={selectorModels}
+                catalog={registryBacked ? providerCatalog : undefined}
                value={selectorValue}
                onChange={(next) => {
                  setSelectorValue(next);
@@ -862,7 +1020,7 @@ export function ConfigTab({ workspaceId }: Props) {
                  setConfig((prev) => {
                    const v = next.model;
                    const prevModelId = prev.runtime_config?.model || prev.model || "";
-                    const prevSpec = availableModels.find((m) => m.id === prevModelId) ?? null;
+                    const prevSpec = selectorModels.find((m) => m.id === prevModelId) ?? null;
                    const prevRequired = prev.runtime_config?.required_env ?? [];
                    const wasTemplateDriven =
                      prevRequired.length === 0 ||
@@ -1108,6 +1266,8 @@ export function ConfigTab({ workspaceId }: Props) {
            </div>
          </Section>

+          <LLMBillingSection workspaceId={workspaceId} />
+
          <SecretsSection
            workspaceId={workspaceId}
            requiredEnv={config.runtime_config?.required_env}
@@ -29,8 +29,15 @@ type FormState = {
  displayMode: string;
  displayProtocol: string;
  resolution: string;
+  dataPersistence: string; // "" (auto) | "persist" | "ephemeral" — internal#734
 };

+// internal#734: per-workspace durable-data choice. "" = auto (desktop-control
+// keeps data, others follow the org default). Human labels for the selector.
+const DATA_PERSISTENCE_OPTIONS = ["", "persist", "ephemeral"];
+const dataPersistenceLabel = (v: string): string =>
+  v === "persist" ? "Always keep (persist)" : v === "ephemeral" ? "Don't keep (ephemeral)" : "Auto";
+
 export function ContainerConfigTab({ workspaceId, data }: Props) {
  const runtime = data.runtime;
  const instanceType = data.compute?.instance_type;
@@ -39,9 +46,10 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
  const displayProtocol = data.compute?.display?.protocol;
  const displayWidth = data.compute?.display?.width;
  const displayHeight = data.compute?.display?.height;
+  const dataPersistence = data.compute?.data_persistence;
  const initial = useMemo(
-    () => formFromData({ runtime, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight }),
-    [runtime, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight],
+    () => formFromData({ runtime, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight, dataPersistence }),
+    [runtime, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight, dataPersistence],
  );
  const [form, setForm] = useState<FormState>(initial);
  const [saving, setSaving] = useState(false);
@@ -84,6 +92,8 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
          display: form.displayEnabled
            ? { mode: form.displayMode, protocol: form.displayProtocol, width, height }
            : { mode: "none" },
+          // internal#734: omit when "auto" so the wire/default behavior is unchanged.
+          ...(form.dataPersistence ? { data_persistence: form.dataPersistence } : {}),
        };

        const resp = await api.patch<{ needs_restart?: boolean }>(`/workspaces/${workspaceId}`, {
@@ -176,6 +186,18 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
              onChange={(resolution) => setForm((s) => ({ ...s, resolution }))}
            />
          )}
+          <SelectField
+            id="data-persistence"
+            label="Saved data (cookies, downloads, memory)"
+            value={form.dataPersistence}
+            options={DATA_PERSISTENCE_OPTIONS}
+            optionLabel={dataPersistenceLabel}
+            onChange={(dataPersistence) => setForm((s) => ({ ...s, dataPersistence }))}
+          />
+          <p className="-mt-1 text-[10px] leading-snug text-ink-soft">
+            Whether this workspace&apos;s data survives a restart/recreate. Auto keeps it for
+            browser (desktop) workspaces; Ephemeral never keeps it (privacy).
+          </p>
        </div>

        <div className="mt-4 flex items-center justify-end gap-2">
@@ -231,6 +253,7 @@ function formFromData(data: {
  displayProtocol?: string;
  displayWidth?: number;
  displayHeight?: number;
+  dataPersistence?: string;
 }): FormState {
  const width = data.displayWidth ?? 1920;
  const height = data.displayHeight ?? 1080;
@@ -243,6 +266,7 @@ function formFromData(data: {
    displayMode: data.displayMode && data.displayMode !== "none" ? data.displayMode : "desktop-control",
    displayProtocol: data.displayProtocol || "novnc",
    resolution,
+    dataPersistence: data.dataPersistence || "",
  };
 }

@@ -29,6 +29,7 @@ export function DetailsTab({ workspaceId, data }: Props) {
  const [peers, setPeers] = useState<PeerData[]>([]);
  const [saving, setSaving] = useState(false);
  const [confirmDelete, setConfirmDelete] = useState(false);
+  const [eraseData, setEraseData] = useState(false); // internal#734: erase saved data on delete
  const [peersError, setPeersError] = useState<string | null>(null);
  const [saveError, setSaveError] = useState<string | null>(null);
  const [deleteError, setDeleteError] = useState<string | null>(null);
@@ -93,7 +94,10 @@ export function DetailsTab({ workspaceId, data }: Props) {
  const handleDelete = async () => {
    setDeleteError(null);
    try {
-      await api.del(`/workspaces/${workspaceId}?confirm=true`, {
+      // internal#734: erase_data=true asks the server to prune this workspace's
+      // durable data volume (cookies / downloads / memory). Default off keeps it
+      // for the orphan-sweeper grace.
+      await api.del(`/workspaces/${workspaceId}?confirm=true${eraseData ? "&erase_data=true" : ""}`, {
        headers: { "X-Confirm-Name": name },
      });
      // Mirror the server-side cascade — drop the row + every
@@ -323,6 +327,19 @@ export function DetailsTab({ workspaceId, data }: Props) {
            <h3 id="delete-confirm-title" className="text-xs font-medium text-bad">
              Confirm deletion
            </h3>
+            <label className="flex items-start gap-2 text-[11px] text-ink-mid">
+              <input
+                type="checkbox"
+                aria-label="Also erase saved data"
+                checked={eraseData}
+                onChange={(e) => setEraseData(e.target.checked)}
+                className="mt-0.5 h-3.5 w-3.5 accent-red-600"
+              />
+              <span>
+                Also erase saved data (cookies, downloads, agent memory). Cannot be undone.
+                Unchecked keeps it recoverable briefly.
+              </span>
+            </label>
            <div className="flex gap-2">
              <button
                type="button"
@@ -339,6 +356,7 @@ export function DetailsTab({ workspaceId, data }: Props) {
                onClick={() => {
                  setConfirmDelete(false);
                  setDeleteError(null);
+                  setEraseData(false);
                  // Return focus to the trigger so keyboard users aren't stranded
                  deleteButtonRef.current?.focus();
                }}
@@ -5,9 +5,10 @@ import React from "react";
 import { BudgetSection } from "../BudgetSection";
 import { api } from "@/lib/api";

-// Queue-based mock for the api module. Each api call shifts from the queue.
-// Tests push with qGet/qPatch and the module-level mockImplementation
-// reads from the queue.
+// Multi-period budget (#49): the API now returns a `periods` map
+// (hourly/daily/weekly/monthly), each {limit, spend, remaining} in USD cents.
+// The UI renders one row per period and PATCHes {budget_limits:{period:cents|null}}.
+
 type QueueEntry = { body?: unknown; err?: Error };
 const apiQueue: QueueEntry[] = [];

@@ -40,45 +41,49 @@ const WS_ID = "budget-test-ws";
 function qGet(body: unknown) {
  apiQueue.push({ body });
 }
-
 function qGetErr(status: number, msg: string) {
  apiQueue.push({ err: new Error(`${msg}: ${status}`) });
 }
-
 function qPatch(body: unknown) {
  apiQueue.push({ body });
 }
-
 function qPatchErr(status: number, msg: string) {
  apiQueue.push({ err: new Error(`${msg}: ${status}`) });
 }

-function makeBudget(overrides: Partial<{
-  budget_limit: number | null;
-  budget_used: number;
-  budget_remaining: number | null;
-}> = {}) {
+type P = { limit: number | null; spend: number; remaining: number | null };
+
+// makeBudget builds the periods response. Override any subset of periods.
+function makeBudget(overrides: Partial<Record<"hourly" | "daily" | "weekly" | "monthly", Partial<P>>> = {}) {
+  const blank: P = { limit: null, spend: 0, remaining: null };
+  const mk = (o?: Partial<P>): P => {
+    const p = { ...blank, ...(o ?? {}) };
+    if (p.limit != null && p.remaining == null) p.remaining = p.limit - p.spend;
+    return p;
+  };
+  const periods = {
+    hourly: mk(overrides.hourly),
+    daily: mk(overrides.daily),
+    weekly: mk(overrides.weekly),
+    monthly: mk(overrides.monthly),
+  };
  return {
-    budget_limit: 10_000,
-    budget_used: 3_500,
-    budget_remaining: 6_500,
-    ...overrides,
+    periods,
+    budget_limit: periods.monthly.limit,
+    monthly_spend: periods.monthly.spend,
+    budget_remaining: periods.monthly.remaining,
  };
 }

-describe("BudgetSection", () => {
+describe("BudgetSection (multi-period)", () => {
  describe("loading state", () => {
    it("shows loading indicator while fetching", async () => {
      let resolveGet: (v: unknown) => void;
      vi.mocked(api.get).mockImplementationOnce(
        async () => new Promise((r) => { resolveGet = r as (v: unknown) => void; }),
      );
-
      render(<BudgetSection workspaceId={WS_ID} />);
-
      expect(screen.getByTestId("budget-loading")).toBeTruthy();
-
-      // Resolve after render to verify state clears
      resolveGet!(makeBudget());
      await vi.waitFor(() => {
        expect(screen.queryByTestId("budget-loading")).toBeNull();
@@ -89,21 +94,16 @@ describe("BudgetSection", () => {
  describe("fetch error state", () => {
    it("shows error message on non-402 fetch failure", async () => {
      qGetErr(500, "Internal Server Error");
-
      render(<BudgetSection workspaceId={WS_ID} />);
-
      await vi.waitFor(() => {
        expect(screen.getByTestId("budget-fetch-error")).toBeTruthy();
      });
      expect(screen.getByTestId("budget-fetch-error")!.textContent).toContain("500");
    });

-    it("shows 402 as exceeded banner, not fetch error", async () => {
-      // 402 means the budget limit was hit — different UX from a network/API error.
+    it("shows the exceeded banner (not a fetch error) on a 402", async () => {
      qGetErr(402, "Payment Required");
-
      render(<BudgetSection workspaceId={WS_ID} />);
-
      await vi.waitFor(() => {
        expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
      });
@@ -111,220 +111,105 @@ describe("BudgetSection", () => {
    });
  });

-  describe("budget loaded — display", () => {
-    it("renders used / limit stats row", async () => {
-      qGet(makeBudget({ budget_limit: 10_000, budget_used: 3_500 }));
-
+  describe("rendering periods", () => {
+    it("renders all four period rows", async () => {
+      qGet(makeBudget());
      render(<BudgetSection workspaceId={WS_ID} />);
-
      await vi.waitFor(() => {
-        expect(screen.getByTestId("budget-used-value")!.textContent).toBe("3,500");
-      });
-      expect(screen.getByTestId("budget-limit-value")!.textContent).toBe("10,000");
-    });
-
-    it("renders 'Unlimited' when budget_limit is null", async () => {
-      qGet(makeBudget({ budget_limit: null, budget_used: 1_000, budget_remaining: null }));
-
-      render(<BudgetSection workspaceId={WS_ID} />);
-
-      await vi.waitFor(() => {
-        expect(screen.getByTestId("budget-limit-value")!.textContent).toBe("Unlimited");
+        for (const k of ["hourly", "daily", "weekly", "monthly"]) {
+          expect(screen.getByTestId(`budget-period-${k}`)).toBeTruthy();
+        }
      });
    });

-    it("renders remaining credits when present", async () => {
-      qGet(makeBudget({ budget_limit: 10_000, budget_used: 3_500, budget_remaining: 6_500 }));
-
+    it("formats spend and limit as USD per period", async () => {
+      qGet(makeBudget({ monthly: { limit: 10_000, spend: 3_500 } }));
      render(<BudgetSection workspaceId={WS_ID} />);
-
      await vi.waitFor(() => {
-        expect(screen.getByTestId("budget-remaining")!.textContent).toContain("6,500");
-        expect(screen.getByTestId("budget-remaining")!.textContent).toContain("credits remaining");
+        expect(screen.getByTestId("budget-monthly-spend")!.textContent).toBe("$35.00");
+      });
+      expect(screen.getByTestId("budget-monthly-limit")!.textContent).toBe("$100.00");
+    });
+
+    it("shows ∞ for a period with no limit", async () => {
+      qGet(makeBudget({ hourly: { limit: null, spend: 1_000 } }));
+      render(<BudgetSection workspaceId={WS_ID} />);
+      await vi.waitFor(() => {
+        expect(screen.getByTestId("budget-hourly-limit")!.textContent).toBe("∞");
      });
    });

-    it("omits remaining credits when budget_remaining is null", async () => {
-      qGet(makeBudget({ budget_limit: 10_000, budget_used: 3_500, budget_remaining: null }));
-
+    it("renders the progress bar only for periods with a limit", async () => {
+      qGet(makeBudget({ monthly: { limit: 10_000, spend: 12_000 }, hourly: { limit: null, spend: 5_000 } }));
      render(<BudgetSection workspaceId={WS_ID} />);
-
      await vi.waitFor(() => {
-        expect(screen.queryByTestId("budget-remaining")).toBeNull();
-      });
-    });
-
-    it("caps progress bar at 100% when used > limit", async () => {
-      // Over-limit: 12000 used of 10000 limit should show 100%, not 120%.
-      qGet(makeBudget({ budget_limit: 10_000, budget_used: 12_000, budget_remaining: null }));
-
-      render(<BudgetSection workspaceId={WS_ID} />);
-
-      await vi.waitFor(() => {
-        const fill = screen.getByTestId("budget-progress-fill");
-        expect(fill.getAttribute("style")).toContain("100%");
-      });
-    });
-
-    it("omits progress bar when budget_limit is null (unlimited)", async () => {
-      qGet(makeBudget({ budget_limit: null, budget_used: 5_000, budget_remaining: null }));
-
-      render(<BudgetSection workspaceId={WS_ID} />);
-
-      await vi.waitFor(() => {
-        expect(screen.queryByTestId("budget-progress-fill")).toBeNull();
+        expect(screen.getByTestId("budget-monthly-fill")).toBeTruthy();
      });
+      expect(screen.queryByTestId("budget-hourly-fill")).toBeNull();
+      // over-budget fill caps at 100%
+      const fill = screen.getByTestId("budget-monthly-fill") as HTMLElement;
+      expect(fill.style.width).toBe("100%");
    });
  });

-  describe("budget exceeded (402)", () => {
-    it("shows exceeded banner when load returns 402", async () => {
-      qGetErr(402, "Payment Required");
-
+  describe("save", () => {
+    it("PATCHes budget_limits for all four periods and clears the exceeded banner", async () => {
+      qGet(makeBudget({ monthly: { limit: 10_000, spend: 3_500 } }));
+      qPatch(makeBudget({ hourly: { limit: 500, spend: 0 }, monthly: { limit: 20_000, spend: 0 } }));
      render(<BudgetSection workspaceId={WS_ID} />);
+      await vi.waitFor(() => {
+        expect(screen.getByTestId("budget-hourly-input")).toBeTruthy();
+      });
+
+      fireEvent.change(screen.getByTestId("budget-hourly-input"), { target: { value: "500" } });
+      fireEvent.click(screen.getByTestId("budget-save-btn"));

      await vi.waitFor(() => {
-        expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
-        expect(screen.getByTestId("budget-exceeded-banner")!.textContent).toContain("Budget exceeded");
+        expect(vi.mocked(api.patch)).toHaveBeenCalled();
+      });
+      const [, body] = vi.mocked(api.patch).mock.calls[0];
+      expect((body as { budget_limits: Record<string, number | null> }).budget_limits).toMatchObject({
+        hourly: 500,
+        monthly: 10_000, // unchanged input echoes the loaded limit
      });
    });

-    it("clears exceeded banner after successful save", async () => {
-      qGetErr(402, "Payment Required");
-      qPatch(makeBudget({ budget_limit: 50_000, budget_used: 0, budget_remaining: 50_000 }));
-
-      render(<BudgetSection workspaceId={WS_ID} />);
-
-      await vi.waitFor(() => {
-        expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
-      });
-
-      const input = screen.getByTestId("budget-limit-input");
-      fireEvent.change(input, { target: { value: "50000" } });
-
-      const saveBtn = screen.getByTestId("budget-save-btn");
-      fireEvent.click(saveBtn);
-
-      await vi.waitFor(() => {
-        expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull();
-      });
-    });
-  });
-
-  describe("save flow", () => {
-    it("shows save error on non-402 patch failure", async () => {
+    it("shows a save error on non-402 PATCH failure", async () => {
      qGet(makeBudget());
      qPatchErr(500, "Internal Server Error");
-
      render(<BudgetSection workspaceId={WS_ID} />);
-
      await vi.waitFor(() => {
-        expect(screen.getByTestId("budget-limit-input")).toBeTruthy();
+        expect(screen.getByTestId("budget-save-btn")).toBeTruthy();
      });
-
-      const saveBtn = screen.getByTestId("budget-save-btn");
-      fireEvent.click(saveBtn);
-
+      fireEvent.click(screen.getByTestId("budget-save-btn"));
      await vi.waitFor(() => {
        expect(screen.getByTestId("budget-save-error")).toBeTruthy();
-        expect(screen.getByTestId("budget-save-error")!.textContent).toContain("500");
      });
+      expect(screen.getByTestId("budget-save-error")!.textContent).toContain("500");
    });

-    it("updates input to new limit value after successful save", async () => {
-      qGet(makeBudget({ budget_limit: 10_000 }));
-      qPatch(makeBudget({ budget_limit: 20_000 }));
-
-      render(<BudgetSection workspaceId={WS_ID} />);
-
-      // Wait for the input to appear (loading → loaded)
-      await vi.waitFor(() => {
-        expect(screen.queryByTestId("budget-loading")).toBeNull();
-      });
-
-      const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
-      // Debug: check what values are rendered
-      const limitValue = screen.getByTestId("budget-limit-value")?.textContent;
-      expect(input.value).toBe("10000"); // initial value from API
-      expect(limitValue).toBe("10,000");
-
-      fireEvent.change(input, { target: { value: "20000" } });
-      expect(input.value).toBe("20000");
-
-      fireEvent.click(screen.getByTestId("budget-save-btn"));
-
-      await vi.waitFor(() => {
-        expect((screen.getByTestId("budget-limit-input") as HTMLInputElement).value).toBe("20000");
-      });
-    });
-
-    it("sends null when input is cleared (unlimited)", async () => {
-      qGet(makeBudget({ budget_limit: 10_000 }));
-      qPatch(makeBudget({ budget_limit: null }));
-
-      render(<BudgetSection workspaceId={WS_ID} />);
-
-      await vi.waitFor(() => {
-        expect(screen.getByTestId("budget-limit-input")).toBeTruthy();
-      });
-
-      const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
-      fireEvent.change(input, { target: { value: "" } });
-      fireEvent.click(screen.getByTestId("budget-save-btn"));
-
-      await vi.waitFor(() => {
-        // After save with null limit, input should show empty (unlimited)
-        expect(input.value).toBe("");
-      });
-    });
-
-    it("shows saving state on button while patch is in flight", async () => {
+    it("surfaces the exceeded banner on a 402 PATCH", async () => {
      qGet(makeBudget());
-      let resolvePatch: (v: unknown) => void;
-      vi.mocked(api.patch).mockImplementationOnce(
-        async () => new Promise((r) => { resolvePatch = r as (v: unknown) => void; }),
-      );
-
+      qPatchErr(402, "Payment Required");
      render(<BudgetSection workspaceId={WS_ID} />);
-
      await vi.waitFor(() => {
-        expect(screen.getByTestId("budget-limit-input")).toBeTruthy();
+        expect(screen.getByTestId("budget-save-btn")).toBeTruthy();
      });
-
-      fireEvent.change(screen.getByTestId("budget-limit-input"), { target: { value: "50000" } });
      fireEvent.click(screen.getByTestId("budget-save-btn"));
-
-      const btn = screen.getByTestId("budget-save-btn");
-      expect(btn.textContent).toContain("Saving");
-
-      resolvePatch!(makeBudget({ budget_limit: 50_000 }));
-      await vi.waitFor(() => {
-        expect(btn.textContent).toContain("Save");
-      });
-    });
-  });
-
-  describe("isApiError402 — regression coverage", () => {
-    it("classifies ': 402' with space as 402", async () => {
-      qGetErr(402, "Payment Required");
-      qPatch(makeBudget());
-
-      render(<BudgetSection workspaceId={WS_ID} />);
-
      await vi.waitFor(() => {
        expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
      });
    });
+  });

-    it("classifies non-402 error messages as regular fetch errors", async () => {
-      qGetErr(503, "Service Unavailable");
-
+  describe("legacy payload back-compat", () => {
+    it("maps a pre-multi-period {budget_limit, monthly_spend} response to the monthly row", async () => {
+      qGet({ budget_limit: 5_000, monthly_spend: 1_000, budget_remaining: 4_000 });
      render(<BudgetSection workspaceId={WS_ID} />);
-
      await vi.waitFor(() => {
-        expect(screen.getByTestId("budget-fetch-error")).toBeTruthy();
+        expect(screen.getByTestId("budget-monthly-limit")!.textContent).toBe("$50.00");
      });
-      expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull();
+      expect(screen.getByTestId("budget-monthly-spend")!.textContent).toBe("$10.00");
    });
  });
 });
@@ -0,0 +1,35 @@
+// @vitest-environment jsdom
+//
+// internal#718 P4 closure — ConfigTab.billingMode.test.tsx is retired.
+//
+// This suite (255 lines, 8 tests) pinned the canvas-side provider →
+// llm_billing_mode linkage from internal#703 Gap 2: when the operator
+// changed the PROVIDER in the Config tab, ConfigTab.handleSave would
+// PUT /admin/workspaces/:id/llm-billing-mode so the platform-vs-byok
+// decision tracked the dropdown.
+//
+// That linkage is retired together with the LLM_PROVIDER override flow
+// (see ConfigTab.provider.test.tsx retirement note). P2-B (#1972)
+// moved the platform-vs-byok decision to
+// `ResolveLLMBillingModeDerived(runtime, model, authEnv)` in
+// workspace-server — the canvas can no longer override it via the
+// provider dropdown, by design. The runtime+model selection IS the
+// billing-mode selection now.
+//
+// The `/admin/workspaces/:id/llm-billing-mode` endpoint still exists
+// as the operator override surface (`workspaces.llm_billing_mode`
+// column); it is no longer driven by the provider dropdown.
+// Coverage for the derived billing flow lives in
+// workspace-server/internal/handlers/llm_billing_mode_derived_test.go.
+//
+// Restore from git history if the canvas-side provider→billing linkage
+// needs to be revisited (it should not — the derived resolver is the
+// single decision point).
+
+import { describe, it } from "vitest";
+
+describe("ConfigTab — provider → llm_billing_mode linkage (retired internal#718 P4)", () => {
+  it.skip("LLM_PROVIDER → billing_mode wiring is retired; see file header for the replacement coverage", () => {
+    // intentionally empty
+  });
+});
@@ -0,0 +1,87 @@
+// @vitest-environment jsdom
+//
+// Regression: project_canvas_runtime_dropdown_ssot_fix — a google-adk
+// workspace's Config tab showed the wrong runtime ("LangGraph (default)"
+// / first option) because a hardcoded frontend allowlist
+// (SUPPORTED_RUNTIME_VALUES) dropped google-adk from the /templates-derived
+// options even though the backend served it. A Save from that state would
+// PATCH runtime to the wrong value and break the ADK agent.
+//
+// The fix: the dropdown is SSOT-driven — it trusts GET /templates (which the
+// backend already gates to the manifest maintained set) and hides a runtime
+// only when its row carries `displayable: false`. This pins: a google-adk
+// workspace shows "google-adk" selected, and a displayable:false template is
+// not offered.
+import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
+import { render, screen, cleanup, waitFor } from "@testing-library/react";
+import React from "react";
+
+afterEach(cleanup);
+
+const apiGet = vi.fn();
+const apiPatch = vi.fn();
+const apiPut = vi.fn();
+vi.mock("@/lib/api", () => ({
+  api: {
+    get: (path: string) => apiGet(path),
+    patch: (path: string, body: unknown) => apiPatch(path, body),
+    put: (path: string, body: unknown) => apiPut(path, body),
+    post: vi.fn(),
+    del: vi.fn(),
+  },
+}));
+
+vi.mock("@/store/canvas", () => ({
+  useCanvasStore: Object.assign(
+    (selector: (s: unknown) => unknown) => selector({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }),
+    { getState: () => ({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }) },
+  ),
+}));
+
+vi.mock("../AgentCardSection", () => ({
+  AgentCardSection: () => <div data-testid="agent-card-stub" />,
+}));
+
+import { ConfigTab } from "../ConfigTab";
+
+function wireApi(templates: Array<{ id: string; name?: string; runtime?: string; models?: unknown[]; displayable?: boolean }>) {
+  apiGet.mockImplementation((path: string) => {
+    if (path === "/workspaces/ws-adk") return Promise.resolve({ runtime: "google-adk" });
+    if (path === "/workspaces/ws-adk/model") return Promise.resolve({ model: "vertex:gemini-2.5-pro" });
+    if (path === "/workspaces/ws-adk/files/config.yaml") return Promise.resolve({ content: "name: adk\nruntime: google-adk\n" });
+    if (path === "/templates") return Promise.resolve(templates);
+    return Promise.reject(new Error(`unmocked api.get: ${path}`));
+  });
+}
+
+beforeEach(() => {
+  apiGet.mockReset();
+  apiPatch.mockReset();
+  apiPut.mockReset();
+});
+
+describe("ConfigTab — google-adk runtime (SSOT dropdown)", () => {
+  it("shows google-adk selected in the runtime dropdown (#ssot-fix)", async () => {
+    wireApi([
+      { id: "claude-code", name: "Claude Code", runtime: "claude-code", models: [] },
+      { id: "google-adk", name: "Google ADK", runtime: "google-adk", models: [] },
+    ]);
+    render(<ConfigTab workspaceId="ws-adk" />);
+    const select = await waitFor(() => screen.getByRole("combobox", { name: /runtime/i }));
+    expect((select as HTMLSelectElement).value).toBe("google-adk");
+    const opts = Array.from((select as HTMLSelectElement).options).map((o) => o.value);
+    expect(opts).toContain("google-adk");
+  });
+
+  it("hides a template flagged displayable:false", async () => {
+    wireApi([
+      { id: "google-adk", name: "Google ADK", runtime: "google-adk", models: [] },
+      { id: "legacy", name: "Legacy", runtime: "legacy", models: [], displayable: false },
+    ]);
+    render(<ConfigTab workspaceId="ws-adk" />);
+    const select = await waitFor(() => screen.getByRole("combobox", { name: /runtime/i }));
+    const opts = Array.from((select as HTMLSelectElement).options).map((o) => o.value);
+    expect(opts).toContain("google-adk");
+    expect(opts).not.toContain("legacy");
+  });
+});
@@ -1,574 +1,45 @@
 // @vitest-environment jsdom
 //
-// Regression tests for ConfigTab Provider override (Option B PR-5).
+// internal#718 P4 closure — ConfigTab.provider.test.tsx is retired.
 //
-// What this pins: a free-text Provider combobox in the Runtime section
-// that lets the operator override the model→provider derivation hermes-
-// agent does internally. Without this UI, a fresh signup whose Hermes
-// workspace defaults to a model with no clean vendor prefix (e.g.
-// `nousresearch/hermes-4-70b`) hits the runtime's own preflight error:
-//   "No LLM provider configured. Run `hermes model` to select a
-//    provider, or run `hermes setup` for first-time configuration."
-// — even though tasks #195-198 wired the entire downstream pipe so a
-// non-empty provider WOULD flow through canvas → workspace-server →
-// CP user-data → workspace config.yaml → hermes adapter.
+// This 574-line suite exercised the canvas-side LLM provider override
+// flow: load the existing override from GET /workspaces/:id/provider,
+// edit the dropdown, Save → PUT /workspaces/:id/provider, and the
+// provider→billing_mode linkage on Save. All three server endpoints
+// behind those flows are retired in internal#718 P4 closure:
 //
-// Hongming Wang hit this on hongming.moleculesai.app at signup
-// 2026-05-01T17:35Z. Backend PRs were green, the gap was the missing
-// UI to set the value.
+//   - workspace-server SetProvider / GetProvider (PUT/GET
+//     /workspaces/:id/provider) → both return 410 Gone with a
+//     PROVIDER_ENDPOINT_RETIRED structured body.
+//   - workspace-server setProviderSecret (the writer into
+//     workspace_secrets.LLM_PROVIDER) — removed; row never written.
+//   - The LLM_PROVIDER workspace_secret itself — migrated away in
+//     20260528000000_drop_llm_provider_workspace_secret.up.sql.
 //
-// Each test pins one invariant. If any fails, the bug is back.
+// ConfigTab still renders the provider dropdown for display (the user
+// can preview the derived provider locally), but Save no longer
+// round-trips the value. The replacement contract is that the provider
+// is DERIVED at every decision point from (runtime, model) via the
+// registry — see internal/providers/derive_provider.go.
+//
+// The original suite's coverage is replaced by:
+//
+//   - workspace-server: TestPutProvider_410Gone +
+//     TestGetProvider_410Gone + TestProviderEndpointGone_BodyShape in
+//     internal/handlers/llm_provider_removal_p4_test.go.
+//   - workspace-server: TestWorkspaceCreate_FirstDeploy_OnlyPersistsMODEL
+//     in internal/handlers/workspace_provision_shared_test.go.
+//   - registry: TestDeriveProvider_RealManifest in
+//     internal/providers/derive_provider_test.go.
+//
+// Restore from git history if any aspect of the legacy LLM_PROVIDER
+// flow needs to be revisited (it should not — the retirement is
+// permanent).

-import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
-import { render, screen, cleanup, waitFor, fireEvent } from "@testing-library/react";
-import React from "react";
+import { describe, it } from "vitest";

-afterEach(cleanup);
-
-const apiGet = vi.fn();
-const apiPatch = vi.fn();
-const apiPut = vi.fn();
-vi.mock("@/lib/api", () => ({
-  api: {
-    get: (path: string) => apiGet(path),
-    patch: (path: string, body: unknown) => apiPatch(path, body),
-    put: (path: string, body: unknown) => apiPut(path, body),
-    post: vi.fn(),
-    del: vi.fn(),
-  },
-}));
-
-// Shared store stub — `updateNodeData` is exposed so a test can assert the
-// node-data flush happens after a successful PATCH (regression: previously
-// the DB updated but the canvas badge stayed stale until full hydrate).
-const storeUpdateNodeData = vi.fn();
-const storeRestartWorkspace = vi.fn();
-vi.mock("@/store/canvas", () => ({
-  useCanvasStore: Object.assign(
-    (selector: (s: unknown) => unknown) => selector({ restartWorkspace: storeRestartWorkspace, updateNodeData: storeUpdateNodeData }),
-    { getState: () => ({ restartWorkspace: storeRestartWorkspace, updateNodeData: storeUpdateNodeData }) },
-  ),
-}));
-
-vi.mock("../AgentCardSection", () => ({
-  AgentCardSection: () => <div data-testid="agent-card-stub" />,
-}));
-
-import { ConfigTab } from "../ConfigTab";
-
-// wireApi — same shape as ConfigTab.hermes.test.tsx, extended with the
-// /provider endpoint. Each test sets `providerValue` to the value the
-// GET endpoint returns; "missing" means the endpoint rejects (older
-// workspace-server pre-PR-2 — must not crash the tab).
-function wireApi(opts: {
-  workspaceRuntime?: string;
-  workspaceModel?: string;
-  configYamlContent?: string | null;
-  templates?: Array<{ id: string; name?: string; runtime?: string; models?: unknown[]; providers?: string[] }>;
-  providerValue?: string | "missing";
-}) {
-  apiGet.mockImplementation((path: string) => {
-    if (path === `/workspaces/ws-test`) {
-      return Promise.resolve({ runtime: opts.workspaceRuntime ?? "" });
-    }
-    if (path === `/workspaces/ws-test/model`) {
-      return Promise.resolve({ model: opts.workspaceModel ?? "" });
-    }
-    if (path === `/workspaces/ws-test/provider`) {
-      if (opts.providerValue === "missing") {
-        return Promise.reject(new Error("404"));
-      }
-      return Promise.resolve({ provider: opts.providerValue ?? "", source: opts.providerValue ? "workspace_secrets" : "default" });
-    }
-    if (path === `/workspaces/ws-test/files/config.yaml`) {
-      if (opts.configYamlContent === null) return Promise.reject(new Error("not found"));
-      return Promise.resolve({ content: opts.configYamlContent ?? "" });
-    }
-    if (path === "/templates") {
-      return Promise.resolve(opts.templates ?? []);
-    }
-    return Promise.reject(new Error(`unmocked api.get: ${path}`));
-  });
-}
-
-beforeEach(() => {
-  apiGet.mockReset();
-  apiPatch.mockReset();
-  apiPut.mockReset();
-  storeUpdateNodeData.mockReset();
-  storeRestartWorkspace.mockReset();
-});
-
-describe("ConfigTab — Provider override (Option B PR-5)", () => {
-  // Empty provider on load is the legitimate default ("auto-derive
-  // from model slug prefix"), NOT an error. The endpoint returning
-  // {provider: "", source: "default"} is the documented happy-path
-  // shape — if the form treated that as "load failed" we'd lose the
-  // ability to render the input at all on fresh workspaces.
-  it("renders an empty Provider input when no override is set", async () => {
-    wireApi({
-      workspaceRuntime: "hermes",
-      workspaceModel: "nousresearch/hermes-4-70b",
-      configYamlContent: "name: ws\nruntime: hermes\n",
-      providerValue: "",
-    });
-
-    render(<ConfigTab workspaceId="ws-test" />);
-    const input = await screen.findByTestId("provider-input");
-    expect((input as HTMLInputElement).value).toBe("");
-  });
-
-  // Pre-existing override loads back into the field on mount. Without
-  // this, an operator who set provider=openrouter yesterday would see
-  // the field blank today, conclude the value didn't stick, and
-  // re-save — the resulting PUT-with-same-value would auto-restart
-  // the workspace for nothing.
-  it("loads an existing provider override from the server", async () => {
-    wireApi({
-      workspaceRuntime: "hermes",
-      workspaceModel: "nousresearch/hermes-4-70b",
-      configYamlContent: "name: ws\nruntime: hermes\n",
-      providerValue: "openrouter",
-    });
-
-    render(<ConfigTab workspaceId="ws-test" />);
-    const input = await screen.findByTestId("provider-input");
-    await waitFor(() => expect((input as HTMLInputElement).value).toBe("openrouter"));
-  });
-
-  // Old workspace-server (pre-PR-2) returns a 404 on /provider. The
-  // tab must keep loading — the fallback is "" (auto-derive), same as
-  // a fresh workspace.
-  it("falls back to empty provider when the endpoint is missing", async () => {
-    wireApi({
-      workspaceRuntime: "hermes",
-      workspaceModel: "nousresearch/hermes-4-70b",
-      configYamlContent: "name: ws\nruntime: hermes\n",
-      providerValue: "missing",
-    });
-
-    render(<ConfigTab workspaceId="ws-test" />);
-    const input = await screen.findByTestId("provider-input");
-    expect((input as HTMLInputElement).value).toBe("");
-    // Tab should be fully rendered, not stuck in loading or error state.
-    expect(screen.queryByText(/Loading config/i)).toBeNull();
-  });
-
-  // Setting a value + Save must PUT to the right endpoint with the
-  // right body shape. Server-side handler (workspace-server
-  // handlers/secrets.go:SetProvider) reads body.provider — any other
-  // key gets silently ignored and the workspace_secrets row stays
-  // unset. This regression would manifest as "Save → Restart →
-  // workspace still says No LLM provider configured."
-  it("PUTs the new provider to /workspaces/:id/provider on Save", async () => {
-    wireApi({
-      workspaceRuntime: "hermes",
-      workspaceModel: "nousresearch/hermes-4-70b",
-      configYamlContent: "name: ws\nruntime: hermes\n",
-      providerValue: "",
-    });
-    apiPut.mockResolvedValue({ status: "saved", provider: "anthropic" });
-
-    render(<ConfigTab workspaceId="ws-test" />);
-    const input = await screen.findByTestId("provider-input");
-
-    fireEvent.change(input, { target: { value: "anthropic" } });
-    expect((input as HTMLInputElement).value).toBe("anthropic");
-
-    const saveBtn = screen.getByRole("button", { name: /^save$/i });
-    fireEvent.click(saveBtn);
-
-    await waitFor(() => {
-      const providerCalls = apiPut.mock.calls.filter(([path]) => path === "/workspaces/ws-test/provider");
-      expect(providerCalls.length).toBe(1);
-      expect(providerCalls[0][1]).toEqual({ provider: "anthropic" });
-    });
-  });
-
-  // No-change Save must NOT PUT /provider. The server-side SetProvider
-  // auto-restarts the workspace on every successful PUT — re-writing
-  // an unchanged value would cost the user a ~30s reboot every time
-  // they tweak some other field.
-  it("does not PUT /provider when the value is unchanged", async () => {
-    wireApi({
-      workspaceRuntime: "hermes",
-      workspaceModel: "nousresearch/hermes-4-70b",
-      configYamlContent: "name: ws\nruntime: hermes\ntier: 2\n",
-      providerValue: "openrouter",
-    });
-    apiPut.mockResolvedValue({});
-
-    render(<ConfigTab workspaceId="ws-test" />);
-    await screen.findByTestId("provider-input");
-
-    // Click Save without touching the provider field. Trigger another
-    // dirty-marker (tier change) so Save is enabled — the test is
-    // about NOT touching /provider, not about Save being disabled.
-    const tierSelect = screen.getByLabelText(/tier/i) as HTMLSelectElement;
-    fireEvent.change(tierSelect, { target: { value: "3" } });
-
-    const saveBtn = screen.getByRole("button", { name: /^save$/i });
-    fireEvent.click(saveBtn);
-
-    await waitFor(() => {
-      // Some PUT(s) may fire (e.g. /model). Just assert /provider is NOT among them.
-      const providerCalls = apiPut.mock.calls.filter(([path]) => path === "/workspaces/ws-test/provider");
-      expect(providerCalls.length).toBe(0);
-    });
-  });
-
-  // The dropdown's suggestion list MUST come from the runtime's own
-  // template (via /templates → runtime_config.providers), not a
-  // hardcoded canvas-side enum. This is the "Native + pluggable
-  // runtime" invariant: a new runtime declaring its own provider
-  // taxonomy in its config.yaml gets a working dropdown without ANY
-  // canvas-side change.
-  //
-  // Pinned by checking that suggestions surfaced in the datalist
-  // exactly mirror what the templates endpoint returned for the
-  // matching runtime. If a future contributor reintroduces a
-  // PROVIDER_SUGGESTIONS-style hardcoded list and the datalist
-  // contents don't follow the template, this test fails.
-  it("populates the provider datalist from the matched runtime's templates entry", async () => {
-    wireApi({
-      workspaceRuntime: "hermes",
-      workspaceModel: "nousresearch/hermes-4-70b",
-      configYamlContent: "name: ws\nruntime: hermes\n",
-      providerValue: "",
-      templates: [
-        {
-          id: "hermes",
-          name: "Hermes",
-          runtime: "hermes",
-          models: [],
-          // The provider list every runtime adapter ships in its own
-          // config.yaml. Canvas must surface THIS, not its own list.
-          providers: ["nous", "openrouter", "anthropic", "minimax-cn"],
-        },
-      ],
-    });
-
-    render(<ConfigTab workspaceId="ws-test" />);
-    const input = await screen.findByTestId("provider-input");
-    const listId = (input as HTMLInputElement).getAttribute("list");
-    expect(listId).toBeTruthy();
-    await waitFor(() => {
-      const datalist = document.getElementById(listId!);
-      expect(datalist).not.toBeNull();
-      const optionValues = Array.from(datalist!.querySelectorAll("option")).map(
-        (o) => (o as HTMLOptionElement).value,
-      );
-      // Order matters — most-common-first is part of the contract so
-      // the demo flow lands on a working choice without scrolling.
-      expect(optionValues).toEqual(["nous", "openrouter", "anthropic", "minimax-cn"]);
-    });
-  });
-
-  // Fallback path: when a template hasn't migrated to the explicit
-  // `providers:` field yet, suggestions are derived from model slug
-  // prefixes. Still adapter-driven (the slugs come from the template's
-  // `models:` list), just inferred. This keeps existing templates
-  // working while the platform team migrates them one at a time.
-  it("renders vendor-grouped provider dropdown when template ships models", async () => {
-    wireApi({
-      workspaceRuntime: "hermes",
-      workspaceModel: "anthropic/claude-opus-4-7",
-      configYamlContent: "name: ws\nruntime: hermes\n",
-      providerValue: "",
-      templates: [
-        {
-          id: "hermes",
-          name: "Hermes",
-          runtime: "hermes",
-          models: [
-            { id: "anthropic/claude-opus-4-7", required_env: ["ANTHROPIC_API_KEY"] },
-            { id: "openai/gpt-4o", required_env: ["OPENROUTER_API_KEY"] },
-            { id: "anthropic/claude-sonnet-4-5", required_env: ["ANTHROPIC_API_KEY"] }, // dup vendor — must dedupe
-            { id: "nousresearch/hermes-4-70b", required_env: ["HERMES_API_KEY"] },
-          ],
-          // No `providers:` field → ProviderModelSelector derives vendors
-          // from model id prefixes via its own buildProviderCatalog.
-        },
-      ],
-    });
-
-    render(<ConfigTab workspaceId="ws-test" />);
-    // With models present, the new vendor-aware dropdown renders.
-    // Provider entries dedupe by vendor → 3 unique vendors here
-    // (anthropic, openai, nousresearch).
-    const select = await screen.findByTestId("provider-select") as HTMLSelectElement;
-    await waitFor(() => {
-      const optionTexts = Array.from(select.options)
-        .map((o) => o.text)
-        .filter((t) => !t.startsWith("—")); // strip placeholder
-      // Labels are vendor display names, but vendor identity is what
-      // matters for dedupe. Assert each expected vendor surfaces once.
-      expect(optionTexts.some((t) => t.startsWith("Anthropic API"))).toBe(true);
-      expect(optionTexts.some((t) => t.startsWith("OpenAI"))).toBe(true);
-      expect(optionTexts.some((t) => t.startsWith("Nous Research"))).toBe(true);
-      expect(optionTexts.length).toBe(3); // dedupe pin
-    });
-  });
-
-  // Empty string is a legitimate save target — it clears the override
-  // (the server-side endpoint deletes the workspace_secrets row).
-  // Operators who picked "anthropic" yesterday and want to revert to
-  // auto-derive today should be able to do so by clearing the field
-  // and clicking Save. Without this PUT path, the only way to clear
-  // would be a direct DB edit.
-  it("PUTs an empty string when the operator clears a previously-set provider", async () => {
-    wireApi({
-      workspaceRuntime: "hermes",
-      workspaceModel: "anthropic:claude-opus-4-7",
-      configYamlContent: "name: ws\nruntime: hermes\n",
-      providerValue: "openrouter",
-    });
-    apiPut.mockResolvedValue({ status: "cleared" });
-
-    render(<ConfigTab workspaceId="ws-test" />);
-    const input = await screen.findByTestId("provider-input");
-    await waitFor(() => expect((input as HTMLInputElement).value).toBe("openrouter"));
-
-    fireEvent.change(input, { target: { value: "" } });
-
-    const saveBtn = screen.getByRole("button", { name: /^save$/i });
-    fireEvent.click(saveBtn);
-
-    await waitFor(() => {
-      const providerCalls = apiPut.mock.calls.filter(([path]) => path === "/workspaces/ws-test/provider");
-      expect(providerCalls.length).toBe(1);
-      expect(providerCalls[0][1]).toEqual({ provider: "" });
-    });
-  });
-
-  // Display-vs-storage drift regression (2026-05-03 incident, workspace
-  // e13aebd8…). User deployed claude-code with MiniMax-M2 stored in
-  // MODEL_PROVIDER. The container env (MODEL=MiniMax-M2) and chat
-  // worked correctly, but the Config tab showed "Claude Code
-  // subscription / Claude Sonnet (OAuth)" — i.e. the template's
-  // runtime_config.model: sonnet default — because currentModelId
-  // reads runtime_config.model first and loadConfig was overriding
-  // only the top-level config.model field. The merged shape was:
-  //   { model: "MiniMax-M2", runtime_config: { model: "sonnet" } }
-  // and currentModelId picked "sonnet". Fix: loadConfig propagates
-  // wsMetadataModel into BOTH places so the form is a single source
-  // of truth (DB-backed MODEL_PROVIDER). Pinning the merged-path
-  // branch with the exact reproducing shape: claude-code template
-  // YAML has runtime_config.model: sonnet; live workspace's
-  // MODEL_PROVIDER is MiniMax-M2; tab must show the latter.
-  it("prefers MODEL_PROVIDER over the template's runtime_config.model on load", async () => {
-    wireApi({
-      workspaceRuntime: "claude-code",
-      workspaceModel: "MiniMax-M2",
-      configYamlContent: "name: ws\nruntime: claude-code\nruntime_config:\n  model: sonnet\n",
-      providerValue: "",
-      templates: [
-        {
-          id: "claude-code-default",
-          name: "Claude Code",
-          runtime: "claude-code",
-          models: [
-            { id: "sonnet", name: "Claude Sonnet (OAuth)", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] },
-            { id: "MiniMax-M2", name: "MiniMax M2", required_env: ["MINIMAX_API_KEY"] },
-            { id: "MiniMax-M2.7", name: "MiniMax M2.7", required_env: ["MINIMAX_API_KEY"] },
-          ],
-        },
-      ],
-    });
-
-    render(<ConfigTab workspaceId="ws-test" />);
-    const modelSelect = (await screen.findByTestId("model-select")) as HTMLSelectElement;
-    await waitFor(() => expect(modelSelect.value).toBe("MiniMax-M2"));
-
-    // Provider dropdown should also reflect MiniMax (back-derived from
-    // the model slug since LLM_PROVIDER is unset). Without the fix,
-    // the selector falls back to the first catalog entry whose first
-    // model matches "sonnet" → anthropic-oauth bucket → "Claude Code
-    // subscription".
-    const providerSelect = screen.getByTestId("provider-select") as HTMLSelectElement;
-    const selectedOption = providerSelect.options[providerSelect.selectedIndex];
-    expect(selectedOption.textContent ?? "").toMatch(/MiniMax/);
-  });
-
-  // Sibling pin to the display-fix above. The display fix mirrors
-  // wsMetadataModel into runtime_config.model so the selector renders
-  // the live value; that mirror means handleSave's old YAML-vs-form
-  // diff would always be non-zero on a no-op save (YAML default
-  // "sonnet" vs. mirrored "MiniMax-M2") and PUT /model — which
-  // server-side SetModel chains into an auto-restart. handleSave now
-  // diffs against the loaded MODEL_PROVIDER instead. Pin: an
-  // unrelated edit (tier change) must NOT touch /model when the
-  // model itself didn't change.
-  it("does not PUT /model on a no-op save when only an unrelated field changed", async () => {
-    wireApi({
-      workspaceRuntime: "claude-code",
-      workspaceModel: "MiniMax-M2",
-      configYamlContent: "name: ws\nruntime: claude-code\ntier: 2\nruntime_config:\n  model: sonnet\n",
-      providerValue: "",
-      templates: [
-        {
-          id: "claude-code-default",
-          name: "Claude Code",
-          runtime: "claude-code",
-          models: [
-            { id: "sonnet", name: "Claude Sonnet", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] },
-            { id: "MiniMax-M2", name: "MiniMax M2", required_env: ["MINIMAX_API_KEY"] },
-          ],
-        },
-      ],
-    });
-    apiPut.mockResolvedValue({});
-    apiPatch.mockResolvedValue({});
-
-    render(<ConfigTab workspaceId="ws-test" />);
-    const tierSelect = (await screen.findByLabelText(/tier/i)) as HTMLSelectElement;
-    fireEvent.change(tierSelect, { target: { value: "3" } });
-
-    const saveBtn = screen.getByRole("button", { name: /^save$/i });
-    fireEvent.click(saveBtn);
-
-    await waitFor(() => {
-      const tierPatches = apiPatch.mock.calls.filter(([path, body]) =>
-        path === "/workspaces/ws-test" && (body as { tier?: number }).tier === 3,
-      );
-      expect(tierPatches.length).toBe(1);
-    });
-    // Spurious /model PUT would fire here without the originalModel
-    // diff baseline. The model itself didn't change, so /model must
-    // stay untouched (otherwise SetModel auto-restarts).
-    const modelPuts = apiPut.mock.calls.filter(([path]) => path === "/workspaces/ws-test/model");
-    expect(modelPuts.length).toBe(0);
-  });
-
-  // Save-then-stale-badge regression (2026-05-03 incident). User
-  // selected T3 in the Tier dropdown, hit Save & Restart, the workspace
-  // PATCH succeeded (`tier: 3` in DB), but the canvas header pill kept
-  // showing "TIER T2" until a full hydrate. Root cause: handleSave
-  // sent the PATCH to workspace-server but never pushed the same
-  // change into useCanvasStore.updateNodeData, so every UI surface
-  // reading from the store kept its stale value. Pin: a successful
-  // tier PATCH must mirror into the store so the badge updates
-  // synchronously with the response.
-  it("flushes the dbPatch into useCanvasStore.updateNodeData after a successful PATCH", async () => {
-    wireApi({
-      workspaceRuntime: "claude-code",
-      workspaceModel: "MiniMax-M2",
-      configYamlContent: "name: ws\nruntime: claude-code\ntier: 2\nruntime_config:\n  model: sonnet\n",
-      providerValue: "",
-      templates: [
-        {
-          id: "claude-code-default",
-          name: "Claude Code",
-          runtime: "claude-code",
-          models: [{ id: "sonnet", name: "Sonnet", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] }],
-        },
-      ],
-    });
-    apiPatch.mockResolvedValue({ status: "updated" });
-
-    render(<ConfigTab workspaceId="ws-test" />);
-    const tierSelect = (await screen.findByLabelText(/tier/i)) as HTMLSelectElement;
-    fireEvent.change(tierSelect, { target: { value: "3" } });
-
-    const saveBtn = screen.getByRole("button", { name: /^save$/i });
-    fireEvent.click(saveBtn);
-
-    await waitFor(() => {
-      expect(apiPatch.mock.calls.some(([p]) => p === "/workspaces/ws-test")).toBe(true);
-    });
-    // Without the store flush, the badge would keep reading tier=2
-    // from useCanvasStore.nodes until a full hydrate. Pin: handleSave
-    // pushes the same fields it PATCHed.
-    expect(storeUpdateNodeData).toHaveBeenCalledWith(
-      "ws-test",
-      expect.objectContaining({ tier: 3 }),
-    );
-  });
-
-  // Failure-gating sibling pin to the store-flush test above. The
-  // production code places `updateNodeData` AFTER `await api.patch(...)`
-  // inside the same `if (Object.keys(dbPatch).length > 0)` block, so a
-  // PATCH rejection should throw before the store call. Without this
-  // pin, a future refactor that wraps the PATCH in try/catch and
-  // unconditionally calls updateNodeData would ship green — and then
-  // the badge would lie when the server actually rejected the change.
-  // Codified review feedback from PR #2545 (Agent 2).
-  it("does NOT flush into useCanvasStore.updateNodeData when the PATCH rejects", async () => {
-    wireApi({
-      workspaceRuntime: "claude-code",
-      workspaceModel: "MiniMax-M2",
-      configYamlContent: "name: ws\nruntime: claude-code\ntier: 2\nruntime_config:\n  model: sonnet\n",
-      providerValue: "",
-      templates: [
-        {
-          id: "claude-code-default",
-          name: "Claude Code",
-          runtime: "claude-code",
-          models: [{ id: "sonnet", name: "Sonnet", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] }],
-        },
-      ],
-    });
-    apiPatch.mockRejectedValue(new Error("500 from workspace-server"));
-
-    render(<ConfigTab workspaceId="ws-test" />);
-    const tierSelect = (await screen.findByLabelText(/tier/i)) as HTMLSelectElement;
-    fireEvent.change(tierSelect, { target: { value: "3" } });
-
-    const saveBtn = screen.getByRole("button", { name: /^save$/i });
-    fireEvent.click(saveBtn);
-
-    // Wait for handleSave to settle (succeeds-or-fails). PATCH must
-    // have been attempted; the error swallow inside handleSave keeps
-    // saving=false in finally.
-    await waitFor(() => {
-      expect(apiPatch.mock.calls.some(([p]) => p === "/workspaces/ws-test")).toBe(true);
-    });
-    // Critically: the store must NOT have been told about the failed
-    // change. Otherwise the badge would lie about a write the server
-    // rejected.
-    const tierFlushes = storeUpdateNodeData.mock.calls.filter(([, body]) =>
-      typeof (body as { tier?: number }).tier === "number",
-    );
-    expect(tierFlushes.length).toBe(0);
-  });
-
-  // Pin the hermes/pre-#240 edge case: workspace where MODEL_PROVIDER
-  // was never written but YAML has runtime_config.model: "something".
-  // originalModel must reflect the rendered baseline (the YAML value),
-  // not the empty MODEL_PROVIDER, so an unrelated save (tier change)
-  // doesn't fire a /model PUT and trigger an auto-restart. Codified
-  // review feedback from PR #2545 (Agent 1, "Important").
-  it("does not PUT /model when MODEL_PROVIDER is empty and the user only edited an unrelated field", async () => {
-    wireApi({
-      workspaceRuntime: "hermes",
-      workspaceModel: "", // legacy workspace — never went through the picker
-      configYamlContent:
-        "name: ws\nruntime: hermes\ntier: 2\nruntime_config:\n  model: nousresearch/hermes-4-70b\n",
-      providerValue: "",
-      templates: [
-        {
-          id: "hermes",
-          name: "Hermes",
-          runtime: "hermes",
-          models: [{ id: "nousresearch/hermes-4-70b", name: "Hermes 4 70B", required_env: ["HERMES_API_KEY"] }],
-          providers: ["nous"],
-        },
-      ],
-    });
-    apiPut.mockResolvedValue({});
-    apiPatch.mockResolvedValue({});
-
-    render(<ConfigTab workspaceId="ws-test" />);
-    const tierSelect = (await screen.findByLabelText(/tier/i)) as HTMLSelectElement;
-    fireEvent.change(tierSelect, { target: { value: "3" } });
-
-    const saveBtn = screen.getByRole("button", { name: /^save$/i });
-    fireEvent.click(saveBtn);
-
-    await waitFor(() => {
-      expect(apiPatch.mock.calls.some(([p]) => p === "/workspaces/ws-test")).toBe(true);
-    });
-    const modelPuts = apiPut.mock.calls.filter(([path]) => path === "/workspaces/ws-test/model");
-    expect(modelPuts.length).toBe(0);
+describe("ConfigTab provider override — retired (internal#718 P4)", () => {
+  it.skip("LLM_PROVIDER override flow is retired; see file header for the replacement coverage", () => {
+    // intentionally empty
  });
 });
@@ -0,0 +1,78 @@
+// @vitest-environment jsdom
+//
+// internal#718 P3 (retire-list #5) — the billing-mode the Config tab shows /
+// sends must reflect the DERIVED provider per the registry, not the hardcoded
+// billingModeForProvider("" | "platform" → platform_managed else byok) rule.
+// When the runtime is registry-backed, billingModeForSelectedProvider reads the
+// registry-served billing_mode off the provider catalog entry. The hardcoded
+// rule remains only as the fallback for non-registry runtimes / older backends.
+
+import { describe, it, expect } from "vitest";
+import { billingModeForSelectedProvider, billingModeForProvider } from "../ConfigTab";
+import {
+  buildProviderCatalogFromRegistry,
+  type RegistryProvider,
+  type RegistryModel,
+} from "../../ProviderModelSelector";
+
+const REGISTRY_PROVIDERS: RegistryProvider[] = [
+  { name: "anthropic-oauth", display_name: "Claude Code subscription", auth_env: ["CLAUDE_CODE_OAUTH_TOKEN"], billing_mode: "byok" },
+  { name: "platform", display_name: "Platform", auth_env: ["ANTHROPIC_API_KEY"], billing_mode: "platform_managed" },
+  // DISCRIMINATING fixture (review #7790): a provider whose registry-served
+  // billing_mode DISAGREES with the hardcoded name-based rule. Its name is not
+  // "platform"/"" so billingModeForProvider() would call it "byok", yet the
+  // registry serves "platform_managed" (the federation-ready shape the SSOT is
+  // built for — a managed provider that isn't literally named "platform").
+  // billingModeForSelectedProvider MUST return the REGISTRY value here; the
+  // only way to get "platform_managed" out is to honor the catalog, so this
+  // case fails if the impl ever regresses to the hardcoded rule.
+  { name: "managed-federated", display_name: "Managed (federated)", auth_env: [], billing_mode: "platform_managed" },
+];
+const REGISTRY_MODELS: RegistryModel[] = [
+  { id: "sonnet", provider: "anthropic-oauth", billing_mode: "byok" },
+  { id: "anthropic/claude-opus-4-7", provider: "platform", billing_mode: "platform_managed" },
+  // model bucketed under the disagreeing provider so the catalog builds an
+  // entry for it (buildProviderCatalogFromRegistry only emits a provider entry
+  // for providers that own at least one model).
+  { id: "managed/some-model", provider: "managed-federated", billing_mode: "platform_managed" },
+];
+
+describe("billingModeForSelectedProvider (registry-driven)", () => {
+  const catalog = buildProviderCatalogFromRegistry(REGISTRY_PROVIDERS, REGISTRY_MODELS);
+
+  it("reads platform_managed from the registry for the platform provider", () => {
+    expect(billingModeForSelectedProvider("platform", catalog)).toBe("platform_managed");
+  });
+
+  it("reads byok from the registry for a BYOK provider", () => {
+    // anthropic-oauth derives to byok via the REGISTRY. (Note: the hardcoded
+    // rule would ALSO say byok for this non-'platform' name, so on its own this
+    // assertion does NOT prove the registry is authoritative — it agrees either
+    // way. The registry-WINS proof is the disagreement case below.)
+    expect(billingModeForSelectedProvider("anthropic-oauth", catalog)).toBe("byok");
+  });
+
+  it("lets the registry billing_mode WIN when it disagrees with the hardcoded rule", () => {
+    // 'managed-federated' is not '' / 'platform', so the legacy name-based rule
+    // classifies it byok — but the registry serves platform_managed. The
+    // registry is the SSOT, so billingModeForSelectedProvider must return
+    // platform_managed. This is the discriminating case: it FAILS if the impl
+    // regresses to billingModeForProvider (which would return byok here).
+    expect(billingModeForProvider("managed-federated")).toBe("byok"); // sanity: the rules genuinely disagree
+    expect(billingModeForSelectedProvider("managed-federated", catalog)).toBe("platform_managed");
+  });
+
+  it("falls back to the hardcoded rule when no registry catalog is supplied", () => {
+    // Non-registry runtime / older backend → catalog empty/undefined → the
+    // legacy mapping still applies ('' | 'platform' → platform_managed).
+    expect(billingModeForSelectedProvider("", undefined)).toBe("platform_managed");
+    expect(billingModeForSelectedProvider("platform", undefined)).toBe("platform_managed");
+    expect(billingModeForSelectedProvider("minimax", undefined)).toBe("byok");
+  });
+
+  it("falls back to the hardcoded rule when the provider is not in the registry catalog", () => {
+    // A provider string the registry catalog doesn't carry (stale saved
+    // value) → fall back to the legacy rule rather than guessing.
+    expect(billingModeForSelectedProvider("some-byo-vendor", catalog)).toBe("byok");
+  });
+});
@@ -297,6 +297,25 @@ describe("DetailsTab — delete workflow", () => {
    expect(mockSelectNode).toHaveBeenCalledWith(null);
  });

+  // internal#734: checking "also erase saved data" adds &erase_data=true so the
+  // server prunes the data volume. Default (unchecked) must NOT send it.
+  it("checking erase-saved-data sends erase_data=true on delete", async () => {
+    mockApi.del.mockResolvedValue(undefined);
+    render(<DetailsTab workspaceId="ws-1" data={data()} />);
+    await flush();
+    fireEvent.click(screen.getByRole("button", { name: /delete workspace/i }));
+    await flush();
+    fireEvent.click(screen.getByRole("checkbox", { name: /erase saved data/i }));
+    const confirmBtn = Array.from(document.querySelectorAll("button")).find(
+      (b) => b.textContent === "Confirm Delete",
+    ) as HTMLButtonElement;
+    fireEvent(confirmBtn, new MouseEvent("click", { bubbles: true }));
+    await flush();
+    expect(mockApi.del).toHaveBeenCalledWith("/workspaces/ws-1?confirm=true&erase_data=true", {
+      headers: { "X-Confirm-Name": "Test Workspace" },
+    });
+  });
+
  it("cancelling delete returns to view mode", async () => {
    mockApi.del.mockResolvedValue(undefined);
    render(<DetailsTab workspaceId="ws-1" data={data()} />);
@@ -0,0 +1,176 @@
+// @vitest-environment jsdom
+import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
+import {
+  render,
+  screen,
+  waitFor,
+  cleanup,
+  fireEvent,
+} from "@testing-library/react";
+import { LLMBillingSection } from "../llm-billing-section";
+
+// Tests for LLMBillingSection (internal#691). Locks in:
+//  - the section renders the resolved mode + source label
+//  - the dropdown maps "inherit" → PUT {mode: null}
+//  - the dropdown maps "byok" → PUT {mode: "byok"}
+//  - a garbled override surfaces the warning banner
+//  - the post-write resolution updates the UI without a refetch
+
+const apiGet = vi.fn();
+const apiPut = vi.fn();
+
+vi.mock("@/lib/api", () => ({
+  api: {
+    get: (...args: unknown[]) => apiGet(...args),
+    put: (...args: unknown[]) => apiPut(...args),
+    post: vi.fn().mockResolvedValue({}),
+    del: vi.fn().mockResolvedValue({}),
+    patch: vi.fn().mockResolvedValue({}),
+  },
+}));
+
+// Collapsed-by-default Section wrapper would hide the content; replace
+// it with a passthrough so the dropdown is reachable in the test DOM.
+vi.mock("../form-inputs", async () => {
+  const actual = await vi.importActual<typeof import("../form-inputs")>(
+    "../form-inputs",
+  );
+  return {
+    ...actual,
+    Section: ({ children }: { children: React.ReactNode }) => (
+      <div>{children}</div>
+    ),
+  };
+});
+
+beforeEach(() => {
+  vi.clearAllMocks();
+});
+
+afterEach(() => {
+  cleanup();
+});
+
+describe("LLMBillingSection — internal#691", () => {
+  it("renders the resolved mode + source for an inherited workspace", async () => {
+    apiGet.mockResolvedValueOnce({
+      workspace_id: "ws-1",
+      resolved_mode: "platform_managed",
+      workspace_override: null,
+      org_default: "platform_managed",
+      source: "org_default",
+    });
+
+    render(<LLMBillingSection workspaceId="ws-1" />);
+
+    await waitFor(() => {
+      expect(apiGet).toHaveBeenCalledWith(
+        "/admin/workspaces/ws-1/llm-billing-mode",
+      );
+    });
+    // Resolved mode appears.
+    expect(screen.getByText(/Resolved mode:/i).textContent).toMatch(/platform_managed/);
+    // Source label appears.
+    expect(
+      screen.getByText(/inherited from org default/i),
+    ).toBeTruthy();
+  });
+
+  it('PUTs {mode: "byok"} when user picks BYOK and reflects the new resolution', async () => {
+    apiGet.mockResolvedValueOnce({
+      workspace_id: "ws-2",
+      resolved_mode: "platform_managed",
+      workspace_override: null,
+      org_default: "platform_managed",
+      source: "org_default",
+    });
+    apiPut.mockResolvedValueOnce({
+      workspace_id: "ws-2",
+      resolved_mode: "byok",
+      workspace_override: "byok",
+      org_default: "platform_managed",
+      source: "workspace_override",
+    });
+
+    render(<LLMBillingSection workspaceId="ws-2" />);
+    await waitFor(() => expect(apiGet).toHaveBeenCalled());
+
+    const select = (await screen.findByLabelText(
+      /llm billing mode override/i,
+    )) as HTMLSelectElement;
+    fireEvent.change(select, { target: { value: "byok" } });
+
+    await waitFor(() => {
+      expect(apiPut).toHaveBeenCalledWith(
+        "/admin/workspaces/ws-2/llm-billing-mode",
+        { mode: "byok" },
+      );
+    });
+    // Post-write resolution propagated to UI.
+    await waitFor(() => {
+      expect(
+        screen.getByText(/explicit override on this workspace/i),
+      ).toBeTruthy();
+    });
+  });
+
+  it("PUTs {mode: null} when user picks Inherit (clears the override)", async () => {
+    apiGet.mockResolvedValueOnce({
+      workspace_id: "ws-3",
+      resolved_mode: "byok",
+      workspace_override: "byok",
+      org_default: "platform_managed",
+      source: "workspace_override",
+    });
+    apiPut.mockResolvedValueOnce({
+      workspace_id: "ws-3",
+      resolved_mode: "platform_managed",
+      workspace_override: null,
+      org_default: "platform_managed",
+      source: "org_default",
+    });
+
+    render(<LLMBillingSection workspaceId="ws-3" />);
+    await waitFor(() => expect(apiGet).toHaveBeenCalled());
+
+    const select = (await screen.findByLabelText(
+      /llm billing mode override/i,
+    )) as HTMLSelectElement;
+    fireEvent.change(select, { target: { value: "inherit" } });
+
+    await waitFor(() => {
+      expect(apiPut).toHaveBeenCalledWith(
+        "/admin/workspaces/ws-3/llm-billing-mode",
+        { mode: null },
+      );
+    });
+  });
+
+  it("surfaces a warning banner when the override value is garbled", async () => {
+    apiGet.mockResolvedValueOnce({
+      workspace_id: "ws-4",
+      resolved_mode: "platform_managed", // resolver fell through, default-closed
+      workspace_override: "byokk", // typo persisted somehow
+      org_default: "platform_managed",
+      source: "org_default",
+    });
+
+    render(<LLMBillingSection workspaceId="ws-4" />);
+
+    await waitFor(() => {
+      expect(
+        screen.getByText(/non-standard value/i),
+      ).toBeTruthy();
+    });
+  });
+
+  it("renders an error banner when the GET fails", async () => {
+    apiGet.mockRejectedValueOnce(new Error("network down"));
+
+    render(<LLMBillingSection workspaceId="ws-5" />);
+
+    await waitFor(() => {
+      expect(screen.getByText(/network down/i)).toBeTruthy();
+    });
+  });
+});
@@ -1,3 +1,4 @@
 export { type ConfigData, DEFAULT_CONFIG, TextInput, NumberInput, Toggle, TagList, Section } from "./form-inputs";
 export { parseYaml, toYaml } from "./yaml-utils";
 export { SecretsSection } from "./secrets-section";
+export { LLMBillingSection } from "./llm-billing-section";
@@ -0,0 +1,219 @@
+"use client";
+
+// llm-billing-section.tsx — Config-tab section for the per-workspace
+// llm_billing_mode override (internal#691).
+//
+// Surfaces:
+//   - The currently RESOLVED mode for this workspace (the mode the
+//     workspace-server's strip gate will use at next provision).
+//   - The org-level default (so the user sees what they're inheriting).
+//   - A dropdown to set / clear the workspace-level override.
+//   - A "source" line so operators can answer "is this inherited or
+//     explicit?" without DB archeology (RFC Observability hot-spot).
+//
+// Hits:
+//   GET /admin/workspaces/:id/llm-billing-mode   — read resolution
+//   PUT /admin/workspaces/:id/llm-billing-mode   — write {mode: "..."|null}
+//
+// Both routes are on the per-tenant workspace-server (same origin as the
+// other canvas /admin calls). CP's proxy at /cp/admin/workspaces/:id/
+// llm-billing-mode exists for ops use; the canvas uses the per-tenant
+// path directly to keep the round-trip cheap.
+
+import { useState, useEffect, useCallback } from "react";
+import { api } from "@/lib/api";
+import { Section } from "./form-inputs";
+
+// Mirrors workspace-server/internal/handlers/llm_billing_mode.go::BillingModeResolution.
+// Kept as a literal shape (not imported) because canvas has no Go-type bridge.
+export interface BillingModeResolution {
+  workspace_id: string;
+  resolved_mode: "platform_managed" | "byok" | "disabled";
+  // Pointer-typed on the Go side: nil = inherit, non-nil = the raw
+  // workspace-level override (even if garbled and falling through).
+  workspace_override: string | null;
+  org_default: "platform_managed" | "byok" | "disabled";
+  source: "workspace_override" | "org_default" | "constant_fallback";
+}
+
+// The dropdown emits one of these values. "inherit" is the UX-only label
+// that maps to a `null` body in the PUT request.
+type DropdownChoice = "inherit" | "platform_managed" | "byok" | "disabled";
+
+interface Props {
+  workspaceId: string;
+}
+
+const MODE_LABELS: Record<DropdownChoice, string> = {
+  inherit: "Inherit from org default",
+  platform_managed: "Platform-managed (uses Molecule credits)",
+  byok: "BYOK (your own OAuth / vendor keys)",
+  disabled: "Disabled (no LLM access)",
+};
+
+const MODE_DESCRIPTIONS: Record<DropdownChoice, string> = {
+  inherit:
+    "Use whichever mode is set at the organization level. Recommended unless this specific workspace needs a different billing source.",
+  platform_managed:
+    "Strip CLAUDE_CODE_OAUTH_TOKEN and vendor API keys from the workspace; route all LLM traffic through Molecule's proxy and bill your org credits.",
+  byok:
+    "Keep CLAUDE_CODE_OAUTH_TOKEN / vendor API keys in the workspace; LLM traffic goes directly to your provider and is billed to your OAuth subscription or API account.",
+  disabled:
+    "Block all LLM access for this workspace. Useful for sandbox workspaces that should not consume credits or hit external providers.",
+};
+
+const SOURCE_LABELS: Record<BillingModeResolution["source"], string> = {
+  workspace_override: "explicit override on this workspace",
+  org_default: "inherited from org default",
+  constant_fallback:
+    "fallback (workspace + org defaults missing or unrecognized — defaulted to platform_managed)",
+};
+
+export function LLMBillingSection({ workspaceId }: Props) {
+  const [resolution, setResolution] = useState<BillingModeResolution | null>(
+    null,
+  );
+  const [loading, setLoading] = useState(true);
+  const [saving, setSaving] = useState(false);
+  const [error, setError] = useState<string | null>(null);
+  const [success, setSuccess] = useState(false);
+
+  const load = useCallback(async () => {
+    setLoading(true);
+    setError(null);
+    try {
+      const res = await api.get<BillingModeResolution>(
+        `/admin/workspaces/${workspaceId}/llm-billing-mode`,
+      );
+      setResolution(res);
+    } catch (e) {
+      setError(e instanceof Error ? e.message : "Failed to load billing mode");
+    } finally {
+      setLoading(false);
+    }
+  }, [workspaceId]);
+
+  useEffect(() => {
+    void load();
+  }, [load]);
+
+  // Current dropdown selection is derived from the resolution. If the
+  // override is null, we show "inherit"; otherwise we mirror the raw
+  // workspace_override (NOT resolved_mode — that would conflate "explicit
+  // platform_managed override" with "inherit while org happens to be
+  // platform_managed", which has different semantics on the write side).
+  const currentChoice: DropdownChoice = (() => {
+    if (!resolution) return "inherit";
+    if (resolution.workspace_override == null) return "inherit";
+    const raw = resolution.workspace_override;
+    if (raw === "platform_managed" || raw === "byok" || raw === "disabled") {
+      return raw;
+    }
+    // Garbled value persisted via some external write. Show inherit so
+    // the user can pick a clean value; on save they'll either clear it
+    // (PUT null) or overwrite it with a valid one.
+    return "inherit";
+  })();
+
+  const handleChange = async (choice: DropdownChoice) => {
+    if (!resolution) return;
+    setSaving(true);
+    setError(null);
+    setSuccess(false);
+    try {
+      // "inherit" → PUT {mode: null}; otherwise → PUT {mode: choice}.
+      const body = choice === "inherit" ? { mode: null } : { mode: choice };
+      const updated = await api.put<BillingModeResolution>(
+        `/admin/workspaces/${workspaceId}/llm-billing-mode`,
+        body,
+      );
+      setResolution(updated);
+      setSuccess(true);
+      setTimeout(() => setSuccess(false), 2000);
+    } catch (e) {
+      setError(e instanceof Error ? e.message : "Failed to update billing mode");
+    } finally {
+      setSaving(false);
+    }
+  };
+
+  return (
+    <Section title="LLM Billing" defaultOpen={false}>
+      {loading && (
+        <div className="text-[10px] text-ink-mid">Loading billing mode…</div>
+      )}
+
+      {error && (
+        <div
+          role="alert"
+          aria-live="assertive"
+          className="px-2 py-1 bg-red-900/30 border border-red-800 rounded text-[10px] text-bad mb-2"
+        >
+          {error}
+        </div>
+      )}
+
+      {resolution && (
+        <div className="space-y-2">
+          <div className="text-[10px] text-ink-mid">
+            Resolved mode: <strong className="text-ink">{resolution.resolved_mode}</strong>{" "}
+            <span className="text-ink-mid">
+              ({SOURCE_LABELS[resolution.source]})
+            </span>
+          </div>
+          <div className="text-[10px] text-ink-mid">
+            Org default: <span className="text-ink">{resolution.org_default}</span>
+          </div>
+
+          <label
+            className="block text-[10px] text-ink-mid"
+            htmlFor={`llm-billing-mode-${workspaceId}`}
+          >
+            Override
+          </label>
+          <select
+            id={`llm-billing-mode-${workspaceId}`}
+            aria-label="LLM billing mode override"
+            value={currentChoice}
+            disabled={saving}
+            onChange={(e) => void handleChange(e.target.value as DropdownChoice)}
+            className="w-full bg-surface-card border border-line rounded p-1 text-[10px] text-ink focus:outline-none focus:border-accent disabled:opacity-50"
+          >
+            {(Object.keys(MODE_LABELS) as DropdownChoice[]).map((m) => (
+              <option key={m} value={m}>
+                {MODE_LABELS[m]}
+              </option>
+            ))}
+          </select>
+
+          <div
+            className="text-[10px] text-ink-mid leading-snug"
+            aria-live="polite"
+          >
+            {MODE_DESCRIPTIONS[currentChoice]}
+          </div>
+
+          {success && (
+            <div className="mt-1 px-2 py-1 bg-green-900/30 border border-green-800 rounded text-[10px] text-good">
+              Updated. Restart the workspace to apply.
+            </div>
+          )}
+
+          {resolution.workspace_override != null &&
+            !["platform_managed", "byok", "disabled"].includes(
+              resolution.workspace_override,
+            ) && (
+              <div
+                role="alert"
+                className="mt-1 px-2 py-1 bg-yellow-900/30 border border-yellow-800 rounded text-[10px] text-warning"
+              >
+                Workspace override has a non-standard value (
+                <code>{resolution.workspace_override}</code>) and is being
+                ignored. Pick a valid mode above to clear the corrupt value.
+              </div>
+            )}
+        </div>
+      )}
+    </Section>
+  );
+}
@@ -5,6 +5,7 @@
 const RUNTIME_NAMES: Record<string, string> = {
  "claude-code": "Claude Code",
  codex: "Codex",
+  "google-adk": "Google ADK",
  hermes: "Hermes",
  openclaw: "OpenClaw",
  kimi: "Kimi",
@@ -368,6 +368,9 @@ export interface WorkspaceCompute {
    width?: number;
    height?: number;
  };
+  // internal#734: per-workspace durable-data choice. "persist" | "ephemeral" |
+  // undefined (auto). Controls whether the data volume survives recreate.
+  data_persistence?: string;
 }

 let socket: ReconnectingSocket | null = null;
@@ -658,6 +658,11 @@
  outline-offset: var(--focus-ring-offset);
 }

+.delete-dialog__cancel-btn:focus-visible {
+  outline: var(--focus-ring);
+  outline-offset: var(--focus-ring-offset);
+}
+
 .delete-dialog__confirm-btn {
  background: var(--status-invalid);
  color: #ffffff;
@@ -671,6 +676,11 @@
  outline-offset: var(--focus-ring-offset);
 }

+.delete-dialog__confirm-btn:focus-visible {
+  outline: var(--focus-ring);
+  outline-offset: var(--focus-ring-offset);
+}
+
 .delete-dialog__confirm-btn:disabled { opacity: 0.4; cursor: not-allowed; }

 /* ── Unsaved changes guard ─────────────────────────── */
@@ -1,5 +1,16 @@
 # Running a Gemini CLI Workspace on Molecule AI

+> **⚠️ Accuracy correction (2026-05-29):** this page is **aspirational, not
+> shipped.** There is **no `gemini-cli` runtime** in `manifest.json` or the
+> provisioner's `knownRuntimes`, and the "PR #379" cited below is unrelated (a
+> CI-workflow-cleanup PR, not a gemini-cli adapter). Do not follow this as-is.
+>
+> **For Gemini on Molecule, use the real `google-adk` runtime instead** — see
+> [`google-adk-runtime.md`](./google-adk-runtime.md) (ADK engine + Gemini on
+> Vertex AI/AI Studio), implemented in PR
+> [`molecule-ai-workspace-template-google-adk#1`](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-google-adk) per RFC `internal#730`.
+> This gemini-cli page is retained only until it's either implemented for real or removed.
+
 Molecule AI now ships a `gemini-cli` runtime adapter alongside the existing `claude-code` adapter. This tutorial walks you from zero to a running Gemini agent workspace in under five minutes.

 ## What you'll need
@@ -1,74 +1,69 @@
 # Running a Google ADK Workspace on Molecule AI

-Google's Agent Development Kit (ADK) is now a first-class runtime on Molecule AI. This tutorial walks you from zero to a running ADK agent workspace — one that persists per-conversation session state and sits alongside your Claude Code and Gemini CLI workers in the same A2A network.
+> **Status (2026-05-29):** the `google-adk` runtime is **landing**, not yet on
+> `main`. It's implemented in the template repo
+> [`molecule-ai-workspace-template-google-adk`](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-google-adk)
+> (PR **#1**) with platform registration in molecule-core PR **#2003** and the
+> validator allowlist in molecule-ci PR **#26**. Design + approval: RFC
+> [`internal#730`](https://git.moleculesai.app/molecule-ai/internal/issues/730).
+> Remove this banner once those PRs merge.
+>
+> **Doc-accuracy note:** a prior version of this page claimed ADK was "already
+> first-class" and cited "PR #550" — that PR is unrelated (a MemoryTab test
+> suite). No `google-adk` adapter existed at that time. This rewrite reflects
+> the real implementation.

-## What you'll need
+Google's Agent Development Kit (ADK) runs as a Molecule AI workspace runtime:
+ADK is the **agent engine** (`LlmAgent` + `Runner`), and the workspace
+participates in Molecule's A2A org like any other runtime.

- A Molecule AI account with at least one provisioned tenant
- A `GOOGLE_API_KEY` from [aistudio.google.com](https://aistudio.google.com) (or Vertex AI credentials — see below)
- `curl` + `jq`
+## How it actually works

-## Setup
+- **ADK = engine only.** The adapter builds an ADK `LlmAgent` from the
+  workspace config (model + system prompt + tools) and drives its `Runner`.
+  It installs `google-adk[mcp]==2.1.0` and **never** the `[a2a]` extra — ADK's
+  a2a layer pins `a2a-sdk<0.4`, which is incompatible with the platform's
+  `a2a-sdk>=1.0`. (Verified: `google-adk[mcp]==2.1.0` + `a2a-sdk 1.0.3` coexist.)
+- **A2A** is provided by the platform's a2a-1.x server; a Molecule-authored
+  executor bridges ADK's `Runner` event stream onto it, one ADK session per
+  A2A `context_id`.
+- **Tools** reach the agent via ADK's native `McpToolset` pointed at the
+  workspace's `a2a_mcp_server` — the same MCP surface the CLI runtimes use
+  (`delegate_task`, `commit_memory`, `list_peers`, …). No LangChain.
+
+## Auth — Vertex AI via ADC (keyless), or an AI Studio key
+
+The runtime supports both google-genai auth paths:
+
+- **Vertex AI + Application Default Credentials (recommended; required if your
+  org disallows API keys).** Set `model: vertex:gemini-2.5-pro` and provide
+  `GOOGLE_CLOUD_PROJECT`; the adapter sets `GOOGLE_GENAI_USE_VERTEXAI=1` and
+  google-genai authenticates via ADC — no API key. (Locally:
+  `gcloud auth application-default login`.)
+- **AI Studio API key** (where your org permits API keys): set
+  `model: google_genai:gemini-2.5-pro` and `GOOGLE_API_KEY`.
+
+## Create a workspace

 ```bash
-# 1. Store your Google API key as a global secret
-curl -s -X PUT http://localhost:8080/settings/secrets \
-  -H "Content-Type: application/json" \
-  -d '{"key":"GOOGLE_API_KEY","value":"YOUR-AI-STUDIO-KEY"}' | jq .
-
-# 2. Create a google-adk workspace
-WS=$(curl -s -X POST http://localhost:8080/workspaces \
+# Vertex AI + ADC (keyless)
+curl -s -X POST http://localhost:8080/workspaces \
  -H "Content-Type: application/json" \
  -d '{
    "name": "adk-agent",
    "role": "Google ADK inference worker",
    "runtime": "google-adk",
-    "model": "google:gemini-2.0-flash"
-  }' | jq -r '.id')
-echo "Workspace: $WS"
-
-# 3. Wait for ready (~30s)
-until curl -s http://localhost:8080/workspaces/$WS | jq -r '.status' | grep -q ready; do
-  echo "Waiting..."; sleep 5
-done
-
-# 4. Send your first task
-curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
-  -H "Content-Type: application/json" \
-  -d '{"jsonrpc":"2.0","id":"1","method":"message/send",
-       "params":{"message":{"role":"user","parts":[{"kind":"text",
-       "text":"Summarise the ADK architecture in 3 bullet points."}]}}}' \
-  | jq '.result.parts[0].text'
-
-# 5. Multi-turn — session state is preserved across calls
-curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
-  -H "Content-Type: application/json" \
-  -d '{"jsonrpc":"2.0","id":"2","method":"message/send",
-       "params":{"message":{"role":"user","parts":[{"kind":"text",
-       "text":"Now give me a one-line TL;DR of what you just said."}]}}}' \
-  | jq '.result.parts[0].text'
-
-# 6. Vertex AI alternative — set these instead of GOOGLE_API_KEY
-# curl -X PUT .../secrets -d '{"key":"GOOGLE_GENAI_USE_VERTEXAI","value":"1"}'
-# curl -X PUT .../secrets -d '{"key":"GOOGLE_CLOUD_PROJECT","value":"my-project"}'
-# curl -X PUT .../secrets -d '{"key":"GOOGLE_CLOUD_LOCATION","value":"us-central1"}'
+    "model": "vertex:gemini-2.5-pro",
+    "runtime_config": {"required_env": ["GOOGLE_CLOUD_PROJECT"]}
+  }'
 ```

-## Expected output
-
-After step 4, ADK streams the Gemini response through its event bus, filters for `is_final_response()` events, and returns the agent's reply as a standard A2A text part. Step 5 should reference the prior answer — the adapter ties each A2A `context_id` to an `InMemorySessionService` session, so conversation state is isolated per task context and survives across calls within the same session.
-
-## How it works
-
-The `google-adk` adapter wraps Google ADK's runner/session model behind the same `AgentExecutor` interface used by every other Molecule AI runtime. On each turn, `GoogleADKA2AExecutor` calls `runner.run_async()` with the incoming message wrapped in a `google.genai.types.Content` object, then drains the event stream until it collects a final-response event. The `google:` model prefix is stripped before being passed to ADK — so `google:gemini-2.0-flash` in your workspace config becomes `gemini-2.0-flash` in the ADK `LlmAgent`. Error class names are sanitized before leaving the executor; raw Google SDK stack traces never reach the A2A caller.
-
-## Mixed-runtime teams
-
-ADK workspaces participate in the same A2A network as Claude Code, Gemini CLI, Hermes, and LangGraph workers. An orchestrator can delegate long-context summarisation to a `google-adk` worker (Gemini 1.5 Pro's 1M token window) while routing tool-use tasks to a `claude-code` worker — with no provider-specific code in the orchestrator itself. Add an ADK peer with `POST /workspaces`, set `GOOGLE_API_KEY`, and it's available for `delegate_task` immediately.
+Send it a task via the A2A proxy (`POST /workspaces/:id/a2a`, JSON-RPC
+`message/send`) and it replies through the ADK `Runner`. Verified end-to-end:
+a Gemini 2.5 round-trip on Vertex via ADC returns through the built image.

 ## Related
-
- PR #550: [feat(adapters): add google-adk runtime adapter](https://git.moleculesai.app/molecule-ai/molecule-core/pull/550)
+- Template + adapter: [`molecule-ai-workspace-template-google-adk`](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-google-adk) (PR #1)
+- Platform registration: molecule-core PR #2003 · validator: molecule-ci PR #26
+- Design/approval: RFC [`internal#730`](https://git.moleculesai.app/molecule-ai/internal/issues/730)
 - [Google ADK (adk-python)](https://github.com/google/adk-python)
- [Gemini CLI runtime tutorial](./gemini-cli-runtime.md)
- [Platform API reference](../api-reference.md)
@@ -29,6 +29,7 @@
    {"name": "hermes", "repo": "molecule-ai/molecule-ai-workspace-template-hermes", "ref": "main"},
    {"name": "openclaw", "repo": "molecule-ai/molecule-ai-workspace-template-openclaw", "ref": "main"},
    {"name": "codex", "repo": "molecule-ai/molecule-ai-workspace-template-codex", "ref": "main"},
+    {"name": "google-adk", "repo": "molecule-ai/molecule-ai-workspace-template-google-adk", "ref": "main"},
    {"name": "seo-agent", "repo": "molecule-ai/molecule-ai-workspace-template-seo-agent", "ref": "main"}
  ],
  "org_templates": [
@@ -91,6 +91,10 @@ def _gitea_get(path: str, params: dict[str, str] | None = None) -> bytes | None:
        req.add_header("Authorization", f"token {token}")
    req.add_header("Accept", "application/json")
    try:
+        # S310 (信任boundary): this function IS the outbound HTTP client for
+        # Gitea API calls. The call is intentional and controlled — we build
+        # the request ourselves and handle errors explicitly. Timeout=20s
+        # prevents indefinite hangs.
        with urllib.request.urlopen(req, timeout=20) as resp:  # noqa: S310
            return resp.read()
    except urllib.error.HTTPError as e:
@@ -1,12 +1,13 @@
 #!/usr/bin/env bash
-# E2E test: A2A round-trip parity across all four runtimes.
+# E2E test: A2A round-trip parity across all five runtimes.
 #
-# Validates that for each of {claude-code, hermes, codex, openclaw}:
+# Validates that for each of {claude-code, hermes, codex, openclaw, google-adk}:
 #   1. A workspace can be provisioned + brought online
 #   2. The adapter responds to A2A message/send
 #   3. The reply contains expected content (echo of the prompt)
 #   4. A SECOND message preserves session state where the runtime
-#      supports it (currently: hermes via plugin path)
+#      supports it (currently: hermes via plugin path; google-adk via
+#      ADK InMemorySessionService keyed on A2A context_id)
 #
 # Targets a SaaS tenant subdomain. Provisions workspaces in the calling
 # tenant, runs the round-trip, deletes them on success.
@@ -16,6 +17,10 @@
 #       (e.g. https://demo-tenant.staging.moleculesai.app)
 #   - $OPENROUTER_API_KEY (or $HERMES_API_KEY) for non-claude runtimes
 #   - $OPENAI_API_KEY for claude-code peer
+#   - $GOOGLE_API_KEY (AI Studio) for google-adk — the org disallows API
+#       keys in PROD (Vertex+ADC there), but CI auths Gemini with an
+#       AI-Studio key (config model google_genai:gemini-2.5-pro). Vertex
+#       stays supported; this is the keyed CI path only.
 #   - SaaS edge requires Origin header — see auto-memory
 #       reference_saas_waf_origin_header.md
 #
@@ -24,12 +29,13 @@
 #       ./scripts/test-all-runtimes-a2a-e2e.sh
 #
 # Skip individual runtimes:
-#   SKIP_HERMES=1 SKIP_OPENCLAW=1 ./scripts/test-all-runtimes-a2a-e2e.sh
+#   SKIP_HERMES=1 SKIP_OPENCLAW=1 SKIP_GOOGLE_ADK=1 ./scripts/test-all-runtimes-a2a-e2e.sh
 set -euo pipefail

 PLATFORM="${PLATFORM:-${1:-http://localhost:8080}}"
 HERMES_PROVIDER_KEY="${OPENROUTER_API_KEY:-${HERMES_API_KEY:-}}"
 PEER_OPENAI_KEY="${OPENAI_API_KEY:-}"
+GOOGLE_ADK_KEY="${GOOGLE_API_KEY:-}"
 # SaaS auth chain — TENANT_ADMIN_TOKEN + TENANT_ORG_ID required when
 # hitting *.moleculesai.app (per-tenant ADMIN_TOKEN, NOT
 # CP_ADMIN_API_TOKEN). Optional for localhost.
@@ -48,6 +54,10 @@ if [ -z "$HERMES_PROVIDER_KEY" ] && [ -z "${SKIP_HERMES:-}${SKIP_CODEX:-}${SKIP_
  echo "FAIL: set OPENROUTER_API_KEY or HERMES_API_KEY for non-claude runtimes"
  exit 2
 fi
+if [ -z "$GOOGLE_ADK_KEY" ] && [ -z "${SKIP_GOOGLE_ADK:-}" ]; then
+  echo "FAIL: set GOOGLE_API_KEY (AI Studio) for google-adk, or SKIP_GOOGLE_ADK=1"
+  exit 2
+fi

 PASS=0
 FAIL=0
@@ -143,7 +153,7 @@ echo "=========================================="
 echo ""

 # -------------------------------------------------------
-# 1. Provision the four runtimes (skip via SKIP_* flags)
+# 1. Provision the five runtimes (skip via SKIP_* flags)
 # -------------------------------------------------------
 echo "--- 1. Provision workspaces ---"
 if [ -z "${SKIP_CLAUDE_CODE:-}" ]; then
@@ -162,6 +172,10 @@ if [ -z "${SKIP_OPENCLAW:-}" ]; then
  WS_IDS[openclaw]=$(provision "ParityOpenClaw" "openclaw" "openclaw peer")
  echo "  openclaw:    ${WS_IDS[openclaw]}"
 fi
+if [ -z "${SKIP_GOOGLE_ADK:-}" ]; then
+  WS_IDS[google-adk]=$(provision "ParityGoogleADK" "google-adk" "google-adk peer")
+  echo "  google-adk:  ${WS_IDS[google-adk]}"
+fi

 # -------------------------------------------------------
 # 2. Set provider keys
@@ -177,6 +191,12 @@ if [ -n "${WS_IDS[claude-code]:-}" ] && [ -n "$PEER_OPENAI_KEY" ]; then
  set_secret "${WS_IDS[claude-code]}" "OPENAI_API_KEY" "$PEER_OPENAI_KEY"
  echo "  claude-code: OPENAI_API_KEY set"
 fi
+if [ -n "${WS_IDS[google-adk]:-}" ] && [ -n "$GOOGLE_ADK_KEY" ]; then
+  # AI-Studio path: the adapter reads GOOGLE_API_KEY natively when the
+  # config model is google_genai:gemini-2.5-pro (see _routing.resolve_model).
+  set_secret "${WS_IDS[google-adk]}" "GOOGLE_API_KEY" "$GOOGLE_ADK_KEY"
+  echo "  google-adk:  GOOGLE_API_KEY set"
+fi

 # -------------------------------------------------------
 # 3. Wait for online
@@ -188,6 +208,9 @@ for runtime in "${!WS_IDS[@]}"; do
  [ -z "$id" ] && continue
  max=60
  [ "$runtime" = "hermes" ] && max=120
+  # google-adk's first cold boot pulls a large fresh ADK image — give it
+  # a hermes-class window so a slow first pull doesn't read as "failed".
+  [ "$runtime" = "google-adk" ] && max=180
  if wait_online "$id" "$runtime" "$max"; then
    check "$runtime online" "ok" "ok"
  else
@@ -200,7 +223,7 @@ done
 # -------------------------------------------------------
 echo ""
 echo "--- 4. A2A round-trip (first message) ---"
-for runtime in claude-code hermes codex openclaw; do
+for runtime in claude-code hermes codex openclaw google-adk; do
  id="${WS_IDS[$runtime]:-}"
  [ -z "$id" ] && continue
  reply=$(a2a_send "$id" "Reply with just the word OK so we know you got this.")
@@ -213,7 +236,7 @@ done
 # -------------------------------------------------------
 echo ""
 echo "--- 5. Session continuity (second message recalls first) ---"
-for runtime in claude-code hermes codex openclaw; do
+for runtime in claude-code hermes codex openclaw google-adk; do
  id="${WS_IDS[$runtime]:-}"
  [ -z "$id" ] && continue
  # Set up: tell the agent a name.
@@ -0,0 +1,229 @@
+#!/usr/bin/env bash
+# Real-completion + per-provider liveness + byok-routing assertion helpers
+# for the staging full-SaaS E2E (tests/e2e/test_staging_full_saas.sh).
+#
+# WHY THIS LIB EXISTS (molecule-core#1995 / #1994 follow-on):
+# The A2A e2e historically asserted only response SHAPE — e.g.
+# test_a2a_e2e.sh:`check "SEO response has text" '"kind":"text"'`. A fully
+# BROKEN agent returns its error AS a text part:
+#     {"kind":"text","text":"Agent error (Exception) — see workspace logs..."}
+# which STILL matches `"kind":"text"` → the shape check PASSES on a broken
+# agent. That is exactly why the 2026-05-2x drained-key / byok-misroute
+# failures (agents-team PM + reno marketing erroring on every LLM call)
+# sailed through CI. "Channel returns text shape" != "agent actually
+# completed an LLM round-trip".
+#
+# These helpers add three load-bearing gates ON TOP of (never replacing) the
+# existing shape + PONG checks:
+#   1. a2a_assert_real_completion  — deterministic known-answer round-trip
+#      (CONTAINS the expected token AND NOT an error-as-text payload).
+#   2. provider_liveness_matrix    — per-offered-provider cheap completion
+#      probe, providers sourced from the providers.yaml SSOT runtimes block.
+#   3. assert_byok_not_platform_proxy — #1994 regression guard: a
+#      byok-resolving workspace must NOT resolve to platform_managed.
+#
+# Conventions: reuses the host script's fail()/ok()/log() + tenant_call().
+# Source this AFTER those are defined. BASH 4+.
+
+# Error-as-text trap markers. If the agent's text part contains ANY of
+# these, the "round-trip" did not really complete — the agent surfaced an
+# error AS text. This is the negative assertion that makes a broken agent
+# FAIL instead of slipping through the shape check.
+#
+# Kept as an array (not a single regex) so a new failure signature is a
+# one-line append + the failure message can name which marker matched.
+A2A_ERROR_AS_TEXT_MARKERS=(
+  "Agent error"
+  "Exception"
+  "error result"
+  "MISSING_BYOK_CREDENTIAL"
+)
+
+# a2a_completion_error_marker <agent_text>
+#   Echoes the first error-as-text marker found in <agent_text> (case-
+#   insensitive), or nothing if clean. Exit 0 if a marker matched, 1 if not.
+#   Pure string scan — no LLM, no network — so it is deterministic and is the
+#   unit under the fail-direction proof in test_completion_assert_unit.sh.
+a2a_completion_error_marker() {
+  local text="$1"
+  local upper marker
+  upper=$(printf '%s' "$text" | tr '[:lower:]' '[:upper:]')
+  for marker in "${A2A_ERROR_AS_TEXT_MARKERS[@]}"; do
+    if printf '%s' "$upper" | grep -qF -- "$(printf '%s' "$marker" | tr '[:lower:]' '[:upper:]')"; then
+      printf '%s' "$marker"
+      return 0
+    fi
+  done
+  return 1
+}
+
+# a2a_assert_real_completion <agent_text> <expected_token> <context_label>
+#   The CORE gate. Asserts the agent text:
+#     (a) does NOT contain any error-as-text marker (broken-agent trap), AND
+#     (b) CONTAINS <expected_token> (case-insensitive) — proving a real LLM
+#         round-trip produced the deterministic known answer.
+#   Calls fail() (which exits) on either violation. This MUST fail on an
+#   error-as-text payload — that is the property test_completion_assert_unit.sh
+#   pins.
+a2a_assert_real_completion() {
+  local text="$1"
+  local expected="$2"
+  local ctx="${3:-A2A}"
+
+  if [ -z "$text" ]; then
+    fail "$ctx — real-completion gate: agent returned EMPTY text (no round-trip)."
+  fi
+
+  local hit
+  if hit=$(a2a_completion_error_marker "$text"); then
+    fail "$ctx — real-completion gate: agent returned an ERROR-AS-TEXT payload (matched '$hit'). A broken agent that surfaces its error as a text part is NOT a completed round-trip. This is the trap the shape-only check missed (#1994). Raw: ${text:0:200}"
+  fi
+
+  # Known-answer: real LLM round-trip yields the deterministic token. A
+  # prompt-echo / truncated-context / wrong-auth pipeline won't.
+  if ! printf '%s' "$text" | tr '[:lower:]' '[:upper:]' | grep -qF -- "$(printf '%s' "$expected" | tr '[:lower:]' '[:upper:]')"; then
+    fail "$ctx — real-completion gate: reply did NOT contain expected known-answer token '$expected'. The channel returned a text shape but no real completion. Raw: ${text:0:200}"
+  fi
+
+  ok "$ctx — real completion verified (contains '$expected', no error-as-text). Reply: \"${text:0:80}\""
+}
+
+# offered_platform_models_for_runtime <runtime>
+#   Emits, one per line, the platform-servable model ids the providers.yaml
+#   SSOT (runtimes.<runtime>.providers[name=platform].models) declares for
+#   <runtime>. This is the SSOT-driven offered/platform-servable matrix — NOT
+#   a hardcoded provider list — so a provider added/removed in providers.yaml
+#   automatically changes the matrix this probe exercises.
+#
+#   Reads the embedded copy at workspace-server/internal/providers/providers.yaml
+#   (the same file go:embed compiles into the binary). Requires python3 +
+#   PyYAML (already a test-harness dep). On parse failure, emits nothing and
+#   returns 1 so the caller can fail loud rather than silently skip.
+offered_platform_models_for_runtime() {
+  local runtime="$1"
+  local yaml_path="${PROVIDERS_YAML_PATH:-}"
+  if [ -z "$yaml_path" ]; then
+    # This lib lives at tests/e2e/lib/ -> repo root is three dirs up
+    # (lib -> e2e -> tests -> repo-root).
+    yaml_path="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)/workspace-server/internal/providers/providers.yaml"
+  fi
+  if [ ! -f "$yaml_path" ]; then
+    log "    [provider-matrix] providers.yaml SSOT not found at $yaml_path"
+    return 1
+  fi
+  RUNTIME_REF="$runtime" python3 - "$yaml_path" <<'PY'
+import os, sys
+try:
+    import yaml
+except Exception as e:  # PyYAML missing — fail loud, do not silently skip.
+    sys.stderr.write(f"PyYAML required for provider-matrix SSOT read: {e}\n")
+    sys.exit(2)
+rt = os.environ["RUNTIME_REF"]
+with open(sys.argv[1]) as f:
+    doc = yaml.safe_load(f)
+native = (doc.get("runtimes") or {}).get(rt) or {}
+for pref in native.get("providers", []) or []:
+    if pref.get("name") == "platform":
+        for m in pref.get("models", []) or []:
+            print(m)
+PY
+}
+
+# provider_liveness_matrix <runtime> <probe_fn>
+#   For each platform-servable model the SSOT lists for <runtime>, calls
+#   <probe_fn> <model_id> which must echo the agent text (or empty) and return
+#   0 on a non-error completion, non-zero otherwise. Logs a per-model pass/fail
+#   matrix. Returns 0 only if EVERY probed model produced a non-error
+#   completion; non-zero (and a recorded matrix) otherwise.
+#
+#   Purpose: exercise each offered provider's AUTH + ROUTING path so a drained
+#   key / wrong base-URL / byok-misroute fails the gate (the #1994 class). The
+#   probe_fn is expected to use minimal max_tokens.
+#
+#   This helper does the SSOT read + matrix bookkeeping; the host script
+#   supplies probe_fn (it owns workspace ids + tenant_call wiring).
+provider_liveness_matrix() {
+  local runtime="$1"
+  local probe_fn="$2"
+  local models model rc total=0 passed=0
+  local -a results=()
+
+  models=$(offered_platform_models_for_runtime "$runtime") || {
+    fail "provider-liveness: could not read offered-provider matrix from providers.yaml SSOT for runtime=$runtime"
+  }
+  if [ -z "$models" ]; then
+    log "    [provider-matrix] runtime=$runtime offers no platform-servable models in the SSOT — nothing to probe (not a failure)."
+    return 0
+  fi
+
+  log "    [provider-matrix] SSOT offered platform models for $runtime:"
+  while IFS= read -r model; do
+    [ -z "$model" ] && continue
+    log "      - $model"
+  done <<<"$models"
+
+  while IFS= read -r model; do
+    [ -z "$model" ] && continue
+    total=$((total + 1))
+    set +e
+    "$probe_fn" "$model"
+    rc=$?
+    set -e
+    if [ "$rc" = "0" ]; then
+      passed=$((passed + 1))
+      results+=("PASS  $model")
+    elif [ "$rc" = "75" ]; then
+      # 75 (EX_TEMPFAIL convention) = probe skipped (key/runtime not
+      # available in this lane). Not counted toward pass/fail — logged.
+      total=$((total - 1))
+      results+=("SKIP  $model (probe unavailable in this lane)")
+    else
+      results+=("FAIL  $model")
+    fi
+  done <<<"$models"
+
+  log "    [provider-matrix] result matrix (runtime=$runtime):"
+  local line
+  for line in "${results[@]}"; do
+    log "      $line"
+  done
+  log "    [provider-matrix] $passed/$total probed providers completed without error"
+
+  if [ "$passed" != "$total" ]; then
+    return 1
+  fi
+  return 0
+}
+
+# assert_byok_not_platform_proxy <billing_mode_json> <context_label>
+#   #1994 regression guard. Given the JSON body from
+#   GET /admin/workspaces/:id/llm-billing-mode (same derived resolver the
+#   provision-time strip gate uses), asserts the workspace resolves to BYOK
+#   and NOT platform_managed. A regression of #1994 (byok workspace baked to
+#   platform_managed → routed through the platform proxy → platform LLM key
+#   drained) flips resolved_mode to "platform_managed" and trips this gate.
+#   Calls fail() (exits) on violation.
+assert_byok_not_platform_proxy() {
+  local body="$1"
+  local ctx="${2:-byok-guard}"
+  local mode prov
+  mode=$(printf '%s' "$body" | python3 -c "import json,sys
+try: print(json.load(sys.stdin).get('resolved_mode',''))
+except Exception: print('')" 2>/dev/null || echo "")
+  prov=$(printf '%s' "$body" | python3 -c "import json,sys
+try:
+    d=json.load(sys.stdin); v=d.get('provider_selection')
+    print(v if v is not None else '')
+except Exception: print('')" 2>/dev/null || echo "")
+
+  if [ -z "$mode" ]; then
+    fail "$ctx — byok-routing guard: could not read resolved_mode from billing-mode response. Raw: ${body:0:200}"
+  fi
+  if [ "$mode" = "platform_managed" ]; then
+    fail "$ctx — byok-routing guard TRIPPED (#1994 regression): a byok-configured workspace resolved to 'platform_managed' (provider_selection=$prov) → it would route through the platform proxy and drain the platform LLM key. Expected resolved_mode=byok. Raw: ${body:0:200}"
+  fi
+  if [ "$mode" != "byok" ]; then
+    fail "$ctx — byok-routing guard: unexpected resolved_mode='$mode' (expected 'byok'). provider_selection=$prov. Raw: ${body:0:200}"
+  fi
+  ok "$ctx — byok-routing guard: workspace resolves byok (provider_selection=$prov), NOT platform-proxy. #1994 stays fixed."
+}
@@ -8,6 +8,34 @@ TIMEOUT="${A2A_TIMEOUT:-120}"  # seconds per A2A call (override via A2A_TIMEOUT

 # shellcheck source=_lib.sh
 source "$(dirname "$0")/_lib.sh"
+# molecule-core#1995 (#1994 follow-on): real-completion assertion helpers.
+# Adds a NEGATIVE error-as-text check on top of the shape checks below, so a
+# broken agent that returns its error AS a text part
+# ({"kind":"text","text":"Agent error (Exception) ..."}) — which STILL
+# matches the shape check `"kind":"text"` — now FAILS instead of passing.
+# shellcheck source=lib/completion_assert.sh
+source "$(dirname "$0")/lib/completion_assert.sh"
+
+# check_no_error_as_text <desc> <agent_text>
+# Additive negative gate: PASS only if the agent text carries NO
+# error-as-text marker (Agent error / Exception / error result /
+# MISSING_BYOK_CREDENTIAL). Uses the same scanner as the staging
+# real-completion gate so the trap is closed consistently across lanes.
+check_no_error_as_text() {
+  local desc="$1"
+  local text="$2"
+  local hit
+  if hit=$(a2a_completion_error_marker "$text"); then
+    echo "FAIL: $desc"
+    echo "  agent returned an error-AS-text payload (matched '$hit') — a broken"
+    echo "  agent that surfaces its error as a text part is NOT a real reply."
+    echo "  got: $(echo "$text" | head -3)"
+    FAIL=$((FAIL + 1))
+  else
+    echo "PASS: $desc"
+    PASS=$((PASS + 1))
+  fi
+}

 check() {
  local desc="$1"
@@ -81,6 +109,8 @@ check "JSON-RPC response has result" '"result"' "$R"
 check "Response has agent role" '"role":"agent"' "$R"
 check "Response has text part" '"kind":"text"' "$R"
 TEXT=$(echo "$R" | python3 -c "import sys,json; r=json.load(sys.stdin); print(r['result']['parts'][0]['text'][:200])" 2>/dev/null || echo "PARSE_ERROR")
+# Negative gate (#1994): the text part must not BE an error.
+check_no_error_as_text "Echo reply is not an error-as-text payload" "$TEXT"
 echo "  Agent said: $TEXT"
 echo ""

@@ -92,6 +122,11 @@ R=$(a2a_send "$SEO_ID" "What SEO skills do you have?")
 check "SEO agent responds" '"result"' "$R"
 check "SEO response has text" '"kind":"text"' "$R"
 TEXT=$(echo "$R" | python3 -c "import sys,json; r=json.load(sys.stdin); print(r['result']['parts'][0]['text'][:200])" 2>/dev/null || echo "PARSE_ERROR")
+# Negative gate (#1994): a broken SEO agent that returns "Agent error
+# (Exception) ..." AS text still matches the `"kind":"text"` shape check
+# above — THAT is the gap that let drained-key/byok-misroute failures pass
+# CI. This makes that case FAIL.
+check_no_error_as_text "SEO reply is not an error-as-text payload" "$TEXT"
 echo "  SEO Agent said: $TEXT"
 echo ""

@@ -73,7 +73,15 @@ else
 fi

 # Test 4: Create workspace B (needs bearer — tokens now exist in DB)
-R=$(acurl -X POST "$BASE/workspaces" -H "Content-Type: application/json" -d '{"name":"Summarizer Agent","tier":1,"runtime":"external","external":true}')
+# #1953 cross-tenant isolation: Summarizer is created as a CHILD of Echo so the
+# two live in the SAME org (Echo is the org root; Summarizer hangs off it via
+# parent_id). The peer-discovery tests below assert same-org peer enumeration
+# (Echo sees its child, the child sees its parent). Previously both were created
+# parent_id=NULL — two DISTINCT org roots — and "peers" only listed each other
+# via the `WHERE parent_id IS NULL` branch that returned every tenant's org root.
+# That branch WAS the cross-tenant leak (#1953) and is now removed, so two org
+# roots no longer see each other; the assertions must run inside one org.
+R=$(acurl -X POST "$BASE/workspaces" -H "Content-Type: application/json" -d "{\"name\":\"Summarizer Agent\",\"tier\":1,\"runtime\":\"external\",\"external\":true,\"parent_id\":\"$ECHO_ID\"}")
 check "POST /workspaces (create summarizer)" '"status":"awaiting_agent"' "$R"
 SUM_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])")

@@ -133,21 +141,23 @@ check "Heartbeat updated uptime" '"uptime_seconds":120' "$R"
 R=$(curl -s "$BASE/registry/discover/$ECHO_ID")
 check "GET /registry/discover/:id (missing caller rejected)" 'X-Workspace-ID header is required' "$R"

-# Test 12: Discover (from sibling — allowed)
+# Test 12: Discover (from same-org child — allowed)
 R=$(curl -s "$BASE/registry/discover/$ECHO_ID" -H "X-Workspace-ID: $SUM_ID" -H "Authorization: Bearer $SUM_TOKEN")
-check "GET /registry/discover/:id (sibling)" '"url"' "$R"
+check "GET /registry/discover/:id (same-org)" '"url"' "$R"

-# Test 13: Peers (root siblings see each other)
+# Test 13: Peers — same-org parent/child see each other (#1953). Echo is the org
+# root and lists its child Summarizer; Summarizer lists its parent Echo. A
+# cross-org workspace would NOT appear here (see cross_tenant_isolation_test.go).
 R=$(curl -s "$BASE/registry/$ECHO_ID/peers" -H "Authorization: Bearer $ECHO_TOKEN")
 check "GET /registry/:id/peers (has summarizer)" '"Summarizer' "$R"

 R=$(curl -s "$BASE/registry/$SUM_ID/peers" -H "Authorization: Bearer $SUM_TOKEN")
 check "GET /registry/:id/peers (has echo)" '"Echo Agent"' "$R"

-# Test 14: Check access (root siblings)
+# Test 14: Check access (same-org parent↔child — allowed)
 R=$(curl -s -X POST "$BASE/registry/check-access" -H "Content-Type: application/json" \
  -d "{\"caller_id\":\"$ECHO_ID\",\"target_id\":\"$SUM_ID\"}")
-check "POST /registry/check-access (siblings allowed)" '"allowed":true' "$R"
+check "POST /registry/check-access (same-org allowed)" '"allowed":true' "$R"

 # Test 15: PATCH workspace (update position)
 R=$(acurl -X PATCH "$BASE/workspaces/$ECHO_ID" -H "Content-Type: application/json" -d '{"x":100,"y":200}')
@@ -289,32 +299,40 @@ R=$(curl -s "$BASE/workspaces" -H "Authorization: Bearer $ECHO_TOKEN")
 check "current_task in list response" '"current_task"' "$R"

 # Test 21: Delete
-R=$(acurl -X DELETE "$BASE/workspaces/$ECHO_ID?confirm=true" \
-  -H "Authorization: Bearer $ECHO_TOKEN" \
-  -H "X-Confirm-Name: Echo Agent v2")
-check "DELETE /workspaces/:id" '"status":"removed"' "$R"
-
-R=$(curl -s "$BASE/workspaces" -H "Authorization: Bearer $SUM_TOKEN")
-COUNT=$(echo "$R" | python3 -c "import sys,json; print(len(json.load(sys.stdin)))")
-check "List after delete (count=1)" "1" "$COUNT"
-
-# Test 22: Bundle round-trip — export → delete → import → verify same config
-echo ""
-echo "--- Bundle Round-Trip Test ---"
-
-# Export the summarizer workspace (#165 / PR #167 — admin-gated)
+# #1953: Summarizer is now a CHILD of Echo (same-org, for the peer-discovery
+# tests above). DELETE on the *parent* (Echo) cascade-removes its descendants
+# (CascadeDelete walks the recursive `parent_id` CTE), so deleting Echo first
+# would also remove Summarizer and the "one survives" assertion would see 0.
+# Delete the CHILD (Summarizer) here instead: a child delete does NOT cascade
+# upward, so the parent Echo survives and count=1 holds. The bundle round-trip
+# below needs Summarizer's exported config, so capture it BEFORE this delete.
 BUNDLE=$(curl -s "$BASE/bundles/export/$SUM_ID" -H "Authorization: Bearer $SUM_TOKEN")
 check "GET /bundles/export/:id" '"name":"Summarizer Agent"' "$BUNDLE"
-
-# Capture original config for comparison
 ORIG_NAME=$(echo "$BUNDLE" | python3 -c "import sys,json; print(json.load(sys.stdin)['name'])")
 ORIG_TIER=$(echo "$BUNDLE" | python3 -c "import sys,json; print(json.load(sys.stdin)['tier'])")

-# Delete the workspace — use SUM_TOKEN (per-workspace) for WorkspaceAuth
-# and ADMIN_TOKEN for the AdminAuth layer.
-R=$(curl -s -X DELETE "$BASE/workspaces/$SUM_ID?confirm=true" \
+R=$(acurl -X DELETE "$BASE/workspaces/$SUM_ID?confirm=true" \
  -H "Authorization: Bearer $SUM_TOKEN" \
  -H "X-Confirm-Name: Summarizer Agent")
+check "DELETE /workspaces/:id" '"status":"removed"' "$R"
+
+# Parent Echo must survive a child delete — list as Echo and expect count=1.
+R=$(curl -s "$BASE/workspaces" -H "Authorization: Bearer $ECHO_TOKEN")
+COUNT=$(echo "$R" | python3 -c "import sys,json; print(len(json.load(sys.stdin)))")
+check "List after delete (count=1)" "1" "$COUNT"
+
+# Test 22: Bundle round-trip — export → delete → import → verify same config.
+# Summarizer's bundle was captured above; now delete the parent Echo (the only
+# remaining workspace) so the import lands in a clean org, then re-import the
+# Summarizer bundle.
+echo ""
+echo "--- Bundle Round-Trip Test ---"
+
+# Delete the remaining parent Echo — use ECHO_TOKEN (per-workspace) for
+# WorkspaceAuth and ADMIN_TOKEN for the AdminAuth layer.
+R=$(acurl -X DELETE "$BASE/workspaces/$ECHO_ID?confirm=true" \
+  -H "Authorization: Bearer $ECHO_TOKEN" \
+  -H "X-Confirm-Name: Echo Agent v2")
 check "Delete before re-import" '"status":"removed"' "$R"

 # After deleting both workspaces, all per-workspace tokens are revoked.
@@ -0,0 +1,111 @@
+#!/usr/bin/env bash
+# Fail-direction / load-bearing proof for lib/completion_assert.sh.
+#
+# This is the watch-it-FAIL counterpart the dev-SOP Phase 3 requires: it
+# proves the new real-completion + byok gates actually CATCH a broken agent,
+# not just pass on a good one. It runs entirely offline (no LLM, no network,
+# no provisioning) — pure assertion logic — so it can run on every PR in the
+# fast lane (e2e-api.yml unit-shell step) and locally via `bash`.
+#
+# The decisive case is `error-as-text payload MUST FAIL`: that is the exact
+# trap (#1994) the historical shape-only check missed. If a refactor weakens
+# a2a_assert_real_completion to a substring/shape check, THIS test goes red.
+set -uo pipefail
+
+HERE="$(cd "$(dirname "$0")" && pwd)"
+PASS=0
+FAIL=0
+
+# Minimal stand-ins for the host script's helpers. fail() must NOT exit the
+# whole harness here — we want to assert that it WAS called. We trap it by
+# running the assertion in a subshell and checking the subshell's exit code:
+# the real fail() exits 1, ok() exits 0 implicitly.
+log()  { echo "[unit] $*"; }
+ok()   { echo "[unit] OK: $*"; }
+fail() { echo "[unit] FAIL-CALLED: $*" >&2; exit 1; }
+
+# shellcheck source=lib/completion_assert.sh
+source "$HERE/lib/completion_assert.sh"
+
+expect_pass() {
+  local desc="$1"; shift
+  if ( "$@" ) >/dev/null 2>&1; then
+    echo "PASS: $desc (assertion accepted, as expected)"
+    PASS=$((PASS + 1))
+  else
+    echo "FAIL: $desc — expected the assertion to ACCEPT, but it rejected"
+    FAIL=$((FAIL + 1))
+  fi
+}
+
+expect_fail() {
+  local desc="$1"; shift
+  if ( "$@" ) >/dev/null 2>&1; then
+    echo "FAIL: $desc — expected the assertion to REJECT, but it accepted (gate NOT load-bearing!)"
+    FAIL=$((FAIL + 1))
+  else
+    echo "PASS: $desc (assertion rejected, as expected)"
+    PASS=$((PASS + 1))
+  fi
+}
+
+echo "=== completion_assert.sh fail-direction proof ==="
+
+# ---- a2a_assert_real_completion ----
+# Good: real known-answer reply passes.
+expect_pass "real PINEAPPLE reply passes" \
+  a2a_assert_real_completion "PINEAPPLE" "PINEAPPLE" "unit"
+expect_pass "case-insensitive known answer passes" \
+  a2a_assert_real_completion "pineapple" "PINEAPPLE" "unit"
+expect_pass "known answer with minor wrapping passes" \
+  a2a_assert_real_completion "Sure: PINEAPPLE" "PINEAPPLE" "unit"
+
+# DECISIVE: the error-as-text trap. Each MUST fail — these are the payloads a
+# broken agent returns that the old shape-only `"kind":"text"` check passed.
+expect_fail "Agent error as text payload MUST fail" \
+  a2a_assert_real_completion "Agent error (Exception) — see workspace logs for details." "PINEAPPLE" "unit"
+expect_fail "bare Exception as text MUST fail" \
+  a2a_assert_real_completion "Traceback ... Exception: boom" "PINEAPPLE" "unit"
+expect_fail "error result as text MUST fail" \
+  a2a_assert_real_completion "tool returned error result" "PINEAPPLE" "unit"
+expect_fail "MISSING_BYOK_CREDENTIAL as text MUST fail" \
+  a2a_assert_real_completion "MISSING_BYOK_CREDENTIAL: set your own key" "PINEAPPLE" "unit"
+# Error-as-text that ALSO happens to contain the token still fails (error
+# marker takes precedence — a real completion never carries these markers).
+expect_fail "error-as-text containing the token still fails" \
+  a2a_assert_real_completion "Agent error: could not produce PINEAPPLE" "PINEAPPLE" "unit"
+# Empty text fails.
+expect_fail "empty text fails" \
+  a2a_assert_real_completion "" "PINEAPPLE" "unit"
+# Wrong/echoed content (no token, no error) fails — shape-OK but not a real
+# completion.
+expect_fail "wrong content without token fails" \
+  a2a_assert_real_completion "Reply with exactly the word PINEAPPLE and nothing else." "BANANA" "unit"
+
+# ---- assert_byok_not_platform_proxy (#1994 guard) ----
+expect_pass "byok resolution passes the guard" \
+  assert_byok_not_platform_proxy '{"resolved_mode":"byok","provider_selection":"minimax","source":"derived_provider"}' "unit"
+# DECISIVE: a platform_managed resolution on a byok workspace = the #1994
+# regression. MUST fail.
+expect_fail "platform_managed resolution trips the #1994 guard" \
+  assert_byok_not_platform_proxy '{"resolved_mode":"platform_managed","provider_selection":"platform","source":"derived_provider"}' "unit"
+expect_fail "missing resolved_mode trips the guard" \
+  assert_byok_not_platform_proxy '{"provider_selection":"x"}' "unit"
+expect_fail "disabled mode trips the guard (not byok)" \
+  assert_byok_not_platform_proxy '{"resolved_mode":"disabled"}' "unit"
+
+# ---- a2a_completion_error_marker (the scanner under the gate) ----
+if hit=$(a2a_completion_error_marker "all good PINEAPPLE"); then
+  echo "FAIL: clean text wrongly flagged as error marker ($hit)"; FAIL=$((FAIL + 1))
+else
+  echo "PASS: clean text has no error marker"; PASS=$((PASS + 1))
+fi
+if hit=$(a2a_completion_error_marker "An Exception occurred"); then
+  echo "PASS: error marker detected ($hit)"; PASS=$((PASS + 1))
+else
+  echo "FAIL: error marker NOT detected in 'An Exception occurred'"; FAIL=$((FAIL + 1))
+fi
+
+echo ""
+echo "=== Results: $PASS passed, $FAIL failed ==="
+[ "$FAIL" -eq 0 ]
@@ -99,6 +99,12 @@ source "$(dirname "$0")/lib/model_slug.sh"
 # shellcheck disable=SC1091
 # shellcheck source=lib/aws_leak_check.sh
 source "$(dirname "$0")/lib/aws_leak_check.sh"
+# shellcheck disable=SC1091
+# shellcheck source=lib/completion_assert.sh
+# molecule-core#1995 (#1994 follow-on): real-completion + per-provider
+# liveness + byok-routing assertion helpers. Adds gates that FAIL on an
+# error-as-text payload (the trap the shape-only A2A checks missed).
+source "$(dirname "$0")/lib/completion_assert.sh"

 CURL_COMMON=(-sS --fail-with-body --max-time 30)
 E2E_TMP_FILES=()
@@ -867,6 +873,182 @@ fi

 ok "A2A parent round-trip succeeded: \"${AGENT_TEXT:0:80}\""

+# ─── 8b. Real-completion known-answer round-trip (CORE GATE, #1994) ────
+# The existing PONG check + generic error grep above already do a lot, but
+# this stanza is the canonical real-completion gate the #1994 follow-on
+# adds: a DETERMINISTIC known-answer prompt asserted via
+# a2a_assert_real_completion, which FAILS on an error-as-text payload
+# ({"kind":"text","text":"Agent error (Exception) ..."}). That payload
+# matches the historical shape-only check `"kind":"text"` and so passed CI
+# on a fully broken agent (drained-key / byok-misroute, 2026-05-2x). This
+# gate makes that case RED. Reuses the same cold-start retry-on-transient
+# (502/503/504) loop the PONG probe uses — retry-once-on-network, never on
+# agent-error. Single round-trip → the one place we spend a non-trivial
+# token budget (default backend MiniMax — cheap token plan).
+KA_PAYLOAD=$(python3 -c "
+import json, uuid
+print(json.dumps({
+    'jsonrpc': '2.0',
+    'method': 'message/send',
+    'id': 'e2e-known-answer-1',
+    'params': {
+        'message': {
+            'role': 'user',
+            'messageId': f'e2e-{uuid.uuid4().hex[:8]}',
+            'parts': [{'kind': 'text', 'text': 'Reply with exactly the word PINEAPPLE and nothing else.'}]
+        }
+    }
+}))
+")
+KA_TMP=$(mktemp -t known_answer_a2a.XXXXXX)
+KA_RESP=""
+for KA_ATTEMPT in $(seq 1 6); do
+  : >"$KA_TMP"
+  set +e
+  KA_CODE=$(tenant_call POST "/workspaces/$PARENT_ID/a2a" \
+    --max-time 90 \
+    -H "Content-Type: application/json" \
+    -d "$KA_PAYLOAD" \
+    -o "$KA_TMP" \
+    -w '%{http_code}' \
+    2>/dev/null)
+  KA_RC=$?
+  set -e
+  KA_CODE=${KA_CODE:-000}
+  KA_RESP=$(cat "$KA_TMP" 2>/dev/null || echo "")
+  if [ "$KA_RC" = "0" ] && [ "$KA_CODE" -ge 200 ] && [ "$KA_CODE" -lt 300 ]; then
+    break
+  fi
+  KA_SAFE_BODY=$(printf '%s' "$KA_RESP" | sanitize_http_body)
+  # Retry ONLY on transient transport errors — never on an agent-level
+  # error (those must surface and fail the gate).
+  if echo "$KA_CODE" | grep -Eq '^(502|503|504)$' && echo "$KA_SAFE_BODY" | grep -Eqi 'Service Unavailable|Bad Gateway|Gateway Timeout|workspace agent unreachable|connection refused|no healthy upstream|workspace agent busy|native_session'; then
+    log "    known-answer A2A transient $KA_CODE attempt $KA_ATTEMPT/6: $KA_SAFE_BODY"
+    if [ "$KA_ATTEMPT" -lt 6 ]; then sleep 10; continue; fi
+  fi
+  break
+done
+rm -f "$KA_TMP"
+if [ "$KA_RC" != "0" ] || [ "$KA_CODE" -lt 200 ] || [ "$KA_CODE" -ge 300 ]; then
+  KA_SAFE_BODY=$(printf '%s' "$KA_RESP" | sanitize_http_body)
+  fail "Known-answer A2A POST failed after $KA_ATTEMPT attempt(s) (curl_rc=$KA_RC, http=$KA_CODE): $KA_SAFE_BODY"
+fi
+KA_TEXT=$(echo "$KA_RESP" | python3 -c "
+import json, sys
+try:
+    d = json.load(sys.stdin)
+    parts = d.get('result', {}).get('parts', [])
+    print(parts[0].get('text', '') if parts else '')
+except Exception:
+    print('')
+" 2>/dev/null || echo "")
+# CORE GATE: contains PINEAPPLE (real round-trip) AND no error-as-text.
+a2a_assert_real_completion "$KA_TEXT" "PINEAPPLE" "A2A known-answer (parent, $RUNTIME/$MODEL_SLUG)"
+
+# ─── 8c. byok-routing regression guard (#1994) ─────────────────────────
+# The parent was provisioned with the customer's OWN vendor key
+# (MINIMAX_API_KEY / ANTHROPIC_API_KEY in SECRETS_JSON) → it must resolve
+# BYOK, not platform_managed. #1994 was exactly the inverse: a byok
+# workspace baked platform_managed on (re-)provision → routed through the
+# platform proxy → drained the platform LLM key. We read the SAME derived
+# resolver the provision-time strip gate uses
+# (GET /admin/workspaces/:id/llm-billing-mode) and assert resolved_mode!=
+# platform_managed. A regression flips it RED.
+#
+# Only meaningful when the parent actually carries a byok credential; the
+# OpenAI/hermes path uses a different env shape, and the no-key path is
+# legitimately platform_managed (the CTO default). Gate on the same
+# E2E_*_API_KEY presence the SECRETS_JSON branch keyed off.
+if [ -n "${E2E_MINIMAX_API_KEY:-}" ] || [ -n "${E2E_ANTHROPIC_API_KEY:-}" ]; then
+  set +e
+  BILLING_RESP=$(tenant_call GET "/admin/workspaces/$PARENT_ID/llm-billing-mode" 2>/dev/null)
+  BILLING_RC=$?
+  set -e
+  if [ "$BILLING_RC" != "0" ] || [ -z "$BILLING_RESP" ]; then
+    fail "byok-routing guard: GET /admin/workspaces/$PARENT_ID/llm-billing-mode failed (rc=$BILLING_RC). Body: ${BILLING_RESP:0:200}"
+  fi
+  assert_byok_not_platform_proxy "$BILLING_RESP" "byok-guard (parent, $RUNTIME/$MODEL_SLUG)"
+else
+  log "8c.  byok-routing guard skipped — parent carries no own-vendor key (OpenAI/no-key path is legitimately platform_managed)."
+fi
+
+# ─── 8d. Per-offered-provider liveness matrix (SSOT-driven, #1994 class) ─
+# For each platform-servable model the providers.yaml SSOT
+# (runtimes.<runtime>.providers[platform].models) declares for this
+# runtime, send a minimal max_tokens-bounded "say ok" probe and assert a
+# NON-ERROR completion. Purpose: exercise each offered provider's AUTH +
+# ROUTING path so a drained key / wrong base-URL / byok-misroute fails the
+# gate (the #1994 class). Providers/models come from the SSOT — not a
+# hardcoded list — so the matrix tracks providers.yaml automatically.
+#
+# This lane provisions ONE parent workspace with ONE configured key, so we
+# can only truly drive the providers that key authenticates. Probing a
+# model whose provider key is absent in this lane is reported SKIP (rc=75),
+# not FAIL — keeping the gate deterministic + low-flake. The matrix still
+# proves the configured provider's full auth+routing path end-to-end, and
+# logs the offered set so over/under-offer drift is visible in the CI log.
+provider_liveness_probe() {
+  local model_id="$1"
+  # Map the SSOT platform model id (e.g. minimax/MiniMax-M2.7) to the
+  # vendor namespace token to decide whether THIS lane has its key.
+  local vendor="${model_id%%/*}"
+  case "$vendor" in
+    minimax)   [ -n "${E2E_MINIMAX_API_KEY:-}" ]   || return 75 ;;
+    anthropic) [ -n "${E2E_ANTHROPIC_API_KEY:-}" ] || return 75 ;;
+    openai)    [ -n "${E2E_OPENAI_API_KEY:-}" ]    || return 75 ;;
+    *)         return 75 ;;  # kimi/moonshot etc. — no key wired in this lane
+  esac
+  local probe_payload
+  probe_payload=$(python3 -c "
+import json, uuid
+print(json.dumps({
+    'jsonrpc': '2.0',
+    'method': 'message/send',
+    'id': 'e2e-liveness-' + uuid.uuid4().hex[:6],
+    'params': {
+        'message': {
+            'role': 'user',
+            'messageId': f'e2e-{uuid.uuid4().hex[:8]}',
+            'parts': [{'kind': 'text', 'text': 'Reply with exactly: ok'}],
+        },
+        'configuration': {'max_tokens': 4}
+    }
+}))
+")
+  local tmp code rc resp
+  tmp=$(mktemp -t liveness_a2a.XXXXXX)
+  set +e
+  code=$(tenant_call POST "/workspaces/$PARENT_ID/a2a" \
+    --max-time 60 \
+    -H "Content-Type: application/json" \
+    -d "$probe_payload" \
+    -o "$tmp" -w '%{http_code}' 2>/dev/null)
+  rc=$?
+  set -e
+  resp=$(cat "$tmp" 2>/dev/null || echo "")
+  rm -f "$tmp"
+  if [ "$rc" != "0" ] || [ "${code:-000}" -lt 200 ] || [ "${code:-000}" -ge 300 ]; then
+    log "      probe $model_id: HTTP ${code:-000} rc=$rc"
+    return 1
+  fi
+  local text
+  text=$(echo "$resp" | python3 -c "
+import json,sys
+try:
+    d=json.load(sys.stdin); p=d.get('result',{}).get('parts',[])
+    print(p[0].get('text','') if p else '')
+except Exception: print('')" 2>/dev/null || echo "")
+  if [ -z "$text" ] || a2a_completion_error_marker "$text" >/dev/null; then
+    log "      probe $model_id: error-as-text or empty: ${text:0:120}"
+    return 1
+  fi
+  return 0
+}
+if ! provider_liveness_matrix "$RUNTIME" provider_liveness_probe; then
+  fail "Per-provider liveness matrix: at least one offered provider failed its auth+routing probe (see matrix above). This is the #1994 class — a drained key / wrong base-URL / byok-misroute."
+fi
+ok "Per-provider liveness matrix passed (all probed offered providers completed without error)"
+
 # ─── 9. HMA + peers + activity (full mode) ─────────────────────────────
 if [ "$MODE" = "full" ]; then
  log "9/11 Writing + reading HMA memory on parent..."
@@ -606,7 +606,7 @@ def test_head_drift_closes_stale_issue_for_prior_sha(wd_module, monkeypatch):
                {"context": "ci/test", "status": "success"},
            ])),
        ],
-        (f"GET", f"/repos/owner/repo/commits/{SHA_NEW}/status"): [
+        ("GET", f"/repos/owner/repo/commits/{SHA_NEW}/status"): [
            (200, _combined_status("success", [
                {"context": "ci/test", "status": "success"},
            ])),
@@ -6,10 +6,11 @@ Emits structured verdict + human-readable summary. Designed to run as:
  1. CLI:  python gate_check.py --repo org/repo --pr N
  2. Gitea Actions step: runs this script, captures stdout JSON

-Signals (MVP — signals 1,2,3,6):
+Signals (MVP — signals 1,2,3,4,6):
  1. Author-aware agent-tag comment scan
  2. REQUEST_CHANGES reviews state machine
  3. Staleness detection (review.commit_id != PR.head_sha)
+  4. Branch divergence / scope-creep guard (base-sha vs target HEAD)
  6. CI required-checks awareness

 Exit codes:
@@ -177,7 +178,7 @@ def signal_1_comment_scan(pr_number: int, repo: str) -> dict:
    try:
        reviews = api_list(f"/repos/{owner}/{name}/pulls/{pr_number}/reviews")
        for r in reviews:
-            login = r.get("user", {}).get("login", "")
+            login = (r.get("user") or {}).get("login", "")
            canonical = LOGIN_ALIASES.get(login, login)
            if canonical in login_to_group and r.get("state") == "APPROVED":
                comments.append(
@@ -198,7 +199,7 @@ def signal_1_comment_scan(pr_number: int, repo: str) -> dict:
        matches = []
        for c in comments:
            body = c.get("body", "") or ""
-            user_login = c.get("user", {}).get("login", "")
+            user_login = (c.get("user") or {}).get("login", "")
            # Resolve LOGIN_ALIASES so alternate logins satisfy the canonical gate
            user_login = LOGIN_ALIASES.get(user_login, user_login)
            if user_login != login:
@@ -264,11 +265,18 @@ def signal_2_reviews(pr_number: int, repo: str) -> dict:

    blocking = []
    for r in reviews:
-        if r.get("state") == "REQUEST_CHANGES" and not r.get("dismissed", False):
+        if (
+            r.get("state") == "REQUEST_CHANGES"
+            and not r.get("dismissed", False)
+            and r.get("official") is not False
+        ):
+            login = (r.get("user") or {}).get("login", "")
+            if not login:
+                continue
            blocking.append(
                {
                    "review_id": r["id"],
-                    "user": r["user"]["login"],
+                    "user": login,
                    "commit_id": r.get("commit_id", ""),
                    "created_at": r.get("submitted_at") or r.get("created_at", ""),
                }
@@ -328,6 +336,132 @@ def signal_3_staleness(pr_number: int, repo: str) -> dict:
    }


+# ── Signal 4: Branch divergence / scope-creep guard ─────────────────────────
+# Detects stale PR branches where the base SHA has drifted behind target HEAD.
+# Distinguishes files that are "inherited" from base divergence (already on
+# target via prior commits) from genuinely new PR work. Prevents misattribution
+# of scope creep when branches are stale (molecule-core#365).
+
+
+def _commits_and_files_behind(
+    owner: str, name: str, base_sha: str, target_branch: str
+) -> tuple[int | None, set[str]]:
+    """Paginate target-branch commits from HEAD back to base_sha.
+    Return (commits_behind_count, set of filenames changed in those commits).
+    Safety-capped at 20 pages (~1000 commits) to avoid runaway pagination.
+    """
+    commits_behind = 0
+    target_files: set[str] = set()
+    page = 1
+    max_pages = 20
+    per_page = 50
+
+    while page <= max_pages:
+        try:
+            commits = api_get(
+                f"/repos/{owner}/{name}/commits?sha={target_branch}&page={page}&limit={per_page}"
+            )
+        except GiteaError:
+            return (None, target_files)
+
+        if not isinstance(commits, list):
+            return (None, target_files)
+
+        for c in commits:
+            if c.get("sha") == base_sha:
+                return (commits_behind, target_files)
+            commits_behind += 1
+            for f in c.get("files", []):
+                fname = f.get("filename") or f.get("name", "")
+                if fname:
+                    target_files.add(fname)
+
+        if len(commits) < per_page:
+            break
+        page += 1
+
+    return (commits_behind if commits_behind > 0 else None, target_files)
+
+
+def signal_4_branch_divergence(
+    pr_number: int, repo: str, pr_data: dict | None = None
+) -> dict:
+    """
+    Compare PR.base.sha to current target-branch HEAD.
+    If diverged, show "inherited from base divergence" vs "actual new work"
+    file fractions using the commits API.
+    Returns: {signal, verdict, diverged, commits_behind, inherited_fraction, ...}
+    """
+    owner, name = repo.split("/", 1)
+
+    if pr_data is None:
+        pr_data = api_get(f"/repos/{owner}/{name}/pulls/{pr_number}")
+
+    base_sha = pr_data["base"]["sha"]
+    target_branch = pr_data["base"]["ref"]
+
+    try:
+        branch_info = api_get(f"/repos/{owner}/{name}/branches/{target_branch}")
+        target_head = branch_info["commit"]["id"]
+    except GiteaError as e:
+        return {"signal": "branch_divergence", "verdict": "N/A", "error": str(e)}
+
+    if base_sha == target_head:
+        return {
+            "signal": "branch_divergence",
+            "verdict": "CLEAR",
+            "diverged": False,
+            "commits_behind": 0,
+            "pr_files_count": 0,
+            "inherited_files": [],
+            "new_work_files": [],
+            "inherited_fraction": 0.0,
+        }
+
+    # Branch is diverged — count commits behind and collect files changed on
+    # target since the PR's base snapshot.
+    commits_behind, target_files = _commits_and_files_behind(
+        owner, name, base_sha, target_branch
+    )
+
+    # Get PR files
+    try:
+        pr_files_data = api_list(f"/repos/{owner}/{name}/pulls/{pr_number}/files")
+        pr_files = {
+            f.get("filename") or f.get("name", "") for f in pr_files_data
+        }
+        pr_files.discard("")
+    except GiteaError:
+        pr_files = set()
+
+    inherited_files = sorted(pr_files & target_files)
+    new_work_files = sorted(pr_files - target_files)
+    total = len(pr_files)
+    inherited_fraction = len(inherited_files) / total if total else 0.0
+
+    # Verdict: WARNING if significant divergence.
+    # Thresholds: >50 % inherited files, or >5 commits behind with any inherited files.
+    if inherited_fraction > 0.5 or (
+        commits_behind and commits_behind > 5 and inherited_files
+    ):
+        verdict = "WARNING"
+    else:
+        verdict = "CLEAR"
+
+    return {
+        "signal": "branch_divergence",
+        "verdict": verdict,
+        "diverged": True,
+        "base_sha": base_sha,
+        "target_head": target_head,
+        "commits_behind": commits_behind,
+        "pr_files_count": total,
+        "inherited_files": inherited_files,
+        "new_work_files": new_work_files,
+        "inherited_fraction": round(inherited_fraction, 2),
+    }
+
+
 # ── Signal 6: CI required-checks awareness ───────────────────────────────────

 def signal_6_ci(pr_number: int, repo: str, branch: str | None = None, pr_data: dict | None = None) -> dict:
@@ -408,7 +542,7 @@ def signal_6_ci(pr_number: int, repo: str, branch: str | None = None, pr_data: d

 # ── Gate evaluation ───────────────────────────────────────────────────────────

-VERDICT_ORDER = {"ERROR": 0, "CI_FAIL": 1, "BLOCKED": 2, "STALE-RC": 3, "CI_PENDING": 4, "N/A": 5, "CLEAR": 6}
+VERDICT_ORDER = {"ERROR": 0, "CI_FAIL": 1, "BLOCKED": 2, "STALE-RC": 3, "CI_PENDING": 4, "N/A": 5, "WARNING": 6, "CLEAR": 7}


 def compute_verdict(gates: list[dict]) -> tuple[str, list[dict]]:
@@ -439,6 +573,7 @@ def format_comment(repo: str, pr_number: int, verdict: str, gates: list[dict], b
        "agent_tag_comments": "Agent-tag gates",
        "request_changes_reviews": "REQUEST_CHANGES reviews",
        "stale_reviews": "Staleness check",
+        "branch_divergence": "Branch divergence / scope-creep guard",
        "ci_checks": "CI required checks",
    }

@@ -474,6 +609,25 @@ def format_comment(repo: str, pr_number: int, verdict: str, gates: list[dict], b
                    lines.append(
                        f"  - @{r['user']} stale (commit={r.get('review_commit','?')[:7]}, age={r.get('age_hours','?')}h)"
                    )
+            elif sig == "branch_divergence":
+                if b.get("diverged"):
+                    lines.append(
+                        f"  - Branch is {b.get('commits_behind', '?')} commits behind target "
+                        f"({b.get('target_head', '?')[:7]})"
+                    )
+                    frac = b.get("inherited_fraction", 0)
+                    lines.append(
+                        f"  - {frac * 100:.0f}% of PR files inherited from base divergence "
+                        f"({len(b.get('inherited_files', []))}/{b.get('pr_files_count', 0)} files)"
+                    )
+                    for f in b.get("inherited_files", [])[:5]:
+                        lines.append(f"    - inherited: `{f}`")
+                    if len(b.get("inherited_files", [])) > 5:
+                        lines.append(
+                            f"    - ... and {len(b.get('inherited_files', [])) - 5} more"
+                        )
+                else:
+                    lines.append("  - Branch is up to date with target")
            elif sig == "agent_tag_comments":
                for agent, res in b.get("results", {}).items():
                    v = res.get("verdict", "MISSING")
@@ -516,6 +670,7 @@ def run(repo: str, pr_number: int, post_comment: bool = False) -> dict:
            signal_1_comment_scan(pr_number, repo),
            signal_2_reviews(pr_number, repo),
            signal_3_staleness(pr_number, repo),
+            signal_4_branch_divergence(pr_number, repo, pr_data=pr),
            signal_6_ci(pr_number, repo, branch=base_ref, pr_data=pr),
        ]
        verdict, blockers = compute_verdict(gates)
@@ -74,3 +74,247 @@ def test_signal_1_infra_sre_login_alias_resolved_to_core_devops(monkeypatch):
    engineers = result["results"]["core-devops"]
    assert engineers["verdict"] == "APPROVED"
    assert engineers["group"] == "engineers"
+
+
+def test_signal_1_null_user_in_review_does_not_crash(monkeypatch):
+    """Regression: Gitea may return reviews with user=null (deleted/bot edge case).
+    signal_1_comment_scan must survive this without AttributeError."""
+    mod = load_gate_check()
+
+    def fake_api_get(path):
+        if path == "/repos/molecule-ai/molecule-core/pulls/901":
+            return {
+                "number": 901,
+                "labels": [{"name": "tier:low"}],
+            }
+        raise AssertionError(f"unexpected api_get: {path}")
+
+    def fake_api_list(path):
+        if path == "/repos/molecule-ai/molecule-core/issues/901/comments":
+            return []
+        if path == "/repos/molecule-ai/molecule-core/pulls/901/comments":
+            return []
+        if path == "/repos/molecule-ai/molecule-core/pulls/901/reviews":
+            return [
+                {
+                    "id": 1,
+                    "user": None,  # <-- the regression trigger
+                    "state": "APPROVED",
+                    "submitted_at": "2026-05-13T10:00:00Z",
+                },
+                {
+                    "id": 2,
+                    "user": {"login": "core-devops"},
+                    "state": "APPROVED",
+                    "submitted_at": "2026-05-13T10:01:00Z",
+                },
+            ]
+        raise AssertionError(f"unexpected api_list: {path}")
+
+    monkeypatch.setattr(mod, "api_get", fake_api_get)
+    monkeypatch.setattr(mod, "api_list", fake_api_list)
+
+    result = mod.signal_1_comment_scan(901, "molecule-ai/molecule-core")
+
+    # Should not crash; the valid review from core-devops still satisfies engineers gate
+    assert result["verdict"] == "CLEAR"
+    assert result["results"]["core-devops"]["verdict"] == "APPROVED"
+
+
+# ── Signal 2: Draft REQUEST_CHANGES guard ───────────────────────────────────
+
+
+def test_signal_2_draft_request_changes_does_not_block(monkeypatch):
+    """official=False REQUEST_CHANGES is a draft/pending review and must NOT
+    block the gate (matching review-check.sh post-#1818 official-filter)."""
+    mod = load_gate_check()
+
+    def fake_api_list(path):
+        if path == "/repos/molecule-ai/molecule-core/pulls/902/reviews":
+            return [
+                {
+                    "id": 1,
+                    "user": {"login": "agent-reviewer"},
+                    "state": "REQUEST_CHANGES",
+                    "official": False,
+                    "dismissed": False,
+                    "submitted_at": "2026-05-13T10:00:00Z",
+                }
+            ]
+        raise AssertionError(f"unexpected api_list: {path}")
+
+    monkeypatch.setattr(mod, "api_list", fake_api_list)
+
+    result = mod.signal_2_reviews(902, "molecule-ai/molecule-core")
+    assert result["verdict"] == "CLEAR"
+    assert result["blocking_reviews"] == []
+
+
+def test_signal_2_null_user_in_request_changes_does_not_crash(monkeypatch):
+    """Regression: Gitea may return user=null on a REQUEST_CHANGES review.
+    signal_2_reviews must survive this without AttributeError."""
+    mod = load_gate_check()
+
+    def fake_api_list(path):
+        if path == "/repos/molecule-ai/molecule-core/pulls/903/reviews":
+            return [
+                {
+                    "id": 1,
+                    "user": None,
+                    "state": "REQUEST_CHANGES",
+                    "official": True,
+                    "dismissed": False,
+                    "submitted_at": "2026-05-13T10:00:00Z",
+                }
+            ]
+        raise AssertionError(f"unexpected api_list: {path}")
+
+    monkeypatch.setattr(mod, "api_list", fake_api_list)
+
+    result = mod.signal_2_reviews(903, "molecule-ai/molecule-core")
+    assert result["verdict"] == "CLEAR"
+    assert result["blocking_reviews"] == []
+
+
+# ── Signal 4: Branch divergence / scope-creep guard ─────────────────────────
+
+
+def test_signal_4_no_divergence_returns_clear(monkeypatch):
+    """When PR.base.sha equals target branch HEAD, divergence is zero."""
+    mod = load_gate_check()
+
+    shared_sha = "abc123"
+
+    def fake_api_get(path):
+        if path == "/repos/molecule-ai/molecule-core/pulls/100":
+            return {
+                "base": {"sha": shared_sha, "ref": "main"},
+                "head": {"sha": "def456"},
+            }
+        if path == "/repos/molecule-ai/molecule-core/branches/main":
+            return {"commit": {"id": shared_sha}}
+        raise AssertionError(f"unexpected api_get: {path}")
+
+    monkeypatch.setattr(mod, "api_get", fake_api_get)
+
+    result = mod.signal_4_branch_divergence(100, "molecule-ai/molecule-core")
+
+    assert result["verdict"] == "CLEAR"
+    assert result["diverged"] is False
+    assert result["commits_behind"] == 0
+    assert result["inherited_fraction"] == 0.0
+
+
+def test_signal_4_divergence_with_inherited_files_warning(monkeypatch):
+    """Stale branch with overlapping files triggers WARNING and correct fractions."""
+    mod = load_gate_check()
+
+    base_sha = "base000"
+    target_head = "head111"
+
+    def fake_api_get(path):
+        if path == "/repos/molecule-ai/molecule-core/pulls/101":
+            return {
+                "base": {"sha": base_sha, "ref": "main"},
+                "head": {"sha": "pr222"},
+            }
+        if path == "/repos/molecule-ai/molecule-core/branches/main":
+            return {"commit": {"id": target_head}}
+        if path == "/repos/molecule-ai/molecule-core/commits?sha=main&page=1&limit=50":
+            return [
+                {
+                    "sha": target_head,
+                    "files": [
+                        {"filename": "ci.yml"},
+                        {"filename": "README.md"},
+                    ],
+                },
+                {"sha": base_sha, "files": []},
+            ]
+        raise AssertionError(f"unexpected api_get: {path}")
+
+    def fake_api_list(path):
+        if path == "/repos/molecule-ai/molecule-core/pulls/101/files":
+            return [
+                {"filename": "ci.yml"},
+                {"filename": "README.md"},
+                {"filename": "new_feature.go"},
+            ]
+        raise AssertionError(f"unexpected api_list: {path}")
+
+    monkeypatch.setattr(mod, "api_get", fake_api_get)
+    monkeypatch.setattr(mod, "api_list", fake_api_list)
+
+    result = mod.signal_4_branch_divergence(101, "molecule-ai/molecule-core")
+
+    assert result["verdict"] == "WARNING"
+    assert result["diverged"] is True
+    assert result["commits_behind"] == 1
+    assert result["pr_files_count"] == 3
+    assert result["inherited_files"] == ["README.md", "ci.yml"]
+    assert result["new_work_files"] == ["new_feature.go"]
+    assert result["inherited_fraction"] == round(2 / 3, 2)
+
+
+def test_signal_4_divergence_no_inherited_files_clear(monkeypatch):
+    """Stale branch but zero file overlap → still CLEAR (no scope-creep risk)."""
+    mod = load_gate_check()
+
+    base_sha = "base000"
+    target_head = "head111"
+
+    def fake_api_get(path):
+        if path == "/repos/molecule-ai/molecule-core/pulls/102":
+            return {
+                "base": {"sha": base_sha, "ref": "main"},
+                "head": {"sha": "pr222"},
+            }
+        if path == "/repos/molecule-ai/molecule-core/branches/main":
+            return {"commit": {"id": target_head}}
+        if path == "/repos/molecule-ai/molecule-core/commits?sha=main&page=1&limit=50":
+            return [
+                {
+                    "sha": target_head,
+                    "files": [{"filename": "other.go"}],
+                },
+                {"sha": base_sha, "files": []},
+            ]
+        raise AssertionError(f"unexpected api_get: {path}")
+
+    def fake_api_list(path):
+        if path == "/repos/molecule-ai/molecule-core/pulls/102/files":
+            return [{"filename": "new_feature.go"}]
+        raise AssertionError(f"unexpected api_list: {path}")
+
+    monkeypatch.setattr(mod, "api_get", fake_api_get)
+    monkeypatch.setattr(mod, "api_list", fake_api_list)
+
+    result = mod.signal_4_branch_divergence(102, "molecule-ai/molecule-core")
+
+    assert result["verdict"] == "CLEAR"
+    assert result["diverged"] is True
+    assert result["inherited_files"] == []
+    assert result["new_work_files"] == ["new_feature.go"]
+    assert result["inherited_fraction"] == 0.0
+
+
+def test_signal_4_branch_api_error_returns_na(monkeypatch):
+    """If the branch endpoint 404s, signal degrades to N/A rather than crashing."""
+    mod = load_gate_check()
+
+    def fake_api_get(path):
+        if path == "/repos/molecule-ai/molecule-core/pulls/103":
+            return {
+                "base": {"sha": "base000", "ref": "main"},
+                "head": {"sha": "pr222"},
+            }
+        if path == "/repos/molecule-ai/molecule-core/branches/main":
+            raise mod.GiteaError("GET .../branches/main → 404: not found")
+        raise AssertionError(f"unexpected api_get: {path}")
+
+    monkeypatch.setattr(mod, "api_get", fake_api_get)
+
+    result = mod.signal_4_branch_divergence(103, "molecule-ai/molecule-core")
+
+    assert result["verdict"] == "N/A"
+    assert "error" in result
@@ -0,0 +1,271 @@
+// Command gen-providers is the codegen half of the provider-registry SSOT
+// machinery on the molecule-core side (internal#718 P2-A, CTO 2026-05-27
+// "Distribution = SDK via codegen + verify-CI"). It is the byte-for-byte mirror
+// of molecule-controlplane's cmd/gen-providers (the canonical generator). It
+// reads core's SYNCED COPY of the schema — internal/providers/providers.yaml
+// (via the providers loader, so it shares the SAME parse + validation as the
+// runtime) — and emits a checked-in Go artifact:
+//
+//	internal/providers/gen/registry_gen.go
+//
+// The artifact is a deterministic projection of the merged registry: the
+// provider catalog + per-runtime native sets as Go literals, plus the schema
+// version and a content fingerprint. It is core's leaf of the multi-language SDK
+// layer the RFC calls for (Go(CP+core)/TS(canvas)/Python(adapters)).
+//
+// CONTRACT for P2-A (zero behavior change): the generated artifact is
+// checked-in + drift-gated ONLY. NO production code path imports
+// internal/providers/gen — the gen-import-boundary test pins that. P2-B wires
+// the billing/credential decision onto the LOADER (DeriveProvider/IsPlatform),
+// not the raw gen literals. The generator is the build-time half;
+// verify-providers-gen.yml is the CI half that regenerates and fails RED on any
+// diff (drift or hand-edit); sync-providers-yaml.yml gates the synced copy
+// against the controlplane canonical.
+//
+// Usage:
+//
+//	go run ./cmd/gen-providers            # write the artifact in place
+//	go run ./cmd/gen-providers -check     # exit non-zero if the on-disk
+//	                                      # artifact differs from a fresh gen
+//	                                      # (the CI drift gate)
+//	go run ./cmd/gen-providers -o PATH    # write to a specific path
+//
+//go:generate go run ../gen-providers -o ../../internal/providers/gen/registry_gen.go
+package main
+
+import (
+	"bytes"
+	"crypto/sha256"
+	"encoding/hex"
+	"flag"
+	"fmt"
+	"go/format"
+	"os"
+	"sort"
+	"strconv"
+	"text/template"
+
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
+)
+
+// defaultOutPath is the checked-in artifact location, relative to the repo
+// root (the directory `go run ./cmd/gen-providers` is invoked from).
+const defaultOutPath = "internal/providers/gen/registry_gen.go"
+
+func main() {
+	var (
+		outPath string
+		check   bool
+	)
+	flag.StringVar(&outPath, "o", defaultOutPath, "output path for the generated artifact")
+	flag.BoolVar(&check, "check", false, "verify the on-disk artifact matches a fresh generation; exit 1 on drift")
+	flag.Parse()
+
+	generated, err := render()
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "gen-providers: %v\n", err)
+		os.Exit(1)
+	}
+
+	if check {
+		existing, err := os.ReadFile(outPath)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "gen-providers -check: cannot read %s: %v\n", outPath, err)
+			fmt.Fprintln(os.Stderr, "Run `go generate ./...` (or `go run ./cmd/gen-providers`) and commit the result.")
+			os.Exit(1)
+		}
+		if !bytes.Equal(existing, generated) {
+			fmt.Fprintf(os.Stderr, "gen-providers -check: DRIFT — %s is out of sync with providers.yaml.\n", outPath)
+			fmt.Fprintln(os.Stderr, "The generated artifact was hand-edited or providers.yaml changed without regen.")
+			fmt.Fprintln(os.Stderr, "Fix: run `go generate ./...` (or `go run ./cmd/gen-providers`) and commit.")
+			os.Exit(1)
+		}
+		fmt.Println("gen-providers -check: OK — artifact in sync with providers.yaml")
+		return
+	}
+
+	if err := os.WriteFile(outPath, generated, 0o644); err != nil {
+		fmt.Fprintf(os.Stderr, "gen-providers: write %s: %v\n", outPath, err)
+		os.Exit(1)
+	}
+	fmt.Printf("gen-providers: wrote %s\n", outPath)
+}
+
+// render loads the manifest and produces the gofmt'd artifact bytes.
+func render() ([]byte, error) {
+	m, err := providers.LoadManifest()
+	if err != nil {
+		return nil, fmt.Errorf("load manifest: %w", err)
+	}
+
+	// Deterministic ordering: providers in catalog order is already stable
+	// (slice). Runtimes is a map — sort its keys so the artifact is
+	// reproducible regardless of Go map iteration order.
+	runtimeNames := make([]string, 0, len(m.Runtimes))
+	for rt := range m.Runtimes {
+		runtimeNames = append(runtimeNames, rt)
+	}
+	sort.Strings(runtimeNames)
+
+	type genProvider struct {
+		Name             string
+		DisplayName      string
+		Protocol         string
+		AuthMode         string
+		AuthEnv          []string
+		ModelPrefixMatch string
+		IsPlatform       bool
+		// UpstreamVendor is the proxy's upstream-vendor key for this entry
+		// (internal#718 P1, CONVERGED) — empty for entries the proxy does not
+		// route to an upstream. A plain scalar (no pointer), so both the rendered
+		// literal and the fingerprint stay deterministic.
+		UpstreamVendor string
+	}
+	type genRef struct {
+		Name   string
+		Models []string
+	}
+	type genRuntime struct {
+		Name      string
+		Providers []genRef
+	}
+
+	data := struct {
+		SchemaVersion int
+		Fingerprint   string
+		Providers     []genProvider
+		Runtimes      []genRuntime
+	}{
+		SchemaVersion: providers.SchemaVersion(),
+	}
+
+	for _, p := range m.Providers {
+		gp := genProvider{
+			Name:             p.Name,
+			DisplayName:      p.DisplayName,
+			Protocol:         string(p.Protocol),
+			AuthMode:         p.AuthMode,
+			AuthEnv:          p.AuthEnv,
+			ModelPrefixMatch: p.ModelPrefixMatch,
+			IsPlatform:       p.IsPlatform(),
+			UpstreamVendor:   p.UpstreamVendor,
+		}
+		data.Providers = append(data.Providers, gp)
+	}
+	for _, rt := range runtimeNames {
+		native := m.Runtimes[rt]
+		gr := genRuntime{Name: rt}
+		for _, ref := range native.Providers {
+			gr.Providers = append(gr.Providers, genRef{Name: ref.Name, Models: ref.Models})
+		}
+		data.Runtimes = append(data.Runtimes, gr)
+	}
+
+	// Fingerprint pins the artifact to the data it was generated from. It is
+	// derived from the structured projection (schema version + providers +
+	// runtimes), NOT the raw YAML bytes, so a comment-only YAML edit does not
+	// churn the artifact while any data change does.
+	data.Fingerprint = fingerprint(data.SchemaVersion, data.Providers, data.Runtimes)
+
+	var buf bytes.Buffer
+	if err := artifactTmpl.Execute(&buf, data); err != nil {
+		return nil, fmt.Errorf("execute template: %w", err)
+	}
+	formatted, err := format.Source(buf.Bytes())
+	if err != nil {
+		return nil, fmt.Errorf("gofmt generated source: %w\n----\n%s", err, buf.String())
+	}
+	return formatted, nil
+}
+
+// fingerprint is a stable content hash of the structured projection. Any
+// fields below this function references must be kept in sync with the
+// template's emitted data so the hash and the literals never diverge.
+func fingerprint(schema int, provs any, runtimes any) string {
+	h := sha256.New()
+	fmt.Fprintf(h, "schema=%d\n", schema)
+	fmt.Fprintf(h, "%#v\n%#v\n", provs, runtimes)
+	return hex.EncodeToString(h.Sum(nil))[:16]
+}
+
+func quote(s string) string { return strconv.Quote(s) }
+
+func quoteSlice(ss []string) string {
+	var b bytes.Buffer
+	b.WriteString("[]string{")
+	for i, s := range ss {
+		if i > 0 {
+			b.WriteString(", ")
+		}
+		b.WriteString(strconv.Quote(s))
+	}
+	b.WriteString("}")
+	return b.String()
+}
+
+var artifactTmpl = template.Must(template.New("artifact").Funcs(template.FuncMap{
+	"quote":      quote,
+	"quoteSlice": quoteSlice,
+}).Parse(`// Code generated by cmd/gen-providers; DO NOT EDIT.
+//
+// Source of truth: internal/providers/providers.yaml (schema_version {{.SchemaVersion}}).
+// Regenerate with: go generate ./...   (or: go run ./cmd/gen-providers)
+// The verify-providers-gen CI workflow fails RED if this file drifts from
+// providers.yaml or is hand-edited. internal#718 P0 — checked-in + drift-
+// gated ONLY; no production path imports this package yet (that is P1+).
+
+package gen
+
+// SchemaVersion is the providers.yaml schema this artifact was generated
+// against. It is the semver'd contract version (the MAJOR component for the
+// public extension contract; see internal/providers/README.md).
+const SchemaVersion = {{.SchemaVersion}}
+
+// Fingerprint is a stable content hash of the generated projection (schema
+// version + provider catalog + runtime native sets). It changes iff the
+// registry DATA changes (comment-only YAML edits do not churn it).
+const Fingerprint = {{quote .Fingerprint}}
+
+// GenProvider is the generated projection of one provider catalog entry —
+// the subset a downstream consumer needs to derive + display a provider.
+type GenProvider struct {
+	Name             string
+	DisplayName      string
+	Protocol         string
+	AuthMode         string
+	AuthEnv          []string
+	ModelPrefixMatch string
+	// IsPlatform marks the closed, core-only platform-managed provider.
+	IsPlatform bool
+	// UpstreamVendor is the proxy's upstream-vendor key for this entry
+	// (internal#718 P1, CONVERGED); empty for providers the proxy does not
+	// route to an upstream vendor. ResolveUpstream maps a model id's namespace
+	// token to the entry whose UpstreamVendor equals it.
+	UpstreamVendor string
+}
+
+// GenRuntimeRef is one native provider a runtime supports + its exact models.
+type GenRuntimeRef struct {
+	Name   string
+	Models []string
+}
+
+// Providers is the full provider catalog, in providers.yaml declaration order.
+var Providers = []GenProvider{
+{{- range .Providers}}
+	{Name: {{quote .Name}}, DisplayName: {{quote .DisplayName}}, Protocol: {{quote .Protocol}}, AuthMode: {{quote .AuthMode}}, AuthEnv: {{quoteSlice .AuthEnv}}, ModelPrefixMatch: {{quote .ModelPrefixMatch}}, IsPlatform: {{.IsPlatform}}{{if .UpstreamVendor}}, UpstreamVendor: {{quote .UpstreamVendor}}{{end}}},
+{{- end}}
+}
+
+// Runtimes maps each runtime to its native provider+model set, runtime names
+// sorted for a deterministic artifact.
+var Runtimes = map[string][]GenRuntimeRef{
+{{- range .Runtimes}}
+	{{quote .Name}}: {
+{{- range .Providers}}
+		{Name: {{quote .Name}}, Models: {{quoteSlice .Models}}},
+{{- end}}
+	},
+{{- end}}
+}
+`))
@@ -0,0 +1,121 @@
+package main
+
+import (
+	"bytes"
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+// repoRoot walks up from the test's working dir (cmd/gen-providers) to the
+// module root so the test can locate the checked-in artifact regardless of
+// where `go test` is invoked from.
+func repoRoot(t *testing.T) string {
+	t.Helper()
+	dir, err := os.Getwd()
+	if err != nil {
+		t.Fatalf("getwd: %v", err)
+	}
+	for i := 0; i < 6; i++ {
+		if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil {
+			return dir
+		}
+		dir = filepath.Dir(dir)
+	}
+	t.Fatal("could not locate repo root (go.mod) from cmd/gen-providers")
+	return ""
+}
+
+// TestArtifactInSync is the drift gate's Go-test counterpart: the checked-in
+// internal/providers/gen/registry_gen.go MUST byte-equal a fresh render. If a
+// future edit changes providers.yaml without regenerating, OR hand-edits the
+// artifact, this flips red — the same signal the verify-providers-gen CI
+// workflow emits, but caught locally by `go test ./...` too.
+func TestArtifactInSync(t *testing.T) {
+	generated, err := render()
+	if err != nil {
+		t.Fatalf("render() error = %v", err)
+	}
+	artifactPath := filepath.Join(repoRoot(t), defaultOutPath)
+	onDisk, err := os.ReadFile(artifactPath)
+	if err != nil {
+		t.Fatalf("read checked-in artifact %s: %v (run `go generate ./...` and commit)", artifactPath, err)
+	}
+	if !bytes.Equal(onDisk, generated) {
+		t.Fatalf("DRIFT: %s is out of sync with providers.yaml.\n"+
+			"Run `go generate ./...` (or `go run ./cmd/gen-providers`) and commit the result.", defaultOutPath)
+	}
+}
+
+// TestDriftGateCatchesMutation is the load-bearing-gate proof (per the SOP
+// fail-direction discipline). The original P0 version was TAUTOLOGICAL
+// (internal#718 P1 review carry-over): it appended bytes to an in-memory copy
+// and asserted the copy differed from the original — true by construction,
+// touching neither the on-disk artifact nor the actual in-sync comparison the
+// gate runs. This version exercises the REAL gate: it writes a MUTATED artifact
+// to disk and re-runs the SAME comparison TestArtifactInSync / `-check` perform
+// (`render()` bytes vs the on-disk file), asserting it now reports drift — then
+// restores the original. So the test would fail if the gate were vacuous (e.g.
+// if the comparison ignored content), not merely if append changes bytes.
+func TestDriftGateCatchesMutation(t *testing.T) {
+	generated, err := render()
+	if err != nil {
+		t.Fatalf("render() error = %v", err)
+	}
+	artifactPath := filepath.Join(repoRoot(t), defaultOutPath)
+	original, err := os.ReadFile(artifactPath)
+	if err != nil {
+		t.Fatalf("read checked-in artifact %s: %v", artifactPath, err)
+	}
+	// Precondition: the tree is in sync (so the mutation is what flips the gate,
+	// not pre-existing drift).
+	if !bytes.Equal(original, generated) {
+		t.Fatalf("precondition failed: %s already drifted from render() — run `go generate ./...`", defaultOutPath)
+	}
+
+	// Restore the pristine artifact no matter how the test exits.
+	t.Cleanup(func() {
+		if err := os.WriteFile(artifactPath, original, 0o644); err != nil {
+			t.Fatalf("CRITICAL: failed to restore %s after mutation: %v", artifactPath, err)
+		}
+	})
+
+	// Mutate the ON-DISK artifact (simulating a hand-edit / a providers.yaml
+	// change that wasn't regenerated).
+	mutated := append(append([]byte(nil), original...), []byte("\n// injected drift\n")...)
+	if err := os.WriteFile(artifactPath, mutated, 0o644); err != nil {
+		t.Fatalf("write mutated artifact: %v", err)
+	}
+
+	// Re-run the EXACT in-sync comparison the gate uses: fresh render vs the
+	// (now mutated) on-disk file. It MUST report drift.
+	onDiskAfter, err := os.ReadFile(artifactPath)
+	if err != nil {
+		t.Fatalf("re-read mutated artifact: %v", err)
+	}
+	freshRender, err := render()
+	if err != nil {
+		t.Fatalf("render() after mutation error = %v", err)
+	}
+	if bytes.Equal(onDiskAfter, freshRender) {
+		t.Fatal("drift gate did NOT detect a mutated on-disk artifact — gate is not load-bearing")
+	}
+}
+
+// TestRenderDeterministic proves regeneration is idempotent: two renders of
+// the same manifest produce byte-identical output (sorted runtime keys, stable
+// catalog order). A non-deterministic generator would make the drift gate
+// flap on Go map iteration order.
+func TestRenderDeterministic(t *testing.T) {
+	a, err := render()
+	if err != nil {
+		t.Fatalf("render() #1 error = %v", err)
+	}
+	b, err := render()
+	if err != nil {
+		t.Fatalf("render() #2 error = %v", err)
+	}
+	if !bytes.Equal(a, b) {
+		t.Fatal("render() is non-deterministic — two runs differ; the drift gate would flap")
+	}
+}
@@ -36,6 +36,7 @@ import (
 	"time"

 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/channels"
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/codexauth"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/crypto"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/events"
@@ -149,8 +150,13 @@ func main() {
 				result, err := db.DB.ExecContext(ctx, `DELETE FROM activity_logs WHERE created_at < now() - ($1 || ' days')::interval`, retentionDays)
 				if err != nil {
 					log.Printf("Activity log cleanup error: %v", err)
-				} else if n, _ := result.RowsAffected(); n > 0 {
-					log.Printf("Activity log cleanup: purged %d old entries", n)
+				} else {
+					n, err := result.RowsAffected()
+					if err != nil {
+						log.Printf("Activity log cleanup RowsAffected error: %v", err)
+					} else if n > 0 {
+						log.Printf("Activity log cleanup: purged %d old entries", n)
+					}
 				}
 			}
 		}
@@ -329,6 +335,20 @@ func main() {
 		pendinguploads.StartSweeper(c, pendinguploads.NewPostgres(db.DB), 0)
 	})

+	// Codex shared-OAuth central refresher — the SINGLE owner of the rotating
+	// refresh_token for the global codex (ChatGPT/Codex subscription) credential
+	// (global_secrets key CODEX_AUTH_JSON). Multiple codex workspaces share ONE
+	// ChatGPT-Pro OAuth token; OpenAI's refresh_token is single-use, so letting
+	// each per-agent app-server refresh on its own 401 burned the seed within
+	// seconds (a refresh storm). This goroutine is structurally single-flight
+	// (one goroutine + a package mutex), refreshes only within a safety margin
+	// of expiry, POSTs the refresh_token at most once per due cycle, and writes
+	// the rotated blob back — workspaces now only GET the current token (see the
+	// codex template's codex_auth_sync.sh). INERT when no CODEX_AUTH_JSON exists.
+	go supervised.RunWithRecover(ctx, "codex-auth-refresher", func(c context.Context) {
+		codexauth.StartCodexAuthRefresher(c, db.DB)
+	})
+
 	// Provision-timeout sweep — flips workspaces that have been stuck in
 	// status='provisioning' past the timeout window to 'failed' and emits
 	// WORKSPACE_PROVISION_TIMEOUT. Without this the UI banner is cosmetic
@@ -0,0 +1,114 @@
+# Molecule Platform OpenAPI specs
+
+This directory holds the machine-readable API contracts for the Molecule
+platform.
+
+| File | Spec | Scope | Status |
+|------|------|-------|--------|
+| `management.yaml` | OpenAPI **3.1** | The **management surface** across both services (orgs, billing, admin, provisioning, workspaces, secrets, templates, org-tokens, bundles). | **SSOT** — hand-authored. |
+| `swagger.yaml` / `swagger.json` | OpenAPI 2.0 | swaggo-generated stub, `/schedules` only (the per-workspace **runtime** surface). | Legacy stub; superseded for management by `management.yaml`. |
+
+`management.yaml` is the **single source of truth** the management tooling
+derives from — the management MCP server, the management CLI (`molecule-cli`),
+and the human-facing API docs (RFC #1706, the gap closed by
+`PLATFORM-MANAGEMENT-API.md` §5c). Do not hand-edit those clients' route maps;
+change them here and regenerate/derive.
+
+## The two-service split
+
+One structural fact drives the whole spec: there are **two services with two
+auth stacks**, and the management surface spans both.
+
+```
+                         ┌─────────────────────────────────────────┐
+   browser / CLI / MCP   │  Control plane (CP)                      │
+        │                │  molecule-controlplane @ api.moleculesai │
+        │  session       │  /api/v1/* (stable) [+ /cp/* sunset]      │
+        ├───────────────▶│  orgs · members · billing · provisioning │
+        │  admin bearer  │  · fleet/admin ops · pins                 │
+        │  provision sec │                                          │
+        └────────────────┴──────────────┬───────────────────────────┘
+                                         │ edge reverse-proxy
+                                         │ (subdomain / X-Molecule-Org-Slug)
+                                         ▼
+                         ┌─────────────────────────────────────────┐
+   Org API Key / ws tok  │  Tenant workspace-server                 │
+        │                │  molecule-core/workspace-server          │
+        └───────────────▶│  ONE EC2 per org @ <slug>.moleculesai.app│
+                         │  workspaces · secrets · templates ·      │
+                         │  org-tokens · bundles                    │
+                         └─────────────────────────────────────────┘
+```
+
+- **Control plane (CP)** — `api.moleculesai.app`, routes modelled under
+  `/api/v1/*` (the `/cp/*` mirror is identical but sunset-headed per RFC #61 and
+  is not duplicated in the spec). Owns **orgs, members, billing, provisioning,
+  fleet/admin ops**.
+- **Tenant workspace-server** — one EC2 per org at `<slug>.moleculesai.app`.
+  Owns **workspaces, agents, secrets, templates, org-tokens, bundles**. Requests
+  may also be sent to the CP host with an `X-Molecule-Org-Slug` header; the CP
+  edge reverse-proxies them to the tenant host (the `Authorization`,
+  `X-Molecule-Org-*`, and cookie headers pass through unchanged and the tenant's
+  own middleware validates them).
+
+The key consequence, called out in `PLATFORM-MANAGEMENT-API.md`: **the Org API
+Key is a TENANT credential, not a CP one.** It is full tenant-admin over its own
+org's workspace-server surface and reaches **nothing** on the CP (org
+create/delete, billing, members, provisioning all 401/403 it). That is why
+member/billing tools belong in a separate CP-admin MCP, not the org-key-authed
+management MCP.
+
+## Security scheme → surface map (the tier matrix)
+
+`management.yaml` defines these `securitySchemes`; each operation declares the
+one(s) it accepts. Mirror of `PLATFORM-MANAGEMENT-API.md` §1:
+
+| Scheme | What it is | Where it applies |
+|--------|-----------|------------------|
+| `workosSession` | WorkOS AuthKit session cookie `mcp_session` (+ org membership/ownership checks) | CP `/api/v1/orgs/*`, `/api/v1/billing/*`. Also accepted on the tenant surface via the CP-session path. |
+| `cpAdminBearer` | CP `CP_ADMIN_API_TOKEN` operator bearer (AdminGate, constant-time) | CP `/api/v1/admin/*` — admin-create-org, tenant teardown, workspace env, ListOrgWorkspaces, redeploy, pins. |
+| `provisionSecret` | CP `PROVISION_SHARED_SECRET` bearer | CP `/api/v1/workspaces/provision`, `…/status`. Routes unmounted when the secret is unset. |
+| `tenantAdminToken` | Per-tenant admin_token (+ `X-Molecule-Org-Id`) | CP `DELETE /api/v1/workspaces/:id` (deprovision) — **in addition to** `provisionSecret` (issue #118). |
+| `orgApiKey` | Tenant Org API Key — `Authorization: Bearer <key>` + routing header; full tenant-admin, self-minting | **All** tenant routes: `/workspaces[/:id]`, `/workspaces/:id/secrets`, budget, billing-mode, `/settings/secrets`, `/org/import`, `/org/templates`, `/org/tokens`, `/templates`, `/bundles`. |
+| `workspaceToken` | Per-workspace bearer, bound to one workspace id (+ routing header) | Read/lifecycle/secrets on a single `/workspaces/:id/*`. **Rejected** on admin list/create/delete when ADMIN_TOKEN is set — use `orgApiKey`. |
+| `orgRoutingHeaderId` / `orgRoutingHeaderSlug` | `X-Molecule-Org-Id` / `X-Molecule-Org-Slug` | Required on every tenant-host request so the edge / TenantGuard route + authorize against the correct org. Send one of them alongside the bearer. |
+
+### Guards worth knowing (modelled per-operation)
+
+- **Dry-run:** `POST /api/v1/admin/orgs?dry_run=true` — validate + echo, no org
+  created. (The only dry-run on the whole management API.)
+- **Confirm token:** `DELETE /api/v1/admin/tenants/:slug` and
+  `…/scrub-artifacts` — body `confirm` MUST equal the URL slug, else `400`
+  before any teardown.
+- **Force flag:** `POST /api/v1/admin/workspaces/:id/env` — keys matching the
+  secret-keyword guard (`TOKEN`/`SECRET`/`KEY`/`PASSWORD`) require `force=true`.
+- **Runtime-pin gate:** `POST /api/v1/workspaces/provision` returns `422
+  RUNTIME_PIN_MISSING` when no runtime image pin exists.
+- **Auto-restart side-effects:** writing a workspace or global secret
+  auto-restarts the affected workspace(s).
+
+## Security note (carried from the synthesis spec)
+
+The Org API Key is **full tenant-admin and self-minting** — a management MCP
+holding one holds tenant root. There is no scope-down today (TODO in
+`orgtoken`). Per-role / per-workspace scoping should ship alongside the
+management MCP.
+
+## Validate
+
+```bash
+cd workspace-server/docs/openapi
+npx @redocly/cli lint management.yaml   # must be clean (0 errors, 0 warnings)
+```
+
+## Scope notes / best-effort flags
+
+- The per-workspace **runtime** surface (schedules, agent, registry, a2a,
+  memory, approvals, channels, terminal, files) is intentionally **out of
+  scope** here — that's the runtime contract, not management.
+- A handful of bodies are **best-effort** from the handlers (org-import inline
+  template, bundle import, list responses with open shapes) and are marked with
+  `additionalProperties: true` in the schema. Tighten as the handler structs
+  stabilise.
+- `/cp/*` deprecated mirrors are omitted (identical shapes; RFC #61
+  Deprecation/Sunset). Build against `/api/v1/*`.
@@ -3,6 +3,7 @@ package bundle
 import (
 	"context"
 	"fmt"
+	"log"
 	"strings"

 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
@@ -72,7 +73,9 @@ func Import(
 		}
 	}
 	// Store runtime in DB
-	_, _ = db.DB.ExecContext(ctx, `UPDATE workspaces SET runtime = $1 WHERE id = $2`, bundleRuntime, wsID)
+	if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET runtime = $1 WHERE id = $2`, bundleRuntime, wsID); err != nil {
+		log.Printf("bundle import: failed to store runtime for workspace %s: %v", wsID, err)
+	}

 	// Provision the container if provisioner is available
 	if prov != nil {
@@ -92,7 +95,9 @@ func Import(
 			if err != nil {
 				markFailed(provCtx, wsID, broadcaster, err)
 			} else if url != "" {
-				db.DB.ExecContext(provCtx, `UPDATE workspaces SET url = $1 WHERE id = $2`, url, wsID)
+				if _, err := db.DB.ExecContext(provCtx, `UPDATE workspaces SET url = $1 WHERE id = $2`, url, wsID); err != nil {
+					log.Printf("bundle import: failed to store URL for workspace %s: %v", wsID, err)
+				}
 			}
 		}()
 	}
@@ -139,9 +144,11 @@ func markFailed(ctx context.Context, wsID string, broadcaster *events.Broadcaste
 	// markProvisionFailed in workspace-server/internal/handlers/
 	// workspace_provision_shared.go.
 	msg := err.Error()
-	db.DB.ExecContext(ctx,
+	if _, dbErr := db.DB.ExecContext(ctx,
 		`UPDATE workspaces SET status = $1, last_sample_error = $2, updated_at = now() WHERE id = $3`,
-		models.StatusFailed, msg, wsID)
+		models.StatusFailed, msg, wsID); dbErr != nil {
+		log.Printf("bundle import: failed to mark workspace %s as failed: %v", wsID, dbErr)
+	}
 	broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceProvisionFailed), wsID, map[string]interface{}{
 		"error": msg,
 	})
@@ -18,6 +18,11 @@ const (
 	discordHTTPTimeout   = 10 * time.Second
 )

+// httpClient abstracts http.Client for test injection.
+type httpClient interface {
+	Do(req *http.Request) (*http.Response, error)
+}
+
 // DiscordAdapter implements ChannelAdapter for Discord.
 //
 // Outbound messages are sent via Discord Incoming Webhooks. The webhook URL
@@ -33,7 +38,11 @@ const (
 //
 // StartPolling returns nil immediately — Discord does not support long-polling;
 // use the Interactions webhook route instead.
-type DiscordAdapter struct{}
+type DiscordAdapter struct {
+	// client allows dependency injection for testing. If nil, the default
+	// http.Client is used at call time (safe for production use).
+	client httpClient
+}

 func (d *DiscordAdapter) Type() string        { return "discord" }
 func (d *DiscordAdapter) DisplayName() string { return "Discord" }
@@ -95,7 +104,10 @@ func (d *DiscordAdapter) SendMessage(ctx context.Context, config map[string]inte
 	// Split long messages into chunks at word boundaries where possible.
 	chunks := splitMessage(text, maxLen)

-	client := &http.Client{Timeout: discordHTTPTimeout}
+	client := d.client
+	if client == nil {
+		client = &http.Client{Timeout: discordHTTPTimeout}
+	}
 	for _, chunk := range chunks {
 		payload, err := json.Marshal(map[string]string{"content": chunk})
 		if err != nil {
@@ -3,6 +3,7 @@ package channels
 import (
 	"context"
 	"encoding/json"
+	"fmt"
 	"net/http"
 	"net/http/httptest"
 	"strings"
@@ -13,6 +14,17 @@ import (

 // ==================== DiscordAdapter unit tests ====================

+// fatalClient is a deterministic httpClient stub that always returns a
+// fixed error. Used to test that error messages from SendMessage do not
+// contain the Discord webhook token.
+type fatalClient struct {
+	err error
+}
+
+func (c *fatalClient) Do(*http.Request) (*http.Response, error) {
+	return nil, c.err
+}
+
 func TestDiscordAdapter_Type(t *testing.T) {
 	a := &DiscordAdapter{}
 	if a.Type() != "discord" {
@@ -288,17 +300,36 @@ func TestSplitMessage_LongMessage(t *testing.T) {
 }

 // TestDiscordAdapter_SendMessage_ErrorDoesNotLeakToken verifies that when the
-// HTTP call to the Discord webhook fails (e.g. DNS error), the returned error
+// HTTP call to the Discord webhook fails (network error), the returned error
 // message does NOT contain the webhook URL — which embeds the Discord token.
 // Regression test for the MEDIUM security finding in PR #659.
+//
+// This test uses a deterministic httptest.Server (connection refused) rather
+// than a live network call, so it always exercises the error path regardless
+// of environment routing.
 func TestDiscordAdapter_SendMessage_ErrorDoesNotLeakToken(t *testing.T) {
-	a := &DiscordAdapter{}
-	// Use a valid-looking webhook URL with a fake token so we can check it
-	// doesn't appear in the error string.
 	fakeToken := "SUPER_SECRET_DISCORD_TOKEN_12345"
 	webhookURL := discordWebhookPrefix + "123456789/" + fakeToken

-	// Point at an unroutable address to force a dial error.
+	// httptest.Server with no handler → connection refused / immediate close.
+	// Deterministic in all environments; no skip condition.
+	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		t.Fatal("server handler called — should have been unreachable")
+	}))
+	defer ts.Close()
+
+	// Point the webhook URL at the test server so DiscordAdapter sends there.
+	// We intercept the *request* (not the URL) by swapping the client's base URL.
+	// The adapter always resolves webhookURL from config, so we set up a
+	// test server that refuses connections on the real discord.com domain
+	// by having the adapter's HTTP client hit an unreachable host.
+	//
+	// Simpler: construct a URL with the fake token that won't route anywhere,
+	// but use a mock httpClient to control the error exactly.
+	a := &DiscordAdapter{
+		client: &fatalClient{err: fmt.Errorf("connection refused")},
+	}
+
 	err := a.SendMessage(
 		context.Background(),
 		map[string]interface{}{"webhook_url": webhookURL},
@@ -307,12 +338,14 @@ func TestDiscordAdapter_SendMessage_ErrorDoesNotLeakToken(t *testing.T) {
 	)

 	if err == nil {
-		// In some environments the request might actually succeed; that's fine.
-		t.Skip("request unexpectedly succeeded — skipping token-leak check")
+		t.Fatal("expected error from fatalClient")
 	}
 	if strings.Contains(err.Error(), fakeToken) {
 		t.Errorf("error message leaks Discord webhook token: %q", err.Error())
 	}
+	if strings.Contains(err.Error(), "123456789") {
+		t.Errorf("error message leaks webhook ID: %q", err.Error())
+	}
 }

 func TestSplitMessage_SplitsAtNewline(t *testing.T) {
@@ -82,7 +82,10 @@ func NewManager(proxy A2AProxy, broadcaster Broadcaster) *Manager {
 			log.Printf("Channels: failed to disable telegram chat_id=%s: %v", chatID, err)
 			return
 		}
-		if rows, _ := res.RowsAffected(); rows > 0 {
+		rows, err := res.RowsAffected()
+		if err != nil {
+			log.Printf("Channels: disable telegram RowsAffected error chat_id=%s: %v", chatID, err)
+		} else if rows > 0 {
 			log.Printf("Channels: disabled %d telegram channel(s) for chat_id=%s (bot removed)", rows, chatID)
 			// Reload so the in-memory poller map drops the now-disabled row.
 			m.Reload(ctx)
@@ -310,7 +313,7 @@ func (m *Manager) HandleInbound(ctx context.Context, ch ChannelRow, msg *Inbound
 	history := m.loadHistory(ctx, historyKey)

 	// Build A2A JSON-RPC payload
-	a2aBody, _ := json.Marshal(map[string]interface{}{
+	a2aBody, marshalErr := json.Marshal(map[string]interface{}{
 		"method": "message/send",
 		"params": map[string]interface{}{
 			"message": map[string]interface{}{
@@ -330,6 +333,10 @@ func (m *Manager) HandleInbound(ctx context.Context, ch ChannelRow, msg *Inbound
 			},
 		},
 	})
+	if marshalErr != nil {
+		log.Printf("Channels %s: json.Marshal a2aBody failed: %v", ch.ChannelType, marshalErr)
+		return fmt.Errorf("marshal a2a body: %w", marshalErr)
+	}

 	callerID := "channel:" + ch.ChannelType

@@ -389,11 +396,13 @@ func (m *Manager) HandleInbound(ctx context.Context, ch ChannelRow, msg *Inbound

 	// Update stats in DB
 	if db.DB != nil {
-		db.DB.ExecContext(ctx, `
+		if _, err := db.DB.ExecContext(ctx, `
 			UPDATE workspace_channels
 			SET last_message_at = now(), message_count = message_count + 1, updated_at = now()
 			WHERE id = $1
-		`, ch.ID)
+		`, ch.ID); err != nil {
+			log.Printf("Channels: inbound stats update failed for channel %s: %v", ch.ID, err)
+		}
 	}

 	// Broadcast event
@@ -434,11 +443,13 @@ func (m *Manager) SendOutbound(ctx context.Context, channelID string, text strin
 	}

 	if db.DB != nil {
-		db.DB.ExecContext(ctx, `
+		if _, err := db.DB.ExecContext(ctx, `
 			UPDATE workspace_channels
 			SET last_message_at = now(), message_count = message_count + 1, updated_at = now()
 			WHERE id = $1
-		`, channelID)
+		`, channelID); err != nil {
+			log.Printf("Channels: outbound stats update failed for channel %s: %v", channelID, err)
+		}
 	}

 	if m.broadcaster != nil {
@@ -508,14 +519,20 @@ func (m *Manager) FetchWorkspaceChannelContext(ctx context.Context, workspaceID
 	}
 	defer rows.Close()
 	if !rows.Next() {
+		if err := rows.Err(); err != nil {
+			log.Printf("ChannelManager: FetchWorkspaceChannelContext rows error for %s: %v", workspaceID, err)
+		}
 		return ""
 	}
 	var configJSON []byte
-	if rows.Scan(&configJSON) != nil {
+	if err := rows.Scan(&configJSON); err != nil {
+		log.Printf("ChannelManager: FetchWorkspaceChannelContext scan error for %s: %v", workspaceID, err)
 		return ""
 	}
 	var config map[string]interface{}
-	json.Unmarshal(configJSON, &config)
+	if err := json.Unmarshal(configJSON, &config); err != nil {
+		log.Printf("ChannelManager: unmarshal config: %v", err)
+	}
 	if err := DecryptSensitiveFields(config); err != nil {
 		return ""
 	}
@@ -652,12 +669,16 @@ func (m *Manager) appendHistory(ctx context.Context, key string, username, userM
 	if db.RDB == nil {
 		return
 	}
-	entry, _ := json.Marshal(map[string]string{
+	entry, marshalErr := json.Marshal(map[string]string{
 		"user":    username,
 		"message": userMsg,
 		"reply":   agentReply,
 		"time":    time.Now().UTC().Format(time.RFC3339),
 	})
+	if marshalErr != nil {
+		log.Printf("appendHistory %s: json.Marshal entry failed: %v", key, marshalErr)
+		return
+	}
 	db.RDB.LPush(ctx, key, string(entry))
 	db.RDB.LTrim(ctx, key, 0, int64(maxHistoryEntries-1))
 	db.RDB.Expire(ctx, key, historyTTL)
@@ -6,6 +6,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
+	"log"
 	"net/http"
 	"strings"
 	"time"
@@ -159,7 +160,11 @@ func (s *SlackAdapter) sendBotMessage(ctx context.Context, config map[string]int
 			payload["icon_emoji"] = iconEmoji
 		}

-		body, _ := json.Marshal(payload)
+		body, marshalErr := json.Marshal(payload)
+		if marshalErr != nil {
+			log.Printf("slack SendMessage: json.Marshal payload failed: %v", marshalErr)
+			return fmt.Errorf("slack: marshal payload: %w", marshalErr)
+		}
 		req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://slack.com/api/chat.postMessage", bytes.NewReader(body))
 		if err != nil {
 			return fmt.Errorf("slack: build request: %w", err)
@@ -482,12 +482,14 @@ func (t *TelegramAdapter) StartPolling(ctx context.Context, config map[string]in
 				if apiErr.Code == 429 {
 					retryAfter := time.Duration(apiErr.RetryAfter) * time.Second
 					log.Printf("Channels: Telegram poll rate-limited, sleeping %s", retryAfter)
+					timer := time.NewTimer(retryAfter)
 					select {
 					case <-ctx.Done():
+						timer.Stop()
 						return nil
-					case <-time.After(retryAfter):
-						continue
+					case <-timer.C:
 					}
+					continue
 				}
 				if apiErr.Code == 401 {
 					invalidateBot(token)
@@ -495,12 +497,14 @@ func (t *TelegramAdapter) StartPolling(ctx context.Context, config map[string]in
 				}
 			}
 			log.Printf("Channels: Telegram poll error: %v", err)
+			timer := time.NewTimer(telegramPollInterval)
 			select {
 			case <-ctx.Done():
+				timer.Stop()
 				return nil
-			case <-time.After(telegramPollInterval):
-				continue
+			case <-timer.C:
 			}
+			continue
 		}

 		for _, update := range updates {
@@ -0,0 +1,463 @@
+// Package codexauth owns the SINGLE, platform-side refresh of the global
+// codex (ChatGPT/Codex subscription) OAuth credential stored in the
+// global_secrets table under key CODEX_AUTH_JSON.
+//
+// THE PROBLEM IT FIXES (agents-team prod, 2026-05-31)
+//
+// Multiple codex workspaces share ONE ChatGPT-Pro OAuth token (the global
+// secret CODEX_AUTH_JSON). OpenAI's refresh_token is SINGLE-USE: every refresh
+// rotates it and invalidates the prior one. When each per-agent codex
+// app-server refreshed independently on a 401, the siblings' in-flight tokens
+// were invalidated within seconds — a refresh storm that burned the seed and
+// wedged every codex agent.
+//
+// THE FIX (two halves; this is the core half)
+//
+//  1. The per-workspace codex app-server NO LONGER refreshes (the template's
+//     OAuth POST is gated off by default — see the codex template's
+//     codex_auth_sync.sh / CODEX_AUTH_REFRESH_OWNER gate). Workspaces only ever
+//     GET the current token and write it to auth.json.
+//  2. ONE owner refreshes the rotating refresh_token: this background goroutine
+//     in the platform. It is structurally single-flight (one goroutine + a
+//     package mutex), refreshes ONLY when the access_token is within a safety
+//     margin of expiry, POSTs the refresh_token at most ONCE per due cycle, and
+//     writes the rotated blob back to global_secrets. On a permanent failure
+//     (the seed was already burned by an out-of-band login) it logs ONCE and
+//     backs off — it never hot-loops a dead refresh_token.
+//
+// Billing-mode resolution and the byok strip are UNTOUCHED by this package.
+package codexauth
+
+import (
+	"context"
+	"database/sql"
+	"encoding/base64"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"strings"
+	"sync"
+	"time"
+
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/crypto"
+)
+
+const (
+	// CodexAuthSecretKey is the global_secrets key holding the shared codex
+	// ChatGPT/Codex subscription OAuth blob (auth.json contents).
+	CodexAuthSecretKey = "CODEX_AUTH_JSON"
+
+	// oauthTokenURL is OpenAI's OAuth token endpoint. The ONLY endpoint this
+	// package ever POSTs to, and only for a due refresh.
+	oauthTokenURL = "https://auth.openai.com/oauth/token"
+
+	// codexOAuthClientID is the public Codex CLI OAuth client id (the same id
+	// the codex CLI sends). Not a secret.
+	codexOAuthClientID = "app_EMoamEEZ73f0CkXaXp7hrann"
+
+	// refreshSafetyMargin is how far ahead of access_token expiry a refresh is
+	// considered DUE. A token expiring within this window is refreshed now; one
+	// expiring later is left untouched (skip-when-fresh). Generous so a slow
+	// tick can never let the shared token lapse for the fleet.
+	refreshSafetyMargin = 15 * time.Minute
+
+	// defaultInterval is how often the loop wakes to check due-ness. The check
+	// is cheap (decrypt + JWT exp parse) and only POSTs when actually due.
+	defaultInterval = 5 * time.Minute
+
+	// permanentFailureBackoff is how long the loop waits after a PERMANENT
+	// refresh failure (invalid_grant / "refresh token already used"). The seed
+	// is burned until a human re-seeds a fresh login; there is nothing to retry,
+	// so we back off hard rather than hammer the dead token.
+	permanentFailureBackoff = 1 * time.Hour
+)
+
+// SecretStore is the minimal global_secrets surface the refresher needs. The
+// production implementation (postgresStore) is backed by *sql.DB; tests inject
+// a fake. It is deliberately tiny — read one key, write one key — so the test
+// double is trivial and the refresher never reaches for the package-global DB.
+type SecretStore interface {
+	// Get returns the decrypted secret value and true, or ("", false) when the
+	// key is absent. A non-nil error is a real read failure (not absence).
+	Get(ctx context.Context, key string) (value string, found bool, err error)
+	// Put encrypts and upserts value under key, bumping the row's updated_at
+	// (the "last_refresh" timestamp). It is the rotated-blob write-back.
+	Put(ctx context.Context, key, value string) error
+}
+
+// httpDoer is the http client seam (real *http.Client in prod, fake transport
+// in tests). Tests NEVER hit the network.
+type httpDoer interface {
+	Do(req *http.Request) (*http.Response, error)
+}
+
+// refresher is the single-owner refresh engine. The package-level mutex makes
+// the refresh structurally single-flight: even if two refreshOnce calls raced
+// (they cannot in prod — one goroutine drives it — but a test or a future
+// caller might), only one POSTs at a time, and the access-token freshness
+// re-check inside the lock means the second sees a freshly-rotated token and
+// skips. One goroutine + this mutex = single-flight by construction.
+type refresher struct {
+	store  SecretStore
+	client httpDoer
+	now    func() time.Time
+
+	// permanentlyFailed records that the current seed's refresh_token was
+	// rejected as already-used/invalid. While set, refreshOnce is INERT (it
+	// will not re-POST the dead token) until the secret value CHANGES (a human
+	// re-seed), detected by comparing the stored blob. This is the anti-storm
+	// latch — it lives on the struct, not globally, so it resets if the seed is
+	// replaced out of band.
+	failedSeed string // the auth-json blob that failed; "" = no known failure
+}
+
+// mu serializes refreshOnce across the process. Package-level so the
+// single-flight guarantee holds regardless of how many refresher values exist
+// (in prod there is exactly one).
+var mu sync.Mutex
+
+// oauthTokens is the token trio inside auth.json (and the OAuth response).
+type oauthTokens struct {
+	AccessToken  string `json:"access_token"`
+	RefreshToken string `json:"refresh_token"`
+	IDToken      string `json:"id_token,omitempty"`
+}
+
+// StartCodexAuthRefresher launches the single background refresher goroutine.
+// It returns immediately; the loop runs until ctx is cancelled. Wire it under
+// supervised.RunWithRecover in main.go like the other Start* sweeps.
+//
+// db may be nil only in tests that drive refreshOnce directly; in prod it is
+// the server's *sql.DB. The loop is INERT (logs once, keeps ticking) whenever
+// CODEX_AUTH_JSON is absent — a deployment with no shared codex seed pays only
+// a cheap periodic read.
+func StartCodexAuthRefresher(ctx context.Context, db *sql.DB) {
+	r := &refresher{
+		store:  &postgresStore{db: db},
+		client: &http.Client{Timeout: 30 * time.Second},
+		now:    time.Now,
+	}
+	r.run(ctx, defaultInterval)
+}
+
+// run is the tick loop. It checks due-ness every interval and on a permanent
+// failure waits permanentFailureBackoff before the next check (never a tight
+// retry of a burned token).
+func (r *refresher) run(ctx context.Context, interval time.Duration) {
+	// Check once promptly on boot, then on the interval.
+	for {
+		wait := interval
+		if perm := r.refreshOnce(ctx); perm {
+			// Permanent failure this cycle — the seed is burned. Back off hard;
+			// a human must re-seed. We keep ticking (a re-seed CHANGES the blob,
+			// which clears the latch) but slowly.
+			wait = permanentFailureBackoff
+		}
+
+		timer := time.NewTimer(wait)
+		select {
+		case <-ctx.Done():
+			timer.Stop()
+			log.Printf("codexauth: context done; stopping refresher")
+			return
+		case <-timer.C:
+		}
+	}
+}
+
+// refreshOnce performs ONE due-check + at most one refresh POST. It returns
+// permanentFailure=true iff the refresh_token was permanently rejected this
+// cycle (the caller backs off). All other outcomes (inert/skip/rotated/transient
+// error) return false.
+//
+// It is single-flight: the package mutex is held for the whole read→decide→
+// POST→write-back so two callers cannot both POST the (single-use) refresh_token.
+func (r *refresher) refreshOnce(ctx context.Context) (permanentFailure bool) {
+	mu.Lock()
+	defer mu.Unlock()
+
+	blob, found, err := r.store.Get(ctx, CodexAuthSecretKey)
+	if err != nil {
+		log.Printf("codexauth: read CODEX_AUTH_JSON failed: %v (skipping this cycle)", err)
+		return false
+	}
+	if !found || strings.TrimSpace(blob) == "" {
+		// INERT: no shared codex seed in this deployment. Cheap no-op.
+		log.Printf("codexauth: no CODEX_AUTH_JSON in global_secrets — refresher inert")
+		// A previously-failed seed that has since been DELETED clears the latch.
+		r.failedSeed = ""
+		return false
+	}
+
+	// Anti-storm latch: if THIS exact blob already failed permanently, do not
+	// re-POST its dead refresh_token. A re-seed changes the blob and clears it.
+	if r.failedSeed != "" && r.failedSeed == blob {
+		return false
+	}
+	if r.failedSeed != "" && r.failedSeed != blob {
+		// The seed changed out of band (human re-login) — give it a fresh chance.
+		r.failedSeed = ""
+	}
+
+	tokens, err := parseTokens(blob)
+	if err != nil {
+		log.Printf("codexauth: CODEX_AUTH_JSON is not parseable codex auth json: %v (skipping)", err)
+		return false
+	}
+	if tokens.RefreshToken == "" {
+		log.Printf("codexauth: CODEX_AUTH_JSON carries no refresh_token (skipping)")
+		return false
+	}
+
+	// Skip-when-fresh: only refresh within the safety margin of expiry. A blob
+	// with an unparseable/absent access_token exp is treated as DUE (better to
+	// refresh a token we cannot date than let the fleet lapse).
+	exp, haveExp := jwtExp(tokens.AccessToken)
+	if haveExp {
+		remaining := exp.Sub(r.now())
+		if remaining > refreshSafetyMargin {
+			// Fresh — nothing to do. No POST.
+			return false
+		}
+	}
+
+	// DUE: POST the refresh_token ONCE.
+	newTokens, perm, err := r.doRefresh(ctx, tokens.RefreshToken)
+	if err != nil {
+		if perm {
+			// Permanent: the seed is burned. Latch it so we don't re-POST, log
+			// ONCE, and DO NOT write anything back.
+			log.Printf("codexauth: PERMANENT refresh failure (refresh_token rejected): %v — "+
+				"NOT writing back; the shared CODEX_AUTH_JSON seed is burned and must be re-seeded "+
+				"via a fresh codex login. Backing off.", err)
+			r.failedSeed = blob
+			return true
+		}
+		// Transient (network/5xx): no write-back, retry next cycle (no backoff).
+		log.Printf("codexauth: transient refresh error: %v (will retry next cycle)", err)
+		return false
+	}
+
+	// Success: merge the rotated trio into the blob (preserving every other
+	// field) and write it back encrypted, bumping updated_at (last_refresh).
+	rotated, err := mergeTokens(blob, newTokens)
+	if err != nil {
+		log.Printf("codexauth: failed to merge rotated tokens into auth json: %v (NOT writing back)", err)
+		return false
+	}
+	if err := r.store.Put(ctx, CodexAuthSecretKey, rotated); err != nil {
+		log.Printf("codexauth: write-back of rotated CODEX_AUTH_JSON failed: %v", err)
+		return false
+	}
+	r.failedSeed = "" // success clears any stale latch
+	log.Printf("codexauth: rotated shared CODEX_AUTH_JSON (single-owner refresh)")
+	return false
+}
+
+// doRefresh POSTs the refresh_token to OpenAI's OAuth endpoint exactly once and
+// returns the rotated trio. permanent=true marks an unrecoverable rejection
+// (HTTP 400 invalid_grant / "refresh token already used") so the caller latches
+// and backs off instead of retrying.
+func (r *refresher) doRefresh(ctx context.Context, refreshToken string) (tokens oauthTokens, permanent bool, err error) {
+	body, _ := json.Marshal(map[string]string{
+		"grant_type":    "refresh_token",
+		"client_id":     codexOAuthClientID,
+		"refresh_token": refreshToken,
+	})
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, oauthTokenURL, strings.NewReader(string(body)))
+	if err != nil {
+		return oauthTokens{}, false, err
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Accept", "application/json")
+
+	resp, err := r.client.Do(req)
+	if err != nil {
+		return oauthTokens{}, false, err // transient: network
+	}
+	defer resp.Body.Close()
+	respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
+
+	if resp.StatusCode == http.StatusOK {
+		var t oauthTokens
+		if err := json.Unmarshal(respBody, &t); err != nil {
+			return oauthTokens{}, false, fmt.Errorf("decode token response: %w", err)
+		}
+		if t.AccessToken == "" {
+			return oauthTokens{}, false, fmt.Errorf("token response missing access_token")
+		}
+		return t, false, nil
+	}
+
+	// Non-200. A 400 (and any body naming invalid_grant / already-used) is a
+	// PERMANENT rejection of the refresh_token. 401/403 likewise mean the seed
+	// is no good. Everything else (429/5xx/network-shaped) is transient.
+	lowerBody := strings.ToLower(string(respBody))
+	isInvalidGrant := strings.Contains(lowerBody, "invalid_grant") ||
+		strings.Contains(lowerBody, "refresh token already used") ||
+		strings.Contains(lowerBody, "already been used") ||
+		strings.Contains(lowerBody, "token has been revoked")
+	switch {
+	case resp.StatusCode == http.StatusBadRequest && isInvalidGrant:
+		return oauthTokens{}, true, fmt.Errorf("oauth %d: %s", resp.StatusCode, strings.TrimSpace(string(respBody)))
+	case resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden:
+		return oauthTokens{}, true, fmt.Errorf("oauth %d: %s", resp.StatusCode, strings.TrimSpace(string(respBody)))
+	default:
+		return oauthTokens{}, false, fmt.Errorf("oauth %d: %s", resp.StatusCode, strings.TrimSpace(string(respBody)))
+	}
+}
+
+// parseTokens extracts the OAuth trio from an auth.json blob, accepting both
+// the nested `{"tokens":{...}}` shape the codex CLI writes and a flat top-level
+// shape some seeds use.
+func parseTokens(blob string) (oauthTokens, error) {
+	var top map[string]json.RawMessage
+	if err := json.Unmarshal([]byte(blob), &top); err != nil {
+		return oauthTokens{}, err
+	}
+	if nested, ok := top["tokens"]; ok {
+		var t oauthTokens
+		if err := json.Unmarshal(nested, &t); err != nil {
+			return oauthTokens{}, fmt.Errorf("decode nested tokens: %w", err)
+		}
+		return t, nil
+	}
+	var t oauthTokens
+	if err := json.Unmarshal([]byte(blob), &t); err != nil {
+		return oauthTokens{}, err
+	}
+	return t, nil
+}
+
+// mergeTokens writes the rotated trio back into the original blob in-place,
+// preserving the blob's shape (nested-vs-flat) and every other field. A field
+// in the OAuth response that is empty (e.g. id_token omitted) does NOT clobber
+// the existing value.
+func mergeTokens(blob string, rotated oauthTokens) (string, error) {
+	var top map[string]json.RawMessage
+	if err := json.Unmarshal([]byte(blob), &top); err != nil {
+		return "", err
+	}
+
+	applyTo := func(m map[string]json.RawMessage) error {
+		setStr := func(key, val string) error {
+			if val == "" {
+				return nil // don't clobber an existing value with an empty one
+			}
+			b, err := json.Marshal(val)
+			if err != nil {
+				return err
+			}
+			m[key] = b
+			return nil
+		}
+		if err := setStr("access_token", rotated.AccessToken); err != nil {
+			return err
+		}
+		if err := setStr("refresh_token", rotated.RefreshToken); err != nil {
+			return err
+		}
+		if err := setStr("id_token", rotated.IDToken); err != nil {
+			return err
+		}
+		return nil
+	}
+
+	if nestedRaw, ok := top["tokens"]; ok {
+		var nested map[string]json.RawMessage
+		if err := json.Unmarshal(nestedRaw, &nested); err != nil {
+			return "", fmt.Errorf("decode nested tokens for merge: %w", err)
+		}
+		if err := applyTo(nested); err != nil {
+			return "", err
+		}
+		nb, err := json.Marshal(nested)
+		if err != nil {
+			return "", err
+		}
+		top["tokens"] = nb
+	} else {
+		if err := applyTo(top); err != nil {
+			return "", err
+		}
+	}
+
+	out, err := json.Marshal(top)
+	if err != nil {
+		return "", err
+	}
+	return string(out), nil
+}
+
+// jwtExp decodes the `exp` claim (Unix seconds) from a JWT access token WITHOUT
+// verifying the signature (we only need the expiry to decide due-ness; the
+// token's validity is OpenAI's to enforce). Returns ok=false when the token is
+// not a parseable 3-part JWT or carries no numeric exp.
+func jwtExp(token string) (time.Time, bool) {
+	parts := strings.Split(token, ".")
+	if len(parts) != 3 {
+		return time.Time{}, false
+	}
+	payload, err := base64.RawURLEncoding.DecodeString(parts[1])
+	if err != nil {
+		// Some encoders pad; tolerate standard base64url with padding too.
+		payload, err = base64.URLEncoding.DecodeString(parts[1])
+		if err != nil {
+			return time.Time{}, false
+		}
+	}
+	var claims struct {
+		Exp json.Number `json:"exp"`
+	}
+	if err := json.Unmarshal(payload, &claims); err != nil {
+		return time.Time{}, false
+	}
+	secs, err := claims.Exp.Int64()
+	if err != nil || secs <= 0 {
+		return time.Time{}, false
+	}
+	return time.Unix(secs, 0), true
+}
+
+// postgresStore is the production SecretStore backed by global_secrets, using
+// the SAME crypto path the secrets handler uses (DecryptVersioned on read,
+// Encrypt + CurrentEncryptionVersion on write).
+type postgresStore struct {
+	db *sql.DB
+}
+
+func (s *postgresStore) Get(ctx context.Context, key string) (string, bool, error) {
+	var enc []byte
+	var ver int
+	err := s.db.QueryRowContext(ctx,
+		`SELECT encrypted_value, encryption_version FROM global_secrets WHERE key = $1`, key).
+		Scan(&enc, &ver)
+	if err == sql.ErrNoRows {
+		return "", false, nil
+	}
+	if err != nil {
+		return "", false, err
+	}
+	plain, err := crypto.DecryptVersioned(enc, ver)
+	if err != nil {
+		return "", false, err
+	}
+	return string(plain), true, nil
+}
+
+func (s *postgresStore) Put(ctx context.Context, key, value string) error {
+	enc, err := crypto.Encrypt([]byte(value))
+	if err != nil {
+		return err
+	}
+	ver := crypto.CurrentEncryptionVersion()
+	_, err = s.db.ExecContext(ctx, `
+		INSERT INTO global_secrets (key, encrypted_value, encryption_version)
+		VALUES ($1, $2, $3)
+		ON CONFLICT (key) DO UPDATE
+			SET encrypted_value = $2, encryption_version = $3, updated_at = now()
+	`, key, enc, ver)
+	return err
+}
@@ -0,0 +1,425 @@
+package codexauth
+
+import (
+	"context"
+	"encoding/base64"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+// --- test doubles -----------------------------------------------------------
+
+// fakeStore is an in-memory SecretStore. nil entry = absent key.
+type fakeStore struct {
+	mu     sync.Mutex
+	values map[string]string
+	getErr error
+	putErr error
+	puts   int32 // count of successful Put calls
+}
+
+func newFakeStore() *fakeStore { return &fakeStore{values: map[string]string{}} }
+
+func (f *fakeStore) Get(_ context.Context, key string) (string, bool, error) {
+	f.mu.Lock()
+	defer f.mu.Unlock()
+	if f.getErr != nil {
+		return "", false, f.getErr
+	}
+	v, ok := f.values[key]
+	return v, ok, nil
+}
+
+func (f *fakeStore) Put(_ context.Context, key, value string) error {
+	f.mu.Lock()
+	defer f.mu.Unlock()
+	if f.putErr != nil {
+		return f.putErr
+	}
+	f.values[key] = value
+	atomic.AddInt32(&f.puts, 1)
+	return nil
+}
+
+func (f *fakeStore) get(key string) string {
+	f.mu.Lock()
+	defer f.mu.Unlock()
+	return f.values[key]
+}
+
+// fakeTransport records every request and returns a scripted response. It is
+// the network seam — tests NEVER make a real request.
+type fakeTransport struct {
+	mu        sync.Mutex
+	calls     int32
+	urls      []string
+	methods   []string
+	bodies    []string
+	status    int
+	respBody  string
+	transport func(*http.Request) (*http.Response, error) // optional override
+}
+
+func (t *fakeTransport) Do(req *http.Request) (*http.Response, error) {
+	atomic.AddInt32(&t.calls, 1)
+	t.mu.Lock()
+	t.urls = append(t.urls, req.URL.String())
+	t.methods = append(t.methods, req.Method)
+	if req.Body != nil {
+		b, _ := io.ReadAll(req.Body)
+		t.bodies = append(t.bodies, string(b))
+	} else {
+		t.bodies = append(t.bodies, "")
+	}
+	t.mu.Unlock()
+
+	if t.transport != nil {
+		return t.transport(req)
+	}
+	status := t.status
+	if status == 0 {
+		status = http.StatusOK
+	}
+	return &http.Response{
+		StatusCode: status,
+		Body:       io.NopCloser(strings.NewReader(t.respBody)),
+		Header:     make(http.Header),
+	}, nil
+}
+
+func (t *fakeTransport) callCount() int { return int(atomic.LoadInt32(&t.calls)) }
+
+// --- helpers ----------------------------------------------------------------
+
+// makeJWT builds an unsigned-but-parseable JWT whose payload carries exp.
+func makeJWT(exp time.Time) string {
+	header := base64.RawURLEncoding.EncodeToString([]byte(`{"alg":"none","typ":"JWT"}`))
+	payload := base64.RawURLEncoding.EncodeToString([]byte(
+		fmt.Sprintf(`{"exp":%d,"sub":"codex"}`, exp.Unix())))
+	sig := base64.RawURLEncoding.EncodeToString([]byte("sig"))
+	return header + "." + payload + "." + sig
+}
+
+// authBlob builds a nested codex auth.json blob with the given tokens.
+func authBlob(access, refresh string) string {
+	b, _ := json.Marshal(map[string]any{
+		"tokens": map[string]any{
+			"access_token":  access,
+			"refresh_token": refresh,
+			"id_token":      "id-original",
+		},
+		"OPENAI_API_KEY": nil,
+		"last_refresh":   "2026-01-01T00:00:00Z",
+	})
+	return string(b)
+}
+
+func newTestRefresher(store SecretStore, client httpDoer, now time.Time) *refresher {
+	return &refresher{
+		store:  store,
+		client: client,
+		now:    func() time.Time { return now },
+	}
+}
+
+func okRefreshResponse(access, refresh string) string {
+	b, _ := json.Marshal(oauthTokens{AccessToken: access, RefreshToken: refresh, IDToken: "id-new"})
+	return string(b)
+}
+
+// --- tests ------------------------------------------------------------------
+
+// TestJWTExpParse covers the exp decode (valid, malformed, missing).
+func TestJWTExpParse(t *testing.T) {
+	want := time.Now().Add(2 * time.Hour).Truncate(time.Second)
+	got, ok := jwtExp(makeJWT(want))
+	if !ok {
+		t.Fatalf("jwtExp(valid) ok=false, want true")
+	}
+	if !got.Equal(want) {
+		t.Errorf("jwtExp = %v, want %v", got, want)
+	}
+
+	if _, ok := jwtExp("not-a-jwt"); ok {
+		t.Errorf("jwtExp(non-jwt) ok=true, want false")
+	}
+	if _, ok := jwtExp("a.b.c"); ok {
+		t.Errorf("jwtExp(garbage parts) ok=true, want false")
+	}
+	// 3 parts but payload has no exp.
+	noExp := base64.RawURLEncoding.EncodeToString([]byte("{}"))
+	if _, ok := jwtExp("h." + noExp + ".s"); ok {
+		t.Errorf("jwtExp(no exp claim) ok=true, want false")
+	}
+}
+
+// TestRefreshOnce_SkipWhenFresh: a token well outside the safety margin is NOT
+// refreshed — no POST, no write-back.
+func TestRefreshOnce_SkipWhenFresh(t *testing.T) {
+	now := time.Now()
+	store := newFakeStore()
+	store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(2*time.Hour)), "rt-1")
+	tr := &fakeTransport{status: http.StatusOK, respBody: okRefreshResponse("new-at", "rt-2")}
+	r := newTestRefresher(store, tr, now)
+
+	if perm := r.refreshOnce(context.Background()); perm {
+		t.Fatalf("fresh token: permanentFailure=true, want false")
+	}
+	if tr.callCount() != 0 {
+		t.Errorf("fresh token: %d OAuth POSTs, want 0", tr.callCount())
+	}
+	if atomic.LoadInt32(&store.puts) != 0 {
+		t.Errorf("fresh token: %d write-backs, want 0", store.puts)
+	}
+}
+
+// TestRefreshOnce_RotateThenReskip: a token inside the margin is refreshed once
+// (POST + write-back of the rotated blob); a subsequent call on the now-fresh
+// rotated token skips (no second POST). Proves rotate→write-back→re-skip.
+func TestRefreshOnce_RotateThenReskip(t *testing.T) {
+	now := time.Now()
+	store := newFakeStore()
+	// Expires in 5m — inside the 15m safety margin → DUE.
+	store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(5*time.Minute)), "rt-1")
+	// Rotated access token is fresh (2h out); rotated refresh is rt-2.
+	tr := &fakeTransport{status: http.StatusOK, respBody: okRefreshResponse(makeJWT(now.Add(2*time.Hour)), "rt-2")}
+	r := newTestRefresher(store, tr, now)
+
+	if perm := r.refreshOnce(context.Background()); perm {
+		t.Fatalf("due token: permanentFailure=true, want false")
+	}
+	if tr.callCount() != 1 {
+		t.Fatalf("due token: %d OAuth POSTs, want exactly 1", tr.callCount())
+	}
+	if atomic.LoadInt32(&store.puts) != 1 {
+		t.Fatalf("due token: %d write-backs, want exactly 1", store.puts)
+	}
+
+	// The written blob must carry the rotated refresh_token and preserve the
+	// non-token field.
+	rotated := store.get(CodexAuthSecretKey)
+	tokens, err := parseTokens(rotated)
+	if err != nil {
+		t.Fatalf("parse rotated blob: %v", err)
+	}
+	if tokens.RefreshToken != "rt-2" {
+		t.Errorf("rotated refresh_token = %q, want rt-2", tokens.RefreshToken)
+	}
+	if !strings.Contains(rotated, "last_refresh") {
+		t.Errorf("rotated blob dropped the preserved last_refresh field: %s", rotated)
+	}
+
+	// Second call: the rotated access token is fresh → skip, no new POST.
+	if perm := r.refreshOnce(context.Background()); perm {
+		t.Fatalf("re-skip: permanentFailure=true, want false")
+	}
+	if tr.callCount() != 1 {
+		t.Errorf("re-skip: %d total OAuth POSTs, want still 1", tr.callCount())
+	}
+	if atomic.LoadInt32(&store.puts) != 1 {
+		t.Errorf("re-skip: %d total write-backs, want still 1", store.puts)
+	}
+}
+
+// TestRefreshOnce_NoSecretInert: absent CODEX_AUTH_JSON → inert (no POST, no
+// write-back, no error/permanent).
+func TestRefreshOnce_NoSecretInert(t *testing.T) {
+	store := newFakeStore() // empty
+	tr := &fakeTransport{}
+	r := newTestRefresher(store, tr, time.Now())
+
+	if perm := r.refreshOnce(context.Background()); perm {
+		t.Fatalf("no secret: permanentFailure=true, want false")
+	}
+	if tr.callCount() != 0 {
+		t.Errorf("no secret: %d POSTs, want 0", tr.callCount())
+	}
+	if atomic.LoadInt32(&store.puts) != 0 {
+		t.Errorf("no secret: %d write-backs, want 0", store.puts)
+	}
+}
+
+// TestRefreshOnce_PermanentFailNoWriteNoStorm: a 400 invalid_grant must (a) not
+// write back, (b) return permanentFailure=true, and (c) NOT re-POST on the next
+// cycle for the same (burned) seed — the anti-storm latch.
+func TestRefreshOnce_PermanentFailNoWriteNoStorm(t *testing.T) {
+	now := time.Now()
+	store := newFakeStore()
+	store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(1*time.Minute)), "rt-burned")
+	tr := &fakeTransport{
+		status:   http.StatusBadRequest,
+		respBody: `{"error":"invalid_grant","error_description":"refresh token already used"}`,
+	}
+	r := newTestRefresher(store, tr, now)
+
+	perm := r.refreshOnce(context.Background())
+	if !perm {
+		t.Fatalf("invalid_grant: permanentFailure=false, want true")
+	}
+	if tr.callCount() != 1 {
+		t.Fatalf("invalid_grant: %d POSTs, want exactly 1", tr.callCount())
+	}
+	if atomic.LoadInt32(&store.puts) != 0 {
+		t.Fatalf("invalid_grant: %d write-backs, want 0 (must NOT persist a failed refresh)", store.puts)
+	}
+
+	// Next cycle, SAME burned seed: must NOT re-POST (anti-storm latch).
+	perm2 := r.refreshOnce(context.Background())
+	if tr.callCount() != 1 {
+		t.Errorf("anti-storm: re-POSTed a burned refresh_token (%d total POSTs, want still 1)", tr.callCount())
+	}
+	_ = perm2 // latched cycle returns false (already-known failure, nothing new)
+
+	// A RE-SEED (blob changes) clears the latch and allows a fresh attempt.
+	store.mu.Lock()
+	store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(1*time.Minute)), "rt-freshly-seeded")
+	store.mu.Unlock()
+	tr.status = http.StatusOK
+	tr.respBody = okRefreshResponse(makeJWT(now.Add(2*time.Hour)), "rt-rotated")
+	if perm := r.refreshOnce(context.Background()); perm {
+		t.Fatalf("post-reseed: permanentFailure=true, want false")
+	}
+	if tr.callCount() != 2 {
+		t.Errorf("post-reseed: %d total POSTs, want 2 (latch should clear on re-seed)", tr.callCount())
+	}
+}
+
+// TestRefreshOnce_TransientNoWriteNoLatch: a 5xx is transient — no write-back,
+// returns false (no hard backoff latch), and a later cycle retries.
+func TestRefreshOnce_TransientNoWriteNoLatch(t *testing.T) {
+	now := time.Now()
+	store := newFakeStore()
+	store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(1*time.Minute)), "rt-1")
+	tr := &fakeTransport{status: http.StatusServiceUnavailable, respBody: "upstream down"}
+	r := newTestRefresher(store, tr, now)
+
+	if perm := r.refreshOnce(context.Background()); perm {
+		t.Fatalf("503: permanentFailure=true, want false (transient)")
+	}
+	if atomic.LoadInt32(&store.puts) != 0 {
+		t.Errorf("503: %d write-backs, want 0", store.puts)
+	}
+	// Retry next cycle succeeds (no latch on transient).
+	tr.status = http.StatusOK
+	tr.respBody = okRefreshResponse(makeJWT(now.Add(2*time.Hour)), "rt-2")
+	if perm := r.refreshOnce(context.Background()); perm {
+		t.Fatalf("retry after 503: permanentFailure=true, want false")
+	}
+	if tr.callCount() != 2 {
+		t.Errorf("transient retry: %d total POSTs, want 2", tr.callCount())
+	}
+	if atomic.LoadInt32(&store.puts) != 1 {
+		t.Errorf("transient retry: %d write-backs, want 1", store.puts)
+	}
+}
+
+// TestRefreshOnce_SingleFlight: concurrent refreshOnce calls on a DUE token must
+// POST exactly once total — the package mutex serializes them and the second
+// sees the freshly-rotated (now-fresh) token and skips. Structural single-flight.
+func TestRefreshOnce_SingleFlight(t *testing.T) {
+	now := time.Now()
+	store := newFakeStore()
+	store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(1*time.Minute)), "rt-1")
+	// Every successful rotation yields a FRESH (2h) access token, so once one
+	// caller rotates, the other sees fresh and skips.
+	tr := &fakeTransport{status: http.StatusOK, respBody: okRefreshResponse(makeJWT(now.Add(2*time.Hour)), "rt-2")}
+	r := newTestRefresher(store, tr, now)
+
+	const n = 16
+	var wg sync.WaitGroup
+	wg.Add(n)
+	for i := 0; i < n; i++ {
+		go func() {
+			defer wg.Done()
+			r.refreshOnce(context.Background())
+		}()
+	}
+	wg.Wait()
+
+	if tr.callCount() != 1 {
+		t.Errorf("single-flight: %d OAuth POSTs across %d concurrent calls, want exactly 1", tr.callCount(), n)
+	}
+	if atomic.LoadInt32(&store.puts) != 1 {
+		t.Errorf("single-flight: %d write-backs, want exactly 1", store.puts)
+	}
+}
+
+// TestRefreshOnce_PostsExactlyOnceToOAuthEndpoint: when it DOES refresh, the
+// single POST goes to the OAuth token URL with the refresh_token grant body.
+func TestRefreshOnce_PostsExactlyOnceToOAuthEndpoint(t *testing.T) {
+	now := time.Now()
+	store := newFakeStore()
+	store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(1*time.Minute)), "rt-secret")
+	tr := &fakeTransport{status: http.StatusOK, respBody: okRefreshResponse(makeJWT(now.Add(2*time.Hour)), "rt-2")}
+	r := newTestRefresher(store, tr, now)
+
+	r.refreshOnce(context.Background())
+
+	if tr.callCount() != 1 {
+		t.Fatalf("%d POSTs, want exactly 1", tr.callCount())
+	}
+	if tr.urls[0] != oauthTokenURL {
+		t.Errorf("POST URL = %q, want %q", tr.urls[0], oauthTokenURL)
+	}
+	if tr.methods[0] != http.MethodPost {
+		t.Errorf("method = %q, want POST", tr.methods[0])
+	}
+	var body map[string]string
+	if err := json.Unmarshal([]byte(tr.bodies[0]), &body); err != nil {
+		t.Fatalf("request body not json: %v (%s)", err, tr.bodies[0])
+	}
+	if body["grant_type"] != "refresh_token" {
+		t.Errorf("grant_type = %q, want refresh_token", body["grant_type"])
+	}
+	if body["refresh_token"] != "rt-secret" {
+		t.Errorf("refresh_token = %q, want rt-secret", body["refresh_token"])
+	}
+	if body["client_id"] != codexOAuthClientID {
+		t.Errorf("client_id = %q, want %q", body["client_id"], codexOAuthClientID)
+	}
+}
+
+// TestRefreshOnce_ReadErrorSkips: a store read error is a transient skip (no
+// POST, no permanent latch).
+func TestRefreshOnce_ReadErrorSkips(t *testing.T) {
+	store := newFakeStore()
+	store.getErr = fmt.Errorf("db down")
+	tr := &fakeTransport{}
+	r := newTestRefresher(store, tr, time.Now())
+	if perm := r.refreshOnce(context.Background()); perm {
+		t.Errorf("read error: permanentFailure=true, want false")
+	}
+	if tr.callCount() != 0 {
+		t.Errorf("read error: %d POSTs, want 0", tr.callCount())
+	}
+}
+
+// TestMergeTokens_PreservesOtherFields proves the rotated write-back keeps every
+// non-token field and does not clobber id_token with an empty rotated value.
+func TestMergeTokens_PreservesOtherFields(t *testing.T) {
+	blob := authBlob("old-at", "old-rt")
+	out, err := mergeTokens(blob, oauthTokens{AccessToken: "new-at", RefreshToken: "new-rt"}) // no id_token
+	if err != nil {
+		t.Fatalf("mergeTokens: %v", err)
+	}
+	tokens, err := parseTokens(out)
+	if err != nil {
+		t.Fatalf("parse merged: %v", err)
+	}
+	if tokens.AccessToken != "new-at" || tokens.RefreshToken != "new-rt" {
+		t.Errorf("merged tokens = %+v, want new-at/new-rt", tokens)
+	}
+	if tokens.IDToken != "id-original" {
+		t.Errorf("empty rotated id_token clobbered the original: got %q, want id-original", tokens.IDToken)
+	}
+	if !strings.Contains(out, "last_refresh") {
+		t.Errorf("merge dropped preserved field: %s", out)
+	}
+}
@@ -334,28 +334,39 @@ func (h *WorkspaceHandler) ProxyA2A(c *gin.Context) {
 	c.Data(status, "application/json", respBody)
 }

-// checkWorkspaceBudget returns a proxyA2AError with 402 when the workspace
-// has a budget_limit set and monthly_spend has reached or exceeded it.
-// DB errors are logged and treated as fail-open — a budget check failure
-// must not block legitimate A2A traffic.
+// checkWorkspaceBudget returns a proxyA2AError with 402 when the workspace has
+// exceeded ANY of its configured per-period budget limits (hourly/daily/weekly/
+// monthly — see budget_periods.go). Per-period spend is the rolling-window sum
+// over the workspace_spend_events ledger. DB errors are logged and treated as
+// fail-open — a budget check failure must not block legitimate A2A traffic.
 func (h *WorkspaceHandler) checkWorkspaceBudget(ctx context.Context, workspaceID string) *proxyA2AError {
-	var budgetLimit sql.NullInt64
-	var monthlySpend int64
-	err := db.DB.QueryRowContext(ctx,
-		`SELECT budget_limit, COALESCE(monthly_spend, 0) FROM workspaces WHERE id = $1`,
+	var limitsRaw []byte
+	if err := db.DB.QueryRowContext(ctx,
+		`SELECT COALESCE(budget_limits, '{}'::jsonb) FROM workspaces WHERE id = $1`,
 		workspaceID,
-	).Scan(&budgetLimit, &monthlySpend)
-	if err != nil {
+	).Scan(&limitsRaw); err != nil {
 		if err != sql.ErrNoRows {
 			log.Printf("ProxyA2A: budget check failed for %s: %v", workspaceID, err)
 		}
 		return nil // fail-open
 	}
-	if budgetLimit.Valid && monthlySpend >= budgetLimit.Int64 {
-		log.Printf("ProxyA2A: budget exceeded for %s (spend=%d limit=%d)", workspaceID, monthlySpend, budgetLimit.Int64)
+	limits := parseBudgetLimits(limitsRaw)
+	if len(limits) == 0 {
+		return nil // no limits configured
+	}
+	spend, err := spendByPeriod(ctx, db.DB, workspaceID)
+	if err != nil {
+		log.Printf("ProxyA2A: budget spend query failed for %s: %v", workspaceID, err)
+		return nil // fail-open
+	}
+	if over := exceededPeriods(limits, spend); len(over) > 0 {
+		log.Printf("ProxyA2A: budget exceeded for %s (periods=%v limits=%v spend=%v)", workspaceID, over, limits, spend)
 		return &proxyA2AError{
-			Status:   http.StatusPaymentRequired,
-			Response: gin.H{"error": "workspace budget limit exceeded"},
+			Status: http.StatusPaymentRequired,
+			Response: gin.H{
+				"error":            "workspace budget limit exceeded",
+				"exceeded_periods": over,
+			},
 		}
 	}
 	return nil
@@ -375,6 +386,30 @@ func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID stri
 				Response: gin.H{"error": "access denied: workspaces cannot communicate per hierarchy rules"},
 			}
 		}
+
+		// #1953 cross-tenant isolation. CanCommunicate alone does NOT enforce
+		// org boundaries: its "root-level siblings — both have no parent" rule
+		// treats every tenant's org root as a sibling, so a caller that is an
+		// org root could resolve and route a2a to another tenant's org root
+		// (and resolveAgentURL accepts ANY workspace id with no org check).
+		// Gate on the SAME parent_id-chain org scoping the OFFSEC-015 broadcast
+		// fix uses: reject before resolveAgentURL when caller and target are in
+		// different orgs. Fail-closed — a DB error denies cross-org routing.
+		ok, err := sameOrg(ctx, db.DB, callerID, workspaceID)
+		if err != nil {
+			log.Printf("ProxyA2A: org-scope check failed %s → %s: %v — denying", callerID, workspaceID, err)
+			return 0, nil, &proxyA2AError{
+				Status:   http.StatusForbidden,
+				Response: gin.H{"error": "access denied: org isolation check failed"},
+			}
+		}
+		if !ok {
+			log.Printf("ProxyA2A: cross-org routing denied %s → %s (#1953)", callerID, workspaceID)
+			return 0, nil, &proxyA2AError{
+				Status:   http.StatusForbidden,
+				Response: gin.H{"error": "access denied: target workspace is in a different org"},
+			}
+		}
 	}

 	// Budget enforcement: reject A2A calls when the workspace has exceeded its
@@ -115,12 +115,15 @@ func (h *WorkspaceHandler) handleA2ADispatchError(ctx context.Context, workspace
 			if logActivity {
 				h.logA2ABusyQueued(ctx, workspaceID, callerID, body, a2aMethod, durationMs)
 			}
-			respBody, _ := json.Marshal(gin.H{
+			respBody, marshalErr := json.Marshal(gin.H{
 				"queued":      true,
 				"queue_id":    qid,
 				"queue_depth": depth,
 				"message":     "workspace agent busy — request queued, will dispatch when capacity available",
 			})
+			if marshalErr != nil {
+				log.Printf("ProxyA2A %s: json.Marshal respBody failed: %v", workspaceID, marshalErr)
+			}
 			return http.StatusAccepted, respBody, nil
 		} else {
 			// Queue insert failed — fall through to legacy 503 behavior
@@ -423,16 +426,34 @@ func nilIfEmpty(s string) *string {
 // (their next /registry/register will mint their first token, after
 // which this branch never fires again for them).
 //
-// Post-RFC#637 addition: when the tokenless workspace is accompanied by
-// canvas or admin auth (same-origin request, admin bearer, or org-level
-// token), the caller is identified as a canvas-user identity rather than
-// a legacy peer agent. The returned isCanvasUser flag lets the A2A proxy
-// bypass CanCommunicate for human users, who sit outside the workspace
-// hierarchy.
+// Post-RFC#637 addition: a request may instead be carrying a HUMAN's
+// canvas-user identity (e.g. the 344a2623-… identity workspace from the
+// RFC#637 rollout). That human sits OUTSIDE the workspace org hierarchy, so
+// the returned isCanvasUser flag lets the A2A proxy bypass CanCommunicate for
+// it. Canvas-user classification is decided by isGenuineCanvasUser using
+// NON-FORGEABLE credentials only (see that function) — never by the caller's
+// X-Workspace-ID alone, and never by a bare same-origin Host/Referer in a
+// SaaS image (those are forgeable; see middleware.IsSameOriginCanvas).
+//
+// #1673: this canvas-user check is now evaluated BEFORE the HasAnyLiveToken
+// peer-token contract. Previously it lived only in the !hasLive branch, so a
+// canvas-user identity workspace that had acquired live tokens fell into the
+// hasLive=true branch, which demands a bearer the canvas frontend never sends
+// → silent 401 → the message was dropped before logA2AReceiveQueued wrote the
+// activity_logs row, breaking canvas chat for poll-mode workspaces. A genuine
+// canvas user is identified by the human's session/admin/org credential, which
+// is independent of whether the identity workspace happens to hold peer tokens.
 //
 // On auth failure this writes the 401 via c and returns an error so the
 // handler aborts without running the proxy.
 func validateCallerToken(ctx context.Context, c *gin.Context, callerID string) (isCanvasUser bool, err error) {
+	// Genuine canvas-user identity? Decided independently of the caller
+	// workspace's token state (the #1673 fix) and using only non-forgeable
+	// signals (the #1944 escalation guard).
+	if isGenuineCanvasUser(ctx, c) {
+		return true, nil
+	}
+
 	hasLive, dbErr := wsauth.HasAnyLiveToken(ctx, db.DB, callerID)
 	if dbErr != nil {
 		// Fail-open here matches the heartbeat path — A2A caller auth is
@@ -443,22 +464,10 @@ func validateCallerToken(ctx context.Context, c *gin.Context, callerID string) (
 		return false, nil
 	}
 	if !hasLive {
-		// Tokenless workspace — could be legacy/pre-upgrade caller or
-		// canvas-user identity. Distinguish by request auth signals.
-		if middleware.IsSameOriginCanvas(c) {
-			return true, nil
-		}
-		tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization"))
-		if tok != "" {
-			adminSecret := os.Getenv("ADMIN_TOKEN")
-			if adminSecret != "" && subtle.ConstantTimeCompare([]byte(tok), []byte(adminSecret)) == 1 {
-				return true, nil
-			}
-			if _, _, _, err := orgtoken.Validate(ctx, db.DB, tok); err == nil {
-				return true, nil
-			}
-		}
-		return false, nil // legacy / pre-upgrade caller
+		// Tokenless, non-canvas-user workspace — legacy / pre-upgrade peer.
+		// Grandfather it through (its next /registry/register mints its
+		// first token, after which it lands in the hasLive=true branch).
+		return false, nil
 	}
 	tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization"))
 	if tok == "" {
@@ -472,6 +481,61 @@ func validateCallerToken(ctx context.Context, c *gin.Context, callerID string) (
 	return false, nil
 }

+// isGenuineCanvasUser reports whether the request is a real human acting
+// through the canvas UI (RFC#637 canvas-user identity), as opposed to a peer
+// workspace agent. A true result lets the A2A proxy bypass CanCommunicate, so
+// it MUST only accept signals an attacker on the platform network cannot forge:
+//
+//   - A control-plane-verified canvas session: the WorkOS session cookie is
+//     confirmed upstream to belong to a MEMBER of THIS tenant's org
+//     (middleware.IsVerifiedCanvasSession → /cp/auth/tenant-member). This is
+//     the production SaaS canvas path.
+//   - An Authorization: Bearer matching ADMIN_TOKEN (break-glass / molecli).
+//   - An Authorization: Bearer matching a live org_api_tokens row (user-minted
+//     org-scoped API token).
+//
+// Deliberately NOT accepted as a canvas-user signal in a SaaS image:
+//
+//   - A bare same-origin Host/Referer/Origin (middleware.IsSameOriginCanvas).
+//     Those headers are trivially forgeable by any container on the Docker
+//     network, and the combined-tenant image (CANVAS_PROXY_URL set) is exactly
+//     where a forged Referer + an arbitrary X-Workspace-ID could otherwise
+//     bypass CanCommunicate and reach cross-workspace A2A — the PR #1944
+//     privilege escalation. Same-origin is only honored as a fallback when CP
+//     session verification is NOT configured (self-hosted / dev), a
+//     single-tenant topology with no cross-tenant boundary to escalate across;
+//     even there the org hierarchy still owns intra-org routing.
+//
+// Note this classification is about the human's credential, not the caller
+// workspace's X-Workspace-ID — so it never trusts an attacker-supplied caller
+// ID, and it is independent of whether that workspace holds peer tokens.
+func isGenuineCanvasUser(ctx context.Context, c *gin.Context) bool {
+	// Production SaaS: control-plane-verified org-member session cookie.
+	if middleware.IsVerifiedCanvasSession(c) {
+		return true
+	}
+
+	if tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization")); tok != "" {
+		adminSecret := os.Getenv("ADMIN_TOKEN")
+		if adminSecret != "" && subtle.ConstantTimeCompare([]byte(tok), []byte(adminSecret)) == 1 {
+			return true
+		}
+		if _, _, _, err := orgtoken.Validate(ctx, db.DB, tok); err == nil {
+			return true
+		}
+	}
+
+	// Self-hosted / dev fallback ONLY: when upstream session verification is
+	// not configured there is no verified-cookie signal to use, and the
+	// deployment is single-tenant, so the forgeable same-origin check is an
+	// acceptable canvas signal. In SaaS (CP session configured) this branch is
+	// skipped, closing the forged-same-origin escalation.
+	if !middleware.CPSessionConfigured() && middleware.IsSameOriginCanvas(c) {
+		return true
+	}
+	return false
+}
+
 // errInvalidCallerToken is a sentinel for validateCallerToken's "missing
 // token" branch so the handler-level guard can detect it without string
 // matching (the wsauth errors are typed for the invalid case).
@@ -11,13 +11,14 @@ import (
 	"net/http"
 	"net/http/httptest"
 	"os"
+	"os/exec"
 	"strings"
 	"testing"
 	"time"

-	"github.com/DATA-DOG/go-sqlmock"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/provisioner"
+	"github.com/DATA-DOG/go-sqlmock"
 	"github.com/gin-gonic/gin"
 )

@@ -436,6 +437,10 @@ func TestProxyA2A_CallerIDPropagated(t *testing.T) {
 		WithArgs("ws-target").
 		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-target", "ws-parent"))

+	// #1953 cross-tenant guard: same-org check after CanCommunicate. Both
+	// workspaces resolve to the same org root → routing allowed.
+	mockSameOrg(mock, "ws-caller", "ws-target", true)
+
 	expectBudgetCheck(mock, "ws-target")

 	// Expect activity log with source_id set
@@ -464,6 +469,24 @@ func TestProxyA2A_CallerIDPropagated(t *testing.T) {
 	}
 }

+// mockSameOrg sets up the two org-root recursive-CTE expectations that the
+// #1953 cross-tenant guard in proxyA2ARequest runs after CanCommunicate passes.
+// sameOrg=true returns the SAME root_id for both caller and target (same tenant);
+// sameOrg=false returns different root_ids (cross-tenant → routing must be denied).
+func mockSameOrg(mock sqlmock.Sqlmock, caller, target string, sameOrg bool) {
+	callerRoot := "org-root-shared"
+	targetRoot := "org-root-shared"
+	if !sameOrg {
+		targetRoot = "org-root-other-tenant"
+	}
+	mock.ExpectQuery("WITH RECURSIVE org_chain AS").
+		WithArgs(caller).
+		WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(callerRoot))
+	mock.ExpectQuery("WITH RECURSIVE org_chain AS").
+		WithArgs(target).
+		WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(targetRoot))
+}
+
 // mockCanCommunicate sets up sqlmock expectations for CanCommunicate(caller, target).
 // allowed=true sets up rows that satisfy the access policy (siblings under same parent).
 // allowed=false sets up rows that don't (different parents).
@@ -658,6 +681,9 @@ func TestProxyA2A_CallerIDDerivedFromBearer(t *testing.T) {
 		WithArgs("ws-target").
 		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-target", "ws-parent"))

+	// 3b. #1953 cross-tenant guard — same org root → routing allowed.
+	mockSameOrg(mock, "ws-caller", "ws-target", true)
+
 	expectBudgetCheck(mock, "ws-target")

 	// 4. activity_logs INSERT — verify source_id arg is the derived ws-caller
@@ -1244,13 +1270,12 @@ func TestValidateCallerToken_WrongWorkspaceBindingRejected(t *testing.T) {
 }

 func TestValidateCallerToken_CanvasUser_AdminToken(t *testing.T) {
-	mock := setupTestDB(t)
+	setupTestDB(t)
 	setupTestRedis(t)

-	// Tokenless workspace
-	mock.ExpectQuery(`SELECT COUNT\(\*\) FROM workspace_auth_tokens`).
-		WithArgs("ws-canvas-admin").
-		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
+	// #1673/#1944: the genuine-canvas-user check (admin bearer here) now runs
+	// BEFORE HasAnyLiveToken, so no SELECT COUNT(*) is issued — the human's
+	// credential, not the caller workspace's token state, decides canvas-user.

 	t.Setenv("ADMIN_TOKEN", "admin-secret-42")

@@ -1276,10 +1301,9 @@ func TestValidateCallerToken_CanvasUser_OrgToken(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)

-	// Tokenless workspace
-	mock.ExpectQuery(`SELECT COUNT\(\*\) FROM workspace_auth_tokens`).
-		WithArgs("ws-canvas-org").
-		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
+	// #1673/#1944: the genuine-canvas-user check (org token here) now runs
+	// BEFORE HasAnyLiveToken, so the first DB query is orgtoken.Validate's
+	// lookup — there is no SELECT COUNT(*) expectation anymore.

 	// orgtoken.Validate lookup
 	mock.ExpectQuery(`SELECT id, prefix, org_id FROM org_api_tokens WHERE token_hash = .* AND revoked_at IS NULL`).
@@ -2093,6 +2117,10 @@ func (f *fakeCPProv) Stop(_ context.Context, _ string) error {
 	f.stopCalls++
 	return nil
 }
+func (f *fakeCPProv) StopAndPrune(_ context.Context, _ string) error {
+	f.stopCalls++
+	return nil
+}
 func (f *fakeCPProv) GetConsoleOutput(_ context.Context, _ string) (string, error) {
 	return "", nil
 }
@@ -2341,6 +2369,197 @@ func TestProxyA2A_PollMode_ShortCircuits_NoSSRF_NoDispatch(t *testing.T) {
 	}
 }

+// stubVerifiedCPSession points VerifiedCPSession at a stub control-plane that
+// confirms the given cookie belongs to a tenant-member, so tests can exercise
+// the genuine (non-forgeable) canvas-session path end-to-end without a live CP.
+// It sets CP_UPSTREAM_URL + MOLECULE_ORG_SLUG for the test's lifetime; the
+// real middleware.VerifiedCPSession HTTP+cache code path runs unchanged.
+func stubVerifiedCPSession(t *testing.T, member bool) {
+	t.Helper()
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		if member {
+			fmt.Fprint(w, `{"member":true,"user_id":"user-canvas-1"}`)
+		} else {
+			w.WriteHeader(http.StatusForbidden)
+			fmt.Fprint(w, `{"member":false}`)
+		}
+	}))
+	t.Cleanup(srv.Close)
+	t.Setenv("CP_UPSTREAM_URL", srv.URL)
+	t.Setenv("MOLECULE_ORG_SLUG", "test-tenant")
+}
+
+// TestProxyA2A_PollMode_CanvasUserWithVerifiedSession is the #1673 regression
+// guard. A poll-mode canvas-user identity workspace that HAS acquired live
+// tokens (the exact condition that made #1673 fire) sends a canvas message
+// carrying a control-plane-verified session cookie but no bearer token. The
+// fix must classify it as a canvas user BEFORE the HasAnyLiveToken peer-token
+// contract, so the request is queued (200) and logA2AReceiveQueued writes the
+// activity_logs row — instead of the pre-fix silent 401 that dropped the
+// message before any row landed (breaking canvas chat + chat-history).
+//
+// Runs in a subprocess with CANVAS_PROXY_URL set so middleware.canvasProxyActive
+// is true at package-init time (matching the combined-tenant image), proving the
+// fix does not depend on disabling same-origin detection.
+func TestProxyA2A_PollMode_CanvasUserWithVerifiedSession(t *testing.T) {
+	if os.Getenv("CANVAS_PROXY_URL") == "" {
+		cmd := exec.Command(os.Args[0], "-test.run=^TestProxyA2A_PollMode_CanvasUserWithVerifiedSession$", "-test.v")
+		cmd.Env = append(os.Environ(), "CANVAS_PROXY_URL=http://localhost")
+		out, err := cmd.CombinedOutput()
+		if err != nil {
+			t.Fatalf("subprocess test failed: %v\n%s", err, out)
+		}
+		return
+	}
+
+	stubVerifiedCPSession(t, true)
+
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+	broadcaster := newTestBroadcaster()
+	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
+
+	const wsTarget = "ws-poll-canvas-target"
+	const wsCanvasUser = "ws-canvas-user-344a"
+
+	// CRUCIAL: no SELECT COUNT(*) FROM workspace_auth_tokens expectation. The
+	// genuine-canvas-user check (verified session) must short-circuit BEFORE
+	// HasAnyLiveToken — that is the #1673 regression path. An identity
+	// workspace that already holds live tokens must NOT fall into the
+	// hasLive=true bearer-required branch.
+
+	// isCanvasUser=true → CanCommunicate is skipped (no parent_id lookups).
+	expectBudgetCheck(mock, wsTarget)
+	mock.ExpectQuery("SELECT delivery_mode FROM workspaces WHERE id").
+		WithArgs(wsTarget).
+		WillReturnRows(sqlmock.NewRows([]string{"delivery_mode"}).AddRow("poll"))
+	// logA2AReceiveQueued must fire synchronously and write the row.
+	mock.ExpectExec("INSERT INTO activity_logs").
+		WillReturnResult(sqlmock.NewResult(0, 1))
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Params = gin.Params{{Key: "id", Value: wsTarget}}
+
+	body := `{"jsonrpc":"2.0","id":"canvas-1","method":"message/send","params":{"message":{"role":"user","parts":[{"text":"hello from canvas"}]}}}`
+	req := httptest.NewRequest("POST", "/workspaces/"+wsTarget+"/a2a", bytes.NewBufferString(body))
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("X-Workspace-ID", wsCanvasUser)
+	// Verified canvas session cookie (the genuine, non-forgeable signal).
+	req.Header.Set("Cookie", "wos-session=valid-canvas-session-cookie")
+	// Same-origin headers, present as a real canvas request would send them —
+	// but they are NOT what authorizes the bypass here (the verified session is).
+	req.Host = "localhost"
+	req.Header.Set("Referer", "https://localhost/")
+	c.Request = req
+
+	handler.ProxyA2A(c)
+
+	time.Sleep(50 * time.Millisecond)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected 200 (queued) for canvas-user with verified session, got %d: %s", w.Code, w.Body.String())
+	}
+	var resp map[string]interface{}
+	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("response is not valid JSON: %v", err)
+	}
+	if resp["status"] != "queued" {
+		t.Errorf("response.status = %v, want %q", resp["status"], "queued")
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations (activity_logs row must be written): %v", err)
+	}
+}
+
+// TestProxyA2A_ForgedSameOrigin_CannotBypassCanCommunicate is the security
+// crux of the #1673 fix and the reason PR #1944 was held. In the combined-
+// tenant SaaS image (CANVAS_PROXY_URL set, CP session verification configured),
+// an attacker forges a same-origin request — correct Host + a matching
+// `Referer: https://<host>/` — and supplies an arbitrary X-Workspace-ID naming
+// a workspace it does not control, targeting a workspace it is NOT authorized
+// to reach. It presents NO verified session cookie, NO admin token, NO org
+// token.
+//
+// PR #1944's same-origin bypass would have classified this as a canvas user and
+// skipped CanCommunicate, granting cross-workspace A2A — a privilege
+// escalation. The safe fix must instead fall through to the standard
+// peer-token contract and CanCommunicate, which rejects the cross-hierarchy
+// call with 403. This test proves the escalation is closed.
+func TestProxyA2A_ForgedSameOrigin_CannotBypassCanCommunicate(t *testing.T) {
+	if os.Getenv("CANVAS_PROXY_URL") == "" {
+		cmd := exec.Command(os.Args[0], "-test.run=^TestProxyA2A_ForgedSameOrigin_CannotBypassCanCommunicate$", "-test.v")
+		cmd.Env = append(os.Environ(), "CANVAS_PROXY_URL=http://localhost")
+		out, err := cmd.CombinedOutput()
+		if err != nil {
+			t.Fatalf("subprocess test failed: %v\n%s", err, out)
+		}
+		return
+	}
+
+	// SaaS image with CP session verification configured. The stub CP rejects
+	// any cookie as a non-member; the attacker sends none anyway. This asserts
+	// that with verification configured, same-origin alone is NOT a canvas
+	// signal (CPSessionConfigured()==true disables the dev fallback).
+	stubVerifiedCPSession(t, false)
+
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+	broadcaster := newTestBroadcaster()
+	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
+
+	const wsTarget = "ws-victim-target"
+	const wsForgedCaller = "ws-attacker-caller"
+
+	// validateCallerToken: not a genuine canvas user (no verified session, no
+	// admin/org token, and the dev same-origin fallback is disabled in SaaS).
+	// So it consults the peer-token contract: HasAnyLiveToken for the forged
+	// caller. Return 0 → tokenless legacy peer → grandfathered through token
+	// validation (isCanvasUser stays false). The request must then still be
+	// gated by CanCommunicate.
+	mock.ExpectQuery(`SELECT COUNT\(\*\) FROM workspace_auth_tokens`).
+		WithArgs(wsForgedCaller).
+		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
+
+	// CanCommunicate MUST run (the escalation guard) and DENY: caller and
+	// target sit under different parents.
+	mockCanCommunicate(mock, wsForgedCaller, wsTarget, false)
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Params = gin.Params{{Key: "id", Value: wsTarget}}
+
+	body := `{"jsonrpc":"2.0","id":"exploit-1","method":"message/send","params":{"message":{"role":"user","parts":[{"text":"cross-workspace exploit"}]}}}`
+	req := httptest.NewRequest("POST", "/workspaces/"+wsTarget+"/a2a", bytes.NewBufferString(body))
+	req.Header.Set("Content-Type", "application/json")
+	// Arbitrary caller workspace the attacker does not own.
+	req.Header.Set("X-Workspace-ID", wsForgedCaller)
+	// Forged same-origin signals (the #1944 bypass vector).
+	req.Host = "localhost"
+	req.Header.Set("Referer", "https://localhost/")
+	req.Header.Set("Origin", "https://localhost")
+	// No Cookie / Authorization — no genuine canvas credential.
+	c.Request = req
+
+	handler.ProxyA2A(c)
+
+	if w.Code != http.StatusForbidden {
+		t.Fatalf("ESCALATION NOT CLOSED: forged same-origin + arbitrary X-Workspace-ID "+
+			"reached an unauthorized target with status %d (want 403): %s", w.Code, w.Body.String())
+	}
+	var resp map[string]interface{}
+	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("body not JSON: %v", err)
+	}
+	if !strings.Contains(fmt.Sprint(resp["error"]), "access denied") {
+		t.Errorf("expected an access-denied error from CanCommunicate, got %v", resp["error"])
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations — CanCommunicate must have been consulted: %v", err)
+	}
+}
+
 // TestProxyA2A_PushMode_NoShortCircuit verifies the symmetric contract:
 // a push-mode workspace (default) is NOT affected by the new short-circuit.
 // It still proceeds to resolveAgentURL + dispatch. Without this guard, a
@@ -160,10 +160,12 @@ func EnqueueA2A(
 	}

 	// Return current queue depth for the caller's visibility.
-	_ = db.DB.QueryRowContext(ctx, `
+	if err := db.DB.QueryRowContext(ctx, `
 		SELECT COUNT(*) FROM a2a_queue
 		WHERE workspace_id = $1 AND status = 'queued'
-	`, workspaceID).Scan(&depth)
+	`, workspaceID).Scan(&depth); err != nil {
+		log.Printf("A2AQueue: depth query failed for workspace %s: %v", workspaceID, err)
+	}

 	log.Printf("A2AQueue: enqueued %s for workspace %s (priority=%d, depth=%d)", id, workspaceID, priority, depth)
 	return id, depth, nil
@@ -249,10 +251,12 @@ func MarkQueueItemFailed(ctx context.Context, id, errMsg string) {
 // can see how many ahead of them.
 func QueueDepth(ctx context.Context, workspaceID string) int {
 	var n int
-	_ = db.DB.QueryRowContext(ctx,
+	if err := db.DB.QueryRowContext(ctx,
 		`SELECT COUNT(*) FROM a2a_queue WHERE workspace_id = $1 AND status = 'queued'`,
 		workspaceID,
-	).Scan(&n)
+	).Scan(&n); err != nil {
+		log.Printf("A2AQueue: QueueDepth query failed for workspace %s: %v", workspaceID, err)
+	}
 	return n
 }

@@ -415,10 +419,14 @@ func (h *WorkspaceHandler) stitchDrainResponseToDelegation(ctx context.Context,
 		return
 	}
 	responseText := extractResponseText(respBody)
-	respJSON, _ := json.Marshal(map[string]interface{}{
+	respJSON, marshalErr := json.Marshal(map[string]interface{}{
 		"text":          responseText,
 		"delegation_id": delegationID,
 	})
+	if marshalErr != nil {
+		log.Printf("a2aQueue stitch %s: json.Marshal respJSON failed: %v", delegationID, marshalErr)
+		return
+	}
 	res, err := db.DB.ExecContext(ctx, `
 		UPDATE activity_logs
 		   SET status        = 'completed',
@@ -434,7 +442,12 @@ func (h *WorkspaceHandler) stitchDrainResponseToDelegation(ctx context.Context,
 		log.Printf("A2AQueue drain stitch: update failed for delegation %s: %v", delegationID, err)
 		return
 	}
-	if rows, _ := res.RowsAffected(); rows == 0 {
+	rows, err := res.RowsAffected()
+	if err != nil {
+		log.Printf("A2AQueue drain stitch: RowsAffected error for delegation %s: %v", delegationID, err)
+		return
+	}
+	if rows == 0 {
 		log.Printf("A2AQueue drain stitch: no delegate_result row for delegation %s (queued-row may not exist yet)", delegationID)
 		return
 	}
@@ -153,7 +153,15 @@ func queueRowAuthFields(ctx context.Context, queueID string) (callerID, workspac
 	if err != nil {
 		return "", "", err
 	}
-	return callerNS.String, workspaceNS.String, nil
+	callerID = ""
+	if callerNS.Valid {
+		callerID = callerNS.String
+	}
+	workspaceID = ""
+	if workspaceNS.Valid {
+		workspaceID = workspaceNS.String
+	}
+	return callerID, workspaceID, nil
 }

 // GetA2AQueueStatus handles GET /workspaces/:id/a2a/queue/:queue_id.
@@ -1,9 +1,62 @@
 package handlers

 import (
+	"context"
 	"testing"
+
+	"github.com/DATA-DOG/go-sqlmock"
 )

+// TestQueueRowAuthFields_NilSafeScan proves queueRowAuthFields returns empty
+// strings (not a panic / garbage) when the a2a_queue row has NULL caller_id
+// or workspace_id. Before the fix it dereferenced NullString.String directly,
+// which is only the zero value when Valid is false but masked the NULL-vs-""
+// distinction; the guard makes the intent explicit and safe.
+func TestQueueRowAuthFields_NilSafeScan(t *testing.T) {
+	mock := setupTestDB(t)
+	queueID := "queue-123"
+
+	mock.ExpectQuery(`SELECT caller_id, workspace_id FROM a2a_queue WHERE id = \$1`).
+		WithArgs(queueID).
+		WillReturnRows(sqlmock.NewRows([]string{"caller_id", "workspace_id"}).AddRow(nil, nil))
+
+	caller, workspace, err := queueRowAuthFields(context.Background(), queueID)
+	if err != nil {
+		t.Fatalf("queueRowAuthFields returned error: %v", err)
+	}
+	if caller != "" {
+		t.Errorf("callerID = %q, want empty string for NULL caller_id", caller)
+	}
+	if workspace != "" {
+		t.Errorf("workspaceID = %q, want empty string for NULL workspace_id", workspace)
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Fatalf("unmet expectations: %v", err)
+	}
+}
+
+// TestQueueRowAuthFields_PopulatedRow confirms the non-NULL path still returns
+// the scanned values unchanged.
+func TestQueueRowAuthFields_PopulatedRow(t *testing.T) {
+	mock := setupTestDB(t)
+	queueID := "queue-456"
+
+	mock.ExpectQuery(`SELECT caller_id, workspace_id FROM a2a_queue WHERE id = \$1`).
+		WithArgs(queueID).
+		WillReturnRows(sqlmock.NewRows([]string{"caller_id", "workspace_id"}).AddRow("caller-x", "ws-y"))
+
+	caller, workspace, err := queueRowAuthFields(context.Background(), queueID)
+	if err != nil {
+		t.Fatalf("queueRowAuthFields returned error: %v", err)
+	}
+	if caller != "caller-x" || workspace != "ws-y" {
+		t.Fatalf("got caller=%q workspace=%q, want caller-x / ws-y", caller, workspace)
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Fatalf("unmet expectations: %v", err)
+	}
+}
+
 // TestExtractExpiresInSeconds covers the JSON parser used at enqueue time
 // to honor a caller-specified TTL. Zero return = "no TTL" — caller leaves
 // expires_at NULL on the queue row.
@@ -18,8 +18,8 @@ import (
 	"testing"
 	"time"

-	"github.com/DATA-DOG/go-sqlmock"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
+	"github.com/DATA-DOG/go-sqlmock"
 	"github.com/alicebob/miniredis/v2"
 )

@@ -209,10 +209,12 @@ func drainSetup(t *testing.T, workspaceID string) (sqlmock.Sqlmock, *WorkspaceHa
 // Named distinctly from handlers_test.go's expectBudgetCheck (which uses MatchPsql
 // escaped-regex and cannot be reused with QueryMatcherEqual tests).
 func expectQueueBudgetCheck(mock sqlmock.Sqlmock, workspaceID string) {
+	// Multi-period (#49): exact-match the budget_limits read; "{}" → no limits →
+	// checkWorkspaceBudget returns early (no spend query).
 	mock.ExpectQuery(
-		"SELECT budget_limit, COALESCE(monthly_spend, 0) FROM workspaces WHERE id = $1",
+		"SELECT COALESCE(budget_limits, '{}'::jsonb) FROM workspaces WHERE id = $1",
 	).WithArgs(workspaceID).
-		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}))
+		WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte("{}")))
 }

 // seedRedisURL puts the agent server URL into the Redis cache so resolveAgentURL
@@ -148,6 +148,125 @@ func (h *AdminSchedulesHealthHandler) Health(c *gin.Context) {
 	c.JSON(http.StatusOK, entries)
 }

+// orphanScheduleEntry is one row in the Orphans response.
+type orphanScheduleEntry struct {
+	WorkspaceID     string `json:"workspace_id"`
+	WorkspaceStatus string `json:"workspace_status"` // "removed" | "missing"
+	ScheduleID      string `json:"schedule_id"`
+	ScheduleName    string `json:"schedule_name"`
+	Source          string `json:"source"`
+	Enabled         bool   `json:"enabled"`
+	CronExpr        string `json:"cron_expr"`
+}
+
+// Orphans handles GET /admin/schedules/orphans — the monitor surface for
+// internal#2006. Health (above) reports only LIVE workspaces' schedules, so a
+// schedule left on a removed/recreated workspace silently stops firing and
+// never appears there. This endpoint lists exactly those orphans (workspace
+// removed OR missing) so an operator/monitor can alert. Returns 200 + JSON
+// array (empty when none). Auth via adminAuth() in router.go.
+func (h *AdminSchedulesHealthHandler) Orphans(c *gin.Context) {
+	ctx := c.Request.Context()
+	rows, err := db.DB.QueryContext(ctx, `
+		SELECT s.workspace_id,
+		       CASE WHEN w.id IS NULL THEN 'missing' ELSE 'removed' END AS ws_status,
+		       s.id, s.name, COALESCE(s.source, ''), s.enabled, s.cron_expr
+		FROM workspace_schedules s
+		LEFT JOIN workspaces w ON w.id = s.workspace_id
+		WHERE w.id IS NULL OR w.status = 'removed'
+		ORDER BY s.name ASC
+	`)
+	if err != nil {
+		log.Printf("AdminSchedulesOrphans: query error: %v", err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to query orphans"})
+		return
+	}
+	defer rows.Close()
+	out := make([]orphanScheduleEntry, 0)
+	for rows.Next() {
+		var e orphanScheduleEntry
+		if err := rows.Scan(&e.WorkspaceID, &e.WorkspaceStatus, &e.ScheduleID, &e.ScheduleName, &e.Source, &e.Enabled, &e.CronExpr); err != nil {
+			log.Printf("AdminSchedulesOrphans: scan error: %v", err)
+			continue
+		}
+		out = append(out, e)
+	}
+	if err := rows.Err(); err != nil {
+		log.Printf("AdminSchedulesOrphans: rows iteration error: %v", err)
+	}
+	c.JSON(http.StatusOK, out)
+}
+
+// ReapOrphans handles POST /admin/schedules/reap-orphans — the orphan cleaner
+// (internal#2006). For every schedule bound to a removed/nonexistent workspace
+// it re-points runtime-created schedules onto the live successor agent (matched
+// by role+parent, falling back to name+parent) when one exists and doesn't
+// already carry a same-named schedule; schedules with no live successor are
+// disabled (enabled=false) so the scheduler stops firing into a dead workspace.
+// Idempotent: re-running with no orphans is a no-op. Returns a summary count.
+// Auth is enforced by the adminAuth() middleware registered in router.go.
+func (h *AdminSchedulesHealthHandler) ReapOrphans(c *gin.Context) {
+	ctx := c.Request.Context()
+
+	// 1. Re-point runtime schedules onto a live successor (same role+parent,
+	//    else same name+parent). Skip names already present on the successor.
+	repointed, err := db.DB.ExecContext(ctx, `
+		WITH orphan AS (
+			SELECT s.id, s.name, s.workspace_id, prev.role AS role, prev.parent_id AS parent_id
+			FROM workspace_schedules s
+			JOIN workspaces prev ON prev.id = s.workspace_id
+			WHERE prev.status = 'removed' AND s.source = 'runtime'
+		),
+		successor AS (
+			SELECT o.id AS schedule_id, o.name AS schedule_name,
+			       (
+			         SELECT w.id FROM workspaces w
+			         WHERE w.status != 'removed'
+			           AND w.parent_id IS NOT DISTINCT FROM o.parent_id
+			           AND ((o.role IS NOT NULL AND w.role = o.role))
+			         ORDER BY w.updated_at DESC NULLS LAST LIMIT 1
+			       ) AS live_id
+			FROM orphan o
+		)
+		UPDATE workspace_schedules s
+		SET workspace_id = su.live_id, updated_at = now()
+		FROM successor su
+		WHERE s.id = su.schedule_id
+		  AND su.live_id IS NOT NULL
+		  AND NOT EXISTS (
+		      SELECT 1 FROM workspace_schedules t
+		      WHERE t.workspace_id = su.live_id AND t.name = su.schedule_name
+		  )
+	`)
+	if err != nil {
+		log.Printf("ReapOrphans: re-point error: %v", err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "re-point failed"})
+		return
+	}
+	repointedN, _ := repointed.RowsAffected()
+
+	// 2. Disable any remaining schedules still bound to a removed/missing
+	//    workspace (no live successor, or template schedules on a dead row).
+	disabled, err := db.DB.ExecContext(ctx, `
+		UPDATE workspace_schedules s
+		SET enabled = false, updated_at = now()
+		WHERE s.enabled = true
+		  AND NOT EXISTS (
+		      SELECT 1 FROM workspaces w
+		      WHERE w.id = s.workspace_id AND w.status != 'removed'
+		  )
+	`)
+	if err != nil {
+		log.Printf("ReapOrphans: disable error: %v", err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "disable failed"})
+		return
+	}
+	disabledN, _ := disabled.RowsAffected()
+
+	log.Printf("ReapOrphans: re-pointed %d, disabled %d orphaned schedule(s)", repointedN, disabledN)
+	c.JSON(http.StatusOK, gin.H{"repointed": repointedN, "disabled": disabledN})
+}
+
 // classifyScheduleStatus returns the health status string for a schedule.
 //   - "never_run"  — last_run_at is NULL (schedule has never fired)
 //   - "stale"      — now - last_run_at > staleThreshold (and threshold > 0)
@@ -444,3 +444,72 @@ func TestAdminSchedulesHealth_ResponseFields(t *testing.T) {
 		t.Fatalf("unmet expectations: %v", err)
 	}
 }
+
+// ==================== Orphans + ReapOrphans (internal#2006) ====================
+
+// TestAdminSchedulesOrphans verifies the monitor surface lists schedules bound
+// to a removed/missing workspace (the recreate-orphan failure mode).
+func TestAdminSchedulesOrphans(t *testing.T) {
+	mock := setupTestDB(t)
+	handler := NewAdminSchedulesHealthHandler()
+
+	mock.ExpectQuery(`LEFT JOIN workspaces`).
+		WillReturnRows(sqlmock.NewRows([]string{
+			"workspace_id", "ws_status", "id", "name", "source", "enabled", "cron_expr",
+		}).AddRow("dead-ws", "removed", "sched-1", "minimax-autonomous-tick", "runtime", false, "*/5 * * * *"))
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Request = httptest.NewRequest("GET", "/admin/schedules/orphans", nil)
+
+	handler.Orphans(c)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
+	}
+	var resp []orphanScheduleEntry
+	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("parse response: %v", err)
+	}
+	if len(resp) != 1 {
+		t.Fatalf("expected 1 orphan, got %d", len(resp))
+	}
+	if resp[0].ScheduleName != "minimax-autonomous-tick" || resp[0].WorkspaceStatus != "removed" || resp[0].Source != "runtime" {
+		t.Errorf("unexpected orphan entry: %+v", resp[0])
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Fatalf("unmet expectations: %v", err)
+	}
+}
+
+// TestReapOrphans verifies the cleaner re-points runtime schedules onto a live
+// successor then disables any remaining dead-bound schedules, returning counts.
+func TestReapOrphans(t *testing.T) {
+	mock := setupTestDB(t)
+	handler := NewAdminSchedulesHealthHandler()
+
+	mock.ExpectExec(`UPDATE workspace_schedules s\s+SET workspace_id`).
+		WillReturnResult(sqlmock.NewResult(0, 2))
+	mock.ExpectExec(`UPDATE workspace_schedules s\s+SET enabled = false`).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Request = httptest.NewRequest("POST", "/admin/schedules/reap-orphans", nil)
+
+	handler.ReapOrphans(c)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
+	}
+	var resp map[string]int64
+	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("parse response: %v", err)
+	}
+	if resp["repointed"] != 2 || resp["disabled"] != 1 {
+		t.Errorf("expected repointed=2 disabled=1, got %+v", resp)
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Fatalf("unmet expectations: %v", err)
+	}
+}
@@ -164,7 +164,11 @@ func (w *AgentMessageWriter) Send(
 		}
 		respPayload["parts"] = fileParts
 	}
-	respJSON, _ := json.Marshal(respPayload)
+	respJSON, marshalErr := json.Marshal(respPayload)
+	if marshalErr != nil {
+		log.Printf("AgentMessageWriter %s: json.Marshal respPayload failed: %v", workspaceID, marshalErr)
+		return nil
+	}
 	preview := textutil.TruncateRunes(message, 80)
 	if _, err := w.db.ExecContext(ctx, `
 		INSERT INTO activity_logs (workspace_id, activity_type, method, summary, response_body, status)
@@ -34,7 +34,10 @@ func (h *ApprovalsHandler) Create(c *gin.Context) {
 		return
 	}

-	ctxJSON, _ := json.Marshal(body.Context)
+	ctxJSON, marshalErr := json.Marshal(body.Context)
+	if marshalErr != nil {
+		log.Printf("Approvals create %s: json.Marshal context failed: %v", workspaceID, marshalErr)
+	}
 	if ctxJSON == nil {
 		ctxJSON = []byte("{}")
 	}
@@ -80,10 +83,12 @@ func (h *ApprovalsHandler) ListAll(c *gin.Context) {
 	ctx := c.Request.Context()

 	// Auto-expire stale approvals (older than 10 min)
-	db.DB.ExecContext(ctx, `
+	if _, err := db.DB.ExecContext(ctx, `
 		UPDATE approval_requests SET status = 'denied', decided_by = 'auto-expired', decided_at = now()
 		WHERE status = 'pending' AND created_at < now() - interval '10 minutes'
-	`)
+	`); err != nil {
+		log.Printf("approvals: auto-expire failed: %v", err)
+	}

 	rows, err := db.DB.QueryContext(ctx, `
 		SELECT a.id, a.workspace_id, w.name, a.action, a.reason, a.status, a.created_at
@@ -200,7 +205,12 @@ func (h *ApprovalsHandler) Decide(c *gin.Context) {
 		return
 	}

-	rows, _ := result.RowsAffected()
+	rows, err := result.RowsAffected()
+	if err != nil {
+		log.Printf("Approval decision RowsAffected error approval=%s workspace=%s: %v", approvalID, workspaceID, err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to update"})
+		return
+	}
 	if rows == 0 {
 		c.JSON(http.StatusNotFound, gin.H{"error": "approval not found or already decided"})
 		return
@@ -344,7 +344,11 @@ func computeAuditHMAC(key []byte, ev *auditEventRow) string {
 		"timestamp":            ev.Timestamp.UTC().Format("2006-01-02T15:04:05Z"),
 	}

-	payload, _ := json.Marshal(canonical) // compact, sorted keys
+	payload, marshalErr := json.Marshal(canonical) // compact, sorted keys
+	if marshalErr != nil {
+		log.Printf("auditChainHash: json.Marshal canonical failed: %v", marshalErr)
+		return ""
+	}
 	mac := hmac.New(sha256.New, key)
 	mac.Write(payload)
 	return hex.EncodeToString(mac.Sum(nil))
@@ -1,7 +1,9 @@
 package handlers

 import (
+	"context"
 	"database/sql"
+	"encoding/json"
 	"log"
 	"net/http"

@@ -12,42 +14,79 @@ import (
 // BudgetHandler exposes per-workspace budget read/write endpoints.
 // Routes (all behind WorkspaceAuth middleware):
 //
-//	GET  /workspaces/:id/budget  — current budget_limit, monthly_spend, budget_remaining
-//	PATCH /workspaces/:id/budget — set or clear budget_limit
+//	GET   /workspaces/:id/budget  — per-period limits, spend, remaining
+//	PATCH /workspaces/:id/budget  — set/clear per-period limits
+//
+// Multi-period (#49): the budget is now four independent rolling windows —
+// hourly/daily/weekly/monthly (budget_periods.go is the SSOT for the set). The
+// canonical config is workspaces.budget_limits (JSONB, USD cents per period);
+// per-period spend is the rolling-window sum over workspace_spend_events. The
+// legacy single monthly budget_limit / monthly_spend are still emitted (and
+// budget_limit kept in sync to the monthly period) for back-compat with
+// pre-deploy canvas/agent builds during the rollout window.
 type BudgetHandler struct{}

 func NewBudgetHandler() *BudgetHandler { return &BudgetHandler{} }

-// budgetResponse is the canonical JSON shape for both GET and PATCH responses.
+// periodBudget is the per-period view: configured ceiling (null = no limit),
+// rolling-window spend, and remaining headroom (null when no limit; may go
+// negative so callers see how far over a period is).
+type periodBudget struct {
+	Limit     *int64 `json:"limit"`
+	Spend     int64  `json:"spend"`
+	Remaining *int64 `json:"remaining"`
+}
+
+// budgetResponse is the canonical JSON shape for GET and PATCH.
 type budgetResponse struct {
-	// BudgetLimit is the monthly spend ceiling in USD cents (null = no limit).
-	// budget_limit=500 means $5.00/month.
-	BudgetLimit *int64 `json:"budget_limit"`
-	// MonthlySpend is the agent's self-reported accumulated LLM API spend
-	// for the current month (USD cents). Incremented via heartbeat.
-	MonthlySpend int64 `json:"monthly_spend"`
-	// BudgetRemaining is null when BudgetLimit is null, otherwise
-	// max(0, budget_limit - monthly_spend). Can be negative — we store the
-	// actual value so callers can see how far over-budget a workspace is.
+	// Periods is keyed by BudgetPeriod ("hourly"/"daily"/"weekly"/"monthly").
+	Periods map[string]periodBudget `json:"periods"`
+
+	// --- back-compat (monthly), for pre-multi-period clients ---
+	BudgetLimit     *int64 `json:"budget_limit"`
+	MonthlySpend    int64  `json:"monthly_spend"`
 	BudgetRemaining *int64 `json:"budget_remaining"`
 }

+// buildBudgetResponse assembles the per-period view from the stored limits +
+// the ledger spend. Single place so GET and PATCH return identical shapes.
+func buildBudgetResponse(ctx context.Context, workspaceID string, limitsRaw []byte) (budgetResponse, error) {
+	limits := parseBudgetLimits(limitsRaw)
+	spend, err := spendByPeriod(ctx, db.DB, workspaceID)
+	if err != nil {
+		return budgetResponse{}, err
+	}
+	periods := make(map[string]periodBudget, len(budgetPeriods))
+	for _, def := range budgetPeriods {
+		pb := periodBudget{Spend: spend[def.Name]}
+		if lim, ok := limits[def.Name]; ok {
+			l := lim
+			pb.Limit = &l
+			r := lim - spend[def.Name]
+			pb.Remaining = &r
+		}
+		periods[string(def.Name)] = pb
+	}
+	resp := budgetResponse{Periods: periods, MonthlySpend: spend[PeriodMonthly]}
+	if m := periods[string(PeriodMonthly)]; m.Limit != nil {
+		resp.BudgetLimit = m.Limit
+		resp.BudgetRemaining = m.Remaining
+	}
+	return resp, nil
+}
+
 // GetBudget handles GET /workspaces/:id/budget.
-// Returns the workspace's current budget ceiling, accumulated spend, and
-// computed remaining headroom. Both budget_limit and budget_remaining are
-// null when no limit has been configured for the workspace.
 func (h *BudgetHandler) GetBudget(c *gin.Context) {
 	workspaceID := c.Param("id")
 	ctx := c.Request.Context()

-	var budgetLimit sql.NullInt64
-	var monthlySpend int64
+	var limitsRaw []byte
 	err := db.DB.QueryRowContext(ctx,
-		`SELECT budget_limit, COALESCE(monthly_spend, 0)
+		`SELECT COALESCE(budget_limits, '{}'::jsonb)
 		 FROM workspaces
 		 WHERE id = $1 AND status != 'removed'`,
 		workspaceID,
-	).Scan(&budgetLimit, &monthlySpend)
+	).Scan(&limitsRaw)
 	if err == sql.ErrNoRows {
 		c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
 		return
@@ -58,66 +97,80 @@ func (h *BudgetHandler) GetBudget(c *gin.Context) {
 		return
 	}

-	resp := budgetResponse{
-		MonthlySpend: monthlySpend,
+	resp, err := buildBudgetResponse(ctx, workspaceID, limitsRaw)
+	if err != nil {
+		log.Printf("GetBudget: spend query failed for %s: %v", workspaceID, err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"})
+		return
 	}
-	if budgetLimit.Valid {
-		limit := budgetLimit.Int64
-		resp.BudgetLimit = &limit
-		remaining := limit - monthlySpend
-		resp.BudgetRemaining = &remaining
-	}
-
 	c.JSON(http.StatusOK, resp)
 }

-// PatchBudget handles PATCH /workspaces/:id/budget.
-// Accepts {"budget_limit": <int64>} to set a new ceiling, or
-// {"budget_limit": null} to remove an existing ceiling.
-// Returns the updated budget state in the same shape as GetBudget.
+// PatchBudget handles PATCH /workspaces/:id/budget. Accepts EITHER the
+// multi-period shape
+//
+//	{"budget_limits": {"hourly": 100, "daily": null, "weekly": 500, "monthly": 2000}}
+//
+// (a per-period value of null/absent clears that period; a positive int sets it)
+// OR the legacy single-monthly shape {"budget_limit": 2000} / {"budget_limit": null}.
 func (h *BudgetHandler) PatchBudget(c *gin.Context) {
 	workspaceID := c.Param("id")
 	ctx := c.Request.Context()

-	// We need to distinguish between "field absent" and "field = null",
-	// so we unmarshal into a raw map first.
-	var raw map[string]interface{}
+	var raw map[string]json.RawMessage
 	if err := c.ShouldBindJSON(&raw); err != nil {
 		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
 		return
 	}
-
-	budgetLimitRaw, ok := raw["budget_limit"]
-	if !ok {
-		c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit field is required"})
+	_, hasLimits := raw["budget_limits"]
+	_, hasLegacy := raw["budget_limit"]
+	if !hasLimits && !hasLegacy {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limits or budget_limit field is required"})
 		return
 	}

-	// Validate and convert the value. JSON numbers decode as float64.
-	var budgetArg interface{} // nil → SQL NULL, int64 → new ceiling
-	if budgetLimitRaw != nil {
-		switch v := budgetLimitRaw.(type) {
-		case float64:
-			if v < 0 {
-				c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit must be >= 0 (USD cents)"})
+	limits := make(map[BudgetPeriod]int64, len(budgetPeriods))
+	known := make(map[string]bool, len(budgetPeriods))
+	for _, def := range budgetPeriods {
+		known[string(def.Name)] = true
+	}
+
+	if hasLimits {
+		var m map[string]*int64
+		if err := json.Unmarshal(raw["budget_limits"], &m); err != nil {
+			c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limits must be an object of period→int|null"})
+			return
+		}
+		for k, v := range m {
+			if !known[k] {
+				c.JSON(http.StatusBadRequest, gin.H{"error": "unknown budget period: " + k + " (allowed: hourly, daily, weekly, monthly)"})
 				return
 			}
-			cv := int64(v)
-			budgetArg = cv
-		case int64:
-			if v < 0 {
-				c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit must be >= 0 (USD cents)"})
+			if v == nil {
+				continue // clear this period (null = no limit)
+			}
+			if *v < 0 {
+				c.JSON(http.StatusBadRequest, gin.H{"error": "budget limit for " + k + " must be >= 0 (USD cents)"})
 				return
 			}
-			budgetArg = v
-		default:
+			limits[BudgetPeriod(k)] = *v // 0 is valid = block-all for this period
+		}
+	} else { // legacy single-monthly
+		var v *int64
+		if err := json.Unmarshal(raw["budget_limit"], &v); err != nil {
 			c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit must be an integer (USD cents) or null"})
 			return
 		}
+		if v != nil {
+			if *v < 0 {
+				c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit must be >= 0 (USD cents)"})
+				return
+			}
+			limits[PeriodMonthly] = *v // 0 is valid = block-all (legacy semantics)
+		}
 	}
-	// budgetArg == nil means "clear the ceiling"

-	// Existence check — return 404 for non-existent / removed workspaces.
+	// Existence check — 404 for non-existent / removed workspaces.
 	var exists bool
 	if err := db.DB.QueryRowContext(ctx,
 		`SELECT EXISTS(SELECT 1 FROM workspaces WHERE id = $1 AND status != 'removed')`,
@@ -127,38 +180,28 @@ func (h *BudgetHandler) PatchBudget(c *gin.Context) {
 		return
 	}

+	// Persist: budget_limits is the SSOT; keep the legacy budget_limit column
+	// synced to the monthly period so pre-deploy enforcement paths stay coherent
+	// during the rollout window.
+	var legacyMonthly interface{}
+	if m, ok := limits[PeriodMonthly]; ok {
+		legacyMonthly = m
+	}
+	encoded := encodeBudgetLimits(limits)
 	if _, err := db.DB.ExecContext(ctx,
-		`UPDATE workspaces SET budget_limit = $2, updated_at = now() WHERE id = $1`,
-		workspaceID, budgetArg,
+		`UPDATE workspaces SET budget_limits = $2, budget_limit = $3, updated_at = now() WHERE id = $1`,
+		workspaceID, encoded, legacyMonthly,
 	); err != nil {
 		log.Printf("PatchBudget: update failed for %s: %v", workspaceID, err)
 		c.JSON(http.StatusInternalServerError, gin.H{"error": "update failed"})
 		return
 	}

-	// Re-read the current state so the response reflects exactly what is in
-	// the DB, including the monthly_spend the agent has already accumulated.
-	var newLimit sql.NullInt64
-	var monthlySpend int64
-	if err := db.DB.QueryRowContext(ctx,
-		`SELECT budget_limit, COALESCE(monthly_spend, 0) FROM workspaces WHERE id = $1`,
-		workspaceID,
-	).Scan(&newLimit, &monthlySpend); err != nil {
+	resp, err := buildBudgetResponse(ctx, workspaceID, encoded)
+	if err != nil {
 		log.Printf("PatchBudget: re-read failed for %s: %v", workspaceID, err)
-		// Still success — just omit the echo.
 		c.JSON(http.StatusOK, gin.H{"status": "updated"})
 		return
 	}
-
-	resp := budgetResponse{
-		MonthlySpend: monthlySpend,
-	}
-	if newLimit.Valid {
-		limit := newLimit.Int64
-		resp.BudgetLimit = &limit
-		remaining := limit - monthlySpend
-		resp.BudgetRemaining = &remaining
-	}
-
 	c.JSON(http.StatusOK, resp)
 }
@@ -0,0 +1,160 @@
+package handlers
+
+import (
+	"context"
+	"database/sql"
+	"encoding/json"
+	"strconv"
+	"time"
+)
+
+// budget_periods.go — SINGLE SOURCE OF TRUTH for the multi-period per-workspace
+// LLM budget (#49 follow-up). The supported periods, their rolling windows, the
+// per-period spend computation (from the workspace_spend_events ledger), and the
+// over-budget decision all live here so the config endpoint (GetBudget/PatchBudget),
+// the display, and enforcement (checkWorkspaceBudget) can never drift.
+//
+// Spend model: the heartbeat records each observed spend INCREMENT into
+// workspace_spend_events (recordSpendDelta). Per-period spend is a rolling-window
+// SUM over that ledger — so the SERVER owns windowing (the agent keeps reporting
+// its cumulative figure unchanged). Rolling (not calendar) windows: no fragile
+// month-boundary reset, and "monthly" = a 30-day trailing window.
+
+// BudgetPeriod is one of the supported rolling budget windows.
+type BudgetPeriod string
+
+const (
+	PeriodHourly  BudgetPeriod = "hourly"
+	PeriodDaily   BudgetPeriod = "daily"
+	PeriodWeekly  BudgetPeriod = "weekly"
+	PeriodMonthly BudgetPeriod = "monthly"
+)
+
+// budgetPeriodDef pairs a period with its rolling window.
+type budgetPeriodDef struct {
+	Name   BudgetPeriod
+	Window time.Duration
+}
+
+// budgetPeriods is the canonical ordered list. ADD A PERIOD = one line here;
+// every consumer iterates this slice, so nothing else needs to change.
+var budgetPeriods = []budgetPeriodDef{
+	{PeriodHourly, time.Hour},
+	{PeriodDaily, 24 * time.Hour},
+	{PeriodWeekly, 7 * 24 * time.Hour},
+	{PeriodMonthly, 30 * 24 * time.Hour}, // rolling 30-day window
+}
+
+// spendLedgerRetention bounds the ledger: rows older than the largest window
+// (+ slack) are never read, so the recorder opportunistically prunes them.
+var spendLedgerRetention = 35 * 24 * time.Hour
+
+// parseBudgetLimits decodes the workspaces.budget_limits JSONB into a map of
+// period → limit (USD cents). A limit of ZERO is valid and means "block all
+// spend for that period" (a $0 ceiling); absent / null / negative / unknown
+// keys mean "no limit for that period". Tolerant of a NULL/empty column.
+func parseBudgetLimits(raw []byte) map[BudgetPeriod]int64 {
+	out := make(map[BudgetPeriod]int64, len(budgetPeriods))
+	if len(raw) == 0 {
+		return out
+	}
+	var m map[string]*int64
+	if err := json.Unmarshal(raw, &m); err != nil {
+		return out
+	}
+	for _, def := range budgetPeriods {
+		if v, ok := m[string(def.Name)]; ok && v != nil && *v >= 0 {
+			out[def.Name] = *v
+		}
+	}
+	return out
+}
+
+// encodeBudgetLimits renders a period→limit map back to the canonical JSONB
+// shape, keeping only KNOWN periods with a non-negative limit (0 = block-all is
+// preserved; a period absent from the map = no limit). Always returns valid JSON.
+func encodeBudgetLimits(limits map[BudgetPeriod]int64) []byte {
+	m := make(map[string]int64, len(limits))
+	for _, def := range budgetPeriods {
+		if v, ok := limits[def.Name]; ok && v >= 0 {
+			m[string(def.Name)] = v
+		}
+	}
+	b, err := json.Marshal(m)
+	if err != nil {
+		return []byte("{}")
+	}
+	return b
+}
+
+// recordSpendDelta appends a positive spend increment to the ledger and
+// opportunistically prunes rows past the retention horizon for this workspace.
+// No-op for delta <= 0. Errors are returned for the caller to log (non-fatal).
+func recordSpendDelta(ctx context.Context, q *sql.DB, workspaceID string, deltaCents int64) error {
+	if deltaCents <= 0 {
+		return nil
+	}
+	if _, err := q.ExecContext(ctx,
+		`INSERT INTO workspace_spend_events (workspace_id, delta_cents) VALUES ($1, $2)`,
+		workspaceID, deltaCents,
+	); err != nil {
+		return err
+	}
+	// Opportunistic prune (cheap; index-backed). Best-effort — ignore error.
+	_, _ = q.ExecContext(ctx,
+		`DELETE FROM workspace_spend_events
+		  WHERE workspace_id = $1 AND occurred_at < now() - $2::interval`,
+		workspaceID, pgInterval(spendLedgerRetention),
+	)
+	return nil
+}
+
+// spendByPeriod returns the rolling-window spend (USD cents) for every period,
+// computed in a SINGLE query over the ledger. The outer predicate bounds to the
+// largest window; per-period FILTERs sum each sub-window. A period with no ledger
+// rows reports 0. This is THE spend computation — used by both display + enforcement.
+func spendByPeriod(ctx context.Context, q *sql.DB, workspaceID string) (map[BudgetPeriod]int64, error) {
+	out := make(map[BudgetPeriod]int64, len(budgetPeriods))
+	for _, def := range budgetPeriods {
+		out[def.Name] = 0
+	}
+	row := q.QueryRowContext(ctx, `
+		SELECT
+			COALESCE(SUM(delta_cents) FILTER (WHERE occurred_at > now() - interval '1 hour'), 0),
+			COALESCE(SUM(delta_cents) FILTER (WHERE occurred_at > now() - interval '24 hours'), 0),
+			COALESCE(SUM(delta_cents) FILTER (WHERE occurred_at > now() - interval '7 days'), 0),
+			COALESCE(SUM(delta_cents) FILTER (WHERE occurred_at > now() - interval '30 days'), 0)
+		FROM workspace_spend_events
+		WHERE workspace_id = $1 AND occurred_at > now() - interval '30 days'
+	`, workspaceID)
+	var h, d, w, mo int64
+	if err := row.Scan(&h, &d, &w, &mo); err != nil {
+		return out, err
+	}
+	out[PeriodHourly], out[PeriodDaily], out[PeriodWeekly], out[PeriodMonthly] = h, d, w, mo
+	return out, nil
+}
+
+// exceededPeriods is PURE: given the configured limits and observed spend, it
+// returns the periods whose spend has reached/exceeded their limit (in
+// budgetPeriods order). Only periods WITH a positive limit are considered.
+// Used by enforcement to decide whether to block.
+func exceededPeriods(limits map[BudgetPeriod]int64, spend map[BudgetPeriod]int64) []BudgetPeriod {
+	var over []BudgetPeriod
+	for _, def := range budgetPeriods {
+		limit, ok := limits[def.Name]
+		if !ok {
+			continue // no limit configured for this period
+		}
+		// limit >= 0 is a real ceiling (0 = block-all). spend >= limit → over.
+		if spend[def.Name] >= limit {
+			over = append(over, def.Name)
+		}
+	}
+	return over
+}
+
+// pgInterval renders a Go duration as a Postgres-interval string ("N seconds").
+func pgInterval(d time.Duration) string {
+	return strconv.FormatInt(int64(d.Seconds()), 10) + " seconds"
+}
@@ -0,0 +1,99 @@
+package handlers
+
+import (
+	"reflect"
+	"testing"
+)
+
+// Pure-logic tests for the multi-period budget SSOT (budget_periods.go). The
+// DB-touching helpers (spendByPeriod / recordSpendDelta) are exercised via the
+// handler sqlmock tests; here we pin the parsing + the over-budget decision,
+// which is where the per-period semantics actually live.
+
+func TestParseBudgetLimits(t *testing.T) {
+	cases := []struct {
+		name string
+		raw  string
+		want map[BudgetPeriod]int64
+	}{
+		{"empty", "", map[BudgetPeriod]int64{}},
+		{"empty-object", "{}", map[BudgetPeriod]int64{}},
+		{"all-four", `{"hourly":100,"daily":200,"weekly":300,"monthly":400}`,
+			map[BudgetPeriod]int64{PeriodHourly: 100, PeriodDaily: 200, PeriodWeekly: 300, PeriodMonthly: 400}},
+		{"null-dropped-zero-kept", `{"hourly":null,"daily":0,"weekly":500}`,
+			map[BudgetPeriod]int64{PeriodDaily: 0, PeriodWeekly: 500}}, // 0 = block-all, kept
+		{"negative-dropped", `{"monthly":-5}`, map[BudgetPeriod]int64{}},
+		{"unknown-key-ignored", `{"yearly":999,"daily":10}`, map[BudgetPeriod]int64{PeriodDaily: 10}},
+		{"malformed-json", `{not json`, map[BudgetPeriod]int64{}},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := parseBudgetLimits([]byte(tc.raw))
+			if !reflect.DeepEqual(got, tc.want) {
+				t.Errorf("parseBudgetLimits(%q) = %v, want %v", tc.raw, got, tc.want)
+			}
+		})
+	}
+}
+
+func TestEncodeBudgetLimits_RoundTrip(t *testing.T) {
+	in := map[BudgetPeriod]int64{PeriodHourly: 100, PeriodMonthly: 400}
+	enc := encodeBudgetLimits(in)
+	got := parseBudgetLimits(enc)
+	if !reflect.DeepEqual(got, in) {
+		t.Errorf("round-trip: encode→parse = %v, want %v (enc=%s)", got, in, enc)
+	}
+	// unknown periods dropped; 0 (block-all) kept
+	enc2 := encodeBudgetLimits(map[BudgetPeriod]int64{PeriodDaily: 0, "yearly": 9})
+	if got := parseBudgetLimits(enc2); !reflect.DeepEqual(got, map[BudgetPeriod]int64{PeriodDaily: 0}) {
+		t.Errorf("encode kept 0/dropped unknown: parse(%s) = %v, want {daily:0}", enc2, got)
+	}
+}
+
+func TestExceededPeriods(t *testing.T) {
+	cases := []struct {
+		name   string
+		limits map[BudgetPeriod]int64
+		spend  map[BudgetPeriod]int64
+		want   []BudgetPeriod
+	}{
+		{"no-limits", map[BudgetPeriod]int64{}, map[BudgetPeriod]int64{PeriodHourly: 999}, nil},
+		{"zero-limit-blocks-all", map[BudgetPeriod]int64{PeriodHourly: 0}, map[BudgetPeriod]int64{PeriodHourly: 0}, []BudgetPeriod{PeriodHourly}},
+		{"under-all", map[BudgetPeriod]int64{PeriodDaily: 100}, map[BudgetPeriod]int64{PeriodDaily: 50}, nil},
+		{"at-limit-is-exceeded", map[BudgetPeriod]int64{PeriodDaily: 100}, map[BudgetPeriod]int64{PeriodDaily: 100}, []BudgetPeriod{PeriodDaily}},
+		{"over-limit", map[BudgetPeriod]int64{PeriodHourly: 10}, map[BudgetPeriod]int64{PeriodHourly: 11}, []BudgetPeriod{PeriodHourly}},
+		{"only-hourly-over", map[BudgetPeriod]int64{PeriodHourly: 10, PeriodMonthly: 1000},
+			map[BudgetPeriod]int64{PeriodHourly: 50, PeriodMonthly: 200}, []BudgetPeriod{PeriodHourly}},
+		{"multiple-over-in-order", map[BudgetPeriod]int64{PeriodHourly: 10, PeriodWeekly: 100},
+			map[BudgetPeriod]int64{PeriodHourly: 99, PeriodWeekly: 100}, []BudgetPeriod{PeriodHourly, PeriodWeekly}},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := exceededPeriods(tc.limits, tc.spend)
+			if !reflect.DeepEqual(got, tc.want) {
+				t.Errorf("exceededPeriods(%v,%v) = %v, want %v", tc.limits, tc.spend, got, tc.want)
+			}
+		})
+	}
+}
+
+// TestBudgetPeriods_AllReachable guards the SSOT list: every declared period has
+// a positive window and a unique name (a typo'd duplicate would silently break
+// per-period accounting).
+func TestBudgetPeriods_Wellformed(t *testing.T) {
+	seen := map[BudgetPeriod]bool{}
+	for _, d := range budgetPeriods {
+		if d.Window <= 0 {
+			t.Errorf("period %s has non-positive window %v", d.Name, d.Window)
+		}
+		if seen[d.Name] {
+			t.Errorf("duplicate period name %s", d.Name)
+		}
+		seen[d.Name] = true
+	}
+	for _, p := range []BudgetPeriod{PeriodHourly, PeriodDaily, PeriodWeekly, PeriodMonthly} {
+		if !seen[p] {
+			t.Errorf("period %s missing from budgetPeriods SSOT list", p)
+		}
+	}
+}
@@ -12,15 +12,25 @@ import (
 	"github.com/gin-gonic/gin"
 )

+// Multi-period budget (#49): GET/PATCH now read workspaces.budget_limits (jsonb)
+// and compute per-period spend from the workspace_spend_events ledger
+// (spendByPeriod — matched here by the "FROM workspace_spend_events" fragment).
+// The legacy budget_limit/monthly_spend response fields are still emitted
+// (monthly period) for rollout back-compat, and the legacy {"budget_limit":N}
+// PATCH shape still works.
+
+// spendRows builds the 4-column row spendByPeriod scans (hourly,daily,weekly,monthly).
+func spendRows(h, d, w, m int64) *sqlmock.Rows {
+	return sqlmock.NewRows([]string{"h", "d", "w", "mo"}).AddRow(h, d, w, m)
+}
+
 // ==================== GET /workspaces/:id/budget ====================

-// TestBudgetGet_NotFound verifies that GET /budget returns 404 for an unknown
-// workspace ID (ErrNoRows from the budget query).
 func TestBudgetGet_NotFound(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)

-	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
+	mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
 		WithArgs("ws-not-there").
 		WillReturnError(sql.ErrNoRows)

@@ -29,8 +39,7 @@ func TestBudgetGet_NotFound(t *testing.T) {
 	c.Params = gin.Params{{Key: "id", Value: "ws-not-there"}}
 	c.Request = httptest.NewRequest("GET", "/workspaces/ws-not-there/budget", nil)

-	h := NewBudgetHandler()
-	h.GetBudget(c)
+	NewBudgetHandler().GetBudget(c)

 	if w.Code != http.StatusNotFound {
 		t.Errorf("expected 404, got %d: %s", w.Code, w.Body.String())
@@ -40,12 +49,11 @@ func TestBudgetGet_NotFound(t *testing.T) {
 	}
 }

-// TestBudgetGet_DBError verifies that a non-ErrNoRows DB error returns 500.
 func TestBudgetGet_DBError(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)

-	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
+	mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
 		WithArgs("ws-db-err").
 		WillReturnError(sql.ErrConnDone)

@@ -54,8 +62,7 @@ func TestBudgetGet_DBError(t *testing.T) {
 	c.Params = gin.Params{{Key: "id", Value: "ws-db-err"}}
 	c.Request = httptest.NewRequest("GET", "/workspaces/ws-db-err/budget", nil)

-	h := NewBudgetHandler()
-	h.GetBudget(c)
+	NewBudgetHandler().GetBudget(c)

 	if w.Code != http.StatusInternalServerError {
 		t.Errorf("expected 500, got %d: %s", w.Code, w.Body.String())
@@ -65,24 +72,23 @@ func TestBudgetGet_DBError(t *testing.T) {
 	}
 }

-// TestBudgetGet_NoLimit verifies that budget_limit and budget_remaining are
-// null when the workspace has no budget ceiling configured.
 func TestBudgetGet_NoLimit(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)

-	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
+	mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
 		WithArgs("ws-free").
-		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
-			AddRow(nil, int64(42)))
+		WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte(`{}`)))
+	mock.ExpectQuery(`FROM workspace_spend_events`).
+		WithArgs("ws-free").
+		WillReturnRows(spendRows(0, 0, 0, 42))

 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
 	c.Params = gin.Params{{Key: "id", Value: "ws-free"}}
 	c.Request = httptest.NewRequest("GET", "/workspaces/ws-free/budget", nil)

-	h := NewBudgetHandler()
-	h.GetBudget(c)
+	NewBudgetHandler().GetBudget(c)

 	if w.Code != http.StatusOK {
 		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
@@ -105,24 +111,23 @@ func TestBudgetGet_NoLimit(t *testing.T) {
 	}
 }

-// TestBudgetGet_WithLimit verifies that budget_limit, monthly_spend, and
-// budget_remaining are all returned correctly when a ceiling is set.
 func TestBudgetGet_WithLimit(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)

-	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
+	mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
 		WithArgs("ws-capped").
-		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
-			AddRow(int64(500), int64(123)))
+		WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte(`{"monthly":500}`)))
+	mock.ExpectQuery(`FROM workspace_spend_events`).
+		WithArgs("ws-capped").
+		WillReturnRows(spendRows(0, 0, 0, 123))

 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
 	c.Params = gin.Params{{Key: "id", Value: "ws-capped"}}
 	c.Request = httptest.NewRequest("GET", "/workspaces/ws-capped/budget", nil)

-	h := NewBudgetHandler()
-	h.GetBudget(c)
+	NewBudgetHandler().GetBudget(c)

 	if w.Code != http.StatusOK {
 		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
@@ -137,7 +142,6 @@ func TestBudgetGet_WithLimit(t *testing.T) {
 	if resp["monthly_spend"] != float64(123) {
 		t.Errorf("expected monthly_spend=123, got %v", resp["monthly_spend"])
 	}
-	// budget_remaining = 500 - 123 = 377
 	if resp["budget_remaining"] != float64(377) {
 		t.Errorf("expected budget_remaining=377, got %v", resp["budget_remaining"])
 	}
@@ -146,24 +150,23 @@ func TestBudgetGet_WithLimit(t *testing.T) {
 	}
 }

-// TestBudgetGet_OverBudget verifies that budget_remaining can be negative
-// when monthly_spend has already exceeded budget_limit.
 func TestBudgetGet_OverBudget(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)

-	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
+	mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
 		WithArgs("ws-over").
-		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
-			AddRow(int64(100), int64(150)))
+		WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte(`{"monthly":100}`)))
+	mock.ExpectQuery(`FROM workspace_spend_events`).
+		WithArgs("ws-over").
+		WillReturnRows(spendRows(0, 0, 0, 150))

 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
 	c.Params = gin.Params{{Key: "id", Value: "ws-over"}}
 	c.Request = httptest.NewRequest("GET", "/workspaces/ws-over/budget", nil)

-	h := NewBudgetHandler()
-	h.GetBudget(c)
+	NewBudgetHandler().GetBudget(c)

 	if w.Code != http.StatusOK {
 		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
@@ -172,7 +175,6 @@ func TestBudgetGet_OverBudget(t *testing.T) {
 	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
 		t.Fatalf("parse response: %v", err)
 	}
-	// budget_remaining = 100 - 150 = -50 (negative, but we store actual value)
 	if resp["budget_remaining"] != float64(-50) {
 		t.Errorf("expected budget_remaining=-50, got %v", resp["budget_remaining"])
 	}
@@ -181,10 +183,59 @@ func TestBudgetGet_OverBudget(t *testing.T) {
 	}
 }

+// TestBudgetGet_MultiPeriod pins the new per-period shape: each period reports
+// its own limit/spend/remaining, and an over-budget sub-period is visible.
+func TestBudgetGet_MultiPeriod(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+
+	mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
+		WithArgs("ws-mp").
+		WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).
+			AddRow([]byte(`{"hourly":100,"daily":1000}`)))
+	mock.ExpectQuery(`FROM workspace_spend_events`).
+		WithArgs("ws-mp").
+		WillReturnRows(spendRows(120, 300, 300, 300)) // hourly over (120>=100)
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Params = gin.Params{{Key: "id", Value: "ws-mp"}}
+	c.Request = httptest.NewRequest("GET", "/workspaces/ws-mp/budget", nil)
+
+	NewBudgetHandler().GetBudget(c)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
+	}
+	var resp struct {
+		Periods map[string]struct {
+			Limit     *int64 `json:"limit"`
+			Spend     int64  `json:"spend"`
+			Remaining *int64 `json:"remaining"`
+		} `json:"periods"`
+	}
+	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("parse response: %v", err)
+	}
+	if resp.Periods["hourly"].Limit == nil || *resp.Periods["hourly"].Limit != 100 {
+		t.Errorf("hourly.limit: want 100, got %v", resp.Periods["hourly"].Limit)
+	}
+	if resp.Periods["hourly"].Spend != 120 {
+		t.Errorf("hourly.spend: want 120, got %d", resp.Periods["hourly"].Spend)
+	}
+	if r := resp.Periods["hourly"].Remaining; r == nil || *r != -20 {
+		t.Errorf("hourly.remaining: want -20, got %v", r)
+	}
+	if resp.Periods["weekly"].Limit != nil {
+		t.Errorf("weekly.limit: want null (unset), got %v", resp.Periods["weekly"].Limit)
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("sqlmock expectations not met: %v", err)
+	}
+}
+
 // ==================== PATCH /workspaces/:id/budget ====================

-// TestBudgetPatch_MissingField verifies that PATCH /budget with no budget_limit
-// field in the body returns 400.
 func TestBudgetPatch_MissingField(t *testing.T) {
 	setupTestDB(t)
 	setupTestRedis(t)
@@ -196,15 +247,13 @@ func TestBudgetPatch_MissingField(t *testing.T) {
 		bytes.NewBufferString(`{"other_field":123}`))
 	c.Request.Header.Set("Content-Type", "application/json")

-	h := NewBudgetHandler()
-	h.PatchBudget(c)
+	NewBudgetHandler().PatchBudget(c)

 	if w.Code != http.StatusBadRequest {
 		t.Errorf("expected 400, got %d: %s", w.Code, w.Body.String())
 	}
 }

-// TestBudgetPatch_InvalidBody verifies that a malformed JSON body returns 400.
 func TestBudgetPatch_InvalidBody(t *testing.T) {
 	setupTestDB(t)
 	setupTestRedis(t)
@@ -216,15 +265,13 @@ func TestBudgetPatch_InvalidBody(t *testing.T) {
 		bytes.NewBufferString(`not json`))
 	c.Request.Header.Set("Content-Type", "application/json")

-	h := NewBudgetHandler()
-	h.PatchBudget(c)
+	NewBudgetHandler().PatchBudget(c)

 	if w.Code != http.StatusBadRequest {
 		t.Errorf("expected 400, got %d: %s", w.Code, w.Body.String())
 	}
 }

-// TestBudgetPatch_NegativeValue verifies that a negative budget_limit is rejected.
 func TestBudgetPatch_NegativeValue(t *testing.T) {
 	setupTestDB(t)
 	setupTestRedis(t)
@@ -236,15 +283,13 @@ func TestBudgetPatch_NegativeValue(t *testing.T) {
 		bytes.NewBufferString(`{"budget_limit":-1}`))
 	c.Request.Header.Set("Content-Type", "application/json")

-	h := NewBudgetHandler()
-	h.PatchBudget(c)
+	NewBudgetHandler().PatchBudget(c)

 	if w.Code != http.StatusBadRequest {
 		t.Errorf("expected 400 for negative budget_limit, got %d: %s", w.Code, w.Body.String())
 	}
 }

-// TestBudgetPatch_InvalidType verifies that a non-numeric budget_limit returns 400.
 func TestBudgetPatch_InvalidType(t *testing.T) {
 	setupTestDB(t)
 	setupTestRedis(t)
@@ -256,16 +301,32 @@ func TestBudgetPatch_InvalidType(t *testing.T) {
 		bytes.NewBufferString(`{"budget_limit":"not-a-number"}`))
 	c.Request.Header.Set("Content-Type", "application/json")

-	h := NewBudgetHandler()
-	h.PatchBudget(c)
+	NewBudgetHandler().PatchBudget(c)

 	if w.Code != http.StatusBadRequest {
 		t.Errorf("expected 400 for string budget_limit, got %d: %s", w.Code, w.Body.String())
 	}
 }

-// TestBudgetPatch_WorkspaceNotFound verifies that PATCH /budget returns 404
-// when the workspace doesn't exist.
+// TestBudgetPatch_UnknownPeriod rejects an unsupported period key.
+func TestBudgetPatch_UnknownPeriod(t *testing.T) {
+	setupTestDB(t)
+	setupTestRedis(t)
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Params = gin.Params{{Key: "id", Value: "ws-badperiod"}}
+	c.Request = httptest.NewRequest("PATCH", "/workspaces/ws-badperiod/budget",
+		bytes.NewBufferString(`{"budget_limits":{"yearly":100}}`))
+	c.Request.Header.Set("Content-Type", "application/json")
+
+	NewBudgetHandler().PatchBudget(c)
+
+	if w.Code != http.StatusBadRequest {
+		t.Errorf("expected 400 for unknown period, got %d: %s", w.Code, w.Body.String())
+	}
+}
+
 func TestBudgetPatch_WorkspaceNotFound(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)
@@ -281,8 +342,7 @@ func TestBudgetPatch_WorkspaceNotFound(t *testing.T) {
 		bytes.NewBufferString(`{"budget_limit":500}`))
 	c.Request.Header.Set("Content-Type", "application/json")

-	h := NewBudgetHandler()
-	h.PatchBudget(c)
+	NewBudgetHandler().PatchBudget(c)

 	if w.Code != http.StatusNotFound {
 		t.Errorf("expected 404, got %d: %s", w.Code, w.Body.String())
@@ -292,25 +352,20 @@ func TestBudgetPatch_WorkspaceNotFound(t *testing.T) {
 	}
 }

-// TestBudgetPatch_SetLimit verifies that PATCH /budget with a positive value
-// updates the DB and returns the new budget state.
+// TestBudgetPatch_SetLimit (legacy monthly shape) updates + returns new state.
 func TestBudgetPatch_SetLimit(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)

-	// Existence probe
 	mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
 		WithArgs("ws-set-limit").
 		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
-	// UPDATE
-	mock.ExpectExec(`UPDATE workspaces SET budget_limit`).
-		WithArgs("ws-set-limit", int64(500)).
+	mock.ExpectExec(`UPDATE workspaces SET budget_limits`).
+		WithArgs("ws-set-limit", sqlmock.AnyArg(), int64(500)).
 		WillReturnResult(sqlmock.NewResult(0, 1))
-	// Re-read for response
-	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\) FROM workspaces WHERE id`).
+	mock.ExpectQuery(`FROM workspace_spend_events`).
 		WithArgs("ws-set-limit").
-		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
-			AddRow(int64(500), int64(200)))
+		WillReturnRows(spendRows(0, 0, 0, 200))

 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
@@ -319,8 +374,7 @@ func TestBudgetPatch_SetLimit(t *testing.T) {
 		bytes.NewBufferString(`{"budget_limit":500}`))
 	c.Request.Header.Set("Content-Type", "application/json")

-	h := NewBudgetHandler()
-	h.PatchBudget(c)
+	NewBudgetHandler().PatchBudget(c)

 	if w.Code != http.StatusOK {
 		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
@@ -335,7 +389,6 @@ func TestBudgetPatch_SetLimit(t *testing.T) {
 	if resp["monthly_spend"] != float64(200) {
 		t.Errorf("expected monthly_spend=200, got %v", resp["monthly_spend"])
 	}
-	// budget_remaining = 500 - 200 = 300
 	if resp["budget_remaining"] != float64(300) {
 		t.Errorf("expected budget_remaining=300, got %v", resp["budget_remaining"])
 	}
@@ -344,8 +397,59 @@ func TestBudgetPatch_SetLimit(t *testing.T) {
 	}
 }

-// TestBudgetPatch_ClearLimit verifies that PATCH /budget with budget_limit=null
-// clears the ceiling, making budget_limit and budget_remaining null in the response.
+// TestBudgetPatch_SetMultiPeriod sets several periods at once and verifies the
+// per-period response.
+func TestBudgetPatch_SetMultiPeriod(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+
+	mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
+		WithArgs("ws-mp-set").
+		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
+	// no monthly in payload → legacy budget_limit column set to NULL
+	mock.ExpectExec(`UPDATE workspaces SET budget_limits`).
+		WithArgs("ws-mp-set", sqlmock.AnyArg(), nil).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+	mock.ExpectQuery(`FROM workspace_spend_events`).
+		WithArgs("ws-mp-set").
+		WillReturnRows(spendRows(10, 20, 30, 40))
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Params = gin.Params{{Key: "id", Value: "ws-mp-set"}}
+	c.Request = httptest.NewRequest("PATCH", "/workspaces/ws-mp-set/budget",
+		bytes.NewBufferString(`{"budget_limits":{"hourly":100,"daily":200,"monthly":null}}`))
+	c.Request.Header.Set("Content-Type", "application/json")
+
+	NewBudgetHandler().PatchBudget(c)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
+	}
+	var resp struct {
+		Periods map[string]struct {
+			Limit *int64 `json:"limit"`
+			Spend int64  `json:"spend"`
+		} `json:"periods"`
+		BudgetLimit *int64 `json:"budget_limit"`
+	}
+	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("parse response: %v", err)
+	}
+	if resp.Periods["hourly"].Limit == nil || *resp.Periods["hourly"].Limit != 100 {
+		t.Errorf("hourly.limit want 100, got %v", resp.Periods["hourly"].Limit)
+	}
+	if resp.Periods["daily"].Limit == nil || *resp.Periods["daily"].Limit != 200 {
+		t.Errorf("daily.limit want 200, got %v", resp.Periods["daily"].Limit)
+	}
+	if resp.BudgetLimit != nil {
+		t.Errorf("monthly cleared → budget_limit should be null, got %v", *resp.BudgetLimit)
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("sqlmock expectations not met: %v", err)
+	}
+}
+
 func TestBudgetPatch_ClearLimit(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)
@@ -353,15 +457,12 @@ func TestBudgetPatch_ClearLimit(t *testing.T) {
 	mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
 		WithArgs("ws-clear-limit").
 		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
-	// UPDATE with NULL
-	mock.ExpectExec(`UPDATE workspaces SET budget_limit`).
-		WithArgs("ws-clear-limit", nil).
+	mock.ExpectExec(`UPDATE workspaces SET budget_limits`).
+		WithArgs("ws-clear-limit", sqlmock.AnyArg(), nil).
 		WillReturnResult(sqlmock.NewResult(0, 1))
-	// Re-read — budget_limit is now NULL
-	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\) FROM workspaces WHERE id`).
+	mock.ExpectQuery(`FROM workspace_spend_events`).
 		WithArgs("ws-clear-limit").
-		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
-			AddRow(nil, int64(50)))
+		WillReturnRows(spendRows(0, 0, 0, 50))

 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
@@ -370,8 +471,7 @@ func TestBudgetPatch_ClearLimit(t *testing.T) {
 		bytes.NewBufferString(`{"budget_limit":null}`))
 	c.Request.Header.Set("Content-Type", "application/json")

-	h := NewBudgetHandler()
-	h.PatchBudget(c)
+	NewBudgetHandler().PatchBudget(c)

 	if w.Code != http.StatusOK {
 		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
@@ -391,8 +491,6 @@ func TestBudgetPatch_ClearLimit(t *testing.T) {
 	}
 }

-// TestBudgetPatch_UpdateDBError verifies that a DB error during the UPDATE
-// returns 500.
 func TestBudgetPatch_UpdateDBError(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)
@@ -400,8 +498,8 @@ func TestBudgetPatch_UpdateDBError(t *testing.T) {
 	mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
 		WithArgs("ws-patch-dberr").
 		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
-	mock.ExpectExec(`UPDATE workspaces SET budget_limit`).
-		WithArgs("ws-patch-dberr", int64(500)).
+	mock.ExpectExec(`UPDATE workspaces SET budget_limits`).
+		WithArgs("ws-patch-dberr", sqlmock.AnyArg(), int64(500)).
 		WillReturnError(sql.ErrConnDone)

 	w := httptest.NewRecorder()
@@ -411,8 +509,7 @@ func TestBudgetPatch_UpdateDBError(t *testing.T) {
 		bytes.NewBufferString(`{"budget_limit":500}`))
 	c.Request.Header.Set("Content-Type", "application/json")

-	h := NewBudgetHandler()
-	h.PatchBudget(c)
+	NewBudgetHandler().PatchBudget(c)

 	if w.Code != http.StatusInternalServerError {
 		t.Errorf("expected 500 on UPDATE error, got %d: %s", w.Code, w.Body.String())
@@ -422,8 +519,8 @@ func TestBudgetPatch_UpdateDBError(t *testing.T) {
 	}
 }

-// TestBudgetPatch_ZeroLimit verifies that budget_limit=0 is accepted (it means
-// every A2A call is blocked — useful to pause a workspace's LLM spend entirely).
+// TestBudgetPatch_ZeroLimit verifies budget_limit=0 is accepted + stored (0 =
+// block-all: every period call is blocked — pauses the workspace's spend).
 func TestBudgetPatch_ZeroLimit(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)
@@ -431,13 +528,12 @@ func TestBudgetPatch_ZeroLimit(t *testing.T) {
 	mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
 		WithArgs("ws-zero-limit").
 		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
-	mock.ExpectExec(`UPDATE workspaces SET budget_limit`).
-		WithArgs("ws-zero-limit", int64(0)).
+	mock.ExpectExec(`UPDATE workspaces SET budget_limits`).
+		WithArgs("ws-zero-limit", sqlmock.AnyArg(), int64(0)).
 		WillReturnResult(sqlmock.NewResult(0, 1))
-	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\) FROM workspaces WHERE id`).
+	mock.ExpectQuery(`FROM workspace_spend_events`).
 		WithArgs("ws-zero-limit").
-		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
-			AddRow(int64(0), int64(0)))
+		WillReturnRows(spendRows(0, 0, 0, 0))

 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
@@ -446,11 +542,17 @@ func TestBudgetPatch_ZeroLimit(t *testing.T) {
 		bytes.NewBufferString(`{"budget_limit":0}`))
 	c.Request.Header.Set("Content-Type", "application/json")

-	h := NewBudgetHandler()
-	h.PatchBudget(c)
+	NewBudgetHandler().PatchBudget(c)

 	if w.Code != http.StatusOK {
-		t.Errorf("expected 200 for zero budget_limit, got %d: %s", w.Code, w.Body.String())
+		t.Fatalf("expected 200 for zero budget_limit, got %d: %s", w.Code, w.Body.String())
+	}
+	var resp map[string]interface{}
+	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("parse response: %v", err)
+	}
+	if resp["budget_limit"] != float64(0) {
+		t.Errorf("expected budget_limit=0 (block-all), got %v", resp["budget_limit"])
 	}
 	if err := mock.ExpectationsWereMet(); err != nil {
 		t.Errorf("sqlmock expectations not met: %v", err)
@@ -26,6 +26,10 @@ type ChannelHandler struct {
 	manager *channels.Manager
 }

+// channelSlugRe matches valid agent slugs used in [slug] routing.
+// Compiled once at init to avoid recompilation on every webhook call.
+var channelSlugRe = regexp.MustCompile(`^[a-zA-Z0-9 _-]+$`)
+
 // NewChannelHandler creates a channel handler with the given manager.
 func NewChannelHandler(manager *channels.Manager) *ChannelHandler {
 	return &ChannelHandler{manager: manager}
@@ -67,7 +71,9 @@ func (h *ChannelHandler) List(c *gin.Context) {
 		}

 		var config map[string]interface{}
-		json.Unmarshal(configJSON, &config)
+		if err := json.Unmarshal(configJSON, &config); err != nil {
+			log.Printf("Channels: unmarshal config for channel %s: %v", id, err)
+		}
 		// #319: decrypt sensitive fields first so the mask operates on
 		// plaintext (first-4 / last-4 of the real token, not the ciphertext
 		// prefix). Decrypt errors are logged but non-fatal — List must keep
@@ -86,7 +92,9 @@ func (h *ChannelHandler) List(c *gin.Context) {
 		}

 		var allowed []string
-		json.Unmarshal(allowedJSON, &allowed)
+		if err := json.Unmarshal(allowedJSON, &allowed); err != nil {
+			log.Printf("Channels: unmarshal allowed_users for channel %s: %v", id, err)
+		}

 		entry := map[string]interface{}{
 			"id":            id,
@@ -161,8 +169,18 @@ func (h *ChannelHandler) Create(c *gin.Context) {
 		return
 	}

-	configJSON, _ := json.Marshal(body.Config)
-	allowedJSON, _ := json.Marshal(body.AllowedUsers)
+	configJSON, marshalErr := json.Marshal(body.Config)
+	if marshalErr != nil {
+		log.Printf("Channels create %s: json.Marshal config failed: %v", workspaceID, marshalErr)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "marshal config failed"})
+		return
+	}
+	allowedJSON, marshalErr := json.Marshal(body.AllowedUsers)
+	if marshalErr != nil {
+		log.Printf("Channels create %s: json.Marshal allowed_users failed: %v", workspaceID, marshalErr)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "marshal allowed_users failed"})
+		return
+	}
 	enabled := true
 	if body.Enabled != nil {
 		enabled = *body.Enabled
@@ -217,11 +235,21 @@ func (h *ChannelHandler) Update(c *gin.Context) {
 			c.JSON(http.StatusInternalServerError, gin.H{"error": "encrypt failed"})
 			return
 		}
-		j, _ := json.Marshal(body.Config)
+		j, marshalErr := json.Marshal(body.Config)
+		if marshalErr != nil {
+			log.Printf("Channels update %s: json.Marshal config failed: %v", workspaceID, marshalErr)
+			c.JSON(http.StatusInternalServerError, gin.H{"error": "marshal config failed"})
+			return
+		}
 		configArg = string(j)
 	}
 	if body.AllowedUsers != nil {
-		j, _ := json.Marshal(body.AllowedUsers)
+		j, marshalErr := json.Marshal(body.AllowedUsers)
+		if marshalErr != nil {
+			log.Printf("Channels update %s: json.Marshal allowed_users failed: %v", workspaceID, marshalErr)
+			c.JSON(http.StatusInternalServerError, gin.H{"error": "marshal allowed_users failed"})
+			return
+		}
 		allowedArg = string(j)
 	}

@@ -238,7 +266,13 @@ func (h *ChannelHandler) Update(c *gin.Context) {
 		return
 	}

-	if n, _ := result.RowsAffected(); n == 0 {
+	n, err := result.RowsAffected()
+	if err != nil {
+		log.Printf("Channel update RowsAffected error channel=%s workspace=%s: %v", channelID, workspaceID, err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "update failed"})
+		return
+	}
+	if n == 0 {
 		c.JSON(http.StatusNotFound, gin.H{"error": "channel not found"})
 		return
 	}
@@ -263,7 +297,13 @@ func (h *ChannelHandler) Delete(c *gin.Context) {
 		return
 	}

-	if n, _ := result.RowsAffected(); n == 0 {
+	n, err := result.RowsAffected()
+	if err != nil {
+		log.Printf("Channel delete RowsAffected error channel=%s workspace=%s: %v", channelID, workspaceID, err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "delete failed"})
+		return
+	}
+	if n == 0 {
 		c.JSON(http.StatusNotFound, gin.H{"error": "channel not found"})
 		return
 	}
@@ -464,11 +504,10 @@ func (h *ChannelHandler) Webhook(c *gin.Context) {
 	// in a shared channel and route to a specific agent.
 	targetSlug := ""
 	routedText := msg.Text
-	validSlugRe := regexp.MustCompile(`^[a-zA-Z0-9 _-]+$`)
 	if len(msg.Text) > 2 && msg.Text[0] == '[' {
 		if idx := strings.Index(msg.Text, "]"); idx > 1 && idx < 40 {
 			candidate := strings.ToLower(strings.TrimSpace(msg.Text[1:idx]))
-			if validSlugRe.MatchString(candidate) {
+			if channelSlugRe.MatchString(candidate) {
 				targetSlug = candidate
 				routedText = strings.TrimSpace(msg.Text[idx+1:])
 				if routedText == "" {
@@ -499,8 +538,12 @@ func (h *ChannelHandler) Webhook(c *gin.Context) {
 		if err := rows.Scan(&row.ID, &row.WorkspaceID, &row.ChannelType, &configJSON, &row.Enabled, &allowedJSON); err != nil {
 			continue
 		}
-		json.Unmarshal(configJSON, &row.Config)
-		json.Unmarshal(allowedJSON, &row.AllowedUsers)
+		if err := json.Unmarshal(configJSON, &row.Config); err != nil {
+			log.Printf("Channels: unmarshal config for webhook row %s: %v", row.ID, err)
+		}
+		if err := json.Unmarshal(allowedJSON, &row.AllowedUsers); err != nil {
+			log.Printf("Channels: unmarshal allowed_users for webhook row %s: %v", row.ID, err)
+		}
 		if err := channels.DecryptSensitiveFields(row.Config); err != nil {
 			log.Printf("Channels: decrypt webhook row %s: %v", row.ID, err)
 			continue
@@ -229,7 +229,12 @@ func (h *CheckpointsHandler) Delete(c *gin.Context) {
 		return
 	}

-	n, _ := result.RowsAffected()
+	n, err := result.RowsAffected()
+	if err != nil {
+		log.Printf("Delete checkpoints RowsAffected error workspace=%s wf=%s: %v", workspaceID, workflowID, err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to delete checkpoints"})
+		return
+	}
 	if n == 0 {
 		c.JSON(http.StatusNotFound, gin.H{"error": "no checkpoints found for workflow"})
 		return
@@ -0,0 +1,427 @@
+package handlers
+
+// cross_tenant_isolation_test.go — #1953 regression tests.
+//
+// Three workspace-server paths historically derived an "org-root sibling set"
+// as `WHERE parent_id IS NULL`, which matches EVERY tenant's org root (the
+// workspaces table has no org_id column) → cross-tenant data exposure:
+//
+//  1. GET /registry/:id/peers   (discovery.Peers)
+//  2. MCP toolListPeers          (mcp_tools.toolListPeers)
+//  3. a2a routing                (a2a_proxy.proxyA2ARequest → resolveAgentURL)
+//
+// These tests assert that a workspace in a DIFFERENT org is never returned as a
+// peer and that a2a refuses to resolve/route to a workspace outside the caller's
+// org, while same-org peers/targets still work. They reuse the SAME parent_id-
+// chain org scoping the OFFSEC-015 broadcast fix introduced (org_scope.go).
+
+import (
+	"bytes"
+	"context"
+	"database/sql"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+	"time"
+
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
+	"github.com/DATA-DOG/go-sqlmock"
+	"github.com/gin-gonic/gin"
+)
+
+// dbHandleForTest returns the global sqlmock-backed *sql.DB that setupTestDB
+// installs, for tests that need to hand a *sql.DB to a component (e.g.
+// MCPHandler.database, sameOrg) rather than relying on the package-global.
+func dbHandleForTest() *sql.DB { return db.DB }
+
+// peerColsForIsolation matches queryPeerMaps' SELECT column set.
+var peerColsForIsolation = []string{
+	"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks",
+}
+
+// -------------------------------------------------------------------------
+// Path 1: GET /registry/:id/peers — discovery.Peers
+// -------------------------------------------------------------------------
+
+// TestPeers_CrossTenant_OrgRootNotLeaked is the core #1953 regression for the
+// discovery path. The caller is an org root (parent_id IS NULL). Pre-fix the
+// handler ran `SELECT ... WHERE w.parent_id IS NULL AND w.id != $1`, returning
+// every OTHER tenant's org root as a "sibling" peer. Post-fix an org-root caller
+// issues NO sibling query — its only peers are its own children. If the handler
+// regressed and issued the cross-tenant sibling query, sqlmock would report an
+// unexpected query (the expectation below is intentionally NOT registered) and
+// the test fails.
+func TestPeers_CrossTenant_OrgRootNotLeaked(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+	handler := NewDiscoveryHandler()
+
+	// Behavioural leak test: register the OLD leaky `parent_id IS NULL` sibling
+	// query so that IF the handler still issues it, it returns another tenant's
+	// org root (org-b-root). The fix removes that query for an org-root caller,
+	// so org-b-root must never appear in the output. Unordered matching makes
+	// the leaky-sibling expectation optional — the fix simply never consumes it.
+	mock.MatchExpectationsInOrder(false)
+
+	caller := "org-a-root" // parent_id IS NULL — an org root for tenant A
+
+	// parent_id lookup → NULL (caller is an org root)
+	mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
+		WithArgs(caller).
+		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
+
+	// LEAKY sibling query (pre-fix). Returns a DIFFERENT tenant's org root.
+	// The fix must NOT issue this query; if it does, org-b-root leaks into the
+	// peer list and the output assertion below fails.
+	mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id IS NULL AND w.id != \\$1").
+		WithArgs(caller).
+		WillReturnRows(sqlmock.NewRows(peerColsForIsolation).
+			AddRow("org-b-root", "Org B Root", "lead", 0, "online", []byte("null"), "http://b-root", nil, 0))
+
+	// Children query — caller's own org-A children only. Return one child.
+	mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.id != \\$2").
+		WithArgs(caller, caller).
+		WillReturnRows(sqlmock.NewRows(peerColsForIsolation).
+			AddRow("org-a-child", "Org A Child", "worker", 1, "online", []byte("null"), "http://a-child", caller, 0))
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Params = gin.Params{{Key: "id", Value: caller}}
+	c.Request = httptest.NewRequest("GET", "/registry/"+caller+"/peers", nil)
+
+	handler.Peers(c)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
+	}
+
+	var peers []map[string]interface{}
+	if err := json.Unmarshal(w.Body.Bytes(), &peers); err != nil {
+		t.Fatalf("failed to parse response: %v", err)
+	}
+
+	// The other-tenant org root must NEVER appear; only the same-org child.
+	for _, p := range peers {
+		if id, _ := p["id"].(string); id == "org-b-root" {
+			t.Fatalf("cross-tenant leak (#1953): org-b-root appeared in org-a-root's peer list: %v", peers)
+		}
+	}
+	if len(peers) != 1 {
+		t.Fatalf("expected exactly 1 peer (same-org child), got %d: %v", len(peers), peers)
+	}
+	// NOTE: ExpectationsWereMet is intentionally NOT asserted — the leaky
+	// sibling expectation is deliberately left unconsumed by the fixed path.
+}
+
+// TestPeers_SameOrg_SiblingsStillWork is the positive companion: a non-root
+// child caller still sees its same-org siblings, children, and parent. This
+// guards against the fix over-scoping and breaking legitimate intra-org
+// discovery.
+func TestPeers_SameOrg_SiblingsStillWork(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+	handler := NewDiscoveryHandler()
+
+	caller := "org-a-child-1"
+	parent := "org-a-root"
+
+	mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
+		WithArgs(caller).
+		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(parent))
+
+	// Siblings — scoped to the shared parent (one tenant).
+	mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.id != \\$2").
+		WithArgs(parent, caller).
+		WillReturnRows(sqlmock.NewRows(peerColsForIsolation).
+			AddRow("org-a-child-2", "Org A Sibling", "worker", 1, "online", []byte("null"), "http://a-sib", parent, 0))
+
+	// Children — none.
+	mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.id != \\$2 AND w.status").
+		WithArgs(caller, caller).
+		WillReturnRows(sqlmock.NewRows(peerColsForIsolation))
+
+	// Parent.
+	mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.id = \\$1 AND w.id != \\$2 AND w.status").
+		WithArgs(parent, caller).
+		WillReturnRows(sqlmock.NewRows(peerColsForIsolation).
+			AddRow(parent, "Org A Root", "lead", 0, "online", []byte("null"), "http://a-root", nil, 0))
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Params = gin.Params{{Key: "id", Value: caller}}
+	c.Request = httptest.NewRequest("GET", "/registry/"+caller+"/peers", nil)
+
+	handler.Peers(c)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
+	}
+	var peers []map[string]interface{}
+	if err := json.Unmarshal(w.Body.Bytes(), &peers); err != nil {
+		t.Fatalf("failed to parse response: %v", err)
+	}
+	// Sibling + parent = 2 same-org peers.
+	if len(peers) != 2 {
+		t.Fatalf("expected 2 same-org peers (sibling + parent), got %d: %v", len(peers), peers)
+	}
+	names := map[string]bool{}
+	for _, p := range peers {
+		names[fmt.Sprint(p["name"])] = true
+	}
+	if !names["Org A Sibling"] || !names["Org A Root"] {
+		t.Errorf("expected same-org sibling + parent in peer list, got %v", names)
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
+
+// -------------------------------------------------------------------------
+// Path 2: MCP toolListPeers — mcp_tools.toolListPeers
+// -------------------------------------------------------------------------
+
+// mcpPeerCols matches toolListPeers' SELECT column set.
+var mcpPeerCols = []string{"id", "name", "role", "status", "tier"}
+
+// TestToolListPeers_CrossTenant_OrgRootNotLeaked is the #1953 regression for
+// the MCP path. Same shape as the discovery test: an org-root caller must NOT
+// enumerate other tenants' org roots. The cross-tenant `parent_id IS NULL`
+// sibling query is intentionally not registered, so if it runs sqlmock fails.
+func TestToolListPeers_CrossTenant_OrgRootNotLeaked(t *testing.T) {
+	mock := setupTestDB(t)
+	mock.MatchExpectationsInOrder(false)
+	h := &MCPHandler{database: dbHandleForTest()}
+
+	caller := "org-a-root"
+
+	mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
+		WithArgs(caller).
+		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
+
+	// LEAKY sibling query (pre-fix). Returns another tenant's org root. The fix
+	// must NOT issue this for an org-root caller; if it does, org-b-root leaks
+	// into the output and the assertion below fails. Left optional via
+	// unordered matching, so the fixed path simply never consumes it.
+	mock.ExpectQuery("WHERE w.parent_id IS NULL AND w.id != \\$1").
+		WithArgs(caller).
+		WillReturnRows(sqlmock.NewRows(mcpPeerCols).
+			AddRow("org-b-root", "Org B Root", "lead", "online", 0))
+
+	// Children — caller's own org-A children only.
+	mock.ExpectQuery("WHERE w.parent_id = \\$1 AND w.status").
+		WithArgs(caller).
+		WillReturnRows(sqlmock.NewRows(mcpPeerCols).
+			AddRow("org-a-child", "Org A Child", "worker", "online", 1))
+
+	out, err := h.toolListPeers(context.Background(), caller)
+	if err != nil {
+		t.Fatalf("toolListPeers returned error: %v", err)
+	}
+	if strings.Contains(out, "org-b-root") || strings.Contains(out, "Org B Root") {
+		t.Fatalf("cross-tenant leak (#1953): another tenant's org root appeared in toolListPeers output:\n%s", out)
+	}
+	if !strings.Contains(out, "org-a-child") {
+		t.Errorf("same-org child missing from toolListPeers output:\n%s", out)
+	}
+	// ExpectationsWereMet intentionally NOT asserted — leaky sibling expectation
+	// is deliberately left unconsumed by the fixed path.
+}
+
+// TestToolListPeers_SameOrg_SiblingsStillWork — positive companion for the MCP
+// path: a non-root child still enumerates its same-org siblings + children + parent.
+func TestToolListPeers_SameOrg_SiblingsStillWork(t *testing.T) {
+	mock := setupTestDB(t)
+	h := &MCPHandler{database: dbHandleForTest()}
+
+	caller := "org-a-child-1"
+	parent := "org-a-root"
+
+	mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
+		WithArgs(caller).
+		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(parent))
+
+	// Siblings — scoped to shared parent.
+	mock.ExpectQuery("WHERE w.parent_id = \\$1 AND w.id != \\$2 AND w.status").
+		WithArgs(parent, caller).
+		WillReturnRows(sqlmock.NewRows(mcpPeerCols).
+			AddRow("org-a-child-2", "Org A Sibling", "worker", "online", 1))
+
+	// Children — none.
+	mock.ExpectQuery("WHERE w.parent_id = \\$1 AND w.status").
+		WithArgs(caller).
+		WillReturnRows(sqlmock.NewRows(mcpPeerCols))
+
+	// Parent.
+	mock.ExpectQuery("WHERE w.id = \\$1 AND w.status").
+		WithArgs(parent).
+		WillReturnRows(sqlmock.NewRows(mcpPeerCols).
+			AddRow(parent, "Org A Root", "lead", "online", 0))
+
+	out, err := h.toolListPeers(context.Background(), caller)
+	if err != nil {
+		t.Fatalf("toolListPeers returned error: %v", err)
+	}
+	if !strings.Contains(out, "Org A Sibling") || !strings.Contains(out, "Org A Root") {
+		t.Errorf("expected same-org sibling + parent in toolListPeers output:\n%s", out)
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
+
+// -------------------------------------------------------------------------
+// Path 3: a2a routing — a2a_proxy.proxyA2ARequest / resolveAgentURL
+// -------------------------------------------------------------------------
+
+// TestProxyA2A_CrossTenant_RoutingDenied is the #1953 regression for a2a
+// routing. Caller and target are both org roots (parent_id IS NULL) belonging
+// to DIFFERENT tenants. Pre-fix, CanCommunicate's "root-level siblings" rule
+// waved this through and resolveAgentURL routed to the foreign tenant. Post-fix
+// the org-scope guard resolves each to a different org root and returns 403
+// BEFORE resolveAgentURL/dispatch.
+func TestProxyA2A_CrossTenant_RoutingDenied(t *testing.T) {
+	mock := setupTestDB(t)
+	mr := setupTestRedis(t)
+	broadcaster := newTestBroadcaster()
+	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
+
+	caller := "org-a-root"
+	target := "org-b-root" // different tenant
+
+	// A URL exists for the target; the guard must deny BEFORE it is used.
+	mr.Set(fmt.Sprintf("ws:%s:url", target), "http://localhost:1")
+
+	// CanCommunicate: both root-level (parent_id NULL) → its weak "root-level
+	// siblings" rule ALLOWS this. The org guard must catch it afterward.
+	mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
+		WithArgs(caller).
+		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(caller, nil))
+	mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
+		WithArgs(target).
+		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(target, nil))
+
+	// #1953 org-scope guard: caller resolves to org-a-root, target to org-b-root
+	// → different orgs → 403. (Each org root resolves to itself.)
+	mock.ExpectQuery("WITH RECURSIVE org_chain AS").
+		WithArgs(caller).
+		WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(caller))
+	mock.ExpectQuery("WITH RECURSIVE org_chain AS").
+		WithArgs(target).
+		WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(target))
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Params = gin.Params{{Key: "id", Value: target}}
+	body := `{"method":"message/send","params":{"message":{"role":"user","parts":[{"text":"cross-tenant"}]}}}`
+	c.Request = httptest.NewRequest("POST", "/workspaces/"+target+"/a2a", bytes.NewBufferString(body))
+	c.Request.Header.Set("Content-Type", "application/json")
+	c.Request.Header.Set("X-Workspace-ID", caller)
+
+	handler.ProxyA2A(c)
+
+	if w.Code != http.StatusForbidden {
+		t.Fatalf("expected 403 for cross-tenant a2a routing, got %d: %s", w.Code, w.Body.String())
+	}
+	var resp map[string]interface{}
+	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("body not JSON: %v", err)
+	}
+	if msg, _ := resp["error"].(string); !strings.Contains(msg, "different org") {
+		t.Errorf("expected cross-org denial message, got %v", resp["error"])
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
+
+// TestResolveAgentURL_CrossTenant_RejectedViaSameOrg is a direct unit test of
+// the sameOrg primitive that gates resolveAgentURL: a target in a different org
+// must be reported as NOT same-org, so the a2a guard rejects it before
+// resolveAgentURL is ever called.
+func TestResolveAgentURL_CrossTenant_RejectedViaSameOrg(t *testing.T) {
+	mock := setupTestDB(t)
+
+	caller := "org-a-root"
+	target := "org-b-root"
+
+	mock.ExpectQuery("WITH RECURSIVE org_chain AS").
+		WithArgs(caller).
+		WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(caller))
+	mock.ExpectQuery("WITH RECURSIVE org_chain AS").
+		WithArgs(target).
+		WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(target))
+
+	ok, err := sameOrg(context.Background(), dbHandleForTest(), caller, target)
+	if err != nil {
+		t.Fatalf("sameOrg returned unexpected error: %v", err)
+	}
+	if ok {
+		t.Errorf("expected cross-tenant workspaces to be reported as DIFFERENT orgs, got sameOrg=true")
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
+
+// TestProxyA2A_SameOrg_RoutingAllowed — positive companion for a2a: two
+// same-org siblings route successfully (mirrors TestProxyA2A_CallerIDPropagated
+// but named to document the #1953 same-org allow path).
+func TestProxyA2A_SameOrg_RoutingAllowed(t *testing.T) {
+	mock := setupTestDB(t)
+	mr := setupTestRedis(t)
+	allowLoopbackForTest(t)
+	broadcaster := newTestBroadcaster()
+	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
+	waitForHandlerAsyncBeforeDBCleanup(t, handler)
+
+	caller := "org-a-child-1"
+	target := "org-a-child-2"
+	parent := "org-a-root"
+
+	agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		fmt.Fprint(w, `{"jsonrpc":"2.0","id":"1","result":{}}`)
+	}))
+	defer agentServer.Close()
+	mr.Set(fmt.Sprintf("ws:%s:url", target), agentServer.URL)
+
+	// CanCommunicate — siblings under shared parent.
+	mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
+		WithArgs(caller).
+		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(caller, parent))
+	mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
+		WithArgs(target).
+		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(target, parent))
+
+	// #1953 org guard — both resolve to the same org root → allowed.
+	mock.ExpectQuery("WITH RECURSIVE org_chain AS").
+		WithArgs(caller).
+		WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(parent))
+	mock.ExpectQuery("WITH RECURSIVE org_chain AS").
+		WithArgs(target).
+		WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(parent))
+
+	expectBudgetCheck(mock, target)
+	mock.ExpectExec("INSERT INTO activity_logs").WillReturnResult(sqlmock.NewResult(0, 1))
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Params = gin.Params{{Key: "id", Value: target}}
+	body := `{"method":"message/send","params":{"message":{"role":"user","parts":[{"text":"same-org"}]}}}`
+	c.Request = httptest.NewRequest("POST", "/workspaces/"+target+"/a2a", bytes.NewBufferString(body))
+	c.Request.Header.Set("Content-Type", "application/json")
+	c.Request.Header.Set("X-Workspace-ID", caller)
+
+	handler.ProxyA2A(c)
+	time.Sleep(50 * time.Millisecond) // allow the async logA2ASuccess INSERT to flush
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected 200 for same-org a2a routing, got %d: %s", w.Code, w.Body.String())
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
@@ -57,10 +57,18 @@ func pushDelegationResultToInbox(ctx context.Context, sourceID, delegationID, st
 		"text":          responsePreview,
 		"delegation_id": delegationID,
 	}
-	respJSON, _ := json.Marshal(respPayload)
-	reqJSON, _ := json.Marshal(map[string]interface{}{
+	respJSON, marshalErr := json.Marshal(respPayload)
+	if marshalErr != nil {
+		log.Printf("Delegation %s: json.Marshal respPayload failed: %v", delegationID, marshalErr)
+		return
+	}
+	reqJSON, marshalErr := json.Marshal(map[string]interface{}{
 		"delegation_id": delegationID,
 	})
+	if marshalErr != nil {
+		log.Printf("Delegation %s: json.Marshal reqPayload failed: %v", delegationID, marshalErr)
+		return
+	}
 	logStatus := "ok"
 	if status == "failed" {
 		logStatus = "error"
@@ -165,7 +173,7 @@ func (h *DelegationHandler) Delegate(c *gin.Context) {
 	// check_task_status returned status='queued' forever even after a
 	// real reply landed). messageId mirrors delegation_id so the
 	// platform's idempotency-key extraction also keys off the same id.
-	a2aBody, _ := json.Marshal(map[string]interface{}{
+	a2aBody, marshalErr := json.Marshal(map[string]interface{}{
 		"method": "message/send",
 		"params": map[string]interface{}{
 			"message": map[string]interface{}{
@@ -176,6 +184,9 @@ func (h *DelegationHandler) Delegate(c *gin.Context) {
 			},
 		},
 	})
+	if marshalErr != nil {
+		log.Printf("Delegation %s: json.Marshal a2aBody failed: %v", delegationID, marshalErr)
+	}

 	// Fire-and-forget: send A2A in a background goroutine.
 	//
@@ -261,10 +272,12 @@ func lookupIdempotentDelegation(ctx context.Context, c *gin.Context, sourceID, i
 		return false
 	}
 	if existingStatus == "failed" {
-		_, _ = db.DB.ExecContext(ctx, `
+		if _, err := db.DB.ExecContext(ctx, `
 			DELETE FROM activity_logs
 			 WHERE workspace_id = $1 AND idempotency_key = $2 AND status = 'failed'
-		`, sourceID, idempotencyKey)
+		`, sourceID, idempotencyKey); err != nil {
+			log.Printf("delegation: failed to clean up failed idempotency row for %s/%s: %v", sourceID, idempotencyKey, err)
+		}
 		return false
 	}
 	c.JSON(http.StatusOK, gin.H{
@@ -302,16 +315,24 @@ const (
 // insertDelegationRow stores the pending delegation row. See
 // insertDelegationOutcome for the three possible return values.
 func insertDelegationRow(ctx context.Context, c *gin.Context, sourceID string, body delegateRequest, delegationID string) insertDelegationOutcome {
-	taskJSON, _ := json.Marshal(map[string]interface{}{
+	taskJSON, marshalErr := json.Marshal(map[string]interface{}{
 		"task":          body.Task,
 		"delegation_id": delegationID,
 	})
+	if marshalErr != nil {
+		log.Printf("Delegation %s: json.Marshal taskJSON failed: %v", delegationID, marshalErr)
+		return insertTrackingUnavailable
+	}
 	// Store delegation_id in response_body so agent check_delegation_status
 	// (which reads response_body->>delegation_id) can locate this row even
 	// when request_body hasn't propagated yet. Fixes mc#984.
-	respJSON, _ := json.Marshal(map[string]interface{}{
+	respJSON, marshalErr := json.Marshal(map[string]interface{}{
 		"delegation_id": delegationID,
 	})
+	if marshalErr != nil {
+		log.Printf("Delegation %s: json.Marshal respJSON failed: %v", delegationID, marshalErr)
+		return insertTrackingUnavailable
+	}
 	var idemArg interface{}
 	if body.IdempotencyKey != "" {
 		idemArg = body.IdempotencyKey
@@ -414,10 +435,12 @@ func (h *DelegationHandler) executeDelegation(ctx context.Context, sourceID, tar
 	if proxyErr != nil && isTransientProxyError(proxyErr) && len(respBody) == 0 {
 		log.Printf("Delegation %s: first attempt failed (%s) — retrying in %s after reactive URL refresh",
 			delegationID, proxyErr.Error(), delegationRetryDelay)
+		timer := time.NewTimer(delegationRetryDelay)
 		select {
 		case <-ctx.Done():
+			timer.Stop()
 			// outer timeout hit before retry window elapsed
-		case <-time.After(delegationRetryDelay):
+		case <-timer.C:
 			status, respBody, proxyErr = h.workspace.proxyA2ARequest(ctx, targetID, a2aBody, sourceID, true, false)
 		}
 	}
@@ -482,15 +505,19 @@ handleSuccess:
 		// dispatch eventually succeeds. Without the key, the drain finds
 		// the row by (workspace_id, target_id, method) but can't tell
 		// multiple-queued-delegations-to-same-target apart.
-		queuedJSON, _ := json.Marshal(map[string]interface{}{
+		queuedJSON, marshalErr := json.Marshal(map[string]interface{}{
 			"delegation_id": delegationID,
 			"queued":        true,
 		})
-		if _, err := db.DB.ExecContext(ctx, `
-			INSERT INTO activity_logs (workspace_id, activity_type, method, source_id, target_id, summary, response_body, status)
-			VALUES ($1, 'delegation', 'delegate_result', $2, $3, $4, $5::jsonb, 'queued')
-		`, sourceID, sourceID, targetID, "Delegation queued — target at capacity", string(queuedJSON)); err != nil {
-			log.Printf("Delegation %s: failed to insert queued log: %v", delegationID, err)
+		if marshalErr != nil {
+			log.Printf("Delegation %s: json.Marshal queuedJSON failed: %v", delegationID, marshalErr)
+		} else {
+			if _, err := db.DB.ExecContext(ctx, `
+				INSERT INTO activity_logs (workspace_id, activity_type, method, source_id, target_id, summary, response_body, status)
+				VALUES ($1, 'delegation', 'delegate_result', $2, $3, $4, $5::jsonb, 'queued')
+			`, sourceID, sourceID, targetID, "Delegation queued — target at capacity", string(queuedJSON)); err != nil {
+				log.Printf("Delegation %s: failed to insert queued log: %v", delegationID, err)
+			}
 		}
 		h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationStatus), sourceID, map[string]interface{}{
 			"delegation_id": delegationID, "target_id": targetID, "status": "queued",
@@ -505,15 +532,19 @@ handleSuccess:

 	log.Printf("Delegation %s: step=inserting_success_log", delegationID)
 	// Store success (response_body must be JSONB, include delegation_id)
-	respJSON, _ := json.Marshal(map[string]interface{}{
+	respJSON, marshalErr := json.Marshal(map[string]interface{}{
 		"text":          responseText,
 		"delegation_id": delegationID,
 	})
-	if _, err := db.DB.ExecContext(ctx, `
-		INSERT INTO activity_logs (workspace_id, activity_type, method, source_id, target_id, summary, response_body, status)
-		VALUES ($1, 'delegation', 'delegate_result', $2, $3, $4, $5::jsonb, 'completed')
-	`, sourceID, sourceID, targetID, "Delegation completed ("+textutil.TruncateBytes(responseText, 80)+")", string(respJSON)); err != nil {
-		log.Printf("Delegation %s: failed to insert success log: %v", delegationID, err)
+	if marshalErr != nil {
+		log.Printf("Delegation %s: json.Marshal respJSON failed: %v", delegationID, marshalErr)
+	} else {
+		if _, err := db.DB.ExecContext(ctx, `
+			INSERT INTO activity_logs (workspace_id, activity_type, method, source_id, target_id, summary, response_body, status)
+			VALUES ($1, 'delegation', 'delegate_result', $2, $3, $4, $5::jsonb, 'completed')
+		`, sourceID, sourceID, targetID, "Delegation completed ("+textutil.TruncateBytes(responseText, 80)+")", string(respJSON)); err != nil {
+			log.Printf("Delegation %s: failed to insert success log: %v", delegationID, err)
+		}
 	}
 	log.Printf("Delegation %s: step=recording_ledger_completed", delegationID)

@@ -590,15 +621,25 @@ func (h *DelegationHandler) Record(c *gin.Context) {
 		return
 	}

-	taskJSON, _ := json.Marshal(map[string]interface{}{
+	taskJSON, marshalErr := json.Marshal(map[string]interface{}{
 		"task":          body.Task,
 		"delegation_id": body.DelegationID,
 	})
+	if marshalErr != nil {
+		log.Printf("Delegation %s: json.Marshal taskJSON failed: %v", body.DelegationID, marshalErr)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to marshal task"})
+		return
+	}
 	// Store delegation_id in response_body so agent check_delegation_status
 	// can locate this row. Fixes mc#984.
-	respJSON, _ := json.Marshal(map[string]interface{}{
+	respJSON, marshalErr := json.Marshal(map[string]interface{}{
 		"delegation_id": body.DelegationID,
 	})
+	if marshalErr != nil {
+		log.Printf("Delegation %s: json.Marshal respJSON failed: %v", body.DelegationID, marshalErr)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to marshal response"})
+		return
+	}
 	if _, err := db.DB.ExecContext(ctx, `
 		INSERT INTO activity_logs (workspace_id, activity_type, method, source_id, target_id, summary, request_body, response_body, status)
 		VALUES ($1, 'delegation', 'delegate', $2, $3, $4, $5::jsonb, $6::jsonb, 'dispatched')
@@ -662,15 +703,19 @@ func (h *DelegationHandler) UpdateStatus(c *gin.Context) {
 	h.updateDelegationStatus(ctx, sourceID, delegationID, body.Status, body.Error)

 	if body.Status == "completed" {
-		respJSON, _ := json.Marshal(map[string]interface{}{
+		respJSON, marshalErr := json.Marshal(map[string]interface{}{
 			"text":          body.ResponsePreview,
 			"delegation_id": delegationID,
 		})
-		if _, err := db.DB.ExecContext(ctx, `
-			INSERT INTO activity_logs (workspace_id, activity_type, method, source_id, summary, response_body, status)
-			VALUES ($1, 'delegation', 'delegate_result', $2, $3, $4::jsonb, 'completed')
-		`, sourceID, sourceID, "Delegation completed ("+textutil.TruncateBytes(body.ResponsePreview, 80)+")", string(respJSON)); err != nil {
-			log.Printf("Delegation UpdateStatus: result insert failed for %s: %v", delegationID, err)
+		if marshalErr != nil {
+			log.Printf("Delegation UpdateStatus %s: json.Marshal respJSON failed: %v", delegationID, marshalErr)
+		} else {
+			if _, err := db.DB.ExecContext(ctx, `
+				INSERT INTO activity_logs (workspace_id, activity_type, method, source_id, summary, response_body, status)
+				VALUES ($1, 'delegation', 'delegate_result', $2, $3, $4::jsonb, 'completed')
+			`, sourceID, sourceID, "Delegation completed ("+textutil.TruncateBytes(body.ResponsePreview, 80)+")", string(respJSON)); err != nil {
+				log.Printf("Delegation UpdateStatus: result insert failed for %s: %v", delegationID, err)
+			}
 		}
 		h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationComplete), sourceID, map[string]interface{}{
 			"delegation_id":    delegationID,
@@ -140,7 +140,14 @@ func buildHTTPResponse(statusCode int, body string) []byte {
 }

 // setupIntegrationFixtures inserts the rows executeDelegation requires:
-//   - workspaces: source and target (siblings, parent_id=NULL so CanCommunicate=true)
+//   - workspaces: source (org root) + target as its CHILD, so both live in the
+//     SAME org. CanCommunicate=true (parent↔child) AND the #1953 sameOrg() guard
+//     in proxyA2ARequest passes (both resolve to the same org root). A real
+//     delegation happens INSIDE one org. (Previously both were parent_id=NULL —
+//     two DISTINCT org roots — which only "communicated" via CanCommunicate's
+//     root-sibling rule; #1953 added a sameOrg() guard that now denies routing
+//     between two org roots as cross-tenant, so the success-path tests below
+//     must use a same-org source/target pair.)
 //   - activity_logs: the 'delegate' row that updateDelegationStatus UPDATE will find
 //   - delegations: the ledger row that recordLedgerStatus will UPDATE
 //
@@ -148,13 +155,14 @@ func buildHTTPResponse(statusCode int, body string) []byte {
 func setupIntegrationFixtures(t *testing.T, conn *sql.DB) func() {
 	t.Helper()
 	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	sourceID := integrationTestSourceID // org root (parent_id NULL); target hangs off it
 	for _, ws := range []struct {
 		id       string
 		name     string
 		parentID *string
 	}{
 		{integrationTestSourceID, "test-source", nil},
-		{integrationTestTargetID, "test-target", nil},
+		{integrationTestTargetID, "test-target", &sourceID}, // child of source → same org
 	} {
 		if _, err := conn.ExecContext(ctx,
 			`INSERT INTO workspaces (id, name, parent_id) VALUES ($1::uuid, $2, $3) ON CONFLICT (id) DO NOTHING`,
@@ -510,6 +518,94 @@ func TestIntegration_ExecuteDelegation_RedisDown_FallsBackToDB(t *testing.T) {
 	}
 }

+// TestIntegration_SameOrg_RealCTE_ResolvesAncestorChain is the regression gate
+// for the org_scope.go recursive-CTE bug (#1953 follow-up). The sqlmock unit
+// tests feed sameOrg() a pre-computed root_id row, so they CANNOT catch a wrong
+// CTE — they assume it already returns the right value. Only a real Postgres
+// run exercises orgRootSubtreeCTE itself.
+//
+// The bug: the CTE carried `id AS root_id` from the recursive SEED, so a
+// non-root workspace resolved to ITSELF instead of its topmost ancestor. That
+// made sameOrg() return false for two genuinely same-org workspaces and 403 a
+// legitimate same-org a2a route (over-block). This test seeds a real
+// root → child → grandchild chain plus a separate org root, and asserts:
+//   - every node in the chain resolves to the SAME org root (root, child, grandchild)
+//   - two workspaces in the same chain are sameOrg (incl. grandchild ↔ root)
+//   - a workspace in a DIFFERENT chain is NOT sameOrg (cross-tenant stays closed)
+func TestIntegration_SameOrg_RealCTE_ResolvesAncestorChain(t *testing.T) {
+	conn := integrationDB(t)
+
+	const (
+		rootA       = "11111111-1111-1111-1111-111111111111"
+		childA      = "22222222-2222-2222-2222-222222222222"
+		grandchildA = "33333333-3333-3333-3333-333333333333"
+		rootB       = "44444444-4444-4444-4444-444444444444"
+	)
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	t.Cleanup(func() {
+		c2, cancel2 := context.WithTimeout(context.Background(), 5*time.Second)
+		defer cancel2()
+		// Delete leaf-first to respect the parent_id self-FK.
+		for _, id := range []string{grandchildA, childA, rootA, rootB} {
+			conn.ExecContext(c2, `DELETE FROM workspaces WHERE id = $1`, id)
+		}
+	})
+
+	// Insert parent-before-child to satisfy the self-referential FK.
+	seed := []struct {
+		id, name string
+		parent   *string
+	}{
+		{rootA, "org-a-root", nil},
+		{childA, "org-a-child", strPtr(rootA)},
+		{grandchildA, "org-a-grandchild", strPtr(childA)},
+		{rootB, "org-b-root", nil},
+	}
+	for _, s := range seed {
+		if _, err := conn.ExecContext(ctx,
+			`INSERT INTO workspaces (id, name, parent_id) VALUES ($1::uuid, $2, $3) ON CONFLICT (id) DO NOTHING`,
+			s.id, s.name, s.parent); err != nil {
+			t.Fatalf("seed %s: %v", s.name, err)
+		}
+	}
+
+	// Every node in chain A must resolve to rootA via the REAL CTE.
+	for _, id := range []string{rootA, childA, grandchildA} {
+		got, err := orgRootID(ctx, conn, id)
+		if err != nil {
+			t.Fatalf("orgRootID(%s): %v", id, err)
+		}
+		if got != rootA {
+			t.Errorf("orgRootID(%s) = %q, want rootA %q (CTE must walk to topmost ancestor)", id, got, rootA)
+		}
+	}
+
+	// Same-org positives — including the grandchild↔root pair that the buggy
+	// CTE got wrong.
+	for _, pair := range [][2]string{{childA, grandchildA}, {rootA, grandchildA}, {rootA, childA}} {
+		ok, err := sameOrg(ctx, conn, pair[0], pair[1])
+		if err != nil {
+			t.Fatalf("sameOrg(%s,%s): %v", pair[0], pair[1], err)
+		}
+		if !ok {
+			t.Errorf("sameOrg(%s,%s) = false, want true (same org chain)", pair[0], pair[1])
+		}
+	}
+
+	// Cross-org negative — isolation must stay closed.
+	for _, pair := range [][2]string{{rootA, rootB}, {grandchildA, rootB}, {childA, rootB}} {
+		ok, err := sameOrg(ctx, conn, pair[0], pair[1])
+		if err != nil {
+			t.Fatalf("sameOrg(%s,%s): %v", pair[0], pair[1], err)
+		}
+		if ok {
+			t.Errorf("sameOrg(%s,%s) = true, want false (different orgs — cross-tenant must stay denied)", pair[0], pair[1])
+		}
+	}
+}
+
 // extractHostPort parses "http://127.0.0.1:PORT/" and returns "127.0.0.1:PORT".
 func extractHostPort(rawURL string) string {
 	// Simple parse: strip "http://" prefix and trailing slash.
@@ -1059,13 +1059,25 @@ func expectExecuteDelegationBase(mock sqlmock.Sqlmock) {
 		WillReturnResult(sqlmock.NewResult(0, 1))

 	// CanCommunicate: getWorkspaceRef(source) + getWorkspaceRef(target).
-	// Both are root-level workspaces (parent_id=NULL) → root-level siblings → allowed.
+	// Source and target are siblings under one shared parent (one tenant) →
+	// CanCommunicate allowed. (#1953: they must NOT both be parent_id=NULL —
+	// two distinct org roots are now treated as DIFFERENT orgs and routing
+	// between them is denied. A real delegation happens inside one org.)
 	mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
 		WithArgs(testDeliverySourceID).
-		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testDeliverySourceID, nil))
+		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testDeliverySourceID, "ws-org-root-159"))
 	mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
 		WithArgs(testDeliveryTargetID).
-		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testDeliveryTargetID, nil))
+		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testDeliveryTargetID, "ws-org-root-159"))
+
+	// #1953 cross-tenant guard: same-org check after CanCommunicate. Both
+	// resolve to the same org root → routing allowed.
+	mock.ExpectQuery("WITH RECURSIVE org_chain AS").
+		WithArgs(testDeliverySourceID).
+		WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow("ws-org-root-159"))
+	mock.ExpectQuery("WITH RECURSIVE org_chain AS").
+		WithArgs(testDeliveryTargetID).
+		WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow("ws-org-root-159"))

 	// resolveAgentURL: test callers always set the URL in Redis (mr.Set ws:{id}:url),
 	// so resolveAgentURL gets a cache hit and never falls back to DB.
@@ -1,464 +0,0 @@
-package handlers
-
-// derive_provider_drift_test.go — behavior-based AST/text drift gate.
-//
-// Why this exists: PR #2535 introduced a Go port of derive-provider.sh
-// (see deriveProviderFromModelSlug in workspace_provision.go) so the
-// workspace-server can persist LLM_PROVIDER into workspace_secrets at
-// provision time. That created two sources of truth:
-//
-//   1. molecule-ai-workspace-template-hermes/scripts/derive-provider.sh —
-//      runs inside the container at boot, has the final say on which
-//      provider hermes targets (writes ~/.hermes/config.yaml's
-//      model.provider field). The shell script lives in a separate
-//      OSS repo, so we vendor a snapshot at testdata/derive-provider.sh
-//      to keep this gate hermetic.
-//   2. workspace-server/internal/handlers/workspace_provision.go's
-//      deriveProviderFromModelSlug — runs at provision time on the
-//      platform side so LLM_PROVIDER lands in workspace_secrets and
-//      survives Save+Restart.
-//
-// If a future PR adds a new provider prefix to one but not the other,
-// the workspace-server's persisted LLM_PROVIDER silently disagrees
-// with what the container's derive-provider.sh produces. The container
-// wins (it writes the actual config.yaml), so the workspace-server's
-// persisted value becomes stale and misleading without anything
-// flipping red in CI.
-//
-// This gate pins the invariant that the *prefix set* the two functions
-// know about is identical, modulo a small hardcoded acceptedDivergences
-// map for the two intentional differences documented in
-// deriveProviderFromModelSlug's doc comment (nousresearch/* and
-// openai/* both fall back to "openrouter" at provision time because
-// the runtime env that picks "nous" / "custom" isn't available yet).
-//
-// Pattern: the "behavior-based AST gate" from PR #2367 / memory
-// feedback_behavior_based_ast_gates — pin invariants by what a
-// function maps, not by what it's named. Walks the actual Go AST of
-// deriveProviderFromModelSlug's switch statement so a rename or a
-// duplicate function in another file can't sneak past the gate.
-//
-// Task: #242. Companion to the table-driven mapping test in
-// workspace_provision_shared_test.go (TestDeriveProviderFromModelSlug)
-// which pins the *values*; this test pins the *coverage* of the
-// prefix set itself.
-//
-// Hermetic: reads two files (vendored shell script + Go source) from
-// paths relative to the test package directory and parses them
-// in-process. No network, no docker, no DB. The vendored shell script
-// at testdata/derive-provider.sh is a snapshot of the upstream OSS
-// template repo's script — refresh it via the cp command in that file's
-// header when upstream changes.
-
-import (
-	"go/ast"
-	"go/parser"
-	"go/token"
-	"os"
-	"regexp"
-	"sort"
-	"strconv"
-	"strings"
-	"testing"
-)
-
-// acceptedDivergences pins the prefixes where the Go port intentionally
-// differs from derive-provider.sh. Each entry's value is the provider
-// the Go function returns; the shell would (at runtime, with the right
-// env keys present) return something else. Documented in
-// deriveProviderFromModelSlug's doc comment in workspace_provision.go.
-//
-// If a NEW divergence appears, this test fails and the engineer must
-// either (a) align the Go function with the shell, or (b) add the
-// prefix here with a comment explaining why the divergence is
-// intentional and safe at provision time.
-var acceptedDivergences = map[string]string{
-	// Shell: "nous" if HERMES_API_KEY/NOUS_API_KEY set, else "openrouter".
-	// Go:    "openrouter" unconditionally — runtime keys aren't loaded at
-	//        provision time. derive-provider.sh upgrades to "nous" at boot
-	//        when the keys are present.
-	"nousresearch": "openrouter",
-	// Shell: "custom" if OPENAI_API_KEY set, "openrouter" if OPENROUTER_API_KEY
-	//        set, else "openrouter" as a no-key fallback.
-	// Go:    "openrouter" unconditionally — same reason as nousresearch/*.
-	//        derive-provider.sh upgrades to "custom" at boot when
-	//        OPENAI_API_KEY is present.
-	"openai": "openrouter",
-}
-
-// TestDeriveProviderDrift_ShellAndGoStayInSync is the drift gate.
-// It extracts the prefix→provider mapping from both sources and
-// asserts:
-//
-//  1. Every prefix the shell knows about, the Go function also handles
-//     (returning either the same provider OR the value pinned in
-//     acceptedDivergences for that prefix).
-//  2. Every prefix the Go function handles (extracted from its switch
-//     statement via go/ast), the shell case statement also lists.
-func TestDeriveProviderDrift_ShellAndGoStayInSync(t *testing.T) {
-	t.Parallel()
-
-	shellMap := loadShellPrefixMap(t)
-	goMap := loadGoPrefixMap(t)
-
-	if len(shellMap) == 0 {
-		t.Fatalf("parsed zero prefixes from derive-provider.sh — regex likely broke; rebuild parser before trusting this gate")
-	}
-	if len(goMap) == 0 {
-		t.Fatalf("parsed zero prefixes from deriveProviderFromModelSlug — AST walk likely broke; rebuild parser before trusting this gate")
-	}
-
-	// Direction 1: every shell prefix must be in the Go map (with the
-	// same provider value, or with the documented divergence).
-	for prefix, shellProvider := range shellMap {
-		goProvider, ok := goMap[prefix]
-		if !ok {
-			t.Errorf(
-				"DRIFT: derive-provider.sh has prefix %q -> %q but deriveProviderFromModelSlug doesn't handle it.\n"+
-					"Fix: either add a case for %q to deriveProviderFromModelSlug in "+
-					"workspace-server/internal/handlers/workspace_provision.go (returning %q to match the shell), "+
-					"OR if this prefix is intentionally provision-time-divergent, add it to acceptedDivergences{} "+
-					"in this test with a comment explaining why.",
-				prefix, shellProvider, prefix, shellProvider,
-			)
-			continue
-		}
-		if goProvider == shellProvider {
-			continue
-		}
-		// Mismatch — only acceptable if it's on the explicit divergence list
-		// AND the Go side returns exactly the documented value.
-		expected, divergenceAllowed := acceptedDivergences[prefix]
-		if !divergenceAllowed {
-			t.Errorf(
-				"DRIFT: prefix %q maps to %q in derive-provider.sh but %q in deriveProviderFromModelSlug.\n"+
-					"Fix: align the Go function with the shell (preferred — they should agree), "+
-					"OR if the divergence is intentional and safe at provision time, "+
-					"add %q: %q to acceptedDivergences{} in this test with a comment explaining why.",
-				prefix, shellProvider, goProvider, prefix, goProvider,
-			)
-			continue
-		}
-		if goProvider != expected {
-			t.Errorf(
-				"DRIFT: prefix %q is on the acceptedDivergences list with expected Go value %q but "+
-					"deriveProviderFromModelSlug now returns %q.\n"+
-					"Fix: update acceptedDivergences[%q] in this test to %q (and update its comment), "+
-					"OR revert the Go function to return %q.",
-				prefix, expected, goProvider, prefix, goProvider, expected,
-			)
-		}
-	}
-
-	// Direction 2: every Go prefix must be in the shell map. Drift in
-	// this direction is rarer (someone added a Go case without touching
-	// the shell) but produces the same broken state — provision-time
-	// LLM_PROVIDER disagrees with what the container actually uses.
-	for prefix, goProvider := range goMap {
-		if _, ok := shellMap[prefix]; ok {
-			continue
-		}
-		t.Errorf(
-			"DRIFT: deriveProviderFromModelSlug handles prefix %q -> %q but derive-provider.sh doesn't list it.\n"+
-				"Fix: add a `%s/*) PROVIDER=%q ;;` case to "+
-				"workspace-configs-templates/hermes/scripts/derive-provider.sh — the Go provision-time hint "+
-				"is meaningless if the container's runtime script doesn't recognize the same prefix.",
-			prefix, goProvider, prefix, goProvider,
-		)
-	}
-
-	// Belt-and-braces: every entry in acceptedDivergences must actually
-	// appear in BOTH maps. A stale divergence entry (prefix removed from
-	// either source) silently weakens the gate.
-	for prefix := range acceptedDivergences {
-		if _, ok := shellMap[prefix]; !ok {
-			t.Errorf(
-				"acceptedDivergences contains prefix %q but derive-provider.sh no longer lists it. "+
-					"Remove the entry from acceptedDivergences{} in this test.",
-				prefix,
-			)
-		}
-		if _, ok := goMap[prefix]; !ok {
-			t.Errorf(
-				"acceptedDivergences contains prefix %q but deriveProviderFromModelSlug no longer lists it. "+
-					"Remove the entry from acceptedDivergences{} in this test.",
-				prefix,
-			)
-		}
-	}
-}
-
-// vendoredShellPath is the testdata snapshot of upstream
-// derive-provider.sh. The path is relative to the test package
-// directory (which is what `go test` sets as cwd). See the file's
-// header for the refresh procedure when upstream changes.
-const vendoredShellPath = "testdata/derive-provider.sh"
-
-// goSourcePath is the file containing deriveProviderFromModelSlug.
-// Relative to the test package directory.
-const goSourcePath = "workspace_provision.go"
-
-// loadShellPrefixMap parses derive-provider.sh and returns a
-// map[prefix]provider for every case clause. Aliases inside a single
-// `pat1/*|pat2/*)` clause expand to one map entry per alias, both
-// pointing at the same provider.
-//
-// Stops at the first `*)` (the catch-all) and ignores it — the
-// catch-all maps to PROVIDER="auto" which has no Go counterpart by
-// design (deriveProviderFromModelSlug returns "" for unknowns and
-// lets the shell's *=auto branch decide at runtime).
-//
-// Ambiguity: case clauses whose body branches on env vars (openai/*,
-// nousresearch/*) are still extracted as the FIRST PROVIDER= literal
-// inside the body. The shell's full conditional logic is documented
-// via the acceptedDivergences map in this file rather than re-encoded
-// in the parser, because re-encoding sh `if` semantics in regex is a
-// fool's errand — the divergences are stable and small enough to
-// hardcode.
-func loadShellPrefixMap(t *testing.T) map[string]string {
-	t.Helper()
-	raw, err := os.ReadFile(vendoredShellPath)
-	if err != nil {
-		t.Fatalf("read %s: %v (refresh from upstream — see file header)", vendoredShellPath, err)
-	}
-
-	// Locate the case statement body so we don't accidentally match
-	// PROVIDER= assignments above the case (the HERMES_INFERENCE_PROVIDER
-	// override + the empty-model fallback both write PROVIDER= before
-	// the case). Upstream renamed the case variable to ${_HERMES_MODEL}
-	// in v0.12.0 (the resolved value of HERMES_INFERENCE_MODEL with a
-	// HERMES_DEFAULT_MODEL legacy fallback); accept either spelling so
-	// this test survives a future rename.
-	caseStart := regexp.MustCompile(`(?m)^case\s+"\$\{(_?HERMES(?:_DEFAULT|_INFERENCE)?_MODEL)\}"\s+in\s*$`)
-	startLoc := caseStart.FindIndex(raw)
-	if startLoc == nil {
-		t.Fatalf("could not locate `case \"${...HERMES...MODEL}\" in` in %s — shell file shape changed; rebuild parser", vendoredShellPath)
-	}
-	caseEnd := regexp.MustCompile(`(?m)^esac\s*$`)
-	endLoc := caseEnd.FindIndex(raw[startLoc[1]:])
-	if endLoc == nil {
-		t.Fatalf("could not locate `esac` after the case statement in %s — shell file shape changed", vendoredShellPath)
-	}
-	body := string(raw[startLoc[1] : startLoc[1]+endLoc[0]])
-
-	out := map[string]string{}
-
-	// Pattern A: single-line clauses like
-	//   minimax-cn/*)            PROVIDER="minimax-cn" ;;
-	//   alibaba/*|dashscope/*|qwen/*) PROVIDER="alibaba" ;;
-	// Capture group 1 is the patterns (e.g. `minimax-cn/*` or
-	// `alibaba/*|dashscope/*|qwen/*`); group 2 is the provider literal.
-	singleLine := regexp.MustCompile(`(?m)^\s*([a-zA-Z0-9_./*|\-]+)\)\s*PROVIDER="([^"]+)"\s*;;`)
-
-	// Pattern B: multi-line clauses like
-	//   openai/*)
-	//     if [ -n "${OPENAI_API_KEY:-}" ]; then
-	//       PROVIDER="custom"
-	//     ...
-	// We capture the patterns and the FIRST PROVIDER= that follows
-	// (before the next `;;`). The acceptedDivergences map handles the
-	// fact that the runtime branching can pick a different value.
-	multiLine := regexp.MustCompile(`(?ms)^\s*([a-zA-Z0-9_./*|\-]+)\)\s*\n(.*?);;`)
-
-	addEntry := func(patterns, provider string) {
-		// Skip the `*)` catch-all — it has no Go counterpart by design.
-		if strings.TrimSpace(patterns) == "*" {
-			return
-		}
-		for _, alt := range strings.Split(patterns, "|") {
-			alt = strings.TrimSpace(alt)
-			// Each alternative is `<prefix>/*` — strip the trailing `/*`.
-			alt = strings.TrimSuffix(alt, "/*")
-			if alt == "" {
-				continue
-			}
-			// First write wins — a single-line match outranks a multi-line
-			// fallback for the same patterns block (defensive; the regexes
-			// shouldn't overlap on the same line in practice).
-			if _, exists := out[alt]; !exists {
-				out[alt] = provider
-			}
-		}
-	}
-
-	// Run single-line first so it claims its lines before the multi-line
-	// pass sees them.
-	consumed := map[int]bool{}
-	for _, m := range singleLine.FindAllStringSubmatchIndex(body, -1) {
-		addEntry(body[m[2]:m[3]], body[m[4]:m[5]])
-		// Mark every line touched so multi-line pass can skip it.
-		for i := m[0]; i < m[1]; i++ {
-			consumed[i] = true
-		}
-	}
-
-	for _, m := range multiLine.FindAllStringSubmatchIndex(body, -1) {
-		// Skip if the start of this match overlaps a single-line clause.
-		if consumed[m[0]] {
-			continue
-		}
-		patterns := body[m[2]:m[3]]
-		clauseBody := body[m[4]:m[5]]
-		// Extract the FIRST PROVIDER="..." from the clause body.
-		firstProvider := regexp.MustCompile(`PROVIDER="([^"]+)"`).FindStringSubmatch(clauseBody)
-		if firstProvider == nil {
-			t.Errorf("multi-line case clause for %q has no PROVIDER= literal — shell file shape changed; rebuild parser", patterns)
-			continue
-		}
-		addEntry(patterns, firstProvider[1])
-	}
-
-	return out
-}
-
-// loadGoPrefixMap parses workspace_provision.go and walks the AST to
-// extract the prefix→provider mapping from deriveProviderFromModelSlug's
-// switch statement.
-//
-// Each case clause's string-literal labels become map keys, all
-// pointing at the provider returned by that case body's `return "..."`
-// statement. A clause like `case "alibaba", "dashscope", "qwen":
-// return "alibaba"` produces three map entries.
-//
-// Skips the default clause (returns ""). Skips any case clause whose
-// body's first statement isn't a single `return STRING_LITERAL` — those
-// would need their own divergence handling and don't currently exist
-// in the function.
-func loadGoPrefixMap(t *testing.T) map[string]string {
-	t.Helper()
-
-	fset := token.NewFileSet()
-	file, err := parser.ParseFile(fset, goSourcePath, nil, parser.ParseComments)
-	if err != nil {
-		t.Fatalf("parse %s: %v", goSourcePath, err)
-	}
-
-	var fn *ast.FuncDecl
-	for _, decl := range file.Decls {
-		f, ok := decl.(*ast.FuncDecl)
-		if !ok {
-			continue
-		}
-		if f.Name.Name == "deriveProviderFromModelSlug" {
-			fn = f
-			break
-		}
-	}
-	if fn == nil {
-		t.Fatalf("could not find deriveProviderFromModelSlug in %s — function renamed/removed; this gate's invariant has been violated", goSourcePath)
-	}
-
-	// Walk the function body for the SwitchStmt.
-	var sw *ast.SwitchStmt
-	ast.Inspect(fn.Body, func(n ast.Node) bool {
-		if s, ok := n.(*ast.SwitchStmt); ok {
-			sw = s
-			return false
-		}
-		return true
-	})
-	if sw == nil {
-		t.Fatalf("no switch statement found in deriveProviderFromModelSlug — function shape changed; rebuild parser")
-	}
-
-	out := map[string]string{}
-	for _, stmt := range sw.Body.List {
-		clause, ok := stmt.(*ast.CaseClause)
-		if !ok {
-			continue
-		}
-		// Default clause has no list — skip.
-		if len(clause.List) == 0 {
-			continue
-		}
-		// Find the first return statement in the clause body.
-		var ret *ast.ReturnStmt
-		for _, bodyStmt := range clause.Body {
-			if r, ok := bodyStmt.(*ast.ReturnStmt); ok {
-				ret = r
-				break
-			}
-		}
-		if ret == nil || len(ret.Results) != 1 {
-			t.Errorf("case clause at %s has no single-value return — function shape changed; gate may be incomplete",
-				fset.Position(clause.Pos()))
-			continue
-		}
-		lit, ok := ret.Results[0].(*ast.BasicLit)
-		if !ok || lit.Kind != token.STRING {
-			t.Errorf("case clause at %s returns a non-literal — gate cannot extract provider value",
-				fset.Position(clause.Pos()))
-			continue
-		}
-		provider, err := strconv.Unquote(lit.Value)
-		if err != nil {
-			t.Errorf("case clause at %s has unparseable string literal %q: %v",
-				fset.Position(clause.Pos()), lit.Value, err)
-			continue
-		}
-
-		for _, expr := range clause.List {
-			lbl, ok := expr.(*ast.BasicLit)
-			if !ok || lbl.Kind != token.STRING {
-				t.Errorf("case clause at %s has a non-string-literal label — gate cannot extract prefix",
-					fset.Position(clause.Pos()))
-				continue
-			}
-			prefix, err := strconv.Unquote(lbl.Value)
-			if err != nil {
-				t.Errorf("case clause at %s has unparseable label literal %q: %v",
-					fset.Position(clause.Pos()), lbl.Value, err)
-				continue
-			}
-			out[prefix] = provider
-		}
-	}
-	return out
-}
-
-// TestDeriveProviderDrift_ShellParserIsSane is a guard test: the shell
-// parser is regex-based, so we sanity-check that it actually finds the
-// well-known prefixes documented in derive-provider.sh's header
-// comment. If this test passes but the main drift test reports
-// missing prefixes, the bug is almost certainly in the regex (not in
-// the production code).
-func TestDeriveProviderDrift_ShellParserIsSane(t *testing.T) {
-	t.Parallel()
-	shellMap := loadShellPrefixMap(t)
-
-	// Anchor prefixes — these have lived in derive-provider.sh since it
-	// was first introduced. If the parser can't find them, it's broken.
-	mustHave := map[string]string{
-		"anthropic":    "anthropic",
-		"minimax":      "minimax",
-		"minimax-cn":   "minimax-cn",
-		"openrouter":   "openrouter",
-		"custom":       "custom",
-		"alibaba":      "alibaba", // in an alias group with dashscope/qwen
-		"dashscope":    "alibaba", // ditto
-		"qwen":         "alibaba", // ditto
-		"openai":       "custom",  // multi-line; first PROVIDER= is "custom"
-		"nousresearch": "nous",    // multi-line; first PROVIDER= is "nous"
-	}
-
-	missing := []string{}
-	wrong := []string{}
-	for prefix, want := range mustHave {
-		got, ok := shellMap[prefix]
-		if !ok {
-			missing = append(missing, prefix)
-			continue
-		}
-		if got != want {
-			wrong = append(wrong, prefix+" got="+got+" want="+want)
-		}
-	}
-	sort.Strings(missing)
-	sort.Strings(wrong)
-	if len(missing) > 0 {
-		t.Errorf("shell parser failed to extract anchor prefixes: %v", missing)
-	}
-	if len(wrong) > 0 {
-		t.Errorf("shell parser extracted wrong values for anchor prefixes: %v", wrong)
-	}
-}
@@ -237,7 +237,17 @@ func (h *DiscoveryHandler) Peers(c *gin.Context) {

 	var peers []map[string]interface{}

-	// Siblings
+	// Siblings — workspaces sharing the caller's parent.
+	//
+	// #1953 cross-tenant isolation: the OLD code's else-branch handled the
+	// org-root caller (parent_id IS NULL) by returning EVERY workspace with
+	// parent_id IS NULL — i.e. every other tenant's org root, since the
+	// workspaces table has no org_id column. That leaked peer identities/URLs
+	// across tenants. An org root has no siblings inside its own org (each
+	// tenant is a distinct org root), so the org-root caller now gets an empty
+	// sibling set; its real peers are its children, returned below. Only the
+	// parent_id-bound branch enumerates siblings, and that is already scoped to
+	// one parent (one tenant).
 	if parentID.Valid {
 		siblings, _ := queryPeerMaps(`
 			SELECT w.id, w.name, COALESCE(w.role, ''), w.tier, w.status,
@@ -246,14 +256,6 @@ func (h *DiscoveryHandler) Peers(c *gin.Context) {
 			FROM workspaces w WHERE w.parent_id = $1 AND w.id != $2 AND w.status != 'removed'`,
 			parentID.String, workspaceID)
 		peers = append(peers, siblings...)
-	} else {
-		siblings, _ := queryPeerMaps(`
-			SELECT w.id, w.name, COALESCE(w.role, ''), w.tier, w.status,
-				   COALESCE(w.agent_card, 'null'::jsonb), COALESCE(w.url, ''),
-				   w.parent_id, w.active_tasks
-			FROM workspaces w WHERE w.parent_id IS NULL AND w.id != $1 AND w.status != 'removed'`,
-			workspaceID)
-		peers = append(peers, siblings...)
 	}

 	// Children — exclude self defensively. A child row whose parent_id
@@ -223,10 +223,10 @@ func TestPeers_RootWorkspace_NoPeers(t *testing.T) {

 	peerCols := []string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}

-	// Siblings (other root-level workspaces) — none
-	mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id IS NULL AND w.id != \\$1").
-		WithArgs("ws-root-alone").
-		WillReturnRows(sqlmock.NewRows(peerCols))
+	// #1953: an org-root caller (parent_id IS NULL) now issues NO sibling
+	// query at all. The old `WHERE w.parent_id IS NULL` sibling read returned
+	// EVERY tenant's org root (cross-tenant leak); an org root has no siblings
+	// inside its own org, so the handler skips the sibling read entirely.

 	// Children — none. #383 added explicit `w.id != $2` self-filter.
 	mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.id != \\$2").
@@ -155,7 +155,10 @@ func generateAppInstallationToken() (string, time.Time, error) {
 	if err != nil {
 		return "", time.Time{}, fmt.Errorf("sign JWT: %w", err)
 	}
-	req, _ := http.NewRequest("POST", fmt.Sprintf("https://api.github.com/app/installations/%d/access_tokens", installID), nil)
+	req, err := http.NewRequest("POST", fmt.Sprintf("https://api.github.com/app/installations/%d/access_tokens", installID), nil)
+	if err != nil {
+		return "", time.Time{}, fmt.Errorf("build request: %w", err)
+	}
 	req.Header.Set("Authorization", "Bearer "+signed)
 	req.Header.Set("Accept", "application/vnd.github+json")
 	client := &http.Client{Timeout: 30 * time.Second}
@@ -164,6 +167,9 @@ func generateAppInstallationToken() (string, time.Time, error) {
 		return "", time.Time{}, err
 	}
 	defer func() { _ = resp.Body.Close() }()
+	if resp.StatusCode != http.StatusCreated {
+		return "", time.Time{}, fmt.Errorf("github token endpoint returned status %d", resp.StatusCode)
+	}
 	var result struct {
 		Token     string    `json:"token"`
 		ExpiresAt time.Time `json:"expires_at"`
@@ -255,9 +255,21 @@ func TestExtended_SecretsListEmpty(t *testing.T) {
 // ---------- TestSecretsSet (Extended) ----------

 func TestExtended_SecretsSet(t *testing.T) {
+	// internal#718 P2-B: the per-workspace strip gate keys off the DERIVED mode
+	// (org rung retired). This test's intent is the happy path of persisting a
+	// vendor key on a byok workspace; the realistic way a workspace is byok for
+	// a direct vendor-key write is an explicit operator override (the escape
+	// hatch the reject error itself points to: PUT /admin/.../llm-billing-mode).
+	// The override short-circuits the resolver to byok in a single read, so the
+	// bypass-list check is skipped and the write proceeds.
+	t.Setenv("MOLECULE_LLM_BILLING_MODE", "platform_managed") // org env ignored now
 	mock := setupTestDB(t)
 	handler := NewSecretsHandler(nil)

+	mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
+		WithArgs("22222222-2222-2222-2222-222222222222").
+		WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK))
+
 	// Expect INSERT (encrypted value is dynamic, use AnyArg)
 	mock.ExpectExec("INSERT INTO workspace_secrets").
 		WithArgs("22222222-2222-2222-2222-222222222222", "OPENAI_API_KEY", sqlmock.AnyArg(), sqlmock.AnyArg()).
@@ -293,6 +305,26 @@ func TestExtended_SecretsSet(t *testing.T) {
 	}
 }

+func TestExtended_SecretsSetRejectsHermesCustomProviderInPlatformManagedMode(t *testing.T) {
+	t.Setenv("MOLECULE_LLM_BILLING_MODE", "platform_managed")
+	_ = setupTestDB(t)
+	handler := NewSecretsHandler(nil)
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Params = gin.Params{{Key: "id", Value: "22222222-2222-2222-2222-222222222222"}}
+
+	body := `{"key":"KIMI_API_KEY","value":"sk-test-moonshot"}`
+	c.Request = httptest.NewRequest("POST", "/workspaces/22222222-2222-2222-2222-222222222222/secrets", bytes.NewBufferString(body))
+	c.Request.Header.Set("Content-Type", "application/json")
+
+	handler.Set(c)
+
+	if w.Code != http.StatusBadRequest {
+		t.Fatalf("expected status 400, got %d: %s", w.Code, w.Body.String())
+	}
+}
+
 // ---------- TestSecretsDelete (Extended) ----------

 func TestExtended_SecretsDelete(t *testing.T) {
@@ -419,6 +451,14 @@ func TestExtended_DiscoverMissingHeader(t *testing.T) {

 // ---------- TestPeers (Extended) ----------

+// TestExtended_Peers verifies a root-level (org-root) workspace's peer view.
+//
+// #1953: previously a root-level caller issued `WHERE w.parent_id IS NULL`
+// for siblings, which returned EVERY other tenant's org root as a "peer"
+// (cross-tenant leak, since the workspaces table has no org_id column). After
+// the fix an org root has no cross-tenant siblings; its only peers are its own
+// children. This test asserts the child is returned and that NO sibling query
+// is issued (no `parent_id IS NULL` read).
 func TestExtended_Peers(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)
@@ -429,17 +469,14 @@ func TestExtended_Peers(t *testing.T) {
 		WithArgs("ws-peer").
 		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))

-	// Expect root-level siblings query (parent IS NULL, excluding self)
-	mock.ExpectQuery("SELECT w.id, w.name").
-		WithArgs("ws-peer").
-		WillReturnRows(sqlmock.NewRows([]string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}).
-			AddRow("ws-sibling", "Sibling Agent", "worker", 1, "online", []byte("null"), "http://localhost:9001", nil, 0))
+	// NO root-level sibling query is issued for an org-root caller anymore.

-	// Expect children query (workspaces with parent_id = ws-peer, excluding self)
-	// Query now binds (parent_id, self_id) for the self-filter guard added in #383.
+	// Children query (workspaces with parent_id = ws-peer, excluding self).
+	// Query binds (parent_id, self_id) for the self-filter guard added in #383.
 	mock.ExpectQuery("SELECT w.id, w.name").
 		WithArgs("ws-peer", "ws-peer").
-		WillReturnRows(sqlmock.NewRows([]string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}))
+		WillReturnRows(sqlmock.NewRows([]string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}).
+			AddRow("ws-child", "Child Agent", "worker", 1, "online", []byte("null"), "http://localhost:9001", "ws-peer", 0))

 	// No parent query since workspace is root-level

@@ -459,10 +496,10 @@ func TestExtended_Peers(t *testing.T) {
 		t.Fatalf("failed to parse response: %v", err)
 	}
 	if len(resp) != 1 {
-		t.Fatalf("expected 1 peer, got %d", len(resp))
+		t.Fatalf("expected 1 peer (the child), got %d", len(resp))
 	}
-	if resp[0]["name"] != "Sibling Agent" {
-		t.Errorf("expected peer name 'Sibling Agent', got %v", resp[0]["name"])
+	if resp[0]["name"] != "Child Agent" {
+		t.Errorf("expected peer name 'Child Agent', got %v", resp[0]["name"])
 	}

 	if err := mock.ExpectationsWereMet(); err != nil {
@@ -12,12 +12,12 @@ import (
 	"testing"
 	"time"

-	"github.com/DATA-DOG/go-sqlmock"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/events"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/ws"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/wsauth"
+	"github.com/DATA-DOG/go-sqlmock"
 	"github.com/alicebob/miniredis/v2"
 	"github.com/gin-gonic/gin"
 	"github.com/redis/go-redis/v9"
@@ -158,9 +158,11 @@ func allowLoopbackForTest(t *testing.T) {
 // handler in the 2026-04-18 restructure but the tests never caught up,
 // leaving Platform (Go) CI red for weeks.
 func expectBudgetCheck(mock sqlmock.Sqlmock, workspaceID string) {
-	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\) FROM workspaces WHERE id = \$1`).
+	// Multi-period (#49): checkWorkspaceBudget reads budget_limits jsonb. An
+	// empty map → no limits → returns early (no spend query), enforcement skipped.
+	mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
 		WithArgs(workspaceID).
-		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}))
+		WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte("{}")))
 }

 // ---------- TestRegisterHandler ----------
@@ -169,7 +169,13 @@ func (h *InstructionsHandler) Update(c *gin.Context) {
 		c.JSON(http.StatusInternalServerError, gin.H{"error": "update failed"})
 		return
 	}
-	if n, _ := result.RowsAffected(); n == 0 {
+	n, err := result.RowsAffected()
+	if err != nil {
+		log.Printf("Instructions update RowsAffected error: %v", err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "update failed"})
+		return
+	}
+	if n == 0 {
 		c.JSON(http.StatusNotFound, gin.H{"error": "instruction not found"})
 		return
 	}
@@ -186,7 +192,13 @@ func (h *InstructionsHandler) Delete(c *gin.Context) {
 		c.JSON(http.StatusInternalServerError, gin.H{"error": "delete failed"})
 		return
 	}
-	if n, _ := result.RowsAffected(); n == 0 {
+	n, err := result.RowsAffected()
+	if err != nil {
+		log.Printf("Instructions delete RowsAffected error: %v", err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "delete failed"})
+		return
+	}
+	if n == 0 {
 		c.JSON(http.StatusNotFound, gin.H{"error": "instruction not found"})
 		return
 	}
@@ -0,0 +1,478 @@
+package handlers
+
+// llm_billing_mode.go — per-workspace LLM billing mode resolution (internal#691).
+//
+// The resolver answers a single question at provision time:
+//   "Should we strip CLAUDE_CODE_OAUTH_TOKEN + every vendor key from this
+//    workspace's env, force-route to the CP proxy, and bill org credits?"
+//
+// That question used to be a single env-var read inside applyPlatformManagedLLMEnv:
+//
+//   os.Getenv("MOLECULE_LLM_BILLING_MODE") == "platform_managed"  → strip
+//
+// where MOLECULE_LLM_BILLING_MODE was an ORG-level value, fetched from CP's
+// tenant_config and exported into the workspace-server process at boot. That
+// shape made it impossible to mix billing modes across workspaces in the same
+// org: turning the org dial to `byok` so one workspace could keep its OAuth
+// stops the strip for EVERY workspace in the org. Turning it to `platform_managed`
+// blocks every workspace's own OAuth/vendor keys.
+//
+// The resolver replaces the env-var read with a per-workspace lookup:
+//
+//   workspaces.llm_billing_mode (per-workspace override, NULLABLE)
+//     ?? organizations.llm_billing_mode (org default, fetched via tenant_config)
+//     ?? "platform_managed" (closed default — the existing implicit default)
+//
+// Default-closed contract — non-negotiable per the RFC Safety axis:
+//
+//   - workspace row missing (sql.ErrNoRows)         → fall through to org default
+//   - DB error on the lookup                         → "platform_managed" + propagated error
+//   - workspace override = NULL                      → fall through to org default
+//   - workspace override = unknown string            → "platform_managed" (default-closed)
+//   - org default = NULL / empty / unknown string    → "platform_managed" (closed default)
+//   - org default = recognized non-pm string + ws null → org default (byok/disabled honored)
+//
+// The ONLY way to resolve to "byok" or "disabled" is an explicit, recognized
+// string in the workspace override OR the org default. A NULL JOIN, transient
+// resolver error, or garbled enum value MUST NOT silently flip a workspace
+// off of platform_managed — that would shadow the org's billing policy and
+// is the exact failure mode the RFC's Safety hot-spot calls out.
+
+import (
+	"context"
+	"database/sql"
+	"errors"
+	"fmt"
+	"log"
+	"sync"
+
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/crypto"
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
+)
+
+// providerManifest is the parsed provider registry, loaded once. The registry
+// is embedded (go:embed, no network) and immutable for the process lifetime, so
+// a single Load is safe to memoize. A load failure is cached too (registryErr):
+// it can only happen on a malformed embedded YAML, which is a build-time defect
+// the verify-providers-gen + sync gates already catch, so failing closed
+// (treat as "cannot derive" → platform default) is correct and we don't retry.
+var (
+	providerRegistryOnce     sync.Once
+	providerRegistryManifest *providers.Manifest
+	providerRegistryErr      error
+)
+
+func providerRegistry() (*providers.Manifest, error) {
+	providerRegistryOnce.Do(func() {
+		providerRegistryManifest, providerRegistryErr = providers.LoadManifest()
+		if providerRegistryErr != nil {
+			log.Printf("llm_billing_mode: FATAL — provider registry failed to load: %v (billing will default-closed to platform_managed)", providerRegistryErr)
+		}
+	})
+	return providerRegistryManifest, providerRegistryErr
+}
+
+// Constants mirror molecule-controlplane/internal/credits/llm_billing.go.
+// Kept as string literals (not imports) because workspace-server has no
+// build-time dependency on the CP module; the values are stable wire
+// strings used in the tenant_config response, the workspaces.llm_billing_mode
+// column check constraint, and the CP route bodies.
+const (
+	LLMBillingModePlatformManaged = "platform_managed"
+	LLMBillingModeBYOK            = "byok"
+	LLMBillingModeDisabled        = "disabled"
+)
+
+// BillingModeSource describes which layer of the resolution stack supplied
+// the final mode. Surfaced via the admin route for operator debug
+// ("why is this workspace being stripped?") per the RFC Observability axis.
+type BillingModeSource string
+
+const (
+	BillingModeSourceWorkspaceOverride BillingModeSource = "workspace_override"
+	BillingModeSourceOrgDefault        BillingModeSource = "org_default"
+	BillingModeSourceConstantFallback  BillingModeSource = "constant_fallback"
+	// BillingModeSourceDerivedProvider means the mode was DERIVED from the
+	// workspace's (runtime, model) via the provider registry — the SSOT
+	// (internal#718 P2-B). IsPlatform(derived) → platform_managed, else byok.
+	// This is the highest-precedence source after an explicit operator override
+	// and SUPERSEDES the prior stored-LLM_PROVIDER read (#1966).
+	BillingModeSourceDerivedProvider BillingModeSource = "derived_provider"
+	// BillingModeSourceDerivedDefault means the registry could not derive a
+	// provider for the (runtime, model) — no model, unknown runtime,
+	// unregistered/ambiguous model — so the mode defaulted closed to
+	// platform_managed (CTO-confirmed "unset → platform default"). Distinct from
+	// derived_provider so operators can see "we defaulted" vs "we derived
+	// platform".
+	BillingModeSourceDerivedDefault BillingModeSource = "derived_default"
+)
+
+// BillingModeResolution is the structured answer the admin GET route returns
+// and the strip gate logs at INFO. The same struct is the unit-test fixture
+// shape, so the resolver test asserts both the mode AND the source per case
+// (catches a bug where the right mode is returned via the wrong layer).
+type BillingModeResolution struct {
+	WorkspaceID       string            `json:"workspace_id"`
+	ResolvedMode      string            `json:"resolved_mode"`
+	WorkspaceOverride *string           `json:"workspace_override"` // nil = inherit
+	OrgDefault        string            `json:"org_default"`        // RETIRED as a billing source (internal#718 P2-B); always platform_managed, kept for wire-compat
+	Source            BillingModeSource `json:"source"`
+	// ProviderSelection surfaces the DERIVED provider name (internal#718 P2-B)
+	// when the mode came from the registry derivation — the literal provider the
+	// (runtime, model) resolved to (e.g. "platform", "kimi-coding", "openai"), or
+	// the raw model id when derivation failed. nil when an explicit operator
+	// override or the empty-id default decided. Lets the admin route answer "why
+	// is this workspace byok?" with the derived provider, not a stored value.
+	ProviderSelection *string `json:"provider_selection"`
+}
+
+// isKnownBillingMode is the enum-recognizer for the resolver's default-closed
+// branch. Returning false for an unknown string forces the resolver to fall
+// through to the next layer (or the constant fallback) — NEVER to honor a
+// garbled value as if it were valid. This is what makes a row with mode='byokk'
+// (typo) resolve to platform_managed instead of accidentally to byok.
+func isKnownBillingMode(s string) bool {
+	switch s {
+	case LLMBillingModePlatformManaged, LLMBillingModeBYOK, LLMBillingModeDisabled:
+		return true
+	default:
+		return false
+	}
+}
+
+// readWorkspaceBillingOverride reads the OPTIONAL explicit operator override
+// (workspaces.llm_billing_mode). Returns:
+//
+//	(mode, true,  nil) — a recognized override is set → operator pinned the mode
+//	("",   false, nil) — NULL / garbled / row-missing → no explicit override
+//	("",   false, err) — DB error → caller defaults closed + propagates
+//
+// internal#718 P2-B retires the org rung; this column is the ONLY stored
+// billing signal that survives, and ONLY as an explicit override on top of the
+// derived provider (CTO 2026-05-27).
+func readWorkspaceBillingOverride(ctx context.Context, workspaceID string) (string, bool, error) {
+	var wsOverride sql.NullString
+	err := db.DB.QueryRowContext(ctx,
+		`SELECT llm_billing_mode FROM workspaces WHERE id = $1`,
+		workspaceID,
+	).Scan(&wsOverride)
+	switch {
+	case errors.Is(err, sql.ErrNoRows):
+		return "", false, nil
+	case err != nil:
+		return "", false, fmt.Errorf("resolve workspace llm_billing_mode override for %s: %w", workspaceID, err)
+	}
+	if wsOverride.Valid && isKnownBillingMode(wsOverride.String) {
+		return wsOverride.String, true, nil
+	}
+	return "", false, nil
+}
+
+// ResolveLLMBillingModeDerived is the SSOT billing-mode resolver (internal#718
+// P2-B). It DERIVES the provider from (runtime, model) via the provider
+// registry and decides platform-vs-byok from IsPlatform(derived) — it does NOT
+// read a stored LLM_PROVIDER (superseding #1966's stored-read approach) and
+// does NOT read the org rung (retired, CTO 2026-05-27).
+//
+// Precedence (highest first):
+//
+//  1. EXPLICIT operator override (workspaces.llm_billing_mode, a recognized
+//     value). The only stored billing signal that survives — an escape hatch,
+//     not the primary signal.
+//  2. DERIVE: providers.DeriveProvider(runtime, model, availableAuthEnv).
+//     - resolves to the closed `platform` provider → platform_managed
+//     - resolves to any other (BYOK/third-party) provider → byok  ← THE FIX
+//  3. DEFAULT-CLOSED: derive fails (no model, unknown runtime, unregistered or
+//     ambiguous model) → platform_managed (CTO "unset → platform default"). A
+//     derive failure NEVER silently flips a workspace to byok (which would
+//     strip the platform creds it may legitimately need).
+//
+// availableAuthEnv is the set of auth-env-var NAMES present for the workspace
+// (never secret values) — the same disambiguation input DeriveProvider uses to
+// split anthropic-oauth from anthropic-api. May be nil.
+//
+// A returned error never prevents a decision: ResolvedMode is always a valid
+// enum value (default-closed). The error is informational (log + surface).
+func ResolveLLMBillingModeDerived(ctx context.Context, workspaceID, runtime, model string, availableAuthEnv []string) (BillingModeResolution, error) {
+	res := BillingModeResolution{
+		WorkspaceID: workspaceID,
+		// OrgDefault is retired as a billing source (internal#718 P2-B). Kept on
+		// the struct for wire-compat (admin route / CP mirror) but always the
+		// closed constant — never consulted in the decision.
+		OrgDefault: LLMBillingModePlatformManaged,
+	}
+
+	// Pre-provision context (no workspace row yet): no override to read, default
+	// closed. (DeriveProvider could still run from the passed runtime/model, but
+	// the no-id path historically does no DB work and the strip gate only runs
+	// post-create, so keep it a pure default to preserve that contract.)
+	if workspaceID == "" {
+		res.ResolvedMode = LLMBillingModePlatformManaged
+		res.Source = BillingModeSourceDerivedDefault
+		return res, nil
+	}
+
+	// Precedence 1: explicit operator override.
+	if mode, ok, err := readWorkspaceBillingOverride(ctx, workspaceID); err != nil {
+		// DB error — default closed AND propagate (never flip on a transient error).
+		res.ResolvedMode = LLMBillingModePlatformManaged
+		res.Source = BillingModeSourceConstantFallback
+		return res, err
+	} else if ok {
+		m := mode
+		res.WorkspaceOverride = &m
+		res.ResolvedMode = mode
+		res.Source = BillingModeSourceWorkspaceOverride
+		return res, nil
+	}
+
+	// Precedence 2: DERIVE the provider from (runtime, model).
+	manifest, mErr := providerRegistry()
+	if mErr != nil || manifest == nil {
+		// Registry unavailable (malformed embedded YAML — a build-time defect the
+		// gates catch). Default closed.
+		res.ResolvedMode = LLMBillingModePlatformManaged
+		res.Source = BillingModeSourceDerivedDefault
+		return res, mErr
+	}
+	provider, dErr := manifest.DeriveProvider(runtime, model, availableAuthEnv)
+	if dErr != nil {
+		// No model / unknown runtime / unregistered / ambiguous → default closed.
+		// NOT an error to the caller: an unregistered model is a legitimate
+		// "we can't say it's BYOK, so bill the platform default" outcome, and the
+		// only-registered gate at the create/config API is where an unregistered
+		// model is rejected loudly. Here we just fail closed for safety.
+		res.ResolvedMode = LLMBillingModePlatformManaged
+		res.Source = BillingModeSourceDerivedDefault
+		sel := model
+		if sel != "" {
+			res.ProviderSelection = &sel
+		}
+		return res, nil
+	}
+	derivedName := provider.Name
+	res.ProviderSelection = &derivedName
+	res.Source = BillingModeSourceDerivedProvider
+	if provider.IsPlatform() {
+		res.ResolvedMode = LLMBillingModePlatformManaged
+	} else {
+		// A specific (non-platform) vendor was derived → bring-your-own-key.
+		res.ResolvedMode = LLMBillingModeBYOK
+	}
+	return res, nil
+}
+
+// ResolveLLMBillingMode is the legacy-signature resolver retained for callers
+// that do not have (runtime, model) in hand (the admin GET/PUT route and the
+// secrets remote-pull path). It reads the workspace's stored runtime + model +
+// available auth env from the DB and delegates to the DERIVED resolver
+// (internal#718 P2-B) — the orgMode parameter is RETIRED (the org rung is no
+// longer a billing source) and is ignored; it stays in the signature only to
+// avoid churning the two callers in this PR. The architectural test asserts no
+// remaining code path gates on os.Getenv("MOLECULE_LLM_BILLING_MODE") for the
+// strip decision (that env is no longer read into the decision at all).
+//
+// Returning an error does NOT prevent the caller from making a decision —
+// the returned mode is always a valid enum value (default-closed to
+// platform_managed) so the caller can proceed without a separate fail-closed
+// branch. The error is informational: log it, surface it to operators, but
+// the strip-gate decision is already safe.
+func ResolveLLMBillingMode(ctx context.Context, workspaceID, orgMode string) (BillingModeResolution, error) {
+	_ = orgMode // org rung retired (internal#718 P2-B); parameter ignored.
+
+	if workspaceID == "" {
+		// Pre-provision context (templating, validation): default closed, no DB.
+		return ResolveLLMBillingModeDerived(ctx, "", "", "", nil)
+	}
+
+	// Precedence 1: explicit operator override. Read it FIRST so an overridden
+	// workspace short-circuits without the extra runtime/secrets reads (and so
+	// the query order is override → runtime → secrets, matching the derived
+	// resolver's own override-first precedence).
+	if mode, ok, err := readWorkspaceBillingOverride(ctx, workspaceID); err != nil {
+		return BillingModeResolution{
+			WorkspaceID:  workspaceID,
+			OrgDefault:   LLMBillingModePlatformManaged,
+			ResolvedMode: LLMBillingModePlatformManaged,
+			Source:       BillingModeSourceConstantFallback,
+		}, err
+	} else if ok {
+		m := mode
+		return BillingModeResolution{
+			WorkspaceID:       workspaceID,
+			OrgDefault:        LLMBillingModePlatformManaged,
+			ResolvedMode:      mode,
+			WorkspaceOverride: &m,
+			Source:            BillingModeSourceWorkspaceOverride,
+		}, nil
+	}
+
+	// Precedence 2: DERIVE. Read the stored (runtime, model, available-auth-env)
+	// so the derived resolver can DeriveProvider for callers that don't carry
+	// them (admin route, secrets remote-pull). A read miss/error degrades
+	// gracefully: pass the empty/partial inputs through — DeriveProvider then
+	// errors and the derived resolver defaults closed to platform_managed.
+	//
+	// ResolveLLMBillingModeDerived re-reads the override (NULL again here) before
+	// deriving; that one extra cheap read keeps the derived resolver a complete,
+	// independently-callable SSOT rather than splitting its precedence across two
+	// functions.
+	runtime, model, authEnv := readWorkspaceDeriveInputs(ctx, workspaceID)
+	return ResolveLLMBillingModeDerived(ctx, workspaceID, runtime, model, authEnv)
+}
+
+// readWorkspaceDeriveInputs loads the workspace's stored runtime + selected
+// model + the auth-env-var NAMES present in its secrets — the inputs
+// DeriveProvider needs. Best-effort: any read error returns whatever was
+// gathered (the derived resolver fails closed on incomplete inputs). The model
+// is the MODEL workspace_secret (the canvas-picked id, written by setModelSecret
+// / Create); runtime is the workspaces.runtime column (defaults claude-code).
+// availableAuthEnv is the subset of secret KEYS that are recognized provider
+// auth-env names (never values), so DeriveProvider's auth-env tie-break can fire
+// the same way it does on the provision path.
+func readWorkspaceDeriveInputs(ctx context.Context, workspaceID string) (runtime, model string, availableAuthEnv []string) {
+	var rt sql.NullString
+	if err := db.DB.QueryRowContext(ctx,
+		`SELECT runtime FROM workspaces WHERE id = $1`, workspaceID,
+	).Scan(&rt); err != nil {
+		if !errors.Is(err, sql.ErrNoRows) {
+			log.Printf("llm_billing_mode: read runtime for %s: %v (deriving with empty runtime)", workspaceID, err)
+		}
+	}
+	runtime = rt.String
+	if runtime == "" {
+		// Mirror the DB column default so an unset runtime still derives.
+		runtime = "claude-code"
+	}
+
+	// Gather model + auth-env-name keys from workspace_secrets in one pass.
+	authSet := authEnvNameSet()
+	rows, err := db.DB.QueryContext(ctx,
+		`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = $1`,
+		workspaceID,
+	)
+	if err != nil {
+		log.Printf("llm_billing_mode: read secrets for %s: %v (deriving with no model/auth-env)", workspaceID, err)
+		return runtime, model, availableAuthEnv
+	}
+	defer rows.Close()
+	for rows.Next() {
+		var k string
+		var v []byte
+		var ver int
+		if rows.Scan(&k, &v, &ver) != nil {
+			continue
+		}
+		if k == "MODEL" {
+			if dec, derr := crypto.DecryptVersioned(v, ver); derr == nil {
+				model = string(dec)
+			}
+			continue
+		}
+		// Only the KEY matters for auth-env disambiguation (the value is the
+		// secret; we never decrypt it for this purpose). Record recognized
+		// provider auth-env names.
+		if _, ok := authSet[k]; ok {
+			availableAuthEnv = append(availableAuthEnv, k)
+		}
+	}
+	return runtime, model, availableAuthEnv
+}
+
+// authEnvNameSet is the union of every provider's auth_env names in the
+// registry — the recognized set readWorkspaceDeriveInputs filters secret keys
+// against. Loaded once from the registry so it stays in sync with the SSOT (no
+// hardcoded auth-env vocabulary). Registry-load failure yields an empty set
+// (derive then runs without the auth-env tie-break, which only matters for the
+// oauth-vs-api overlap; safe — it errors to default-closed rather than guessing).
+var (
+	authEnvNameSetOnce sync.Once
+	authEnvNameSetVal  map[string]struct{}
+)
+
+func authEnvNameSet() map[string]struct{} {
+	authEnvNameSetOnce.Do(func() {
+		authEnvNameSetVal = map[string]struct{}{}
+		m, err := providerRegistry()
+		if err != nil || m == nil {
+			return
+		}
+		for _, p := range m.Providers {
+			for _, e := range p.AuthEnv {
+				authEnvNameSetVal[e] = struct{}{}
+			}
+		}
+	})
+	return authEnvNameSetVal
+}
+
+// availableAuthEnvNames returns the recognized provider auth-env-var NAMES
+// present (non-empty) in envVars — the DeriveProvider auth-env tie-break input.
+// Never returns secret VALUES, only the env-var names. Used by the provision
+// path (applyPlatformManagedLLMEnv), which already has the workspace env in
+// hand, so it derives without a secrets DB round-trip.
+func availableAuthEnvNames(envVars map[string]string) []string {
+	authSet := authEnvNameSet()
+	var out []string
+	for k, v := range envVars {
+		if v == "" {
+			continue
+		}
+		if _, ok := authSet[k]; ok {
+			out = append(out, k)
+		}
+	}
+	return out
+}
+
+// derefOrEmpty returns the pointed-to string or "" for a nil pointer. Used in
+// log lines that surface an optional *string field.
+func derefOrEmpty(s *string) string {
+	if s == nil {
+		return ""
+	}
+	return *s
+}
+
+// SetWorkspaceLLMBillingMode writes the override column. Pass mode=="" to
+// clear (set to NULL = inherit). Validates the mode against the enum set
+// so the route handler doesn't have to duplicate validation; a garbled
+// mode round-trips as an explicit 400 from the caller, not a CHECK-
+// constraint error from the DB driver.
+func SetWorkspaceLLMBillingMode(ctx context.Context, workspaceID, mode string) error {
+	if workspaceID == "" {
+		return errors.New("SetWorkspaceLLMBillingMode: workspace id required")
+	}
+	if mode == "" {
+		// NULL = inherit. Caller asked to clear the override.
+		res, err := db.DB.ExecContext(ctx,
+			`UPDATE workspaces SET llm_billing_mode = NULL WHERE id = $1`,
+			workspaceID,
+		)
+		if err != nil {
+			return fmt.Errorf("clear workspace llm_billing_mode for %s: %w", workspaceID, err)
+		}
+		n, _ := res.RowsAffected()
+		if n == 0 {
+			return sql.ErrNoRows
+		}
+		return nil
+	}
+	if !isKnownBillingMode(mode) {
+		return fmt.Errorf("unknown billing mode %q (allowed: %s, %s, %s)",
+			mode, LLMBillingModePlatformManaged, LLMBillingModeBYOK, LLMBillingModeDisabled)
+	}
+	res, err := db.DB.ExecContext(ctx,
+		`UPDATE workspaces SET llm_billing_mode = $1 WHERE id = $2`,
+		mode, workspaceID,
+	)
+	if err != nil {
+		return fmt.Errorf("set workspace llm_billing_mode for %s: %w", workspaceID, err)
+	}
+	n, _ := res.RowsAffected()
+	if n == 0 {
+		return sql.ErrNoRows
+	}
+	return nil
+}
--- a/Show More
+++ b/Show More