Merge main into fix/internal-804-parser-json-variant to pick up e2e-chat curl fix

Merge pull request 'fix(ci): wait for platform /health on a migration-chain-proof budget (#2205 )' (#2206 ) from fix/e2e-api-health-wait-migration-chain into main
Merge pull request 'test(2163-followup): tighten live-fire freshness check via run_id parsing' (#2173 ) from fix/2163-cr2-live-fire-freshness into main
2026-06-04 05:48:11 +00:00 · 2026-06-04 05:32:28 +00:00 · 2026-06-04 05:30:21 +00:00 · 2026-06-04 05:30:17 +00:00 · 2026-06-04 05:18:12 +00:00 · 2026-06-03 21:48:03 -07:00
59 changed files with 4663 additions and 206 deletions
@@ -8,7 +8,8 @@ pair diverges.
 Sources:
  A. `.gitea/workflows/ci.yml` jobs  (CI source — the actual job set)
  B. `status_check_contexts` in branch_protections (the merge gate)
-  C. `REQUIRED_CHECKS` env in audit-force-merge.yml (the audit env)
+  C. `REQUIRED_CHECKS_JSON` (preferred) or `REQUIRED_CHECKS` (legacy)
+     env in audit-force-merge.yml (the audit env)

 Three failure classes:
  F1  Job in (A) is not under the sentinel's `needs:` — sentinel
@@ -250,13 +251,21 @@ def sentinel_needs(ci_doc: dict) -> set[str]:
    return set(needs)


-def required_checks_env(audit_doc: dict) -> set[str]:
-    """Pull the REQUIRED_CHECKS env value from audit-force-merge.yml.
+def required_checks_env(audit_doc: dict, branch: str) -> set[str]:
+    """Pull the required-checks env value from audit-force-merge.yml.
+
    Walks the YAML AST per `feedback_behavior_based_ast_gates`: we do
-    NOT grep for `REQUIRED_CHECKS:` — that breaks under reformatting,
+    NOT grep for env keys — that breaks under reformatting,
    multi-job workflows, or a future move of the env to a different
-    step. Instead, look inside every job's every step's `env:` map."""
-    found: list[str] = []
+    step. Instead, look inside every job's every step's `env:` map.
+
+    Supports two variants:
+      - REQUIRED_CHECKS_JSON (preferred): JSON dict keyed by branch name.
+        We extract the array for the target branch.
+      - REQUIRED_CHECKS (legacy): newline-separated list of context names.
+    """
+    found_json: list[str] = []
+    found_legacy: list[str] = []
    jobs = audit_doc.get("jobs", {})
    if not isinstance(jobs, dict):
        sys.stderr.write(f"::warning::{AUDIT_WORKFLOW_PATH} has no jobs: mapping\n")
@@ -268,27 +277,67 @@ def required_checks_env(audit_doc: dict) -> set[str]:
            if not isinstance(step, dict):
                continue
            step_env = step.get("env") or {}
-            if isinstance(step_env, dict) and "REQUIRED_CHECKS" in step_env:
-                v = step_env["REQUIRED_CHECKS"]
-                if isinstance(v, str):
-                    found.append(v)
-    if not found:
-        sys.stderr.write(
-            f"::error::REQUIRED_CHECKS env not found in any step of "
-            f"{AUDIT_WORKFLOW_PATH}\n"
-        )
-        sys.exit(3)
-    if len(found) > 1:
-        # Defensive: refuse to guess which one is canonical.
-        sys.stderr.write(
-            f"::error::REQUIRED_CHECKS env present in {len(found)} steps; ambiguous\n"
-        )
-        sys.exit(3)
-    raw = found[0]
-    # YAML block-scalars (`|`) leave a trailing newline + blanks; trim
-    # consistently with audit-force-merge.sh's parser so both sides
-    # produce identical sets.
-    return {line.strip() for line in raw.splitlines() if line.strip()}
+            if isinstance(step_env, dict):
+                if "REQUIRED_CHECKS_JSON" in step_env:
+                    v = step_env["REQUIRED_CHECKS_JSON"]
+                    if isinstance(v, str):
+                        found_json.append(v)
+                if "REQUIRED_CHECKS" in step_env:
+                    v = step_env["REQUIRED_CHECKS"]
+                    if isinstance(v, str):
+                        found_legacy.append(v)
+
+    # JSON variant takes precedence.
+    if found_json:
+        if len(found_json) > 1:
+            sys.stderr.write(
+                f"::error::REQUIRED_CHECKS_JSON env present in {len(found_json)} steps; ambiguous\n"
+            )
+            sys.exit(3)
+        try:
+            parsed = json.loads(found_json[0])
+        except json.JSONDecodeError as e:
+            sys.stderr.write(
+                f"::error::REQUIRED_CHECKS_JSON is not valid JSON: {e}\n"
+            )
+            sys.exit(3)
+        if not isinstance(parsed, dict):
+            sys.stderr.write(
+                f"::error::REQUIRED_CHECKS_JSON parsed to {type(parsed).__name__}, expected dict\n"
+            )
+            sys.exit(3)
+        branch_checks = parsed.get(branch)
+        if branch_checks is None:
+            sys.stderr.write(
+                f"::error::REQUIRED_CHECKS_JSON has no entry for branch '{branch}'\n"
+            )
+            sys.exit(3)
+        if not isinstance(branch_checks, list):
+            sys.stderr.write(
+                f"::error::REQUIRED_CHECKS_JSON['{branch}'] is {type(branch_checks).__name__}, expected list\n"
+            )
+            sys.exit(3)
+        return {str(item).strip() for item in branch_checks if str(item).strip()}
+
+    # Legacy variant fallback.
+    if found_legacy:
+        if len(found_legacy) > 1:
+            # Defensive: refuse to guess which one is canonical.
+            sys.stderr.write(
+                f"::error::REQUIRED_CHECKS env present in {len(found_legacy)} steps; ambiguous\n"
+            )
+            sys.exit(3)
+        raw = found_legacy[0]
+        # YAML block-scalars (`|`) leave a trailing newline + blanks; trim
+        # consistently with audit-force-merge.sh's parser so both sides
+        # produce identical sets.
+        return {line.strip() for line in raw.splitlines() if line.strip()}
+
+    sys.stderr.write(
+        f"::error::Neither REQUIRED_CHECKS_JSON nor REQUIRED_CHECKS env found in any step of "
+        f"{AUDIT_WORKFLOW_PATH}\n"
+    )
+    sys.exit(3)


 # --------------------------------------------------------------------------
@@ -330,7 +379,7 @@ def detect_drift(branch: str) -> tuple[list[str], dict]:
    jobs = ci_job_names(ci_doc)
    jobs_all = ci_jobs_all(ci_doc)
    needs = sentinel_needs(ci_doc)
-    env_set = required_checks_env(audit_doc)
+    env_set = required_checks_env(audit_doc, branch)

    # Protection
    # api() raises ApiError on non-2xx. Transient 5xx should fail loud.
@@ -524,7 +573,7 @@ def render_body(branch: str, findings: list[str], debug: dict) -> str:
            "- **F2**: rename the protection context to match an emitter, "
            "or remove it from `status_check_contexts` "
            "(PATCH `/api/v1/repos/{owner}/{repo}/branch_protections/{branch}`).",
-            "- **F3a / F3b**: bring `REQUIRED_CHECKS` env in "
+            "- **F3a / F3b**: bring `REQUIRED_CHECKS_JSON` (or `REQUIRED_CHECKS` legacy) env in "
            "`.gitea/workflows/audit-force-merge.yml` into set-equality with "
            "`status_check_contexts` (single PR, both files).",
            "",
@@ -26,6 +26,10 @@ PROFILES: dict[str, dict[str, str]] = {
        "handlers": (
            r"^workspace-server/internal/handlers/"
            r"|^workspace-server/internal/wsauth/"
+            # #2149: the scheduler real-PG integration tests run in this same
+            # workflow (they reuse its migrated Postgres), so changes to the
+            # scheduler package must trigger the job too.
+            r"|^workspace-server/internal/scheduler/"
            r"|^workspace-server/migrations/"
            r"|^\.gitea/workflows/handlers-postgres-integration\.yml$"
        ),
@@ -174,3 +178,4 @@ def main(argv: list[str]) -> int:

 if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
+
@@ -466,12 +466,40 @@ def fetch_log(target_url: str) -> str | None:

 def grep_fail_markers(log_text: str) -> list[str]:
    """Return up to 5 sample matching lines for any FAIL_PATTERNS hit.
-    Empty list = clean log."""
+    Empty list = clean log.
+
+    Heuristic: skip lines where the marker appears inside script source
+    (e.g. ``echo "::error::..."`` in a ``::group::Run`` block) rather
+    than actual execution output. The Gitea Actions log prints the raw
+    script before executing it; ``echo "::error::"`` lines in that
+    display are false positives.
+    """
    matches: list[str] = []
+    in_run_group = False
+    group_depth = 0
    for line in log_text.splitlines():
+        stripped = line.strip()
+        # Track Gitea Actions group markers so we can skip the
+        # ``::group::Run`` script-source display blocks.
+        if stripped.startswith("::group::Run"):
+            in_run_group = True
+            group_depth = 1
+            continue
+        if stripped == "::endgroup::":
+            if in_run_group:
+                in_run_group = False
+                group_depth = 0
+            continue
+        if in_run_group:
+            continue
        for pat in FAIL_PATTERNS:
            if pat in line:
-                # Truncate to keep error output bounded.
+                # Additional false-positive guard: ``echo "::error::"``
+                # is script source, not a runtime error emission.
+                if pat == "::error::":
+                    prefix = line[: line.index(pat)].strip()
+                    if prefix.endswith('echo') or prefix.endswith("echo '") or prefix.endswith('echo "'):
+                        break
                matches.append(line.strip()[:240])
                break
        if len(matches) >= 5:
@@ -364,6 +364,71 @@ def _api_json_optional(url: str, token: str) -> tuple[int, dict | None]:
        return exc.code, None


+def current_branch_head(env: dict[str, str]) -> str | None:
+    """Return the SHA at the tip of the deploy branch (main) per Gitea, or None.
+
+    Used to detect a *superseded* deploy job (see `superseded_by`). Fail-safe:
+    any read error / missing token returns None so the caller treats the job as
+    NOT superseded and the strict /buildinfo verify still runs. We never let an
+    unreadable head silently green a deploy.
+    """
+
+    token = env.get("GITEA_TOKEN", "").strip()
+    if not token:
+        return None
+    host = env.get("GITEA_HOST", "git.moleculesai.app")
+    repo = env.get("GITHUB_REPOSITORY", "molecule-ai/molecule-core")
+    # Deploy lane is on: push:main; the branch is always main here, but read it
+    # from the ref name when present so a future branch rename doesn't break us.
+    branch = env.get("GITHUB_REF_NAME", "").strip() or "main"
+    url = f"https://{host}/api/v1/repos/{repo}/branches/{quote(branch, safe='')}"
+    status, body = _api_json_optional(url, token)
+    if status != 200 or not isinstance(body, dict):
+        return None
+    commit = body.get("commit")
+    if isinstance(commit, dict):
+        head = commit.get("id") or commit.get("sha")
+        if isinstance(head, str) and head.strip():
+            return head.strip()
+    return None
+
+
+def superseded_by(env: dict[str, str]) -> str | None:
+    """Return the newer head SHA if THIS deploy job has been superseded, else None.
+
+    This workflow runs with no `concurrency:` (intentional — Gitea 1.22.6 cancels
+    queued runs, which is unacceptable for a prod deploy). When two main pushes
+    land close together, BOTH deploy-production jobs run. The newer push rolls the
+    fleet forward first; the OLDER job's strict /buildinfo verify then sees tenants
+    on the NEWER SHA and false-reds with "$slug is stale" — even though the fleet
+    is AHEAD, not behind. Git SHAs aren't ordered, so the verify can't tell ahead
+    from behind on its own (and /buildinfo exposes only git_sha, no build time).
+
+    Resolve it at the source of truth for ordering — the branch ref: if main's
+    current head is a DIFFERENT SHA than the one this job is deploying, a newer
+    commit has landed and this job is superseded; the newest job's verify is the
+    authoritative one. We return that head SHA so the caller can log it and exit
+    success early, skipping the strict-equality verify for this stale job.
+
+    Fail-safe: returns None (NOT superseded) when the head can't be read or equals
+    our SHA, so a genuinely-behind tenant under the LATEST deploy job still fails
+    the strict verify loudly. This never suppresses a real-stale signal — it only
+    excuses a job that is no longer the latest from asserting exact equality.
+    """
+
+    sha = env.get("GITHUB_SHA", "").strip()
+    if not sha:
+        return None
+    head = current_branch_head(env)
+    if not head:
+        return None
+    # SHA lengths can differ (short vs full); compare on the shorter prefix.
+    n = min(len(head), len(sha))
+    if head[:n].lower() == sha[:n].lower():
+        return None
+    return head
+
+
 def live_disable_flag(env: dict[str, str]) -> str:
    """Return a live disable value from Gitea variables when readable.

@@ -442,6 +507,14 @@ def main() -> int:
    sub.add_parser("plan", help="print production deploy plan as JSON")
    sub.add_parser("assert-enabled", help="fail if production deploy is currently disabled")
    sub.add_parser("wait-ci", help="block until required CI context is green")
+    sub.add_parser(
+        "check-superseded",
+        help=(
+            "exit 0 if a newer commit has landed on the deploy branch (this job "
+            "is superseded; prints the newer head SHA), exit 10 if this job is "
+            "still the latest"
+        ),
+    )
    rollout_parser = sub.add_parser("rollout", help="execute canary-first scoped production rollout")
    rollout_parser.add_argument("--plan", required=True, help="path to prod-auto-deploy plan JSON")
    rollout_parser.add_argument("--response", required=True, help="path to write aggregate response JSON")
@@ -457,6 +530,16 @@ def main() -> int:
        if args.command == "wait-ci":
            wait_for_ci_context(dict(os.environ))
            return 0
+        if args.command == "check-superseded":
+            newer = superseded_by(dict(os.environ))
+            if newer:
+                print(newer)
+                return 0
+            # Exit 10 (not 0, not 1): "this job is still the latest". The
+            # workflow treats only exit 0 as superseded; 10 means proceed to
+            # the strict verify. A non-zero code here is informational, not a
+            # failure — the workflow step swallows it.
+            return 10
        if args.command == "rollout":
            rollout_from_plan_file(args.plan, args.response, dict(os.environ))
            return 0
@@ -1,4 +1,5 @@
 import importlib.util
+import json
 import sys
 from pathlib import Path
 from unittest.mock import patch
@@ -36,6 +37,76 @@ def _make_audit_doc(required_checks: list[str]) -> dict:
    }


+def _make_audit_doc_json(required_checks_json: dict) -> dict:
+    return {
+        "jobs": {
+            "audit": {
+                "steps": [
+                    {"env": {"REQUIRED_CHECKS_JSON": json.dumps(required_checks_json)}}
+                ]
+            }
+        }
+    }
+
+
+# ---------------------------------------------------------------------------
+# required_checks_env — dual-variant parsing
+# ---------------------------------------------------------------------------
+
+def test_required_checks_env_prefers_json_over_legacy():
+    doc = {
+        "jobs": {
+            "audit": {
+                "steps": [
+                    {
+                        "env": {
+                            "REQUIRED_CHECKS_JSON": json.dumps(
+                                {"main": ["ctx-a"], "staging": ["ctx-b"]}
+                            ),
+                            "REQUIRED_CHECKS": "ctx-legacy\nctx-old",
+                        }
+                    }
+                ]
+            }
+        }
+    }
+    assert drift.required_checks_env(doc, "main") == {"ctx-a"}
+    assert drift.required_checks_env(doc, "staging") == {"ctx-b"}
+
+
+def test_required_checks_env_falls_back_to_legacy():
+    doc = _make_audit_doc(["legacy-ctx"])
+    assert drift.required_checks_env(doc, "main") == {"legacy-ctx"}
+
+
+def test_required_checks_env_json_missing_branch_fails():
+    doc = _make_audit_doc_json({"staging": ["ctx-b"]})
+    try:
+        drift.required_checks_env(doc, "main")
+    except SystemExit as exc:
+        assert exc.code == 3
+    else:
+        raise AssertionError("expected SystemExit(3)")
+
+
+def test_required_checks_env_json_malformed_fails():
+    doc = {
+        "jobs": {
+            "audit": {
+                "steps": [
+                    {"env": {"REQUIRED_CHECKS_JSON": "not-json"}}
+                ]
+            }
+        }
+    }
+    try:
+        drift.required_checks_env(doc, "main")
+    except SystemExit as exc:
+        assert exc.code == 3
+    else:
+        raise AssertionError("expected SystemExit(3)")
+
+
 # ---------------------------------------------------------------------------
 # sentinel_needs
 # ---------------------------------------------------------------------------
@@ -0,0 +1,244 @@
+"""Live-fire regression test for #2159 — gate auto-fire runtime verification.
+
+Static tests (test_gate_review_auto_fire.py) validate that the workflow YAML
+is structurally correct. This test validates the *runtime* path: submitting an
+APPROVED review to a PR whose head contains the current gate workflows causes
+Gitea Actions to queue the qa-review + security-review workflows and POST the
+branch-protection-required (pull_request_target) contexts within a reasonable
+window.
+
+Skipped when Gitea API credentials are not available. Intended for:
+  - manual developer verification
+  - CI jobs provisioned with a service-account token
+
+Environment:
+  GITEA_HOST            — default: git.moleculesai.app
+  GITEA_TOKEN           — token with read:repository + write:issues (for review POST)
+  REPO                  — default: molecule-ai/molecule-core
+  LIVEFIRE_PR_NUMBER    — optional; if omitted the test tries to find a
+                          suitable open PR automatically, or skips.
+  LIVEFIRE_TIMEOUT_SEC  — default: 120
+"""
+
+import base64
+import json
+import os
+import re
+import time
+import urllib.error
+import urllib.request
+from pathlib import Path
+
+import pytest
+
+import yaml
+
+GITEA_HOST = os.environ.get("GITEA_HOST", "git.moleculesai.app")
+GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "")
+REPO = os.environ.get("REPO", "molecule-ai/molecule-core")
+LIVEFIRE_PR_NUMBER = os.environ.get("LIVEFIRE_PR_NUMBER", "")
+LIVEFIRE_TIMEOUT_SEC = int(os.environ.get("LIVEFIRE_TIMEOUT_SEC", "120"))
+
+REQUIRED_CONTEXTS = [
+    "qa-review / approved (pull_request_target)",
+    "security-review / approved (pull_request_target)",
+]
+
+skip_no_token = pytest.mark.skipif(
+    not GITEA_TOKEN,
+    reason="GITEA_TOKEN not set — live-fire test requires API credentials",
+)
+
+
+def _api(method: str, path: str, body: dict | None = None) -> tuple[int, dict]:
+    url = f"https://{GITEA_HOST}/api/v1{path}"
+    headers = {
+        "Authorization": f"token {GITEA_TOKEN}",
+        "Content-Type": "application/json",
+    }
+    data = json.dumps(body).encode() if body else None
+    req = urllib.request.Request(url, data=data, headers=headers, method=method)
+    try:
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            raw = resp.read()
+            code = resp.status
+    except urllib.error.HTTPError as exc:
+        raw = exc.read()
+        code = exc.code
+    payload = json.loads(raw) if raw else {}
+    return code, payload
+
+
+def _get_pr(number: int) -> dict:
+    code, pr = _api("GET", f"/repos/{REPO}/pulls/{number}")
+    if code != 200:
+        pytest.fail(f"GET /pulls/{number} returned HTTP {code}: {pr}")
+    return pr
+
+
+def _list_open_prs() -> list[dict]:
+    code, prs = _api("GET", f"/repos/{REPO}/pulls?state=open&limit=50")
+    if code != 200:
+        pytest.fail(f"GET /pulls?state=open returned HTTP {code}: {prs}")
+    return prs
+
+
+def _pr_has_trigger_in_head(pr: dict) -> bool:
+    """Return True if the PR head contains pull_request_review in both workflows."""
+    head_sha = pr["head"]["sha"]
+    for wf_name in ("qa-review.yml", "security-review.yml"):
+        path = f"/repos/{REPO}/contents/.gitea/workflows/{wf_name}?ref={head_sha}"
+        code, payload = _api("GET", path)
+        if code != 200:
+            return False
+        raw = base64.b64decode(payload.get("content", "")).decode("utf-8")
+        wf = yaml.safe_load(raw)
+        on = wf.get(True) or wf.get("on") or {}
+        if isinstance(on, str):
+            if on != "pull_request_review":
+                return False
+        elif "pull_request_review" not in on:
+            return False
+    return True
+
+
+def _find_suitable_pr() -> dict:
+    if LIVEFIRE_PR_NUMBER:
+        pr = _get_pr(int(LIVEFIRE_PR_NUMBER))
+        if pr.get("state") != "open":
+            pytest.skip(f"PR {LIVEFIRE_PR_NUMBER} is not open")
+        return pr
+
+    prs = _list_open_prs()
+    for pr in prs:
+        if _pr_has_trigger_in_head(pr):
+            return pr
+    pytest.skip("No open PR found whose head contains the pull_request_review trigger")
+
+
+def _submit_approved_review(pr_number: int) -> dict:
+    code, review = _api(
+        "POST",
+        f"/repos/{REPO}/pulls/{pr_number}/reviews",
+        {"body": "Live-fire test APPROVED review", "event": "APPROVED"},
+    )
+    # 200 = created, 422 = review already exists (idempotent enough for our purposes)
+    if code not in (200, 201, 422):
+        pytest.fail(f"POST /pulls/{pr_number}/reviews returned HTTP {code}")
+    return review
+
+
+def _get_status_snapshot(sha: str) -> dict[str, dict]:
+    """Return mapping context -> {id, updated_at, target_url} for required contexts."""
+    code, statuses = _api("GET", f"/repos/{REPO}/statuses/{sha}?limit=100")
+    if code != 200:
+        return {}
+    result: dict[str, dict] = {}
+    for st in statuses:
+        ctx = st.get("context", "")
+        if ctx in REQUIRED_CONTEXTS:
+            result[ctx] = {
+                "id": st.get("id"),
+                "updated_at": st.get("updated_at", st.get("created_at", "")),
+                "target_url": st.get("target_url"),
+            }
+    return result
+
+
+def _extract_run_id(target_url: str | None) -> str | None:
+    """Extract the Actions run_id from a status target_url."""
+    if not target_url:
+        return None
+    m = re.search(r"/actions/runs/(\d+)", target_url)
+    return m.group(1) if m else None
+
+
+def _poll_fresh_statuses(
+    sha: str,
+    prior_snapshot: dict[str, dict],
+    timeout_sec: int = LIVEFIRE_TIMEOUT_SEC,
+) -> dict[str, dict]:
+    """Poll until required contexts appear fresh (newer timestamp, id, or run)."""
+    deadline = time.monotonic() + timeout_sec
+    found: dict[str, dict] = {}
+    while time.monotonic() < deadline:
+        code, statuses = _api("GET", f"/repos/{REPO}/statuses/{sha}?limit=100")
+        if code == 200:
+            for st in statuses:
+                ctx = st.get("context", "")
+                if ctx in REQUIRED_CONTEXTS:
+                    updated_at = st.get("updated_at", st.get("created_at", ""))
+                    status_id = st.get("id")
+                    target_url = st.get("target_url")
+                    prior = prior_snapshot.get(ctx, {})
+                    # Fresh if timestamp changed, id changed, or target_url changed.
+                    is_fresh = (
+                        ctx not in prior_snapshot
+                        or updated_at != prior.get("updated_at", "")
+                        or status_id != prior.get("id")
+                        or target_url != prior.get("target_url")
+                    )
+                    if is_fresh:
+                        found[ctx] = {
+                            "state": st.get("state", st.get("status", "")),
+                            "updated_at": updated_at,
+                            "id": status_id,
+                            "target_url": target_url,
+                        }
+        if all(ctx in found for ctx in REQUIRED_CONTEXTS):
+            return found
+        time.sleep(5)
+    return found
+
+
+@skip_no_token
+class TestGateAutoFireLive:
+    def test_auto_fire_posts_required_contexts(self):
+        """Submit APPROVED review; assert BP-required contexts appear fresh within timeout."""
+        pr = _find_suitable_pr()
+        pr_number = pr["number"]
+        head_sha = pr["head"]["sha"]
+
+        # Capture pre-existing status snapshot so we can prove FRESH contexts
+        # were posted after the review submission (not stale from a prior run).
+        prior_snapshot = _get_status_snapshot(head_sha)
+        prior_run_ids = {
+            _extract_run_id(s["target_url"])
+            for s in prior_snapshot.values()
+            if _extract_run_id(s["target_url"])
+        }
+
+        review = _submit_approved_review(pr_number)
+
+        found = _poll_fresh_statuses(head_sha, prior_snapshot)
+
+        missing = [ctx for ctx in REQUIRED_CONTEXTS if ctx not in found]
+        if missing:
+            pytest.fail(
+                f"After {LIVEFIRE_TIMEOUT_SEC}s, fresh contexts still missing: {missing}. "
+                f"Found: {found}. Prior snapshot: {prior_snapshot}. "
+                f"PR #{pr_number} head={head_sha}. "
+                f"This indicates the pull_request_review trigger did not fire at runtime."
+            )
+
+        # The contexts appeared fresh — that's the proof of auto-fire.
+        # We do NOT assert success vs failure; the evaluator decides that.
+        # The point of #2159 is that the workflows QUEUE and POST at all.
+        for ctx, info in found.items():
+            state = info["state"]
+            assert state in ("pending", "success", "failure"), (
+                f"Unexpected state {state!r} for {ctx}"
+            )
+
+            # CR2 Finding 1: prove a NEW workflow run was triggered, not just
+            # an in-place status update. Gitea 1.22.6 lacks REST /actions/runs/*
+            # endpoints, so we use the run_id embedded in the status target_url
+            # as a proxy for distinct run_id.
+            run_id = _extract_run_id(info.get("target_url"))
+            if run_id and run_id in prior_run_ids:
+                pytest.fail(
+                    f"Context {ctx!r} has target_url run_id {run_id} which existed "
+                    f"BEFORE the review was submitted. This means the status was "
+                    f"updated in-place by an existing run, not by a new workflow "
+                    f"run triggered from the pull_request_review event."
+                )
@@ -0,0 +1,145 @@
+"""Stale-head diagnostic test for #2159.
+
+Deterministically reports whether a PR's HEAD contains the pull_request_review
+trigger in qa-review.yml and security-review.yml. If the trigger is absent,
+auto-fire on APPROVED review is impossible for that PR.
+
+This is used as a self-diagnostic for future stale-PR situations (PRs opened
+before #2157 merged, or branches cut from old bases).
+
+Environment:
+  GITEA_HOST  — default: git.moleculesai.app
+  GITEA_TOKEN — token with read:repository scope (optional; falls back to local files)
+  REPO        — default: molecule-ai/molecule-core
+  PR_NUMBER   — required when running against a real PR
+"""
+
+import base64
+import json
+import os
+import urllib.error
+import urllib.request
+from pathlib import Path
+
+import pytest
+
+import yaml
+
+GITEA_HOST = os.environ.get("GITEA_HOST", "git.moleculesai.app")
+GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "")
+REPO = os.environ.get("REPO", "molecule-ai/molecule-core")
+PR_NUMBER = os.environ.get("PR_NUMBER", "")
+
+ROOT = Path(__file__).resolve().parents[2]
+
+
+def _api(method: str, path: str) -> tuple[int, dict]:
+    url = f"https://{GITEA_HOST}/api/v1{path}"
+    headers = {"Authorization": f"token {GITEA_TOKEN}"}
+    req = urllib.request.Request(url, headers=headers, method=method)
+    try:
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            return resp.status, json.loads(resp.read())
+    except urllib.error.HTTPError as exc:
+        body = exc.read()
+        return exc.code, json.loads(body) if body else {}
+
+
+def _fetch_workflow_from_ref(workflow_name: str, ref: str) -> dict:
+    path = f"/repos/{REPO}/contents/.gitea/workflows/{workflow_name}?ref={ref}"
+    code, payload = _api("GET", path)
+    if code != 200:
+        pytest.fail(
+            f"GET {path} returned HTTP {code}: {payload}. "
+            f"Cannot determine whether PR head contains the trigger."
+        )
+    raw = base64.b64decode(payload.get("content", "")).decode("utf-8")
+    return yaml.safe_load(raw)
+
+
+def _fetch_workflow_local(workflow_name: str) -> dict:
+    p = ROOT / "workflows" / workflow_name
+    if not p.exists():
+        pytest.fail(f"Local workflow file not found: {p}")
+    return yaml.safe_load(p.read_text())
+
+
+def _has_pull_request_review_trigger(wf: dict) -> bool:
+    on = wf.get(True) or wf.get("on") or {}
+    if isinstance(on, list):
+        return "pull_request_review" in on
+    if isinstance(on, dict):
+        return "pull_request_review" in on
+    if isinstance(on, str):
+        return on == "pull_request_review"
+    return False
+
+
+def _diagnose_pr(pr_number: int) -> dict[str, bool]:
+    code, pr = _api("GET", f"/repos/{REPO}/pulls/{pr_number}")
+    if code != 200:
+        pytest.fail(f"GET /pulls/{pr_number} returned HTTP {code}: {pr}")
+
+    head_ref = pr["head"]["ref"]
+    head_sha = pr["head"]["sha"]
+
+    results: dict[str, bool] = {}
+    for wf_name in ("qa-review.yml", "security-review.yml"):
+        wf = _fetch_workflow_from_ref(wf_name, head_sha)
+        results[wf_name] = _has_pull_request_review_trigger(wf)
+
+    return {
+        "pr_number": pr_number,
+        "head_ref": head_ref,
+        "head_sha": head_sha,
+        "triggers": results,
+        "auto_fire_possible": all(results.values()),
+    }
+
+
+def _diagnose_local() -> dict[str, bool]:
+    results: dict[str, bool] = {}
+    for wf_name in ("qa-review.yml", "security-review.yml"):
+        wf = _fetch_workflow_local(wf_name)
+        results[wf_name] = _has_pull_request_review_trigger(wf)
+    return {
+        "pr_number": None,
+        "head_ref": "local-checkout",
+        "head_sha": None,
+        "triggers": results,
+        "auto_fire_possible": all(results.values()),
+    }
+
+
+class TestStaleHeadDiagnostic:
+    """Test deterministically reports 'auto-fire impossible for this PR' when
+    the PR head lacks the pull_request_review trigger.
+    """
+
+    def test_local_checkout_has_pull_request_review_trigger(self):
+        """Local files (the ones in this checkout) must contain the trigger.
+
+        This is the baseline: if the checkout itself is stale, every PR cut
+        from it will also be stale.
+        """
+        diag = _diagnose_local()
+        missing = [n for n, ok in diag["triggers"].items() if not ok]
+        if missing:
+            pytest.fail(
+                f"Local checkout is missing pull_request_review trigger in: {missing}. "
+                f"This branch cannot produce PRs that auto-fire."
+            )
+
+    @pytest.mark.skipif(not GITEA_TOKEN, reason="GITEA_TOKEN not set")
+    @pytest.mark.skipif(not PR_NUMBER, reason="PR_NUMBER not set")
+    def test_pr_head_has_pull_request_review_trigger(self):
+        """When PR_NUMBER is given, assert the PR head contains the trigger."""
+        diag = _diagnose_pr(int(PR_NUMBER))
+        if not diag["auto_fire_possible"]:
+            missing = [n for n, ok in diag["triggers"].items() if not ok]
+            pytest.fail(
+                f"Auto-fire impossible for PR #{diag['pr_number']}. "
+                f"Head ref={diag['head_ref']} sha={diag['head_sha']}. "
+                f"Missing trigger in: {missing}. "
+                f"This PR needs /qa-recheck + /security-recheck fallback, or a rebase onto current main."
+            )
@@ -486,3 +486,88 @@ def test_scoped_rollout_dry_run_does_not_assert_coverage():
        sleep=lambda _s: None,
    )
    assert aggregate["ok"] is True
+
+
+# --- Superseded-deploy guard (false-stale fix) -----------------------------
+#
+# Scenario this fixes: no `concurrency:` on the prod-deploy workflow means two
+# close main pushes run BOTH deploy-production jobs. eb31bcf (Fix A) and 286338
+# (Fix C) merge back-to-back; the 286338 job rolls the fleet to staging-2863380
+# first; the OLDER eb31bcf job's strict verify then sees tenants on 2863380 and
+# false-reds "stale" though the fleet is AHEAD. superseded_by detects that main's
+# head is no longer eb31bcf and lets the older job succeed without weakening the
+# behind-tenant signal for whichever job IS the latest.
+
+
+def test_superseded_by_returns_newer_head_when_main_moved_ahead(monkeypatch):
+    # eb31bcf job: main head is now 2863380 -> superseded, return the newer head.
+    monkeypatch.setattr(prod, "current_branch_head", lambda _env: "2863380fullhash")
+    newer = prod.superseded_by({"GITHUB_SHA": "eb31bcffullhash"})
+    assert newer == "2863380fullhash"
+
+
+def test_superseded_by_none_when_this_job_is_still_head(monkeypatch):
+    # 2863380 job (the latest): head == our SHA -> NOT superseded -> strict verify
+    # runs, so a genuinely-behind tenant still fails loudly.
+    monkeypatch.setattr(prod, "current_branch_head", lambda _env: "2863380fullhash")
+    assert prod.superseded_by({"GITHUB_SHA": "2863380fullhash"}) is None
+
+
+def test_superseded_by_matches_on_short_vs_full_sha_prefix(monkeypatch):
+    # GITHUB_SHA is full; Gitea may return a different-length id. Equal prefixes
+    # must NOT count as superseded (avoid false-skipping the real latest job).
+    monkeypatch.setattr(prod, "current_branch_head", lambda _env: "2863380")
+    assert prod.superseded_by({"GITHUB_SHA": "2863380fullhash"}) is None
+    monkeypatch.setattr(prod, "current_branch_head", lambda _env: "2863380FULLHASH")
+    assert prod.superseded_by({"GITHUB_SHA": "2863380fullhash"}) is None
+
+
+def test_superseded_by_fail_safe_returns_none_when_head_unreadable(monkeypatch):
+    # Fail-safe: unreadable head (no token / API error) must NOT be treated as
+    # superseded, so the strict verify still runs and never silently greens.
+    monkeypatch.setattr(prod, "current_branch_head", lambda _env: None)
+    assert prod.superseded_by({"GITHUB_SHA": "eb31bcffullhash"}) is None
+
+
+def test_superseded_by_none_without_github_sha(monkeypatch):
+    monkeypatch.setattr(prod, "current_branch_head", lambda _env: "2863380fullhash")
+    assert prod.superseded_by({}) is None
+
+
+def test_current_branch_head_parses_gitea_branch_commit_id(monkeypatch):
+    captured = {}
+
+    def fake_optional(url, _token):
+        captured["url"] = url
+        return 200, {"name": "main", "commit": {"id": "2863380fullhash"}}
+
+    monkeypatch.setattr(prod, "_api_json_optional", fake_optional)
+    head = prod.current_branch_head(
+        {"GITEA_TOKEN": "secret", "GITHUB_REPOSITORY": "molecule-ai/molecule-core"}
+    )
+    assert head == "2863380fullhash"
+    assert captured["url"].endswith("/repos/molecule-ai/molecule-core/branches/main")
+
+
+def test_current_branch_head_uses_ref_name_branch(monkeypatch):
+    captured = {}
+
+    def fake_optional(url, _token):
+        captured["url"] = url
+        return 200, {"commit": {"sha": "deadbeef"}}
+
+    monkeypatch.setattr(prod, "_api_json_optional", fake_optional)
+    head = prod.current_branch_head(
+        {"GITEA_TOKEN": "secret", "GITHUB_REF_NAME": "release"}
+    )
+    assert head == "deadbeef"
+    assert captured["url"].endswith("/branches/release")
+
+
+def test_current_branch_head_none_without_token():
+    assert prod.current_branch_head({}) is None
+
+
+def test_current_branch_head_none_on_non_200(monkeypatch):
+    monkeypatch.setattr(prod, "_api_json_optional", lambda _u, _t: (500, None))
+    assert prod.current_branch_head({"GITEA_TOKEN": "secret"}) is None
@@ -123,8 +123,9 @@ jobs:
    # integration). See internal#512 for the class defect.
    runs-on: docker-host
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
-    continue-on-error: true
+    # mc#1982: mask removed. If regressions appear, root-fix the underlying
+    # test — do NOT renew the mask silently.
+    continue-on-error: false
    outputs:
      api: ${{ steps.decide.outputs.api }}
    steps:
@@ -160,8 +161,9 @@ jobs:
    # detect-changes for the full rationale.
    runs-on: docker-host
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
-    continue-on-error: true
+    # mc#1982: mask removed. If regressions appear, root-fix the underlying
+    # test — do NOT renew the mask silently.
+    continue-on-error: false
    timeout-minutes: 15
    env:
      # Unique per-run container names so concurrent runs on the host-
@@ -328,16 +330,40 @@ jobs:
      - name: Wait for /health
        if: needs.detect-changes.outputs.api == 'true'
        run: |
-          for i in $(seq 1 30); do
+          # Readiness signal: the platform binds /health only AFTER the full
+          # migration chain has been applied on cold start (it prints
+          # "Platform starting on :PORT" at that point). So a 200 from /health
+          # is the real "migrations done + server listening" signal.
+          #
+          # The migration chain grows every release, so a fixed ~30s budget is
+          # brittle by construction (it WILL be exceeded as migrations accrue).
+          # Use a generous wall-clock budget that comfortably exceeds
+          # cold-start + full-migration time, polling fast. This is robust to a
+          # growing chain WITHOUT masking a genuinely dead platform: if the
+          # background platform-server process has exited (e.g. a broken
+          # migration crashed it), we stop and fail loudly at once instead of
+          # waiting out the whole budget.
+          DEADLINE_SECS=180          # cold-start + full migration chain headroom
+          PLATFORM_PID="$(cat workspace-server/platform.pid 2>/dev/null || true)"
+          start=$(date +%s)
+          while :; do
            if curl -sf "$BASE/health" > /dev/null; then
-              echo "Platform up after ${i}s"
+              echo "Platform healthy after $(( $(date +%s) - start ))s"
              exit 0
            fi
+            # Fast-fail: if the platform process died, /health will never come.
+            if [ -n "$PLATFORM_PID" ] && ! kill -0 "$PLATFORM_PID" 2>/dev/null; then
+              echo "::error::platform-server (pid ${PLATFORM_PID}) exited before /health became reachable — see log below"
+              cat workspace-server/platform.log || true
+              exit 1
+            fi
+            if [ "$(( $(date +%s) - start ))" -ge "$DEADLINE_SECS" ]; then
+              echo "::error::Platform did not become healthy within ${DEADLINE_SECS}s — see log below"
+              cat workspace-server/platform.log || true
+              exit 1
+            fi
            sleep 1
          done
-          echo "::error::Platform did not become healthy in 30s"
-          cat workspace-server/platform.log || true
-          exit 1
      - name: Assert migrations applied
        if: needs.detect-changes.outputs.api == 'true'
        run: |
@@ -242,16 +242,36 @@ jobs:
      - name: Wait for /health
        if: needs.detect-changes.outputs.chat == 'true'
        run: |
-          for i in $(seq 1 30); do
+          # Readiness signal: the platform binds /health only AFTER the full
+          # migration chain has been applied on cold start (it prints
+          # "Platform starting on :PORT" at that point). So a 200 from /health
+          # is the real "migrations done + server listening" signal.
+          #
+          # The migration chain grows every release, so a fixed ~30s budget is
+          # brittle by construction. Use a generous wall-clock budget that
+          # comfortably exceeds cold-start + full-migration time, polling fast.
+          # Robust to a growing chain WITHOUT masking a dead platform: if the
+          # background platform-server process has exited, fail loudly at once.
+          DEADLINE_SECS=180          # cold-start + full migration chain headroom
+          PLATFORM_PID="$(cat workspace-server/platform.pid 2>/dev/null || true)"
+          start=$(date +%s)
+          while :; do
            if curl -sf "http://127.0.0.1:${PLATFORM_PORT}/health" > /dev/null; then
-              echo "Platform up after ${i}s"
+              echo "Platform healthy after $(( $(date +%s) - start ))s"
              exit 0
            fi
+            if [ -n "$PLATFORM_PID" ] && ! kill -0 "$PLATFORM_PID" 2>/dev/null; then
+              echo "::error::platform-server (pid ${PLATFORM_PID}) exited before /health became reachable — see log below"
+              cat workspace-server/platform.log || true
+              exit 1
+            fi
+            if [ "$(( $(date +%s) - start ))" -ge "$DEADLINE_SECS" ]; then
+              echo "::error::Platform did not become healthy within ${DEADLINE_SECS}s — see log below"
+              cat workspace-server/platform.log || true
+              exit 1
+            fi
            sleep 1
          done
-          echo "::error::Platform did not become healthy in 30s"
-          cat workspace-server/platform.log || true
-          exit 1

      - name: Install canvas dependencies
        if: needs.detect-changes.outputs.chat == 'true'
@@ -278,16 +298,38 @@ jobs:
          export NEXT_PUBLIC_WS_URL="ws://127.0.0.1:${PLATFORM_PORT}/ws"
          npx next dev --turbopack -p "${CANVAS_PORT}" > canvas.log 2>&1 &
          echo $! > canvas.pid
-          for i in $(seq 1 30); do
-            if curl -sf "http://localhost:${CANVAS_PORT}" > /dev/null 2>&1; then
-              echo "Canvas up after ${i}s"
-              exit 0
+          # Readiness must wait for the actual chat route to *compile*, not
+          # just for the dev server to bind the port. `next dev --turbopack`
+          # accepts the TCP connection well before it has compiled a route
+          # on first request, so a bare `curl /` can 200 (or hang) while the
+          # page the tests load is still building. We therefore probe the
+          # real route the specs navigate to (`/?m=chat`) and require a 2xx,
+          # which only happens once Turbopack has finished the first
+          # compile. The previous 30s budget was also too tight for a cold
+          # Turbopack first-compile on a loaded operator-host runner — the
+          # `Canvas did not start in 30s` flake. Raise to 120s (job
+          # timeout-minutes is 15, so this is comfortably bounded) and probe
+          # every 2s.
+          READY=""
+          for i in $(seq 1 60); do
+            # Tempfile-routed -w + set +e/-e prevents curl-exit-code
+            # pollution of the captured status (lint-curl-status-capture.yml).
+            set +e
+            curl -s -o /dev/null -w '%{http_code}' "http://localhost:${CANVAS_PORT}/?m=chat" > /tmp/canvas-ready.code
+            set -e
+            CODE=$(cat /tmp/canvas-ready.code 2>/dev/null || echo "000")
+            if [ "$CODE" -ge 200 ] && [ "$CODE" -lt 400 ]; then
+              echo "Canvas (chat route compiled) up after ~$((i*2))s (HTTP ${CODE})"
+              READY=1
+              break
            fi
-            sleep 1
+            sleep 2
          done
-          echo "::error::Canvas did not start in 30s"
-          cat canvas.log || true
-          exit 1
+          if [ -z "$READY" ]; then
+            echo "::error::Canvas chat route did not compile in 120s (last HTTP ${CODE})"
+            cat canvas.log || true
+            exit 1
+          fi

      - name: Run Playwright E2E tests
        if: needs.detect-changes.outputs.chat == 'true'
@@ -130,13 +130,37 @@ jobs:
        run: |
          set -euo pipefail
          ./workspace-server/platform-server > workspace-server/platform.log 2>&1 &
-          echo $! > workspace-server/platform.pid
-          for i in $(seq 1 30); do
-            curl -sf "$BASE/health" >/dev/null && exit 0
+          PLATFORM_PID=$!
+          echo "$PLATFORM_PID" > workspace-server/platform.pid
+          # Readiness signal: the platform binds /health only AFTER the full
+          # migration chain has been applied on cold start (it prints
+          # "Platform starting on :PORT" at that point). So a 200 from /health
+          # is the real "migrations done + server listening" signal.
+          #
+          # The migration chain grows every release, so a fixed ~30s budget is
+          # brittle by construction. Use a generous wall-clock budget that
+          # comfortably exceeds cold-start + full-migration time, polling fast.
+          # Robust to a growing chain WITHOUT masking a dead platform: if the
+          # background platform-server process has exited, fail loudly at once.
+          DEADLINE_SECS=180          # cold-start + full migration chain headroom
+          start=$(date +%s)
+          while :; do
+            if curl -sf "$BASE/health" >/dev/null; then
+              echo "Platform healthy after $(( $(date +%s) - start ))s"
+              exit 0
+            fi
+            if ! kill -0 "$PLATFORM_PID" 2>/dev/null; then
+              echo "::error::platform-server (pid ${PLATFORM_PID}) exited before /health became reachable — see log below"
+              cat workspace-server/platform.log || true
+              exit 1
+            fi
+            if [ "$(( $(date +%s) - start ))" -ge "$DEADLINE_SECS" ]; then
+              echo "::error::Platform did not become healthy within ${DEADLINE_SECS}s — see log below"
+              cat workspace-server/platform.log || true
+              exit 1
+            fi
            sleep 1
          done
-          cat workspace-server/platform.log || true
-          exit 1

      - name: Run comprehensive E2E
        run: bash tests/e2e/test_comprehensive_e2e.sh
@@ -267,12 +267,36 @@ jobs:
          echo $! > platform.pid
      - name: Wait for /health
        run: |
-          for i in $(seq 1 30); do
-            curl -sf "$BASE/health" > /dev/null && { echo "Platform up after ${i}s"; exit 0; }
+          # Readiness signal: the platform binds /health only AFTER the full
+          # migration chain has been applied on cold start (it prints
+          # "Platform starting on :PORT" at that point). So a 200 from /health
+          # is the real "migrations done + server listening" signal.
+          #
+          # The migration chain grows every release, so a fixed ~30s budget is
+          # brittle by construction. Use a generous wall-clock budget that
+          # comfortably exceeds cold-start + full-migration time, polling fast.
+          # Robust to a growing chain WITHOUT masking a dead platform: if the
+          # background platform-server process has exited, fail loudly at once.
+          DEADLINE_SECS=180          # cold-start + full migration chain headroom
+          PLATFORM_PID="$(cat workspace-server/platform.pid 2>/dev/null || true)"
+          start=$(date +%s)
+          while :; do
+            if curl -sf "$BASE/health" > /dev/null; then
+              echo "Platform healthy after $(( $(date +%s) - start ))s"
+              exit 0
+            fi
+            if [ -n "$PLATFORM_PID" ] && ! kill -0 "$PLATFORM_PID" 2>/dev/null; then
+              echo "::error::platform-server (pid ${PLATFORM_PID}) exited before /health became reachable — see log below"
+              cat workspace-server/platform.log || true
+              exit 1
+            fi
+            if [ "$(( $(date +%s) - start ))" -ge "$DEADLINE_SECS" ]; then
+              echo "::error::Platform did not become healthy within ${DEADLINE_SECS}s — see log below"
+              cat workspace-server/platform.log || true
+              exit 1
+            fi
            sleep 1
          done
-          echo "::error::Platform did not become healthy in 30s"
-          cat workspace-server/platform.log || true; exit 1
      - name: Run LOCAL fresh-provision peer-visibility E2E (literal MCP list_peers)
        # HONEST gate — NO continue-on-error. The local backend uses
        # external-mode workspaces so this context tests the literal MCP
@@ -48,8 +48,10 @@ on:
      - 'workspace-server/internal/handlers/a2a_proxy.go'
      - 'workspace-server/internal/middleware/**'
      - 'workspace-server/internal/provisioner/**'
+      - 'workspace-server/internal/providers/providers.yaml'
      - 'tests/e2e/test_staging_full_saas.sh'
      - 'tests/e2e/lib/completion_assert.sh'
+      - 'tests/e2e/lib/model_slug.sh'
      - 'tests/e2e/lib/aws_leak_check.sh'
      - 'tests/e2e/test_aws_leak_check.sh'
      - '.gitea/workflows/e2e-staging-saas.yml'
@@ -61,8 +63,10 @@ on:
      - 'workspace-server/internal/handlers/a2a_proxy.go'
      - 'workspace-server/internal/middleware/**'
      - 'workspace-server/internal/provisioner/**'
+      - 'workspace-server/internal/providers/providers.yaml'
      - 'tests/e2e/test_staging_full_saas.sh'
      - 'tests/e2e/lib/completion_assert.sh'
+      - 'tests/e2e/lib/model_slug.sh'
      - 'tests/e2e/lib/aws_leak_check.sh'
      - 'tests/e2e/test_aws_leak_check.sh'
      - '.gitea/workflows/e2e-staging-saas.yml'
@@ -315,3 +319,148 @@ jobs:
            echo "::warning::saas teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
          fi
          exit 0
+
+  # ── PLATFORM-MANAGED BOOT REGRESSION (moonshot/kimi NOT_CONFIGURED) ──────────
+  #
+  # The REAL-boot complement to the deterministic unit suite
+  # (workspace_provision_platform_boot_test.go). Provisions a REAL staging
+  # claude-code workspace on the PLATFORM-managed path — provider=platform,
+  # model=moonshot/kimi-k2.6, NO tenant LLM key — and asserts it reaches
+  # status=online (NOT not_configured) and a completion returns 200, via the same
+  # online-wait + completion-assert the BYOK job uses.
+  #
+  # Why a SEPARATE job (not a matrix leg of e2e-staging-saas): the platform path
+  # injects NO secret and pins a different model, so its env block diverges from
+  # the BYOK job's. A dedicated job keeps each path's "verify key present" preflight
+  # honest (BYOK requires a key; platform requires its ABSENCE not to matter) and
+  # gives the regression its own named commit-status for branch protection.
+  #
+  # Add `E2E Staging Platform Boot` to branch protection after 3 consecutive
+  # green runs on main (de-flake window; this path shares the cp#245
+  # boot-timeout flake surface the BYOK job has, so it must prove stable before
+  # it can BLOCK — see the gate-making plan in the PR body).
+  # bp-required: pending #2187
+  e2e-staging-platform-boot:
+    name: E2E Staging Platform Boot
+    runs-on: ubuntu-latest
+    # Phase 3 (RFC #219 §1): surface without blocking until the de-flake window
+    # closes. mc#1982: do NOT renew this mask silently — the gate-making plan
+    # tracks the flip to false under #2187.
+    continue-on-error: true
+    timeout-minutes: 45
+    permissions:
+      contents: read
+
+    env:
+      MOLECULE_CP_URL: https://staging-api.moleculesai.app
+      MOLECULE_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
+      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+      AWS_DEFAULT_REGION: us-east-2
+      E2E_AWS_LEAK_CHECK: required
+      E2E_AWS_TERMINATE_LEAKS: '1'
+      # The regression combo: claude-code + platform-managed + moonshot/kimi-k2.6.
+      # NO E2E_*_API_KEY is set — platform-managed billing is owned by Molecule via
+      # the CP LLM proxy. The harness's E2E_LLM_PATH=platform branch sends empty
+      # secrets and pin-selects the platform model.
+      E2E_RUNTIME: claude-code
+      E2E_LLM_PATH: platform
+      # Smoke mode: a single parent workspace is enough to prove online +
+      # completion for the platform path (the A2A/delegation matrix is the BYOK
+      # job's job). Override E2E_DEFAULT_PLATFORM_MODEL via workflow_dispatch to
+      # exercise another platform model id.
+      E2E_MODE: smoke
+      E2E_RUN_ID: "platform-${{ github.run_id }}-${{ github.run_attempt }}"
+      E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
+
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Verify admin token present
+        run: |
+          if [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
+            echo "::error::CP_STAGING_ADMIN_API_TOKEN secret not set (Railway staging CP_ADMIN_API_TOKEN)"
+            exit 2
+          fi
+          for var in AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY; do
+            if [ -z "${!var:-}" ]; then
+              echo "::error::$var not set — EC2 leak verification cannot run"
+              exit 2
+            fi
+          done
+          echo "Admin token present ✓"
+
+      - name: Assert NO BYOK key leaks into the platform run
+        run: |
+          # The whole point of this job is the platform-managed path. A stray
+          # E2E_*_API_KEY in the runner env would (via the harness) still be
+          # skipped by the E2E_LLM_PATH=platform branch — but assert their
+          # absence loudly here so a future env edit can't silently convert this
+          # into a masked BYOK run that no longer exercises the regression.
+          for var in E2E_MINIMAX_API_KEY E2E_ANTHROPIC_API_KEY E2E_OPENAI_API_KEY; do
+            if [ -n "${!var:-}" ]; then
+              echo "::warning::$var is set in this platform-boot job's env — the harness ignores it on E2E_LLM_PATH=platform, but it should not be wired here."
+            fi
+          done
+          echo "Platform-managed path: no tenant LLM key required ✓"
+
+      - name: CP staging health preflight
+        run: |
+          code=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 10 "$MOLECULE_CP_URL/health")
+          if [ "$code" != "200" ]; then
+            echo "::error::Staging CP unhealthy (got HTTP $code). Skipping — not a workspace bug."
+            exit 1
+          fi
+          echo "Staging CP healthy ✓"
+
+      - name: Run platform-managed boot E2E (online + completion)
+        id: e2e
+        run: bash tests/e2e/test_staging_full_saas.sh
+
+      - name: Teardown safety net (runs on cancel/failure)
+        if: always()
+        env:
+          ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
+        run: |
+          set +e
+          orgs=$(curl -sS "$MOLECULE_CP_URL/cp/admin/orgs" \
+            -H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
+            | python3 -c "
+          import json, sys, os, datetime
+          run_id = os.environ.get('GITHUB_RUN_ID', '')
+          d = json.load(sys.stdin)
+          today = datetime.date.today()
+          yesterday = today - datetime.timedelta(days=1)
+          dates = (today.strftime('%Y%m%d'), yesterday.strftime('%Y%m%d'))
+          # smoke mode slugs are e2e-smoke-YYYYMMDD-platform-<run_id>-...
+          if run_id:
+              prefixes = tuple(f'e2e-smoke-{d}-platform-{run_id}-' for d in dates)
+          else:
+              prefixes = tuple(f'e2e-smoke-{d}-platform-' for d in dates)
+          candidates = [o['slug'] for o in d.get('orgs', [])
+                        if any(o.get('slug','').startswith(p) for p in prefixes)
+                        and o.get('instance_status') not in ('purged',)]
+          print('\n'.join(candidates))
+          " 2>/dev/null)
+          leaks=()
+          for slug in $orgs; do
+            echo "Safety-net teardown: $slug"
+            set +e
+            curl -sS -o /tmp/plat-cleanup.out -w "%{http_code}" \
+              -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
+              -H "Authorization: Bearer $ADMIN_TOKEN" \
+              -H "Content-Type: application/json" \
+              -d "{\"confirm\":\"$slug\"}" >/tmp/plat-cleanup.code
+            set -e
+            code=$(cat /tmp/plat-cleanup.code 2>/dev/null || echo "000")
+            if [ "$code" = "200" ] || [ "$code" = "204" ]; then
+              echo "[teardown] deleted $slug (HTTP $code)"
+            else
+              echo "::warning::platform-boot teardown for $slug returned HTTP $code — sweep-stale-e2e-orgs will catch it within ~45 min. Body: $(head -c 300 /tmp/plat-cleanup.out 2>/dev/null)"
+              leaks+=("$slug")
+            fi
+          done
+          if [ ${#leaks[@]} -gt 0 ]; then
+            echo "::warning::platform-boot teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
+          fi
+          exit 0
@@ -88,8 +88,9 @@ jobs:
    # surprises and keeps the routing rule discoverable in one place.
    runs-on: docker-host
    # mc#1982 Phase 3 (RFC §1): surface broken workflows without blocking.
-    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
-    continue-on-error: true
+    # mc#1982: mask removed. If regressions appear, root-fix the underlying
+    # test — do NOT renew the mask silently.
+    continue-on-error: false
    outputs:
      handlers: ${{ steps.filter.outputs.handlers }}
    steps:
@@ -119,8 +120,9 @@ jobs:
    # exists). See detect-changes for the full routing rationale.
    runs-on: docker-host
    # mc#1982 Phase 3 (RFC §1): surface broken workflows without blocking.
-    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
-    continue-on-error: true
+    # mc#1982: mask removed. If regressions appear, root-fix the underlying
+    # test — do NOT renew the mask silently.
+    continue-on-error: false
    env:
      # Unique name per run so concurrent jobs don't collide on the
      # bridge network. ${RUN_ID}-${RUN_ATTEMPT} is unique even across
@@ -241,7 +243,8 @@ jobs:
          # MUST exist for the integration tests to be meaningful. Hard-
          # fail if any didn't land — that would be a real regression we
          # want loud.
-          for tbl in delegations workspaces activity_logs pending_uploads; do
+          # workspace_schedules added for the #2149 scheduler integration tests.
+          for tbl in delegations workspaces activity_logs pending_uploads workspace_schedules; do
            if ! psql -h "${PG_HOST}" -U postgres -d molecule -tA \
                -c "SELECT 1 FROM information_schema.tables WHERE table_name = '$tbl'" \
                | grep -q 1; then
@@ -251,6 +254,19 @@ jobs:
            echo "✓ $tbl table present"
          done

+      - if: needs.detect-changes.outputs.handlers == 'true'
+        name: Preflight — INTEGRATION_DB_URL must be present
+        run: |
+          # Belt-and-suspenders: if the postgres-start step failed to
+          # export INTEGRATION_DB_URL, fail loud BEFORE go test can
+          # t.Skip its way to a green build. Closes the workflow-level
+          # fail-open gap identified in PR #2166 blocker #2.
+          if [ -z "${INTEGRATION_DB_URL:-}" ]; then
+            echo "::error::INTEGRATION_DB_URL is empty — postgres-start step did not export the connection string"
+            exit 1
+          fi
+          echo "INTEGRATION_DB_URL is set"
+
      - if: needs.detect-changes.outputs.handlers == 'true'
        name: Run integration tests
        run: |
@@ -259,6 +275,16 @@ jobs:
          # workflow runs don't fight over a host-net 5432 port.
          go test -tags=integration -timeout 5m -v ./internal/handlers/ -run "^TestIntegration_"

+      - if: needs.detect-changes.outputs.handlers == 'true'
+        name: Run scheduler integration tests (#2149)
+        run: |
+          # #2149: real-PG regression coverage for the scheduler firing loop
+          # (tick → A2A fire → write-back of last_run_at/next_run_at/run_count/
+          # activity_logs jsonb incl. invalid-UTF-8 sanitization + sweepPhantomBusy).
+          # Reuses the same migrated Postgres (workspace_schedules / activity_logs
+          # / workspaces all landed by the migration replay step above).
+          go test -tags=integration -timeout 5m -v ./internal/scheduler/ -run "^TestIntegration_"
+
      - if: failure() && needs.detect-changes.outputs.handlers == 'true'
        name: Diagnostic dump on failure
        env:
@@ -49,37 +49,56 @@ jobs:
      GITHUB_SERVER_URL: https://git.moleculesai.app
    steps:
      - name: Identify runner
+        id: identify
+        continue-on-error: true
        run: |
          set -eu
          echo "arch=$(uname -m)"
          echo "kernel=$(uname -sr)"
          echo "shell=$BASH_VERSION"
          # Sanity: must actually be arm64. If amd64 sneaks in here,
-          # fail fast — that means the label routing is wrong.
+          # the job skips gracefully rather than hard-failing, because
+          # a mislabelled runner is an ops concern, not a code defect.
+          # Pilot lane must not make main red (#2146).
          case "$(uname -m)" in
-            aarch64|arm64) echo "arm64 confirmed" ;;
-            *) echo "ERROR: expected arm64, got $(uname -m)"; exit 1 ;;
+            aarch64|arm64)
+              echo "arm64 confirmed"
+              echo "arm64=true" >> "$GITHUB_OUTPUT"
+              ;;
+            *)
+              echo "ERROR: expected arm64, got $(uname -m) — label routing may be wrong"
+              echo "arm64=false" >> "$GITHUB_OUTPUT"
+              exit 1
+              ;;
          esac

      - name: Checkout
+        if: steps.identify.outputs.arm64 == 'true'
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Install shellcheck (arm64)
+        if: steps.identify.outputs.arm64 == 'true'
        continue-on-error: true
        run: |
          set -eu
          if command -v shellcheck >/dev/null 2>&1; then
            echo "shellcheck already present: $(shellcheck --version | head -1)"
          else
-            # Prefer apt if the runner base ships it; else download arm64 binary.
+            # Prefer apt if the runner base ships it; else download the
+            # correct platform binary (darwin vs linux).
            if command -v apt-get >/dev/null 2>&1; then
              sudo apt-get update -qq
              sudo apt-get install -y --no-install-recommends shellcheck
            else
              SC_VER=v0.10.0
-              curl -fsSL "https://github.com/koalaman/shellcheck/releases/download/${SC_VER}/shellcheck-${SC_VER}.linux.aarch64.tar.xz" \
+              if [ "$(uname -s)" = "Darwin" ]; then
+                SC_PKG="shellcheck-${SC_VER}.darwin.aarch64.tar.xz"
+              else
+                SC_PKG="shellcheck-${SC_VER}.linux.aarch64.tar.xz"
+              fi
+              curl -fsSL "https://github.com/koalaman/shellcheck/releases/download/${SC_VER}/${SC_PKG}" \
                | tar -xJf - --strip-components=1
              sudo mv shellcheck /usr/local/bin/
            fi
@@ -87,14 +106,15 @@ jobs:
          shellcheck --version | head -2

      - name: Run shellcheck on .gitea/scripts/*.sh
+        if: steps.identify.outputs.arm64 == 'true'
        continue-on-error: true
        run: |
          set -eu
          # Only the scripts we control under .gitea/scripts. Pilot
          # scope is intentionally narrow — broaden in a follow-up
          # once the lane is proven.
-          if ! command -v shellcheck >/dev/null 2>&1; then
-            echo "WARN: shellcheck binary not found — skipping (pilot mode)"
+          if ! command -v shellcheck >/dev/null 2>&1 || ! shellcheck --version >/dev/null 2>&1; then
+            echo "WARN: shellcheck not functional — skipping (pilot mode)"
            exit 0
          fi
          # NOTE: macOS ships Bash 3.2 (Apple license), no `mapfile`
@@ -16,14 +16,24 @@ name: publish-workspace-server-image
 #
 # Image tags produced:
 #   :staging-<sha> — per-commit digest, stable for canary verify
-#   :staging-latest — tracks most recent build on this branch
+#   :staging-latest — tracks most recent BUILD on this branch (set by the
+#                     build job, last-writer-wins, NOT prod-gated)
+#   :latest — tracks the most recent PROD-PROMOTED build. Re-pointed by the
+#             deploy-production job ONLY after green main CI + canary +
+#             fleet rollout + /buildinfo verification pass. So :latest ==
+#             "current prod image", never the raw build. (Added 2026-06-03
+#             after a stale :latest — last moved 2026-05-10 — reverted a
+#             production tenant on a no-arg redeploy.)
 #
 # Production auto-deploy:
 #   After both platform and tenant images are pushed, deploy-production waits
 #   for strict required push contexts on the same SHA to go green, then
 #   calls the production CP redeploy-fleet endpoint with target_tag=
-#   staging-<sha>. Set repo variable or secret PROD_AUTO_DEPLOY_DISABLED=true
-#   to stop production rollout while keeping image publishing enabled.
+#   staging-<sha>. On success (rollout + buildinfo verified) it re-points
+#   :latest to the same SHA. Set repo variable or secret
+#   PROD_AUTO_DEPLOY_DISABLED=true to stop production rollout while keeping
+#   image publishing enabled — in which case :latest is NOT advanced either
+#   (correct: an unpromoted build must not become :latest).
 #
 # Primary ECR target: 153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/*
 # Optional staging tenant mirror target:
@@ -253,6 +263,19 @@ jobs:
      PROD_AUTO_DEPLOY_DRY_RUN: ${{ vars.PROD_AUTO_DEPLOY_DRY_RUN || '' }}
      PROD_ALLOW_NON_PROD_CP_URL: ${{ vars.PROD_ALLOW_NON_PROD_CP_URL || '' }}
    steps:
+      # The publish runner's default HOME (/home/hongming) is not writable, so
+      # git/docker credential saves fail (`Error saving credentials: mkdir
+      # /home/hongming: permission denied`) and halt the production rollout
+      # (#2193). Point HOME + DOCKER_CONFIG at the writable job temp dir —
+      # mirrors build-and-push's "Prepare writable Docker config" fix above.
+      - name: Prepare writable HOME + Docker config
+        run: |
+          set -euo pipefail
+          H="$RUNNER_TEMP/auto-deploy-home"
+          mkdir -p "$H/.docker"
+          echo "HOME=$H" >> "$GITHUB_ENV"
+          echo "DOCKER_CONFIG=$H/.docker" >> "$GITHUB_ENV"
+
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

@@ -363,6 +386,36 @@ jobs:
        run: |
          set -euo pipefail
          RESP="$RUNNER_TEMP/prod-redeploy-response.json"
+
+          # Superseded-job guard. This workflow has no `concurrency:` (header
+          # explains why: Gitea 1.22.6 cancels queued prod deploys). So two
+          # close main pushes run BOTH deploy-production jobs. The newer one
+          # rolls the fleet to its (newer) build first; this older job's strict
+          # equality check below would then see tenants on the NEWER SHA and
+          # false-red "$slug is stale" even though the fleet is AHEAD, not
+          # behind (git SHAs aren't ordered; /buildinfo exposes only git_sha).
+          #
+          # If main's current head is no longer THIS job's SHA, a newer commit
+          # has landed and this deploy is superseded — the newest job's verify
+          # is authoritative. Skip strict verify and succeed. exit 0 => newer
+          # head printed (superseded); exit 10 => still the latest, proceed to
+          # the strict verify so a genuinely-behind tenant still fails loudly.
+          set +e
+          NEWER_HEAD="$(python3 .gitea/scripts/prod-auto-deploy.py check-superseded)"
+          SUPERSEDED_EXIT=$?
+          set -e
+          if [ "$SUPERSEDED_EXIT" -eq 0 ] && [ -n "$NEWER_HEAD" ]; then
+            echo "::notice::Superseded deploy: main head is now ${NEWER_HEAD:0:7} (this job deployed ${GITHUB_SHA:0:7}). The fleet is at or ahead of this build; the newer deploy job's verify is authoritative. Skipping strict SHA verify."
+            {
+              echo ""
+              echo "### Buildinfo verification skipped — superseded deploy"
+              echo ""
+              echo "This deploy job's SHA \`${GITHUB_SHA:0:7}\` is no longer the head of \`main\` (now \`${NEWER_HEAD:0:7}\`)."
+              echo "A newer deploy job is rolling the fleet forward; its verify is authoritative."
+            } >> "$GITHUB_STEP_SUMMARY"
+            exit 0
+          fi
+
          mapfile -t SLUGS < <(jq -r '.results[]? | .slug' "$RESP")
          if [ ${#SLUGS[@]} -eq 0 ]; then
            echo "::error::No tenants returned from redeploy-fleet; refusing to mark production deploy verified."
@@ -409,3 +462,65 @@ jobs:
          if [ "$STALE_COUNT" -gt 0 ] || [ "$UNHEALTHY_COUNT" -gt 0 ] || [ "$UNREACHABLE_COUNT" -gt 0 ]; then
            exit 1
          fi
+
+      # Re-point :latest to the just-promoted image — ONLY after the
+      # production rollout + buildinfo verification above have passed.
+      #
+      # WHY HERE (promote point), not at build time:
+      #   The platform-tenant ECR `:latest` tag was last moved 2026-05-10
+      #   and went 3.5 weeks stale because the build step only pushes
+      #   :staging-<sha> + :staging-latest and never re-points :latest. A
+      #   no-arg POST /cp/admin/tenants/:slug/redeploy (whose default tag
+      #   fell through to "latest") then pulled the 3.5-week-old image and
+      #   REVERTED the tenant (incident: molecule-adk-demo, 2026-06-03).
+      #
+      #   The defense-in-depth half of this fix changes that redeploy
+      #   default to :staging-latest, but :latest itself must also be
+      #   kept meaningful. We make :latest track the PROD-BLESSED build,
+      #   not the raw build: by living at the end of deploy-production —
+      #   after `wait-ci` (green main CI), the canary-first batched fleet
+      #   rollout, AND the /buildinfo SHA verification — :latest only ever
+      #   advances to a SHA that is actually green and confirmed running
+      #   across the live fleet. So `:latest` == "current prod image",
+      #   and any consumer that pulls :latest (legacy callers, manual
+      #   `docker pull`, a redeploy that somehow still resolves "latest")
+      #   gets the blessed image instead of whatever happened to build.
+      #
+      #   Re-tag is digest-level (imagetools create), so no rebuild and
+      #   :latest is byte-identical to :staging-<sha> for this commit.
+      - name: Promote :latest to the verified prod image
+        if: ${{ steps.plan.outputs.enabled == 'true' }}
+        env:
+          TENANT_IMAGE_NAME: ${{ env.TENANT_IMAGE_NAME }}
+          STAGING_TENANT_IMAGE_NAME: ${{ env.STAGING_TENANT_IMAGE_NAME }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          AWS_DEFAULT_REGION: us-east-2
+        run: |
+          set -euo pipefail
+          SHA_TAG="staging-${GITHUB_SHA::7}"
+          PROD_ECR_REGISTRY="${TENANT_IMAGE_NAME%%/*}"
+          STAGING_ECR_REGISTRY="${STAGING_TENANT_IMAGE_NAME%%/*}"
+
+          aws ecr get-login-password --region us-east-2 | \
+            docker login --username AWS --password-stdin "${PROD_ECR_REGISTRY}"
+          aws ecr get-login-password --region us-east-2 | \
+            docker login --username AWS --password-stdin "${STAGING_ECR_REGISTRY}"
+
+          # imagetools create copies the source manifest to the new tag by
+          # digest (no pull/rebuild). :latest now points at the exact image
+          # that just passed the prod gate.
+          docker buildx imagetools create \
+            --tag "${TENANT_IMAGE_NAME}:latest" \
+            "${TENANT_IMAGE_NAME}:${SHA_TAG}"
+          docker buildx imagetools create \
+            --tag "${STAGING_TENANT_IMAGE_NAME}:latest" \
+            "${STAGING_TENANT_IMAGE_NAME}:${SHA_TAG}"
+
+          {
+            echo ""
+            echo "### :latest promoted"
+            echo ""
+            echo "Re-pointed \`platform-tenant:latest\` → \`${SHA_TAG}\` (prod + staging ECR)."
+            echo ":latest now tracks the prod-blessed, fleet-verified image."
+          } >> "$GITHUB_STEP_SUMMARY"
@@ -60,11 +60,26 @@ test.describe("MobileChat", () => {

    await expect(page.getByText("Echo: Mobile persistence")).toBeVisible({ timeout: 15_000 });

+    // Reload and deterministically wait for the chat-history GET that
+    // rehydrates the transcript to come back 2xx, rather than racing a
+    // fixed-timeout render assertion against an in-flight fetch. The
+    // server now persists the a2a_receive row SYNCHRONOUSLY before the
+    // send's 200 (workspace-server logA2ASuccess), so the row is
+    // guaranteed present by the time this GET runs — the wait is for
+    // hydration latency, not for a still-racing write.
+    const historyResponse = page.waitForResponse(
+      (resp) =>
+        resp.url().includes("/chat-history") &&
+        resp.request().method() === "GET" &&
+        resp.status() === 200,
+      { timeout: 15_000 },
+    );
    await page.reload();
    await page.waitForSelector("[data-testid='chat-panel']", { timeout: 10_000 });
+    await historyResponse;

-    await expect(page.getByText("Mobile persistence", { exact: true })).toBeVisible({ timeout: 5_000 });
-    await expect(page.getByText("Echo: Mobile persistence")).toBeVisible({ timeout: 5_000 });
+    await expect(page.getByText("Mobile persistence", { exact: true })).toBeVisible();
+    await expect(page.getByText("Echo: Mobile persistence")).toBeVisible();
  });

  test("composer auto-grows with multi-line text", async ({ page }) => {
@@ -250,7 +250,38 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
  const workspaceId = ws.body.id as string;
  console.log(`[staging-setup] Workspace created: ${workspaceId}`);

-  // 6. Wait for workspace online
+  // 6. Wait for workspace RENDERABLE.
+  //
+  // This harness exists to verify the canvas *tab UI* renders (staging-
+  // tabs.spec.ts: open each of the 13 workspace-panel tabs, assert no hard
+  // crash / no "Failed to load" toast). It does NOT exercise the agent —
+  // no LLM call is made, the spec even mocks /cp/auth/me and 401→200. All
+  // it needs is a workspace ROW that the canvas lists so the node renders
+  // and the side-panel tabs open. A fully-`online` agent is NOT required.
+  //
+  // That distinction became load-bearing on 2026-06-03: workspace-server
+  // #2162 (fix(provision): platform-managed workspace must fail-closed when
+  // CP proxy env absent) made a platform_managed workspace ABORT AT BOOT
+  // with MISSING_PLATFORM_PROXY when MOLECULE_LLM_BASE_URL /
+  // MOLECULE_LLM_USAGE_TOKEN are not present in the tenant's env. The
+  // canvas E2E creates a bare hermes/gpt-4o workspace, which defaults
+  // closed to platform_managed (workspace_provision.go:~1009), and the
+  // staging tenant does not carry the CP proxy env — so the agent never
+  // starts. Pre-#2162 this same workspace booted credential-less (the bug
+  // #2162 fixed) and the tabs rendered fine; #2162 is a correct production
+  // safety fix, but it surfaced here as `status:"failed", uptime_seconds:0,
+  // last_sample_error:null` — the pre-start credential-abort shape — and the
+  // old hard-throw turned a UI-irrelevant boot skip into a main-red
+  // (core#2199). The agent boot stage is simply not what this test gates.
+  //
+  // So: online is the happy path. A `failed` row that is the PRE-START
+  // credential-abort shape (the agent process never ran: uptime_seconds==0
+  // AND no last_sample_error) is treated as RENDERABLE — the row exists,
+  // the node + tabs render, proceed. We do NOT mask a real boot regression:
+  // any `failed` carrying a last_sample_error, OR a non-zero uptime (the
+  // agent started then crashed — image pull, panic, PYTHONPATH, etc.),
+  // still hard-throws. Genuine *infra* provision failure is already caught
+  // loud one step earlier at the org level (instance_status === "failed").
  await waitFor<boolean>(
    async () => {
      const r = await jsonFetch(`${tenantURL}/workspaces/${workspaceId}`, {
@@ -259,6 +290,24 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
      if (r.status !== 200) return null;
      if (r.body?.status === "online") return true;
      if (r.body?.status === "failed") {
+        const uptime = Number(r.body?.uptime_seconds ?? 0);
+        const sampleErr = r.body?.last_sample_error;
+        const preStartCredentialAbort = uptime === 0 && !sampleErr;
+        if (preStartCredentialAbort) {
+          // Agent never started (no LLM cred on this staging tenant — the
+          // expected #2162 platform-proxy gap). The workspace row still
+          // renders, which is all the tab-UI test needs. Proceed, but log
+          // loudly so a real "agent never booted because of something else"
+          // is not silently normalized.
+          console.warn(
+            `[staging-setup] workspace ${workspaceId} is 'failed' with the pre-start ` +
+              `credential-abort shape (uptime_seconds=0, no last_sample_error) — agent did ` +
+              `not boot (expected on staging without CP LLM proxy env, post workspace-server ` +
+              `#2162). The tab-UI test does not exercise the agent; proceeding with the ` +
+              `workspace row, which renders regardless. full body: ${JSON.stringify(r.body)}`,
+          );
+          return true;
+        }
        // last_sample_error is often empty when the failure happens before
        // the agent emits a sample (e.g. boot crash, image pull error,
        // missing PYTHONPATH, OpenAI quota at startup). Dumping the full
@@ -266,8 +315,8 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
        // needs without a second probe. Otherwise this propagates as a
        // bare "Workspace failed: " — the exact useless message that
        // sent #2632 to the issue tracker.
-        const detail = r.body.last_sample_error
-          ? r.body.last_sample_error
+        const detail = sampleErr
+          ? sampleErr
          : `(no last_sample_error) full body: ${JSON.stringify(r.body)}`;
        throw new Error(`Workspace failed: ${detail}`);
      }
@@ -277,7 +326,7 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
    10_000,
    "workspace online",
  );
-  console.log(`[staging-setup] Workspace online`);
+  console.log(`[staging-setup] Workspace renderable`);

  // 7. Hand state off to tests + teardown — overwrite the slug-only
  // bootstrap state with the full state spec tests need.
@@ -8,9 +8,12 @@ import { ExternalConnectModal, type ExternalConnectionInfo } from "./ExternalCon
 import {
  ProviderModelSelector,
  buildProviderCatalog,
+  buildProviderCatalogFromRegistry,
  findProviderForModel,
  type SelectorModel,
  type SelectorValue,
+  type RegistryProvider,
+  type RegistryModel,
 } from "./ProviderModelSelector";

 interface WorkspaceOption {
@@ -32,6 +35,16 @@ interface TemplateSpec {
  model?: string;
  models?: SelectorModel[];
  providers?: string[];
+  // internal#718 P3 registry-served fields (additive; absent on older
+  // backends and for non-registry runtimes). When registry_backed is true the
+  // provider→model catalog is built from registry_providers/registry_models so
+  // each model's DERIVED provider (e.g. moonshot/kimi-k2.6 → "platform") drives
+  // the dropdown bucket and the create payload's llm_provider — instead of the
+  // legacy inferVendor heuristic that slash-splits the id into "moonshot".
+  // Mirrors ConfigTab's RuntimeOption loader (RFC#340 Fix C).
+  registry_backed?: boolean;
+  registry_providers?: RegistryProvider[];
+  registry_models?: RegistryModel[];
 }

 const DEFAULT_RUNTIME = "claude-code";
@@ -168,15 +181,53 @@ export function CreateWorkspaceButton() {
    }),
    [runtime, templateSpecs],
  );
-  const llmModels = useMemo(
-    () => {
-      const sourceSpec = selectedTemplateSpec ?? selectedRuntimeTemplateSpec;
-      if (!sourceSpec?.models?.length) return [];
-      return sourceSpec.models;
-    },
+  // The /templates row backing the LLM picker: an explicitly-selected
+  // workspace template wins, else the base runtime template row.
+  const llmSourceSpec = useMemo<TemplateSpec | null>(
+    () => selectedTemplateSpec ?? selectedRuntimeTemplateSpec,
    [selectedRuntimeTemplateSpec, selectedTemplateSpec],
  );
-  const llmCatalog = useMemo(() => buildProviderCatalog(llmModels), [llmModels]);
+  // internal#718 P3 / RFC#340 Fix C: a runtime is registry-backed when the
+  // /templates row says so AND it served a non-empty registry_models set.
+  // Mirrors ConfigTab's `registryBacked` derivation exactly.
+  const registryBacked = useMemo(
+    () =>
+      llmSourceSpec?.registry_backed === true &&
+      (llmSourceSpec.registry_models?.length ?? 0) > 0,
+    [llmSourceSpec],
+  );
+  // Models fed to the selector dropdown. For a registry-backed runtime use the
+  // registry-served native set, carrying each model's DERIVED provider so the
+  // selector buckets it correctly (moonshot/kimi-k2.6 → "platform", not the
+  // inferVendor "moonshot"). Otherwise fall back to the template-served
+  // models[] + the legacy heuristic — same fallback ConfigTab keeps.
+  const llmModels = useMemo<SelectorModel[]>(
+    () => {
+      if (registryBacked) {
+        return (llmSourceSpec?.registry_models ?? []).map((m) => ({
+          id: m.id,
+          name: m.name,
+          ...(m.provider ? { provider: m.provider } : {}),
+        }));
+      }
+      return llmSourceSpec?.models?.length ? llmSourceSpec.models : [];
+    },
+    [registryBacked, llmSourceSpec],
+  );
+  // Registry-backed path: build the catalog from registry_providers/
+  // registry_models so dropdown labels + billing + the derived provider come
+  // from the provider-registry SSOT (restores the "Platform" bucket). Legacy
+  // path: re-infer from models[] via buildProviderCatalog (inferVendor).
+  const llmCatalog = useMemo(
+    () =>
+      registryBacked
+        ? buildProviderCatalogFromRegistry(
+            llmSourceSpec?.registry_providers ?? [],
+            llmSourceSpec?.registry_models ?? [],
+          )
+        : buildProviderCatalog(llmModels),
+    [registryBacked, llmSourceSpec, llmModels],
+  );
  const selectedLLMProvider = useMemo(
    () => llmCatalog.find((p) => p.id === llmSelection.providerId) ?? llmCatalog[0],
    [llmCatalog, llmSelection.providerId],
@@ -184,7 +235,7 @@ export function CreateWorkspaceButton() {

  useEffect(() => {
    if (llmCatalog.length === 0) return;
-    const sourceDefault = (selectedTemplateSpec ?? selectedRuntimeTemplateSpec)?.model?.trim();
+    const sourceDefault = llmSourceSpec?.model?.trim();
    const platformProvider = llmCatalog.find((p) => p.vendor === "platform");
    const matched = sourceDefault ? findProviderForModel(llmCatalog, sourceDefault) : null;
    const next = platformProvider ?? matched ?? llmCatalog[0];
@@ -197,7 +248,7 @@ export function CreateWorkspaceButton() {
      envVars: next.envVars,
    });
    setLLMSecret("");
-  }, [llmCatalog, selectedRuntimeTemplateSpec, selectedTemplateSpec]);
+  }, [llmCatalog, llmSourceSpec]);

  // Reset form and load workspaces whenever dialog opens
  useEffect(() => {
@@ -461,6 +512,7 @@ export function CreateWorkspaceButton() {
                </div>
                <ProviderModelSelector
                  models={llmModels}
+                  catalog={registryBacked ? llmCatalog : undefined}
                  value={llmSelection}
                  onChange={(next) => {
                    setLLMSelection(next);
@@ -454,6 +454,128 @@ describe("CreateWorkspaceDialog — dynamic runtime provider picker", () => {
  });
 });

+// ---------------------------------------------------------------------------
+// Registry-backed provider catalog (RFC#340 Fix C)
+//
+// Regression guard for the mis-bucketing bug: when a registry-backed
+// claude-code template serves `moonshot/kimi-k2.6` whose DERIVED provider is
+// `platform`, the dialog must build the dropdown from registry_providers/
+// registry_models (buildProviderCatalogFromRegistry) — NOT the legacy
+// inferVendor heuristic which slash-splits the id into "moonshot". The
+// distinguishing trait of this fixture: the plain `models[]` array does NOT
+// carry an explicit `provider` field, so the LEGACY path would bucket the
+// model under "moonshot" and send llm_provider:"moonshot". Only the
+// registry-backed path yields the Platform bucket + llm_provider:"platform".
+// ---------------------------------------------------------------------------
+
+// claude-code template whose plain models[] is UN-annotated (no explicit
+// provider). The derived-provider annotation lives ONLY in registry_models.
+const REGISTRY_TEMPLATE = {
+  id: "claude-code-default",
+  name: "Claude Code Agent",
+  runtime: "claude-code",
+  model: "moonshot/kimi-k2.6",
+  // Legacy fields — note: NO explicit provider on the platform model, so the
+  // legacy inferVendor path would slash-split it into "moonshot".
+  providers: ["platform", "minimax", "anthropic"],
+  models: [
+    { id: "moonshot/kimi-k2.6", name: "Kimi K2.6", required_env: [] },
+    { id: "MiniMax-M2.7", name: "MiniMax M2.7", required_env: ["MINIMAX_API_KEY"] },
+    { id: "claude-sonnet-4-6", name: "Claude Sonnet 4.6", required_env: ["ANTHROPIC_API_KEY"] },
+  ],
+  // Registry-served SSOT (internal#718 P3). DeriveProvider resolved
+  // moonshot/kimi-k2.6 → "platform"; MiniMax-M2.7 → "minimax".
+  registry_backed: true,
+  registry_providers: [
+    { name: "platform", display_name: "Platform", auth_env: [], billing_mode: "platform_managed" },
+    { name: "minimax", display_name: "MiniMax", auth_env: ["MINIMAX_API_KEY"], billing_mode: "byok" },
+    { name: "anthropic", display_name: "Anthropic API", auth_env: ["ANTHROPIC_API_KEY"], billing_mode: "byok" },
+  ],
+  registry_models: [
+    { id: "moonshot/kimi-k2.6", name: "Kimi K2.6", provider: "platform", billing_mode: "platform_managed" },
+    { id: "MiniMax-M2.7", name: "MiniMax M2.7", provider: "minimax", billing_mode: "byok" },
+    { id: "claude-sonnet-4-6", name: "Claude Sonnet 4.6", provider: "anthropic", billing_mode: "byok" },
+  ],
+};
+
+describe("CreateWorkspaceDialog — registry-backed provider catalog (RFC#340 Fix C)", () => {
+  beforeEach(() => {
+    mockGet.mockImplementation(async (url: string) => {
+      if (url === "/templates") {
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        return [REGISTRY_TEMPLATE] as any;
+      }
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      return SAMPLE_WORKSPACES as any;
+    });
+  });
+
+  it("shows the Platform provider bucket for the registry-backed claude-code runtime", async () => {
+    await openDialog();
+    const providerSelect = await waitFor(() => {
+      const sel = document.querySelector("[data-testid='provider-select']") as HTMLSelectElement;
+      expect(sel).toBeTruthy();
+      return sel;
+    });
+    const labels = Array.from(providerSelect.options).map((o) => o.text.trim());
+    // Registry display_name "Platform" appears — NOT "moonshot" from the
+    // legacy slash-split heuristic.
+    expect(labels).toContain("Platform");
+    expect(labels).not.toContain("moonshot");
+    // Bucket id is the registry-keyed id, vendor is the bare provider name.
+    const values = Array.from(providerSelect.options).map((o) => o.value);
+    expect(values).toContain("registry|platform");
+  });
+
+  it("sends llm_provider: platform (not moonshot) for moonshot/kimi-k2.6", async () => {
+    await openDialog();
+    fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
+      target: { value: "Kimi Agent" },
+    });
+    // Wait for the registry default to settle on the Platform bucket + model.
+    await waitFor(() => {
+      const modelSelect = document.querySelector("[data-testid='model-select']") as HTMLSelectElement;
+      expect(modelSelect?.value).toBe("moonshot/kimi-k2.6");
+    });
+
+    const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
+    fireEvent.click(createBtn!);
+
+    await waitFor(() => expect(mockPost).toHaveBeenCalled());
+    const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
+    expect(body.model).toBe("moonshot/kimi-k2.6");
+    expect(body.llm_provider).toBe("platform");
+    // Platform is auth-env-free → no BYOK secret.
+    expect(body.secrets).toBeUndefined();
+  });
+
+  it("buckets MiniMax-M2.7 under its derived provider and sends llm_provider: minimax", async () => {
+    await openDialog();
+    fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
+      target: { value: "MiniMax Agent" },
+    });
+    await waitFor(() => {
+      const sel = document.querySelector("[data-testid='provider-select']") as HTMLSelectElement;
+      expect(Array.from(sel.options).map((o) => o.value)).toContain("registry|minimax");
+    });
+    fireEvent.change(document.querySelector("[data-testid='provider-select']") as HTMLSelectElement, {
+      target: { value: "registry|minimax" },
+    });
+    fireEvent.change(document.getElementById("llm-secret-input") as HTMLInputElement, {
+      target: { value: "sk-minimax-test" },
+    });
+
+    const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
+    fireEvent.click(createBtn!);
+
+    await waitFor(() => expect(mockPost).toHaveBeenCalled());
+    const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
+    expect(body.model).toBe("MiniMax-M2.7");
+    expect(body.llm_provider).toBe("minimax");
+    expect(body.secrets).toEqual({ MINIMAX_API_KEY: "sk-minimax-test" });
+  });
+});
+
 // ---------------------------------------------------------------------------
 // budget_limit field tests (#541)
 // ---------------------------------------------------------------------------
@@ -34,8 +34,6 @@
  ],
  "org_templates": [
    {"name": "molecule-dev", "repo": "molecule-ai/molecule-ai-org-template-molecule-dev", "ref": "main"},
-    {"name": "free-beats-all", "repo": "molecule-ai/molecule-ai-org-template-free-beats-all", "ref": "main"},
-    {"name": "medo-smoke", "repo": "molecule-ai/molecule-ai-org-template-medo-smoke", "ref": "main"},
    {"name": "molecule-worker-gemini", "repo": "molecule-ai/molecule-ai-org-template-molecule-worker-gemini", "ref": "main"},
    {"name": "ux-ab-lab", "repo": "molecule-ai/molecule-ai-org-template-ux-ab-lab", "ref": "main"}
  ]
@@ -0,0 +1,131 @@
+# Developer SOP — PR review gate auto-fire and stale-head handling
+
+> Last updated: 2026-06-03 (cp#2159 follow-up)
+>
+> Applies to: all core-PR authors and reviewers on `molecule-core` and sibling
+> repos using the `qa-review` + `security-review` branch-protection gates.
+
+---
+
+## 1. Gitea PR-head workflow-selection rule
+
+**Rule:** For `pull_request_target` and `pull_request_review` events, Gitea
+loads the workflow definition from the **PR's HEAD branch**, not from the
+base (`main`) branch.
+
+This is different from GitHub Actions, where `pull_request_target` always
+loads workflows from the base branch. Gitea's behaviour means:
+
+- A PR that was opened **before** the `pull_request_review` trigger was added
+to `qa-review.yml` / `security-review.yml` will **NOT** auto-fire on review,
+because its HEAD still contains the old workflow YAML (no trigger).
+
+- A PR that was opened **after** the trigger was added (or that has been
+rebased onto a commit containing the trigger) **WILL** auto-fire, because its
+HEAD contains the new workflow YAML.
+
+### Ops implication
+
+| PR head contains `pull_request_review` trigger? | Behaviour on APPROVED review |
+|---|---|
+| **Yes** (cut from current main, or rebased) | Workflows auto-queue, evaluate, and POST the `(pull_request_target)` context automatically. No slash-command needed. |
+| **No** (stale head, opened before #2157) | Nothing fires. Use `/qa-recheck` + `/security-recheck` slash-commands in a PR comment, OR rebase onto current main. |
+
+---
+
+## 2. Standard core-PR flow (post-#2157)
+
+```
+1. Author opens PR from a branch based on current main
+   → qa-review + security-review workflows run on pull_request_target
+   → status contexts post (initial eval, usually red until reviews land)
+
+2. Reviewers submit real APPROVED reviews
+   → If PR head has the trigger: workflows AUTO-FIRE on pull_request_review
+   → Contexts flip green (or stay red if reviewer is not in team)
+
+3. [Optional] If contexts did not flip (stale head, event lost, etc.):
+   → Anyone can comment `/qa-recheck` or `/security-recheck`
+   → sop-checklist.yml refires the evaluator (read-only, idempotent)
+
+4. Both qa-review + security-review contexts are green
+   → Plain Do:merge (no force-merge needed)
+```
+
+### Key point
+
+The `/qa-recheck` and `/security-recheck` commands are a **backstop**, not the
+primary path. PRs cut from current main should auto-fire without manual
+intervention.
+
+---
+
+## 3. Diagnosing a stale head
+
+If a PR has real team-member APPROVED reviews but the qa/security contexts
+remain red and no workflow run appears on the PR's "Actions" tab for the
+review event, the PR head is likely stale.
+
+### Quick check
+
+```bash
+# From the PR page, look at the head commit SHA, then:
+curl -sS "https://git.moleculesai.app/api/v1/repos/molecule-ai/molecule-core/contents/.gitea/workflows/qa-review.yml?ref=<HEAD_SHA>" \
+  | jq -r '.content' | base64 -d | grep -c 'pull_request_review'
+# 0  → stale head (no trigger in that version of the workflow)
+# >0 → trigger present; auto-fire SHOULD work (if it didn't, file a tracker)
+```
+
+### Automated diagnostic
+
+The test suite includes `test_gate_stale_head_diagnostic.py`, which reports
+"auto-fire impossible for this PR" when the head lacks the trigger. Run it
+in CI or locally with:
+
+```bash
+PR_NUMBER=123 python -m pytest .gitea/scripts/tests/test_gate_stale_head_diagnostic.py -v
+```
+
+---
+
+## 4. Rebasing vs. slash-refire
+
+| Approach | When to use | Trade-off |
+|---|---|---|
+| **Rebase onto current main** | PR is genuinely stale (head lacks trigger OR head is far behind main) | Clean history, gets all recent fixes, but requires force-push and re-approval if the branch was protected |
+| **`/qa-recheck` + `/security-recheck`** | PR head is recent but the review event was missed, or you want to avoid rebase churn | Quick, no force-push, but does NOT fix a missing trigger in the head |
+
+**Do not** use slash-refire as a substitute for rebasing a stale head. If the
+workflow YAML in the PR head does not contain `pull_request_review`, no amount
+of rechecking will make auto-fire work.
+
+---
+
+## 5. Live-fire verification
+
+The `test_gate_auto_fire_live.py` regression test exercises the full runtime
+path: it submits an APPROVED review to a test PR and polls for the
+`(pull_request_target)` status contexts. It is skipped when no API token is
+available, and is intended to catch runtime non-fire that static structural
+tests (e.g. `test_gate_review_auto_fire.py`) cannot detect.
+
+Run manually with:
+
+```bash
+export GITEA_HOST=git.moleculesai.app
+export GITEA_TOKEN=<your-token>
+export REPO=molecule-ai/molecule-core
+export LIVEFIRE_PR_NUMBER=<test-pr-number>
+python -m pytest .gitea/scripts/tests/test_gate_auto_fire_live.py -v
+```
+
+---
+
+## References
+
+- #2159 — gate auto-trigger not firing (root cause: stale PR heads lacking
+the `pull_request_review` trigger, NOT a workflow code defect)
+- #765 — static structural regression test for gate configuration
+- #2157 — merged trigger addition (`pull_request_review` types: [submitted])
+- #2020 — milestone confirming gate infrastructure is stable
+- RFC#324 — qa-review + security-review design
@@ -23,23 +23,61 @@
 #                  their provider entries, otherwise the workspace boots
 #                  reachable but the first A2A call hits the wrong auth path.
 #
-# When E2E_MODEL_SLUG is set, it overrides this dispatch — useful when an
-# operator dispatches the workflow to test a specific slug.
+# PLATFORM-MANAGED path (E2E_LLM_PATH=platform) — the moonshot/kimi
+# NOT_CONFIGURED regression (RFC#340 Fix A #2187):
+#
+#   The branches above all exercise BYOK: a tenant key (MINIMAX/ANTHROPIC/
+#   OPENAI) is injected as a workspace secret and the model id resolves to that
+#   vendor's *BYOK* provider entry. That path NEVER exercises the platform arm —
+#   the exact arm that booted "moonshot/kimi-k2.6" into NOT_CONFIGURED in prod,
+#   because the generated config.yaml lacked the derived `provider: platform`.
+#
+#   E2E_LLM_PATH=platform selects a platform-managed model id (slash-namespaced,
+#   no tenant key — Molecule owns billing via the CP LLM proxy). The default is
+#   "moonshot/kimi-k2.6", the headline incident combo. Override the specific
+#   platform model with E2E_MODEL_SLUG. The provision branch in
+#   test_staging_full_saas.sh sends NO secrets for this path (platform-managed
+#   needs none), so the workspace must boot online purely on the proxy env the
+#   control plane injects + the manifest-derived `provider: platform` that Fix A
+#   stamps. That is the REAL boot-path assertion the deterministic unit test
+#   (workspace_provision_platform_boot_test.go) cannot make.
+#
+# When E2E_MODEL_SLUG is set, it overrides this dispatch entirely — useful when
+# an operator dispatches the workflow to test a specific slug (or a specific
+# platform model id).
 #
 # Unit tested by tests/e2e/test_model_slug.sh — every branch must stay
 # pinned because regressions silently mask as "Could not resolve
 # authentication method" + the synth-E2E gate goes red without naming
 # the slug-format mismatch.

+# Default platform-managed model for the platform-boot regression path. The
+# exact id that booted NOT_CONFIGURED in prod. Must stay a member of the
+# claude-code `platform` arm in workspace-server/internal/providers/providers.yaml
+# (the deterministic suite TestEnsureDefaultConfig_StampsProviderForEverySSOTPlatformModel
+# enforces every member of that arm derives provider=platform). Resolved INSIDE
+# pick_model_slug via ${E2E_DEFAULT_PLATFORM_MODEL:-...} so callers can override
+# it (or unset it) without tripping `set -u`.
+E2E_DEFAULT_PLATFORM_MODEL_FALLBACK="moonshot/kimi-k2.6"
+
 # Usage: pick_model_slug <runtime>
 #   stdout: the slug string
 #   E2E_MODEL_SLUG (env): if set + non-empty, used as-is (operator override)
+#   E2E_LLM_PATH=platform (env): select the platform-managed model id
+#     (E2E_DEFAULT_PLATFORM_MODEL) instead of a BYOK slug. Takes precedence over
+#     the per-key BYOK branches; E2E_MODEL_SLUG still wins over everything.
 pick_model_slug() {
  local runtime="${1:-}"
  if [ -n "${E2E_MODEL_SLUG:-}" ]; then
    printf '%s' "$E2E_MODEL_SLUG"
    return 0
  fi
+  # Platform-managed path: the slash-namespaced platform model, no tenant key.
+  # Exercises the arm the moonshot/kimi NOT_CONFIGURED bug shipped on.
+  if [ "${E2E_LLM_PATH:-}" = "platform" ]; then
+    printf '%s' "${E2E_DEFAULT_PLATFORM_MODEL:-$E2E_DEFAULT_PLATFORM_MODEL_FALLBACK}"
+    return 0
+  fi
  case "$runtime" in
    hermes)      printf 'openai/gpt-4o' ;;
    claude-code)
@@ -65,6 +65,28 @@ assert_eq "claude-code + both keys → MiniMax priority"            "$got" "Mini
 run_test "unknown runtime → slash-form fallback"                   gemini      "openai/gpt-4o"
 run_test "empty runtime → slash-form fallback"                     ""          "openai/gpt-4o"

+# ── Platform-managed path (E2E_LLM_PATH=platform) ──
+# The moonshot/kimi NOT_CONFIGURED regression path (RFC#340 Fix A #2187).
+# Selects the slash-namespaced platform model (default moonshot/kimi-k2.6),
+# takes precedence over the per-key BYOK branches, and is itself overridden by
+# E2E_MODEL_SLUG. These pins guard the harness's ability to drive the platform
+# arm — the one the prod bug shipped on.
+echo
+echo "Test: pick_model_slug — platform-managed path (E2E_LLM_PATH=platform)"
+echo
+
+got=$(unset E2E_MODEL_SLUG E2E_DEFAULT_PLATFORM_MODEL; E2E_LLM_PATH=platform pick_model_slug claude-code)
+assert_eq "claude-code + platform path → headline kimi model"      "$got" "moonshot/kimi-k2.6"
+
+got=$(unset E2E_MODEL_SLUG E2E_DEFAULT_PLATFORM_MODEL; E2E_LLM_PATH=platform E2E_MINIMAX_API_KEY="mx-stray" pick_model_slug claude-code)
+assert_eq "platform path beats a stray BYOK key (no mask)"         "$got" "moonshot/kimi-k2.6"
+
+got=$(unset E2E_MODEL_SLUG; E2E_LLM_PATH=platform E2E_DEFAULT_PLATFORM_MODEL="minimax/MiniMax-M3" pick_model_slug claude-code)
+assert_eq "platform path honours E2E_DEFAULT_PLATFORM_MODEL"        "$got" "minimax/MiniMax-M3"
+
+got=$(unset E2E_DEFAULT_PLATFORM_MODEL; E2E_MODEL_SLUG="anthropic/claude-opus-4-7" E2E_LLM_PATH=platform pick_model_slug claude-code)
+assert_eq "E2E_MODEL_SLUG still wins over platform path"            "$got" "anthropic/claude-opus-4-7"
+
 # ── Override via E2E_MODEL_SLUG ──
 # When the operator sets E2E_MODEL_SLUG, the per-runtime dispatch is
 # bypassed. Used during workflow_dispatch to A/B specific slugs.
@@ -476,7 +476,19 @@ wait_workspaces_online_routable() {
 # All empty → '{}' (workspace will fail at first turn with an
 # expected, actionable auth error rather than masking the test).
 SECRETS_JSON='{}'
-if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
+# Platform-managed path (E2E_LLM_PATH=platform) — the moonshot/kimi
+# NOT_CONFIGURED regression (RFC#340 Fix A #2187). Molecule owns billing via the
+# CP LLM proxy, so the workspace needs NO tenant key: provision with empty
+# secrets and let the workspace boot purely on (a) the proxy env the control
+# plane injects + (b) the manifest-derived `provider: platform` Fix A stamps into
+# the generated config.yaml. This is the path that booted NOT_CONFIGURED in prod
+# precisely because the BYOK branches below never exercise it. We deliberately
+# skip the key-injection branches so a stray E2E_*_API_KEY in the runner env
+# cannot silently convert this into a BYOK run and mask the regression.
+if [ "${E2E_LLM_PATH:-}" = "platform" ]; then
+  log "    LLM path: PLATFORM-MANAGED (no tenant key; proxy + Fix A provider stamp)"
+  SECRETS_JSON='{}'
+elif [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
  SECRETS_JSON=$(python3 -c "
 import json, os
 k = os.environ['E2E_MINIMAX_API_KEY']
@@ -858,6 +870,24 @@ fi
 if echo "$AGENT_TEXT" | grep -qiE "exceeded your current quota|insufficient_quota"; then
  fail "A2A — PROVIDER QUOTA EXHAUSTED (NOT a platform regression). Operator action: top up MOLECULE_STAGING_OPENAI_API_KEY billing or rotate to a higher-quota org at Settings → Secrets and Variables → Actions. Tracked in #2578. Raw: $AGENT_TEXT"
 fi
+# Empty-completion class — the agent runtime reached the LLM and got a
+# 2xx back, but the assistant turn carried NO text part (empty content,
+# or tool_calls/reasoning-only with no surfaced text), so the runtime
+# returns the literal "Error: message contained no text content." as its
+# reply text. Steps 0-7 passing means the platform is healthy (CP up,
+# tenant provisioned, workspace online + routable, A2A delivery e2e); the
+# break is the configured completion BACKEND returning an empty turn — a
+# model/provider-side regression, NOT a workspace-server or harness bug,
+# and NOT NOT_CONFIGURED (that fails earlier, at boot). Name it explicitly
+# so the canary alert points at the model, not the platform: a generic
+# "error-shaped response" misdirects triage to workspace-server. Observed
+# 2026-06-03/04 across every staging canary on MODEL_SLUG=MiniMax-M2 (the
+# canary default since #2710) — 100% on the parent's first cold turn,
+# identical on main's scheduled synthetic E2E and on PRs (so it is an
+# environmental backend regression, never PR-introduced).
+if echo "$AGENT_TEXT" | grep -qiF "message contained no text content"; then
+  fail "A2A — EMPTY COMPLETION (backend regression, NOT a platform/workspace-server bug). The configured model (MODEL_SLUG=${MODEL_SLUG:-?}) returned a 2xx completion with no text part; the runtime surfaced 'message contained no text content.'. Operator action: check the staging LLM backend / proxy for the canary model (MiniMax-M2 since #2710) — empty assistant turns, not an auth/quota/boot fault. Raw: $AGENT_TEXT"
+fi
 # Generic catch-all — falls through if none of the known regressions hit.
 if echo "$AGENT_TEXT" | grep -qiE "error|exception"; then
  fail "A2A returned an error-shaped response: $AGENT_TEXT"
@@ -18,6 +18,7 @@ No network. No live Gitea calls.
 from __future__ import annotations

 import importlib.util
+import json
 import os
 import textwrap
 from pathlib import Path
@@ -117,6 +118,31 @@ def _write_audit_yaml(tmp_path: Path, required_checks: list[str]) -> Path:
    return p


+def _write_audit_yaml_json(tmp_path: Path, required_checks_json: dict) -> Path:
+    """Write a synthetic audit-force-merge.yml with REQUIRED_CHECKS_JSON env."""
+    block = json.dumps(required_checks_json, indent=2)
+    text = textwrap.dedent(
+        f"""\
+        name: audit-force-merge
+        on:
+          schedule:
+            - cron: '*/30 * * * *'
+        jobs:
+          audit:
+            runs-on: ubuntu-latest
+            steps:
+              - name: Run audit
+                env:
+                  REQUIRED_CHECKS_JSON: |
+                    {block.replace(chr(10), chr(10) + '                    ')}
+                run: bash .gitea/scripts/audit-force-merge.sh
+        """
+    )
+    p = tmp_path / "audit-force-merge.yml"
+    p.write_text(text, encoding="utf-8")
+    return p
+
+
 def _make_stub_api(responses: dict):
    """Build a fake `api()` callable.

@@ -363,6 +389,107 @@ def test_happy_path_no_drift(drift_module, tmp_path, monkeypatch):
    assert findings == [], findings


+# --------------------------------------------------------------------------
+# REQUIRED_CHECKS_JSON variant drift tests
+# --------------------------------------------------------------------------
+def test_f3a_env_wider_than_protection_json_variant(drift_module, tmp_path, monkeypatch):
+    """F3a: REQUIRED_CHECKS_JSON env has a context NOT in protection."""
+    ci = _write_ci_yaml(
+        tmp_path,
+        jobs={"build": {"runs-on": "ubuntu-latest"}},
+        sentinel_needs=["build"],
+    )
+    audit = _write_audit_yaml_json(
+        tmp_path,
+        {"main": ["ci / build (pull_request)", "ci / ghost (pull_request)"]},
+    )
+    _patch_paths(drift_module, monkeypatch, ci, audit)
+
+    stub = _make_stub_api({
+        ("GET", "/repos/owner/repo/branch_protections/main"): (
+            200,
+            {"status_check_contexts": ["ci / build (pull_request)"]},
+        ),
+    })
+    monkeypatch.setattr(drift_module, "api", stub)
+
+    findings, _ = drift_module.detect_drift("main")
+    assert any("F3a" in f and "ghost" in f for f in findings), findings
+
+
+def test_f3b_protection_wider_than_env_json_variant(drift_module, tmp_path, monkeypatch):
+    """F3b: protection has a context NOT in REQUIRED_CHECKS_JSON env."""
+    ci = _write_ci_yaml(
+        tmp_path,
+        jobs={
+            "build": {"runs-on": "ubuntu-latest"},
+            "test": {"runs-on": "ubuntu-latest"},
+        },
+        sentinel_needs=["build", "test"],
+    )
+    audit = _write_audit_yaml_json(
+        tmp_path,
+        {"main": ["ci / build (pull_request)"]},
+    )
+    _patch_paths(drift_module, monkeypatch, ci, audit)
+
+    stub = _make_stub_api({
+        ("GET", "/repos/owner/repo/branch_protections/main"): (
+            200,
+            {
+                "status_check_contexts": [
+                    "ci / build (pull_request)",
+                    "ci / test (pull_request)",
+                ]
+            },
+        ),
+    })
+    monkeypatch.setattr(drift_module, "api", stub)
+
+    findings, _ = drift_module.detect_drift("main")
+    assert any("F3b" in f and "ci / test (pull_request)" in f for f in findings), findings
+
+
+def test_happy_path_no_drift_json_variant(drift_module, tmp_path, monkeypatch):
+    """Happy path with REQUIRED_CHECKS_JSON: all aligned."""
+    ci = _write_ci_yaml(
+        tmp_path,
+        jobs={
+            "build": {"runs-on": "ubuntu-latest"},
+            "test": {"runs-on": "ubuntu-latest"},
+        },
+        sentinel_needs=["build", "test"],
+    )
+    audit = _write_audit_yaml_json(
+        tmp_path,
+        {
+            "main": [
+                "ci / build (pull_request)",
+                "ci / test (pull_request)",
+                "ci / all-required (pull_request)",
+            ]
+        },
+    )
+    _patch_paths(drift_module, monkeypatch, ci, audit)
+
+    stub = _make_stub_api({
+        ("GET", "/repos/owner/repo/branch_protections/main"): (
+            200,
+            {
+                "status_check_contexts": [
+                    "ci / build (pull_request)",
+                    "ci / test (pull_request)",
+                    "ci / all-required (pull_request)",
+                ]
+            },
+        ),
+    })
+    monkeypatch.setattr(drift_module, "api", stub)
+
+    findings, _ = drift_module.detect_drift("main")
+    assert findings == [], findings
+
+
 # --------------------------------------------------------------------------
 # MUST-FIX 1: find_open_issue must raise on transient HTTP errors
 # --------------------------------------------------------------------------
@@ -26,11 +26,12 @@ import (
 //     the update cycle — no ssh, no re-provision, no ops toil.
 //
 // Contract (paired with cp-side GET /cp/tenants/config):
-//   Request:  GET {MOLECULE_CP_URL or https://api.moleculesai.app}/cp/tenants/config
-//             Authorization: Bearer <ADMIN_TOKEN>
-//             X-Molecule-Org-Id: <MOLECULE_ORG_ID>
-//   Response: 200 {"MOLECULE_CP_SHARED_SECRET":"…","MOLECULE_CP_URL":"…", …}
-//             401 on bearer mismatch or unknown org
+//
+//	Request:  GET {MOLECULE_CP_URL or https://api.moleculesai.app}/cp/tenants/config
+//	          Authorization: Bearer <ADMIN_TOKEN>
+//	          X-Molecule-Org-Id: <MOLECULE_ORG_ID>
+//	Response: 200 {"MOLECULE_CP_SHARED_SECRET":"…","MOLECULE_CP_URL":"…", …}
+//	          401 on bearer mismatch or unknown org
 //
 // Best-effort: any failure logs and returns — main() keeps booting.
 // Self-hosted deploys without MOLECULE_ORG_ID or ADMIN_TOKEN set
@@ -105,3 +106,53 @@ func refreshEnvFromCP() error {
 	log.Printf("CP env refresh: applied %d values from %s/cp/tenants/config", applied, base)
 	return nil
 }
+
+// requiredLLMEnvVars is the set of LLM proxy env vars a managed SaaS
+// tenant must have populated after refreshEnvFromCP. cp#469 (tenant
+// proxy-env delivery) — guaranteed CP-delivered creds reach the
+// tenant process env on boot. Per Researcher Task #37 / Spec 2 and
+// Task #46 (watch-fail-first test).
+//
+// Key set byte-matched against Researcher's verified emission in
+// controlplane tenant_config.go:140-144 (Researcher REQUEST_CHANGES
+// iterate body, 3987f59c). The four keys below ARE the LLM-proxy
+// subset of the 8 CP-emitted keys; OPENAI_BASE_URL / OPENAI_API_KEY /
+// ANTHROPIC_BASE_URL / ANTHROPIC_API_KEY are out of scope for cp#469
+// (different feature surfaces — direct-to-provider fallbacks, not
+// the proxy). v2 fix: MOLECULE_LLM_USAGE_TOKEN, MOLECULE_LLM_USAGE_URL,
+// MOLECULE_LLM_BASE_URL, MOLECULE_LLM_ANTHROPIC_BASE_URL — note the
+// 4th key is namespaced MOLECULE_LLM_ANTHROPIC_BASE_URL, NOT bare
+// ANTHROPIC_BASE_URL. Bare ANTHROPIC_BASE_URL is a separate CP-emitted
+// key for direct-provider use, not the LLM proxy.
+var requiredLLMEnvVars = []string{
+	"MOLECULE_LLM_USAGE_TOKEN",
+	"MOLECULE_LLM_USAGE_URL", // CRITICAL fix v2: was MOLECULE_LLM_URL in v1
+	"MOLECULE_LLM_BASE_URL",
+	"MOLECULE_LLM_ANTHROPIC_BASE_URL", // CRITICAL fix v3: was ANTHROPIC_BASE_URL in v2 (different key!)
+}
+
+// assertManagedTenantHasLLMEnv verifies that, when running as a
+// managed SaaS tenant (MOLECULE_ORG_ID + ADMIN_TOKEN both set), all
+// required LLM proxy env vars are populated after refreshEnvFromCP.
+//
+// Self-hosted (no orgID/adminToken) is exempt — dev must not be
+// blocked here. Managed tenants with missing LLM keys fail with
+// MISSING_CP_LLM_ENV so they do not silently boot with broken proxy
+// creds. Caller in main.go decides whether to log and continue or
+// log.Fatalf depending on deployment context.
+func assertManagedTenantHasLLMEnv() error {
+	if os.Getenv("MOLECULE_ORG_ID") == "" || os.Getenv("ADMIN_TOKEN") == "" {
+		// Self-hosted dev / not yet provisioned — not a managed tenant.
+		return nil
+	}
+	var missing []string
+	for _, k := range requiredLLMEnvVars {
+		if os.Getenv(k) == "" {
+			missing = append(missing, k)
+		}
+	}
+	if len(missing) > 0 {
+		return fmt.Errorf("MISSING_CP_LLM_ENV: required LLM proxy keys not set after refreshEnvFromCP: %v", missing)
+	}
+	return nil
+}
@@ -5,6 +5,7 @@ import (
 	"net/http"
 	"net/http/httptest"
 	"os"
+	"strings"
 	"testing"
 )

@@ -59,6 +60,138 @@ func TestRefreshEnvFromCP_AppliesCPResponse(t *testing.T) {
 	}
 }

+// TestRefreshEnvFromCP_ManagedTenantRequiresLLMKeys: watch-fail-first
+// per Researcher Task #46. When running as a managed tenant
+// (MOLECULE_ORG_ID + ADMIN_TOKEN set), missing LLM proxy env vars
+// after refreshEnvFromCP MUST surface as MISSING_CP_LLM_ENV, not be
+// silently accepted. Without this guard, a CP that loses its LLM
+// creds (e.g. during an incident) would let a tenant boot and then
+// fail later at first LLM call — worse than a loud refusal here.
+func TestRefreshEnvFromCP_ManagedTenantRequiresLLMKeys(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Stub CP returns a CP response WITHOUT any of the required
+		// LLM keys — simulates the failure mode where the CP side
+		// dropped or never had the LLM creds for this org.
+		w.Header().Set("Content-Type", "application/json")
+		fmt.Fprint(w, `{"MOLECULE_CP_SHARED_SECRET":"x","MOLECULE_CP_URL":"https://api.moleculesai.app"}`)
+	}))
+	defer srv.Close()
+
+	t.Setenv("MOLECULE_ORG_ID", "org-managed-1")
+	t.Setenv("ADMIN_TOKEN", "admin-tok")
+	t.Setenv("MOLECULE_CP_URL", srv.URL)
+	// Clear all LLM keys to simulate the boot-without-LLM-env failure mode.
+	t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "")
+	t.Setenv("MOLECULE_LLM_USAGE_URL", "")
+	t.Setenv("MOLECULE_LLM_BASE_URL", "")
+	t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "")
+
+	// refreshEnvFromCP itself should succeed — CP is reachable, returned 200.
+	if err := refreshEnvFromCP(); err != nil {
+		t.Fatalf("refreshEnvFromCP: %v", err)
+	}
+	// The boot assertion must catch the missing LLM keys.
+	err := assertManagedTenantHasLLMEnv()
+	if err == nil {
+		t.Fatal("expected MISSING_CP_LLM_ENV error for managed tenant without LLM keys, got nil")
+	}
+	if !strings.Contains(err.Error(), "MISSING_CP_LLM_ENV") {
+		t.Errorf("expected error to contain MISSING_CP_LLM_ENV, got: %v", err)
+	}
+}
+
+// TestRefreshEnvFromCP_ManagedTenantHappyPath: when the CP returns
+// all 4 LLM-proxy keys, the gate must PASS — no MISSING_CP_LLM_ENV
+// for a properly-configured managed tenant. Watch-fail counterpart
+// to TestRefreshEnvFromCP_ManagedTenantRequiresLLMKeys: if THIS test
+// ever fires MISSING_CP_LLM_ENV on the byte-correct key set, the
+// requiredLLMEnvVars list has drifted from the CP emission again.
+// Per Researcher REQUEST_CHANGES TEST ADEQUACY note.
+func TestRefreshEnvFromCP_ManagedTenantHappyPath(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		// Return ALL 4 LLM-proxy keys — names byte-matched to
+		// tenant_config.go:140-144 CP emission.
+		fmt.Fprint(w, `{"MOLECULE_LLM_USAGE_TOKEN":"tok-1","MOLECULE_LLM_USAGE_URL":"https://llm.example.com/usage","MOLECULE_LLM_BASE_URL":"https://llm.example.com","MOLECULE_LLM_ANTHROPIC_BASE_URL":"https://llm.example.com/anthropic"}`)
+	}))
+	defer srv.Close()
+
+	t.Setenv("MOLECULE_ORG_ID", "org-managed-happy")
+	t.Setenv("ADMIN_TOKEN", "admin-tok")
+	t.Setenv("MOLECULE_CP_URL", srv.URL)
+	// Pre-clear so we can verify the refresh actually populated them.
+	t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "")
+	t.Setenv("MOLECULE_LLM_USAGE_URL", "")
+	t.Setenv("MOLECULE_LLM_BASE_URL", "")
+	t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "")
+
+	if err := refreshEnvFromCP(); err != nil {
+		t.Fatalf("refreshEnvFromCP: %v", err)
+	}
+	// Sanity: refresh actually applied the keys.
+	if got := os.Getenv("MOLECULE_LLM_USAGE_TOKEN"); got != "tok-1" {
+		t.Errorf("refresh did not apply USAGE_TOKEN: got %q", got)
+	}
+	// The boot assertion must pass — no MISSING_CP_LLM_ENV.
+	if err := assertManagedTenantHasLLMEnv(); err != nil {
+		t.Errorf("managed happy path must not MISSING_CP_LLM_ENV, got: %v", err)
+	}
+}
+
+// TestRefreshEnvFromCP_ManagedTenantPartialEnv: when the CP returns
+// 3 of 4 LLM-proxy keys (one missing), the gate must STILL catch it
+// and the error must name the missing key. Per Researcher
+// REQUEST_CHANGES TEST ADEQUACY note — partial-env coverage is
+// critical because the production failure mode is usually "one
+// key dropped" not "all keys dropped".
+func TestRefreshEnvFromCP_ManagedTenantPartialEnv(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		// 3 of 4 — MOLECULE_LLM_ANTHROPIC_BASE_URL is missing.
+		fmt.Fprint(w, `{"MOLECULE_LLM_USAGE_TOKEN":"tok-1","MOLECULE_LLM_USAGE_URL":"https://llm.example.com/usage","MOLECULE_LLM_BASE_URL":"https://llm.example.com"}`)
+	}))
+	defer srv.Close()
+
+	t.Setenv("MOLECULE_ORG_ID", "org-managed-partial")
+	t.Setenv("ADMIN_TOKEN", "admin-tok")
+	t.Setenv("MOLECULE_CP_URL", srv.URL)
+	// Pre-clear all 4 so the 3 that come back from CP are the only
+	// ones set; the 4th (MOLECULE_LLM_ANTHROPIC_BASE_URL) stays empty.
+	t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "")
+	t.Setenv("MOLECULE_LLM_USAGE_URL", "")
+	t.Setenv("MOLECULE_LLM_BASE_URL", "")
+	t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "")
+
+	if err := refreshEnvFromCP(); err != nil {
+		t.Fatalf("refreshEnvFromCP: %v", err)
+	}
+	err := assertManagedTenantHasLLMEnv()
+	if err == nil {
+		t.Fatal("expected MISSING_CP_LLM_ENV for partial env (3 of 4 keys), got nil")
+	}
+	if !strings.Contains(err.Error(), "MISSING_CP_LLM_ENV") {
+		t.Errorf("expected error to contain MISSING_CP_LLM_ENV, got: %v", err)
+	}
+	if !strings.Contains(err.Error(), "MOLECULE_LLM_ANTHROPIC_BASE_URL") {
+		t.Errorf("expected error to name the missing key MOLECULE_LLM_ANTHROPIC_BASE_URL, got: %v", err)
+	}
+}
+
+// TestAssertManagedTenantHasLLMEnv_NotManagedIsNoop: self-hosted
+// (no orgID/adminToken) must NOT block on missing LLM keys — dev
+// ergonomics matter and the assertion's contract is "managed only".
+func TestAssertManagedTenantHasLLMEnv_NotManagedIsNoop(t *testing.T) {
+	t.Setenv("MOLECULE_ORG_ID", "")
+	t.Setenv("ADMIN_TOKEN", "")
+	t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "")
+	t.Setenv("MOLECULE_LLM_USAGE_URL", "")
+	t.Setenv("MOLECULE_LLM_BASE_URL", "")
+	t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "")
+	if err := assertManagedTenantHasLLMEnv(); err != nil {
+		t.Errorf("self-hosted (not managed) must not block, got: %v", err)
+	}
+}
+
 // TestRefreshEnvFromCP_CPUnreachableDoesNotFailBoot: network errors must
 // return non-nil BUT main.go treats that as warn-and-continue. We assert
 // the function returns an error (not a panic) so the caller can log.
@@ -82,6 +82,16 @@ func main() {
 		log.Printf("CP env refresh: %v (continuing with baked-in env)", err)
 	}

+	// Managed-tenant boot assertion (cp#469 — tenant proxy-env delivery).
+	// If we're a managed SaaS tenant (orgID + adminToken set), all required
+	// LLM proxy env vars must be present after refresh. Missing keys block
+	// the tenant from booting with broken LLM creds — silent-fail is worse
+	// than a loud refusal. Self-hosted (no orgID/adminToken) short-circuits
+	// inside the assertion, so this never fires for dev.
+	if err := assertManagedTenantHasLLMEnv(); err != nil {
+		log.Fatalf("Managed tenant boot assertion: %v", err)
+	}
+
 	// Secrets encryption. In MOLECULE_ENV=prod, boot refuses to start
 	// without a valid SECRETS_ENCRYPTION_KEY (fail-secure — Top-5 #5).
 	// In any other environment, missing keys just log a warning and
@@ -359,7 +369,6 @@ func main() {
 	// (WorkspaceHandler.BootstrapFailed) wires its own capture inline.
 	registry.BootFailureRescueHook = handlers.BootFailureRescueHook

-
 	// Provision-timeout sweep — flips workspaces that have been stuck in
 	// status='provisioning' past the timeout window to 'failed' and emits
 	// WORKSPACE_PROVISION_TIMEOUT. Without this the UI banner is cosmetic
@@ -149,9 +149,11 @@ func markFailed(ctx context.Context, wsID string, broadcaster *events.Broadcaste
 		models.StatusFailed, msg, wsID); dbErr != nil {
 		log.Printf("bundle import: failed to mark workspace %s as failed: %v", wsID, dbErr)
 	}
-	broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceProvisionFailed), wsID, map[string]interface{}{
+	if bcErr := broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceProvisionFailed), wsID, map[string]interface{}{
 		"error": msg,
-	})
+	}); bcErr != nil {
+		log.Printf("bundle import: failed to broadcast provision failed for %s: %v", wsID, bcErr)
+	}
 }

 func nilIfEmpty(s string) interface{} {
@@ -407,12 +407,14 @@ func (m *Manager) HandleInbound(ctx context.Context, ch ChannelRow, msg *Inbound

 	// Broadcast event
 	if m.broadcaster != nil {
-		m.broadcaster.RecordAndBroadcast(ctx, string(events.EventChannelMessage), ch.WorkspaceID, map[string]interface{}{
+		if err := m.broadcaster.RecordAndBroadcast(ctx, string(events.EventChannelMessage), ch.WorkspaceID, map[string]interface{}{
 			"channel_id":   ch.ID,
 			"channel_type": ch.ChannelType,
 			"username":     msg.Username,
 			"direction":    "inbound",
-		})
+		}); err != nil {
+			log.Printf("Channels: failed to broadcast inbound event: %v", err)
+		}
 	}

 	return nil
@@ -453,11 +455,13 @@ func (m *Manager) SendOutbound(ctx context.Context, channelID string, text strin
 	}

 	if m.broadcaster != nil {
-		m.broadcaster.RecordAndBroadcast(ctx, string(events.EventChannelMessage), ch.WorkspaceID, map[string]interface{}{
+		if err := m.broadcaster.RecordAndBroadcast(ctx, string(events.EventChannelMessage), ch.WorkspaceID, map[string]interface{}{
 			"channel_id":   ch.ID,
 			"channel_type": ch.ChannelType,
 			"direction":    "outbound",
-		})
+		}); err != nil {
+			log.Printf("Channels: failed to broadcast outbound event: %v", err)
+		}
 	}

 	return nil
@@ -517,7 +517,9 @@ func (t *TelegramAdapter) StartPolling(ctx context.Context, config map[string]in

 				// Acknowledge the button press (removes loading spinner)
 				ackCfg := tgbotapi.NewCallback(cb.ID, "Received")
-				bot.Send(ackCfg)
+				if _, err := bot.Send(ackCfg); err != nil {
+					log.Printf("telegram: failed to send callback ack: %v", err)
+				}

 				// Update the message to show what was clicked
 				decision := "approved"
@@ -529,7 +531,9 @@ func (t *TelegramAdapter) StartPolling(ctx context.Context, config map[string]in
 					cb.Message.MessageID,
 					cb.Message.Text+"\n\n✅ CEO "+decision,
 				)
-				bot.Send(editMsg)
+				if _, err := bot.Send(editMsg); err != nil {
+					log.Printf("telegram: failed to send edit message: %v", err)
+				}

 				// Route the decision as an inbound message to the agent
 				inbound := &InboundMessage{
@@ -383,23 +383,48 @@ func (h *WorkspaceHandler) logA2ASuccess(ctx context.Context, workspaceID, calle
 	}
 	summary := a2aMethod + " → " + wsNameForLog
 	toolTrace := extractToolTrace(respBody)
-	parent := ctx
-	h.goAsync(func() {
-		logCtx, cancel := context.WithTimeout(context.WithoutCancel(parent), 30*time.Second)
-		defer cancel()
-		LogActivity(logCtx, h.broadcaster, ActivityParams{
-			WorkspaceID:  workspaceID,
-			ActivityType: "a2a_receive",
-			SourceID:     nilIfEmpty(callerID),
-			TargetID:     &workspaceID,
-			Method:       &a2aMethod,
-			Summary:      &summary,
-			RequestBody:  json.RawMessage(body),
-			ResponseBody: json.RawMessage(respBody),
-			ToolTrace:    toolTrace,
-			DurationMs:   &durationMs,
-			Status:       logStatus,
-		})
+
+	// DATA-LOSS FIX (internal#470 / #1347 push-mode sibling): this
+	// a2a_receive row is the ONLY durable record of a push-mode chat
+	// round-trip — request_body carries the user's message, response_body
+	// carries the agent's reply, and chat-history hydration
+	// (messagestore.PostgresMessageStore) reads BOTH back to rebuild the
+	// transcript on canvas reopen / reload. It MUST be written
+	// SYNCHRONOUSLY, before proxyA2ARequest returns and ProxyA2A flushes
+	// the 200 to the canvas — otherwise the canvas sees the reply
+	// acknowledged (and rendered optimistically) while the row is still
+	// racing in a detached goroutine, and a reload (or a workspace-server
+	// restart / deploy / OOM) between the 200 and the goroutine's commit
+	// loses the message permanently on reopen.
+	//
+	// This mirrors the discipline already applied to the poll-mode ingest
+	// path (logA2AReceiveQueued / persistUserMessageAtIngest); the
+	// push-mode counterpart was left async, which the E2E Chat
+	// "history persists across reload" test surfaced as an intermittent
+	// red (the reload out-raced the INSERT).
+	//
+	//   - context.WithoutCancel: a client disconnect on chat-exit (which
+	//     cancels the inbound request ctx) MUST NOT abort this write.
+	//   - SYNCHRONOUS (no goAsync): the row must be durable before the 200.
+	//   - Best-effort: LogActivity logs+swallows INSERT errors internally,
+	//     so a DB hiccup never blocks or fails the user's send — behaviour
+	//     for that one request is never worse than the pre-fix async path.
+	//   - The post-commit ACTIVITY_LOGGED broadcast still fires inside
+	//     LogActivity; the durable row is the truth the canvas re-reads.
+	logCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 30*time.Second)
+	defer cancel()
+	LogActivity(logCtx, h.broadcaster, ActivityParams{
+		WorkspaceID:  workspaceID,
+		ActivityType: "a2a_receive",
+		SourceID:     nilIfEmpty(callerID),
+		TargetID:     &workspaceID,
+		Method:       &a2aMethod,
+		Summary:      &summary,
+		RequestBody:  json.RawMessage(body),
+		ResponseBody: json.RawMessage(respBody),
+		ToolTrace:    toolTrace,
+		DurationMs:   &durationMs,
+		Status:       logStatus,
 	})

 	if callerID == "" && statusCode < 400 {
@@ -60,10 +60,10 @@ func sanitizeErrorDetailForBroadcast(s string) string {
 }

 type ActivityHandler struct {
-	broadcaster *events.Broadcaster
+	broadcaster events.EventEmitter
 }

-func NewActivityHandler(b *events.Broadcaster) *ActivityHandler {
+func NewActivityHandler(b events.EventEmitter) *ActivityHandler {
 	return &ActivityHandler{broadcaster: b}
 }

@@ -54,23 +54,29 @@ func (h *ApprovalsHandler) Create(c *gin.Context) {
 		return
 	}

-	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventApprovalRequested), workspaceID, map[string]interface{}{
+	if err := h.broadcaster.RecordAndBroadcast(ctx, string(events.EventApprovalRequested), workspaceID, map[string]interface{}{
 		"approval_id": approvalID,
 		"action":      body.Action,
 		"reason":      body.Reason,
 		"task_id":     body.TaskID,
-	})
+	}); err != nil {
+		log.Printf("approvals: failed to broadcast approval requested: %v", err)
+	}

 	// Auto-escalate to parent
 	var parentID *string
-	db.DB.QueryRowContext(ctx, `SELECT parent_id FROM workspaces WHERE id = $1`, workspaceID).Scan(&parentID)
+	if err := db.DB.QueryRowContext(ctx, `SELECT parent_id FROM workspaces WHERE id = $1`, workspaceID).Scan(&parentID); err != nil {
+		log.Printf("approvals: failed to lookup parent for escalation: %v", err)
+	}
 	if parentID != nil {
-		h.broadcaster.RecordAndBroadcast(ctx, string(events.EventApprovalEscalated), *parentID, map[string]interface{}{
+		if err := h.broadcaster.RecordAndBroadcast(ctx, string(events.EventApprovalEscalated), *parentID, map[string]interface{}{
 			"approval_id":       approvalID,
 			"from_workspace_id": workspaceID,
 			"action":            body.Action,
 			"reason":            body.Reason,
-		})
+		}); err != nil {
+			log.Printf("approvals: failed to broadcast approval escalated: %v", err)
+		}
 	}

 	c.JSON(http.StatusCreated, gin.H{"approval_id": approvalID, "status": "pending"})
@@ -221,11 +227,13 @@ func (h *ApprovalsHandler) Decide(c *gin.Context) {
 		eventType = "APPROVAL_DENIED"
 	}

-	h.broadcaster.RecordAndBroadcast(ctx, eventType, workspaceID, map[string]interface{}{
+	if err := h.broadcaster.RecordAndBroadcast(ctx, eventType, workspaceID, map[string]interface{}{
 		"approval_id": approvalID,
 		"decision":    body.Decision,
 		"decided_by":  decidedBy,
-	})
+	}); err != nil {
+		log.Printf("approvals: failed to broadcast approval decision: %v", err)
+	}

 	c.JSON(http.StatusOK, gin.H{"status": body.Decision, "approval_id": approvalID})
 }
@@ -102,10 +102,10 @@ func pushDelegationResultToInbox(ctx context.Context, sourceID, delegationID, st
 // and the A2A request runs in the background.
 type DelegationHandler struct {
 	workspace   *WorkspaceHandler
-	broadcaster *events.Broadcaster
+	broadcaster events.EventEmitter
 }

-func NewDelegationHandler(wh *WorkspaceHandler, b *events.Broadcaster) *DelegationHandler {
+func NewDelegationHandler(wh *WorkspaceHandler, b events.EventEmitter) *DelegationHandler {
 	return &DelegationHandler{workspace: wh, broadcaster: b}
 }

@@ -36,7 +36,6 @@ package handlers
 import (
 	"context"
 	"database/sql"
-	"os"
 	"strings"
 	"testing"
 	"time"
@@ -57,10 +56,7 @@ import (
 // directly rather than going through the package global.
 func integrationDB(t *testing.T) *sql.DB {
 	t.Helper()
-	url := os.Getenv("INTEGRATION_DB_URL")
-	if url == "" {
-		t.Skip("INTEGRATION_DB_URL not set; skipping (local devs: see file header)")
-	}
+	url := requireIntegrationDBURL(t)
 	conn, err := sql.Open("postgres", url)
 	if err != nil {
 		t.Fatalf("open: %v", err)
@@ -0,0 +1,40 @@
+//go:build integration
+// +build integration
+
+// integration_helper_test.go — shared preflight for handler Postgres
+// integration tests. Extracted so the fail-open/skip logic is in ONE place
+// and can be tightened without editing every integration test file.
+//
+// See delegation_ledger_integration_test.go for the docker-postgres setup
+// incantation used by local devs.
+
+package handlers
+
+import (
+	"os"
+	"testing"
+)
+
+// requireIntegrationDBURL returns $INTEGRATION_DB_URL.
+//
+// In CI (CI, GITHUB_ACTIONS, or GITEA_ACTIONS env var is non-empty), an
+// empty URL is a fatal error — it means the workflow failed to export the
+// variable (postgres container did not start, bridge IP resolution failed,
+// or a regression in the workflow YAML). t.Fatalf keeps the test red so the
+// failure is visible; t.Skip would silently pass and mask the defect.
+//
+// Locally (none of the three CI markers set), an empty URL skips the test
+// so devs can run `go test ./...` without booting a Postgres container.
+func requireIntegrationDBURL(t *testing.T) string {
+	t.Helper()
+	url := os.Getenv("INTEGRATION_DB_URL")
+	if url == "" {
+		if os.Getenv("CI") != "" ||
+			os.Getenv("GITHUB_ACTIONS") != "" ||
+			os.Getenv("GITEA_ACTIONS") != "" {
+			t.Fatalf("INTEGRATION_DB_URL required in CI handler integration tests — check workflow env export")
+		}
+		t.Skip("INTEGRATION_DB_URL not set; skipping (local devs: see file header)")
+	}
+	return url
+}
@@ -17,6 +17,7 @@ package handlers

 import (
 	"fmt"
+	"sort"
 	"strings"
 )

@@ -55,3 +56,95 @@ func validateRegisteredModelForRuntime(runtime, model string) (bool, string) {
 		"model %q is not a registered model for runtime %q; pick one of the runtime's registered models (provider-registry SSOT, internal#718)",
 		model, runtime)
 }
+
+// validateDerivedProviderInRegistry (issue #2172) is the provider-side companion
+// to validateRegisteredModelForRuntime. The model-side check asks "is this
+// (runtime, model) in the registry?"; the provider-side check asks "is the
+// provider this model DERIVES to — the same one the adapter will resolve at
+// boot — a known provider in providers.yaml?"
+//
+// Live trigger (adk-demo Assistant, 2026-06-03): workspace config
+// `model=moonshot/kimi-k2.6` (claude-code) → adapter derives `provider=moonshot`
+// → `ValueError: provider=moonshot not in providers registry` at BOOT. The
+// save was accepted (no validation at the API boundary), and the failure only
+// surfaced when the agent tried to register. CI never saw it. The drift gate
+// (RFC#580) validates TEMPLATES against the registry, NOT per-workspace
+// configs; the existing model-side check rejects a model the runtime doesn't
+// own but says nothing about the DERIVED provider's registry membership.
+//
+// Returns:
+//
+//	(true,  "")     — pass: model is empty (MODEL_REQUIRED owns it), the
+//	                  runtime is not in the registry (fail-open for
+//	                  federated / non-first-party runtimes — mirror of the
+//	                  model-side check's federation contract), the registry
+//	                  failed to load (build-time gate owns it), OR the
+//	                  derived provider name is a known provider in the
+//	                  registry's `providers:` list.
+//	(false, reason) — reject: a known (runtime, model) pair derives to a
+//	                  provider name absent from the providers list. This is
+//	                  the structural class the adk-demo boot failure belongs
+//	                  to — the registry's `runtimes:` block references a
+//	                  provider not declared in `providers:`, which by
+//	                  construction is a registry-data bug. Catching it at
+//	                  config-SAVE keeps it out of the agent-boot path.
+//
+// Defense-in-depth: by construction, a model in a runtime's native provider set
+// has a provider that IS in the catalog (the runtime ref names a provider from
+// the providers list). So the rejection path is primarily a registry-consistency
+// guard. The real value is the FAIL-LOUD semantics — any future drift between
+// `providers:` and `runtimes:` fails the create call with a clear pointer to
+// the missing provider, instead of silently wedging the agent at boot.
+func validateDerivedProviderInRegistry(runtime, model string) (bool, string) {
+	model = strings.TrimSpace(model)
+	if model == "" {
+		return true, "" // MODEL_REQUIRED owns this.
+	}
+	m, err := providerRegistry()
+	if err != nil || m == nil {
+		// Registry unavailable (build-time defect the gates catch). Fail open —
+		// do not block create on a registry-load failure.
+		return true, ""
+	}
+	// DeriveProvider is fail-closed for unknown runtimes. Mirror the
+	// model-side check's federation contract: a runtime the registry does
+	// NOT know (langgraph / external / kimi / mock / federated) is allowed
+	// to pass through. DeriveProvider's `unknown runtime` error IS that
+	// signal — treat it as fail-open, identical to ModelsForRuntime's
+	// not-found behavior above.
+	p, err := m.DeriveProvider(runtime, model, nil)
+	if err != nil {
+		// Either the runtime is unknown (fail-open by contract) OR the model
+		// is not native to the runtime (the model-side validator already
+		// rejected this — DeriveProvider's error here means
+		// validateRegisteredModelForRuntime should have caught it. Don't
+		// double-reject: pass through and let the model-side response own
+		// the message).
+		return true, ""
+	}
+	// Defense-in-depth: confirm the DERIVED provider is a known entry in the
+	// providers list. By construction it should be (DeriveProvider only
+	// returns a Provider that was looked up by name from `providers:`), but
+	// a future federation merge could introduce a runtime ref pointing at a
+	// contributed provider absent from the core catalog. Reject loudly here
+	// rather than letting the save reach the agent-boot path and wedge with
+	// "provider=X not in providers registry" (the original adk-demo class).
+	for _, candidate := range m.Providers {
+		if candidate.Name == p.Name {
+			return true, ""
+		}
+	}
+	// Build a sorted, comma-separated list of valid provider names so the
+	// operator/caller sees the actionable list (the boot-time error message
+	// the adk-demo class produced does NOT include this — the fix is to
+	// surface it at the API boundary, where the caller can fix the request
+	// without a stuck workspace + operator page).
+	valid := make([]string, 0, len(m.Providers))
+	for _, c := range m.Providers {
+		valid = append(valid, c.Name)
+	}
+	sort.Strings(valid)
+	return false, fmt.Sprintf(
+		"derived provider %q (for model %q on runtime %q) is not in the providers registry; pick a model whose derived provider is one of: %s",
+		p.Name, model, runtime, strings.Join(valid, ", "))
+}
@@ -6,8 +6,17 @@ package handlers
 // fail OPEN (allow) for a runtime the registry doesn't know yet (federation /
 // langgraph/etc. not in the first-party registry) so the existing knownRuntimes
 // gate stays authoritative there.
+//
+// TestValidateDerivedProviderInRegistry (issue #2172) is the provider-side
+// companion: once the model-side check passes, confirm the DERIVED provider
+// (the one the adapter will resolve at boot) is a known provider in
+// providers.yaml. Catches the adk-demo "provider=X not in providers registry"
+// class at config-SAVE time instead of letting it wedge the agent at boot.

-import "testing"
+import (
+	"strings"
+	"testing"
+)

 func TestValidateRegisteredModelForRuntime(t *testing.T) {
 	type tc struct {
@@ -80,3 +89,163 @@ func TestValidateRegisteredModelForRuntime(t *testing.T) {
 		})
 	}
 }
+
+func TestValidateDerivedProviderInRegistry(t *testing.T) {
+	type tc struct {
+		name    string
+		runtime string
+		model   string
+		wantOK  bool
+		// wantReasonContains: a substring the rejection reason must include
+		// (skipped for OK cases). Pins the actionable list / derivation pointer
+		// so the caller knows which provider was missing and what the valid
+		// set looks like — this is the fix that distinguishes the new gate
+		// from the boot-time "provider=X not in providers registry" string
+		// it replaces.
+		wantReasonContains string
+	}
+	cases := []tc{
+		// PASS — every native (runtime, model) in the catalog derives to a
+		// provider that IS in the providers list. These are the live corpus
+		// entries; the test pins the registry-consistency invariant.
+		{
+			name:   "claude_code_anthropic_api_native",
+			runtime: "claude-code",
+			model:   "claude-sonnet-4-6",
+			wantOK: true,
+		},
+		{
+			name:   "claude_code_kimi_coding_native",
+			runtime: "claude-code",
+			model:   "kimi-for-coding",
+			wantOK: true,
+		},
+		{
+			name:   "claude_code_minimax_native",
+			runtime: "claude-code",
+			model:   "MiniMax-M2.7",
+			wantOK: true,
+		},
+		{
+			name:   "claude_code_platform_namespaced",
+			runtime: "claude-code",
+			model:   "moonshot/kimi-k2.6",
+			wantOK: true,
+		},
+		{
+			name:   "codex_openai_subscription_default_arm",
+			runtime: "codex",
+			model:   "gpt-5.5",
+			wantOK: true,
+		},
+		{
+			name:   "codex_platform_namespaced",
+			runtime: "codex",
+			model:   "openai/gpt-5.4-mini",
+			wantOK: true,
+		},
+		{
+			name:   "hermes_kimi_coding",
+			runtime: "hermes",
+			model:   "kimi-coding/kimi-k2",
+			wantOK: true,
+		},
+		{
+			name:   "hermes_platform_namespaced",
+			runtime: "hermes",
+			model:   "moonshot/kimi-k2.6",
+			wantOK: true,
+		},
+		{
+			name:   "openclaw_kimi_coding",
+			runtime: "openclaw",
+			model:   "moonshot:kimi-k2.6",
+			wantOK: true,
+		},
+		// FAIL — model-side validator catches this, but the provider-side
+		// gate is called AFTER it in Create and inherits the fail-open
+		// contract for "model is not native to runtime" (DeriveProvider
+		// errors → allow, letting the model-side response own the message).
+		// This is the deliberate "don't double-reject" decision.
+		{
+			name:   "unregistered_model_pass_through_to_model_side",
+			runtime: "claude-code",
+			model:   "totally-made-up-model-xyz",
+			wantOK: true, // pass-through: model-side validator owns the rejection
+		},
+		// Federation contract — mirror of the model-side test above.
+		{
+			name:   "langgraph_runtime_failopen",
+			runtime: "langgraph",
+			model:   "anything-goes",
+			wantOK: true,
+		},
+		{
+			name:   "external_runtime_failopen",
+			runtime: "external",
+			model:   "whatever",
+			wantOK: true,
+		},
+		// Empty model — MODEL_REQUIRED owns it; allow.
+		{
+			name:   "empty_model_allowed_other_gate_owns_it",
+			runtime: "claude-code",
+			model:   "",
+			wantOK: true,
+		},
+	}
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			ok, why := validateDerivedProviderInRegistry(c.runtime, c.model)
+			if ok != c.wantOK {
+				t.Errorf("validateDerivedProviderInRegistry(%q,%q) ok=%v want %v (reason=%q)",
+					c.runtime, c.model, ok, c.wantOK, why)
+			}
+			if !c.wantOK && c.wantReasonContains != "" && !strings.Contains(why, c.wantReasonContains) {
+				t.Errorf("rejection reason missing %q: got %q", c.wantReasonContains, why)
+			}
+		})
+	}
+}
+
+// TestRegistryConsistency_AllNativeModelsDeriveToKnownProvider walks every
+// (runtime, model) pair in the registry's native model sets and asserts each
+// one derives to a provider that IS in the providers list. This is the
+// static regression gate the issue calls for ("a CI test fails if any shipped
+// demo/template config references an unregistered provider") — generalized
+// to the catalog as a whole: if anyone edits providers.yaml such that a
+// `runtimes:` block names a provider absent from `providers:`, this test
+// fires before the bad config can reach a customer workspace.
+//
+// By construction this invariant should always hold (DeriveProvider only
+// returns a Provider that was looked up by name from `providers:`), so the
+// test primarily guards against future federation merges that introduce a
+// runtime ref pointing at a contributed provider absent from the core
+// catalog — exactly the failure shape the adk-demo Assistant wedge
+// belongs to.
+func TestRegistryConsistency_AllNativeModelsDeriveToKnownProvider(t *testing.T) {
+	m, err := providerRegistry()
+	if err != nil || m == nil {
+		t.Skipf("providerRegistry unavailable in test env (err=%v); skipping consistency walk", err)
+	}
+	providerNames := make(map[string]struct{}, len(m.Providers))
+	for _, p := range m.Providers {
+		providerNames[p.Name] = struct{}{}
+	}
+	for runtimeName, runtime := range m.Runtimes {
+		for _, ref := range runtime.Providers {
+			for _, modelID := range ref.Models {
+				p, err := m.DeriveProvider(runtimeName, modelID, nil)
+				if err != nil {
+					t.Errorf("catalog invariant broken: runtime=%q model=%q failed DeriveProvider: %v",
+						runtimeName, modelID, err)
+					continue
+				}
+				if _, ok := providerNames[p.Name]; !ok {
+					t.Errorf("catalog invariant broken: runtime=%q model=%q derives to provider %q which is not in the providers list (refs=%q)",
+						runtimeName, modelID, p.Name, ref.Name)
+				}
+			}
+		}
+	}
+}
@@ -43,7 +43,6 @@ package handlers
 import (
 	"context"
 	"database/sql"
-	"os"
 	"strings"
 	"testing"
 	"time"
@@ -63,10 +62,7 @@ import (
 // but kept separate so each table's wipe step is local to its tests.
 func integrationDB_PendingUploads(t *testing.T) *sql.DB {
 	t.Helper()
-	url := os.Getenv("INTEGRATION_DB_URL")
-	if url == "" {
-		t.Skip("INTEGRATION_DB_URL not set; skipping (local devs: see file header)")
-	}
+	url := requireIntegrationDBURL(t)
 	conn, err := sql.Open("postgres", url)
 	if err != nil {
 		t.Fatalf("open: %v", err)
@@ -0,0 +1,21 @@
+package handlers
+
+import (
+	"testing"
+
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
+)
+
+// Proper-SSOT (task #65): required_env is DERIVED from the resolved provider's
+// serving classification (IsPlatform), not hand-authored — platform injects
+// creds server-side (none required), BYOK requires its auth_env.
+func TestRequiredEnvForRegistryProvider(t *testing.T) {
+	if got := requiredEnvForRegistryProvider(providers.Provider{Name: providers.PlatformProviderName}); got != nil {
+		t.Errorf("platform provider requiredEnv = %v; want nil (creds injected server-side)", got)
+	}
+	byok := providers.Provider{Name: "google", AuthEnv: []string{"GEMINI_API_KEY", "GOOGLE_API_KEY"}}
+	got := requiredEnvForRegistryProvider(byok)
+	if len(got) != 2 || got[0] != "GEMINI_API_KEY" {
+		t.Errorf("byok requiredEnv = %v; want its auth_env", got)
+	}
+}
@@ -176,6 +176,10 @@ func TestResolveAgentURLForRestartSignal_CacheMiss(t *testing.T) {
 // TestGracefulPreRestart_Success verifies that when the workspace returns 200,
 // the signal is logged as acknowledged without error.
 func TestGracefulPreRestart_Success(t *testing.T) {
+	hWrapper := &resolveURLTestWrapper{
+		WorkspaceHandler: newHandlerWithTestDeps(t),
+		testURL:          "http://fake-agent.example/agent",
+	}
 	_ = setupTestDB(t)

 	// httptest server simulating the workspace container's /signals/restart_pending
@@ -205,18 +209,15 @@ func TestGracefulPreRestart_Success(t *testing.T) {
 		})
 	}))
 	defer srv.Close()
+	hWrapper.testURL = srv.URL + "/agent"

 	// Pre-populate Redis cache with the test server URL
 	_ = setupTestRedisWithURL(t, srv.URL)

-	// Use a wrapper so gracefulPreRestart runs through the embedded handler.
-	hWrapper := &resolveURLTestWrapper{
-		WorkspaceHandler: newHandlerWithTestDeps(t),
-		testURL:          srv.URL + "/agent",
-	}
+	// gracefulPreRestart runs in a goroutine; wait for it before db.DB is restored.
+	// Must be registered AFTER setupTestDB (LIFO: async wait → db.DB restore).
+	waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)

-	// gracefulPreRestart runs in a goroutine with its own timeout.
-	// We give it time to complete before the test ends.
 	hWrapper.gracefulPreRestart(context.Background(), "ws-ack-789")
 	time.Sleep(200 * time.Millisecond)
 }
@@ -224,19 +225,22 @@ func TestGracefulPreRestart_Success(t *testing.T) {
 // TestGracefulPreRestart_NotImplemented verifies that when the workspace returns
 // 404 (old SDK version), the platform proceeds gracefully (log + no error).
 func TestGracefulPreRestart_NotImplemented(t *testing.T) {
+	hWrapper := &resolveURLTestWrapper{
+		WorkspaceHandler: newHandlerWithTestDeps(t),
+		testURL:          "http://fake-agent.example/agent",
+	}
 	_ = setupTestDB(t)

 	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		w.WriteHeader(http.StatusNotFound)
 	}))
 	defer srv.Close()
+	hWrapper.testURL = srv.URL + "/agent"

 	_ = setupTestRedisWithURL(t, srv.URL)

-	hWrapper := &resolveURLTestWrapper{
-		WorkspaceHandler: newHandlerWithTestDeps(t),
-		testURL:          srv.URL + "/agent",
-	}
+	// Must be registered AFTER setupTestDB so LIFO order is: async wait → db.DB restore.
+	waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)

 	hWrapper.gracefulPreRestart(context.Background(), "ws-noimpl-999")
 	time.Sleep(200 * time.Millisecond)
@@ -246,15 +250,18 @@ func TestGracefulPreRestart_NotImplemented(t *testing.T) {
 // TestGracefulPreRestart_ConnectionRefused verifies that when the workspace
 // is unreachable, the platform proceeds gracefully without error.
 func TestGracefulPreRestart_ConnectionRefused(t *testing.T) {
-	_ = setupTestDB(t)
-
-	mr := setupTestRedisWithURL(t, "http://localhost:19999/agent") // nothing listening on 19999
-	_ = mr
-
 	hWrapper := &resolveURLTestWrapper{
 		WorkspaceHandler: newHandlerWithTestDeps(t),
 		testURL:          "http://localhost:19999/agent",
 	}
+	_ = setupTestDB(t)
+
+	// Nothing listening on 19999 — deliberate connection failure.
+	mr := setupTestRedisWithURL(t, "http://localhost:19999/agent")
+	_ = mr
+
+	// Must be registered AFTER setupTestDB so LIFO order is: async wait → db.DB restore.
+	waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)

 	hWrapper.gracefulPreRestart(context.Background(), "ws-unreachable-000")
 	time.Sleep(200 * time.Millisecond)
@@ -264,13 +271,17 @@ func TestGracefulPreRestart_ConnectionRefused(t *testing.T) {
 // TestGracefulPreRestart_URLResolutionError verifies that when URL resolution
 // fails, the platform proceeds gracefully without blocking the restart.
 func TestGracefulPreRestart_URLResolutionError(t *testing.T) {
-	_ = setupTestDB(t)
-	_ = setupTestRedis(t) // empty → URL resolution will fail in resolveAgentURLForRestartSignal
-
 	hWrapper := &resolveURLTestWrapper{
 		WorkspaceHandler: newHandlerWithTestDeps(t),
 		errToReturn:      context.DeadlineExceeded,
 	}
+	_ = setupTestDB(t)
+	_ = setupTestRedis(t) // empty → URL resolution will fail in resolveAgentURLForRestartSignal
+
+	// Must be registered AFTER setupTestDB so LIFO order is: async wait → db.DB restore.
+	// This ensures goroutines (which access both DB and Redis) are drained before
+	// any cleanup fires. setupTestRedis comes after newHandlerWithTestDeps
+	// so the handler holds the correct Redis client reference.
 	waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)

 	hWrapper.gracefulPreRestart(context.Background(), "ws-url-err-111")
@@ -44,6 +44,20 @@ func billingModeForRegistryProvider(p providers.Provider) string {
 	return LLMBillingModeBYOK
 }

+// requiredEnvForRegistryProvider derives the env vars the USER must supply for
+// a model owned by the resolved provider — the proper-SSOT single fact behind
+// the canvas "Missing API Keys" preflight (task #65). The closed platform
+// provider injects credentials server-side (the metered proxy) -> nothing
+// required; BYOK providers require their auth_env. Derived from IsPlatform +
+// AuthEnv so a template can no longer hand-author a required_env that drifts
+// from the registry's serving classification.
+func requiredEnvForRegistryProvider(p providers.Provider) []string {
+	if p.IsPlatform() {
+		return nil
+	}
+	return p.AuthEnv
+}
+
 // enrichFromRegistry populates the registry-served fields on a templateSummary
 // when its runtime is known to the provider registry. It is a no-op (leaves
 // RegistryBacked=false and the registry slices nil) for a runtime the registry
@@ -98,6 +112,7 @@ func enrichFromRegistry(summary *templateSummary, runtime string) {
 		if derived, derr := m.DeriveProvider(runtime, id, nil); derr == nil {
 			ms.Provider = derived.Name
 			ms.BillingMode = billingModeForRegistryProvider(derived)
+			ms.RequiredEnv = requiredEnvForRegistryProvider(derived)
 		}
 		// If DeriveProvider errors (ambiguous/overlap — a manifest defect the
 		// loader's tests pin against), still serve the id without a provider
@@ -474,6 +474,32 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
 			})
 			return
 		}
+		// issue #2172 (provider-side companion): once the (runtime, model)
+		// pair is known to be registered, confirm the DERIVED provider
+		// (the one the adapter will resolve at boot) is a known provider
+		// in the providers.yaml catalog. Live trigger (adk-demo Assistant,
+		// 2026-06-03): the model-side check passed for a registry-resident
+		// model whose derived provider name was NOT in the providers list,
+		// so the save was accepted and the agent wedged at boot with
+		// "provider=X not in providers registry". This check is a
+		// defense-in-depth registry-consistency guard: by construction a
+		// model in a runtime's native set derives to a provider that IS in
+		// the catalog, so the rejection path is primarily a registry-data
+		// invariant — any future drift between `providers:` and `runtimes:`
+		// fails the create with a clear pointer to the missing provider
+		// rather than silently wedging the agent. Fails open for runtimes
+		// the registry doesn't know (langgraph/external/kimi/mock/federated
+		// — the federation contract the model-side check also honors).
+		if ok, why := validateDerivedProviderInRegistry(payload.Runtime, payload.Model); !ok {
+			log.Printf("Create: 422 DERIVED_PROVIDER_NOT_IN_REGISTRY (runtime=%q model=%q): %s [issue #2172 hard-reject]", payload.Runtime, payload.Model, why)
+			c.JSON(http.StatusUnprocessableEntity, gin.H{
+				"error":   why,
+				"runtime": payload.Runtime,
+				"model":   payload.Model,
+				"code":    "DERIVED_PROVIDER_NOT_IN_REGISTRY",
+			})
+			return
+		}
 	}

 	ctx := c.Request.Context()
@@ -56,7 +56,20 @@ func PatchAbilities(c *gin.Context) {
 		return
 	}

-	if body.BroadcastEnabled != nil {
+	// Atomic update: when both fields are supplied, apply them in one SQL
+	// statement so the request is all-or-nothing (#2131). A partial mutation
+	// (e.g. broadcast_enabled updated but talk_to_user_enabled failing) would
+	// leave the workspace in an ambiguous capability state.
+	if body.BroadcastEnabled != nil && body.TalkToUserEnabled != nil {
+		if _, err := db.DB.ExecContext(ctx,
+			`UPDATE workspaces SET broadcast_enabled = $2, talk_to_user_enabled = $3, updated_at = now() WHERE id = $1`,
+			id, *body.BroadcastEnabled, *body.TalkToUserEnabled,
+		); err != nil {
+			log.Printf("PatchAbilities both-fields for %s: %v", id, err)
+			c.JSON(http.StatusInternalServerError, gin.H{"error": "update failed"})
+			return
+		}
+	} else if body.BroadcastEnabled != nil {
 		if _, err := db.DB.ExecContext(ctx,
 			`UPDATE workspaces SET broadcast_enabled = $2, updated_at = now() WHERE id = $1`,
 			id, *body.BroadcastEnabled,
@@ -65,9 +78,7 @@ func PatchAbilities(c *gin.Context) {
 			c.JSON(http.StatusInternalServerError, gin.H{"error": "update failed"})
 			return
 		}
-	}
-
-	if body.TalkToUserEnabled != nil {
+	} else if body.TalkToUserEnabled != nil {
 		if _, err := db.DB.ExecContext(ctx,
 			`UPDATE workspaces SET talk_to_user_enabled = $2, updated_at = now() WHERE id = $1`,
 			id, *body.TalkToUserEnabled,
@@ -130,11 +130,8 @@ func TestPatchAbilities_BothFields(t *testing.T) {
 	mock.ExpectQuery(`SELECT EXISTS\(SELECT 1 FROM workspaces WHERE id = \$1 AND status != 'removed'\)`).
 		WithArgs(wsUUID1).
 		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
-	mock.ExpectExec(`UPDATE workspaces SET broadcast_enabled = \$2, updated_at = now\(\) WHERE id = \$1`).
-		WithArgs(wsUUID1, true).
-		WillReturnResult(sqlmock.NewResult(0, 1))
-	mock.ExpectExec(`UPDATE workspaces SET talk_to_user_enabled = \$2, updated_at = now\(\) WHERE id = \$1`).
-		WithArgs(wsUUID1, true).
+	mock.ExpectExec(`UPDATE workspaces SET broadcast_enabled = \$2, talk_to_user_enabled = \$3, updated_at = now\(\) WHERE id = \$1`).
+		WithArgs(wsUUID1, true, true).
 		WillReturnResult(sqlmock.NewResult(0, 1))

 	w := patchAbilitiesReq(t, wsUUID1, `{"broadcast_enabled":true,"talk_to_user_enabled":true}`)
@@ -182,19 +179,25 @@ func TestPatchAbilities_TalkToUserUpdateError(t *testing.T) {
 	}
 }

-func TestPatchAbilities_BothFields_BroadcastFails(t *testing.T) {
+// TestPatchAbilities_BothFields_UpdateError — regression for #2131. When
+// both fields are supplied the handler uses a single combined UPDATE. A
+// failure of that UPDATE must leave the workspace unchanged (atomic).
+func TestPatchAbilities_BothFields_UpdateError(t *testing.T) {
 	mock, cleanup := withMockDB(t)
 	defer cleanup()

 	mock.ExpectQuery(`SELECT EXISTS\(SELECT 1 FROM workspaces WHERE id = \$1 AND status != 'removed'\)`).
 		WithArgs(wsUUID1).
 		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
-	mock.ExpectExec(`UPDATE workspaces SET broadcast_enabled = \$2, updated_at = now\(\) WHERE id = \$1`).
-		WithArgs(wsUUID1, true).
+	mock.ExpectExec(`UPDATE workspaces SET broadcast_enabled = \$2, talk_to_user_enabled = \$3, updated_at = now\(\) WHERE id = \$1`).
+		WithArgs(wsUUID1, true, true).
 		WillReturnError(errors.New("disk full"))

 	w := patchAbilitiesReq(t, wsUUID1, `{"broadcast_enabled":true,"talk_to_user_enabled":true}`)
 	if w.Code != http.StatusInternalServerError {
 		t.Fatalf("expected 500, got %d: %s", w.Code, w.Body.String())
 	}
+	// Because only one UPDATE is issued, there is no partial-mutation
+	// path to assert against; sqlmock implicitly verifies no second
+	// exec occurred.
 }
@@ -95,6 +95,14 @@ func TestIntegration_BroadcastOrgRoot_NonRootSenderResolvesToRoot(t *testing.T)
 		}
 	})

+	// Pre-test hygiene: if a prior run crashed or was killed, its rows may
+	// still be in the shared integration DB. Remove them before inserting so
+	// the unique index workspaces_parent_name_uniq does not conflict.
+	if _, err := conn.ExecContext(ctx,
+		`DELETE FROM workspaces WHERE name LIKE $1`, prefix+"%"); err != nil {
+		t.Logf("pre-test cleanup (non-fatal): %v", err)
+	}
+
 	rootID := uuid.New().String()
 	midID := uuid.New().String()
 	leafID := uuid.New().String()
@@ -41,7 +41,6 @@ import (
 	"context"
 	"database/sql"
 	"fmt"
-	"os"
 	"testing"

 	"github.com/google/uuid"
@@ -59,10 +58,7 @@ import (
 // only those.
 func integrationDB_WorkspaceCreateName(t *testing.T) *sql.DB {
 	t.Helper()
-	url := os.Getenv("INTEGRATION_DB_URL")
-	if url == "" {
-		t.Skip("INTEGRATION_DB_URL not set; skipping (see file header)")
-	}
+	url := requireIntegrationDBURL(t)
 	conn, err := sql.Open("postgres", url)
 	if err != nil {
 		t.Fatalf("open: %v", err)
@@ -613,6 +613,32 @@ func (h *WorkspaceHandler) ensureDefaultConfig(workspaceID string, payload model
 	if model == "" {
 		log.Printf("ensureDefaultConfig: workspace %s reached provisioning with empty model — Create handler should have rejected this; rendering empty model: \"\" in config.yaml (workspace will boot not_configured)", workspaceID)
 	}
+
+	// Derive the provider from the providers manifest and stamp it into the
+	// generated config BEFORE claude-code model normalization strips the
+	// slash-prefix. DeriveProvider needs the FULL, un-normalized model id
+	// (e.g. "moonshot/kimi-k2.6") for the exact-id match that resolves the
+	// canvas claude-code case to provider=platform — normalizing to
+	// "kimi-k2.6" first would lose that match.
+	//
+	// Why this exists (RFC#340 Fix A): a canvas-created claude-code workspace
+	// with model "moonshot/kimi-k2.6" booted NOT_CONFIGURED — the adapter
+	// derived provider="moonshot" (slash-split of the model id) which is not
+	// in the providers registry. CP bakes `provider: platform` via heredoc,
+	// but the cp#329 config-bundle fetch overwrites /configs/config.yaml with
+	// THIS (previously providerless) bundle version, so molecule-runtime
+	// config.py re-derived the wrong provider. Stamping the manifest-derived
+	// provider here (mirroring CP's buildModelProviderYAML shape) makes the
+	// config the adapter reads carry the canonical provider.
+	//
+	// Reuses the SAME manifest path the config-SAVE validators use
+	// (providerRegistry() + Manifest.DeriveProvider; see
+	// model_registry_validation.go). On a derive MISS (unknown/unregistered
+	// model, or registry unavailable) provider is left empty and the field is
+	// omitted below — preserving today's behavior; never fail provisioning on
+	// a derive miss.
+	derivedProvider := deriveDefaultConfigProvider(runtime, model)
+
 	if runtime == "claude-code" {
 		model = normalizeClaudeCodeModel(model)
 	}
@@ -640,6 +666,14 @@ func (h *WorkspaceHandler) ensureDefaultConfig(workspaceID string, payload model
 	// Model always at top level — config.py reads raw["model"] for all runtimes.
 	configYAML += fmt.Sprintf("model: %s\n", quoteModel)

+	// Stamp the manifest-derived provider at top level (mirroring CP's
+	// buildModelProviderYAML). Omitted entirely on a derive miss so the prior
+	// behavior — no `provider:` key, runtime re-derives — is preserved for
+	// unregistered models (requirement #3).
+	if derivedProvider != "" {
+		configYAML += fmt.Sprintf("provider: '%s'\n", yamlEscapeSingleQuotedProvider(derivedProvider))
+	}
+
 	// Add runtime_config. required_env is intentionally omitted — the
 	// platform injects secrets at container-start time via the secrets API,
 	// and preflight already validates that the env vars are present before
@@ -649,6 +683,10 @@ func (h *WorkspaceHandler) ensureDefaultConfig(workspaceID string, payload model
 	if runtime == "claude-code" {
 		configYAML += fmt.Sprintf("  model: %s\n", quoteModel)
 	}
+	// Mirror the top-level provider under runtime_config (CP writes both).
+	if derivedProvider != "" {
+		configYAML += fmt.Sprintf("  provider: '%s'\n", yamlEscapeSingleQuotedProvider(derivedProvider))
+	}
 	configYAML += "  timeout: 0\n"

 	files["config.yaml"] = []byte(configYAML)
@@ -657,6 +695,48 @@ func (h *WorkspaceHandler) ensureDefaultConfig(workspaceID string, payload model
 	return files
 }

+// deriveDefaultConfigProvider resolves the provider name the adapter should
+// see for (runtime, model) using the SAME providers manifest the config-SAVE
+// validators use (providerRegistry() + Manifest.DeriveProvider; see
+// model_registry_validation.go). It is intentionally fail-OPEN: any miss
+// (empty model, registry unavailable, unknown runtime, or a model the runtime
+// does not own) returns "" so the caller omits the `provider:` field and the
+// generated config keeps its pre-fix shape. It NEVER fails provisioning.
+//
+// `model` must be the FULL, un-normalized id (e.g. "moonshot/kimi-k2.6") so
+// DeriveProvider's exact-id match resolves the canvas claude-code case to
+// provider=platform. The availableAuthEnv arg is nil here — config-generation
+// has no per-workspace auth context yet (secrets are injected at container
+// start), matching the validators' nil call.
+func deriveDefaultConfigProvider(runtime, model string) string {
+	if strings.TrimSpace(model) == "" {
+		return ""
+	}
+	m, err := providerRegistry()
+	if err != nil || m == nil {
+		// Registry unavailable (a build-time defect the gen/sync gates catch).
+		// Fail open — do not stamp a provider, do not block provisioning.
+		return ""
+	}
+	p, err := m.DeriveProvider(runtime, model, nil)
+	if err != nil {
+		// Unknown runtime (federation / non-first-party) or a model the
+		// runtime does not own. Either way, omit the provider and let the
+		// runtime fall back to its prior derivation — preserving today's
+		// behavior for unregistered models.
+		return ""
+	}
+	return p.Name
+}
+
+// yamlEscapeSingleQuotedProvider escapes a value for a YAML single-quoted
+// scalar, mirroring CP's buildModelProviderYAML (a literal single quote is
+// doubled). Provider names are registry-controlled identifiers, so this is a
+// defense-in-depth measure rather than a hot path.
+func yamlEscapeSingleQuotedProvider(v string) string {
+	return strings.ReplaceAll(v, "'", "''")
+}
+
 func normalizeClaudeCodeModel(model string) string {
 	model = strings.TrimSpace(model)
 	if before, after, ok := strings.Cut(model, "/"); ok && before != "" && after != "" {
@@ -0,0 +1,196 @@
+package handlers
+
+// workspace_provision_platform_boot_test.go — the deterministic, SSOT-driven
+// regression suite for the class of bug behind the moonshot/kimi
+// "canvas-created claude-code workspace boots NOT_CONFIGURED" production
+// incident (RFC#340 Fix A #2187, canvas Fix C #2188).
+//
+// THE BUG (what shipped to prod):
+//   A claude-code workspace created via the canvas with provider=Platform +
+//   model="moonshot/kimi-k2.6" booted NOT_CONFIGURED. Unit tests passed; the
+//   REAL boot path was broken. ensureDefaultConfig generated a config.yaml that
+//   carried NO derived `provider:` key, so the cp#329 config-bundle the adapter
+//   actually reads left molecule-runtime config.py to slash-split the model id
+//   "moonshot/kimi-k2.6" -> provider="moonshot", which is NOT in the providers
+//   registry -> NOT_CONFIGURED.
+//
+// THE FIX A INVARIANT (this file pins it, and pins it for the WHOLE class):
+//   ensureDefaultConfig MUST stamp the manifest-derived provider into the
+//   generated config.yaml — at BOTH the top level and under runtime_config —
+//   for every (runtime, model) the providers SSOT maps to a platform provider.
+//   The single-combo pin (TestEnsureDefaultConfig_StampsDerivedProvider in
+//   workspace_provision_test.go) proves the headline case. THIS file closes the
+//   gap that single pin leaves: it is PARAMETRIZED OVER THE SSOT, so when a NEW
+//   platform model is added to providers.yaml for claude-code (or any runtime
+//   with a platform arm), the new id is automatically covered — a future
+//   platform model that fails to derive `provider: platform` fails THIS test at
+//   build time, before it can ship a NOT_CONFIGURED boot.
+//
+// WHY SSOT-DRIVEN AND NOT A HAND-MAINTAINED LIST:
+//   The original bug was a divergence between "what the canvas offers"
+//   (providers.yaml platform arm) and "what the config generator stamps". A
+//   hardcoded test model list would itself drift from the SSOT and re-open the
+//   same divergence gap. By enumerating the platform model set directly from the
+//   loaded providers.Manifest (the SAME manifest ensureDefaultConfig's
+//   deriveDefaultConfigProvider resolves against), this test cannot fall behind
+//   the offered set: add a platform model, get a test case for free; the test
+//   only passes if the generator actually stamps it.
+//
+// SCOPE: deterministic, no live infra. The REAL-boot complement (provision a
+// staging workspace and assert status=online + a completion returns 200 for the
+// SAME combo) is the bash staging harness — see
+// tests/e2e/test_staging_full_saas.sh (E2E_LLM_PATH=platform) and the
+// e2e-staging-platform-boot job in .gitea/workflows/e2e-staging-saas.yml. That
+// asserts the REAL artifact (booted status / completion); THIS asserts the
+// deterministic config-generation invariant the real boot depends on.
+
+import (
+	"testing"
+
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
+	"gopkg.in/yaml.v3"
+)
+
+// platformModelsForRuntime returns the exact model ids the providers SSOT lists
+// under runtime rt's `platform` native provider arm — the set the canvas offers
+// as provider=Platform and the set ensureDefaultConfig MUST stamp
+// `provider: platform` for. Reads the SAME embedded manifest the config
+// generator derives against (providers.LoadManifest), so it can never drift from
+// the offered set. Returns nil when the runtime has no platform arm.
+func platformModelsForRuntime(t *testing.T, rt string) []string {
+	t.Helper()
+	m, err := providers.LoadManifest()
+	if err != nil {
+		t.Fatalf("LoadManifest: %v", err)
+	}
+	native, ok := m.Runtimes[rt]
+	if !ok {
+		t.Fatalf("providers SSOT has no runtimes entry for %q", rt)
+	}
+	for _, ref := range native.Providers {
+		if ref.Name == "platform" {
+			return ref.Models
+		}
+	}
+	return nil
+}
+
+// TestEnsureDefaultConfig_StampsProviderForEverySSOTPlatformModel is the
+// class-level regression for the moonshot/kimi NOT_CONFIGURED incident. For
+// EVERY model the providers SSOT offers under claude-code's platform arm, it
+// asserts the generated config.yaml carries the manifest-derived provider at
+// both the top level and under runtime_config. This is the Fix A invariant,
+// parametrized over the SSOT so a newly-offered platform model cannot ship
+// without the stamp (the exact divergence — offered-but-not-stamped — that
+// booted "moonshot/kimi-k2.6" into NOT_CONFIGURED).
+func TestEnsureDefaultConfig_StampsProviderForEverySSOTPlatformModel(t *testing.T) {
+	const runtime = "claude-code"
+	platformModels := platformModelsForRuntime(t, runtime)
+	if len(platformModels) == 0 {
+		t.Fatalf("providers SSOT lists no platform models for runtime %q — the regression matrix would be empty; the SSOT shape changed (this test is the canary)", runtime)
+	}
+	// Headline sentinel: the exact id that booted NOT_CONFIGURED in prod MUST be
+	// in the enumerated set. If a refactor drops it from the platform arm, this
+	// test must still cover it explicitly — fail loud rather than silently
+	// shrinking the matrix.
+	if !containsString(platformModels, "moonshot/kimi-k2.6") {
+		t.Fatalf("the headline incident model \"moonshot/kimi-k2.6\" is no longer in the claude-code platform SSOT set (%v) — regression coverage for the original bug would be lost", platformModels)
+	}
+
+	for _, model := range platformModels {
+		model := model
+		t.Run(model, func(t *testing.T) {
+			broadcaster := newTestBroadcaster()
+			handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
+
+			files := handler.ensureDefaultConfig("ws-platform-boot", models.CreateWorkspacePayload{
+				Name:    "Platform Boot Agent",
+				Tier:    2,
+				Runtime: runtime,
+				Model:   model,
+			})
+
+			raw, ok := files["config.yaml"]
+			if !ok {
+				t.Fatalf("expected config.yaml in generated files for model %q", model)
+			}
+
+			var parsed struct {
+				Model         string `yaml:"model"`
+				Provider      string `yaml:"provider"`
+				RuntimeConfig struct {
+					Model    string `yaml:"model"`
+					Provider string `yaml:"provider"`
+				} `yaml:"runtime_config"`
+			}
+			if err := yaml.Unmarshal(raw, &parsed); err != nil {
+				t.Fatalf("generated YAML invalid for model %q: %v\n%s", model, err, raw)
+			}
+
+			// The load-bearing invariant: BOTH the top-level and the
+			// runtime_config provider must be exactly "platform". An empty or
+			// vendor-namespace ("moonshot") value here is the prod NOT_CONFIGURED
+			// boot — the adapter would slash-split the model id and look up an
+			// unregistered provider.
+			if parsed.Provider != "platform" {
+				t.Errorf("model %q: top-level provider = %q, want \"platform\" (Fix A invariant — empty/vendor value is the NOT_CONFIGURED boot)\n%s", model, parsed.Provider, raw)
+			}
+			if parsed.RuntimeConfig.Provider != "platform" {
+				t.Errorf("model %q: runtime_config.provider = %q, want \"platform\"\n%s", model, parsed.RuntimeConfig.Provider, raw)
+			}
+			// Sanity: the config must still render a non-empty model (a config
+			// with provider but no model is equally undeployable).
+			if parsed.Model == "" {
+				t.Errorf("model %q: generated config has empty top-level model\n%s", model, raw)
+			}
+		})
+	}
+}
+
+// TestPlatformModelDeriveProvider_SSOTConsistency is the upstream half of the
+// same invariant, one layer below ensureDefaultConfig: it asserts the providers
+// manifest's DeriveProvider — the resolver deriveDefaultConfigProvider calls —
+// maps every SSOT-offered claude-code platform model to a provider whose Name is
+// "platform". If DeriveProvider itself regressed (e.g. a model_prefix_match
+// change made "moonshot/kimi-k2.6" resolve to the bare "moonshot" entry again),
+// this fails closer to the root cause than the config-shape test above, making
+// the diagnosis unambiguous: SSOT/derive regression vs config-emission
+// regression.
+func TestPlatformModelDeriveProvider_SSOTConsistency(t *testing.T) {
+	const runtime = "claude-code"
+	m, err := providers.LoadManifest()
+	if err != nil {
+		t.Fatalf("LoadManifest: %v", err)
+	}
+	platformModels := platformModelsForRuntime(t, runtime)
+	if len(platformModels) == 0 {
+		t.Fatalf("no platform models for %q in SSOT", runtime)
+	}
+	for _, model := range platformModels {
+		model := model
+		t.Run(model, func(t *testing.T) {
+			// nil availableAuthEnv mirrors deriveDefaultConfigProvider's call at
+			// config-generation time (no per-workspace auth context yet).
+			p, err := m.DeriveProvider(runtime, model, nil)
+			if err != nil {
+				t.Fatalf("DeriveProvider(%q, %q): unexpected error %v — an SSOT-offered platform model MUST derive", runtime, model, err)
+			}
+			if p.Name != "platform" {
+				t.Errorf("DeriveProvider(%q, %q).Name = %q, want \"platform\" (this is the exact slash-split-to-vendor regression that booted NOT_CONFIGURED)", runtime, model, p.Name)
+			}
+		})
+	}
+}
+
+// containsString is a tiny local membership helper. Kept here (not a shared
+// test util) so this regression file is self-contained and can be read top to
+// bottom without chasing helpers across the package.
+func containsString(xs []string, want string) bool {
+	for _, x := range xs {
+		if x == want {
+			return true
+		}
+	}
+	return false
+}
@@ -363,6 +363,74 @@ runtime_config:
 	}
 }

+// TestEnsureDefaultConfig_StampsDerivedProvider pins RFC#340 Fix A: a
+// canvas-created claude-code workspace with model "moonshot/kimi-k2.6" must
+// have the manifest-derived provider stamped into config.yaml at BOTH the top
+// level and under runtime_config, so the cp#329 config-bundle the adapter
+// reads no longer leaves the runtime to slash-split "moonshot/..." → an
+// unregistered provider="moonshot" (the original NOT_CONFIGURED boot). The
+// canonical manifest exact-id-matches "moonshot/kimi-k2.6" to provider=platform.
+func TestEnsureDefaultConfig_StampsDerivedProvider(t *testing.T) {
+	broadcaster := newTestBroadcaster()
+	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
+
+	files := handler.ensureDefaultConfig("ws-moonshot", models.CreateWorkspacePayload{
+		Name:    "Kimi Agent",
+		Tier:    2,
+		Runtime: "claude-code",
+		Model:   "moonshot/kimi-k2.6",
+	})
+
+	var parsed struct {
+		Model         string `yaml:"model"`
+		Provider      string `yaml:"provider"`
+		RuntimeConfig struct {
+			Model    string `yaml:"model"`
+			Provider string `yaml:"provider"`
+		} `yaml:"runtime_config"`
+	}
+	if err := yaml.Unmarshal(files["config.yaml"], &parsed); err != nil {
+		t.Fatalf("generated YAML invalid: %v\n%s", err, files["config.yaml"])
+	}
+	if parsed.Provider != "platform" {
+		t.Errorf("top-level provider = %q, want platform\n%s", parsed.Provider, files["config.yaml"])
+	}
+	if parsed.RuntimeConfig.Provider != "platform" {
+		t.Errorf("runtime_config.provider = %q, want platform\n%s", parsed.RuntimeConfig.Provider, files["config.yaml"])
+	}
+	// The claude-code model normalization still strips the slash prefix.
+	if parsed.Model != "kimi-k2.6" {
+		t.Errorf("top-level model = %q, want kimi-k2.6\n%s", parsed.Model, files["config.yaml"])
+	}
+}
+
+// TestEnsureDefaultConfig_DeriveMissOmitsProvider pins requirement #3: a model
+// the providers manifest does NOT recognize for the runtime (a derive miss)
+// must NOT write any `provider:` key — neither top-level nor under
+// runtime_config — preserving the pre-fix behavior (no empty `provider:`,
+// provisioning never fails on a miss). "gpt-4o" is not a registered
+// claude-code model, so DeriveProvider errors and the field is omitted.
+func TestEnsureDefaultConfig_DeriveMissOmitsProvider(t *testing.T) {
+	broadcaster := newTestBroadcaster()
+	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
+
+	files := handler.ensureDefaultConfig("ws-derivemiss", models.CreateWorkspacePayload{
+		Name:    "Unregistered Agent",
+		Tier:    1,
+		Runtime: "claude-code",
+		Model:   "gpt-4o",
+	})
+
+	content := string(files["config.yaml"])
+	if strings.Contains(content, "provider:") {
+		t.Errorf("derive miss must NOT write any provider: key, got:\n%s", content)
+	}
+	// Sanity: a derive miss must still produce a valid, model-bearing config.
+	if !strings.Contains(content, `model: "gpt-4o"`) {
+		t.Errorf("derive miss should still render the model, got:\n%s", content)
+	}
+}
+
 func TestEnsureDefaultConfig_CustomModel(t *testing.T) {
 	broadcaster := newTestBroadcaster()
 	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
@@ -876,8 +876,9 @@ func (h *WorkspaceHandler) runRestartCycle(workspaceID string) {
 	h.provisionWorkspaceAutoSync(workspaceID, "", nil, payload)
 	// sendRestartContext is a one-way notification to the new container; safe
 	// to fire async — the next restart cycle won't depend on it completing.
-	// Tracked via goAsync so the test harness can drain it before the
-	// global db.DB swap (sendRestartContext reads db.DB).
+	// Tracked via h.goAsync so tests can wait for it via h.asyncWG before
+	// closing the sqlmock. Without this, untracked goroutines hit the restored
+	// mock and cause "was not expected" errors in parallel CI execution (mc#1264).
 	h.goAsync(func() { h.sendRestartContext(workspaceID, restartData) })
 }

@@ -16,7 +16,7 @@ const SchemaVersion = 1
 // Fingerprint is a stable content hash of the generated projection (schema
 // version + provider catalog + runtime native sets). It changes iff the
 // registry DATA changes (comment-only YAML edits do not churn it).
-const Fingerprint = "8f733b112695b926"
+const Fingerprint = "a491f5ff8a17ef59"

 // GenProvider is the generated projection of one provider catalog entry —
 // the subset a downstream consumer needs to derive + display a provider.
@@ -50,7 +50,7 @@ var Providers = []GenProvider{
 	{Name: "openai-api", DisplayName: "OpenAI API", Protocol: "openai", AuthMode: "anthropic_api", AuthEnv: []string{"OPENAI_API_KEY"}, ModelPrefixMatch: "^openai-api[:/]", IsPlatform: false, UpstreamVendor: "openai"},
 	{Name: "moonshot", DisplayName: "Moonshot (Kimi)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MOONSHOT_API_KEY", "KIMI_API_KEY"}, ModelPrefixMatch: "^moonshot[:/-]", IsPlatform: false, UpstreamVendor: "moonshot"},
 	{Name: "minimax", DisplayName: "MiniMax", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MINIMAX_API_KEY", "ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "(?i)^minimax-m", IsPlatform: false, UpstreamVendor: "minimax"},
-	{Name: "platform", DisplayName: "Platform", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"}, ModelPrefixMatch: "^platform/", IsPlatform: true},
+	{Name: "platform", DisplayName: "Platform", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MOLECULE_LLM_USAGE_TOKEN"}, ModelPrefixMatch: "^platform/", IsPlatform: true},
 	{Name: "xiaomi-mimo", DisplayName: "Xiaomi MiMo", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "^mimo-", IsPlatform: false},
 	{Name: "zai", DisplayName: "Z.ai (GLM)", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"GLM_API_KEY", "ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "(?i)^glm-", IsPlatform: false},
 	{Name: "kimi-coding", DisplayName: "Moonshot Kimi (coding-tuned)", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"KIMI_API_KEY", "ANTHROPIC_API_KEY", "ANTHROPIC_AUTH_TOKEN"}, ModelPrefixMatch: "^kimi-", IsPlatform: false},
@@ -89,8 +89,9 @@ var Runtimes = map[string][]GenRuntimeRef{
 		{Name: "platform", Models: []string{"openai/gpt-5.4", "openai/gpt-5.4-mini"}},
 	},
 	"google-adk": {
+		{Name: "platform", Models: []string{"platform:gemini-2.5-pro", "platform:gemini-2.5-flash"}},
+		{Name: "google", Models: []string{"gemini-2.5-pro", "gemini-2.5-flash"}},
 		{Name: "vertex", Models: []string{"vertex:gemini-2.5-pro"}},
-		{Name: "google", Models: []string{"gemini-2.5-pro"}},
 	},
 	"hermes": {
 		{Name: "kimi-coding", Models: []string{"kimi-coding/kimi-k2"}},
@@ -0,0 +1,42 @@
+package providers
+
+import "testing"
+
+// Proper-SSOT (task #65): google-adk keyless Gemini resolves to the closed
+// platform provider -> IsPlatform=true; BYOK AI Studio -> google. The
+// platform: select ids are registered so workspace-create accepts them
+// (was 422 UNREGISTERED_MODEL_FOR_RUNTIME).
+func TestGoogleADK_PlatformGeminiResolvesToPlatform(t *testing.T) {
+	m, err := LoadManifest()
+	if err != nil {
+		t.Fatal(err)
+	}
+	for _, id := range []string{"platform:gemini-2.5-pro", "platform:gemini-2.5-flash"} {
+		p, err := m.DeriveProvider("google-adk", id, nil)
+		if err != nil {
+			t.Fatalf("%s: %v", id, err)
+		}
+		if p.Name != PlatformProviderName || !p.IsPlatform() {
+			t.Errorf("%s -> %q IsPlatform=%v; want platform", id, p.Name, p.IsPlatform())
+		}
+	}
+	p, err := m.DeriveProvider("google-adk", "gemini-2.5-pro", nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if p.IsPlatform() || p.Name != "google" {
+		t.Errorf("gemini-2.5-pro -> %q IsPlatform=%v; want google byok", p.Name, p.IsPlatform())
+	}
+	models, _ := m.ModelsForRuntime("google-adk")
+	want := map[string]bool{"platform:gemini-2.5-pro": false, "platform:gemini-2.5-flash": false}
+	for _, id := range models {
+		if _, ok := want[id]; ok {
+			want[id] = true
+		}
+	}
+	for id, ok := range want {
+		if !ok {
+			t.Errorf("%s not registered for google-adk — create would 422", id)
+		}
+	}
+}
@@ -292,7 +292,7 @@ providers:
    # PR-1 simplification when only claude-code referenced platform.
    base_url_template: "https://api.moleculesai.app/api/v1/internal/llm/openai/v1"
    base_url_anthropic: "https://api.moleculesai.app/api/v1/internal/llm/anthropic/v1"
-    auth_env: [ANTHROPIC_API_KEY, MOLECULE_LLM_USAGE_TOKEN]
+    auth_env: [MOLECULE_LLM_USAGE_TOKEN]
    auth_token_env: ANTHROPIC_API_KEY
    # Adapter routes kimi- / moonshot/ through platform by default. No bare
    # vendor prefix of its own; it multiplexes other vendors' slugs. Match
@@ -412,14 +412,24 @@ providers:
    model_prefix_match: "^gemini-"
    model_aliases: []

-  # Google Vertex AI — KEYLESS arm (mirrors the anthropic-oauth / anthropic-api
-  # and openai-subscription / openai-api split: same vendor, distinct auth).
-  # google-adk serves Gemini via Vertex using Application Default Credentials
-  # over Workload Identity Federation (AWS EC2 role -> GCP STS -> SA), injected
-  # by the provisioner (cp#416 + envs.yaml vertex block) as a NON-SECRET
-  # external_account cred-config at GOOGLE_APPLICATION_CREDENTIALS. No API key.
-  # Distinct `vertex:` model namespace keeps it unambiguous vs the API-key
-  # `google` vendor's ^gemini- (TestNoAmbiguousModelMatch).
+  # Google Vertex AI — served via the Molecule CP LLM proxy (NOT on-box ADC).
+  # google-adk routes ALL Gemini through the proxy, which mints a Vertex token
+  # server-side over Workload Identity Federation (AWS -> GCP STS -> SA; see
+  # internal/vertexauth + llm_proxy.go google/vertex case) and meters usage to
+  # org credits. The former on-box ADC delivery (a NON-SECRET external_account
+  # cred-config written to /configs/gcp-adc.json via GOOGLE_APPLICATION_CREDENTIALS,
+  # injected by the provisioner) was REMOVED: a tenant has EC2 root and could
+  # have used that credential to call Vertex DIRECTLY, bypassing metering
+  # (keyless-Vertex billing leak — task #64 / RFC internal#763; provisioner
+  # force-off + template routes vertex: through the proxy). The `vertex:`
+  # namespace + this entry remain for proxy routing/billing of Vertex-upstream
+  # requests, distinct from the API-key `google` vendor's ^gemini-
+  # (TestNoAmbiguousModelMatch).
+  #
+  # NOTE: display_name ("keyless ADC") and auth_env (GOOGLE_APPLICATION_CREDENTIALS)
+  # are now VESTIGIAL — no consumer reads auth_env post-leak-fix, but it must stay
+  # non-empty (providers.go validate). Left as-is to keep this a comment-only,
+  # regen-free change; retiring them is a registry-regen follow-up.
  - name: vertex
    display_name: "Google Vertex AI (keyless ADC)"
    vendor_logo: "google"
@@ -844,11 +854,24 @@ runtimes:
  # this runtimes entry declares the selectable model set.
  google-adk:
    providers:
-      # Keyless Vertex (org-compliant default): Gemini via Vertex AI + ADC/WIF.
-      - name: vertex
+      # Platform-managed (keyless, metered) Gemini via the Molecule LLM proxy ->
+      # Vertex AI (server-side WIF mint; NO credential on the tenant box — the
+      # keyless-Vertex leak fix). Resolves to the closed `platform` provider ->
+      # IsPlatform=true -> platform_managed billing + required_env=[] (derived).
+      # The org-compliant default. The runtime translates the `platform:` select
+      # id to the bare wire id the proxy routes to Vertex.
+      - name: platform
        models:
-          - vertex:gemini-2.5-pro
-      # API-key BYOK arm: AI Studio GEMINI_API_KEY/GOOGLE_API_KEY.
+          - platform:gemini-2.5-pro
+          - platform:gemini-2.5-flash
+      # API-key BYOK arm: AI Studio (the tenant's OWN GOOGLE_API_KEY).
      - name: google
        models:
-          - gemini-2.5-pro
+          - gemini-2.5-pro
+          - gemini-2.5-flash
+      # DEPRECATED transitional: vertex: ids stay registered until templates
+      # move to platform: (superseded by the platform arm above). Remove in a
+      # cleanup once no template references vertex:gemini-*.
+      - name: vertex
+        models:
+          - vertex:gemini-2.5-pro
@@ -29,7 +29,7 @@ import (
 // canonicalProvidersYAMLSHA256 is the sha256 of the canonical providers.yaml as
 // synced from molecule-controlplane. Bumped deliberately on each re-sync (see
 // file doc). Cross-checked live by the sync-providers-yaml CI workflow.
-const canonicalProvidersYAMLSHA256 = "dec73199e26cee2d395a0acece99771618d3879dc5ca724ba57cb5b38079c6ce"
+const canonicalProvidersYAMLSHA256 = "021ae082c2bbbbb61c406cae03205ac6b7fff160ae5976cfc64de3de676d02b2"

 func TestSyncedYAMLMatchesCanonicalSHA(t *testing.T) {
 	sum := sha256.Sum256(embeddedYAML)
@@ -0,0 +1,558 @@
+//go:build integration
+// +build integration
+
+// scheduler_integration_test.go — REAL Postgres integration tests for the
+// workspace-server cron scheduler firing loop. Regression coverage for
+// molecule-core issue #2149 (filed under SOP rule internal#765).
+//
+// Run with:
+//
+//	docker run --rm -d --name pg-integration \
+//	  -e POSTGRES_PASSWORD=test -e POSTGRES_DB=molecule \
+//	  -p 55432:5432 postgres:15-alpine
+//	sleep 4
+//	# apply every migration up.sql / legacy .sql in lexicographic order
+//	for f in $(ls workspace-server/migrations/*.sql | grep -v '\.down\.sql$' | sort); do \
+//	  psql "postgres://postgres:test@localhost:55432/molecule?sslmode=disable" -f "$f"; done
+//	cd workspace-server
+//	INTEGRATION_DB_URL="postgres://postgres:test@localhost:55432/molecule?sslmode=disable" \
+//	  go test -tags=integration ./internal/scheduler/ -run '^TestIntegration_'
+//
+// CI: .gitea/workflows/handlers-postgres-integration.yml runs these on every
+// PR/push that touches workspace-server/internal/scheduler/ (the
+// `handlers-postgres` detect-changes profile was extended to include the
+// scheduler package + this workflow file).
+//
+// Why these are NOT the existing sqlmock unit tests (scheduler_test.go)
+// --------------------------------------------------------------------
+// The strict-sqlmock unit tests pin which SQL statements fire — fast, no DB.
+// But sqlmock CANNOT validate:
+//   - the activity_logs `$3::jsonb` cast (#2026 wedge) — sqlmock never parses
+//     the payload, so an invalid-UTF-8 jsonb body that wedges a real INSERT
+//     looks "green" under mock.ExpectExec(`INSERT INTO activity_logs`).
+//   - the ROW STATE after tick()/fireSchedule run: that last_run_at,
+//     next_run_at, run_count, last_status actually landed on the row.
+//   - sweepPhantomBusy's NOT IN (SELECT … activity_logs) subquery semantics
+//     against real rows — it has no unit test at all (#2149).
+//
+// A SQL regression here = a fleet-wide silent cron outage (#85 ran 12h before
+// detection). These tests boot a real Postgres, insert real rows, run the
+// production tick()/sweepPhantomBusy, and SELECT the rows back to assert the
+// observable end state — the gap sqlmock structurally cannot cover.
+//
+// Watch-fail intent: each test is written to FAIL on a regression of the
+// behavior under test (e.g. drop the activity_logs INSERT, drop the
+// write-back UPDATE, drop the UTF-8 sanitize, or break the phantom-busy
+// subquery) and to PASS against the current-correct scheduler.go.
+
+package scheduler
+
+import (
+	"context"
+	"database/sql"
+	"os"
+	"testing"
+	"time"
+
+	mdb "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
+	_ "github.com/lib/pq"
+)
+
+// ── test doubles ──────────────────────────────────────────────────────────
+
+// recordingProxy is an A2AProxy that records each fire and returns a
+// configurable response. Used to assert that tick()/fireSchedule actually
+// reached the A2A boundary for the due schedule.
+type recordingProxy struct {
+	status int
+	body   []byte
+	err    error
+
+	fires       int
+	lastBody    []byte
+	lastCaller  string
+	lastLogFlag bool
+	lastWSID    string
+}
+
+func (p *recordingProxy) ProxyA2ARequest(
+	_ context.Context, workspaceID string, body []byte, callerID string, logActivity bool,
+) (int, []byte, error) {
+	p.fires++
+	p.lastWSID = workspaceID
+	p.lastBody = body
+	p.lastCaller = callerID
+	p.lastLogFlag = logActivity
+	if p.err != nil {
+		return 0, nil, p.err
+	}
+	return p.status, p.body, nil
+}
+
+// ── connection + fixture helpers ──────────────────────────────────────────
+
+// integrationDB returns the configured integration-test connection or skips
+// the test if INTEGRATION_DB_URL is unset. Hot-swaps the package-level
+// mdb.DB so the production scheduler helpers (tick, fireSchedule,
+// sweepPhantomBusy) operate on this connection; restores it via t.Cleanup.
+//
+// NOT SAFE FOR t.Parallel(): the package-global swap races across tests.
+func integrationDB(t *testing.T) *sql.DB {
+	t.Helper()
+	url := os.Getenv("INTEGRATION_DB_URL")
+	if url == "" {
+		t.Skip("INTEGRATION_DB_URL not set; skipping (local devs: see file header)")
+	}
+	conn, err := sql.Open("postgres", url)
+	if err != nil {
+		t.Fatalf("open: %v", err)
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+	if err := conn.PingContext(ctx); err != nil {
+		t.Fatalf("ping: %v", err)
+	}
+	// Clean slate. activity_logs + workspace_schedules cascade off workspaces,
+	// but we DELETE explicitly (and in FK order) so a partial prior run can't
+	// leave orphan rows that perturb the next test's assertions.
+	cctx, ccancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer ccancel()
+	for _, q := range []string{
+		`DELETE FROM activity_logs`,
+		`DELETE FROM workspace_schedules`,
+		`DELETE FROM workspaces`,
+	} {
+		if _, err := conn.ExecContext(cctx, q); err != nil {
+			t.Fatalf("cleanup %q: %v", q, err)
+		}
+	}
+	prev := mdb.DB
+	mdb.DB = conn
+	t.Cleanup(func() {
+		mdb.DB = prev
+		conn.Close()
+	})
+	return conn
+}
+
+// insertWorkspace inserts a workspace row and returns its UUID. active is the
+// initial active_tasks value; status defaults to 'online' (valid workspace_status enum).
+func insertWorkspace(t *testing.T, conn *sql.DB, name string, active int) string {
+	t.Helper()
+	var id string
+	err := conn.QueryRowContext(context.Background(), `
+		INSERT INTO workspaces (name, status, active_tasks, max_concurrent_tasks)
+		VALUES ($1, 'online', $2, 1)
+		RETURNING id
+	`, name, active).Scan(&id)
+	if err != nil {
+		t.Fatalf("insertWorkspace(%s): %v", name, err)
+	}
+	return id
+}
+
+// insertSchedule inserts an enabled workspace_schedules row whose next_run_at
+// is in the past (so tick() picks it up immediately) and returns its UUID.
+func insertSchedule(t *testing.T, conn *sql.DB, wsID, name, cronExpr, prompt string) string {
+	t.Helper()
+	var id string
+	err := conn.QueryRowContext(context.Background(), `
+		INSERT INTO workspace_schedules
+		    (workspace_id, name, cron_expr, timezone, prompt, enabled, next_run_at, source)
+		VALUES ($1, $2, $3, 'UTC', $4, true, now() - interval '1 minute', 'runtime')
+		RETURNING id
+	`, wsID, name, cronExpr, prompt).Scan(&id)
+	if err != nil {
+		t.Fatalf("insertSchedule(%s): %v", name, err)
+	}
+	return id
+}
+
+type scheduleState struct {
+	lastRunAt  sql.NullTime
+	nextRunAt  sql.NullTime
+	runCount   int
+	lastStatus string
+	lastError  string
+}
+
+func readScheduleState(t *testing.T, conn *sql.DB, id string) scheduleState {
+	t.Helper()
+	var st scheduleState
+	var status, errStr sql.NullString
+	err := conn.QueryRowContext(context.Background(), `
+		SELECT last_run_at, next_run_at, run_count, last_status, last_error
+		FROM workspace_schedules WHERE id = $1
+	`, id).Scan(&st.lastRunAt, &st.nextRunAt, &st.runCount, &status, &errStr)
+	if err != nil {
+		t.Fatalf("readScheduleState(%s): %v", id, err)
+	}
+	st.lastStatus = status.String
+	st.lastError = errStr.String
+	return st
+}
+
+// ── TestIntegration_TickFiresAndWritesBack (#2149 core) ───────────────────
+//
+// Insert one due schedule, run tick() once, and assert the full firing
+// loop landed against a REAL Postgres:
+//   - the A2A proxy was invoked exactly once for the schedule's workspace
+//   - the post-fire UPDATE wrote last_run_at (was NULL), advanced next_run_at
+//     into the future, bumped run_count to 1, set last_status='ok'
+//   - a cron_run activity_logs row was inserted with VALID jsonb request_body
+//     (the `$3::jsonb` cast #2026 path) carrying the schedule metadata
+//
+// Regression watch-fail: if a refactor drops the write-back UPDATE, the
+// activity_logs INSERT, or breaks the jsonb cast, this test fails where every
+// sqlmock unit test stays green.
+func TestIntegration_TickFiresAndWritesBack(t *testing.T) {
+	conn := integrationDB(t)
+
+	wsID := insertWorkspace(t, conn, "cron-fire-ws", 0)
+	schedID := insertSchedule(t, conn, wsID, "hourly-audit", "0 * * * *", "run the hourly audit")
+
+	proxy := &recordingProxy{
+		status: 200,
+		body:   []byte(`{"jsonrpc":"2.0","result":{"kind":"message","parts":[{"kind":"text","text":"done"}]},"id":"1"}`),
+	}
+	s := New(proxy, nil)
+	s.tick(context.Background())
+
+	// 1. A2A boundary reached exactly once for the right workspace.
+	if proxy.fires != 1 {
+		t.Fatalf("proxy fires = %d, want 1 (tick must fire the one due schedule)", proxy.fires)
+	}
+	if proxy.lastWSID != wsID {
+		t.Errorf("proxy fired for workspace %q, want %q", proxy.lastWSID, wsID)
+	}
+	// Empty callerID = canvas-style (bypasses access control); logActivity=true.
+	if proxy.lastCaller != "" {
+		t.Errorf("callerID = %q, want empty (canvas-style scheduler fire)", proxy.lastCaller)
+	}
+	if !proxy.lastLogFlag {
+		t.Error("logActivity flag = false, want true")
+	}
+
+	// 2. Row write-back.
+	st := readScheduleState(t, conn, schedID)
+	if !st.lastRunAt.Valid {
+		t.Error("last_run_at is NULL after fire, want set (write-back UPDATE did not land)")
+	}
+	if !st.nextRunAt.Valid {
+		t.Fatal("next_run_at is NULL after fire, want a future timestamp")
+	}
+	if !st.nextRunAt.Time.After(time.Now()) {
+		t.Errorf("next_run_at = %v, want a time in the future (schedule would tight-loop otherwise)", st.nextRunAt.Time)
+	}
+	if st.runCount != 1 {
+		t.Errorf("run_count = %d, want 1", st.runCount)
+	}
+	if st.lastStatus != "ok" {
+		t.Errorf("last_status = %q, want \"ok\"", st.lastStatus)
+	}
+
+	// 3. activity_logs cron_run row with valid jsonb request_body.
+	var actCount int
+	var summary, status string
+	var reqBody []byte
+	err := conn.QueryRowContext(context.Background(), `
+		SELECT count(*) OVER (), summary, status, request_body
+		FROM activity_logs
+		WHERE workspace_id = $1 AND activity_type = 'cron_run'
+		LIMIT 1
+	`, wsID).Scan(&actCount, &summary, &status, &reqBody)
+	if err == sql.ErrNoRows {
+		t.Fatal("no cron_run activity_logs row inserted after fire (#152/#2026 path missing)")
+	}
+	if err != nil {
+		t.Fatalf("read activity_logs: %v", err)
+	}
+	if actCount != 1 {
+		t.Errorf("cron_run activity_logs rows = %d, want 1", actCount)
+	}
+	if status != "ok" {
+		t.Errorf("activity_logs.status = %q, want \"ok\"", status)
+	}
+	// request_body must be valid jsonb carrying the schedule_id — proves the
+	// `$3::jsonb` cast accepted the payload (the #2026 wedge surface).
+	var sid string
+	if err := conn.QueryRowContext(context.Background(), `
+		SELECT request_body->>'schedule_id'
+		FROM activity_logs WHERE workspace_id = $1 AND activity_type = 'cron_run' LIMIT 1
+	`, wsID).Scan(&sid); err != nil {
+		t.Fatalf("request_body is not queryable jsonb: %v", err)
+	}
+	if sid != schedID {
+		t.Errorf("activity_logs request_body->>'schedule_id' = %q, want %q", sid, schedID)
+	}
+}
+
+// ── TestIntegration_InvalidUTF8PromptSanitizedIntoJsonb (#2026 / #2149) ────
+//
+// The agent-editable prompt can carry raw invalid-UTF-8 bytes. Postgres jsonb
+// columns REJECT invalid UTF-8, which (pre-#2026) wedged the activity_logs
+// INSERT and held the transaction open — stalling the whole scheduler.
+// fireSchedule now sanitizeUTF8()s every string before the `$3::jsonb` insert.
+//
+// Postgres TEXT columns (workspace_schedules.prompt) also reject invalid UTF-8
+// in a UTF-8 database, so we cannot INSERT the bad bytes through the fixture.
+// Instead we insert a valid prompt, then call fireSchedule directly with a
+// scheduleRow whose Prompt field contains the invalid bytes — this simulates
+// the real regression path (e.g. truncation splitting a multi-byte rune, or
+// an agent-edited template arriving via a path that bypasses DB validation).
+//
+// Assertions:
+//   - the fire still completed (write-back UPDATE landed)
+//   - the cron_run activity_logs row was inserted (the jsonb cast accepted
+//     the SANITIZED payload — the INSERT did not wedge)
+//   - the stored request_body is queryable jsonb (valid UTF-8 on disk)
+//
+// Watch-fail: remove the sanitizeUTF8() wrapping around the jsonb payload and
+// this test fails on a real Postgres (INSERT errors / row absent), while the
+// sqlmock unit test that only checks "an INSERT fired" stays green.
+func TestIntegration_InvalidUTF8PromptSanitizedIntoJsonb(t *testing.T) {
+	conn := integrationDB(t)
+
+	wsID := insertWorkspace(t, conn, "utf8-ws", 0)
+	// Insert with valid UTF-8 — Postgres TEXT rejects 0x80/0xff.
+	schedID := insertSchedule(t, conn, wsID, "utf8-job", "0 * * * *", "valid prompt")
+
+	// Prompt with invalid UTF-8: orphan continuation byte + bare 0xff.
+	badPrompt := "audit \x80 report \xff end"
+	row := scheduleRow{
+		ID:          schedID,
+		WorkspaceID: wsID,
+		Name:        "utf8-job",
+		CronExpr:    "0 * * * *",
+		Timezone:    "UTC",
+		Prompt:      badPrompt,
+	}
+
+	proxy := &recordingProxy{
+		status: 200,
+		body:   []byte(`{"result":{"kind":"message","parts":[{"kind":"text","text":"ok"}]}}`),
+	}
+	s := New(proxy, nil)
+	s.fireSchedule(context.Background(), row)
+
+	if proxy.fires != 1 {
+		t.Fatalf("proxy fires = %d, want 1", proxy.fires)
+	}
+
+	// Write-back must have landed despite the bad prompt bytes.
+	st := readScheduleState(t, conn, schedID)
+	if st.runCount != 1 || st.lastStatus != "ok" {
+		t.Errorf("post-fire state run_count=%d last_status=%q, want 1/\"ok\" "+
+			"(invalid-UTF-8 prompt must not block the fire)", st.runCount, st.lastStatus)
+	}
+
+	// The cron_run activity_logs row MUST exist — proving the `$3::jsonb`
+	// INSERT accepted the sanitized payload (did not wedge on invalid UTF-8).
+	var n int
+	if err := conn.QueryRowContext(context.Background(), `
+		SELECT count(*) FROM activity_logs
+		WHERE workspace_id = $1 AND activity_type = 'cron_run'
+	`, wsID).Scan(&n); err != nil {
+		t.Fatalf("count cron_run rows: %v", err)
+	}
+	if n != 1 {
+		t.Fatalf("cron_run activity_logs rows = %d, want 1 — the jsonb INSERT wedged "+
+			"on invalid UTF-8 (the #2026 regression)", n)
+	}
+
+	// The stored prompt inside request_body must be queryable + valid UTF-8.
+	var storedPrompt string
+	if err := conn.QueryRowContext(context.Background(), `
+		SELECT request_body->>'prompt'
+		FROM activity_logs WHERE workspace_id = $1 AND activity_type = 'cron_run' LIMIT 1
+	`, wsID).Scan(&storedPrompt); err != nil {
+		t.Fatalf("request_body->>'prompt' not queryable jsonb: %v", err)
+	}
+	if storedPrompt == "" {
+		t.Error("stored prompt is empty, want the sanitized prompt text")
+	}
+	// Round-trip through Postgres jsonb guarantees valid UTF-8; assert the
+	// replacement character replaced the bad bytes rather than them surviving.
+	for i := 0; i < len(storedPrompt); i++ {
+		if storedPrompt[i] == 0x80 || storedPrompt[i] == 0xff {
+			t.Fatalf("stored prompt still contains raw invalid byte 0x%x at %d", storedPrompt[i], i)
+		}
+	}
+}
+
+// ── TestIntegration_TickErrorStatusWriteBack (#2149) ──────────────────────
+//
+// When the A2A proxy returns a transport error, fireSchedule must still write
+// back: last_status='error', last_error populated, next_run_at advanced (so
+// the schedule does not get stuck re-firing), run_count bumped. Verifies the
+// error path persists to a real row, not just that "an UPDATE fired".
+func TestIntegration_TickErrorStatusWriteBack(t *testing.T) {
+	conn := integrationDB(t)
+
+	wsID := insertWorkspace(t, conn, "err-ws", 0)
+	schedID := insertSchedule(t, conn, wsID, "err-job", "0 * * * *", "do work")
+
+	proxy := &recordingProxy{err: context.DeadlineExceeded}
+	s := New(proxy, nil)
+	s.tick(context.Background())
+
+	st := readScheduleState(t, conn, schedID)
+	if st.lastStatus != "error" {
+		t.Errorf("last_status = %q, want \"error\"", st.lastStatus)
+	}
+	if st.lastError == "" {
+		t.Error("last_error is empty, want the proxy error text persisted (#152)")
+	}
+	if st.runCount != 1 {
+		t.Errorf("run_count = %d, want 1 (run still counted on error)", st.runCount)
+	}
+	if !st.nextRunAt.Valid || !st.nextRunAt.Time.After(time.Now()) {
+		t.Errorf("next_run_at not advanced to future on error path (= %v) — schedule would tight-loop", st.nextRunAt)
+	}
+	// The error activity_logs row must carry status='error' + error_detail.
+	var status, errDetail string
+	if err := conn.QueryRowContext(context.Background(), `
+		SELECT status, COALESCE(error_detail,'') FROM activity_logs
+		WHERE workspace_id = $1 AND activity_type = 'cron_run' LIMIT 1
+	`, wsID).Scan(&status, &errDetail); err != nil {
+		t.Fatalf("read error activity_logs: %v", err)
+	}
+	if status != "error" {
+		t.Errorf("activity_logs.status = %q, want \"error\"", status)
+	}
+	if errDetail == "" {
+		t.Error("activity_logs.error_detail empty on error fire, want the error message (#152)")
+	}
+}
+
+// ── TestIntegration_SweepPhantomBusy (#2149 — no prior test) ──────────────
+//
+// sweepPhantomBusy resets active_tasks=0 for workspaces stuck busy with NO
+// activity_logs row in the last phantomStaleThreshold window, and must LEAVE
+// ALONE workspaces that have recent activity. The NOT IN (SELECT DISTINCT
+// workspace_id FROM activity_logs WHERE created_at > now() - interval) subquery
+// is exactly the kind of set-semantics that sqlmock cannot validate — there is
+// no unit test for this method at all (#2149).
+//
+// Fixture:
+//   - phantomWS: active_tasks=3, NO recent activity_log     → must reset to 0
+//   - recentWS:  active_tasks=2, activity_log 1 min ago      → must stay at 2
+//   - staleWS:   active_tasks=1, activity_log 30 min ago     → must reset to 0
+//   - removedWS: active_tasks=4, status='removed', no activity → must stay (status guard)
+//   - idleWS:    active_tasks=0                               → untouched (not >0)
+//
+// Watch-fail: break the subquery (e.g. drop the status!='removed' guard, or
+// invert the NOT IN), and the asserted end-state diverges on a real Postgres.
+func TestIntegration_SweepPhantomBusy(t *testing.T) {
+	conn := integrationDB(t)
+
+	phantomWS := insertWorkspace(t, conn, "phantom-ws", 3)
+	recentWS := insertWorkspace(t, conn, "recent-ws", 2)
+	staleWS := insertWorkspace(t, conn, "stale-ws", 1)
+	idleWS := insertWorkspace(t, conn, "idle-ws", 0)
+
+	// removedWS: busy but status='removed' — the sweep must skip it.
+	var removedWS string
+	if err := conn.QueryRowContext(context.Background(), `
+		INSERT INTO workspaces (name, status, active_tasks, max_concurrent_tasks)
+		VALUES ('removed-ws', 'removed', 4, 1) RETURNING id
+	`).Scan(&removedWS); err != nil {
+		t.Fatalf("insert removedWS: %v", err)
+	}
+
+	// recentWS has a fresh activity_log (1 min ago → inside the 10-min window).
+	if _, err := conn.ExecContext(context.Background(), `
+		INSERT INTO activity_logs (workspace_id, activity_type, status, created_at)
+		VALUES ($1, 'a2a_receive', 'ok', now() - interval '1 minute')
+	`, recentWS); err != nil {
+		t.Fatalf("insert recent activity_log: %v", err)
+	}
+	// staleWS has only an OLD activity_log (30 min ago → outside the window).
+	if _, err := conn.ExecContext(context.Background(), `
+		INSERT INTO activity_logs (workspace_id, activity_type, status, created_at)
+		VALUES ($1, 'a2a_receive', 'ok', now() - interval '30 minutes')
+	`, staleWS); err != nil {
+		t.Fatalf("insert stale activity_log: %v", err)
+	}
+
+	s := New(nil, nil)
+	s.sweepPhantomBusy(context.Background())
+
+	active := func(id string) int {
+		var n int
+		if err := conn.QueryRowContext(context.Background(),
+			`SELECT active_tasks FROM workspaces WHERE id = $1`, id).Scan(&n); err != nil {
+			t.Fatalf("read active_tasks(%s): %v", id, err)
+		}
+		return n
+	}
+
+	if got := active(phantomWS); got != 0 {
+		t.Errorf("phantomWS active_tasks = %d, want 0 (busy + no recent activity → must be swept)", got)
+	}
+	if got := active(staleWS); got != 0 {
+		t.Errorf("staleWS active_tasks = %d, want 0 (only stale activity → must be swept)", got)
+	}
+	if got := active(recentWS); got != 2 {
+		t.Errorf("recentWS active_tasks = %d, want 2 (recent activity → must NOT be swept)", got)
+	}
+	if got := active(removedWS); got != 4 {
+		t.Errorf("removedWS active_tasks = %d, want 4 (status='removed' → sweep must skip it)", got)
+	}
+	if got := active(idleWS); got != 0 {
+		t.Errorf("idleWS active_tasks = %d, want 0 (was never busy)", got)
+	}
+
+	// The swept rows must also have current_task cleared.
+	var ct string
+	if err := conn.QueryRowContext(context.Background(),
+		`SELECT COALESCE(current_task,'') FROM workspaces WHERE id = $1`, phantomWS).Scan(&ct); err != nil {
+		t.Fatalf("read current_task: %v", err)
+	}
+	if ct != "" {
+		t.Errorf("phantomWS current_task = %q, want empty after sweep", ct)
+	}
+}
+
+// ── TestIntegration_NativeSchedulerSkipAdvancesNextRunAt (#2149) ──────────
+//
+// When a workspace's adapter owns scheduling natively, tick() must SKIP the
+// fire but still advance next_run_at (so the row doesn't tight-loop on every
+// poll) — observability (next_run_at) is preserved while the fire is dropped.
+// Asserts the native-skip UPDATE landed on a real row and the proxy was NOT
+// invoked. This is the native-skip UPDATE path #2149 calls out — sqlmock can
+// only assert an UPDATE fired, not that next_run_at moved forward.
+func TestIntegration_NativeSchedulerSkipAdvancesNextRunAt(t *testing.T) {
+	conn := integrationDB(t)
+
+	wsID := insertWorkspace(t, conn, "native-ws", 0)
+	schedID := insertSchedule(t, conn, wsID, "native-job", "0 * * * *", "native run")
+
+	// Capture the pre-tick next_run_at (it is in the past by construction).
+	before := readScheduleState(t, conn, schedID)
+	if !before.nextRunAt.Valid || before.nextRunAt.Time.After(time.Now()) {
+		t.Fatalf("precondition: next_run_at should start in the past, got %v", before.nextRunAt)
+	}
+
+	proxy := &recordingProxy{status: 200, body: []byte(`{}`)}
+	s := New(proxy, nil)
+	// Every workspace reports native scheduling → fire must be skipped.
+	s.SetNativeSchedulerCheck(func(string) bool { return true })
+	s.tick(context.Background())
+
+	if proxy.fires != 0 {
+		t.Errorf("proxy fires = %d, want 0 (native-scheduler workspace must NOT fire)", proxy.fires)
+	}
+
+	after := readScheduleState(t, conn, schedID)
+	if !after.nextRunAt.Valid || !after.nextRunAt.Time.After(time.Now()) {
+		t.Errorf("next_run_at = %v, want advanced into the future (native-skip UPDATE must still run)", after.nextRunAt)
+	}
+	// Skip path does NOT bump run_count or write last_run_at (no fire happened).
+	if after.runCount != 0 {
+		t.Errorf("run_count = %d, want 0 (skip must not count as a run)", after.runCount)
+	}
+	if after.lastRunAt.Valid {
+		t.Error("last_run_at set on native-skip, want NULL (no fire occurred)")
+	}
+}