Merge branch 'main' into fix/internal-760-review-event-trigger

Resolved conflicts in qa-review.yml and security-review.yml by keeping both pull_request_review and pull_request_review_approved triggers and using a defensive job guard that checks both review.type and review.state. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
Merge pull request 'test(#2151 ): real-infra integration tests for Activity + Delegation + A2A handlers (CHUNK 1 + CHUNK 2)' (#2166 ) from fix/2151-chunk1-activity-delegation-a2a-integration-tests into main
2026-06-03 20:40:06 +00:00 · 2026-06-03 14:19:38 +00:00 · 2026-06-03 14:12:10 +00:00 · 2026-06-03 14:12:10 +00:00 · 2026-06-03 14:12:10 +00:00 · 2026-06-03 14:12:10 +00:00
24 changed files with 2039 additions and 100 deletions
@@ -466,12 +466,40 @@ def fetch_log(target_url: str) -> str | None:

 def grep_fail_markers(log_text: str) -> list[str]:
    """Return up to 5 sample matching lines for any FAIL_PATTERNS hit.
-    Empty list = clean log."""
+    Empty list = clean log.
+
+    Heuristic: skip lines where the marker appears inside script source
+    (e.g. ``echo "::error::..."`` in a ``::group::Run`` block) rather
+    than actual execution output. The Gitea Actions log prints the raw
+    script before executing it; ``echo "::error::"`` lines in that
+    display are false positives.
+    """
    matches: list[str] = []
+    in_run_group = False
+    group_depth = 0
    for line in log_text.splitlines():
+        stripped = line.strip()
+        # Track Gitea Actions group markers so we can skip the
+        # ``::group::Run`` script-source display blocks.
+        if stripped.startswith("::group::Run"):
+            in_run_group = True
+            group_depth = 1
+            continue
+        if stripped == "::endgroup::":
+            if in_run_group:
+                in_run_group = False
+                group_depth = 0
+            continue
+        if in_run_group:
+            continue
        for pat in FAIL_PATTERNS:
            if pat in line:
-                # Truncate to keep error output bounded.
+                # Additional false-positive guard: ``echo "::error::"``
+                # is script source, not a runtime error emission.
+                if pat == "::error::":
+                    prefix = line[: line.index(pat)].strip()
+                    if prefix.endswith('echo') or prefix.endswith("echo '") or prefix.endswith('echo "'):
+                        break
                matches.append(line.strip()[:240])
                break
        if len(matches) >= 5:
@@ -0,0 +1,198 @@
+"""Live-fire regression test for #2159 — gate auto-fire runtime verification.
+
+Static tests (test_gate_review_auto_fire.py) validate that the workflow YAML
+is structurally correct. This test validates the *runtime* path: submitting an
+APPROVED review to a PR whose head contains the current gate workflows causes
+Gitea Actions to queue the qa-review + security-review workflows and POST the
+branch-protection-required (pull_request_target) contexts within a reasonable
+window.
+
+Skipped when Gitea API credentials are not available. Intended for:
+  - manual developer verification
+  - CI jobs provisioned with a service-account token
+
+Environment:
+  GITEA_HOST            — default: git.moleculesai.app
+  GITEA_TOKEN           — token with read:repository + write:issues (for review POST)
+  REPO                  — default: molecule-ai/molecule-core
+  LIVEFIRE_PR_NUMBER    — optional; if omitted the test tries to find a
+                          suitable open PR automatically, or skips.
+  LIVEFIRE_TIMEOUT_SEC  — default: 120
+"""
+
+import base64
+import json
+import os
+import time
+import urllib.error
+import urllib.request
+from pathlib import Path
+
+import pytest
+
+import yaml
+
+GITEA_HOST = os.environ.get("GITEA_HOST", "git.moleculesai.app")
+GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "")
+REPO = os.environ.get("REPO", "molecule-ai/molecule-core")
+LIVEFIRE_PR_NUMBER = os.environ.get("LIVEFIRE_PR_NUMBER", "")
+LIVEFIRE_TIMEOUT_SEC = int(os.environ.get("LIVEFIRE_TIMEOUT_SEC", "120"))
+
+REQUIRED_CONTEXTS = [
+    "qa-review / approved (pull_request_target)",
+    "security-review / approved (pull_request_target)",
+]
+
+skip_no_token = pytest.mark.skipif(
+    not GITEA_TOKEN,
+    reason="GITEA_TOKEN not set — live-fire test requires API credentials",
+)
+
+
+def _api(method: str, path: str, body: dict | None = None) -> tuple[int, dict]:
+    """Call the Gitea REST API and return ``(http_status, decoded_json)``.
+
+    ``path`` is appended to ``https://{GITEA_HOST}/api/v1``. HTTP error
+    responses are not raised: their status code and decoded body are
+    returned the same way as a success. An empty response body decodes
+    to ``{}``.
+
+    NOTE(review): list endpoints decode to a ``list``, so the ``dict`` in
+    the return annotation is narrower than what some callers receive. A
+    non-JSON body raises from ``json.loads`` and connection-level
+    ``urllib.error.URLError`` is not caught here.
+    """
+    url = f"https://{GITEA_HOST}/api/v1{path}"
+    headers = {
+        "Authorization": f"token {GITEA_TOKEN}",
+        "Content-Type": "application/json",
+    }
+    # A falsy body (None or an empty dict) is sent as a request without payload.
+    data = json.dumps(body).encode() if body else None
+    req = urllib.request.Request(url, data=data, headers=headers, method=method)
+    try:
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            raw = resp.read()
+            code = resp.status
+    except urllib.error.HTTPError as exc:
+        # 4xx/5xx: keep the status and body so the caller can decide what to do.
+        raw = exc.read()
+        code = exc.code
+    payload = json.loads(raw) if raw else {}
+    return code, payload
+
+
+def _get_pr(number: int) -> dict:
+    """Fetch pull request ``number`` from REPO; fail the test on any non-200 reply."""
+    code, pr = _api("GET", f"/repos/{REPO}/pulls/{number}")
+    if code != 200:
+        pytest.fail(f"GET /pulls/{number} returned HTTP {code}: {pr}")
+    return pr
+
+
+def _list_open_prs() -> list[dict]:
+    """Request one page of open pull requests for REPO (``limit=50``).
+
+    Fails the test on any non-200 reply. No further pages are fetched, so
+    open PRs beyond the first page are never considered.
+    """
+    code, prs = _api("GET", f"/repos/{REPO}/pulls?state=open&limit=50")
+    if code != 200:
+        pytest.fail(f"GET /pulls?state=open returned HTTP {code}: {prs}")
+    return prs
+
+
+def _pr_has_trigger_in_head(pr: dict) -> bool:
+    """Return True if the PR head contains pull_request_review in both workflows.
+
+    Each workflow file is read from the contents API at the PR's head SHA.
+    A non-200 reply for either file (for example, the file is absent at
+    that commit) counts as "trigger missing" and returns False.
+
+    NOTE(review): an empty workflow file makes ``yaml.safe_load`` return
+    None, and ``wf.get`` would then raise AttributeError — confirm whether
+    that case needs handling.
+    """
+    head_sha = pr["head"]["sha"]
+    for wf_name in ("qa-review.yml", "security-review.yml"):
+        path = f"/repos/{REPO}/contents/.gitea/workflows/{wf_name}?ref={head_sha}"
+        code, payload = _api("GET", path)
+        if code != 200:
+            return False
+        raw = base64.b64decode(payload.get("content", "")).decode("utf-8")
+        wf = yaml.safe_load(raw)
+        # PyYAML follows YAML 1.1, where an unquoted ``on`` key resolves to
+        # the boolean True; look under both spellings of the key.
+        on = wf.get(True) or wf.get("on") or {}
+        if isinstance(on, str):
+            # Scalar form: ``on: pull_request_review``.
+            if on != "pull_request_review":
+                return False
+        # Mapping or list form: membership works for dict keys and list items.
+        elif "pull_request_review" not in on:
+            return False
+    return True
+
+
+def _find_suitable_pr() -> dict:
+    """Choose the pull request the live-fire test will act on.
+
+    When LIVEFIRE_PR_NUMBER is set, that PR is used as-is (the test is
+    skipped if it is not open); its head is NOT checked for the trigger.
+    Otherwise the first open PR whose head contains the trigger in both
+    workflows is returned, and the test is skipped when none qualifies.
+    """
+    if LIVEFIRE_PR_NUMBER:
+        pr = _get_pr(int(LIVEFIRE_PR_NUMBER))
+        if pr.get("state") != "open":
+            pytest.skip(f"PR {LIVEFIRE_PR_NUMBER} is not open")
+        return pr
+
+    prs = _list_open_prs()
+    for pr in prs:
+        if _pr_has_trigger_in_head(pr):
+            return pr
+    # pytest.skip raises, so no value is returned on this path.
+    pytest.skip("No open PR found whose head contains the pull_request_review trigger")
+
+
+def _submit_approved_review(pr_number: int) -> dict:
+    """POST an APPROVED review on ``pr_number`` and return the decoded reply.
+
+    Fails the test unless the status is 200, 201 or 422. On 422 the
+    returned dict is whatever the API sent back for that status, not
+    necessarily a review object.
+    """
+    code, review = _api(
+        "POST",
+        f"/repos/{REPO}/pulls/{pr_number}/reviews",
+        {"body": "Live-fire test APPROVED review", "event": "APPROVED"},
+    )
+    # 200/201 = created, 422 = review already exists (idempotent enough for our purposes)
+    if code not in (200, 201, 422):
+        pytest.fail(f"POST /pulls/{pr_number}/reviews returned HTTP {code}")
+    return review
+
+
+def _get_status_updated_at(sha: str) -> dict[str, str]:
+    """Return mapping context -> updated_at for required contexts on this SHA.
+
+    Only contexts listed in REQUIRED_CONTEXTS are kept. A non-200 reply
+    yields an empty mapping, which the caller cannot tell apart from
+    "no statuses posted yet".
+    """
+    code, statuses = _api("GET", f"/repos/{REPO}/statuses/{sha}?limit=100")
+    if code != 200:
+        return {}
+    result: dict[str, str] = {}
+    for st in statuses:
+        ctx = st.get("context", "")
+        if ctx in REQUIRED_CONTEXTS:
+            # If a context appears more than once in the reply, the last
+            # entry in iteration order wins. Falls back to created_at, then "".
+            result[ctx] = st.get("updated_at", st.get("created_at", ""))
+    return result
+
+
+def _poll_fresh_statuses(
+    sha: str,
+    prior_updated_at: dict[str, str],
+    timeout_sec: int = LIVEFIRE_TIMEOUT_SEC,
+) -> dict[str, str]:
+    """Poll until required contexts appear with updated_at fresher than prior.
+
+    Returns a mapping context -> state for every required context judged
+    fresh. The statuses endpoint is queried every 5 seconds until all
+    required contexts are fresh or ``timeout_sec`` elapses; on timeout the
+    mapping may be partial or empty. Non-200 replies are ignored and retried.
+
+    "Fresh" means the context was absent from ``prior_updated_at`` or its
+    timestamp differs from the recorded one (inequality, not ordering).
+
+    NOTE(review): if the endpoint returns several historical entries for
+    the same context, any entry whose timestamp differs from the single
+    recorded prior value is treated as fresh, which could report stale
+    history as a new post — confirm against the API's actual response.
+    """
+    deadline = time.monotonic() + timeout_sec
+    found: dict[str, str] = {}
+    while time.monotonic() < deadline:
+        code, statuses = _api("GET", f"/repos/{REPO}/statuses/{sha}?limit=100")
+        if code == 200:
+            for st in statuses:
+                ctx = st.get("context", "")
+                if ctx in REQUIRED_CONTEXTS:
+                    updated_at = st.get("updated_at", st.get("created_at", ""))
+                    # Fresh if the context was absent before, OR its timestamp changed.
+                    if ctx not in prior_updated_at or updated_at != prior_updated_at[ctx]:
+                        # The state is read from "state", falling back to "status".
+                        found[ctx] = st.get("state", st.get("status", ""))
+        if all(ctx in found for ctx in REQUIRED_CONTEXTS):
+            return found
+        time.sleep(5)
+    return found
+
+
+@skip_no_token
+class TestGateAutoFireLive:
+    """Live test against a real Gitea instance; skipped when GITEA_TOKEN is unset.
+
+    The test has a side effect: it submits an APPROVED review on the
+    selected pull request.
+    """
+
+    def test_auto_fire_posts_required_contexts(self):
+        """Submit APPROVED review; assert BP-required contexts appear fresh within timeout."""
+        pr = _find_suitable_pr()
+        pr_number = pr["number"]
+        head_sha = pr["head"]["sha"]
+
+        # Capture pre-existing status timestamps so we can prove FRESH contexts
+        # were posted after the review submission (not stale from a prior run).
+        prior_updated_at = _get_status_updated_at(head_sha)
+
+        _submit_approved_review(pr_number)
+
+        found = _poll_fresh_statuses(head_sha, prior_updated_at)
+
+        missing = [ctx for ctx in REQUIRED_CONTEXTS if ctx not in found]
+        if missing:
+            pytest.fail(
+                f"After {LIVEFIRE_TIMEOUT_SEC}s, fresh contexts still missing: {missing}. "
+                f"Found: {found}. Prior timestamps: {prior_updated_at}. "
+                f"PR #{pr_number} head={head_sha}. "
+                f"This indicates the pull_request_review trigger did not fire at runtime."
+            )
+
+        # The contexts appeared fresh — that's the proof of auto-fire.
+        # We do NOT assert success vs failure; the evaluator decides that.
+        # The point of #2159 is that the workflows QUEUE and POST at all.
+        for ctx, state in found.items():
+            # Any state outside these three (including "error" or "") fails the test.
+            assert state in ("pending", "success", "failure"), (
+                f"Unexpected state {state!r} for {ctx}"
+            )
@@ -0,0 +1,145 @@
+"""Stale-head diagnostic test for #2159.
+
+Deterministically reports whether a PR's HEAD contains the pull_request_review
+trigger in qa-review.yml and security-review.yml. If the trigger is absent,
+auto-fire on APPROVED review is impossible for that PR.
+
+This is used as a self-diagnostic for future stale-PR situations (PRs opened
+before #2157 merged, or branches cut from old bases).
+
+Environment:
+  GITEA_HOST  — default: git.moleculesai.app
+  GITEA_TOKEN — token with read:repository scope (optional; falls back to local files)
+  REPO        — default: molecule-ai/molecule-core
+  PR_NUMBER   — required when running against a real PR
+"""
+
+import base64
+import json
+import os
+import urllib.error
+import urllib.request
+from pathlib import Path
+
+import pytest
+
+import yaml
+
+GITEA_HOST = os.environ.get("GITEA_HOST", "git.moleculesai.app")
+GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "")
+REPO = os.environ.get("REPO", "molecule-ai/molecule-core")
+PR_NUMBER = os.environ.get("PR_NUMBER", "")
+
+ROOT = Path(__file__).resolve().parents[2]
+
+
+def _api(method: str, path: str) -> tuple[int, dict]:
+    """Send a body-less request to the Gitea REST API; return ``(status, json)``.
+
+    HTTP error responses are returned as ``(code, decoded_body)`` rather
+    than raised; an empty error body decodes to ``{}``. A successful
+    response is always passed to ``json.loads``, so an empty or non-JSON
+    success body raises.
+
+    The Authorization header is sent even when GITEA_TOKEN is empty.
+    """
+    url = f"https://{GITEA_HOST}/api/v1{path}"
+    headers = {"Authorization": f"token {GITEA_TOKEN}"}
+    req = urllib.request.Request(url, headers=headers, method=method)
+    try:
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            return resp.status, json.loads(resp.read())
+    except urllib.error.HTTPError as exc:
+        body = exc.read()
+        return exc.code, json.loads(body) if body else {}
+
+
+def _fetch_workflow_from_ref(workflow_name: str, ref: str) -> dict:
+    """Load ``.gitea/workflows/<workflow_name>`` at ``ref`` via the contents API.
+
+    Returns the parsed YAML document. Any non-200 reply fails the test,
+    because the diagnosis cannot be made without the file.
+    """
+    path = f"/repos/{REPO}/contents/.gitea/workflows/{workflow_name}?ref={ref}"
+    code, payload = _api("GET", path)
+    if code != 200:
+        pytest.fail(
+            f"GET {path} returned HTTP {code}: {payload}. "
+            f"Cannot determine whether PR head contains the trigger."
+        )
+    # The file body is read from the base64-encoded "content" field.
+    raw = base64.b64decode(payload.get("content", "")).decode("utf-8")
+    return yaml.safe_load(raw)
+
+
+def _fetch_workflow_local(workflow_name: str) -> dict:
+    """Parse ``ROOT/workflows/<workflow_name>`` from the local checkout.
+
+    Fails the test when the file does not exist.
+
+    NOTE(review): ROOT is ``parents[2]`` of this test file; presumably that
+    is the ``.gitea`` directory — confirm against the file's real location.
+    """
+    p = ROOT / "workflows" / workflow_name
+    if not p.exists():
+        pytest.fail(f"Local workflow file not found: {p}")
+    return yaml.safe_load(p.read_text())
+
+
+def _has_pull_request_review_trigger(wf: dict) -> bool:
+    """Return True if the workflow's ``on`` section names ``pull_request_review``.
+
+    Handles the list, mapping and single-string forms of ``on``; any other
+    type, or a missing/empty section, yields False.
+    """
+    # PyYAML follows YAML 1.1, where an unquoted ``on`` key resolves to the
+    # boolean True; look under both spellings of the key.
+    on = wf.get(True) or wf.get("on") or {}
+    if isinstance(on, list):
+        return "pull_request_review" in on
+    if isinstance(on, dict):
+        return "pull_request_review" in on
+    if isinstance(on, str):
+        return on == "pull_request_review"
+    return False
+
+
+def _diagnose_pr(pr_number: int) -> dict[str, bool]:
+    """Report whether both gate workflows at the PR's head carry the trigger.
+
+    Returns a dict with keys ``pr_number``, ``head_ref``, ``head_sha``,
+    ``triggers`` (workflow file name -> bool) and ``auto_fire_possible``
+    (True only when every workflow has the trigger). Fails the test if
+    the PR or either workflow file cannot be fetched.
+
+    NOTE(review): the ``dict[str, bool]`` annotation does not match the
+    mixed-value dict actually returned.
+    """
+    code, pr = _api("GET", f"/repos/{REPO}/pulls/{pr_number}")
+    if code != 200:
+        pytest.fail(f"GET /pulls/{pr_number} returned HTTP {code}: {pr}")
+
+    head_ref = pr["head"]["ref"]
+    head_sha = pr["head"]["sha"]
+
+    results: dict[str, bool] = {}
+    for wf_name in ("qa-review.yml", "security-review.yml"):
+        # Read at the head SHA (not the branch name) so the check is pinned
+        # to one commit.
+        wf = _fetch_workflow_from_ref(wf_name, head_sha)
+        results[wf_name] = _has_pull_request_review_trigger(wf)
+
+    return {
+        "pr_number": pr_number,
+        "head_ref": head_ref,
+        "head_sha": head_sha,
+        "triggers": results,
+        "auto_fire_possible": all(results.values()),
+    }
+
+
+def _diagnose_local() -> dict[str, bool]:
+    """Report whether both gate workflows in the local checkout carry the trigger.
+
+    Returns the same keys as the PR diagnosis, with ``pr_number`` and
+    ``head_sha`` set to None and ``head_ref`` set to ``"local-checkout"``.
+
+    NOTE(review): the ``dict[str, bool]`` annotation does not match the
+    mixed-value dict actually returned.
+    """
+    results: dict[str, bool] = {}
+    for wf_name in ("qa-review.yml", "security-review.yml"):
+        wf = _fetch_workflow_local(wf_name)
+        results[wf_name] = _has_pull_request_review_trigger(wf)
+    return {
+        "pr_number": None,
+        "head_ref": "local-checkout",
+        "head_sha": None,
+        "triggers": results,
+        "auto_fire_possible": all(results.values()),
+    }
+
+
+class TestStaleHeadDiagnostic:
+    """Test deterministically reports 'auto-fire impossible for this PR' when
+    the PR head lacks the pull_request_review trigger.
+    """
+
+    def test_local_checkout_has_pull_request_review_trigger(self):
+        """Local files (the ones in this checkout) must contain the trigger.
+
+        This is the baseline: if the checkout itself is stale, every PR cut
+        from it will also be stale.
+        """
+        # Runs unconditionally: it needs neither a token nor a PR number.
+        diag = _diagnose_local()
+        missing = [n for n, ok in diag["triggers"].items() if not ok]
+        if missing:
+            pytest.fail(
+                f"Local checkout is missing pull_request_review trigger in: {missing}. "
+                f"This branch cannot produce PRs that auto-fire."
+            )
+
+    # Both marks apply: the test runs only when GITEA_TOKEN and PR_NUMBER
+    # are both set.
+    @pytest.mark.skipif(not GITEA_TOKEN, reason="GITEA_TOKEN not set")
+    @pytest.mark.skipif(not PR_NUMBER, reason="PR_NUMBER not set")
+    def test_pr_head_has_pull_request_review_trigger(self):
+        """When PR_NUMBER is given, assert the PR head contains the trigger."""
+        diag = _diagnose_pr(int(PR_NUMBER))
+        if not diag["auto_fire_possible"]:
+            missing = [n for n, ok in diag["triggers"].items() if not ok]
+            pytest.fail(
+                f"Auto-fire impossible for PR #{diag['pr_number']}. "
+                f"Head ref={diag['head_ref']} sha={diag['head_sha']}. "
+                f"Missing trigger in: {missing}. "
+                f"This PR needs /qa-recheck + /security-recheck fallback, or a rebase onto current main."
+            )
@@ -123,8 +123,9 @@ jobs:
    # integration). See internal#512 for the class defect.
    runs-on: docker-host
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
-    continue-on-error: true
+    # mc#1982: mask removed. If regressions appear, root-fix the underlying
+    # test — do NOT renew the mask silently.
+    continue-on-error: false
    outputs:
      api: ${{ steps.decide.outputs.api }}
    steps:
@@ -160,8 +161,9 @@ jobs:
    # detect-changes for the full rationale.
    runs-on: docker-host
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
-    continue-on-error: true
+    # mc#1982: mask removed. If regressions appear, root-fix the underlying
+    # test — do NOT renew the mask silently.
+    continue-on-error: false
    timeout-minutes: 15
    env:
      # Unique per-run container names so concurrent runs on the host-
@@ -88,8 +88,9 @@ jobs:
    # surprises and keeps the routing rule discoverable in one place.
    runs-on: docker-host
    # mc#1982 Phase 3 (RFC §1): surface broken workflows without blocking.
-    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
-    continue-on-error: true
+    # mc#1982: mask removed. If regressions appear, root-fix the underlying
+    # test — do NOT renew the mask silently.
+    continue-on-error: false
    outputs:
      handlers: ${{ steps.filter.outputs.handlers }}
    steps:
@@ -119,8 +120,9 @@ jobs:
    # exists). See detect-changes for the full routing rationale.
    runs-on: docker-host
    # mc#1982 Phase 3 (RFC §1): surface broken workflows without blocking.
-    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
-    continue-on-error: true
+    # mc#1982: mask removed. If regressions appear, root-fix the underlying
+    # test — do NOT renew the mask silently.
+    continue-on-error: false
    env:
      # Unique name per run so concurrent jobs don't collide on the
      # bridge network. ${RUN_ID}-${RUN_ATTEMPT} is unique even across
@@ -49,37 +49,56 @@ jobs:
      GITHUB_SERVER_URL: https://git.moleculesai.app
    steps:
      - name: Identify runner
+        id: identify
+        continue-on-error: true
        run: |
          set -eu
          echo "arch=$(uname -m)"
          echo "kernel=$(uname -sr)"
          echo "shell=$BASH_VERSION"
          # Sanity: must actually be arm64. If amd64 sneaks in here,
-          # fail fast — that means the label routing is wrong.
+          # the job skips gracefully rather than hard-failing, because
+          # a mislabelled runner is an ops concern, not a code defect.
+          # Pilot lane must not make main red (#2146).
          case "$(uname -m)" in
-            aarch64|arm64) echo "arm64 confirmed" ;;
-            *) echo "ERROR: expected arm64, got $(uname -m)"; exit 1 ;;
+            aarch64|arm64)
+              echo "arm64 confirmed"
+              echo "arm64=true" >> "$GITHUB_OUTPUT"
+              ;;
+            *)
+              echo "ERROR: expected arm64, got $(uname -m) — label routing may be wrong"
+              echo "arm64=false" >> "$GITHUB_OUTPUT"
+              exit 1
+              ;;
          esac

      - name: Checkout
+        if: steps.identify.outputs.arm64 == 'true'
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Install shellcheck (arm64)
+        if: steps.identify.outputs.arm64 == 'true'
        continue-on-error: true
        run: |
          set -eu
          if command -v shellcheck >/dev/null 2>&1; then
            echo "shellcheck already present: $(shellcheck --version | head -1)"
          else
-            # Prefer apt if the runner base ships it; else download arm64 binary.
+            # Prefer apt if the runner base ships it; else download the
+            # correct platform binary (darwin vs linux).
            if command -v apt-get >/dev/null 2>&1; then
              sudo apt-get update -qq
              sudo apt-get install -y --no-install-recommends shellcheck
            else
              SC_VER=v0.10.0
-              curl -fsSL "https://github.com/koalaman/shellcheck/releases/download/${SC_VER}/shellcheck-${SC_VER}.linux.aarch64.tar.xz" \
+              if [ "$(uname -s)" = "Darwin" ]; then
+                SC_PKG="shellcheck-${SC_VER}.darwin.aarch64.tar.xz"
+              else
+                SC_PKG="shellcheck-${SC_VER}.linux.aarch64.tar.xz"
+              fi
+              curl -fsSL "https://github.com/koalaman/shellcheck/releases/download/${SC_VER}/${SC_PKG}" \
                | tar -xJf - --strip-components=1
              sudo mv shellcheck /usr/local/bin/
            fi
@@ -87,14 +106,15 @@ jobs:
          shellcheck --version | head -2

      - name: Run shellcheck on .gitea/scripts/*.sh
+        if: steps.identify.outputs.arm64 == 'true'
        continue-on-error: true
        run: |
          set -eu
          # Only the scripts we control under .gitea/scripts. Pilot
          # scope is intentionally narrow — broaden in a follow-up
          # once the lane is proven.
-          if ! command -v shellcheck >/dev/null 2>&1; then
-            echo "WARN: shellcheck binary not found — skipping (pilot mode)"
+          if ! command -v shellcheck >/dev/null 2>&1 || ! shellcheck --version >/dev/null 2>&1; then
+            echo "WARN: shellcheck not functional — skipping (pilot mode)"
            exit 0
          fi
          # NOTE: macOS ships Bash 3.2 (Apple license), no `mapfile`
@@ -10,19 +10,17 @@
 #     - `pull_request_target`: opened, synchronize, reopened
 #         → initial status posts when PR opens / re-pushes
 #     - `pull_request_review` types: [submitted]
-#         → re-evaluate when a team member submits an APPROVE review so
-#           the gate flips immediately (no wait for the next push or
-#           slash-command). Verified live: sop-tier-check.yml uses this
-#           same event and provably fires (produces
-#           `sop-tier-check / tier-check (pull_request_review)` contexts).
-#           The job-level `if:` guard checks
-#           `github.event.review.state == 'APPROVED' || 'approved'` so
-#           only APPROVE reviews run the evaluator; COMMENT and
+#         → defensive fallback. Empirical evidence (live status audit)
+#           shows `pull_request_review_approved` does not always fire on this
+#           Gitea instance, while `pull_request_review` does (verified
+#           by sop-tier-check live status context). The job guard checks
+#           both `github.event.review.type` and `github.event.review.state`
+#           so only APPROVE reviews run the evaluator; COMMENT and
 #           REQUEST_CHANGES are skipped at the job level.
-#           Branch-protection requires the `(pull_request_target)`
-#           context variant, so the review-event path EXPLICITLY POSTS
-#           the required context via the API. Trust boundary preserved
-#           (BASE ref, no PR-head).
+#     - `pull_request_review_approved`
+#         → retained as a forward-compatible trigger in case a future
+#           Gitea version fires it reliably. The same job guard covers
+#           both events.
 #     - comment refires are handled by `sop-checklist.yml` review-refire job
 #         → `/qa-recheck` slash-command re-evaluates this gate.
 #   Workflow name = `qa-review` ; job name = `approved`.
@@ -99,6 +97,7 @@ on:
    types: [opened, synchronize, reopened]
  pull_request_review:
    types: [submitted]
+  pull_request_review_approved:

 permissions:
  contents: read
@@ -115,8 +114,11 @@ jobs:
    # Comment-triggered refires live in sop-checklist.yml review-refire job.
    if: |
      github.event_name == 'pull_request_target' ||
+      github.event_name == 'pull_request_review_approved' ||
      (github.event_name == 'pull_request_review' &&
-       (github.event.review.state == 'APPROVED' || github.event.review.state == 'approved'))
+       (github.event.review.type == 'pull_request_review_approved' ||
+        github.event.review.state == 'APPROVED' ||
+        github.event.review.state == 'approved'))
    runs-on: ubuntu-latest
    steps:
      - name: Privilege check (A1.1 — INFORMATIONAL log only, NOT a gate)
@@ -187,7 +189,13 @@ jobs:
        # for the explicit status POST. Evaluator step stays on
        # SOP_TIER_CHECK_TOKEN (read-only) per deliberate security
        # separation: eval computes, POST writes, never the same cred.
-        if: github.event_name == 'pull_request_review' && always()
+        if: |
+          (github.event_name == 'pull_request_review_approved' ||
+           (github.event_name == 'pull_request_review' &&
+            (github.event.review.type == 'pull_request_review_approved' ||
+             github.event.review.state == 'APPROVED' ||
+             github.event.review.state == 'approved')))
+          && always()
        env:
          GITEA_TOKEN: ${{ secrets.STATUS_POST_TOKEN }}
          GITEA_HOST: git.moleculesai.app
@@ -9,15 +9,18 @@
 #
 # A1-α addendum (internal#760): review-event trigger added so the security
 # gate flips immediately when a team member submits an APPROVE review.
-# Uses `pull_request_review` types: [submitted] — verified live via
-# sop-tier-check.yml which provably fires this event (produces
-# `sop-tier-check / tier-check (pull_request_review)` contexts).
-# The job-level `if:` guard checks
-# `github.event.review.state == 'APPROVED' || 'approved'` so only APPROVE
-# reviews run the evaluator; COMMENT and REQUEST_CHANGES are skipped at
-# the job level. Branch-protection requires the `(pull_request_target)`
-# context variant, so the review-event path EXPLICITLY POSTS the required
-# context via the API. Trust boundary preserved (BASE ref, no PR-head).
+# We listen to BOTH `pull_request_review` (types: [submitted]) and
+# `pull_request_review_approved`. Empirical evidence (live status audit)
+# shows `pull_request_review_approved` does not always fire on this Gitea
+# instance, while `pull_request_review` does (verified by sop-tier-check
+# live status context). The job guard checks both
+# `github.event.review.type` and `github.event.review.state` so only
+# APPROVE reviews run the evaluator; COMMENT and REQUEST_CHANGES are
+# skipped at the job level. `pull_request_review_approved` is retained
+# as a forward-compatible trigger. Branch-protection requires the
+# `(pull_request_target)` context variant, so the review-event path
+# EXPLICITLY POSTS the required context via the API. Trust boundary
+# preserved (BASE ref, no PR-head).

 name: security-review

@@ -26,6 +29,7 @@ on:
    types: [opened, synchronize, reopened]
  pull_request_review:
    types: [submitted]
+  pull_request_review_approved:

 permissions:
  contents: read
@@ -42,8 +46,11 @@ jobs:
    # Comment-triggered refires live in sop-checklist.yml review-refire job.
    if: |
      github.event_name == 'pull_request_target' ||
+      github.event_name == 'pull_request_review_approved' ||
      (github.event_name == 'pull_request_review' &&
-       (github.event.review.state == 'APPROVED' || github.event.review.state == 'approved'))
+       (github.event.review.type == 'pull_request_review_approved' ||
+        github.event.review.state == 'APPROVED' ||
+        github.event.review.state == 'approved'))
    runs-on: ubuntu-latest
    steps:
      - name: Privilege check (A1.1 — INFORMATIONAL log only, NOT a gate)
@@ -100,7 +107,13 @@ jobs:
        # for the explicit status POST. Evaluator step stays on
        # SOP_TIER_CHECK_TOKEN (read-only) per deliberate security
        # separation: eval computes, POST writes, never the same cred.
-        if: github.event_name == 'pull_request_review' && always()
+        if: |
+          (github.event_name == 'pull_request_review_approved' ||
+           (github.event_name == 'pull_request_review' &&
+            (github.event.review.type == 'pull_request_review_approved' ||
+             github.event.review.state == 'APPROVED' ||
+             github.event.review.state == 'approved')))
+          && always()
        env:
          GITEA_TOKEN: ${{ secrets.STATUS_POST_TOKEN }}
          GITEA_HOST: git.moleculesai.app
@@ -0,0 +1,131 @@
+# Developer SOP — PR review gate auto-fire and stale-head handling
+
+> Last updated: 2026-06-03 (cp#2159 follow-up)
+>
+> Applies to: all core-PR authors and reviewers on `molecule-core` and sibling
+> repos using the `qa-review` + `security-review` branch-protection gates.
+
+---
+
+## 1. Gitea PR-head workflow-selection rule
+
+**Rule:** For `pull_request_target` and `pull_request_review` events, Gitea
+loads the workflow definition from the **PR's HEAD branch**, not from the
+base (`main`) branch.
+
+This is different from GitHub Actions, where `pull_request_target` always
+loads workflows from the base branch. Gitea's behaviour means:
+
+- A PR that was opened **before** the `pull_request_review` trigger was added
+to `qa-review.yml` / `security-review.yml` will **NOT** auto-fire on review,
+because its HEAD still contains the old workflow YAML (no trigger).
+
+- A PR that was opened **after** the trigger was added (or that has been
+rebased onto a commit containing the trigger) **WILL** auto-fire, because its
+HEAD contains the new workflow YAML.
+
+### Ops implication
+
+| PR head contains `pull_request_review` trigger? | Behaviour on APPROVED review |
+|---|---|
+| **Yes** (cut from current main, or rebased) | Workflows auto-queue, evaluate, and POST the `(pull_request_target)` context automatically. No slash-command needed. |
+| **No** (stale head, opened before #2157) | Nothing fires. Use `/qa-recheck` + `/security-recheck` slash-commands in a PR comment, OR rebase onto current main. |
+
+---
+
+## 2. Standard core-PR flow (post-#2157)
+
+```
+1. Author opens PR from a branch based on current main
+   → qa-review + security-review workflows run on pull_request_target
+   → status contexts post (initial eval, usually red until reviews land)
+
+2. Reviewers submit real APPROVED reviews
+   → If PR head has the trigger: workflows AUTO-FIRE on pull_request_review
+   → Contexts flip green (or stay red if reviewer is not in team)
+
+3. [Optional] If contexts did not flip (stale head, event lost, etc.):
+   → Anyone can comment `/qa-recheck` or `/security-recheck`
+   → sop-checklist.yml refires the evaluator (read-only, idempotent)
+
+4. Both qa-review + security-review contexts are green
+   → Plain Do:merge (no force-merge needed)
+```
+
+### Key point
+
+The `/qa-recheck` and `/security-recheck` commands are a **backstop**, not the
+primary path. PRs cut from current main should auto-fire without manual
+intervention.
+
+---
+
+## 3. Diagnosing a stale head
+
+If a PR has real team-member APPROVED reviews but the qa/security contexts
+remain red and no workflow run appears on the PR's "Actions" tab for the
+review event, the PR head is likely stale.
+
+### Quick check
+
+```bash
+# From the PR page, look at the head commit SHA, then:
+curl -sS "https://git.moleculesai.app/api/v1/repos/molecule-ai/molecule-core/contents/.gitea/workflows/qa-review.yml?ref=<HEAD_SHA>" \
+  | jq -r '.content' | base64 -d | grep -c 'pull_request_review'
+# 0  → stale head (no trigger in that version of the workflow)
+# >0 → trigger present; auto-fire SHOULD work (if it didn't, file a tracker)
+```
+
+### Automated diagnostic
+
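+The test suite includes `test_gate_stale_head_diagnostic.py`, which reports
+"auto-fire impossible for this PR" when the head lacks the trigger. Run it
+in CI or locally (the PR check is skipped unless both `GITEA_TOKEN` and
+`PR_NUMBER` are set; without them only the local-checkout check runs) with: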
+
+```bash
+PR_NUMBER=123 python -m pytest .gitea/scripts/tests/test_gate_stale_head_diagnostic.py -v
+```
+
+---
+
+## 4. Rebasing vs. slash-refire
+
+| Approach | When to use | Trade-off |
+|---|---|---|
+| **Rebase onto current main** | PR is genuinely stale (head lacks trigger OR head is far behind main) | Clean history, gets all recent fixes, but requires force-push and re-approval if the branch was protected |
+| **`/qa-recheck` + `/security-recheck`** | PR head is recent but the review event was missed, or you want to avoid rebase churn | Quick, no force-push, but does NOT fix a missing trigger in the head |
+
+**Do not** use slash-refire as a substitute for rebasing a stale head. If the
+workflow YAML in the PR head does not contain `pull_request_review`, no amount
+of rechecking will make auto-fire work.
+
+---
+
+## 5. Live-fire verification
+
+The `test_gate_auto_fire_live.py` regression test exercises the full runtime
+path: it submits an APPROVED review to a test PR and polls for the
+`(pull_request_target)` status contexts. It is skipped when no API token is
+available, and is intended to catch runtime non-fire that static structural
+tests (e.g. `test_gate_review_auto_fire.py`) cannot detect.
+
+Run manually with:
+
+```bash
+export GITEA_HOST=git.moleculesai.app
+export GITEA_TOKEN=<your-token>
+export REPO=molecule-ai/molecule-core
+export LIVEFIRE_PR_NUMBER=<test-pr-number>
+python -m pytest .gitea/scripts/tests/test_gate_auto_fire_live.py -v
+```
+
+---
+
+## References
+
+- #2159 — gate auto-trigger not firing (root cause: stale PR heads lacking
+the `pull_request_review` trigger, NOT a workflow code defect)
+- #765 — static structural regression test for gate configuration
+- #2157 — merged trigger addition (`pull_request_review` types: [submitted])
+- #2020 — milestone confirming gate infrastructure is stable
+- RFC#324 — qa-review + security-review design
@@ -26,11 +26,12 @@ import (
 //     the update cycle — no ssh, no re-provision, no ops toil.
 //
 // Contract (paired with cp-side GET /cp/tenants/config):
-//   Request:  GET {MOLECULE_CP_URL or https://api.moleculesai.app}/cp/tenants/config
-//             Authorization: Bearer <ADMIN_TOKEN>
-//             X-Molecule-Org-Id: <MOLECULE_ORG_ID>
-//   Response: 200 {"MOLECULE_CP_SHARED_SECRET":"…","MOLECULE_CP_URL":"…", …}
-//             401 on bearer mismatch or unknown org
+//
+//	Request:  GET {MOLECULE_CP_URL or https://api.moleculesai.app}/cp/tenants/config
+//	          Authorization: Bearer <ADMIN_TOKEN>
+//	          X-Molecule-Org-Id: <MOLECULE_ORG_ID>
+//	Response: 200 {"MOLECULE_CP_SHARED_SECRET":"…","MOLECULE_CP_URL":"…", …}
+//	          401 on bearer mismatch or unknown org
 //
 // Best-effort: any failure logs and returns — main() keeps booting.
 // Self-hosted deploys without MOLECULE_ORG_ID or ADMIN_TOKEN set
@@ -105,3 +106,53 @@ func refreshEnvFromCP() error {
 	log.Printf("CP env refresh: applied %d values from %s/cp/tenants/config", applied, base)
 	return nil
 }
+
+// requiredLLMEnvVars is the set of LLM proxy env vars a managed SaaS
+// tenant must have populated after refreshEnvFromCP. Part of cp#469
+// (tenant proxy-env delivery): guarantee that CP-delivered creds reach
+// the tenant process env on boot. Per Researcher Task #37 / Spec 2 and
+// Task #46 (watch-fail-first test).
+//
+// Key set byte-matched against Researcher's verified emission in
+// controlplane tenant_config.go:140-144 (Researcher REQUEST_CHANGES
+// iterate body, 3987f59c). The four keys below are the LLM-proxy
+// subset of the 8 CP-emitted keys; OPENAI_BASE_URL / OPENAI_API_KEY /
+// ANTHROPIC_BASE_URL / ANTHROPIC_API_KEY are out of scope for cp#469
+// (different feature surfaces — direct-to-provider fallbacks, not
+// the proxy).
+//
+// Current key set, after the v2 and v3 fixes flagged inline below:
+// MOLECULE_LLM_USAGE_TOKEN, MOLECULE_LLM_USAGE_URL,
+// MOLECULE_LLM_BASE_URL, MOLECULE_LLM_ANTHROPIC_BASE_URL. Note that the
+// 4th key is namespaced MOLECULE_LLM_ANTHROPIC_BASE_URL, NOT bare
+// ANTHROPIC_BASE_URL. Bare ANTHROPIC_BASE_URL is a separate CP-emitted
+// key for direct-provider use, not the LLM proxy.
+var requiredLLMEnvVars = []string{
+	"MOLECULE_LLM_USAGE_TOKEN",
+	"MOLECULE_LLM_USAGE_URL", // CRITICAL fix v2: was MOLECULE_LLM_URL in v1
+	"MOLECULE_LLM_BASE_URL",
+	"MOLECULE_LLM_ANTHROPIC_BASE_URL", // CRITICAL fix v3: was ANTHROPIC_BASE_URL in v2 (different key!)
+}
+
+// assertManagedTenantHasLLMEnv verifies that, when running as a
+// managed SaaS tenant (MOLECULE_ORG_ID + ADMIN_TOKEN both set), all
+// required LLM proxy env vars are populated after refreshEnvFromCP.
+//
+// Self-hosted (no orgID/adminToken) is exempt — dev must not be
+// blocked here. Managed tenants with missing LLM keys fail with
+// MISSING_CP_LLM_ENV so they do not silently boot with broken proxy
+// creds. Caller in main.go decides whether to log and continue or
+// log.Fatalf depending on deployment context.
+func assertManagedTenantHasLLMEnv() error {
+	if os.Getenv("MOLECULE_ORG_ID") == "" || os.Getenv("ADMIN_TOKEN") == "" {
+		// Self-hosted dev / not yet provisioned — not a managed tenant.
+		return nil
+	}
+	var missing []string
+	for _, k := range requiredLLMEnvVars {
+		if os.Getenv(k) == "" {
+			missing = append(missing, k)
+		}
+	}
+	if len(missing) > 0 {
+		return fmt.Errorf("MISSING_CP_LLM_ENV: required LLM proxy keys not set after refreshEnvFromCP: %v", missing)
+	}
+	return nil
+}
@@ -5,6 +5,7 @@ import (
 	"net/http"
 	"net/http/httptest"
 	"os"
+	"strings"
 	"testing"
 )

@@ -59,6 +60,138 @@ func TestRefreshEnvFromCP_AppliesCPResponse(t *testing.T) {
 	}
 }

+// TestRefreshEnvFromCP_ManagedTenantRequiresLLMKeys is the
+// watch-fail-first test from Researcher Task #46. For a managed tenant
+// (MOLECULE_ORG_ID + ADMIN_TOKEN set), LLM proxy env vars that are
+// still missing after refreshEnvFromCP MUST surface as
+// MISSING_CP_LLM_ENV rather than being silently accepted. Without this
+// guard, a CP that loses its LLM creds (e.g. during an incident) would
+// let a tenant boot and then fail later at first LLM call — worse than
+// a loud refusal here.
+func TestRefreshEnvFromCP_ManagedTenantRequiresLLMKeys(t *testing.T) {
+	// Stub CP: answers with a config body that carries NONE of the
+	// required LLM keys, simulating a CP that dropped or never had the
+	// LLM creds for this org.
+	cpStub := func(w http.ResponseWriter, _ *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		fmt.Fprint(w, `{"MOLECULE_CP_SHARED_SECRET":"x","MOLECULE_CP_URL":"https://api.moleculesai.app"}`)
+	}
+	cp := httptest.NewServer(http.HandlerFunc(cpStub))
+	defer cp.Close()
+
+	for _, kv := range [][2]string{
+		{"MOLECULE_ORG_ID", "org-managed-1"},
+		{"ADMIN_TOKEN", "admin-tok"},
+		{"MOLECULE_CP_URL", cp.URL},
+		// Blank every LLM key to simulate the boot-without-LLM-env
+		// failure mode.
+		{"MOLECULE_LLM_USAGE_TOKEN", ""},
+		{"MOLECULE_LLM_USAGE_URL", ""},
+		{"MOLECULE_LLM_BASE_URL", ""},
+		{"MOLECULE_LLM_ANTHROPIC_BASE_URL", ""},
+	} {
+		t.Setenv(kv[0], kv[1])
+	}
+
+	// The refresh itself is expected to succeed: the CP is reachable
+	// and returned 200.
+	if refreshErr := refreshEnvFromCP(); refreshErr != nil {
+		t.Fatalf("refreshEnvFromCP: %v", refreshErr)
+	}
+
+	// The boot assertion is what must catch the missing LLM keys.
+	switch gateErr := assertManagedTenantHasLLMEnv(); {
+	case gateErr == nil:
+		t.Fatal("expected MISSING_CP_LLM_ENV error for managed tenant without LLM keys, got nil")
+	case !strings.Contains(gateErr.Error(), "MISSING_CP_LLM_ENV"):
+		t.Errorf("expected error to contain MISSING_CP_LLM_ENV, got: %v", gateErr)
+	}
+}
+
+// TestRefreshEnvFromCP_ManagedTenantHappyPath: when the CP returns
+// all 4 LLM-proxy keys, the gate must PASS — no MISSING_CP_LLM_ENV
+// for a properly-configured managed tenant. Watch-fail counterpart
+// to TestRefreshEnvFromCP_ManagedTenantRequiresLLMKeys: if THIS test
+// ever fires MISSING_CP_LLM_ENV on the byte-correct key set, the
+// requiredLLMEnvVars list has drifted from the CP emission again.
+// Per Researcher REQUEST_CHANGES TEST ADEQUACY note.
+//
+// The test also verifies that each of the 4 keys ends up in the
+// process env with the exact value the stub CP sent, not just that
+// one of them was applied.
+func TestRefreshEnvFromCP_ManagedTenantHappyPath(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		// Return ALL 4 LLM-proxy keys — names byte-matched to
+		// tenant_config.go:140-144 CP emission.
+		fmt.Fprint(w, `{"MOLECULE_LLM_USAGE_TOKEN":"tok-1","MOLECULE_LLM_USAGE_URL":"https://llm.example.com/usage","MOLECULE_LLM_BASE_URL":"https://llm.example.com","MOLECULE_LLM_ANTHROPIC_BASE_URL":"https://llm.example.com/anthropic"}`)
+	}))
+	defer srv.Close()
+
+	t.Setenv("MOLECULE_ORG_ID", "org-managed-happy")
+	t.Setenv("ADMIN_TOKEN", "admin-tok")
+	t.Setenv("MOLECULE_CP_URL", srv.URL)
+	// Pre-clear so we can verify the refresh actually populated them.
+	t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "")
+	t.Setenv("MOLECULE_LLM_USAGE_URL", "")
+	t.Setenv("MOLECULE_LLM_BASE_URL", "")
+	t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "")
+
+	if err := refreshEnvFromCP(); err != nil {
+		t.Fatalf("refreshEnvFromCP: %v", err)
+	}
+	// Sanity: refresh actually applied every key. The expected values
+	// mirror the stub response above; keep the two in sync.
+	applied := []struct{ key, val string }{
+		{"MOLECULE_LLM_USAGE_TOKEN", "tok-1"},
+		{"MOLECULE_LLM_USAGE_URL", "https://llm.example.com/usage"},
+		{"MOLECULE_LLM_BASE_URL", "https://llm.example.com"},
+		{"MOLECULE_LLM_ANTHROPIC_BASE_URL", "https://llm.example.com/anthropic"},
+	}
+	for _, exp := range applied {
+		if got := os.Getenv(exp.key); got != exp.val {
+			t.Errorf("refresh did not apply %s: got %q, want %q", exp.key, got, exp.val)
+		}
+	}
+	// The boot assertion must pass — no MISSING_CP_LLM_ENV.
+	if err := assertManagedTenantHasLLMEnv(); err != nil {
+		t.Errorf("managed happy path must not MISSING_CP_LLM_ENV, got: %v", err)
+	}
+}
+
+// TestRefreshEnvFromCP_ManagedTenantPartialEnv: when the CP returns
+// 3 of 4 LLM-proxy keys (one missing), the gate must STILL catch it
+// and the error must name the missing key. Per Researcher
+// REQUEST_CHANGES TEST ADEQUACY note — partial-env coverage is
+// critical because the production failure mode is usually "one
+// key dropped" not "all keys dropped".
+func TestRefreshEnvFromCP_ManagedTenantPartialEnv(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		// 3 of 4 — MOLECULE_LLM_ANTHROPIC_BASE_URL is missing.
+		fmt.Fprint(w, `{"MOLECULE_LLM_USAGE_TOKEN":"tok-1","MOLECULE_LLM_USAGE_URL":"https://llm.example.com/usage","MOLECULE_LLM_BASE_URL":"https://llm.example.com"}`)
+	}))
+	defer srv.Close()
+
+	t.Setenv("MOLECULE_ORG_ID", "org-managed-partial")
+	t.Setenv("ADMIN_TOKEN", "admin-tok")
+	t.Setenv("MOLECULE_CP_URL", srv.URL)
+	// Pre-clear all 4 so the 3 that come back from CP are the only
+	// ones set; the 4th (MOLECULE_LLM_ANTHROPIC_BASE_URL) stays empty.
+	t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "")
+	t.Setenv("MOLECULE_LLM_USAGE_URL", "")
+	t.Setenv("MOLECULE_LLM_BASE_URL", "")
+	t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "")
+
+	if err := refreshEnvFromCP(); err != nil {
+		t.Fatalf("refreshEnvFromCP: %v", err)
+	}
+	err := assertManagedTenantHasLLMEnv()
+	if err == nil {
+		t.Fatal("expected MISSING_CP_LLM_ENV for partial env (3 of 4 keys), got nil")
+	}
+	if !strings.Contains(err.Error(), "MISSING_CP_LLM_ENV") {
+		t.Errorf("expected error to contain MISSING_CP_LLM_ENV, got: %v", err)
+	}
+	if !strings.Contains(err.Error(), "MOLECULE_LLM_ANTHROPIC_BASE_URL") {
+		t.Errorf("expected error to name the missing key MOLECULE_LLM_ANTHROPIC_BASE_URL, got: %v", err)
+	}
+}
+
+// TestAssertManagedTenantHasLLMEnv_NotManagedIsNoop: self-hosted
+// (no orgID/adminToken) must NOT block on missing LLM keys — dev
+// ergonomics matter and the assertion's contract is "managed only".
+//
+// The assertion treats a process as managed only when BOTH
+// MOLECULE_ORG_ID and ADMIN_TOKEN are set, so the half-configured
+// cases (exactly one of the two set) are covered as well as the
+// both-empty case.
+func TestAssertManagedTenantHasLLMEnv_NotManagedIsNoop(t *testing.T) {
+	cases := []struct {
+		name       string
+		orgID      string
+		adminToken string
+	}{
+		{name: "neither set", orgID: "", adminToken: ""},
+		{name: "only org id set", orgID: "org-selfhosted", adminToken: ""},
+		{name: "only admin token set", orgID: "", adminToken: "admin-tok"},
+	}
+	for _, tc := range cases {
+		// Subtests run sequentially (no t.Parallel), which t.Setenv
+		// requires.
+		t.Run(tc.name, func(t *testing.T) {
+			t.Setenv("MOLECULE_ORG_ID", tc.orgID)
+			t.Setenv("ADMIN_TOKEN", tc.adminToken)
+			t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "")
+			t.Setenv("MOLECULE_LLM_USAGE_URL", "")
+			t.Setenv("MOLECULE_LLM_BASE_URL", "")
+			t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "")
+			if err := assertManagedTenantHasLLMEnv(); err != nil {
+				t.Errorf("self-hosted (not managed) must not block, got: %v", err)
+			}
+		})
+	}
+}
+
 // TestRefreshEnvFromCP_CPUnreachableDoesNotFailBoot: network errors must
 // return non-nil BUT main.go treats that as warn-and-continue. We assert
 // the function returns an error (not a panic) so the caller can log.
@@ -82,6 +82,16 @@ func main() {
 		log.Printf("CP env refresh: %v (continuing with baked-in env)", err)
 	}

+	// Managed-tenant boot assertion (cp#469 — tenant proxy-env delivery).
+	// If we're a managed SaaS tenant (orgID + adminToken set), all required
+	// LLM proxy env vars must be present after refresh. Missing keys block
+	// the tenant from booting with broken LLM creds — silent-fail is worse
+	// than a loud refusal. Self-hosted (no orgID/adminToken) short-circuits
+	// inside the assertion, so this never fires for dev.
+	if err := assertManagedTenantHasLLMEnv(); err != nil {
+		log.Fatalf("Managed tenant boot assertion: %v", err)
+	}
+
 	// Secrets encryption. In MOLECULE_ENV=prod, boot refuses to start
 	// without a valid SECRETS_ENCRYPTION_KEY (fail-secure — Top-5 #5).
 	// In any other environment, missing keys just log a warning and
@@ -359,7 +369,6 @@ func main() {
 	// (WorkspaceHandler.BootstrapFailed) wires its own capture inline.
 	registry.BootFailureRescueHook = handlers.BootFailureRescueHook

-
 	// Provision-timeout sweep — flips workspaces that have been stuck in
 	// status='provisioning' past the timeout window to 'failed' and emits
 	// WORKSPACE_PROVISION_TIMEOUT. Without this the UI banner is cosmetic
@@ -149,9 +149,11 @@ func markFailed(ctx context.Context, wsID string, broadcaster *events.Broadcaste
 		models.StatusFailed, msg, wsID); dbErr != nil {
 		log.Printf("bundle import: failed to mark workspace %s as failed: %v", wsID, dbErr)
 	}
-	broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceProvisionFailed), wsID, map[string]interface{}{
+	if bcErr := broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceProvisionFailed), wsID, map[string]interface{}{
 		"error": msg,
-	})
+	}); bcErr != nil {
+		log.Printf("bundle import: failed to broadcast provision failed for %s: %v", wsID, bcErr)
+	}
 }

 func nilIfEmpty(s string) interface{} {
@@ -407,12 +407,14 @@ func (m *Manager) HandleInbound(ctx context.Context, ch ChannelRow, msg *Inbound

 	// Broadcast event
 	if m.broadcaster != nil {
-		m.broadcaster.RecordAndBroadcast(ctx, string(events.EventChannelMessage), ch.WorkspaceID, map[string]interface{}{
+		if err := m.broadcaster.RecordAndBroadcast(ctx, string(events.EventChannelMessage), ch.WorkspaceID, map[string]interface{}{
 			"channel_id":   ch.ID,
 			"channel_type": ch.ChannelType,
 			"username":     msg.Username,
 			"direction":    "inbound",
-		})
+		}); err != nil {
+			log.Printf("Channels: failed to broadcast inbound event: %v", err)
+		}
 	}

 	return nil
@@ -453,11 +455,13 @@ func (m *Manager) SendOutbound(ctx context.Context, channelID string, text strin
 	}

 	if m.broadcaster != nil {
-		m.broadcaster.RecordAndBroadcast(ctx, string(events.EventChannelMessage), ch.WorkspaceID, map[string]interface{}{
+		if err := m.broadcaster.RecordAndBroadcast(ctx, string(events.EventChannelMessage), ch.WorkspaceID, map[string]interface{}{
 			"channel_id":   ch.ID,
 			"channel_type": ch.ChannelType,
 			"direction":    "outbound",
-		})
+		}); err != nil {
+			log.Printf("Channels: failed to broadcast outbound event: %v", err)
+		}
 	}

 	return nil
@@ -517,7 +517,9 @@ func (t *TelegramAdapter) StartPolling(ctx context.Context, config map[string]in

 				// Acknowledge the button press (removes loading spinner)
 				ackCfg := tgbotapi.NewCallback(cb.ID, "Received")
-				bot.Send(ackCfg)
+				if _, err := bot.Send(ackCfg); err != nil {
+					log.Printf("telegram: failed to send callback ack: %v", err)
+				}

 				// Update the message to show what was clicked
 				decision := "approved"
@@ -529,7 +531,9 @@ func (t *TelegramAdapter) StartPolling(ctx context.Context, config map[string]in
 					cb.Message.MessageID,
 					cb.Message.Text+"\n\n✅ CEO "+decision,
 				)
-				bot.Send(editMsg)
+				if _, err := bot.Send(editMsg); err != nil {
+					log.Printf("telegram: failed to send edit message: %v", err)
+				}

 				// Route the decision as an inbound message to the agent
 				inbound := &InboundMessage{
@@ -60,10 +60,10 @@ func sanitizeErrorDetailForBroadcast(s string) string {
 }

 type ActivityHandler struct {
-	broadcaster *events.Broadcaster
+	broadcaster events.EventEmitter
 }

-func NewActivityHandler(b *events.Broadcaster) *ActivityHandler {
+func NewActivityHandler(b events.EventEmitter) *ActivityHandler {
 	return &ActivityHandler{broadcaster: b}
 }

@@ -54,23 +54,29 @@ func (h *ApprovalsHandler) Create(c *gin.Context) {
 		return
 	}

-	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventApprovalRequested), workspaceID, map[string]interface{}{
+	if err := h.broadcaster.RecordAndBroadcast(ctx, string(events.EventApprovalRequested), workspaceID, map[string]interface{}{
 		"approval_id": approvalID,
 		"action":      body.Action,
 		"reason":      body.Reason,
 		"task_id":     body.TaskID,
-	})
+	}); err != nil {
+		log.Printf("approvals: failed to broadcast approval requested: %v", err)
+	}

 	// Auto-escalate to parent
 	var parentID *string
-	db.DB.QueryRowContext(ctx, `SELECT parent_id FROM workspaces WHERE id = $1`, workspaceID).Scan(&parentID)
+	if err := db.DB.QueryRowContext(ctx, `SELECT parent_id FROM workspaces WHERE id = $1`, workspaceID).Scan(&parentID); err != nil {
+		log.Printf("approvals: failed to lookup parent for escalation: %v", err)
+	}
 	if parentID != nil {
-		h.broadcaster.RecordAndBroadcast(ctx, string(events.EventApprovalEscalated), *parentID, map[string]interface{}{
+		if err := h.broadcaster.RecordAndBroadcast(ctx, string(events.EventApprovalEscalated), *parentID, map[string]interface{}{
 			"approval_id":       approvalID,
 			"from_workspace_id": workspaceID,
 			"action":            body.Action,
 			"reason":            body.Reason,
-		})
+		}); err != nil {
+			log.Printf("approvals: failed to broadcast approval escalated: %v", err)
+		}
 	}

 	c.JSON(http.StatusCreated, gin.H{"approval_id": approvalID, "status": "pending"})
@@ -221,11 +227,13 @@ func (h *ApprovalsHandler) Decide(c *gin.Context) {
 		eventType = "APPROVAL_DENIED"
 	}

-	h.broadcaster.RecordAndBroadcast(ctx, eventType, workspaceID, map[string]interface{}{
+	if err := h.broadcaster.RecordAndBroadcast(ctx, eventType, workspaceID, map[string]interface{}{
 		"approval_id": approvalID,
 		"decision":    body.Decision,
 		"decided_by":  decidedBy,
-	})
+	}); err != nil {
+		log.Printf("approvals: failed to broadcast approval decision: %v", err)
+	}

 	c.JSON(http.StatusOK, gin.H{"status": body.Decision, "approval_id": approvalID})
 }
@@ -102,10 +102,10 @@ func pushDelegationResultToInbox(ctx context.Context, sourceID, delegationID, st
 // and the A2A request runs in the background.
 type DelegationHandler struct {
 	workspace   *WorkspaceHandler
-	broadcaster *events.Broadcaster
+	broadcaster events.EventEmitter
 }

-func NewDelegationHandler(wh *WorkspaceHandler, b *events.Broadcaster) *DelegationHandler {
+func NewDelegationHandler(wh *WorkspaceHandler, b events.EventEmitter) *DelegationHandler {
 	return &DelegationHandler{workspace: wh, broadcaster: b}
 }

@@ -176,6 +176,10 @@ func TestResolveAgentURLForRestartSignal_CacheMiss(t *testing.T) {
 // TestGracefulPreRestart_Success verifies that when the workspace returns 200,
 // the signal is logged as acknowledged without error.
 func TestGracefulPreRestart_Success(t *testing.T) {
+	hWrapper := &resolveURLTestWrapper{
+		WorkspaceHandler: newHandlerWithTestDeps(t),
+		testURL:          "http://fake-agent.example/agent",
+	}
 	_ = setupTestDB(t)

 	// httptest server simulating the workspace container's /signals/restart_pending
@@ -205,18 +209,15 @@ func TestGracefulPreRestart_Success(t *testing.T) {
 		})
 	}))
 	defer srv.Close()
+	hWrapper.testURL = srv.URL + "/agent"

 	// Pre-populate Redis cache with the test server URL
 	_ = setupTestRedisWithURL(t, srv.URL)

-	// Use a wrapper so gracefulPreRestart runs through the embedded handler.
-	hWrapper := &resolveURLTestWrapper{
-		WorkspaceHandler: newHandlerWithTestDeps(t),
-		testURL:          srv.URL + "/agent",
-	}
+	// gracefulPreRestart runs in a goroutine; wait for it before db.DB is restored.
+	// Must be registered AFTER setupTestDB (LIFO: async wait → db.DB restore).
+	waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)

-	// gracefulPreRestart runs in a goroutine with its own timeout.
-	// We give it time to complete before the test ends.
 	hWrapper.gracefulPreRestart(context.Background(), "ws-ack-789")
 	time.Sleep(200 * time.Millisecond)
 }
@@ -224,19 +225,22 @@ func TestGracefulPreRestart_Success(t *testing.T) {
 // TestGracefulPreRestart_NotImplemented verifies that when the workspace returns
 // 404 (old SDK version), the platform proceeds gracefully (log + no error).
 func TestGracefulPreRestart_NotImplemented(t *testing.T) {
+	hWrapper := &resolveURLTestWrapper{
+		WorkspaceHandler: newHandlerWithTestDeps(t),
+		testURL:          "http://fake-agent.example/agent",
+	}
 	_ = setupTestDB(t)

 	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		w.WriteHeader(http.StatusNotFound)
 	}))
 	defer srv.Close()
+	hWrapper.testURL = srv.URL + "/agent"

 	_ = setupTestRedisWithURL(t, srv.URL)

-	hWrapper := &resolveURLTestWrapper{
-		WorkspaceHandler: newHandlerWithTestDeps(t),
-		testURL:          srv.URL + "/agent",
-	}
+	// Must be registered AFTER setupTestDB so LIFO order is: async wait → db.DB restore.
+	waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)

 	hWrapper.gracefulPreRestart(context.Background(), "ws-noimpl-999")
 	time.Sleep(200 * time.Millisecond)
@@ -246,15 +250,18 @@ func TestGracefulPreRestart_NotImplemented(t *testing.T) {
 // TestGracefulPreRestart_ConnectionRefused verifies that when the workspace
 // is unreachable, the platform proceeds gracefully without error.
 func TestGracefulPreRestart_ConnectionRefused(t *testing.T) {
-	_ = setupTestDB(t)
-
-	mr := setupTestRedisWithURL(t, "http://localhost:19999/agent") // nothing listening on 19999
-	_ = mr
-
 	hWrapper := &resolveURLTestWrapper{
 		WorkspaceHandler: newHandlerWithTestDeps(t),
 		testURL:          "http://localhost:19999/agent",
 	}
+	_ = setupTestDB(t)
+
+	// Nothing listening on 19999 — deliberate connection failure.
+	mr := setupTestRedisWithURL(t, "http://localhost:19999/agent")
+	_ = mr
+
+	// Must be registered AFTER setupTestDB so LIFO order is: async wait → db.DB restore.
+	waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)

 	hWrapper.gracefulPreRestart(context.Background(), "ws-unreachable-000")
 	time.Sleep(200 * time.Millisecond)
@@ -264,13 +271,17 @@ func TestGracefulPreRestart_ConnectionRefused(t *testing.T) {
 // TestGracefulPreRestart_URLResolutionError verifies that when URL resolution
 // fails, the platform proceeds gracefully without blocking the restart.
 func TestGracefulPreRestart_URLResolutionError(t *testing.T) {
-	_ = setupTestDB(t)
-	_ = setupTestRedis(t) // empty → URL resolution will fail in resolveAgentURLForRestartSignal
-
 	hWrapper := &resolveURLTestWrapper{
 		WorkspaceHandler: newHandlerWithTestDeps(t),
 		errToReturn:      context.DeadlineExceeded,
 	}
+	_ = setupTestDB(t)
+	_ = setupTestRedis(t) // empty → URL resolution will fail in resolveAgentURLForRestartSignal
+
+	// Must be registered AFTER setupTestDB so LIFO order is: async wait → db.DB restore.
+	// This ensures goroutines (which access both DB and Redis) are drained before
+	// any cleanup fires. setupTestRedis comes after newHandlerWithTestDeps
+	// so the handler holds the correct Redis client reference.
 	waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)

 	hWrapper.gracefulPreRestart(context.Background(), "ws-url-err-111")
@@ -56,7 +56,20 @@ func PatchAbilities(c *gin.Context) {
 		return
 	}

-	if body.BroadcastEnabled != nil {
+	// Atomic update: when both fields are supplied, apply them in one SQL
+	// statement so the request is all-or-nothing (#2131). A partial mutation
+	// (e.g. broadcast_enabled updated but talk_to_user_enabled failing) would
+	// leave the workspace in an ambiguous capability state.
+	if body.BroadcastEnabled != nil && body.TalkToUserEnabled != nil {
+		if _, err := db.DB.ExecContext(ctx,
+			`UPDATE workspaces SET broadcast_enabled = $2, talk_to_user_enabled = $3, updated_at = now() WHERE id = $1`,
+			id, *body.BroadcastEnabled, *body.TalkToUserEnabled,
+		); err != nil {
+			log.Printf("PatchAbilities both-fields for %s: %v", id, err)
+			c.JSON(http.StatusInternalServerError, gin.H{"error": "update failed"})
+			return
+		}
+	} else if body.BroadcastEnabled != nil {
 		if _, err := db.DB.ExecContext(ctx,
 			`UPDATE workspaces SET broadcast_enabled = $2, updated_at = now() WHERE id = $1`,
 			id, *body.BroadcastEnabled,
@@ -65,9 +78,7 @@ func PatchAbilities(c *gin.Context) {
 			c.JSON(http.StatusInternalServerError, gin.H{"error": "update failed"})
 			return
 		}
-	}
-
-	if body.TalkToUserEnabled != nil {
+	} else if body.TalkToUserEnabled != nil {
 		if _, err := db.DB.ExecContext(ctx,
 			`UPDATE workspaces SET talk_to_user_enabled = $2, updated_at = now() WHERE id = $1`,
 			id, *body.TalkToUserEnabled,
@@ -130,11 +130,8 @@ func TestPatchAbilities_BothFields(t *testing.T) {
 	mock.ExpectQuery(`SELECT EXISTS\(SELECT 1 FROM workspaces WHERE id = \$1 AND status != 'removed'\)`).
 		WithArgs(wsUUID1).
 		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
-	mock.ExpectExec(`UPDATE workspaces SET broadcast_enabled = \$2, updated_at = now\(\) WHERE id = \$1`).
-		WithArgs(wsUUID1, true).
-		WillReturnResult(sqlmock.NewResult(0, 1))
-	mock.ExpectExec(`UPDATE workspaces SET talk_to_user_enabled = \$2, updated_at = now\(\) WHERE id = \$1`).
-		WithArgs(wsUUID1, true).
+	mock.ExpectExec(`UPDATE workspaces SET broadcast_enabled = \$2, talk_to_user_enabled = \$3, updated_at = now\(\) WHERE id = \$1`).
+		WithArgs(wsUUID1, true, true).
 		WillReturnResult(sqlmock.NewResult(0, 1))

 	w := patchAbilitiesReq(t, wsUUID1, `{"broadcast_enabled":true,"talk_to_user_enabled":true}`)
@@ -182,19 +179,25 @@ func TestPatchAbilities_TalkToUserUpdateError(t *testing.T) {
 	}
 }

-func TestPatchAbilities_BothFields_BroadcastFails(t *testing.T) {
+// TestPatchAbilities_BothFields_UpdateError — regression for #2131. When
+// both fields are supplied the handler uses a single combined UPDATE. A
+// failure of that UPDATE must leave the workspace unchanged (atomic).
+func TestPatchAbilities_BothFields_UpdateError(t *testing.T) {
 	mock, cleanup := withMockDB(t)
 	defer cleanup()

 	mock.ExpectQuery(`SELECT EXISTS\(SELECT 1 FROM workspaces WHERE id = \$1 AND status != 'removed'\)`).
 		WithArgs(wsUUID1).
 		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
-	mock.ExpectExec(`UPDATE workspaces SET broadcast_enabled = \$2, updated_at = now\(\) WHERE id = \$1`).
-		WithArgs(wsUUID1, true).
+	mock.ExpectExec(`UPDATE workspaces SET broadcast_enabled = \$2, talk_to_user_enabled = \$3, updated_at = now\(\) WHERE id = \$1`).
+		WithArgs(wsUUID1, true, true).
 		WillReturnError(errors.New("disk full"))

 	w := patchAbilitiesReq(t, wsUUID1, `{"broadcast_enabled":true,"talk_to_user_enabled":true}`)
 	if w.Code != http.StatusInternalServerError {
 		t.Fatalf("expected 500, got %d: %s", w.Code, w.Body.String())
 	}
+	// Because only one UPDATE is issued, there is no partial-mutation
+	// path to assert against; sqlmock implicitly verifies no second
+	// exec occurred.
 }
@@ -95,6 +95,14 @@ func TestIntegration_BroadcastOrgRoot_NonRootSenderResolvesToRoot(t *testing.T)
 		}
 	})

+	// Pre-test hygiene: if a prior run crashed or was killed, its rows may
+	// still be in the shared integration DB. Remove them before inserting so
+	// the unique index workspaces_parent_name_uniq does not conflict.
+	if _, err := conn.ExecContext(ctx,
+		`DELETE FROM workspaces WHERE name LIKE $1`, prefix+"%"); err != nil {
+		t.Logf("pre-test cleanup (non-fatal): %v", err)
+	}
+
 	rootID := uuid.New().String()
 	midID := uuid.New().String()
 	leafID := uuid.New().String()
@@ -876,8 +876,9 @@ func (h *WorkspaceHandler) runRestartCycle(workspaceID string) {
 	h.provisionWorkspaceAutoSync(workspaceID, "", nil, payload)
 	// sendRestartContext is a one-way notification to the new container; safe
 	// to fire async — the next restart cycle won't depend on it completing.
-	// Tracked via goAsync so the test harness can drain it before the
-	// global db.DB swap (sendRestartContext reads db.DB).
+	// Tracked via h.goAsync so tests can wait for it via h.asyncWG before
+	// closing the sqlmock. Without this, untracked goroutines hit the restored
+	// mock and cause "was not expected" errors in parallel CI execution (mc#1264).
 	h.goAsync(func() { h.sendRestartContext(workspaceID, restartData) })
 }