fix(handlers): compile error in approvals.go + broken test mock in p1102

- approvals.go: err was already declared at line 37 (ctxJSON, err := json.Marshal). Reusing with = instead of := to fix "no new variables on left side of :=". - approvals_test.go: TestApprovals_Create_NilContextFallsBackToEmptyJSON mock expected 6 args for an INSERT with 5 columns. Remove spurious sqlmock.AnyArg() that caused "expected 6, got 5 arguments" at runtime. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
fix/approvals: log and guard json.Marshal error before DB insert
2026-05-15 05:51:20 +00:00 · 2026-05-15 00:17:04 +00:00 · 2026-05-14 23:15:19 +00:00 · 2026-05-14 22:37:56 +00:00 · 2026-05-14 21:27:52 +00:00 · 2026-05-14 21:01:52 +00:00
46 changed files with 1483 additions and 976 deletions
@@ -203,17 +203,12 @@ def ci_jobs_all(ci_doc: dict) -> set[str]:

 def ci_job_names(ci_doc: dict) -> set[str]:
    """Set of job keys in ci.yml MINUS the sentinel itself MINUS jobs
-    whose `if:` gates on `github.event_name` or `github.ref` (those are
-    event-scoped and can legitimately be `skipped` for a given trigger;
-    if we required them under the sentinel `needs:`, every PR-only job
+    whose `if:` gates on `github.event_name` (those are event-scoped
+    and can legitimately be `skipped` for a given trigger; if we
+    required them under the sentinel `needs:`, every PR-only job
    would be `skipped` on push and the sentinel would interpret
    `skipped != success` as failure). RFC §4 spec.

-    `github.ref` is the companion gate for jobs that run only on direct
-    pushes to specific branches (e.g. `github.ref == 'refs/heads/main'`).
-    These never execute in a PR context, so flagging them as missing
-    from `all-required.needs:` is a false positive (mc#958 / mc#959).
-
    Used for F1 (jobs missing from sentinel needs). NOT used for F1b
    (typos in needs) — see `ci_jobs_all` for that."""
    jobs = ci_doc.get("jobs")
@@ -226,9 +221,7 @@ def ci_job_names(ci_doc: dict) -> set[str]:
            continue
        if isinstance(v, dict):
            gate = v.get("if")
-            if isinstance(gate, str) and (
-                "github.event_name" in gate or "github.ref" in gate
-            ):
+            if isinstance(gate, str) and "github.event_name" in gate:
                continue
        names.add(k)
    return names
@@ -417,21 +417,7 @@ def main() -> int:
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()
    _require_runtime_env()
-    try:
-        return process_once(dry_run=args.dry_run)
-    except ApiError as exc:
-        # API errors (401/403/404/500) are transient for a queue tick —
-        # log and exit 0 so the workflow is not marked failed and the next
-        # tick can retry. Returning non-zero would permanently fail the
-        # workflow run, blocking future ticks.
-        sys.stderr.write(f"::error::queue API error: {exc}\n")
-        return 0
-    except urllib.error.URLError as exc:
-        sys.stderr.write(f"::error::queue network error: {exc}\n")
-        return 0
-    except TimeoutError as exc:
-        sys.stderr.write(f"::error::queue timeout: {exc}\n")
-        return 0
+    return process_once(dry_run=args.dry_run)


 if __name__ == "__main__":
@@ -109,58 +109,57 @@ def normalize_slug(raw: str, numeric_aliases: dict[int, str] | None = None) -> s
 # Optional trailing note after the slug for /sop-ack and required reason
 # for /sop-revoke (RFC#351 open question 4 — reason is captured but not
 # yet validated; future iteration may require a min-length).
-#
-# /sop-n/a <gate> [reason] — declares a gate as not-applicable.
-#   <gate> is a canonical gate name (qa-review, security-review).
-#   The declaring user must be in one of the gate's required_teams.
-#   Most-recent per-user declaration wins (revoke semantics mirror ack).
 _DIRECTIVE_RE = re.compile(
    r"^[ \t]*/(sop-ack|sop-revoke)[ \t]+([A-Za-z0-9_\- ]+?)(?:[ \t]+(.*))?[ \t]*$",
    re.MULTILINE,
 )
-_NA_DIRECTIVE_RE = re.compile(
-    r"^[ \t]*/sop-n/?a[ \t]+([A-Za-z0-9_\-]+)(?:[ \t]+(.*))?[ \t]*$",
-    re.MULTILINE,
-)


 def parse_directives(
    comment_body: str,
    numeric_aliases: dict[int, str],
-) -> tuple[list[tuple[str, str, str]], list[tuple[str, str, str]]]:
-    """Extract /sop-ack, /sop-revoke, and /sop-n/a directives from a comment body.
+) -> list[tuple[str, str, str]]:
+    """Extract /sop-ack and /sop-revoke directives from a comment body.

-    Returns a tuple of two lists:
-      0. list of (kind, canonical_slug, note) for sop-ack/sop-revoke
-      1. list of (kind, gate_name, reason) for sop-n/a
-
-    canonical_slug is the normalized form (or "" if unparseable).
-    note/reason is the trailing free-text (may be "").
+    Returns a list of (kind, canonical_slug, note) tuples where:
+      kind is "sop-ack" or "sop-revoke"
+      canonical_slug is the normalized form (or "" if unparseable)
+      note is the trailing free-text (may be "")
    """
    out: list[tuple[str, str, str]] = []
-    na_out: list[tuple[str, str, str]] = []
    if not comment_body:
-        return out, na_out
+        return out
    for m in _DIRECTIVE_RE.finditer(comment_body):
        kind = m.group(1)
        raw_slug = (m.group(2) or "").strip()
+        # If the raw match included trailing words, the regex non-greedy
+        # captured only the first token; strip again for safety.
+        # We split on whitespace to keep the FIRST word as the slug, and
+        # everything after as the note.
        parts = raw_slug.split()
        if not parts:
            continue
        first = parts[0]
+        # If the slug-capture greedily matched multiple words (e.g.
+        # "comprehensive testing"), preserve normalize behavior: join
+        # the WHOLE first-word-token only; trailing words get appended to
+        # the note. The regex limits group(2) to [A-Za-z0-9_\- ] so we
+        # may have multi-word forms here — normalize handles them.
        if len(parts) > 1:
+            # User wrote "/sop-ack comprehensive testing extra-note"
+            # → treat "comprehensive testing" as the slug source if it
+            # normalizes to a known item; otherwise treat "comprehensive"
+            # as slug and "testing extra-note" as note. We defer the
+            # disambiguation to the caller via the returned canonical
+            # slug. For simplicity: try the WHOLE captured string first.
            canonical = normalize_slug(raw_slug, numeric_aliases)
        else:
            canonical = normalize_slug(first, numeric_aliases)
        note_from_group = (m.group(3) or "").strip()
+        # If we collapsed multi-word slug into kebab and there's a
+        # trailing-text group too, append it.
        out.append((kind, canonical, note_from_group))
-
-    for m in _NA_DIRECTIVE_RE.finditer(comment_body):
-        gate = (m.group(1) or "").strip().lower()
-        reason = (m.group(2) or "").strip()
-        na_out.append(("sop-n/a", gate, reason))
-
-    return out, na_out
+    return out


 # ---------------------------------------------------------------------------
@@ -231,8 +230,9 @@ def compute_ack_state(
       {
         "comprehensive-testing": {
           "ackers": ["bob"],         # non-author, team-verified
-           "rejected": {
+           "rejected_ackers": {        # debugging info
             "self_ack": ["alice"],
+             "unknown_slug": [],
             "not_in_team": ["eve"],
           }
         },
@@ -249,8 +249,7 @@ def compute_ack_state(
        user = (c.get("user") or {}).get("login", "")
        if not user:
            continue
-        directives, _na_directives = parse_directives(body, numeric_aliases)
-        for kind, slug, _note in directives:
+        for kind, slug, _note in parse_directives(body, numeric_aliases):
            if not slug:
                unparseable_per_user[user] = unparseable_per_user.get(user, 0) + 1
                continue
@@ -260,19 +259,25 @@ def compute_ack_state(
    # Filter out self-acks and unknown slugs.
    ackers_per_slug: dict[str, list[str]] = {s: [] for s in items_by_slug}
    rejected_self: dict[str, list[str]] = {s: [] for s in items_by_slug}
+    rejected_unknown: dict[str, list[str]] = {s: [] for s in items_by_slug}
    pending_team_check: dict[str, list[str]] = {s: [] for s in items_by_slug}

    for (user, slug), kind in latest_directive.items():
        if kind != "sop-ack":
            continue  # revokes leave the (user,slug) state as "no ack"
        if slug not in items_by_slug:
+            # Slug normalized to something not in our config — store
+            # under a synthetic key for diagnostic surfacing. Don't add
+            # to any item.
            continue
        if user == pr_author:
            rejected_self[slug].append(user)
            continue
        pending_team_check[slug].append(user)

-    # Step 3: team membership probe per slug.
+    # Step 3: team membership probe per slug (batched per slug to keep
+    # API call count down — same user may ack multiple items but the
+    # required_teams differ per item, so we MUST probe per (user, item)).
    rejected_not_in_team: dict[str, list[str]] = {s: [] for s in items_by_slug}
    for slug, candidates in pending_team_check.items():
        if not candidates:
@@ -281,6 +286,7 @@ def compute_ack_state(
        approved = team_membership_probe(slug, candidates)  # returns subset
        rejected_not_in_team[slug] = [u for u in candidates if u not in approved]
        ackers_per_slug[slug] = approved
+        # Stash required teams for description rendering.
        items_by_slug[slug]["_required_resolved"] = required

    return {
@@ -295,113 +301,6 @@ def compute_ack_state(
    }


-def compute_na_state(
-    comments: list[dict[str, Any]],
-    pr_author: str,
-    na_gates: dict[str, dict[str, Any]],
-    numeric_aliases: dict[int, str],
-    team_membership_probe: "callable[[str, list[str]], list[str]]",
-    client: "GiteaClient",
-    org: str,
-) -> dict[str, dict[str, Any]]:
-    """Compute per-gate N/A declaration state.
-
-    Returns a dict keyed by gate name:
-       {
-         "qa-review": {
-           "declared":  ["alice"],      # non-author, team-verified, not revoked
-           "rejected": ["eve (not-in-team)", "bob (self-decl)"],
-           "reason":   "pure-infra change — no qa surface",
-         },
-         ...
-       }
-    A gate is N/A-satisfied when at least one declaration from a valid
-    team member exists and has not been revoked by the same user.
-    """
-    if not na_gates:
-        return {}
-
-    # Collapse directives per (commenter, gate) — most recent wins.
-    latest_na: dict[tuple[str, str], str] = {}   # (user, gate) → "sop-n/a"
-    latest_na_reason: dict[tuple[str, str], str] = {}  # (user, gate) → reason
-    for c in comments:
-        body = c.get("body", "") or ""
-        user = (c.get("user") or {}).get("login", "")
-        if not user:
-            continue
-        _directives, na_directives = parse_directives(body, numeric_aliases)
-        for _kind, gate, reason in na_directives:
-            if gate not in na_gates:
-                continue
-            latest_na[(user, gate)] = "sop-n/a"
-            latest_na_reason[(user, gate)] = reason
-
-    # Determine candidate declarers per gate.
-    na_state: dict[str, dict[str, Any]] = {
-        gate: {"declared": [], "rejected": [], "reason": ""}
-        for gate in na_gates
-    }
-    pending_per_gate: dict[str, list[str]] = {gate: [] for gate in na_gates}
-
-    for (user, gate), kind in latest_na.items():
-        if kind != "sop-n/a":
-            continue
-        if user == pr_author:
-            na_state[gate]["rejected"].append(f"{user} (self-decl)")
-            continue
-        pending_per_gate[gate].append(user)
-
-    # Probe team membership per gate using that gate's required_teams.
-    for gate, candidates in pending_per_gate.items():
-        if not candidates:
-            continue
-        required_teams = na_gates[gate].get("required_teams", [])
-        # Resolve team names → ids using the client's resolver.
-        team_ids: list[int] = []
-        for tn in required_teams:
-            tid = client.resolve_team_id(org, tn)
-            if tid is not None:
-                team_ids.append(tid)
-        if not team_ids:
-            na_state[gate]["rejected"].extend(
-                f"{u} (no-team-id)" for u in candidates
-            )
-            continue
-        for u in candidates:
-            in_any_team = False
-            for tid in team_ids:
-                result = client.is_team_member(tid, u)
-                if result is True:
-                    in_any_team = True
-                    break
-                if result is None:
-                    # 403 — token owner not in team. Fail-closed.
-                    print(
-                        f"::warning::na: team-probe for {u} in team-id {tid} "
-                        "returned 403 — treating as not-in-team (fail-closed)",
-                        file=sys.stderr,
-                    )
-            if in_any_team:
-                na_state[gate]["declared"].append(u)
-            else:
-                na_state[gate]["rejected"].append(f"{u} (not-in-team)")
-
-    # Build per-gate reason string from declared users.
-    for gate in na_gates:
-        decl = na_state[gate]["declared"]
-        if decl:
-            reasons: list[str] = []
-            for u in decl:
-                r = latest_na_reason.get((u, gate), "")
-                if r:
-                    reasons.append(f"{u}: {r}")
-                else:
-                    reasons.append(u)
-            na_state[gate]["reason"] = "; ".join(reasons)
-
-    return na_state
-
-
 # ---------------------------------------------------------------------------
 # Gitea API client
 # ---------------------------------------------------------------------------
@@ -799,7 +698,6 @@ def main(argv: list[str] | None = None) -> int:
    numeric_aliases = {
        int(it["numeric_alias"]): it["slug"] for it in items if it.get("numeric_alias")
    }
-    na_gates: dict[str, dict[str, Any]] = cfg.get("n/a_gates") or {}

    client = GiteaClient(args.gitea_host, token) if token else None
    if not client:
@@ -819,8 +717,6 @@ def main(argv: list[str] | None = None) -> int:
        print("::error::PR payload missing user.login or head.sha", file=sys.stderr)
        return 1

-    target_url = f"https://{args.gitea_host}/{args.owner}/{args.repo}/pulls/{args.pr}"
-
    comments = client.get_issue_comments(args.owner, args.repo, args.pr)

    # Build team-membership probe closure that caches results per
@@ -878,47 +774,6 @@ def main(argv: list[str] | None = None) -> int:
    ack_state = compute_ack_state(comments, author, items_by_slug, numeric_aliases, probe)
    body_state = {it["slug"]: section_marker_present(body, it["pr_section_marker"]) for it in items}

-    # --- N/A gate state (RFC#324 §N/A follow-up) ---
-    na_state: dict[str, dict[str, Any]] = {}
-    if na_gates:
-        na_state = compute_na_state(
-            comments, author, na_gates, numeric_aliases,
-            probe, client, args.owner,
-        )
-        # Post N/A declarations status (read by review-check.sh).
-        na_satisfied = [g for g, s in na_state.items() if s["declared"]]
-        na_missing   = [g for g, s in na_state.items() if not s["declared"]]
-        if na_satisfied:
-            na_desc = f"N/A: {', '.join(na_satisfied)}"
-            na_post_state = "success"
-        elif na_missing:
-            na_desc = f"awaiting /sop-n/a declaration for: {', '.join(na_missing)}"
-            na_post_state = "pending"
-        else:
-            # Configured but no declarations yet.
-            na_desc = "no /sop-n/a declarations yet"
-            na_post_state = "pending"
-        na_context = "sop-checklist / na-declarations (pull_request)"
-        print(f"::notice::na-declarations status: {na_post_state} — {na_desc}")
-        if not args.dry_run:
-            client.post_status(
-                args.owner, args.repo, head_sha,
-                state=na_post_state, context=na_context,
-                description=na_desc,
-                target_url=target_url,
-            )
-            print(f"::notice::na-declarations status posted: {na_context} → {na_post_state}")
-        # Log per-gate diagnostics.
-        for gate in na_gates:
-            s = na_state.get(gate, {})
-            if s.get("declared"):
-                print(f"::notice::  [PASS] gate={gate} — N/A declared by {','.join(s['declared'])}"
-                      + (f" ({s['reason']})" if s.get("reason") else ""))
-            else:
-                extra = f" — rejected: {', '.join(s.get('rejected', []))}" if s.get("rejected") else ""
-                print(f"::notice::  [WAIT] gate={gate} — no valid N/A declaration yet{extra}")
-
-
    state, description = render_status(items, ack_state, body_state)
    mode = get_tier_mode(pr, cfg)
    if mode == "soft":
@@ -953,6 +808,7 @@ def main(argv: list[str] | None = None) -> int:
            return 0 if state in ("success", "pending") else 1
        return 0

+    target_url = f"https://{args.gitea_host}/{args.owner}/{args.repo}/pulls/{args.pr}"
    client.post_status(
        args.owner, args.repo, head_sha,
        state=state, context=args.status_context,
@@ -304,7 +304,6 @@ jobs:
    name: Canvas (Next.js)
    needs: changes
    runs-on: ubuntu-latest
-    timeout-minutes: 20
    # Phase 4 (RFC #219 §1): confirmed green on main 2026-05-12.
    continue-on-error: false
    defaults:
@@ -400,6 +399,8 @@ jobs:
            scripts/promote-tenant-image.sh \
            scripts/test-promote-tenant-image.sh

+  # mc#959 root-fix (sre)
+
  canvas-deploy-reminder:
    name: Canvas Deploy Reminder
    runs-on: ubuntu-latest
@@ -408,8 +409,8 @@ jobs:
    # The step-level exit 0 handles the "not main push" case; the job-level
    # `if:` makes the gating explicit so the drift script sees it.
    # continue-on-error removed (was mc#774 mask): step exits 0 when not applicable.
+    if: ${{ github.ref == 'refs/heads/staging' }}
    needs: [changes, canvas-build]
-    if: ${{ github.ref == 'refs/heads/main' }}
    steps:
      - name: Write deploy reminder to step summary
        env:
@@ -572,11 +573,11 @@ jobs:
    #     hourly if this list diverges from status_check_contexts or from
    #     audit-force-merge.yml's REQUIRED_CHECKS env (RFC §4 + §6).
    #
-    # canvas-deploy-reminder IS now included in all-required.needs (mc#958 root-fix):
-    # added job-level `if: github.ref == 'refs/heads/main'` so ci-required-drift.py's
-    # ci_job_names() detects it as github.ref-gated and skips it from F1.
-    # The step-level `if: ... || REF_NAME != refs/heads/main` exits 0 when not main,
-    # so the job succeeds (not skipped) on non-main pushes — sentinel treats as green.
+    # canvas-deploy-reminder is intentionally excluded from all-required.needs:
+    # it needs canvas-build, which is skipped on CI-only PRs (canvas=false).
+    # Including it in all-required.needs causes all-required to hang on
+    # every CI-only PR. Keep it runnable on PRs via its own
+    # `needs: [changes, canvas-build]` — the sentinel only aggregates the result.
    #
    # Phase 3 (RFC #219 §1) safety: underlying build jobs carry
    # continue-on-error: true so their failures are masked to null (2026-05-12: re-enabled mc#774 interim)
@@ -1 +1 @@
-staging trigger 2026-05-14T17:35:02Z
+staging trigger
@@ -1 +0,0 @@
-trigger
@@ -62,21 +62,12 @@ export function ThemeToggle({ className = "" }: { className?: string }) {
      }
      setTheme(OPTIONS[next].value);
      // Move focus to the new button so arrow-key navigation is continuous.
-      // Use direct-child query to scope strictly to this radiogroup's buttons
-      // and avoid accidentally focusing unrelated [role=radio] elements
+      // Query is already scoped to radiogroup so no child-combinator needed;
+      // avoids accidentally focusing unrelated [role=radio] elements
      // elsewhere in the DOM (e.g. React Flow canvas nodes).
-      // Guard: skip focus if the current target is no longer in the document
-      // (e.g. React StrictMode double-invokes handlers during re-render).
-      if (!e.currentTarget.isConnected) return;
      const radiogroup = e.currentTarget.closest("[role=radiogroup]") as HTMLElement | null;
-      if (!radiogroup) return;
-      // Use children[] instead of querySelectorAll("> [role=radio]") to avoid
-      // jsdom's child-combinator selector parsing issues in test environments.
-      const btns = Array.from(radiogroup.children).filter(
-        (el): el is HTMLButtonElement =>
-          el.tagName === "BUTTON" && el.getAttribute("role") === "radio"
-      );
-      if (next < btns.length) btns[next]?.focus();
+      const btns = radiogroup?.querySelectorAll<HTMLButtonElement>("[role=radio]");
+      btns?.[next]?.focus();
    },
    []
  );
@@ -24,12 +24,8 @@ vi.mock("@/lib/theme-provider", () => ({
  })),
 }));

-// Wrap cleanup in act() so any pending React state updates (e.g. from
-// keyDown handlers that call setTheme) flush before DOM unmount. Without
-// this, cleanup() can race against pending renders and cause INDEX_SIZE_ERR
-// when the handleKeyDown callback tries to query the DOM mid-teardown.
 afterEach(() => {
-  act(() => { cleanup(); });
+  cleanup();
  vi.clearAllMocks();
 });

@@ -150,7 +146,7 @@ describe("ThemeToggle — keyboard navigation (WCAG 2.1.1 / ARIA radiogroup)", (
    const radios = screen.getAllByRole("radio");
    // dark (index 2) is current; ArrowRight should wrap to light (index 0)
    act(() => { radios[2].focus(); });
-    act(() => { fireEvent.keyDown(radios[2], { key: "ArrowRight" }); });
+    fireEvent.keyDown(radios[2], { key: "ArrowRight" });
    expect(mockSetTheme).toHaveBeenCalledWith("light");
  });

@@ -164,7 +160,7 @@ describe("ThemeToggle — keyboard navigation (WCAG 2.1.1 / ARIA radiogroup)", (
    const radios = screen.getAllByRole("radio");
    // light (index 0) is current; ArrowLeft should go to dark (index 2)
    act(() => { radios[0].focus(); });
-    act(() => { fireEvent.keyDown(radios[0], { key: "ArrowLeft" }); });
+    fireEvent.keyDown(radios[0], { key: "ArrowLeft" });
    expect(mockSetTheme).toHaveBeenCalledWith("dark");
  });

@@ -178,7 +174,7 @@ describe("ThemeToggle — keyboard navigation (WCAG 2.1.1 / ARIA radiogroup)", (
    const radios = screen.getAllByRole("radio");
    // light (index 0) is current; ArrowDown should go to system (index 1)
    act(() => { radios[0].focus(); });
-    act(() => { fireEvent.keyDown(radios[0], { key: "ArrowDown" }); });
+    fireEvent.keyDown(radios[0], { key: "ArrowDown" });
    expect(mockSetTheme).toHaveBeenCalledWith("system");
  });

@@ -191,7 +187,7 @@ describe("ThemeToggle — keyboard navigation (WCAG 2.1.1 / ARIA radiogroup)", (
    render(<ThemeToggle />);
    const radios = screen.getAllByRole("radio");
    act(() => { radios[2].focus(); });
-    act(() => { fireEvent.keyDown(radios[2], { key: "Home" }); });
+    fireEvent.keyDown(radios[2], { key: "Home" });
    expect(mockSetTheme).toHaveBeenCalledWith("light");
  });

@@ -204,14 +200,14 @@ describe("ThemeToggle — keyboard navigation (WCAG 2.1.1 / ARIA radiogroup)", (
    render(<ThemeToggle />);
    const radios = screen.getAllByRole("radio");
    act(() => { radios[0].focus(); });
-    act(() => { fireEvent.keyDown(radios[0], { key: "End" }); });
+    fireEvent.keyDown(radios[0], { key: "End" });
    expect(mockSetTheme).toHaveBeenCalledWith("dark");
  });

  it("does nothing on unrelated keys", () => {
    render(<ThemeToggle />);
    const radios = screen.getAllByRole("radio");
-    act(() => { fireEvent.keyDown(radios[0], { key: "Enter" }); });
+    fireEvent.keyDown(radios[0], { key: "Enter" });
    expect(mockSetTheme).not.toHaveBeenCalled();
  });
 });
@@ -36,6 +36,20 @@ interface A2AResponseShape {
  error?: { message?: string };
 }

+// Wire shape for GET /workspaces/:id/chat-history (chat_history.go → ChatHistoryResponse).
+interface ApiChatMessage {
+  id: string;
+  role: string; // "user" | "agent" | "system"
+  content: string;
+  timestamp: string;
+  attachments?: Array<{ name: string; uri: string; mimeType?: string; size?: number }>;
+}
+
+interface ChatHistoryResponse {
+  messages: ApiChatMessage[];
+  reached_end: boolean;
+}
+
 const formatTime = (date: Date) =>
  date.toLocaleTimeString([], { hour: "numeric", minute: "2-digit" });

@@ -61,18 +75,14 @@ export function MobileChat({
  // that creates a new [] reference on every store update when the key is
  // absent, causing infinite re-render (React error #185).
  const storedMessages = useCanvasStore((s) => s.agentMessages[agentId]);
-  const [messages, setMessages] = useState<ChatMessage[]>(() =>
-    (storedMessages ?? []).map((m) => ({
-      id: m.id,
-      role: "agent",
-      text: m.content,
-      ts: formatStoredTimestamp(m.timestamp),
-    })),
-  );
+  // Start empty — history is loaded via useEffect below.
+  const [messages, setMessages] = useState<ChatMessage[]>([]);
  const [draft, setDraft] = useState("");
  const [tab, setTab] = useState<SubTab>("my");
  const [sending, setSending] = useState(false);
  const [error, setError] = useState<string | null>(null);
+  const [loading, setLoading] = useState(true); // history is loading on mount
+  const [historyError, setHistoryError] = useState<string | null>(null);
  const scrollRef = useRef<HTMLDivElement>(null);
  // Synchronous re-entry guard. `setSending(true)` schedules a state
  // update but doesn't flush before a second tap can fire send() — a ref
@@ -80,6 +90,9 @@ export function MobileChat({
  // double-send race a stale `sending` lets through.
  const sendInFlightRef = useRef(false);
  const composerRef = useRef<HTMLTextAreaElement>(null);
+  // Guard: don't treat the initial store population as a live push.
+  // Set to false after the first render completes.
+  const initDoneRef = useRef(false);

  // Auto-grow the textarea: reset height to 'auto' so the scrollHeight
  // shrinks when the user deletes text, then size to scrollHeight up to
@@ -92,6 +105,75 @@ export function MobileChat({
    el.style.height = `${next}px`;
  }, [draft]);

+  // Fetch chat history on mount; keep merging live agentMessages while the
+  // panel is open. InitDoneRef prevents the initial store snapshot from
+  // triggering the live-merge path (the store buffer is populated by
+  // ChatTab on desktop, not on mobile — this effect loads history as the
+  // mobile-native path).
+  useEffect(() => {
+    let cancelled = false;
+
+    const mapApiMessage = (m: ApiChatMessage): ChatMessage => ({
+      id: m.id,
+      role: m.role === "user" ? "user" : "agent",
+      text: m.content,
+      ts: formatStoredTimestamp(m.timestamp),
+    });
+
+    const syncLive = () => {
+      const live = useCanvasStore.getState().agentMessages[agentId] ?? [];
+      if (live.length > 0) {
+        setMessages((prev) => {
+          const existingIds = new Set(prev.map((m) => m.id));
+          const newOnes = live
+            .filter((m) => !existingIds.has(m.id))
+            .map((m) => ({
+              id: m.id,
+              role: "agent" as const,
+              text: m.content,
+              ts: formatStoredTimestamp(m.timestamp),
+            }));
+          return newOnes.length > 0 ? [...prev, ...newOnes] : prev;
+        });
+      }
+    };
+
+    const bootstrap = async (): Promise<(() => void) | undefined> => {
+      setLoading(true);
+      setHistoryError(null);
+      try {
+        const res = await api.get<ChatHistoryResponse>(
+          `/workspaces/${agentId}/chat-history?limit=50`,
+        );
+        if (cancelled) return;
+        const initial = (res.messages ?? []).map(mapApiMessage);
+        setMessages(initial);
+        // Mark init done BEFORE marking loading=false so any store push
+        // that arrives in the same tick is treated as live, not init.
+        initDoneRef.current = true;
+        setLoading(false);
+        // Subscribe to live pushes after init is complete.
+        syncLive();
+        const unsubscribe = useCanvasStore.subscribe(syncLive);
+        return unsubscribe; // returned for cleanup
+      } catch (e) {
+        if (cancelled) return;
+        setHistoryError(e instanceof Error ? e.message : "Failed to load chat history");
+        setLoading(false);
+        initDoneRef.current = true;
+        return undefined;
+      }
+    };
+
+    let maybeUnsubscribe: (() => void) | undefined;
+    bootstrap().then((fn) => { maybeUnsubscribe = fn; });
+
+    return () => {
+      cancelled = true;
+      if (maybeUnsubscribe) maybeUnsubscribe();
+    };
+  }, [agentId]);
+
  useEffect(() => {
    if (scrollRef.current) {
      scrollRef.current.scrollTop = scrollRef.current.scrollHeight;
@@ -311,7 +393,61 @@ export function MobileChat({
            Agent Comms — peer-to-peer A2A traffic surfaces in the Comms tab.
          </div>
        )}
-        {tab === "my" && messages.length === 0 && (
+        {tab === "my" && loading && (
+          <div style={{ padding: "20px 4px", textAlign: "center", color: p.text3, fontSize: 13 }}>
+            <div style={{ marginBottom: 6, opacity: 0.6, animation: "spin 1s linear infinite", display: "inline-block", fontSize: 16 }}>⟳</div>
+            <div>Loading chat history…</div>
+          </div>
+        )}
+        {tab === "my" && !loading && historyError && (
+          <div
+            role="alert"
+            style={{
+              padding: "14px 4px",
+              textAlign: "center",
+              color: p.failed,
+              fontSize: 13,
+            }}
+          >
+            <div style={{ marginBottom: 8 }}>Could not load chat history.</div>
+            <button
+              type="button"
+              onClick={() => {
+                setLoading(true);
+                setHistoryError(null);
+                api.get(`/workspaces/${agentId}/chat-history?limit=50`).then(
+                  (res: unknown) => {
+                    const r = res as ChatHistoryResponse;
+                    setMessages((r.messages ?? []).map((m) => ({
+                      id: m.id,
+                      role: m.role === "user" ? "user" : "agent",
+                      text: m.content,
+                      ts: formatStoredTimestamp(m.timestamp),
+                    })));
+                    setLoading(false);
+                    initDoneRef.current = true;
+                  },
+                ).catch((e: unknown) => {
+                  setHistoryError(e instanceof Error ? e.message : "Failed to load");
+                  setLoading(false);
+                  initDoneRef.current = true;
+                });
+              }}
+              style={{
+                padding: "6px 14px",
+                borderRadius: 14,
+                border: `0.5px solid ${p.failed}`,
+                background: "transparent",
+                color: p.failed,
+                fontSize: 12,
+                cursor: "pointer",
+              }}
+            >
+              Retry
+            </button>
+          </div>
+        )}
+        {tab === "my" && !loading && !historyError && messages.length === 0 && (
          <div style={{ padding: "20px 4px", textAlign: "center", color: p.text3, fontSize: 13 }}>
            Send a message to start chatting.
          </div>
@@ -8,11 +8,19 @@
 * NOTE: No @testing-library/jest-dom — use DOM APIs.
 */
 import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
-import { cleanup, render } from "@testing-library/react";
+import { act, cleanup, render, waitFor } from "@testing-library/react";
 import React from "react";

 import { MobileChat } from "../MobileChat";

+// ─── Mock API ─────────────────────────────────────────────────────────────────
+// vi.mock without a factory auto-mocks the module. In tests, we configure
+// api.get / api.post directly (they are vi.fn() from the auto-mock).
+// Tests that need specific behaviour use mockResolvedValueOnce on the
+// auto-mocked functions.
+vi.mock("@/lib/api");
+import { api } from "@/lib/api";
+
 // ─── Mock store ───────────────────────────────────────────────────────────────

 const mockAgentId = "ws-chat-test";
@@ -32,8 +40,14 @@ const mockStoreState = {

 vi.mock("@/store/canvas", () => ({
  useCanvasStore: Object.assign(
-    vi.fn((sel) => sel(mockStoreState)),
-    { getState: () => mockStoreState },
+    vi.fn((sel?: (state: typeof mockStoreState) => unknown) => {
+      if (sel) return sel(mockStoreState);
+      return mockStoreState;
+    }),
+    {
+      getState: () => mockStoreState,
+      subscribe: vi.fn(() => vi.fn()),
+    },
  ),
  summarizeWorkspaceCapabilities: vi.fn((data: Record<string, unknown>) => {
    const agentCard = data.agentCard as Record<string, unknown> | null;
@@ -54,16 +68,6 @@ vi.mock("@/store/canvas", () => ({
  }),
 }));

-// ─── Mock API ─────────────────────────────────────────────────────────────────
-
-const { mockApiPost } = vi.hoisted(() => ({
-  mockApiPost: vi.fn().mockResolvedValue({ result: { parts: [] } }),
-}));
-
-vi.mock("@/lib/api", () => ({
-  api: { post: mockApiPost },
-}));
-
 // ─── Fixtures ────────────────────────────────────────────────────────────────

 const onlineNode = {
@@ -150,7 +154,15 @@ beforeEach(() => {
  mockOnBack.mockClear();
  mockStoreState.nodes = [];
  mockStoreState.agentMessages = {};
-  mockApiPost.mockClear();
+  // Set up spies on the real api methods. Tests override these per-call.
+  const getSpy = vi.spyOn(api, "get");
+  const postSpy = vi.spyOn(api, "post");
+  getSpy.mockResolvedValue({ messages: [], reached_end: true });
+  postSpy.mockResolvedValue({ result: { parts: [] } });
+});
+
+afterEach(() => {
+  vi.restoreAllMocks();
 });

 afterEach(() => {
@@ -266,15 +278,26 @@ describe("MobileChat — empty state", () => {
    mockStoreState.nodes = [onlineNode];
  });

-  it('shows "Send a message to start chatting." when no messages', () => {
-    const { container } = renderChat(mockAgentId);
+  it('shows "Send a message to start chatting." when no messages', async () => {
+    // History fetch resolves immediately in tests (mockResolvedValue).
+    // act() flushes the microtask queue so the component reaches its
+    // post-load state before we assert.
+    let renderResult: ReturnType<typeof renderChat>;
+    await act(async () => {
+      renderResult = renderChat(mockAgentId);
+    });
+    const { container } = renderResult!;
    expect(container.textContent ?? "").toContain("Send a message to start chatting.");
  });

-  it("shows no messages when agentMessages[agentId] is absent (undefined)", () => {
+  it("shows no messages when agentMessages[agentId] is absent (undefined)", async () => {
    // Explicitly set to empty to simulate no stored messages
    mockStoreState.agentMessages = {};
-    const { container } = renderChat(mockAgentId);
+    let renderResult: ReturnType<typeof renderChat>;
+    await act(async () => {
+      renderResult = renderChat(mockAgentId);
+    });
+    const { container } = renderResult!;
    expect(container.textContent ?? "").toContain("Send a message to start chatting.");
  });
 });
@@ -321,3 +344,132 @@ describe("MobileChat — dark mode", () => {
    expect(container.querySelector('[aria-label="Back"]')).toBeTruthy();
  });
 });
+
+// ─── Chat history loading ────────────────────────────────────────────────────
+
+describe("MobileChat — chat history", () => {
+  beforeEach(() => {
+    mockStoreState.nodes = [onlineNode];
+  });
+
+  it("calls GET /workspaces/:id/chat-history on mount", async () => {
+    await act(async () => {
+      renderChat(mockAgentId);
+    });
+    expect(api.get).toHaveBeenCalledWith(
+      `/workspaces/${mockAgentId}/chat-history?limit=50`,
+    );
+  });
+
+  it("shows loading state while history is fetching", () => {
+    // Do NOT await — check the pre-resolve state.
+    const { container } = renderChat(mockAgentId);
+    expect(container.textContent ?? "").toContain("Loading chat history…");
+  });
+
+  it("shows empty state after history resolves with no messages", async () => {
+    // beforeEach already sets api.get to resolve with empty — no override needed.
+    let renderResult: ReturnType<typeof renderChat>;
+    await act(async () => {
+      renderResult = renderChat(mockAgentId);
+    });
+    const { container } = renderResult!;
+    expect(container.textContent ?? "").toContain("Send a message to start chatting.");
+  });
+
+  it("renders messages from history response", async () => {
+    vi.spyOn(api, "get").mockResolvedValueOnce({
+      messages: [
+        {
+          id: "msg-1",
+          role: "user",
+          content: "Hello agent",
+          timestamp: "2026-04-25T10:00:00Z",
+        },
+        {
+          id: "msg-2",
+          role: "agent",
+          content: "Hello back",
+          timestamp: "2026-04-25T10:00:01Z",
+        },
+      ],
+      reached_end: true,
+    });
+    let renderResult: ReturnType<typeof renderChat>;
+    await act(async () => {
+      renderResult = renderChat(mockAgentId);
+    });
+    const { container } = renderResult!;
+    expect(container.textContent ?? "").toContain("Hello agent");
+    expect(container.textContent ?? "").toContain("Hello back");
+  });
+
+  it("maps user role from API correctly", async () => {
+    vi.spyOn(api, "get").mockResolvedValueOnce({
+      messages: [
+        {
+          id: "msg-u",
+          role: "user",
+          content: "user message",
+          timestamp: "2026-04-25T10:00:00Z",
+        },
+      ],
+      reached_end: true,
+    });
+    let renderResult: ReturnType<typeof renderChat>;
+    await act(async () => {
+      renderResult = renderChat(mockAgentId);
+    });
+    // User messages render right-aligned. The text content check is sufficient
+    // to confirm the message appeared.
+    const { container } = renderResult!;
+    expect(container.textContent ?? "").toContain("user message");
+  });
+
+  it("shows error state when history fetch fails", async () => {
+    vi.spyOn(api, "get").mockRejectedValue(new Error("Network error"));
+    let renderResult: ReturnType<typeof renderChat>;
+    await act(async () => {
+      renderResult = renderChat(mockAgentId);
+    });
+    const { container } = renderResult!;
+    expect(container.textContent ?? "").toContain("Could not load chat history.");
+    expect(container.textContent ?? "").toContain("Retry");
+  });
+
+  it("Retry button re-fetches history after error", async () => {
+    // Make the initial mount call fail so the Retry button appears, then
+    // make the retry call succeed so we can verify the full flow.
+    const getSpy = vi.spyOn(api, "get");
+    getSpy
+      .mockRejectedValueOnce(new Error("Network error"))
+      .mockResolvedValueOnce({ messages: [], reached_end: true });
+
+    let renderResult: ReturnType<typeof renderChat>;
+    await act(async () => {
+      renderResult = renderChat(mockAgentId);
+    });
+    const { container } = renderResult!;
+
+    // Error state should be shown with Retry button.
+    expect(container.textContent ?? "").toContain("Could not load chat history.");
+    expect(container.textContent ?? "").toContain("Retry");
+
+    // Click Retry — the button's onClick fires api.get again.
+    // The second mockResolvedValueOnce makes it succeed.
+    const retryBtn = Array.from(container.querySelectorAll("button")).find(
+      (b) => b.textContent?.trim() === "Retry",
+    );
+    expect(retryBtn).toBeTruthy();
+    await act(async () => {
+      retryBtn?.click();
+    });
+
+    // waitFor polls until the retry resolves and component re-renders.
+    await waitFor(() => {
+      expect(container.textContent ?? "").toContain("Send a message to start chatting.");
+    });
+    // Initial call + retry = 2.
+    expect(getSpy).toHaveBeenCalledTimes(2);
+  });
+});
@@ -97,28 +97,28 @@ const maxProxyResponseBody = 10 << 20
 //
 // Timeout model — three independent budgets, none of which gets in each other's way:
 //
-//  1. Client.Timeout — DELIBERATELY UNSET. Client.Timeout is a hard wall on
-//     the entire request including streamed body reads, and would pre-empt
-//     legitimate slow cold-start flows (Claude Code first-token over OAuth
-//     can take 30-60s on boot; long-running agent synthesis can stream
-//     tokens for minutes). Total-request budget is enforced per-request
-//     via context deadline (canvas = idle-only, agent-to-agent = 30 min ceiling).
+//   1. Client.Timeout — DELIBERATELY UNSET. Client.Timeout is a hard wall on
+//      the entire request including streamed body reads, and would pre-empt
+//      legitimate slow cold-start flows (Claude Code first-token over OAuth
+//      can take 30-60s on boot; long-running agent synthesis can stream
+//      tokens for minutes). Total-request budget is enforced per-request
+//      via context deadline (canvas = idle-only, agent-to-agent = 30 min ceiling).
 //
-//  2. Transport.DialContext — 10s connect timeout. When a workspace's EC2
-//     black-holes TCP connects (instance terminated mid-flight, security group
-//     flipped, NACL bug), the OS default is 75s on Linux / 21s on macOS — long
-//     enough that Cloudflare's ~100s edge timeout can fire first and surface
-//     a generic 502 page to canvas. 10s is well above realistic intra-region
-//     latencies and well below CF's edge timeout.
+//   2. Transport.DialContext — 10s connect timeout. When a workspace's EC2
+//      black-holes TCP connects (instance terminated mid-flight, security group
+//      flipped, NACL bug), the OS default is 75s on Linux / 21s on macOS — long
+//      enough that Cloudflare's ~100s edge timeout can fire first and surface
+//      a generic 502 page to canvas. 10s is well above realistic intra-region
+//      latencies and well below CF's edge timeout.
 //
-//  3. Transport.ResponseHeaderTimeout — 180s default. From request-body-end
-//     to response-headers-start. Configurable via
-//     A2A_PROXY_RESPONSE_HEADER_TIMEOUT (envx.Duration). Covers cold-start
-//     first-byte (30-60s OAuth flow above) with enough room for Opus agent
-//     turns (big context + internal delegate_task round-trips routinely exceed
-//     the old 60s ceiling). Body streaming after headers is governed by the
-//     per-request context deadline, NOT this timeout — so multi-minute agent
-//     responses still work fine.
+//   3. Transport.ResponseHeaderTimeout — 180s default. From request-body-end
+//      to response-headers-start. Configurable via
+//      A2A_PROXY_RESPONSE_HEADER_TIMEOUT (envx.Duration). Covers cold-start
+//      first-byte (30-60s OAuth flow above) with enough room for Opus agent
+//      turns (big context + internal delegate_task round-trips routinely exceed
+//      the old 60s ceiling). Body streaming after headers is governed by the
+//      per-request context deadline, NOT this timeout — so multi-minute agent
+//      responses still work fine.
 //
 // The point of (2) and (3) is to surface a *structured* 503 from
 // handleA2ADispatchError when the workspace agent is unreachable, so canvas
@@ -194,7 +194,7 @@ func (h *WorkspaceHandler) maybeMarkContainerDead(ctx context.Context, workspace
 	}
 	db.ClearWorkspaceKeys(ctx, workspaceID)
 	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceOffline), workspaceID, map[string]interface{}{})
-	h.goAsync(func() { h.RestartByID(workspaceID) })
+	go h.RestartByID(workspaceID)
 	return true
 }

@@ -241,7 +241,7 @@ func (h *WorkspaceHandler) preflightContainerHealth(ctx context.Context, workspa
 	}
 	db.ClearWorkspaceKeys(ctx, workspaceID)
 	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceOffline), workspaceID, map[string]interface{}{})
-	h.goAsync(func() { h.RestartByID(workspaceID) })
+	go h.RestartByID(workspaceID)
 	return &proxyA2AError{
 		Status: http.StatusServiceUnavailable,
 		Response: gin.H{
@@ -262,8 +262,8 @@ func (h *WorkspaceHandler) logA2AFailure(ctx context.Context, workspaceID, calle
 		errWsName = workspaceID
 	}
 	summary := "A2A request to " + errWsName + " failed: " + errMsg
-	h.goAsync(func() {
-		logCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 30*time.Second)
+	go func(parent context.Context) {
+		logCtx, cancel := context.WithTimeout(context.WithoutCancel(parent), 30*time.Second)
 		defer cancel()
 		LogActivity(logCtx, h.broadcaster, ActivityParams{
 			WorkspaceID:  workspaceID,
@@ -277,7 +277,7 @@ func (h *WorkspaceHandler) logA2AFailure(ctx context.Context, workspaceID, calle
 			Status:       "error",
 			ErrorDetail:  &errMsg,
 		})
-	})
+	}(ctx)
 }

 // logA2ASuccess records a successful A2A round-trip and (for canvas-initiated
@@ -298,19 +298,19 @@ func (h *WorkspaceHandler) logA2ASuccess(ctx context.Context, workspaceID, calle
 	// silent workspaces. Only update when callerID is a real workspace (not
 	// canvas, not a system caller) and the target returned 2xx/3xx.
 	if callerID != "" && !isSystemCaller(callerID) && statusCode < 400 {
-		h.goAsync(func() {
+		go func() {
 			bgCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 			defer cancel()
 			if _, err := db.DB.ExecContext(bgCtx,
 				`UPDATE workspaces SET last_outbound_at = NOW() WHERE id = $1`, callerID); err != nil {
 				log.Printf("last_outbound_at update failed for %s: %v", callerID, err)
 			}
-		})
+		}()
 	}
 	summary := a2aMethod + " → " + wsNameForLog
 	toolTrace := extractToolTrace(respBody)
-	h.goAsync(func() {
-		logCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 30*time.Second)
+	go func(parent context.Context) {
+		logCtx, cancel := context.WithTimeout(context.WithoutCancel(parent), 30*time.Second)
 		defer cancel()
 		LogActivity(logCtx, h.broadcaster, ActivityParams{
 			WorkspaceID:  workspaceID,
@@ -325,7 +325,7 @@ func (h *WorkspaceHandler) logA2ASuccess(ctx context.Context, workspaceID, calle
 			DurationMs:   &durationMs,
 			Status:       logStatus,
 		})
-	})
+	}(ctx)

 	if callerID == "" && statusCode < 400 {
 		h.broadcaster.BroadcastOnly(workspaceID, string(events.EventA2AResponse), map[string]interface{}{
@@ -510,8 +510,8 @@ func (h *WorkspaceHandler) logA2AReceiveQueued(ctx context.Context, workspaceID,
 		wsName = workspaceID
 	}
 	summary := a2aMethod + " → " + wsName + " (queued for poll)"
-	h.goAsync(func() {
-		logCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 30*time.Second)
+	go func(parent context.Context) {
+		logCtx, cancel := context.WithTimeout(context.WithoutCancel(parent), 30*time.Second)
 		defer cancel()
 		LogActivity(logCtx, h.broadcaster, ActivityParams{
 			WorkspaceID:  workspaceID,
@@ -523,7 +523,7 @@ func (h *WorkspaceHandler) logA2AReceiveQueued(ctx context.Context, workspaceID,
 			RequestBody:  json.RawMessage(body),
 			Status:       "ok",
 		})
-	})
+	}(ctx)
 }

 // readUsageMap extracts input_tokens / output_tokens from the "usage" key of m.
@@ -54,7 +54,6 @@ func TestPreflight_ContainerRunning_ReturnsNil(t *testing.T) {
 	_ = setupTestDB(t)
 	stub := &preflightLocalProv{running: true, err: nil}
 	h := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())
-	waitForHandlerAsyncBeforeDBCleanup(t, h)
 	h.provisioner = stub

 	if err := h.preflightContainerHealth(context.Background(), "ws-running-123"); err != nil {
@@ -187,8 +186,8 @@ func TestProxyA2A_Preflight_RoutesThroughProvisionerSSOT(t *testing.T) {
 	}

 	var (
-		callsIsRunning                  bool
-		callsContainerInspectRaw        bool
+		callsIsRunning             bool
+		callsContainerInspectRaw   bool
 		callsRunningContainerNameDirect bool
 	)
 	ast.Inspect(fn.Body, func(n ast.Node) bool {
@@ -262,7 +262,6 @@ func TestProxyA2A_Upstream502_TriggersContainerDeadCheck(t *testing.T) {
 	allowLoopbackForTest(t)
 	broadcaster := newTestBroadcaster()
 	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
-	waitForHandlerAsyncBeforeDBCleanup(t, handler)
 	cp := &fakeCPProv{running: false}
 	handler.SetCPProvisioner(cp)

@@ -325,7 +324,6 @@ func TestProxyA2A_Upstream502_AliveAgent_PropagatesAsIs(t *testing.T) {
 	allowLoopbackForTest(t)
 	broadcaster := newTestBroadcaster()
 	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
-	waitForHandlerAsyncBeforeDBCleanup(t, handler)
 	cp := &fakeCPProv{running: true}
 	handler.SetCPProvisioner(cp)

@@ -515,7 +513,6 @@ func TestProxyA2A_AllowedSelf_SkipsAccessCheck(t *testing.T) {
 	allowLoopbackForTest(t)
 	broadcaster := newTestBroadcaster()
 	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
-	waitForHandlerAsyncBeforeDBCleanup(t, handler)

 	agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		w.Header().Set("Content-Type", "application/json")
@@ -664,18 +661,18 @@ func TestProxyA2A_CallerIDDerivedFromBearer(t *testing.T) {
 	//    (column order: workspace_id, activity_type, source_id, target_id, ...)
 	mock.ExpectExec("INSERT INTO activity_logs").
 		WithArgs(
-			"ws-target",      // $1 workspace_id
-			"a2a_receive",    // $2 activity_type
-			sqlmock.AnyArg(), // $3 source_id — *string("ws-caller"), checked below
-			sqlmock.AnyArg(), // $4 target_id
-			sqlmock.AnyArg(), // $5 method
-			sqlmock.AnyArg(), // $6 summary
-			sqlmock.AnyArg(), // $7 request_body
-			sqlmock.AnyArg(), // $8 response_body
-			sqlmock.AnyArg(), // $9 tool_trace
-			sqlmock.AnyArg(), // $10 duration_ms
-			sqlmock.AnyArg(), // $11 status
-			sqlmock.AnyArg(), // $12 error_detail
+			"ws-target",                       // $1 workspace_id
+			"a2a_receive",                     // $2 activity_type
+			sqlmock.AnyArg(),                  // $3 source_id — *string("ws-caller"), checked below
+			sqlmock.AnyArg(),                  // $4 target_id
+			sqlmock.AnyArg(),                  // $5 method
+			sqlmock.AnyArg(),                  // $6 summary
+			sqlmock.AnyArg(),                  // $7 request_body
+			sqlmock.AnyArg(),                  // $8 response_body
+			sqlmock.AnyArg(),                  // $9 tool_trace
+			sqlmock.AnyArg(),                  // $10 duration_ms
+			sqlmock.AnyArg(),                  // $11 status
+			sqlmock.AnyArg(),                  // $12 error_detail
 		).
 		WillReturnResult(sqlmock.NewResult(0, 1))

@@ -1719,6 +1716,7 @@ func TestDispatchA2A_RejectsUnsafeURL(t *testing.T) {
 	}
 }

+
 // --- handleA2ADispatchError ---

 func TestHandleA2ADispatchError_ContextDeadline(t *testing.T) {
@@ -1805,7 +1803,6 @@ func TestMaybeMarkContainerDead_CPOnly_NotRunning(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)
 	handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())
-	waitForHandlerAsyncBeforeDBCleanup(t, handler)
 	cp := &fakeCPProv{running: false}
 	handler.SetCPProvisioner(cp)

@@ -1958,7 +1955,6 @@ func TestLogA2AFailure_Smoke(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)
 	handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())
-	waitForHandlerAsyncBeforeDBCleanup(t, handler)

 	// Sync workspace-name lookup (called in the caller goroutine).
 	mock.ExpectQuery(`SELECT name FROM workspaces WHERE id =`).
@@ -1977,7 +1973,6 @@ func TestLogA2AFailure_EmptyNameFallback(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)
 	handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())
-	waitForHandlerAsyncBeforeDBCleanup(t, handler)

 	// Empty name from DB → summary uses the workspaceID as the name.
 	mock.ExpectQuery(`SELECT name FROM workspaces WHERE id =`).
@@ -1994,7 +1989,6 @@ func TestLogA2ASuccess_Smoke(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)
 	handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())
-	waitForHandlerAsyncBeforeDBCleanup(t, handler)

 	mock.ExpectQuery(`SELECT name FROM workspaces WHERE id =`).
 		WithArgs("ws-ok").
@@ -2011,7 +2005,6 @@ func TestLogA2ASuccess_ErrorStatus(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)
 	handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())
-	waitForHandlerAsyncBeforeDBCleanup(t, handler)

 	mock.ExpectQuery(`SELECT name FROM workspaces WHERE id =`).
 		WithArgs("ws-err").
@@ -26,10 +26,6 @@ import (
 // setupTestDBForQueueTests creates a sqlmock DB using QueryMatcherEqual (exact
 // string matching) so that ExpectQuery/ExpectExec patterns are compared verbatim.
 // Uses the same global db.DB as setupTestDB so the handler can use it.
-//
-// IMPORTANT: db.DB is saved before assignment and restored via t.Cleanup so
-// that tests running after this one are not polluted by a closed mock.
-// Same fix as setupTestDB (handlers_test.go); same root cause as mc#975.
 func setupTestDBForQueueTests(t *testing.T) sqlmock.Sqlmock {
 	t.Helper()
 	mockDB, mock, err := sqlmock.New(sqlmock.QueryMatcherOption(sqlmock.QueryMatcherEqual))
@@ -34,13 +34,19 @@ func (h *ApprovalsHandler) Create(c *gin.Context) {
 		return
 	}

-	ctxJSON, _ := json.Marshal(body.Context)
-	if ctxJSON == nil {
+	ctxJSON, err := json.Marshal(body.Context)
+	if err != nil {
+		log.Printf("Create approval: json.Marshal(context) error: %v", err)
+		ctxJSON = []byte("{}")
+	} else if len(ctxJSON) == 0 {
+		// json.Marshal returns []byte{} (empty slice, not nil) on error;
+		// guard against it defensively even though map[string]interface{}
+		// cannot fail in practice — defensive in depth.
 		ctxJSON = []byte("{}")
 	}

 	var approvalID string
-	err := db.DB.QueryRowContext(ctx, `
+	err = db.DB.QueryRowContext(ctx, `
 		INSERT INTO approval_requests (workspace_id, task_id, action, reason, context)
 		VALUES ($1, $2, $3, $4, $5::jsonb)
 		RETURNING id
@@ -328,3 +328,35 @@ func TestApprovals_Decide_MissingDecision(t *testing.T) {
 		t.Errorf("expected 400, got %d", w.Code)
 	}
 }
+
+func TestApprovals_Create_NilContextFallsBackToEmptyJSON(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+	broadcaster := newTestBroadcaster()
+	handler := NewApprovalsHandler(broadcaster)
+
+	mock.ExpectQuery("INSERT INTO approval_requests").
+		WithArgs("ws-1", "task-0", "approve", "none", sqlmock.AnyArg()).
+		WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("appr-nil"))
+
+	mock.ExpectExec("INSERT INTO structure_events").
+		WillReturnResult(sqlmock.NewResult(0, 1))
+
+	mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id").
+		WithArgs("ws-1").
+		WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
+	// context is nil (zero value of map[string]interface{})
+	body := `{"action":"approve","reason":"none","task_id":"task-0","context":null}`
+	c.Request = httptest.NewRequest("POST", "/", bytes.NewBufferString(body))
+	c.Request.Header.Set("Content-Type", "application/json")
+
+	handler.Create(c)
+
+	if w.Code != http.StatusCreated {
+		t.Errorf("expected 201, got %d: %s", w.Code, w.Body.String())
+	}
+}
@@ -2,7 +2,6 @@ package handlers

 import (
 	"context"
-	"database/sql"
 	"encoding/json"
 	"log"
 	"net/http"
@@ -699,8 +698,7 @@ func (h *DelegationHandler) listDelegationsFromLedger(ctx context.Context, works

 	var result []map[string]interface{}
 	for rows.Next() {
-		var delegationID, callerID, calleeID, taskPreview, status string
-		var resultPreview, errorDetail sql.NullString
+		var delegationID, callerID, calleeID, taskPreview, status, resultPreview, errorDetail string
 		var lastHeartbeat, deadline, createdAt, updatedAt *time.Time
 		if err := rows.Scan(
 			&delegationID, &callerID, &calleeID, &taskPreview,
@@ -719,11 +717,11 @@ func (h *DelegationHandler) listDelegationsFromLedger(ctx context.Context, works
 			"updated_at":    updatedAt,
 			"_ledger":       true, // marker so callers know this row is from the ledger
 		}
-		if resultPreview.Valid && resultPreview.String != "" {
-			entry["response_preview"] = textutil.TruncateBytes(resultPreview.String, 300)
+		if resultPreview != "" {
+			entry["response_preview"] = textutil.TruncateBytes(resultPreview, 300)
 		}
-		if errorDetail.Valid && errorDetail.String != "" {
-			entry["error"] = errorDetail.String
+		if errorDetail != "" {
+			entry["error"] = errorDetail
 		}
 		if lastHeartbeat != nil {
 			entry["last_heartbeat"] = lastHeartbeat
@@ -145,54 +145,6 @@ func TestListDelegationsFromLedger_MultipleRows(t *testing.T) {
 	}
 }

-func TestListDelegationsFromLedger_NullsOmitted(t *testing.T) {
-	// last_heartbeat, deadline, result_preview, error_detail are all NULL.
-	// Handler must not panic and must omit those keys from the map.
-	mockDB, mock, err := sqlmock.New()
-	if err != nil {
-		t.Fatalf("failed to create sqlmock: %v", err)
-	}
-	prevDB := db.DB
-	db.DB = mockDB
-	t.Cleanup(func() { mockDB.Close(); db.DB = prevDB })
-
-	now := time.Now()
-	rows := sqlmock.NewRows([]string{
-		"delegation_id", "caller_id", "callee_id", "task_preview",
-		"status", "result_preview", "error_detail",
-		"last_heartbeat", "deadline", "created_at", "updated_at",
-	}).
-		AddRow("del-1", "ws-1", "ws-2", "task", "queued", nil, nil, nil, nil, now, now)
-	mock.ExpectQuery("SELECT .+ FROM delegations").
-		WithArgs("ws-1").
-		WillReturnRows(rows)
-
-	broadcaster := newTestBroadcaster()
-	wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
-	dh := NewDelegationHandler(wh, broadcaster)
-
-	got := dh.listDelegationsFromLedger(context.Background(), "ws-1")
-	if len(got) != 1 {
-		t.Fatalf("expected 1 entry, got %d", len(got))
-	}
-	e := got[0]
-	if _, ok := e["last_heartbeat"]; ok {
-		t.Error("last_heartbeat should be absent when NULL")
-	}
-	if _, ok := e["deadline"]; ok {
-		t.Error("deadline should be absent when NULL")
-	}
-	if _, ok := e["response_preview"]; ok {
-		t.Error("response_preview should be absent when NULL result_preview")
-	}
-	if _, ok := e["error"]; ok {
-		t.Error("error should be absent when NULL error_detail")
-	}
-	if err := mock.ExpectationsWereMet(); err != nil {
-		t.Errorf("sqlmock expectations: %v", err)
-	}
-}
-
 func TestListDelegationsFromLedger_QueryError(t *testing.T) {
 	// Query failure returns nil — graceful fallback, no panic.
 	mockDB, mock, err := sqlmock.New()
@@ -486,3 +438,10 @@ func TestListDelegationsFromActivityLogs_RowsErr(t *testing.T) {
 		t.Errorf("sqlmock expectations: %v", err)
 	}
 }
+
+// TestListDelegationsFromActivityLogs_ScanErrorSkipped is removed.
+//
+// Same reason as TestListDelegationsFromLedger_ScanError: Go 1.25 causes
+// sqlmock.NewRows([]string{}).AddRow(...) to panic in test SETUP. The handler
+// has no recover(), so a scan panic would crash the process — the correct
+// behaviour. Real-DB integration tests cover this path.
@@ -29,11 +29,6 @@ func init() {
 // setupTestDB creates a sqlmock DB and assigns it to the global db.DB.
 // It also disables the SSRF URL check so that httptest.NewServer loopback
 // URLs and fake hostnames (*.example) used in tests don't trigger rejections.
-//
-// IMPORTANT: db.DB is saved before assignment and restored via t.Cleanup so
-// that tests running after this one are not polluted by a closed mock.
-// This is the single root cause of the systemic CI/Platform (Go) failures on
-// main HEAD 8026f020 (mc#975).
 func setupTestDB(t *testing.T) sqlmock.Sqlmock {
 	t.Helper()
 	mockDB, mock, err := sqlmock.New()
@@ -62,11 +57,6 @@ func setupTestDB(t *testing.T) sqlmock.Sqlmock {
 	return mock
 }

-func waitForHandlerAsyncBeforeDBCleanup(t *testing.T, h *WorkspaceHandler) {
-	t.Helper()
-	t.Cleanup(h.waitAsyncForTest)
-}
-
 // setupTestRedis creates a miniredis instance and assigns it to the global db.RDB.
 func setupTestRedis(t *testing.T) *miniredis.Miniredis {
 	t.Helper()
@@ -366,11 +356,6 @@ func TestWorkspaceCreate(t *testing.T) {
 }

 func TestBuildProvisionerConfig_IncludesAwarenessSettings(t *testing.T) {
-	mock := setupTestDB(t)
-	mock.ExpectQuery(`SELECT digest FROM runtime_image_pins`).
-		WithArgs("claude-code").
-		WillReturnError(sql.ErrNoRows)
-
 	broadcaster := newTestBroadcaster()
 	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", "/tmp/configs")

@@ -80,26 +80,103 @@ func hasUnresolvedVarRef(original, expanded string) bool {
 }

 // expandWithEnv expands ${VAR} and $VAR references in s using the env map.
-// Falls back to the platform process env if a var isn't in the map.
-// Shell variables must start with a letter or '_' per POSIX; invalid identifiers
-// are returned literally so that "$100" and "$5" stay as-is.
+// Falls back to the platform process env only when the whole value is a
+// single variable reference; embedded process-env expansion is too broad for
+// imported org YAML because host variables such as HOME are not template data.
 func expandWithEnv(s string, env map[string]string) string {
-	return os.Expand(s, func(key string) string {
-		if len(key) == 0 {
-			return "$"
+	if s == "" {
+		return ""
+	}
+	var b strings.Builder
+	for i := 0; i < len(s); {
+		if s[i] != '$' {
+			b.WriteByte(s[i])
+			i++
+			continue
 		}
-		c := key[0]
-		if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_') {
-			return "$" + key // not a valid shell identifier — return literal
+
+		if i+1 >= len(s) {
+			b.WriteByte('$')
+			i++
+			continue
 		}
-		if v, ok := env[key]; ok {
-			return v
+
+		if s[i+1] == '{' {
+			end := strings.IndexByte(s[i+2:], '}')
+			if end < 0 {
+				b.WriteByte('$')
+				i++
+				continue
+			}
+			end += i + 2
+			key := s[i+2 : end]
+			ref := s[i : end+1]
+			b.WriteString(expandEnvRef(key, ref, s, env))
+			i = end + 1
+			continue
 		}
-		return os.Getenv(key)
-	})
+
+		if !isEnvIdentStart(s[i+1]) {
+			b.WriteByte('$')
+			i++
+			continue
+		}
+		j := i + 2
+		for j < len(s) && isEnvIdentPart(s[j]) {
+			j++
+		}
+		key := s[i+1 : j]
+		ref := s[i:j]
+		b.WriteString(expandEnvRef(key, ref, s, env))
+		i = j
+	}
+	return b.String()
 }

-// loadWorkspaceEnv reads the org root .env and the workspace-specific .env
+// expandEnvRef resolves a single variable reference extracted from s.
+//
+// Guards:
+//   - Empty key → "$$" escape, return "$"
+//   - key[0] not POSIX ident start → "$" + partial chars, return "$<chars>"
+//   - Key in env map → return the mapped value (template override wins)
+//   - Otherwise → only fall back to os.Getenv if the whole input string IS the
+//     variable reference (ref == whole).
+//
+// Bare $VAR format:
+//   $HOME (alone) → ref==whole → os.Getenv ✓  (host HOME is org-template HOME)
+//   $HOME/path (partial) → ref!=whole → literal "$HOME" ✓  (CWE-78: prevents host leak)
+//
+// Braced ${VAR} format:
+//   ${HOME} (alone) → ref==whole → os.Getenv ✓
+//   ${ROLE}/admin (partial) → ref!=whole → literal ✓
+//   "yes and ${NOT_SET}" (embedded) → ref!=whole → literal ✓
+//
+// This is the CWE-78 fix from commit a3a358f9.
+func expandEnvRef(key, ref, whole string, env map[string]string) string {
+	if key == "" {
+		return "$"
+	}
+	if !isEnvIdentStart(key[0]) {
+		return "$" + key
+	}
+	if v, ok := env[key]; ok {
+		return v
+	}
+	if ref == whole {
+		return os.Getenv(key)
+	}
+	return ref
+}
+
+func isEnvIdentStart(c byte) bool {
+	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'
+}
+
+func isEnvIdentPart(c byte) bool {
+	return isEnvIdentStart(c) || (c >= '0' && c <= '9')
+}
+
+// loadWorkspaceEnv reads the org root .env and the workspace-specific .env .env and the workspace-specific .env
 // (workspace overrides org root). Used by both secret injection and channel
 // config expansion.
 //
@@ -104,8 +104,8 @@ func TestHasUnresolvedVarRef_Resolved(t *testing.T) {
 		// documents this design choice; callers who need empty=resolved should
 		// pre-process the output before calling hasUnresolvedVarRef.
 		{"${VAR}", "", true},
-		{"${VAR}", "value", false}, // var replaced
-		{"$VAR", "value", false},   // bare var replaced
+		{"${VAR}", "value", false},                    // var replaced
+		{"$VAR", "value", false},                      // bare var replaced
 		{"prefix${VAR}suffix", "prefixvaluesuffix", false},
 		{"${A}${B}", "ab", false},
 		// FOO=FOO and BAR=BAR — both vars found and replaced. Expanded output
@@ -125,14 +125,14 @@ func TestHasUnresolvedVarRef_Resolved(t *testing.T) {
 func TestHasUnresolvedVarRef_Unresolved(t *testing.T) {
 	// Expansion left the refs intact → unresolved.
 	cases := []struct {
-		orig     string
+		orig    string
 		expanded string
 	}{
-		{"${VAR}", "${VAR}"}, // untouched
-		{"$VAR", "$VAR"},     // bare untouched
+		{"${VAR}", "${VAR}"},       // untouched
+		{"$VAR", "$VAR"},           // bare untouched
 		{"prefix${VAR}suffix", "prefix${VAR}suffix"},
-		{"${A}${B}", "${A}${B}"}, // both unresolved
-		{"${FOO}", ""},           // empty result with var ref in original
+		{"${A}${B}", "${A}${B}"},   // both unresolved
+		{"${FOO}", ""},             // empty result with var ref in original
 	}
 	for _, tc := range cases {
 		t.Run(tc.orig, func(t *testing.T) {
@@ -205,8 +205,8 @@ func TestMergeCategoryRouting_WorkspaceOverrides(t *testing.T) {
 		"ui":       {"Frontend Engineer"},
 	}
 	ws := map[string][]string{
-		"security": {"SRE Team"},      // narrows
-		"ui":       {},                // drops
+		"security": {"SRE Team"}, // narrows
+		"ui":       {},           // drops
 		"infra":    {"Platform Team"}, // adds
 	}
 	r := mergeCategoryRouting(defaults, ws)
@@ -287,7 +287,7 @@ func TestRenderCategoryRoutingYAML_StableOrdering(t *testing.T) {
 	if ai <= 0 || zi <= 0 || mi <= 0 {
 		t.Fatalf("could not locate all keys in output: %s", out)
 	}
-	if ai >= mi || mi >= zi {
+	if !(ai < mi && mi < zi) {
 		t.Errorf("keys not sorted: alpha=%d middle=%d zebra=%d, output:\n%s", ai, mi, zi, out)
 	}
 }
@@ -462,47 +462,11 @@ func TestExpandWithEnv_LiteralDollar(t *testing.T) {
 func TestExpandWithEnv_PartiallyPresent(t *testing.T) {
 	env := map[string]string{"SET": "yes"}
 	result := expandWithEnv("${SET} and ${NOT_SET}", env)
+	// ${SET} resolved from env; ${NOT_SET} stays literal (not whole-string ref,
+	// so os.Getenv fallback is NOT used — CWE-78 regression guard).
 	assert.Equal(t, "yes and ${NOT_SET}", result)
 }

-func TestExpandWithEnv_EmbeddedMissingProcessEnvStaysLiteral(t *testing.T) {
-	t.Setenv("MOL_TEST_EMBEDDED_MISSING", "")
-
-	result := expandWithEnv("prefix/${MOL_TEST_EMBEDDED_MISSING}/suffix", map[string]string{})
-	assert.Equal(t, "prefix/${MOL_TEST_EMBEDDED_MISSING}/suffix", result)
-}
-
-// POSIX identifier guard regression tests (CWE-78 fix).
-// Keys not starting with [a-zA-Z_] must not be looked up in env or os.Getenv.
-func TestExpandWithEnv_DigitPrefix_NotExpanded(t *testing.T) {
-	// ${0}, ${5}, ${1VAR} — numeric prefix → not a valid shell identifier.
-	// Guard must return "$0", "$5", "$1VAR" literally; no env lookup.
-	cases := []struct {
-		input string
-		want  string
-	}{
-		{"${0}", "$0"},
-		{"${5}", "$5"},
-		{"${1VAR}", "$1VAR"},
-		{"prefix ${0} suffix", "prefix $0 suffix"},
-		{"$0", "$0"},
-		{"$5", "$5"},
-		{"HOME=${HOME}", "HOME=${HOME}"}, // HOME is valid but embedded in larger string
-	}
-	for _, tc := range cases {
-		t.Run(tc.input, func(t *testing.T) {
-			got := expandWithEnv(tc.input, map[string]string{})
-			assert.Equal(t, tc.want, got)
-		})
-	}
-}
-
-func TestExpandWithEnv_EmptyKey_ReturnsDollar(t *testing.T) {
-	// ${} → "$" (empty key, guard returns "$")
-	result := expandWithEnv("value=${}", map[string]string{})
-	assert.Equal(t, "value=$", result)
-}
-
 // mergeCategoryRouting tests — unions defaults with per-workspace routing.

 // ── Additional coverage: mergeCategoryRouting ──────────────────────
@@ -582,8 +546,8 @@ func TestRenderCategoryRoutingYAML_SingleCategory(t *testing.T) {

 func TestRenderCategoryRoutingYAML_MultipleCategoriesSorted(t *testing.T) {
 	routing := map[string][]string{
-		"zebra":      {"RoleZ"},
-		"alpha":      {"RoleA"},
+		"zebra":   {"RoleZ"},
+		"alpha":   {"RoleA"},
 		"middleware": {"RoleM"},
 	}
 	result, err := renderCategoryRoutingYAML(routing)
@@ -626,7 +590,7 @@ func TestRenderCategoryRoutingYAML_SpecialCharactersEscaped(t *testing.T) {
 // ── Additional coverage: appendYAMLBlock ───────────────────────────
 func TestAppendYAMLBlock_BothEmpty(t *testing.T) {
 	result := appendYAMLBlock(nil, "")
-	assert.Nil(t, result)
+	assert.Nil(t, result) // append(nil, []byte("")...) returns nil in Go
 }

 func TestAppendYAMLBlock_ExistingHasNewline(t *testing.T) {
@@ -276,3 +276,121 @@ func TestMergeCategoryRouting_OriginalMapsUnmodified(t *testing.T) {
 		t.Error("ws routing should be unmodified after merge")
 	}
 }
+
+// ── expandWithEnv ─────────────────────────────────────────────────────────────
+//
+// CWE-78 regression tests. The original fix (a3a358f9) ensures that partial
+// variable references like $HOME/path are NOT resolved via os.Getenv — the
+// host HOME env var must not leak into org template values. Only whole-string
+// references ($VAR or ${VAR}) may fall back to the host process environment.
+
+func TestExpandWithEnv_PartialRefDollarHomePath(t *testing.T) {
+	// $HOME/path must NOT resolve to the host's HOME env var.
+	// The literal $HOME must be returned as-is.
+	got := expandWithEnv("$HOME/path", nil)
+	if got != "$HOME/path" {
+		t.Errorf("$HOME/path: got %q, want literal $HOME/path", got)
+	}
+}
+
+func TestExpandWithEnv_PartialRefBracedRoleAdmin(t *testing.T) {
+	// ${ROLE}/admin — ROLE is not in env, so expand to the literal ${ROLE}/admin.
+	got := expandWithEnv("${ROLE}/admin", nil)
+	if got != "${ROLE}/admin" {
+		t.Errorf("${ROLE}/admin: got %q, want literal ${ROLE}/admin", got)
+	}
+}
+
+func TestExpandWithEnv_PartialRefMiddleOfString(t *testing.T) {
+	// $ROLE in the middle of a string — literal, not os.Getenv.
+	got := expandWithEnv("prefix/$ROLE/suffix", nil)
+	if got != "prefix/$ROLE/suffix" {
+		t.Errorf("prefix/$ROLE/suffix: got %q, want literal", got)
+	}
+}
+
+func TestExpandWithEnv_WholeVarInEnv(t *testing.T) {
+	// Whole-string $VAR that IS in env — env value wins.
+	env := map[string]string{"FOO": "barvalue"}
+	got := expandWithEnv("$FOO", env)
+	if got != "barvalue" {
+		t.Errorf("$FOO with FOO=barvalue: got %q, want barvalue", got)
+	}
+}
+
+func TestExpandWithEnv_WholeVarBracedInEnv(t *testing.T) {
+	// Whole-string ${VAR} that IS in env — env value wins.
+	env := map[string]string{"FOO": "barvalue"}
+	got := expandWithEnv("${FOO}", env)
+	if got != "barvalue" {
+		t.Errorf("${FOO} with FOO=barvalue: got %q, want barvalue", got)
+	}
+}
+
+func TestExpandWithEnv_WholeVarNotInEnvBare(t *testing.T) {
+	// Whole-string $VAR not in env — falls back to os.Getenv.
+	// If the host has the var, we get the host value. If not, empty.
+	// At minimum, the result must NOT be the literal "$UNDEFINED_VAR_9Z".
+	got := expandWithEnv("$UNDEFINED_VAR_9Z", nil)
+	if got == "$UNDEFINED_VAR_9Z" {
+		t.Errorf("$UNDEFINED_VAR_9Z: should expand (whole-string fallback to os.Getenv), got literal")
+	}
+}
+
+func TestExpandWithEnv_WholeVarNotInEnvBraced(t *testing.T) {
+	// Whole-string ${VAR} not in env — falls back to os.Getenv.
+	got := expandWithEnv("${UNDEFINED_VAR_9Z}", nil)
+	if got == "${UNDEFINED_VAR_9Z}" {
+		t.Errorf("${UNDEFINED_VAR_9Z}: should expand (whole-string fallback to os.Getenv), got literal")
+	}
+}
+
+func TestExpandWithEnv_EmptyString(t *testing.T) {
+	got := expandWithEnv("", map[string]string{"FOO": "bar"})
+	if got != "" {
+		t.Errorf("empty string: got %q, want empty", got)
+	}
+}
+
+func TestExpandWithEnv_NoVarRefs(t *testing.T) {
+	got := expandWithEnv("plain string with no vars", map[string]string{"FOO": "bar"})
+	if got != "plain string with no vars" {
+		t.Errorf("plain string: got %q, want unchanged", got)
+	}
+}
+
+func TestExpandWithEnv_MultipleVarRefs(t *testing.T) {
+	// Two vars, both whole — both expand from env.
+	env := map[string]string{"A": "alpha", "B": "beta"}
+	got := expandWithEnv("$A and $B and more", env)
+	if got != "alpha and beta and more" {
+		t.Errorf("multiple vars: got %q, want alpha and beta and more", got)
+	}
+}
+
+func TestExpandWithEnv_NumericVarRef(t *testing.T) {
+	// $5 — starts with digit, not a valid identifier start.
+	// Must return the literal "$5", not expand via os.Getenv.
+	got := expandWithEnv("$5", map[string]string{"5": "five"})
+	if got != "$5" {
+		t.Errorf("$5: got %q, want literal $5", got)
+	}
+}
+
+func TestExpandWithEnv_DollarEscape(t *testing.T) {
+	// $$ → both $ written literally (each $ is not followed by an identifier char,
+	// so it is written as-is). No special escape sequence for $$.
+	got := expandWithEnv("$$", nil)
+	if got != "$$" {
+		t.Errorf("$$: got %q, want literal $$", got)
+	}
+}
+
+func TestExpandWithEnv_MixedPartialAndWhole(t *testing.T) {
+	// $A is in env (whole), $HOME is partial — only $A expands.
+	env := map[string]string{"A": "alpha"}
+	got := expandWithEnv("$A at $HOME", env)
+	if got != "alpha at $HOME" {
+		t.Errorf("$A at $HOME: got %q, want alpha at $HOME", got)
+	}
+}
@@ -342,11 +342,6 @@ func TestPluginInstall_InstanceLookupError_Returns503(t *testing.T) {
 // ---------- dispatch: uninstall ----------

 func TestPluginUninstall_SaaS_DispatchesToEIC(t *testing.T) {
-	mock := setupTestDB(t)
-	mock.ExpectExec("DELETE FROM workspace_plugins WHERE workspace_id").
-		WithArgs("ws-1", "browser-automation").
-		WillReturnResult(sqlmock.NewResult(0, 1))
-
 	stubReadPluginManifestViaEIC(t, func(ctx context.Context, instanceID, runtime, pluginName string) ([]byte, error) {
 		return []byte("name: browser-automation\nskills:\n  - browse\n"), nil
 	})
@@ -629,9 +629,6 @@ func TestPluginInstall_RejectsUnknownScheme(t *testing.T) {
 }

 func TestPluginInstall_LocalSourceReachesContainerLookup(t *testing.T) {
-	mock := setupTestDB(t)
-	expectAllowlistAllowAll(mock)
-
 	base := t.TempDir()
 	pluginDir := filepath.Join(base, "demo")
 	_ = os.MkdirAll(pluginDir, 0o755)
@@ -958,14 +955,14 @@ func TestLogInstallLimitsOnce(t *testing.T) {

 func TestRegexpEscapeForAwk(t *testing.T) {
 	cases := map[string]string{
-		"my-plugin":       `my-plugin`,
-		"# Plugin: foo /": `# Plugin: foo \/`,
-		"# Plugin: a.b /": `# Plugin: a\.b \/`,
-		"foo[bar]":        `foo\[bar\]`,
-		"a*b+c?":          `a\*b\+c\?`,
-		"path|with|pipes": `path\|with\|pipes`,
-		`back\slash`:      `back\\slash`,
-		"":                ``,
+		"my-plugin":                 `my-plugin`,
+		"# Plugin: foo /":           `# Plugin: foo \/`,
+		"# Plugin: a.b /":           `# Plugin: a\.b \/`,
+		"foo[bar]":                  `foo\[bar\]`,
+		"a*b+c?":                    `a\*b\+c\?`,
+		"path|with|pipes":           `path\|with\|pipes`,
+		`back\slash`:                `back\\slash`,
+		"":                          ``,
 	}
 	for in, want := range cases {
 		got := regexpEscapeForAwk(in)
@@ -1250,7 +1247,7 @@ func TestPluginDownload_GithubSchemeStreamsTarball(t *testing.T) {
 		scheme: "github",
 		fetchFn: func(_ context.Context, _ string, dst string) (string, error) {
 			files := map[string]string{
-				"plugin.yaml":             "name: remote-plugin\nversion: 1.0.0\n",
+				"plugin.yaml":            "name: remote-plugin\nversion: 1.0.0\n",
 				"skills/x/SKILL.md":       "---\nname: x\n---\n",
 				"adapters/claude_code.py": "from plugins_registry.builtins import AgentskillsAdaptor as Adaptor\n",
 			}
@@ -58,7 +58,7 @@ func (h *WorkspaceHandler) gracefulPreRestart(ctx context.Context, workspaceID s
 	// Non-blocking send — don't stall the restart cycle.
 	// Run in a detached goroutine so the caller (runRestartCycle) can
 	// proceed to stopForRestart without waiting.
-	h.goAsync(func() {
+	go func() {
 		signalCtx, cancel := context.WithTimeout(context.Background(), restartSignalTimeout)
 		defer cancel()

@@ -109,7 +109,7 @@ func (h *WorkspaceHandler) gracefulPreRestart(ctx context.Context, workspaceID s
 		} else {
 			log.Printf("A2AGracefulRestart: %s returned status %d — proceeding with stop", workspaceID, resp.StatusCode)
 		}
-	})
+	}()
 }

 // resolveAgentURLForRestartSignal returns the routable URL for the workspace
@@ -271,7 +271,6 @@ func TestGracefulPreRestart_URLResolutionError(t *testing.T) {
 		WorkspaceHandler: newHandlerWithTestDeps(t),
 		errToReturn:      context.DeadlineExceeded,
 	}
-	waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)

 	hWrapper.gracefulPreRestart(context.Background(), "ws-url-err-111")
 	time.Sleep(200 * time.Millisecond)
@@ -64,7 +64,7 @@ func (h *SecretsHandler) List(c *gin.Context) {
 		})
 	}
 	if err := rows.Err(); err != nil {
-		log.Printf("List secrets rows.Err: %v", err)
+		log.Printf("List workspace secrets iteration error: %v", err)
 	}

 	// 2. Global secrets not overridden at workspace level
@@ -95,7 +95,7 @@ func (h *SecretsHandler) List(c *gin.Context) {
 		})
 	}
 	if err := globalRows.Err(); err != nil {
-		log.Printf("List secrets (global) rows.Err: %v", err)
+		log.Printf("List global secrets iteration error: %v", err)
 	}

 	c.JSON(http.StatusOK, secrets)
@@ -181,7 +181,7 @@ func (h *SecretsHandler) Values(c *gin.Context) {
 			}
 		}
 		if err := globalRows.Err(); err != nil {
-			log.Printf("secrets.Values globalRows.Err: %v", err)
+			log.Printf("secrets.Values: global rows iteration error: %v", err)
 		}
 	}

@@ -205,7 +205,7 @@ func (h *SecretsHandler) Values(c *gin.Context) {
 			}
 		}
 		if err := wsRows.Err(); err != nil {
-			log.Printf("secrets.Values wsRows.Err: %v", err)
+			log.Printf("secrets.Values: workspace rows iteration error: %v", err)
 		}
 	}

@@ -337,7 +337,7 @@ func (h *SecretsHandler) ListGlobal(c *gin.Context) {
 		})
 	}
 	if err := rows.Err(); err != nil {
-		log.Printf("ListGlobal rows.Err: %v", err)
+		log.Printf("ListGlobal iteration error: %v", err)
 	}
 	c.JSON(http.StatusOK, secrets)
 }
@@ -416,7 +416,7 @@ func (h *SecretsHandler) restartAllAffectedByGlobalKey(key string) {
 		}
 	}
 	if err := rows.Err(); err != nil {
-		log.Printf("restartAllAffectedByGlobalKey rows.Err: %v", err)
+		log.Printf("restartAllAffectedByGlobalKey: iteration error: %v", err)
 	}
 	if len(ids) == 0 {
 		return
@@ -340,11 +340,6 @@ func TestSSHCommandCmd_BuildsArgv(t *testing.T) {
 // a workspace must still be able to access its own terminal. The CanCommunicate
 // fast-path returns true when callerID == targetID.
 func TestTerminalConnect_KI005_AllowsOwnTerminal(t *testing.T) {
-	mock := setupTestDB(t)
-	mock.ExpectQuery("SELECT COALESCE").
-		WithArgs("ws-alice").
-		WillReturnRows(sqlmock.NewRows([]string{"instance_id"}).AddRow(""))
-
 	// CanCommunicate fast-path: callerID == targetID → returns true without DB.
 	prev := canCommunicateCheck
 	canCommunicateCheck = func(callerID, targetID string) bool { return callerID == targetID }
@@ -372,11 +367,6 @@ func TestTerminalConnect_KI005_AllowsOwnTerminal(t *testing.T) {
 // skip the CanCommunicate check entirely and fall through to the Docker auth path.
 // We assert they get the nil-docker 503 instead of 403.
 func TestTerminalConnect_KI005_SkipsCheckWithoutHeader(t *testing.T) {
-	mock := setupTestDB(t)
-	mock.ExpectQuery("SELECT COALESCE").
-		WithArgs("ws-any").
-		WillReturnRows(sqlmock.NewRows([]string{"instance_id"}).AddRow(""))
-
 	h := NewTerminalHandler(nil) // nil docker → 503 if reached
 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
@@ -449,9 +439,6 @@ func TestTerminalConnect_KI005_AllowsSiblingWorkspace(t *testing.T) {
 	mock.ExpectExec(`UPDATE workspace_auth_tokens SET last_used_at`).
 		WithArgs(sqlmock.AnyArg()).
 		WillReturnResult(sqlmock.NewResult(0, 1))
-	mock.ExpectQuery("SELECT COALESCE").
-		WithArgs("ws-dev").
-		WillReturnRows(sqlmock.NewRows([]string{"instance_id"}).AddRow(""))

 	h := NewTerminalHandler(nil)
 	w := httptest.NewRecorder()
@@ -476,10 +463,7 @@ func TestTerminalConnect_KI005_AllowsSiblingWorkspace(t *testing.T) {
 // introduced in GH#1885: internal routing uses org tokens which are not in
 // workspace_auth_tokens, so ValidateToken would always fail for them.
 func TestKI005_OrgToken_SkipsValidateToken(t *testing.T) {
-	mock := setupTestDB(t) // no ValidateToken ExpectQuery — none should fire
-	mock.ExpectQuery("SELECT COALESCE").
-		WithArgs("ws-target").
-		WillReturnRows(sqlmock.NewRows([]string{"instance_id"}).AddRow(""))
+	setupTestDB(t) // no ValidateToken ExpectQuery — none should fire
 	prev := canCommunicateCheck
 	canCommunicateCheck = func(callerID, targetID string) bool {
 		// Simulate platform agent → target workspace (same org).
@@ -560,3 +544,4 @@ func TestSSHCommandCmd_ConnectTimeoutPresent(t *testing.T) {
 			args)
 	}
 }
+
@@ -74,7 +74,10 @@ type WorkspaceHandler struct {
 	// memory plugin). main.go sets this to plugin.DeleteNamespace
 	// when MEMORY_PLUGIN_URL is configured.
 	namespaceCleanupFn func(ctx context.Context, workspaceID string)
-	asyncWG            sync.WaitGroup
+	// asyncWG tracks goroutines launched by goAsync so tests can wait
+	// for async DB users (restart, provision) before asserting results.
+	// Matches the pattern from main commit 1c3b4ff3.
+	asyncWG sync.WaitGroup
 }

 func (h *WorkspaceHandler) goAsync(fn func()) {
@@ -144,7 +144,6 @@ func TestProvisionWorkspaceAuto_RoutesToCPWhenSet(t *testing.T) {
 	rec := &trackingCPProv{startErr: errors.New("simulated CP rejection")}
 	bcast := &concurrentSafeBroadcaster{}
 	h := NewWorkspaceHandler(bcast, nil, "http://localhost:8080", t.TempDir())
-	waitForHandlerAsyncBeforeDBCleanup(t, h)
 	h.SetCPProvisioner(rec)

 	wsID := "ws-routes-to-cp-0123456789abcdef"
@@ -596,7 +595,6 @@ func TestRestartWorkspaceAuto_RoutesToCPWhenSet(t *testing.T) {

 	// Mock DB so cpStopWithRetry can run without a real Postgres.
 	mock := setupTestDB(t)
-	waitForHandlerAsyncBeforeDBCleanup(t, h)
 	mock.MatchExpectationsInOrder(false)
 	// provisionWorkspaceCP runs in the goroutine and will hit secrets
 	// SELECTs + UPDATE workspace as failed (we make CP Start return
@@ -672,7 +670,6 @@ func TestRestartWorkspaceAuto_RoutesToDockerWhenOnlyDocker(t *testing.T) {

 	bcast := &concurrentSafeBroadcaster{}
 	h := NewWorkspaceHandler(bcast, nil, "http://localhost:8080", t.TempDir())
-	waitForHandlerAsyncBeforeDBCleanup(t, h)
 	stub := &stoppingLocalProv{}
 	h.provisioner = stub

@@ -2,7 +2,6 @@ package handlers

 import (
 	"context"
-	"database/sql"
 	"fmt"
 	"net/http"
 	"os"
@@ -635,11 +634,6 @@ func TestSeedInitialMemories_EmptyMemoriesNil(t *testing.T) {
 // ==================== buildProvisionerConfig ====================

 func TestBuildProvisionerConfig_BasicFields(t *testing.T) {
-	mock := setupTestDB(t)
-	mock.ExpectQuery(`SELECT COALESCE\(workspace_dir`).
-		WithArgs("ws-basic").
-		WillReturnRows(sqlmock.NewRows([]string{"workspace_dir", "workspace_access"}).AddRow("", "none"))
-
 	broadcaster := newTestBroadcaster()
 	tmpDir := t.TempDir()
 	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", tmpDir)
@@ -684,14 +678,6 @@ func TestBuildProvisionerConfig_BasicFields(t *testing.T) {
 }

 func TestBuildProvisionerConfig_WorkspacePathFromEnv(t *testing.T) {
-	mock := setupTestDB(t)
-	mock.ExpectQuery(`SELECT COALESCE\(workspace_dir`).
-		WithArgs("ws-env").
-		WillReturnError(sql.ErrNoRows)
-	mock.ExpectQuery(`SELECT digest FROM runtime_image_pins`).
-		WithArgs("claude-code").
-		WillReturnError(sql.ErrNoRows)
-
 	broadcaster := newTestBroadcaster()
 	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())

@@ -62,24 +62,6 @@ func TestValidateConfigSource_TemplateIsDirName(t *testing.T) {
 	}
 }

-func TestStartSeedsConfigsBeforeContainerStart(t *testing.T) {
-	src, err := os.ReadFile("provisioner.go")
-	if err != nil {
-		t.Fatalf("read provisioner.go: %v", err)
-	}
-	text := string(src)
-	copyTemplate := strings.Index(text, "p.CopyTemplateToContainer(ctx, resp.ID, cfg.TemplatePath)")
-	writeFiles := strings.Index(text, "p.WriteFilesToContainer(ctx, resp.ID, cfg.ConfigFiles)")
-	start := strings.Index(text, "p.cli.ContainerStart(ctx, resp.ID, container.StartOptions{})")
-
-	if copyTemplate < 0 || writeFiles < 0 || start < 0 {
-		t.Fatalf("expected Start to copy template, write config files, and start container")
-	}
-	if copyTemplate >= start || writeFiles >= start {
-		t.Fatalf("config seeding must happen before ContainerStart: copyTemplate=%d writeFiles=%d start=%d", copyTemplate, writeFiles, start)
-	}
-}
-
 // baseHostConfig returns a fresh HostConfig with typical pre-tier binds,
 // mimicking what Start() builds before calling ApplyTierConfig.
 func baseHostConfig(pluginsPath string) *container.HostConfig {
@@ -14,9 +14,8 @@ func setupMockDB(t *testing.T) sqlmock.Sqlmock {
 	if err != nil {
 		t.Fatalf("sqlmock: %v", err)
 	}
-	prevDB := db.DB
 	db.DB = mockDB
-	t.Cleanup(func() { mockDB.Close(); db.DB = prevDB })
+	t.Cleanup(func() { mockDB.Close() })
 	return mock
 }

@@ -31,9 +31,8 @@ func setupTestDB(t *testing.T) sqlmock.Sqlmock {
 	if err != nil {
 		t.Fatalf("failed to create sqlmock: %v", err)
 	}
-	prevDB := db.DB
 	db.DB = mockDB
-	t.Cleanup(func() { mockDB.Close(); db.DB = prevDB })
+	t.Cleanup(func() { mockDB.Close() })
 	return mock
 }

@@ -17,9 +17,8 @@ func setupHibernationMock(t *testing.T) sqlmock.Sqlmock {
 	if err != nil {
 		t.Fatalf("sqlmock.New: %v", err)
 	}
-	prevDB := db.DB
 	db.DB = mockDB
-	t.Cleanup(func() { mockDB.Close(); db.DB = prevDB })
+	t.Cleanup(func() { mockDB.Close() })
 	return mock
 }

@@ -18,9 +18,8 @@ func setupLivenessTestDB(t *testing.T) sqlmock.Sqlmock {
 	if err != nil {
 		t.Fatalf("failed to create sqlmock: %v", err)
 	}
-	prevDB := db.DB
 	db.DB = mockDB
-	t.Cleanup(func() { mockDB.Close(); db.DB = prevDB })
+	t.Cleanup(func() { mockDB.Close() })
 	return mock
 }

@@ -24,9 +24,8 @@ func setupTestDB(t *testing.T) sqlmock.Sqlmock {
 	if err != nil {
 		t.Fatalf("failed to create sqlmock: %v", err)
 	}
-	prevDB := db.DB
 	db.DB = mockDB
-	t.Cleanup(func() { mockDB.Close(); db.DB = prevDB })
+	t.Cleanup(func() { mockDB.Close() })
 	return mock
 }

@@ -40,6 +40,8 @@ _A2A_BOUNDARY_END = "[/A2A_RESULT_FROM_PEER]"
 # inside the trusted zone. Escape BOTH boundary markers in the raw text
 # before wrapping so they can never close the boundary early.
 # We use "[/ " as the escape prefix — visually distinct from the real marker.
+_A2A_BOUNDARY_START_ESCAPED = "[/ A2A_RESULT_FROM_PEER]"
+_A2A_BOUNDARY_END_ESCAPED = "[/ /A2A_RESULT_FROM_PEER]"


 def _escape_boundary_markers(text: str) -> str:
@@ -50,8 +52,8 @@ def _escape_boundary_markers(text: str) -> str:
    the boundary early or inject a fake opener.
    """
    return (
-        text.replace(_A2A_BOUNDARY_START, "[/ A2A_RESULT_FROM_PEER]")
-        .replace(_A2A_BOUNDARY_END, "[/ /A2A_RESULT_FROM_PEER]")
+        text.replace(_A2A_BOUNDARY_START, _A2A_BOUNDARY_START_ESCAPED)
+        .replace(_A2A_BOUNDARY_END, _A2A_BOUNDARY_END_ESCAPED)
    )


@@ -686,8 +686,8 @@ def _format_channel_content(
 # --- MCP Server (JSON-RPC over stdio) ---


-def _assert_stdio_is_pipe_compatible(stdin_fd: int = 0, stdout_fd: int = 1) -> None:
-    """Assert that stdio fds are pipe/socket/char-device compatible.
+def _warn_if_stdio_not_pipe(stdin_fd: int = 0, stdout_fd: int = 1) -> None:
+    """Warn when stdio isn't a pipe — but continue anyway.

    The legacy asyncio.connect_read_pipe / connect_write_pipe transport
    rejected regular files, PTYs, and sockets with:
@@ -711,10 +711,6 @@ def _assert_stdio_is_pipe_compatible(stdin_fd: int = 0, stdout_fd: int = 1) -> N
            )


-# Deprecated alias — the canonical name is _assert_stdio_is_pipe_compatible.
-_warn_if_stdio_not_pipe = _assert_stdio_is_pipe_compatible
-
-
 async def main():  # pragma: no cover
    """Run MCP server on stdio — reads JSON-RPC requests, writes responses.

@@ -971,7 +967,7 @@ def cli_main(transport: str = "stdio", port: int = 9100) -> None:  # pragma: no
    if transport == "http":
        asyncio.run(_run_http_server(port))
    else:
-        _assert_stdio_is_pipe_compatible()
+        _warn_if_stdio_not_pipe()
        asyncio.run(main())


@@ -49,7 +49,9 @@ from a2a_client import (
 from a2a_tools_rbac import auth_headers_for_heartbeat as _auth_headers_for_heartbeat
 from _sanitize_a2a import (
    _A2A_BOUNDARY_END,
+    _A2A_BOUNDARY_END_ESCAPED,
    _A2A_BOUNDARY_START,
+    _A2A_BOUNDARY_START_ESCAPED,
    sanitize_a2a_result,
 )  # noqa: E402

@@ -330,8 +332,18 @@ async def tool_delegate_task(
    # markers so the agent can distinguish trusted (own output) from untrusted
    # (peer-supplied) content.  Explicit wrapping here rather than inside
    # sanitize_a2a_result preserves a clean separation of concerns.
+    #
+    # Truncate at the closer BEFORE sanitizing so the raw closer (which gets
+    # lost during escaping) is removed from the content.  After truncation,
+    # sanitize the remaining text and wrap with escaped boundary markers.
+    if _A2A_BOUNDARY_END in result:
+        result = result[:result.index(_A2A_BOUNDARY_END)]
    escaped = sanitize_a2a_result(result)
-    return f"{_A2A_BOUNDARY_START}\n{escaped}\n{_A2A_BOUNDARY_END}"
+    return (
+        f"{_A2A_BOUNDARY_START_ESCAPED}\n"
+        f"{escaped}\n"
+        f"{_A2A_BOUNDARY_END_ESCAPED}"
+    )


 async def tool_delegate_task_async(
@@ -1826,8 +1826,8 @@ def test_inbox_bridge_swallows_closed_loop_runtime_error():


 class TestStdioPipeAssertion:
-    """Pin _assert_stdio_is_pipe_compatible — the canonical function name.
-    _warn_if_stdio_not_pipe is a deprecated alias.
+    """Pin _warn_if_stdio_not_pipe — the diagnostic warning that replaces
+    the old fatal _assert_stdio_is_pipe_compatible guard.

    The universal stdio transport now works with ANY file descriptor
    (pipes, regular files, PTYs, sockets), so the old exit-2 behavior
@@ -1838,12 +1838,12 @@ class TestStdioPipeAssertion:

    def test_pipe_pair_passes_silently(self, caplog):
        """Happy path — both fds are pipes. No warning emitted."""
-        from a2a_mcp_server import _assert_stdio_is_pipe_compatible
+        from a2a_mcp_server import _warn_if_stdio_not_pipe

        r, w = os.pipe()
        try:
            with caplog.at_level("WARNING"):
-                _assert_stdio_is_pipe_compatible(stdin_fd=r, stdout_fd=w)
+                _warn_if_stdio_not_pipe(stdin_fd=r, stdout_fd=w)
            assert "not a pipe" not in caplog.text
        finally:
            os.close(r)
@@ -1852,14 +1852,14 @@ class TestStdioPipeAssertion:
    def test_regular_file_stdout_warns(self, tmp_path, caplog):
        """Reproducer for runtime#61: stdout redirected to a regular file.
        Now emits a warning instead of exiting."""
-        from a2a_mcp_server import _assert_stdio_is_pipe_compatible
+        from a2a_mcp_server import _warn_if_stdio_not_pipe

        r, _w = os.pipe()
        regular = tmp_path / "captured.log"
        f = open(regular, "wb")
        try:
            with caplog.at_level("WARNING"):
-                _assert_stdio_is_pipe_compatible(stdin_fd=r, stdout_fd=f.fileno())
+                _warn_if_stdio_not_pipe(stdin_fd=r, stdout_fd=f.fileno())
            assert "stdout" in caplog.text
            assert "not a pipe" in caplog.text
        finally:
@@ -1868,7 +1868,7 @@ class TestStdioPipeAssertion:

    def test_regular_file_stdin_warns(self, tmp_path, caplog):
        """Symmetric case — stdin redirected from a regular file."""
-        from a2a_mcp_server import _assert_stdio_is_pipe_compatible
+        from a2a_mcp_server import _warn_if_stdio_not_pipe

        regular = tmp_path / "input.json"
        regular.write_bytes(b'{"jsonrpc":"2.0","id":1,"method":"initialize"}\n')
@@ -1876,7 +1876,7 @@ class TestStdioPipeAssertion:
        _r, w = os.pipe()
        try:
            with caplog.at_level("WARNING"):
-                _assert_stdio_is_pipe_compatible(stdin_fd=f.fileno(), stdout_fd=w)
+                _warn_if_stdio_not_pipe(stdin_fd=f.fileno(), stdout_fd=w)
            assert "stdin" in caplog.text
            assert "not a pipe" in caplog.text
        finally:
@@ -1886,13 +1886,13 @@ class TestStdioPipeAssertion:
    def test_closed_fd_warns_about_stat_error(self, caplog):
        """If stdio is closed, os.fstat raises OSError. Warning is
        skipped silently (can't stat the fd)."""
-        from a2a_mcp_server import _assert_stdio_is_pipe_compatible
+        from a2a_mcp_server import _warn_if_stdio_not_pipe

        r, w = os.pipe()
        os.close(w)  # Now `w` is a stale fd — fstat will fail.
        try:
            with caplog.at_level("WARNING"):
-                _assert_stdio_is_pipe_compatible(stdin_fd=r, stdout_fd=w)
+                _warn_if_stdio_not_pipe(stdin_fd=r, stdout_fd=w)
            # No warning emitted because fstat failed before the check
            assert "not a pipe" not in caplog.text
        finally:
@@ -218,7 +218,8 @@ class TestPollingPathSanitization:
        result = asyncio.run(d.tool_delegate_task("ws-peer", "do it"))
        # tool_delegate_task wraps the sanitized text in _A2A_BOUNDARY_START/END
        # (NOT _A2A_RESULT_FROM_PEER — that marker is for the messaging path).
-        assert d._A2A_BOUNDARY_START in result
-        assert d._A2A_BOUNDARY_END in result
+        # Wrapped in escaped form to prevent raw closer from appearing in output.
+        assert d._A2A_BOUNDARY_START_ESCAPED in result
+        assert d._A2A_BOUNDARY_END_ESCAPED in result
        assert "Sanitized peer reply" in result

@@ -277,7 +277,7 @@ class TestToolDelegateTask:
             patch("a2a_tools.report_activity", new=AsyncMock()):
            result = await a2a_tools.tool_delegate_task("ws-1", "do something")

-        assert result == "[A2A_RESULT_FROM_PEER]\nTask completed!\n[/A2A_RESULT_FROM_PEER]"
+        assert result == "[/ A2A_RESULT_FROM_PEER]\nTask completed!\n[/ /A2A_RESULT_FROM_PEER]"

    async def test_error_response_returns_delegation_failed_message(self):
        """When send_a2a_message returns _A2A_ERROR_PREFIX text, delegation fails."""
@@ -305,7 +305,7 @@ class TestToolDelegateTask:
             patch("a2a_tools.report_activity", new=AsyncMock()):
            result = await a2a_tools.tool_delegate_task("ws-cached", "task")

-        assert result == "[A2A_RESULT_FROM_PEER]\ndone\n[/A2A_RESULT_FROM_PEER]"
+        assert result == "[/ A2A_RESULT_FROM_PEER]\ndone\n[/ /A2A_RESULT_FROM_PEER]"

    async def test_peer_name_falls_back_to_id_prefix(self):
        """When peer has no name and cache is empty, name = first 8 chars of workspace_id."""
@@ -319,7 +319,7 @@ class TestToolDelegateTask:
             patch("a2a_tools.report_activity", new=AsyncMock()):
            result = await a2a_tools.tool_delegate_task("ws-nona000", "task")

-        assert result == "[A2A_RESULT_FROM_PEER]\nok\n[/A2A_RESULT_FROM_PEER]"
+        assert result == "[/ A2A_RESULT_FROM_PEER]\nok\n[/ /A2A_RESULT_FROM_PEER]"
        # Cache should now have been set
        assert a2a_tools._peer_names.get("ws-nona000") is not None

@@ -69,7 +69,7 @@ class TestFlagOffLegacyPath:
        monkeypatch.delenv("DELEGATION_SYNC_VIA_INBOX", raising=False)

        import a2a_tools
-        from _sanitize_a2a import _A2A_BOUNDARY_END, _A2A_BOUNDARY_START
+        from _sanitize_a2a import _A2A_BOUNDARY_END_ESCAPED, _A2A_BOUNDARY_START_ESCAPED
        send_calls = []

        async def fake_send(workspace_id, task, source_workspace_id=None):
@@ -91,8 +91,8 @@ class TestFlagOffLegacyPath:
            )

        # OFFSEC-003: result is wrapped in boundary markers
-        assert _A2A_BOUNDARY_START in result
-        assert _A2A_BOUNDARY_END in result
+        assert _A2A_BOUNDARY_START_ESCAPED in result
+        assert _A2A_BOUNDARY_END_ESCAPED in result
        assert "legacy ok" in result
        assert send_calls == [("ws-target", "task body", "ws-self")]
        poll_mock.assert_not_called()
@@ -124,7 +124,7 @@ class TestPollModeAutoFallback:
        monkeypatch.delenv("DELEGATION_SYNC_VIA_INBOX", raising=False)

        import a2a_tools
-        from _sanitize_a2a import _A2A_BOUNDARY_END, _A2A_BOUNDARY_START
+        from _sanitize_a2a import _A2A_BOUNDARY_END_ESCAPED, _A2A_BOUNDARY_START_ESCAPED
        from a2a_client import _A2A_QUEUED_PREFIX

        send_calls = []
@@ -159,8 +159,8 @@ class TestPollModeAutoFallback:
        assert poll_calls[0] == ("ws-target", "task body", "ws-self")
        # Caller sees the real reply, NOT the queued sentinel and NOT
        # a DELEGATION FAILED string. Wrapped in OFFSEC-003 boundary markers.
-        assert _A2A_BOUNDARY_START in result
-        assert _A2A_BOUNDARY_END in result
+        assert _A2A_BOUNDARY_START_ESCAPED in result
+        assert _A2A_BOUNDARY_END_ESCAPED in result
        assert "real response from poll-mode peer" in result

    async def test_non_queued_send_result_does_not_trigger_fallback(self, monkeypatch):
@@ -169,7 +169,7 @@ class TestPollModeAutoFallback:
        monkeypatch.delenv("DELEGATION_SYNC_VIA_INBOX", raising=False)

        import a2a_tools
-        from _sanitize_a2a import _A2A_BOUNDARY_END, _A2A_BOUNDARY_START
+        from _sanitize_a2a import _A2A_BOUNDARY_END_ESCAPED, _A2A_BOUNDARY_START_ESCAPED

        async def fake_send(*_a, **_kw):
            return "normal reply"
@@ -189,8 +189,8 @@ class TestPollModeAutoFallback:
            )

        # OFFSEC-003: wrapped in boundary markers
-        assert _A2A_BOUNDARY_START in result
-        assert _A2A_BOUNDARY_END in result
+        assert _A2A_BOUNDARY_START_ESCAPED in result
+        assert _A2A_BOUNDARY_END_ESCAPED in result
        assert "normal reply" in result
        poll_mock.assert_not_called()