From c159d85eb544f6ac0fa7dec035bccae896a2a3c5 Mon Sep 17 00:00:00 2001
From: Hongming Wang <hongmingwangrabbit@gmail.com>
Date: Fri, 24 Apr 2026 23:47:44 -0700
Subject: [PATCH] =?UTF-8?q?fix(a2a):=20review-driven=20hardening=20?=
 =?UTF-8?q?=E2=80=94=20prefix-anchored=20type=20check,=20error=5Fdetail=20?=
 =?UTF-8?q?cap,=20shared=20hint=20module?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three required fixes from the bundle review of 391e1872:

1. workspace/a2a_client.py: substring `type_name in msg` could miss
   the diagnostic prefix when an exception's message embedded a
   different class name mid-string (e.g. `OSError("see ConnectionError
   below")` → printed as plain msg, type lost). Switched to a
   prefix-anchored check (`msg.startswith(f"{type_name}:")` etc.) so
   the type label is always added when not already at the start of
   the message.

2. workspace/a2a_tools.py: `activity_logs.error_detail` is unbounded
   TEXT on the platform (handlers/activity.go does not validate
   length). A buggy or hostile peer could stream arbitrarily large
   error messages into the caller's activity log. Cap at 4096 chars
   at the producer — comfortably above any real exception traceback,
   well below an obvious-DoS threshold.

3. New regression test for JSON-RPC `code=0` — pins the
   `code is not None` semantics so the code is preserved in the
   detail rather than collapsing into the no-code path. Code=0 is
   not valid per the spec, but a malformed peer can still emit it
   and we want it visible for diagnosis.

Plus one optional taken: extracted the A2A-error → hint mapping into
canvas/src/components/tabs/chat/a2aErrorHint.ts. The two prior copies
(AgentCommsPanel.inferCauseHint + ActivityTab.inferA2AErrorHint) had
already drifted — Activity tab gained `not found`/`offline` cases the
chat panel never picked up, AgentCommsPanel handled empty-input
explicitly while Activity didn't. The shared module is the merged
superset, with 10 unit tests pinning each named pattern + the
"most specific first" ordering (Claude SDK wedge wins over generic
timeout).

Skipped (per analysis):
- Unicode-naive 120-char slice — Python str[:N] slices on code
  points, not bytes. Safe.
- Nested [A2A_ERROR] confusion — non-issue per reviewer; outer
  prefix winning still produces a structured render.
- MessagePreview + JsonBlock dual render on errors — intentional
  drilldown; raw JSON is below the fold for operators who need it.
- console.warn dedup — refetches don't happen per-event so spam
  risk is low.
- str(data)[:200] materialization — A2A response bodies aren't
  typically MB-sized.

Verified: 1005 canvas tests pass (10 new hint tests); 10 Python
send_a2a_message tests pass (1 new for code=0); tsc clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 canvas/src/components/tabs/ActivityTab.tsx    | 24 +------
 .../components/tabs/chat/AgentCommsPanel.tsx  | 28 ++------
 .../tabs/chat/__tests__/a2aErrorHint.test.ts  | 67 +++++++++++++++++++
 .../src/components/tabs/chat/a2aErrorHint.ts  | 54 +++++++++++++++
 workspace/a2a_client.py                       |  7 +-
 workspace/a2a_tools.py                        |  9 ++-
 workspace/tests/test_a2a_client.py            | 18 +++++
 7 files changed, 158 insertions(+), 49 deletions(-)
 create mode 100644 canvas/src/components/tabs/chat/__tests__/a2aErrorHint.test.ts
 create mode 100644 canvas/src/components/tabs/chat/a2aErrorHint.ts

diff --git a/canvas/src/components/tabs/ActivityTab.tsx b/canvas/src/components/tabs/ActivityTab.tsx
index 3a3d47fa..d0e31630 100644
--- a/canvas/src/components/tabs/ActivityTab.tsx
+++ b/canvas/src/components/tabs/ActivityTab.tsx
@@ -5,6 +5,7 @@ import { api } from "@/lib/api";
 import { ConversationTraceModal } from "@/components/ConversationTraceModal";
 import { type ActivityEntry } from "@/types/activity";
 import { useWorkspaceName } from "@/hooks/useWorkspaceName";
+import { inferA2AErrorHint } from "./chat/a2aErrorHint";
 
 interface Props {
   workspaceId: string;
@@ -288,29 +289,6 @@ function ActivityRow({
 
 const A2A_ERROR_PREFIX = "[A2A_ERROR]";
 
-/** Best-effort cause hint for an A2A delivery failure. Mirrors the
- *  patterns used by AgentCommsPanel's inferCauseHint so the same
- *  symptom reads the same way in both surfaces. */
-function inferA2AErrorHint(detail: string): string {
-  const t = detail.toLowerCase();
-  if (t.includes("control request timeout")) {
-    return "The remote agent's Claude Code SDK is wedged on initialization. Restart the workspace to clear it.";
-  }
-  if (t.includes("readtimeout") || t.includes("connecttimeout") || t.includes("deadline exceeded") || t.includes("timeout")) {
-    return "The remote agent didn't respond within the proxy timeout. It may be busy with a long task or its runtime is stuck — restart if this repeats.";
-  }
-  if (t.includes("connectionreset") || t.includes("remoteprotocolerror") || t.includes("connection reset") || t.includes("no message")) {
-    return "The connection to the remote agent dropped before a reply arrived. Usually a transient network blip; retry once. If it repeats, the remote container may have crashed mid-request — check its logs.";
-  }
-  if (t.includes("agent error") || t.includes("exception")) {
-    return "The remote agent's runtime threw an exception. Check the workspace's container logs for the traceback. Restart usually clears transient runtime crashes.";
-  }
-  if (t.includes("not found") || t.includes("not accessible") || t.includes("offline")) {
-    return "The remote workspace can't be reached — it may be stopped, removed, or outside the access control list. Verify the peer is online before retrying.";
-  }
-  return "Check the remote agent's container logs for context. Restart the workspace if the failure repeats.";
-}
-
 /** Render a [A2A_ERROR]-prefixed response as a structured error block
  *  with a stripped detail line + a cause hint. The previous raw render
  *  ("[A2A_ERROR] " literal in the response area) gave the user no
diff --git a/canvas/src/components/tabs/chat/AgentCommsPanel.tsx b/canvas/src/components/tabs/chat/AgentCommsPanel.tsx
index 15af5b03..909616bf 100644
--- a/canvas/src/components/tabs/chat/AgentCommsPanel.tsx
+++ b/canvas/src/components/tabs/chat/AgentCommsPanel.tsx
@@ -9,6 +9,7 @@ import { WS_URL } from "@/store/socket";
 import { closeWebSocketGracefully } from "@/lib/ws-close";
 import { showToast } from "../../Toaster";
 import { extractResponseText, extractRequestText } from "./message-parser";
+import { inferA2AErrorHint } from "./a2aErrorHint";
 
 export interface ActivityEntry {
   id: string;
@@ -115,29 +116,8 @@ function unwrapErrorText(raw: string | null): string {
   return trimmed;
 }
 
-/** Best-effort cause hint based on what we can see in the error text.
- *  These map known runtime symptoms to operator-actionable language so
- *  the user isn't left staring at "[A2A_ERROR]" with no next step. */
-function inferCauseHint(errorText: string): string {
-  const t = errorText.toLowerCase();
-  // "control request timeout" is the specific Claude Code SDK init
-  // wedge symptom. Don't pattern on bare "initialize" — too broad
-  // (a user task containing "failed to initialize database" would
-  // false-positive into the SDK-wedge hint).
-  if (t.includes("control request timeout")) {
-    return "The remote agent's Claude Code SDK is wedged on initialization (often after a long idle period or OAuth refresh). A workspace restart usually clears it.";
-  }
-  if (t.includes("deadline exceeded") || t.includes("timeout")) {
-    return "The remote agent didn't respond within the proxy timeout. It may be busy with a long-running task, or the runtime is stuck. Restart the workspace if this repeats.";
-  }
-  if (t.includes("agent error") || t.includes("exception")) {
-    return "The remote agent's runtime threw an exception. Check the workspace's container logs for the traceback. Restart usually clears transient runtime crashes.";
-  }
-  if (errorText === "") {
-    return "The remote agent returned no error detail (the underlying httpx exception had an empty message — typically a connection-reset or silent timeout). A workspace restart is the safe first move.";
-  }
-  return "The remote agent reported a delivery failure. Check the workspace logs or try restarting.";
-}
+// inferA2AErrorHint moved to ./a2aErrorHint so the Activity tab and
+// this panel render identical hints for the same symptom.
 
 export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
   const [messages, setMessages] = useState<CommMessage[]>([]);
@@ -314,7 +294,7 @@ function ErrorMessage({ msg }: { msg: CommMessage }) {
   const selectNode = useCanvasStore((s) => s.selectNode);
   const [restarting, setRestarting] = useState(false);
   const errorText = unwrapErrorText(msg.responseText);
-  const hint = inferCauseHint(errorText);
+  const hint = inferA2AErrorHint(errorText);
 
   // Guard against acting on a peer whose workspace has been deleted
   // since this row was logged. Without the guard, restart 404s
diff --git a/canvas/src/components/tabs/chat/__tests__/a2aErrorHint.test.ts b/canvas/src/components/tabs/chat/__tests__/a2aErrorHint.test.ts
new file mode 100644
index 00000000..8518829b
--- /dev/null
+++ b/canvas/src/components/tabs/chat/__tests__/a2aErrorHint.test.ts
@@ -0,0 +1,67 @@
+import { describe, it, expect } from "vitest";
+import { inferA2AErrorHint } from "../a2aErrorHint";
+
+// Pure logic. Pin every named pattern so a future contributor adding a
+// new symptom doesn't accidentally collapse the buckets — and so the
+// "most specific first" ordering can't drift without a test failing.
+
+describe("inferA2AErrorHint", () => {
+  it("matches the Claude Code SDK init wedge specifically", () => {
+    const hint = inferA2AErrorHint("Control request timeout: initialize");
+    expect(hint).toMatch(/Claude Code SDK is wedged/);
+  });
+
+  it("does NOT misfire on user tasks containing 'initialize' generally", () => {
+    // Regression: an earlier bare-`initialize` pattern would have
+    // false-positived "failed to initialize database" into the SDK
+    // wedge hint. Confirm the full-phrase guard holds.
+    const hint = inferA2AErrorHint("failed to initialize database connection");
+    expect(hint).not.toMatch(/Claude Code SDK/);
+  });
+
+  it("recognises httpx ReadTimeout / ConnectTimeout class names", () => {
+    expect(inferA2AErrorHint("ReadTimeout: timeout")).toMatch(/proxy timeout/);
+    expect(inferA2AErrorHint("ConnectTimeout: ...")).toMatch(/proxy timeout/);
+  });
+
+  it("recognises generic timeout / deadline-exceeded language", () => {
+    expect(inferA2AErrorHint("deadline exceeded after 300s")).toMatch(/proxy timeout/);
+    expect(inferA2AErrorHint("Operation timeout")).toMatch(/proxy timeout/);
+  });
+
+  it("handles connection-reset family (RemoteProtocolError, ConnectionReset, no-message)", () => {
+    expect(inferA2AErrorHint("RemoteProtocolError: ...")).toMatch(/connection.*dropped/);
+    expect(inferA2AErrorHint("ConnectionResetError")).toMatch(/connection.*dropped/);
+    expect(inferA2AErrorHint("connection reset by peer")).toMatch(/connection.*dropped/);
+    expect(inferA2AErrorHint("RemoteProtocolError (no message — likely connection reset)")).toMatch(/connection.*dropped/);
+  });
+
+  it("recognises agent-runtime exceptions", () => {
+    expect(inferA2AErrorHint("Agent error: ValueError raised")).toMatch(/runtime threw an exception/);
+    expect(inferA2AErrorHint("RuntimeException in tool call")).toMatch(/runtime threw an exception/);
+  });
+
+  it("recognises peer-unreachable cases (Activity-tab originals)", () => {
+    expect(inferA2AErrorHint("workspace not found")).toMatch(/can't be reached/);
+    expect(inferA2AErrorHint("not accessible")).toMatch(/can't be reached/);
+    expect(inferA2AErrorHint("workspace is offline")).toMatch(/can't be reached/);
+  });
+
+  it("returns the empty-detail-specific hint when input is exactly empty", () => {
+    expect(inferA2AErrorHint("")).toMatch(/no error detail/);
+  });
+
+  it("returns a generic fallback for unrecognised text", () => {
+    const hint = inferA2AErrorHint("some completely novel error nobody has matched yet");
+    expect(hint).toMatch(/Check the workspace logs|delivery failure/);
+  });
+
+  it("Claude SDK wedge wins over the more general timeout pattern", () => {
+    // Both 'control request timeout' and 'timeout' match the same
+    // input. The SDK wedge hint is more actionable; the ordering in
+    // the function must keep it first. Lock that priority in.
+    const hint = inferA2AErrorHint("Control request timeout: initialize");
+    expect(hint).toMatch(/Claude Code SDK/);
+    expect(hint).not.toMatch(/proxy timeout/);
+  });
+});
diff --git a/canvas/src/components/tabs/chat/a2aErrorHint.ts b/canvas/src/components/tabs/chat/a2aErrorHint.ts
new file mode 100644
index 00000000..e29e643a
--- /dev/null
+++ b/canvas/src/components/tabs/chat/a2aErrorHint.ts
@@ -0,0 +1,54 @@
+/**
+ * Maps an A2A delivery-failure detail string (the bit AFTER stripping
+ * the [A2A_ERROR] sentinel prefix) to a one-line operator-actionable
+ * hint. Pattern matches are lowercase substring checks, ordered most-
+ * specific first so the right hint wins when multiple patterns
+ * overlap (e.g. "control request timeout" wins over generic "timeout").
+ *
+ * Used by both the chat Agent Comms panel and the Activity tab so the
+ * same symptom reads identically across surfaces. Two prior copies
+ * had already drifted (Activity tab gained `not found`/`offline`
+ * cases AgentCommsPanel never picked up) — this module is the merged
+ * superset and the only place hint text should change.
+ */
+export function inferA2AErrorHint(detail: string): string {
+  const t = detail.toLowerCase();
+
+  // "control request timeout" is the specific Claude Code SDK init
+  // wedge symptom. Pattern on the full phrase, not bare "initialize"
+  // — a user task containing "failed to initialize database" would
+  // false-positive into the SDK-wedge hint.
+  if (t.includes("control request timeout")) {
+    return "The remote agent's Claude Code SDK is wedged on initialization (often after a long idle period or OAuth refresh). A workspace restart usually clears it.";
+  }
+  if (
+    t.includes("readtimeout") ||
+    t.includes("connecttimeout") ||
+    t.includes("deadline exceeded") ||
+    t.includes("timeout")
+  ) {
+    return "The remote agent didn't respond within the proxy timeout. It may be busy with a long task, or the runtime is stuck — restart the workspace if this repeats.";
+  }
+  if (
+    t.includes("connectionreset") ||
+    t.includes("remoteprotocolerror") ||
+    t.includes("connection reset") ||
+    t.includes("no message")
+  ) {
+    return "The connection to the remote agent dropped before a reply arrived. Usually a transient network blip — retry once. If it repeats, the remote container may have crashed mid-request; check its logs.";
+  }
+  if (t.includes("agent error") || t.includes("exception")) {
+    return "The remote agent's runtime threw an exception. Check the workspace's container logs for the traceback. Restart usually clears transient runtime crashes.";
+  }
+  if (
+    t.includes("not found") ||
+    t.includes("not accessible") ||
+    t.includes("offline")
+  ) {
+    return "The remote workspace can't be reached — it may be stopped, removed, or outside the access control list. Verify the peer is online before retrying.";
+  }
+  if (detail === "") {
+    return "The remote agent returned no error detail (the underlying httpx exception had an empty message — typically a connection-reset or silent timeout). A workspace restart is the safe first move.";
+  }
+  return "The remote agent reported a delivery failure. Check the workspace logs or try restarting.";
+}
diff --git a/workspace/a2a_client.py b/workspace/a2a_client.py
index 8893e2e7..d740bd6a 100644
--- a/workspace/a2a_client.py
+++ b/workspace/a2a_client.py
@@ -111,7 +111,12 @@ async def send_a2a_message(target_url: str, message: str) -> str:
             type_name = type(e).__name__
             if not msg:
                 detail = f"{type_name} (no message — likely connection reset or silent timeout)"
-            elif type_name in msg:
+            elif msg.startswith(f"{type_name}:") or msg.startswith(f"{type_name} "):
+                # Already prefixed with the type — don't double-prefix.
+                # Prefix-anchored check (not substring) so a message that
+                # happens to mention some OTHER class name mid-string
+                # (e.g. "got OSError on read") doesn't suppress our own
+                # type prefix and lose the diagnostic signal.
                 detail = msg
             else:
                 detail = f"{type_name}: {msg}"
diff --git a/workspace/a2a_tools.py b/workspace/a2a_tools.py
index 4834870b..3e3671bb 100644
--- a/workspace/a2a_tools.py
+++ b/workspace/a2a_tools.py
@@ -188,7 +188,14 @@ async def tool_delegate_task(workspace_id: str, task: str) -> str:
     # Strip the sentinel prefix so error_detail is the human-readable
     # cause directly. The Activity tab's red error chip surfaces this
     # without the user having to scroll into the raw response JSON.
-    error_detail = result[len(_A2A_ERROR_PREFIX):].strip() if is_error else ""
+    #
+    # Cap at 4096 chars before sending — the platform's
+    # activity_logs.error_detail column is unbounded TEXT and a
+    # malicious or buggy peer could otherwise stream an arbitrarily
+    # large error message into the caller's activity log. 4096 is
+    # comfortably above any real exception traceback we've seen and
+    # well below an obvious-DoS threshold.
+    error_detail = result[len(_A2A_ERROR_PREFIX):].strip()[:4096] if is_error else ""
     await report_activity(
         "a2a_receive", workspace_id,
         f"{peer_name} responded ({len(result)} chars)" if not is_error else f"{peer_name} failed: {error_detail[:120]}",
diff --git a/workspace/tests/test_a2a_client.py b/workspace/tests/test_a2a_client.py
index a9fb522a..fd813f3e 100644
--- a/workspace/tests/test_a2a_client.py
+++ b/workspace/tests/test_a2a_client.py
@@ -207,6 +207,24 @@ class TestSendA2AMessage:
         # Target URL is included so chained delegations are traceable.
         assert "target=http://target/a2a" in result
 
+    async def test_jsonrpc_error_with_code_zero_includes_code_in_detail(self):
+        """JSON-RPC error code=0 is technically not valid in the spec,
+        but a malformed peer can still send it — make sure the code is
+        preserved in the detail rather than collapsing into the
+        no-code path. Locks in the `code is not None` semantics over
+        the truthy-check shortcut."""
+        import a2a_client
+
+        resp = _make_response(200, {"error": {"code": 0, "message": "weird"}})
+        mock_client = _make_mock_client(post_resp=resp)
+
+        with patch("a2a_client.httpx.AsyncClient", return_value=mock_client):
+            result = await a2a_client.send_a2a_message("http://target/a2a", "task")
+
+        assert result.startswith(a2a_client._A2A_ERROR_PREFIX)
+        assert "code=0" in result
+        assert "weird" in result
+
     async def test_neither_result_nor_error_returns_a2a_error_with_payload(self):
         """Response with neither 'result' nor 'error' → A2A_ERROR + payload context."""
         import a2a_client