fix(a2a): review-driven hardening — prefix-anchored type check, error_detail cap, shared hint module

Three required fixes from the bundle review of 391e1872: 1. workspace/a2a_client.py: substring `type_name in msg` could miss the diagnostic prefix when an exception's message embedded a different class name mid-string (e.g. `OSError("see ConnectionError below")` → printed as plain msg, type lost). Switched to a prefix-anchored check (`msg.startswith(f"{type_name}:")` etc.) so the type label is always added when not already at the start of the message. 2. workspace/a2a_tools.py: `activity_logs.error_detail` is unbounded TEXT on the platform (handlers/activity.go does not validate length). A buggy or hostile peer could stream arbitrarily large error messages into the caller's activity log. Cap at 4096 chars at the producer — comfortably above any real exception traceback, well below an obvious-DoS threshold. 3. New regression test for JSON-RPC `code=0` — pins the `code is not None` semantics so the code is preserved in the detail rather than collapsing into the no-code path. Code=0 is not valid per the spec, but a malformed peer can still emit it and we want it visible for diagnosis. Plus one optional taken: extracted the A2A-error → hint mapping into canvas/src/components/tabs/chat/a2aErrorHint.ts. The two prior copies (AgentCommsPanel.inferCauseHint + ActivityTab.inferA2AErrorHint) had already drifted — Activity tab gained `not found`/`offline` cases the chat panel never picked up, AgentCommsPanel handled empty-input explicitly while Activity didn't. The shared module is the merged superset, with 10 unit tests pinning each named pattern + the "most specific first" ordering (Claude SDK wedge wins over generic timeout). Skipped (per analysis): - Unicode-naive 120-char slice — Python str[:N] slices on code points, not bytes. Safe. - Nested [A2A_ERROR] confusion — non-issue per reviewer; outer prefix winning still produces a structured render. - MessagePreview + JsonBlock dual render on errors — intentional drilldown; raw JSON is below the fold for operators who need it. - console.warn dedup — refetches don't happen per-event so spam risk is low. - str(data)[:200] materialization — A2A response bodies aren't typically MB-sized. Verified: 1005 canvas tests pass (10 new hint tests); 10 Python send_a2a_message tests pass (1 new for code=0); tsc clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 23:47:44 -07:00 · 2026-04-24 23:47:44 -07:00 · c159d85eb5
commit c159d85eb5
parent 391e187281
7 changed files with 158 additions and 49 deletions
--- a/canvas/src/components/tabs/ActivityTab.tsx
+++ b/canvas/src/components/tabs/ActivityTab.tsx
@ -5,6 +5,7 @@ import { api } from "@/lib/api";
 import { ConversationTraceModal } from "@/components/ConversationTraceModal";
 import { type ActivityEntry } from "@/types/activity";
 import { useWorkspaceName } from "@/hooks/useWorkspaceName";
+import { inferA2AErrorHint } from "./chat/a2aErrorHint";

 interface Props {
  workspaceId: string;
@ -288,29 +289,6 @@ function ActivityRow({

 const A2A_ERROR_PREFIX = "[A2A_ERROR]";

-/** Best-effort cause hint for an A2A delivery failure. Mirrors the
- *  patterns used by AgentCommsPanel's inferCauseHint so the same
- *  symptom reads the same way in both surfaces. */
-function inferA2AErrorHint(detail: string): string {
-  const t = detail.toLowerCase();
-  if (t.includes("control request timeout")) {
-    return "The remote agent's Claude Code SDK is wedged on initialization. Restart the workspace to clear it.";
-  }
-  if (t.includes("readtimeout") || t.includes("connecttimeout") || t.includes("deadline exceeded") || t.includes("timeout")) {
-    return "The remote agent didn't respond within the proxy timeout. It may be busy with a long task or its runtime is stuck — restart if this repeats.";
-  }
-  if (t.includes("connectionreset") || t.includes("remoteprotocolerror") || t.includes("connection reset") || t.includes("no message")) {
-    return "The connection to the remote agent dropped before a reply arrived. Usually a transient network blip; retry once. If it repeats, the remote container may have crashed mid-request — check its logs.";
-  }
-  if (t.includes("agent error") || t.includes("exception")) {
-    return "The remote agent's runtime threw an exception. Check the workspace's container logs for the traceback. Restart usually clears transient runtime crashes.";
-  }
-  if (t.includes("not found") || t.includes("not accessible") || t.includes("offline")) {
-    return "The remote workspace can't be reached — it may be stopped, removed, or outside the access control list. Verify the peer is online before retrying.";
-  }
-  return "Check the remote agent's container logs for context. Restart the workspace if the failure repeats.";
-}
-
 /** Render a [A2A_ERROR]-prefixed response as a structured error block
 *  with a stripped detail line + a cause hint. The previous raw render
 *  ("[A2A_ERROR] " literal in the response area) gave the user no
--- a/canvas/src/components/tabs/chat/AgentCommsPanel.tsx
+++ b/canvas/src/components/tabs/chat/AgentCommsPanel.tsx
@ -9,6 +9,7 @@ import { WS_URL } from "@/store/socket";
 import { closeWebSocketGracefully } from "@/lib/ws-close";
 import { showToast } from "../../Toaster";
 import { extractResponseText, extractRequestText } from "./message-parser";
+import { inferA2AErrorHint } from "./a2aErrorHint";

 export interface ActivityEntry {
  id: string;
@ -115,29 +116,8 @@ function unwrapErrorText(raw: string | null): string {
  return trimmed;
 }

-/** Best-effort cause hint based on what we can see in the error text.
- *  These map known runtime symptoms to operator-actionable language so
- *  the user isn't left staring at "[A2A_ERROR]" with no next step. */
-function inferCauseHint(errorText: string): string {
-  const t = errorText.toLowerCase();
-  // "control request timeout" is the specific Claude Code SDK init
-  // wedge symptom. Don't pattern on bare "initialize" — too broad
-  // (a user task containing "failed to initialize database" would
-  // false-positive into the SDK-wedge hint).
-  if (t.includes("control request timeout")) {
-    return "The remote agent's Claude Code SDK is wedged on initialization (often after a long idle period or OAuth refresh). A workspace restart usually clears it.";
-  }
-  if (t.includes("deadline exceeded") || t.includes("timeout")) {
-    return "The remote agent didn't respond within the proxy timeout. It may be busy with a long-running task, or the runtime is stuck. Restart the workspace if this repeats.";
-  }
-  if (t.includes("agent error") || t.includes("exception")) {
-    return "The remote agent's runtime threw an exception. Check the workspace's container logs for the traceback. Restart usually clears transient runtime crashes.";
-  }
-  if (errorText === "") {
-    return "The remote agent returned no error detail (the underlying httpx exception had an empty message — typically a connection-reset or silent timeout). A workspace restart is the safe first move.";
-  }
-  return "The remote agent reported a delivery failure. Check the workspace logs or try restarting.";
-}
+// inferA2AErrorHint moved to ./a2aErrorHint so the Activity tab and
+// this panel render identical hints for the same symptom.

 export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
  const [messages, setMessages] = useState<CommMessage[]>([]);
@ -314,7 +294,7 @@ function ErrorMessage({ msg }: { msg: CommMessage }) {
  const selectNode = useCanvasStore((s) => s.selectNode);
  const [restarting, setRestarting] = useState(false);
  const errorText = unwrapErrorText(msg.responseText);
-  const hint = inferCauseHint(errorText);
+  const hint = inferA2AErrorHint(errorText);

  // Guard against acting on a peer whose workspace has been deleted
  // since this row was logged. Without the guard, restart 404s
--- a/canvas/src/components/tabs/chat/tests/a2aErrorHint.test.ts
+++ b/canvas/src/components/tabs/chat/tests/a2aErrorHint.test.ts
@ -0,0 +1,67 @@
+import { describe, it, expect } from "vitest";
+import { inferA2AErrorHint } from "../a2aErrorHint";
+
+// Pure logic. Pin every named pattern so a future contributor adding a
+// new symptom doesn't accidentally collapse the buckets — and so the
+// "most specific first" ordering can't drift without a test failing.
+
+describe("inferA2AErrorHint", () => {
+  it("matches the Claude Code SDK init wedge specifically", () => {
+    const hint = inferA2AErrorHint("Control request timeout: initialize");
+    expect(hint).toMatch(/Claude Code SDK is wedged/);
+  });
+
+  it("does NOT misfire on user tasks containing 'initialize' generally", () => {
+    // Regression: an earlier bare-`initialize` pattern would have
+    // false-positived "failed to initialize database" into the SDK
+    // wedge hint. Confirm the full-phrase guard holds.
+    const hint = inferA2AErrorHint("failed to initialize database connection");
+    expect(hint).not.toMatch(/Claude Code SDK/);
+  });
+
+  it("recognises httpx ReadTimeout / ConnectTimeout class names", () => {
+    expect(inferA2AErrorHint("ReadTimeout: timeout")).toMatch(/proxy timeout/);
+    expect(inferA2AErrorHint("ConnectTimeout: ...")).toMatch(/proxy timeout/);
+  });
+
+  it("recognises generic timeout / deadline-exceeded language", () => {
+    expect(inferA2AErrorHint("deadline exceeded after 300s")).toMatch(/proxy timeout/);
+    expect(inferA2AErrorHint("Operation timeout")).toMatch(/proxy timeout/);
+  });
+
+  it("handles connection-reset family (RemoteProtocolError, ConnectionReset, no-message)", () => {
+    expect(inferA2AErrorHint("RemoteProtocolError: ...")).toMatch(/connection.*dropped/);
+    expect(inferA2AErrorHint("ConnectionResetError")).toMatch(/connection.*dropped/);
+    expect(inferA2AErrorHint("connection reset by peer")).toMatch(/connection.*dropped/);
+    expect(inferA2AErrorHint("RemoteProtocolError (no message — likely connection reset)")).toMatch(/connection.*dropped/);
+  });
+
+  it("recognises agent-runtime exceptions", () => {
+    expect(inferA2AErrorHint("Agent error: ValueError raised")).toMatch(/runtime threw an exception/);
+    expect(inferA2AErrorHint("RuntimeException in tool call")).toMatch(/runtime threw an exception/);
+  });
+
+  it("recognises peer-unreachable cases (Activity-tab originals)", () => {
+    expect(inferA2AErrorHint("workspace not found")).toMatch(/can't be reached/);
+    expect(inferA2AErrorHint("not accessible")).toMatch(/can't be reached/);
+    expect(inferA2AErrorHint("workspace is offline")).toMatch(/can't be reached/);
+  });
+
+  it("returns the empty-detail-specific hint when input is exactly empty", () => {
+    expect(inferA2AErrorHint("")).toMatch(/no error detail/);
+  });
+
+  it("returns a generic fallback for unrecognised text", () => {
+    const hint = inferA2AErrorHint("some completely novel error nobody has matched yet");
+    expect(hint).toMatch(/Check the workspace logs|delivery failure/);
+  });
+
+  it("Claude SDK wedge wins over the more general timeout pattern", () => {
+    // Both 'control request timeout' and 'timeout' match the same
+    // input. The SDK wedge hint is more actionable; the ordering in
+    // the function must keep it first. Lock that priority in.
+    const hint = inferA2AErrorHint("Control request timeout: initialize");
+    expect(hint).toMatch(/Claude Code SDK/);
+    expect(hint).not.toMatch(/proxy timeout/);
+  });
+});
--- a/canvas/src/components/tabs/chat/a2aErrorHint.ts
+++ b/canvas/src/components/tabs/chat/a2aErrorHint.ts
@ -0,0 +1,54 @@
+/**
+ * Maps an A2A delivery-failure detail string (the bit AFTER stripping
+ * the [A2A_ERROR] sentinel prefix) to a one-line operator-actionable
+ * hint. Pattern matches are lowercase substring checks, ordered most-
+ * specific first so the right hint wins when multiple patterns
+ * overlap (e.g. "control request timeout" wins over generic "timeout").
+ *
+ * Used by both the chat Agent Comms panel and the Activity tab so the
+ * same symptom reads identically across surfaces. Two prior copies
+ * had already drifted (Activity tab gained `not found`/`offline`
+ * cases AgentCommsPanel never picked up) — this module is the merged
+ * superset and the only place hint text should change.
+ */
+export function inferA2AErrorHint(detail: string): string {
+  const t = detail.toLowerCase();
+
+  // "control request timeout" is the specific Claude Code SDK init
+  // wedge symptom. Pattern on the full phrase, not bare "initialize"
+  // — a user task containing "failed to initialize database" would
+  // false-positive into the SDK-wedge hint.
+  if (t.includes("control request timeout")) {
+    return "The remote agent's Claude Code SDK is wedged on initialization (often after a long idle period or OAuth refresh). A workspace restart usually clears it.";
+  }
+  if (
+    t.includes("readtimeout") ||
+    t.includes("connecttimeout") ||
+    t.includes("deadline exceeded") ||
+    t.includes("timeout")
+  ) {
+    return "The remote agent didn't respond within the proxy timeout. It may be busy with a long task, or the runtime is stuck — restart the workspace if this repeats.";
+  }
+  if (
+    t.includes("connectionreset") ||
+    t.includes("remoteprotocolerror") ||
+    t.includes("connection reset") ||
+    t.includes("no message")
+  ) {
+    return "The connection to the remote agent dropped before a reply arrived. Usually a transient network blip — retry once. If it repeats, the remote container may have crashed mid-request; check its logs.";
+  }
+  if (t.includes("agent error") || t.includes("exception")) {
+    return "The remote agent's runtime threw an exception. Check the workspace's container logs for the traceback. Restart usually clears transient runtime crashes.";
+  }
+  if (
+    t.includes("not found") ||
+    t.includes("not accessible") ||
+    t.includes("offline")
+  ) {
+    return "The remote workspace can't be reached — it may be stopped, removed, or outside the access control list. Verify the peer is online before retrying.";
+  }
+  if (detail === "") {
+    return "The remote agent returned no error detail (the underlying httpx exception had an empty message — typically a connection-reset or silent timeout). A workspace restart is the safe first move.";
+  }
+  return "The remote agent reported a delivery failure. Check the workspace logs or try restarting.";
+}
--- a/workspace/a2a_client.py
+++ b/workspace/a2a_client.py
@ -111,7 +111,12 @@ async def send_a2a_message(target_url: str, message: str) -> str:
            type_name = type(e).__name__
            if not msg:
                detail = f"{type_name} (no message — likely connection reset or silent timeout)"
-            elif type_name in msg:
+            elif msg.startswith(f"{type_name}:") or msg.startswith(f"{type_name} "):
+                # Already prefixed with the type — don't double-prefix.
+                # Prefix-anchored check (not substring) so a message that
+                # happens to mention some OTHER class name mid-string
+                # (e.g. "got OSError on read") doesn't suppress our own
+                # type prefix and lose the diagnostic signal.
                detail = msg
            else:
                detail = f"{type_name}: {msg}"
--- a/workspace/a2a_tools.py
+++ b/workspace/a2a_tools.py
@ -188,7 +188,14 @@ async def tool_delegate_task(workspace_id: str, task: str) -> str:
    # Strip the sentinel prefix so error_detail is the human-readable
    # cause directly. The Activity tab's red error chip surfaces this
    # without the user having to scroll into the raw response JSON.
-    error_detail = result[len(_A2A_ERROR_PREFIX):].strip() if is_error else ""
+    #
+    # Cap at 4096 chars before sending — the platform's
+    # activity_logs.error_detail column is unbounded TEXT and a
+    # malicious or buggy peer could otherwise stream an arbitrarily
+    # large error message into the caller's activity log. 4096 is
+    # comfortably above any real exception traceback we've seen and
+    # well below an obvious-DoS threshold.
+    error_detail = result[len(_A2A_ERROR_PREFIX):].strip()[:4096] if is_error else ""
    await report_activity(
        "a2a_receive", workspace_id,
        f"{peer_name} responded ({len(result)} chars)" if not is_error else f"{peer_name} failed: {error_detail[:120]}",
--- a/workspace/tests/test_a2a_client.py
+++ b/workspace/tests/test_a2a_client.py
@ -207,6 +207,24 @@ class TestSendA2AMessage:
        # Target URL is included so chained delegations are traceable.
        assert "target=http://target/a2a" in result

+    async def test_jsonrpc_error_with_code_zero_includes_code_in_detail(self):
+        """JSON-RPC error code=0 is technically not valid in the spec,
+        but a malformed peer can still send it — make sure the code is
+        preserved in the detail rather than collapsing into the
+        no-code path. Locks in the `code is not None` semantics over
+        the truthy-check shortcut."""
+        import a2a_client
+
+        resp = _make_response(200, {"error": {"code": 0, "message": "weird"}})
+        mock_client = _make_mock_client(post_resp=resp)
+
+        with patch("a2a_client.httpx.AsyncClient", return_value=mock_client):
+            result = await a2a_client.send_a2a_message("http://target/a2a", "task")
+
+        assert result.startswith(a2a_client._A2A_ERROR_PREFIX)
+        assert "code=0" in result
+        assert "weird" in result
+
    async def test_neither_result_nor_error_returns_a2a_error_with_payload(self):
        """Response with neither 'result' nor 'error' → A2A_ERROR + payload context."""
        import a2a_client