forked from molecule-ai/molecule-core
fix(a2a): review-driven hardening — prefix-anchored type check, error_detail cap, shared hint module
Three required fixes from the bundle review of 391e1872:
1. workspace/a2a_client.py: substring `type_name in msg` could miss
the diagnostic prefix when an exception's message embedded a
different class name mid-string (e.g. `OSError("see ConnectionError
below")` → printed as plain msg, type lost). Switched to a
prefix-anchored check (`msg.startswith(f"{type_name}:")` etc.) so
the type label is always added when not already at the start of
the message.
2. workspace/a2a_tools.py: `activity_logs.error_detail` is unbounded
TEXT on the platform (handlers/activity.go does not validate
length). A buggy or hostile peer could stream arbitrarily large
error messages into the caller's activity log. Cap at 4096 chars
at the producer — comfortably above any real exception traceback,
well below an obvious-DoS threshold.
3. New regression test for JSON-RPC `code=0` — pins the
`code is not None` semantics so the code is preserved in the
detail rather than collapsing into the no-code path. Code=0 is
not valid per the spec, but a malformed peer can still emit it
and we want it visible for diagnosis.
Plus one optional taken: extracted the A2A-error → hint mapping into
canvas/src/components/tabs/chat/a2aErrorHint.ts. The two prior copies
(AgentCommsPanel.inferCauseHint + ActivityTab.inferA2AErrorHint) had
already drifted — Activity tab gained `not found`/`offline` cases the
chat panel never picked up, AgentCommsPanel handled empty-input
explicitly while Activity didn't. The shared module is the merged
superset, with 10 unit tests pinning each named pattern + the
"most specific first" ordering (Claude SDK wedge wins over generic
timeout).
Skipped (per analysis):
- Unicode-naive 120-char slice — Python str[:N] slices on code
points, not bytes. Safe.
- Nested [A2A_ERROR] confusion — non-issue per reviewer; outer
prefix winning still produces a structured render.
- MessagePreview + JsonBlock dual render on errors — intentional
drilldown; raw JSON is below the fold for operators who need it.
- console.warn dedup — refetches don't happen per-event so spam
risk is low.
- str(data)[:200] materialization — A2A response bodies aren't
typically MB-sized.
Verified: 1005 canvas tests pass (10 new hint tests); 10 Python
send_a2a_message tests pass (1 new for code=0); tsc clean.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
391e187281
commit
c159d85eb5
@ -5,6 +5,7 @@ import { api } from "@/lib/api";
|
||||
import { ConversationTraceModal } from "@/components/ConversationTraceModal";
|
||||
import { type ActivityEntry } from "@/types/activity";
|
||||
import { useWorkspaceName } from "@/hooks/useWorkspaceName";
|
||||
import { inferA2AErrorHint } from "./chat/a2aErrorHint";
|
||||
|
||||
interface Props {
|
||||
workspaceId: string;
|
||||
@ -288,29 +289,6 @@ function ActivityRow({
|
||||
|
||||
const A2A_ERROR_PREFIX = "[A2A_ERROR]";
|
||||
|
||||
/** Best-effort cause hint for an A2A delivery failure. Mirrors the
|
||||
* patterns used by AgentCommsPanel's inferCauseHint so the same
|
||||
* symptom reads the same way in both surfaces. */
|
||||
function inferA2AErrorHint(detail: string): string {
|
||||
const t = detail.toLowerCase();
|
||||
if (t.includes("control request timeout")) {
|
||||
return "The remote agent's Claude Code SDK is wedged on initialization. Restart the workspace to clear it.";
|
||||
}
|
||||
if (t.includes("readtimeout") || t.includes("connecttimeout") || t.includes("deadline exceeded") || t.includes("timeout")) {
|
||||
return "The remote agent didn't respond within the proxy timeout. It may be busy with a long task or its runtime is stuck — restart if this repeats.";
|
||||
}
|
||||
if (t.includes("connectionreset") || t.includes("remoteprotocolerror") || t.includes("connection reset") || t.includes("no message")) {
|
||||
return "The connection to the remote agent dropped before a reply arrived. Usually a transient network blip; retry once. If it repeats, the remote container may have crashed mid-request — check its logs.";
|
||||
}
|
||||
if (t.includes("agent error") || t.includes("exception")) {
|
||||
return "The remote agent's runtime threw an exception. Check the workspace's container logs for the traceback. Restart usually clears transient runtime crashes.";
|
||||
}
|
||||
if (t.includes("not found") || t.includes("not accessible") || t.includes("offline")) {
|
||||
return "The remote workspace can't be reached — it may be stopped, removed, or outside the access control list. Verify the peer is online before retrying.";
|
||||
}
|
||||
return "Check the remote agent's container logs for context. Restart the workspace if the failure repeats.";
|
||||
}
|
||||
|
||||
/** Render a [A2A_ERROR]-prefixed response as a structured error block
|
||||
* with a stripped detail line + a cause hint. The previous raw render
|
||||
* ("[A2A_ERROR] " literal in the response area) gave the user no
|
||||
|
||||
@ -9,6 +9,7 @@ import { WS_URL } from "@/store/socket";
|
||||
import { closeWebSocketGracefully } from "@/lib/ws-close";
|
||||
import { showToast } from "../../Toaster";
|
||||
import { extractResponseText, extractRequestText } from "./message-parser";
|
||||
import { inferA2AErrorHint } from "./a2aErrorHint";
|
||||
|
||||
export interface ActivityEntry {
|
||||
id: string;
|
||||
@ -115,29 +116,8 @@ function unwrapErrorText(raw: string | null): string {
|
||||
return trimmed;
|
||||
}
|
||||
|
||||
/** Best-effort cause hint based on what we can see in the error text.
|
||||
* These map known runtime symptoms to operator-actionable language so
|
||||
* the user isn't left staring at "[A2A_ERROR]" with no next step. */
|
||||
function inferCauseHint(errorText: string): string {
|
||||
const t = errorText.toLowerCase();
|
||||
// "control request timeout" is the specific Claude Code SDK init
|
||||
// wedge symptom. Don't pattern on bare "initialize" — too broad
|
||||
// (a user task containing "failed to initialize database" would
|
||||
// false-positive into the SDK-wedge hint).
|
||||
if (t.includes("control request timeout")) {
|
||||
return "The remote agent's Claude Code SDK is wedged on initialization (often after a long idle period or OAuth refresh). A workspace restart usually clears it.";
|
||||
}
|
||||
if (t.includes("deadline exceeded") || t.includes("timeout")) {
|
||||
return "The remote agent didn't respond within the proxy timeout. It may be busy with a long-running task, or the runtime is stuck. Restart the workspace if this repeats.";
|
||||
}
|
||||
if (t.includes("agent error") || t.includes("exception")) {
|
||||
return "The remote agent's runtime threw an exception. Check the workspace's container logs for the traceback. Restart usually clears transient runtime crashes.";
|
||||
}
|
||||
if (errorText === "") {
|
||||
return "The remote agent returned no error detail (the underlying httpx exception had an empty message — typically a connection-reset or silent timeout). A workspace restart is the safe first move.";
|
||||
}
|
||||
return "The remote agent reported a delivery failure. Check the workspace logs or try restarting.";
|
||||
}
|
||||
// inferA2AErrorHint moved to ./a2aErrorHint so the Activity tab and
|
||||
// this panel render identical hints for the same symptom.
|
||||
|
||||
export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
|
||||
const [messages, setMessages] = useState<CommMessage[]>([]);
|
||||
@ -314,7 +294,7 @@ function ErrorMessage({ msg }: { msg: CommMessage }) {
|
||||
const selectNode = useCanvasStore((s) => s.selectNode);
|
||||
const [restarting, setRestarting] = useState(false);
|
||||
const errorText = unwrapErrorText(msg.responseText);
|
||||
const hint = inferCauseHint(errorText);
|
||||
const hint = inferA2AErrorHint(errorText);
|
||||
|
||||
// Guard against acting on a peer whose workspace has been deleted
|
||||
// since this row was logged. Without the guard, restart 404s
|
||||
|
||||
@ -0,0 +1,67 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { inferA2AErrorHint } from "../a2aErrorHint";
|
||||
|
||||
// Pure logic. Pin every named pattern so a future contributor adding a
|
||||
// new symptom doesn't accidentally collapse the buckets — and so the
|
||||
// "most specific first" ordering can't drift without a test failing.
|
||||
|
||||
describe("inferA2AErrorHint", () => {
|
||||
it("matches the Claude Code SDK init wedge specifically", () => {
|
||||
const hint = inferA2AErrorHint("Control request timeout: initialize");
|
||||
expect(hint).toMatch(/Claude Code SDK is wedged/);
|
||||
});
|
||||
|
||||
it("does NOT misfire on user tasks containing 'initialize' generally", () => {
|
||||
// Regression: an earlier bare-`initialize` pattern would have
|
||||
// false-positived "failed to initialize database" into the SDK
|
||||
// wedge hint. Confirm the full-phrase guard holds.
|
||||
const hint = inferA2AErrorHint("failed to initialize database connection");
|
||||
expect(hint).not.toMatch(/Claude Code SDK/);
|
||||
});
|
||||
|
||||
it("recognises httpx ReadTimeout / ConnectTimeout class names", () => {
|
||||
expect(inferA2AErrorHint("ReadTimeout: timeout")).toMatch(/proxy timeout/);
|
||||
expect(inferA2AErrorHint("ConnectTimeout: ...")).toMatch(/proxy timeout/);
|
||||
});
|
||||
|
||||
it("recognises generic timeout / deadline-exceeded language", () => {
|
||||
expect(inferA2AErrorHint("deadline exceeded after 300s")).toMatch(/proxy timeout/);
|
||||
expect(inferA2AErrorHint("Operation timeout")).toMatch(/proxy timeout/);
|
||||
});
|
||||
|
||||
it("handles connection-reset family (RemoteProtocolError, ConnectionReset, no-message)", () => {
|
||||
expect(inferA2AErrorHint("RemoteProtocolError: ...")).toMatch(/connection.*dropped/);
|
||||
expect(inferA2AErrorHint("ConnectionResetError")).toMatch(/connection.*dropped/);
|
||||
expect(inferA2AErrorHint("connection reset by peer")).toMatch(/connection.*dropped/);
|
||||
expect(inferA2AErrorHint("RemoteProtocolError (no message — likely connection reset)")).toMatch(/connection.*dropped/);
|
||||
});
|
||||
|
||||
it("recognises agent-runtime exceptions", () => {
|
||||
expect(inferA2AErrorHint("Agent error: ValueError raised")).toMatch(/runtime threw an exception/);
|
||||
expect(inferA2AErrorHint("RuntimeException in tool call")).toMatch(/runtime threw an exception/);
|
||||
});
|
||||
|
||||
it("recognises peer-unreachable cases (Activity-tab originals)", () => {
|
||||
expect(inferA2AErrorHint("workspace not found")).toMatch(/can't be reached/);
|
||||
expect(inferA2AErrorHint("not accessible")).toMatch(/can't be reached/);
|
||||
expect(inferA2AErrorHint("workspace is offline")).toMatch(/can't be reached/);
|
||||
});
|
||||
|
||||
it("returns the empty-detail-specific hint when input is exactly empty", () => {
|
||||
expect(inferA2AErrorHint("")).toMatch(/no error detail/);
|
||||
});
|
||||
|
||||
it("returns a generic fallback for unrecognised text", () => {
|
||||
const hint = inferA2AErrorHint("some completely novel error nobody has matched yet");
|
||||
expect(hint).toMatch(/Check the workspace logs|delivery failure/);
|
||||
});
|
||||
|
||||
it("Claude SDK wedge wins over the more general timeout pattern", () => {
|
||||
// Both 'control request timeout' and 'timeout' match the same
|
||||
// input. The SDK wedge hint is more actionable; the ordering in
|
||||
// the function must keep it first. Lock that priority in.
|
||||
const hint = inferA2AErrorHint("Control request timeout: initialize");
|
||||
expect(hint).toMatch(/Claude Code SDK/);
|
||||
expect(hint).not.toMatch(/proxy timeout/);
|
||||
});
|
||||
});
|
||||
54
canvas/src/components/tabs/chat/a2aErrorHint.ts
Normal file
54
canvas/src/components/tabs/chat/a2aErrorHint.ts
Normal file
@ -0,0 +1,54 @@
|
||||
/**
|
||||
* Maps an A2A delivery-failure detail string (the bit AFTER stripping
|
||||
* the [A2A_ERROR] sentinel prefix) to a one-line operator-actionable
|
||||
* hint. Pattern matches are lowercase substring checks, ordered most-
|
||||
* specific first so the right hint wins when multiple patterns
|
||||
* overlap (e.g. "control request timeout" wins over generic "timeout").
|
||||
*
|
||||
* Used by both the chat Agent Comms panel and the Activity tab so the
|
||||
* same symptom reads identically across surfaces. Two prior copies
|
||||
* had already drifted (Activity tab gained `not found`/`offline`
|
||||
* cases AgentCommsPanel never picked up) — this module is the merged
|
||||
* superset and the only place hint text should change.
|
||||
*/
|
||||
export function inferA2AErrorHint(detail: string): string {
|
||||
const t = detail.toLowerCase();
|
||||
|
||||
// "control request timeout" is the specific Claude Code SDK init
|
||||
// wedge symptom. Pattern on the full phrase, not bare "initialize"
|
||||
// — a user task containing "failed to initialize database" would
|
||||
// false-positive into the SDK-wedge hint.
|
||||
if (t.includes("control request timeout")) {
|
||||
return "The remote agent's Claude Code SDK is wedged on initialization (often after a long idle period or OAuth refresh). A workspace restart usually clears it.";
|
||||
}
|
||||
if (
|
||||
t.includes("readtimeout") ||
|
||||
t.includes("connecttimeout") ||
|
||||
t.includes("deadline exceeded") ||
|
||||
t.includes("timeout")
|
||||
) {
|
||||
return "The remote agent didn't respond within the proxy timeout. It may be busy with a long task, or the runtime is stuck — restart the workspace if this repeats.";
|
||||
}
|
||||
if (
|
||||
t.includes("connectionreset") ||
|
||||
t.includes("remoteprotocolerror") ||
|
||||
t.includes("connection reset") ||
|
||||
t.includes("no message")
|
||||
) {
|
||||
return "The connection to the remote agent dropped before a reply arrived. Usually a transient network blip — retry once. If it repeats, the remote container may have crashed mid-request; check its logs.";
|
||||
}
|
||||
if (t.includes("agent error") || t.includes("exception")) {
|
||||
return "The remote agent's runtime threw an exception. Check the workspace's container logs for the traceback. Restart usually clears transient runtime crashes.";
|
||||
}
|
||||
if (
|
||||
t.includes("not found") ||
|
||||
t.includes("not accessible") ||
|
||||
t.includes("offline")
|
||||
) {
|
||||
return "The remote workspace can't be reached — it may be stopped, removed, or outside the access control list. Verify the peer is online before retrying.";
|
||||
}
|
||||
if (detail === "") {
|
||||
return "The remote agent returned no error detail (the underlying httpx exception had an empty message — typically a connection-reset or silent timeout). A workspace restart is the safe first move.";
|
||||
}
|
||||
return "The remote agent reported a delivery failure. Check the workspace logs or try restarting.";
|
||||
}
|
||||
@ -111,7 +111,12 @@ async def send_a2a_message(target_url: str, message: str) -> str:
|
||||
type_name = type(e).__name__
|
||||
if not msg:
|
||||
detail = f"{type_name} (no message — likely connection reset or silent timeout)"
|
||||
elif type_name in msg:
|
||||
elif msg.startswith(f"{type_name}:") or msg.startswith(f"{type_name} "):
|
||||
# Already prefixed with the type — don't double-prefix.
|
||||
# Prefix-anchored check (not substring) so a message that
|
||||
# happens to mention some OTHER class name mid-string
|
||||
# (e.g. "got OSError on read") doesn't suppress our own
|
||||
# type prefix and lose the diagnostic signal.
|
||||
detail = msg
|
||||
else:
|
||||
detail = f"{type_name}: {msg}"
|
||||
|
||||
@ -188,7 +188,14 @@ async def tool_delegate_task(workspace_id: str, task: str) -> str:
|
||||
# Strip the sentinel prefix so error_detail is the human-readable
|
||||
# cause directly. The Activity tab's red error chip surfaces this
|
||||
# without the user having to scroll into the raw response JSON.
|
||||
error_detail = result[len(_A2A_ERROR_PREFIX):].strip() if is_error else ""
|
||||
#
|
||||
# Cap at 4096 chars before sending — the platform's
|
||||
# activity_logs.error_detail column is unbounded TEXT and a
|
||||
# malicious or buggy peer could otherwise stream an arbitrarily
|
||||
# large error message into the caller's activity log. 4096 is
|
||||
# comfortably above any real exception traceback we've seen and
|
||||
# well below an obvious-DoS threshold.
|
||||
error_detail = result[len(_A2A_ERROR_PREFIX):].strip()[:4096] if is_error else ""
|
||||
await report_activity(
|
||||
"a2a_receive", workspace_id,
|
||||
f"{peer_name} responded ({len(result)} chars)" if not is_error else f"{peer_name} failed: {error_detail[:120]}",
|
||||
|
||||
@ -207,6 +207,24 @@ class TestSendA2AMessage:
|
||||
# Target URL is included so chained delegations are traceable.
|
||||
assert "target=http://target/a2a" in result
|
||||
|
||||
async def test_jsonrpc_error_with_code_zero_includes_code_in_detail(self):
|
||||
"""JSON-RPC error code=0 is technically not valid in the spec,
|
||||
but a malformed peer can still send it — make sure the code is
|
||||
preserved in the detail rather than collapsing into the
|
||||
no-code path. Locks in the `code is not None` semantics over
|
||||
the truthy-check shortcut."""
|
||||
import a2a_client
|
||||
|
||||
resp = _make_response(200, {"error": {"code": 0, "message": "weird"}})
|
||||
mock_client = _make_mock_client(post_resp=resp)
|
||||
|
||||
with patch("a2a_client.httpx.AsyncClient", return_value=mock_client):
|
||||
result = await a2a_client.send_a2a_message("http://target/a2a", "task")
|
||||
|
||||
assert result.startswith(a2a_client._A2A_ERROR_PREFIX)
|
||||
assert "code=0" in result
|
||||
assert "weird" in result
|
||||
|
||||
async def test_neither_result_nor_error_returns_a2a_error_with_payload(self):
|
||||
"""Response with neither 'result' nor 'error' → A2A_ERROR + payload context."""
|
||||
import a2a_client
|
||||
|
||||
Loading…
Reference in New Issue
Block a user