forked from molecule-ai/molecule-core
fix(a2a,canvas): make delivery failures comprehensive instead of "[A2A_ERROR] "
Symptom: Activity tab and Agent Comms surfaced bare "[A2A_ERROR] "
(prefix + nothing) for failed delegations. Operator had no signal
to act on — no exception type, no target, no hint about what went
wrong, no next step. Fix is in three layers.
1. workspace/a2a_client.py — every error path now produces an
actionable detail string:
- except branch: some httpx exceptions (RemoteProtocolError,
ConnectionReset variants) stringify to "". Pre-fix the catch
was `f"{_A2A_ERROR_PREFIX}{e}"` → bare prefix. Now falls back
to `<TypeName> (no message — likely connection reset or silent
timeout)` and always appends `[target=<url>]` for traceability
in chained delegations.
- JSON-RPC error branch: previously dropped error.code on the
floor and printed "unknown" when message was missing. Now
surfaces both, including the well-defined "JSON-RPC error
with no message (code=N)" path.
- "neither result nor error" branch: pre-fix returned
str(payload) which the canvas rendered as a successful
response block. Now tagged as A2A_ERROR with a payload
snippet so downstream UI routes through the error path.
2. workspace/a2a_tools.py — tool_delegate_task now passes
error_detail (the stripped error message) through to the
activity-log POST. The platform's activity_logs.error_detail
column is the canvas's red error chip source; populating it
makes the failure visible in the row header without the user
having to expand into raw response_body JSON. The summary line
also gets a 120-char prefix of the cause so the collapsed row
reads "React Engineer failed: ConnectionResetError: ... [target=...]"
instead of "React Engineer failed".
3. canvas/src/components/tabs/ActivityTab.tsx — MessagePreview
now detects [A2A_ERROR]-prefixed bodies and renders a
structured error block (red chip, stripped detail, cause hint)
instead of the previous gray text-block that showed the literal
"[A2A_ERROR]" string. inferA2AErrorHint mirrors the patterns
from AgentCommsPanel.inferCauseHint so the same symptom reads
the same way in both surfaces (Claude SDK init wedge → restart
workspace; timeout → busy/stuck; connection-reset → transient
blip then check logs).
Tests: 9 send_a2a_message tests pass (including a new regression
test for the empty-stringifying-exception case that the user
reported); 995 canvas tests pass; tsc clean.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
54f7c75c81
commit
391e187281
@ -286,6 +286,49 @@ function ActivityRow({
|
||||
);
|
||||
}
|
||||
|
||||
const A2A_ERROR_PREFIX = "[A2A_ERROR]";
|
||||
|
||||
/** Best-effort cause hint for an A2A delivery failure. Mirrors the
|
||||
* patterns used by AgentCommsPanel's inferCauseHint so the same
|
||||
* symptom reads the same way in both surfaces. */
|
||||
function inferA2AErrorHint(detail: string): string {
|
||||
const t = detail.toLowerCase();
|
||||
if (t.includes("control request timeout")) {
|
||||
return "The remote agent's Claude Code SDK is wedged on initialization. Restart the workspace to clear it.";
|
||||
}
|
||||
if (t.includes("readtimeout") || t.includes("connecttimeout") || t.includes("deadline exceeded") || t.includes("timeout")) {
|
||||
return "The remote agent didn't respond within the proxy timeout. It may be busy with a long task or its runtime is stuck — restart if this repeats.";
|
||||
}
|
||||
if (t.includes("connectionreset") || t.includes("remoteprotocolerror") || t.includes("connection reset") || t.includes("no message")) {
|
||||
return "The connection to the remote agent dropped before a reply arrived. Usually a transient network blip; retry once. If it repeats, the remote container may have crashed mid-request — check its logs.";
|
||||
}
|
||||
if (t.includes("agent error") || t.includes("exception")) {
|
||||
return "The remote agent's runtime threw an exception. Check the workspace's container logs for the traceback. Restart usually clears transient runtime crashes.";
|
||||
}
|
||||
if (t.includes("not found") || t.includes("not accessible") || t.includes("offline")) {
|
||||
return "The remote workspace can't be reached — it may be stopped, removed, or outside the access control list. Verify the peer is online before retrying.";
|
||||
}
|
||||
return "Check the remote agent's container logs for context. Restart the workspace if the failure repeats.";
|
||||
}
|
||||
|
||||
/** Render a [A2A_ERROR]-prefixed response as a structured error block
|
||||
* with a stripped detail line + a cause hint. The previous raw render
|
||||
* ("[A2A_ERROR] " literal in the response area) gave the user no
|
||||
* signal to act on. */
|
||||
function A2AErrorPreview({ label, raw }: { label: string; raw: string }) {
|
||||
const detail = raw.slice(A2A_ERROR_PREFIX.length).trim() || "(no detail provided)";
|
||||
const hint = inferA2AErrorHint(detail);
|
||||
return (
|
||||
<div>
|
||||
<div className="text-[8px] text-red-400/80 uppercase tracking-wider mb-1">{label} — delivery failed</div>
|
||||
<div className="text-[10px] text-red-300 bg-red-950/30 border border-red-800/40 rounded p-2 space-y-1.5">
|
||||
<div className="font-mono whitespace-pre-wrap break-words max-h-32 overflow-y-auto">{detail}</div>
|
||||
<div className="text-[9px] text-red-300/70 leading-relaxed border-t border-red-800/30 pt-1.5">{hint}</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
/** Extract human-readable text from A2A request/response JSON */
|
||||
function MessagePreview({ label, body }: { label: string; body: Record<string, unknown> }) {
|
||||
// Try to extract text from A2A message parts
|
||||
@ -295,6 +338,14 @@ function MessagePreview({ label, body }: { label: string; body: Record<string, u
|
||||
if (body.task && typeof body.task === "string") { text = body.task; }
|
||||
if (!text && body.result && typeof body.result === "string") { text = body.result; }
|
||||
if (text) {
|
||||
// [A2A_ERROR]-prefixed responses get the structured error
|
||||
// treatment. Bare text fallthrough renders a bland gray block
|
||||
// — fine for normal replies, terrible for "[A2A_ERROR] " with
|
||||
// no further context. Detect at the top of the rendering path
|
||||
// so it short-circuits before the generic preview kicks in.
|
||||
if (text.trimStart().startsWith(A2A_ERROR_PREFIX)) {
|
||||
return <A2AErrorPreview label={label} raw={text.trimStart()} />;
|
||||
}
|
||||
return (
|
||||
<div>
|
||||
<div className="text-[8px] text-zinc-500 uppercase tracking-wider mb-1">{label}</div>
|
||||
|
||||
@ -87,10 +87,35 @@ async def send_a2a_message(target_url: str, message: str) -> str:
|
||||
return f"{_A2A_ERROR_PREFIX}{text}"
|
||||
return text
|
||||
elif "error" in data:
|
||||
return f"{_A2A_ERROR_PREFIX}{data['error'].get('message', 'unknown')}"
|
||||
return str(data)
|
||||
err = data["error"]
|
||||
msg = (err.get("message") or "").strip()
|
||||
code = err.get("code")
|
||||
if msg and code is not None:
|
||||
detail = f"{msg} (code={code})"
|
||||
elif msg:
|
||||
detail = msg
|
||||
elif code is not None:
|
||||
detail = f"JSON-RPC error with no message (code={code})"
|
||||
else:
|
||||
detail = "JSON-RPC error with no message"
|
||||
return f"{_A2A_ERROR_PREFIX}{detail} [target={target_url}]"
|
||||
return f"{_A2A_ERROR_PREFIX}unexpected response shape (no result, no error): {str(data)[:200]} [target={target_url}]"
|
||||
except Exception as e:
|
||||
return f"{_A2A_ERROR_PREFIX}{e}"
|
||||
# Some httpx exceptions stringify to empty (RemoteProtocolError,
|
||||
# ConnectionReset variants) — the canvas would then render
|
||||
# "[A2A_ERROR] " with no detail and the operator has no signal
|
||||
# to act on. Always include the exception class name and the
|
||||
# target URL so the activity log + Agent Comms panel have
|
||||
# actionable information without a trip through container logs.
|
||||
msg = str(e).strip()
|
||||
type_name = type(e).__name__
|
||||
if not msg:
|
||||
detail = f"{type_name} (no message — likely connection reset or silent timeout)"
|
||||
elif type_name in msg:
|
||||
detail = msg
|
||||
else:
|
||||
detail = f"{type_name}: {msg}"
|
||||
return f"{_A2A_ERROR_PREFIX}{detail} [target={target_url}]"
|
||||
|
||||
|
||||
async def get_peers() -> list[dict]:
|
||||
|
||||
@ -112,7 +112,7 @@ def _auth_headers_for_heartbeat() -> dict[str, str]:
|
||||
|
||||
async def report_activity(
|
||||
activity_type: str, target_id: str = "", summary: str = "", status: str = "ok",
|
||||
task_text: str = "", response_text: str = "",
|
||||
task_text: str = "", response_text: str = "", error_detail: str = "",
|
||||
):
|
||||
"""Report activity to the platform for live progress tracking."""
|
||||
try:
|
||||
@ -129,6 +129,13 @@ async def report_activity(
|
||||
payload["request_body"] = {"task": task_text}
|
||||
if response_text:
|
||||
payload["response_body"] = {"result": response_text}
|
||||
if error_detail:
|
||||
# error_detail is a top-level activity row column on the
|
||||
# platform (handlers/activity.go). Surfacing the cleaned
|
||||
# exception string here lets the Activity tab render a
|
||||
# red error chip + the cause without forcing the user
|
||||
# to scroll into the raw response_body JSON.
|
||||
payload["error_detail"] = error_detail
|
||||
await client.post(
|
||||
f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/activity",
|
||||
json=payload,
|
||||
@ -178,11 +185,16 @@ async def tool_delegate_task(workspace_id: str, task: str) -> str:
|
||||
# Detect delegation failures — wrap them clearly so the calling agent
|
||||
# can decide to retry, use another peer, or handle the task itself.
|
||||
is_error = result.startswith(_A2A_ERROR_PREFIX)
|
||||
# Strip the sentinel prefix so error_detail is the human-readable
|
||||
# cause directly. The Activity tab's red error chip surfaces this
|
||||
# without the user having to scroll into the raw response JSON.
|
||||
error_detail = result[len(_A2A_ERROR_PREFIX):].strip() if is_error else ""
|
||||
await report_activity(
|
||||
"a2a_receive", workspace_id,
|
||||
f"{peer_name} responded ({len(result)} chars)" if not is_error else f"{peer_name} failed",
|
||||
f"{peer_name} responded ({len(result)} chars)" if not is_error else f"{peer_name} failed: {error_detail[:120]}",
|
||||
task_text=task, response_text=result,
|
||||
status="error" if is_error else "ok",
|
||||
error_detail=error_detail,
|
||||
)
|
||||
if is_error:
|
||||
return (
|
||||
|
||||
@ -199,10 +199,16 @@ class TestSendA2AMessage:
|
||||
result = await a2a_client.send_a2a_message("http://target/a2a", "task")
|
||||
|
||||
assert result.startswith(a2a_client._A2A_ERROR_PREFIX)
|
||||
assert "unknown" in result
|
||||
# The error includes the JSON-RPC code so the operator can look it
|
||||
# up; "no message" surfaces the missing-message condition explicitly
|
||||
# instead of the previous opaque "unknown".
|
||||
assert "code=-32600" in result
|
||||
assert "no message" in result.lower()
|
||||
# Target URL is included so chained delegations are traceable.
|
||||
assert "target=http://target/a2a" in result
|
||||
|
||||
async def test_neither_result_nor_error_returns_str_of_data(self):
|
||||
"""Response with neither 'result' nor 'error' → str(data)."""
|
||||
async def test_neither_result_nor_error_returns_a2a_error_with_payload(self):
|
||||
"""Response with neither 'result' nor 'error' → A2A_ERROR + payload context."""
|
||||
import a2a_client
|
||||
|
||||
payload = {"jsonrpc": "2.0", "id": "abc123"}
|
||||
@ -212,7 +218,14 @@ class TestSendA2AMessage:
|
||||
with patch("a2a_client.httpx.AsyncClient", return_value=mock_client):
|
||||
result = await a2a_client.send_a2a_message("http://target/a2a", "task")
|
||||
|
||||
assert result == str(payload)
|
||||
# Pre-fix this returned bare str(payload) which the canvas
|
||||
# rendered as a confusing "looks like a successful response"
|
||||
# block. Now it's tagged so downstream UI / delegate_task
|
||||
# routes it through the error path.
|
||||
assert result.startswith(a2a_client._A2A_ERROR_PREFIX)
|
||||
assert "unexpected response shape" in result
|
||||
assert "abc123" in result # snippet of payload included for context
|
||||
assert "target=http://target/a2a" in result
|
||||
|
||||
async def test_exception_returns_error_prefix_and_message(self):
|
||||
"""Network exception → returns _A2A_ERROR_PREFIX + exception text."""
|
||||
@ -225,6 +238,39 @@ class TestSendA2AMessage:
|
||||
|
||||
assert result.startswith(a2a_client._A2A_ERROR_PREFIX)
|
||||
assert "connection refused" in result
|
||||
# Exception class name is prepended when the message doesn't
|
||||
# already include it — gives the operator a typed handle to
|
||||
# search for in container logs.
|
||||
assert "ConnectionError" in result
|
||||
assert "target=http://target/a2a" in result
|
||||
|
||||
async def test_empty_stringifying_exception_falls_back_to_class_name(self):
|
||||
"""The user's reported bug: httpx.RemoteProtocolError and similar
|
||||
exceptions can stringify to "" — pre-fix the canvas rendered
|
||||
"[A2A_ERROR] " with no detail. Verify the empty path now
|
||||
produces an actionable message including the exception type
|
||||
and the target URL."""
|
||||
import a2a_client
|
||||
|
||||
# Subclass Exception with __str__ → "" to simulate the
|
||||
# silent-exception variants without depending on a specific
|
||||
# httpx version's behavior.
|
||||
class _SilentRemoteProtocolError(Exception):
|
||||
def __str__(self) -> str:
|
||||
return ""
|
||||
|
||||
mock_client = _make_mock_client(post_exc=_SilentRemoteProtocolError())
|
||||
|
||||
with patch("a2a_client.httpx.AsyncClient", return_value=mock_client):
|
||||
result = await a2a_client.send_a2a_message("http://target/a2a", "task")
|
||||
|
||||
# Must NOT be just the bare prefix — that's the regression.
|
||||
assert result != a2a_client._A2A_ERROR_PREFIX.strip()
|
||||
assert result != f"{a2a_client._A2A_ERROR_PREFIX}"
|
||||
# Must include the class name + something explanatory.
|
||||
assert "_SilentRemoteProtocolError" in result
|
||||
assert "no message" in result.lower()
|
||||
assert "target=http://target/a2a" in result
|
||||
|
||||
async def test_result_text_part_missing_text_key_returns_empty(self):
|
||||
"""Part dict without 'text' key → falls back to '' (empty string returned)."""
|
||||
|
||||
Loading…
Reference in New Issue
Block a user