Merge pull request #42 from Molecule-AI/fix/stderr-capture-a2a-response

fix(runtime): capture stderr in A2A error response (closes #66)
This commit is contained in:
Hongming Wang 2026-04-24 13:25:15 -07:00 committed by GitHub
commit 7fc3537b2f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 71 additions and 8 deletions

View File

@ -66,10 +66,11 @@ _NO_TEXT_MSG = "Error: message contained no text content."
_NO_RESPONSE_MSG = "(no response generated)"
_MAX_RETRIES = 3
_BASE_RETRY_DELAY_S = 5
# Cap for stderr captured from the CLI subprocess in the executor log. Keeps
# log lines bounded while still surfacing enough context to diagnose crashes.
# Fixes #66 (previously the executor logged nothing beyond the generic
# "Check stderr output for details" message).
# Cap for stderr captured from the CLI subprocess in the executor log and
# surfaced in A2A error responses. 1 KB keeps log lines bounded while still
# surfacing enough context to diagnose crashes. Fixes #66 (previously the
# executor logged nothing beyond the generic "Check stderr output for
# details" message).
_PROCESS_ERROR_STDERR_MAX_CHARS = 4096
# Substrings in error messages that indicate a transient failure worth retrying.
@ -454,10 +455,14 @@ class ClaudeSDKExecutor(AgentExecutor):
# Non-retryable or exhausted retries. Log exit_code +
# stderr explicitly (fixes #66) so operators don't have
# to reproduce the crash manually to find out why the
# subprocess died.
# subprocess died. Surface the first ~1 KB of stderr in
# the A2A response so clients can show actionable context.
logger.error("SDK agent error [claude-code]: %s", formatted)
logger.exception("SDK agent error [claude-code] — full traceback follows")
response_text = sanitize_agent_error(exc)
exc_stderr = getattr(exc, "stderr", None)
if exc_stderr:
exc_stderr = exc_stderr[:1024]
response_text = sanitize_agent_error(exc, stderr=exc_stderr)
break
finally:
await set_current_task(self.heartbeat, "")

View File

@ -409,6 +409,7 @@ def classify_subprocess_error(stderr_text: str, exit_code: int | None) -> str:
def sanitize_agent_error(
exc: BaseException | None = None,
category: str | None = None,
stderr: str | None = None,
) -> str:
"""Render an agent-side failure into a user-safe error message.
@ -416,7 +417,12 @@ def sanitize_agent_error(
category string (e.g. from `classify_subprocess_error`). If both are
given, `category` wins. If neither, the tag defaults to "unknown".
The message body is deliberately dropped exception messages and
`stderr` is the first ~1 KB of captured subprocess stderr it is
included verbatim in the returned message so callers can surface it in
A2A error responses. The caller is responsible for the cap; this
function does not truncate.
The message body is deliberately simple exception messages and
subprocess stderr frequently leak stack traces, paths, tokens, and
API keys. Full detail is available in the workspace logs via
`logger.exception()` / `logger.error()`.
@ -427,4 +433,7 @@ def sanitize_agent_error(
tag = type(exc).__name__
else:
tag = "unknown"
return f"Agent error ({tag}) — see workspace logs for details."
msg = f"Agent error ({tag})"
if stderr:
msg += f": {stderr}"
return msg

View File

@ -0,0 +1,49 @@
"""Tests for sanitize_agent_error() — specifically the stderr surface in A2A responses."""
from __future__ import annotations
import pytest
from molecule_runtime.executor_helpers import sanitize_agent_error
class TestSanitizeAgentError:
"""sanitize_agent_error() must surface stderr in A2A error messages."""
def test_plain_error_no_stderr(self):
"""No stderr → simple message, no trailing colon."""
class DummyError(Exception):
pass
result = sanitize_agent_error(exc=DummyError("boom"))
assert result == "Agent error (DummyError)"
def test_stderr_included_in_message(self):
"""stderr= kwarg is included verbatim so A2A callers can show it."""
result = sanitize_agent_error(category="subprocess_error", stderr="You hit your rate limit · resets Apr 17")
assert result == "Agent error (subprocess_error): You hit your rate limit · resets Apr 17"
def test_stderr_truncated_by_caller(self):
"""Caller is responsible for capping; function does not truncate."""
long_stderr = "x" * 5000
result = sanitize_agent_error(category="subprocess_error", stderr=long_stderr)
assert result.endswith(long_stderr)
def test_category_wins_over_exc_type(self):
"""When both category and exc are passed, category is the tag."""
result = sanitize_agent_error(exc=ValueError("oops"), category="rate_limited", stderr="rate limit")
assert "Agent error (rate_limited)" in result
assert "ValueError" not in result
def test_exc_name_used_when_no_category_or_stderr(self):
"""Fallback to exc type name when category is absent."""
class ToolNotFoundError(Exception):
pass
result = sanitize_agent_error(exc=ToolNotFoundError("some tool not found"))
assert result == "Agent error (ToolNotFoundError)"
def test_unknown_tag_when_no_exc_no_category(self):
"""Neither exc nor category → defaults to 'unknown'."""
result = sanitize_agent_error()
assert result == "Agent error (unknown)"