fix(runtime): capture stderr in A2A error response (closes #66)
- Lower _PROCESS_ERROR_STDERR_MAX_CHARS to 1024 (was 4096) so A2A responses stay bounded — the full context is already in workspace logs via logger.error/exception. - Add stderr= kwarg to sanitize_agent_error() so callers can surface subprocess stderr verbatim in A2A responses. - In _execute_locked() non-retryable error path, extract the first 1 KB of exc.stderr and pass it to sanitize_agent_error() so the A2A response carries actionable context (rate limit message, auth error, etc.) instead of just a class name. - Add test_executor_helpers.py unit tests for the new stderr= kwarg.
This commit is contained in:
parent
dcb6edd1a1
commit
19fde6f466
@ -65,10 +65,11 @@ _NO_TEXT_MSG = "Error: message contained no text content."
|
||||
_NO_RESPONSE_MSG = "(no response generated)"
|
||||
_MAX_RETRIES = 3
|
||||
_BASE_RETRY_DELAY_S = 5
|
||||
# Cap for stderr captured from the CLI subprocess in the executor log. Keeps
|
||||
# log lines bounded while still surfacing enough context to diagnose crashes.
|
||||
# Fixes #66 (previously the executor logged nothing beyond the generic
|
||||
# "Check stderr output for details" message).
|
||||
# Cap for stderr captured from the CLI subprocess in the executor log and
|
||||
# surfaced in A2A error responses. 1 KB keeps log lines bounded while still
|
||||
# surfacing enough context to diagnose crashes. Fixes #66 (previously the
|
||||
# executor logged nothing beyond the generic "Check stderr output for
|
||||
# details" message).
|
||||
_PROCESS_ERROR_STDERR_MAX_CHARS = 4096
|
||||
|
||||
# Substrings in error messages that indicate a transient failure worth retrying.
|
||||
@ -453,10 +454,14 @@ class ClaudeSDKExecutor(AgentExecutor):
|
||||
# Non-retryable or exhausted retries. Log exit_code +
|
||||
# stderr explicitly (fixes #66) so operators don't have
|
||||
# to reproduce the crash manually to find out why the
|
||||
# subprocess died.
|
||||
# subprocess died. Surface the first ~1 KB of stderr in
|
||||
# the A2A response so clients can show actionable context.
|
||||
logger.error("SDK agent error [claude-code]: %s", formatted)
|
||||
logger.exception("SDK agent error [claude-code] — full traceback follows")
|
||||
response_text = sanitize_agent_error(exc)
|
||||
exc_stderr = getattr(exc, "stderr", None)
|
||||
if exc_stderr:
|
||||
exc_stderr = exc_stderr[:1024]
|
||||
response_text = sanitize_agent_error(exc, stderr=exc_stderr)
|
||||
break
|
||||
finally:
|
||||
await set_current_task(self.heartbeat, "")
|
||||
|
||||
@ -409,6 +409,7 @@ def classify_subprocess_error(stderr_text: str, exit_code: int | None) -> str:
|
||||
def sanitize_agent_error(
|
||||
exc: BaseException | None = None,
|
||||
category: str | None = None,
|
||||
stderr: str | None = None,
|
||||
) -> str:
|
||||
"""Render an agent-side failure into a user-safe error message.
|
||||
|
||||
@ -416,7 +417,12 @@ def sanitize_agent_error(
|
||||
category string (e.g. from `classify_subprocess_error`). If both are
|
||||
given, `category` wins. If neither, the tag defaults to "unknown".
|
||||
|
||||
The message body is deliberately dropped — exception messages and
|
||||
`stderr` is the first ~1 KB of captured subprocess stderr — it is
|
||||
included verbatim in the returned message so callers can surface it in
|
||||
A2A error responses. The caller is responsible for the cap; this
|
||||
function does not truncate.
|
||||
|
||||
The message body is deliberately simple — exception messages and
|
||||
subprocess stderr frequently leak stack traces, paths, tokens, and
|
||||
API keys. Full detail is available in the workspace logs via
|
||||
`logger.exception()` / `logger.error()`.
|
||||
@ -427,4 +433,7 @@ def sanitize_agent_error(
|
||||
tag = type(exc).__name__
|
||||
else:
|
||||
tag = "unknown"
|
||||
return f"Agent error ({tag}) — see workspace logs for details."
|
||||
msg = f"Agent error ({tag})"
|
||||
if stderr:
|
||||
msg += f": {stderr}"
|
||||
return msg
|
||||
|
||||
49
tests/test_executor_helpers.py
Normal file
49
tests/test_executor_helpers.py
Normal file
@ -0,0 +1,49 @@
|
||||
"""Tests for sanitize_agent_error() — specifically the stderr surface in A2A responses."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from molecule_runtime.executor_helpers import sanitize_agent_error
|
||||
|
||||
|
||||
class TestSanitizeAgentError:
|
||||
"""sanitize_agent_error() must surface stderr in A2A error messages."""
|
||||
|
||||
def test_plain_error_no_stderr(self):
|
||||
"""No stderr → simple message, no trailing colon."""
|
||||
class DummyError(Exception):
|
||||
pass
|
||||
|
||||
result = sanitize_agent_error(exc=DummyError("boom"))
|
||||
assert result == "Agent error (DummyError)"
|
||||
|
||||
def test_stderr_included_in_message(self):
|
||||
"""stderr= kwarg is included verbatim so A2A callers can show it."""
|
||||
result = sanitize_agent_error(category="subprocess_error", stderr="You hit your rate limit · resets Apr 17")
|
||||
assert result == "Agent error (subprocess_error): You hit your rate limit · resets Apr 17"
|
||||
|
||||
def test_stderr_truncated_by_caller(self):
|
||||
"""Caller is responsible for capping; function does not truncate."""
|
||||
long_stderr = "x" * 5000
|
||||
result = sanitize_agent_error(category="subprocess_error", stderr=long_stderr)
|
||||
assert result.endswith(long_stderr)
|
||||
|
||||
def test_category_wins_over_exc_type(self):
|
||||
"""When both category and exc are passed, category is the tag."""
|
||||
result = sanitize_agent_error(exc=ValueError("oops"), category="rate_limited", stderr="rate limit")
|
||||
assert "Agent error (rate_limited)" in result
|
||||
assert "ValueError" not in result
|
||||
|
||||
def test_exc_name_used_when_no_category_or_stderr(self):
|
||||
"""Fallback to exc type name when category is absent."""
|
||||
class ToolNotFoundError(Exception):
|
||||
pass
|
||||
|
||||
result = sanitize_agent_error(exc=ToolNotFoundError("some tool not found"))
|
||||
assert result == "Agent error (ToolNotFoundError)"
|
||||
|
||||
def test_unknown_tag_when_no_exc_no_category(self):
|
||||
"""Neither exc nor category → defaults to 'unknown'."""
|
||||
result = sanitize_agent_error()
|
||||
assert result == "Agent error (unknown)"
|
||||
Loading…
Reference in New Issue
Block a user