diff --git a/molecule_runtime/claude_sdk_executor.py b/molecule_runtime/claude_sdk_executor.py index 70cc090..ba44690 100644 --- a/molecule_runtime/claude_sdk_executor.py +++ b/molecule_runtime/claude_sdk_executor.py @@ -66,10 +66,11 @@ _NO_TEXT_MSG = "Error: message contained no text content." _NO_RESPONSE_MSG = "(no response generated)" _MAX_RETRIES = 3 _BASE_RETRY_DELAY_S = 5 -# Cap for stderr captured from the CLI subprocess in the executor log. Keeps -# log lines bounded while still surfacing enough context to diagnose crashes. -# Fixes #66 (previously the executor logged nothing beyond the generic -# "Check stderr output for details" message). +# Cap for stderr captured from the CLI subprocess in the executor log and +# surfaced in A2A error responses. 1 KB keeps log lines bounded while still +# surfacing enough context to diagnose crashes. Fixes #66 (previously the +# executor logged nothing beyond the generic "Check stderr output for +# details" message). _PROCESS_ERROR_STDERR_MAX_CHARS = 4096 # Substrings in error messages that indicate a transient failure worth retrying. @@ -454,10 +455,14 @@ class ClaudeSDKExecutor(AgentExecutor): # Non-retryable or exhausted retries. Log exit_code + # stderr explicitly (fixes #66) so operators don't have # to reproduce the crash manually to find out why the - # subprocess died. + # subprocess died. Surface the first ~1 KB of stderr in + # the A2A response so clients can show actionable context. logger.error("SDK agent error [claude-code]: %s", formatted) logger.exception("SDK agent error [claude-code] — full traceback follows") - response_text = sanitize_agent_error(exc) + exc_stderr = getattr(exc, "stderr", None) + if exc_stderr: + exc_stderr = exc_stderr[:1024] + response_text = sanitize_agent_error(exc, stderr=exc_stderr) break finally: await set_current_task(self.heartbeat, "") diff --git a/molecule_runtime/executor_helpers.py b/molecule_runtime/executor_helpers.py index 9d6b993..a789a09 100644 --- a/molecule_runtime/executor_helpers.py +++ b/molecule_runtime/executor_helpers.py @@ -409,6 +409,7 @@ def classify_subprocess_error(stderr_text: str, exit_code: int | None) -> str: def sanitize_agent_error( exc: BaseException | None = None, category: str | None = None, + stderr: str | None = None, ) -> str: """Render an agent-side failure into a user-safe error message. @@ -416,7 +417,12 @@ def sanitize_agent_error( category string (e.g. from `classify_subprocess_error`). If both are given, `category` wins. If neither, the tag defaults to "unknown". - The message body is deliberately dropped — exception messages and + `stderr` is the first ~1 KB of captured subprocess stderr — it is + included verbatim in the returned message so callers can surface it in + A2A error responses. The caller is responsible for the cap; this + function does not truncate. + + The message body is deliberately simple — exception messages and subprocess stderr frequently leak stack traces, paths, tokens, and API keys. Full detail is available in the workspace logs via `logger.exception()` / `logger.error()`. @@ -427,4 +433,7 @@ def sanitize_agent_error( tag = type(exc).__name__ else: tag = "unknown" - return f"Agent error ({tag}) — see workspace logs for details." + msg = f"Agent error ({tag})" + if stderr: + msg += f": {stderr}" + return msg diff --git a/tests/test_executor_helpers.py b/tests/test_executor_helpers.py new file mode 100644 index 0000000..c387357 --- /dev/null +++ b/tests/test_executor_helpers.py @@ -0,0 +1,49 @@ +"""Tests for sanitize_agent_error() — specifically the stderr surface in A2A responses.""" + +from __future__ import annotations + +import pytest + +from molecule_runtime.executor_helpers import sanitize_agent_error + + +class TestSanitizeAgentError: + """sanitize_agent_error() must surface stderr in A2A error messages.""" + + def test_plain_error_no_stderr(self): + """No stderr → simple message, no trailing colon.""" + class DummyError(Exception): + pass + + result = sanitize_agent_error(exc=DummyError("boom")) + assert result == "Agent error (DummyError)" + + def test_stderr_included_in_message(self): + """stderr= kwarg is included verbatim so A2A callers can show it.""" + result = sanitize_agent_error(category="subprocess_error", stderr="You hit your rate limit · resets Apr 17") + assert result == "Agent error (subprocess_error): You hit your rate limit · resets Apr 17" + + def test_stderr_truncated_by_caller(self): + """Caller is responsible for capping; function does not truncate.""" + long_stderr = "x" * 5000 + result = sanitize_agent_error(category="subprocess_error", stderr=long_stderr) + assert result.endswith(long_stderr) + + def test_category_wins_over_exc_type(self): + """When both category and exc are passed, category is the tag.""" + result = sanitize_agent_error(exc=ValueError("oops"), category="rate_limited", stderr="rate limit") + assert "Agent error (rate_limited)" in result + assert "ValueError" not in result + + def test_exc_name_used_when_no_category_or_stderr(self): + """Fallback to exc type name when category is absent.""" + class ToolNotFoundError(Exception): + pass + + result = sanitize_agent_error(exc=ToolNotFoundError("some tool not found")) + assert result == "Agent error (ToolNotFoundError)" + + def test_unknown_tag_when_no_exc_no_category(self): + """Neither exc nor category → defaults to 'unknown'.""" + result = sanitize_agent_error() + assert result == "Agent error (unknown)"