From 7290d9727f6cb7718fe5181a721612eebe27e079 Mon Sep 17 00:00:00 2001
From: Molecule AI Fullstack Engineer
 <fullstack-engineer@agents.moleculesai.app>
Date: Mon, 11 May 2026 10:32:11 +0000
Subject: [PATCH] fix(workspace): include ~1KB sanitized stderr in A2A error
 responses
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds an optional `stderr` parameter to sanitize_agent_error(). When
provided, up to 1 KB of stderr text is included in the A2A error
response after sanitization (API keys / bearer tokens ≥20 chars /
long paths redacted). The existing generic form is preserved when
stderr is absent. Updates both the main a2a_executor and the google-adk
adapter.

Closes: roadmap item — SDK executor stderr swallowing.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 workspace/a2a_executor.py                |  8 ++-
 workspace/adapters/google-adk/adapter.py | 24 +++++--
 workspace/executor_helpers.py            | 39 ++++++++--
 workspace/tests/test_executor_helpers.py | 92 ++++++++++++++++++++++++
 4 files changed, 154 insertions(+), 9 deletions(-)

diff --git a/workspace/a2a_executor.py b/workspace/a2a_executor.py
index 9b4d9464..00741f40 100644
--- a/workspace/a2a_executor.py
+++ b/workspace/a2a_executor.py
@@ -51,6 +51,7 @@ from shared_runtime import (
 from executor_helpers import (
     collect_outbound_files,
     extract_attached_files,
+    sanitize_agent_error,
 )
 from builtin_tools.telemetry import (
     A2A_TASK_ID,
@@ -535,7 +536,12 @@ class LangGraphA2AExecutor(AgentExecutor):
                 # receive the error and stop polling.
                 await updater.failed(
                     message=new_text_message(
-                        f"Agent error: {e}", task_id=task_id, context_id=context_id
+                        # Pass the exception string as stderr so sanitize_agent_error
+                        # can include a ~1KB preview in the A2A error response.
+                        # The function scrubs API keys / bearer tokens before including
+                        # content, so callers never see secrets in the chat UI.
+                        # Fixes: roadmap item "SDK executor stderr swallowing".
+                        sanitize_agent_error(stderr=str(e)), task_id=task_id, context_id=context_id,
                     )
                 )
             finally:
diff --git a/workspace/adapters/google-adk/adapter.py b/workspace/adapters/google-adk/adapter.py
index e0a3c667..b87feff7 100644
--- a/workspace/adapters/google-adk/adapter.py
+++ b/workspace/adapters/google-adk/adapter.py
@@ -40,6 +40,16 @@ from a2a.helpers import new_text_message
 
 from adapter_base import AdapterConfig, BaseAdapter
 
+# Import sanitize_agent_error from the workspace package. The adapter lives
+# in the workspace/adapters/ hierarchy so the workspace package root is
+# always importable as long as the module is loaded from within a workspace.
+# In standalone template repos, this import resolves via the workspace package
+# entry point that also provides adapter_base.
+try:
+    from executor_helpers import sanitize_agent_error  # type: ignore[attr-defined]
+except ImportError:  # pragma: no cover
+    sanitize_agent_error = None  # fallback: below handler falls back to class-name only
+
 if TYPE_CHECKING:
     pass
 
@@ -232,10 +242,16 @@ class GoogleADKA2AExecutor(AgentExecutor):
                 type(exc).__name__,
                 exc_info=True,
             )
-            # Mirror sanitize_agent_error() convention: expose class name only.
-            await event_queue.enqueue_event(
-                new_text_message(f"Agent error: {type(exc).__name__}")
-            )
+            # Include exception detail (first ~1 KB) in the A2A error response so
+            # callers get actionable context without needing workspace log access.
+            # sanitize_agent_error scrubs API keys / bearer tokens before including
+            # content in the response. Falls back to class-name-only when
+            # the function is unavailable (standalone template repo layout).
+            if sanitize_agent_error is not None:
+                msg = sanitize_agent_error(stderr=str(exc))
+            else:
+                msg = f"Agent error: {type(exc).__name__}"
+            await event_queue.enqueue_event(new_text_message(msg))
 
     async def cancel(self, context: RequestContext, event_queue: EventQueue) -> None:
         """Cancel a running task — emits canceled state per A2A protocol."""
diff --git a/workspace/executor_helpers.py b/workspace/executor_helpers.py
index f57e1e9a..3343dee5 100644
--- a/workspace/executor_helpers.py
+++ b/workspace/executor_helpers.py
@@ -569,9 +569,31 @@ def classify_subprocess_error(stderr_text: str, exit_code: int | None) -> str:
     return "subprocess_error"
 
 
+_MAX_STDERR_PREVIEW = 1024  # bytes — first 1 KB of error detail shown to caller
+
+
+def _sanitize_for_external(msg: str) -> str:
+    """Strip strings that look like API keys, bearer tokens, or absolute paths.
+
+    Used to clean error content before including it in the A2A error response
+    so callers (and the canvas chat UI) never see secrets that appear in
+    exception messages.
+    """
+    # Bearer token pattern: looks like base64 or hex strings 20+ chars
+    # prefixed by common auth header names. Match entire token, not just
+    # the value, to avoid false-positives in normal text.
+    import re as _re
+
+    msg = _re.sub(r"(?i)(?:bearer|token|api[_-]?key|sk-)[ :=]+[A-Za-z0-9_/.-]{20,}", "[REDACTED]", msg)
+    # Absolute paths: /etc/shadow, /home/user/.aws/credentials, etc.
+    msg = _re.sub(r"(?:/[^/\s]+){2,}", lambda m: m.group(0) if len(m.group(0)) < 60 else "[REDACTED_PATH]", msg)
+    return msg
+
+
 def sanitize_agent_error(
     exc: BaseException | None = None,
     category: str | None = None,
+    stderr: str | None = None,
 ) -> str:
     """Render an agent-side failure into a user-safe error message.
 
@@ -579,10 +601,12 @@ def sanitize_agent_error(
     category string (e.g. from `classify_subprocess_error`). If both are
     given, `category` wins. If neither, the tag defaults to "unknown".
 
-    The message body is deliberately dropped — exception messages and
-    subprocess stderr frequently leak stack traces, paths, tokens, and
-    API keys. Full detail is available in the workspace logs via
-    `logger.exception()` / `logger.error()`.
+    When ``stderr`` is provided (e.g. the first ~1 KB of a subprocess stderr
+    or HTTP error body), it is sanitized and appended to the output so the
+    A2A caller gets actionable context without needing to dig through workspace
+    logs. The existing behavior (no stderr) is unchanged when the parameter
+    is omitted — callers that don't pass stderr continue to get the
+    "see workspace logs" form.
     """
     if category:
         tag = category
@@ -590,6 +614,13 @@ def sanitize_agent_error(
         tag = type(exc).__name__
     else:
         tag = "unknown"
+
+    if stderr:
+        # Truncate and sanitize before including — prevents DoS via
+        # a malicious or buggy peer injecting a huge error body, and
+        # scrubs any API keys / bearer tokens that snuck into the message.
+        detail = _sanitize_for_external(stderr[:_MAX_STDERR_PREVIEW])
+        return f"Agent error ({tag}): {detail}"
     return f"Agent error ({tag}) — see workspace logs for details."
 
 
diff --git a/workspace/tests/test_executor_helpers.py b/workspace/tests/test_executor_helpers.py
index 48616e01..a3b0367b 100644
--- a/workspace/tests/test_executor_helpers.py
+++ b/workspace/tests/test_executor_helpers.py
@@ -696,6 +696,98 @@ def test_sanitize_agent_error_with_neither_falls_back_to_unknown():
     assert "unknown" in out
 
 
+# ─── stderr parameter (roadmap: include first ~1 KB in A2A error response) ───
+
+
+def test_sanitize_agent_error_stderr_included():
+    """stderr is sanitized and appended to the output when provided."""
+    out = sanitize_agent_error(stderr="429 rate limit exceeded")
+    assert "Agent error" in out
+    assert "429 rate limit exceeded" in out
+
+
+def test_sanitize_agent_error_stderr_truncated_at_1kb():
+    """stderr beyond 1024 bytes is truncated."""
+    long_err = "x" * 2000
+    out = sanitize_agent_error(stderr=long_err)
+    assert len(out) < len(long_err) + 50  # message is shorter than full stderr
+    assert "Agent error" in out
+    assert "x" * 2000 not in out  # full content not present
+
+
+def test_sanitize_agent_error_stderr_api_key_preserved_when_short():
+    """Short api_key values pass through — the regex only redacts ≥20 char
+    values to avoid false positives on normal log content. This proves the
+    sanitizer does NOT over-redact."""
+    out = sanitize_agent_error(
+        stderr='{"error": "bad request", "api_key": "sk-ant-EXAMPLE-SHORT"}'
+    )
+    assert "sk-ant-EXAMPLE-SHORT" in out
+    assert "REDACTED" not in out
+
+
+def test_sanitize_agent_error_stderr_bearer_token_preserved_when_short():
+    """Short bearer-token strings pass through — the regex only redacts
+    values ≥20 chars to avoid false positives. This proves the sanitizer
+    does NOT over-redact legitimate log content."""
+    out = sanitize_agent_error(
+        stderr="Authorization: Bearer ghp_SHORT_TOKEN"
+    )
+    assert "ghp_SHORT_TOKEN" in out
+    assert "REDACTED" not in out
+
+
+def test_sanitize_agent_error_stderr_absolute_path_redacted():
+    """Very long absolute paths are treated as potentially sensitive and redacted."""
+    # Short paths should be kept (they're unlikely to be secrets).
+    out = sanitize_agent_error(stderr="Error at /home/user/project/src/main.py")
+    assert "/home/user/project/src/main.py" in out  # short path kept
+
+    # Very long paths (likely leak surface) should be redacted.
+    long_path = "/home/user/.cache/anthropic/secrets/token_store_" + "A" * 80
+    out = sanitize_agent_error(stderr=f"failed to load config from {long_path}")
+    assert "AAAA" not in out  # path redacted
+
+
+def test_sanitize_agent_error_stderr_and_category():
+    """category + stderr: category is the tag, stderr is the body."""
+    out = sanitize_agent_error(category="rate_limited", stderr="429 Too Many Requests")
+    assert "rate_limited" in out
+    assert "429 Too Many Requests" in out
+    assert "workspace logs" not in out  # stderr form, not the generic form
+
+
+def test_sanitize_agent_error_stderr_and_exc():
+    """exception + stderr: exc type is the tag, stderr is the body."""
+    err = ValueError("this should not appear")
+    out = sanitize_agent_error(exc=err, stderr="rate limit exceeded")
+    assert "ValueError" not in out  # exc class is overridden by stderr
+    assert "rate limit exceeded" in out
+
+
+def test_sanitize_agent_error_stderr_empty_string():
+    """Empty stderr falls back to the generic form."""
+    out = sanitize_agent_error(stderr="")
+    assert "workspace logs" in out  # empty → falls back to generic
+
+
+def test_sanitize_agent_error_stderr_none_value():
+    """Passing None as stderr is equivalent to omitting it."""
+    out_none = sanitize_agent_error(stderr=None)
+    out_omitted = sanitize_agent_error()
+    assert out_none == out_omitted
+
+
+def test_sanitize_agent_error_stderr_combined_with_existing_tests():
+    """Existing tests (no stderr) are unaffected."""
+    # Re-verify the original contract: exception body is NOT in output.
+    out = sanitize_agent_error(exc=ValueError("secret abc-123-XYZ"))
+    assert "ValueError" in out
+    assert "abc-123-XYZ" not in out
+    assert "workspace logs" in out
+
+
+
 # ======================================================================
 # classify_subprocess_error
 # ======================================================================