diff --git a/workspace-template/claude_sdk_executor.py b/workspace-template/claude_sdk_executor.py index 02f3af4f..1389b0b9 100644 --- a/workspace-template/claude_sdk_executor.py +++ b/workspace-template/claude_sdk_executor.py @@ -82,6 +82,50 @@ _RETRYABLE_PATTERNS = ( ) +_SWALLOWED_STDERR_MARKER = "Check stderr output for details" + + +def _probe_claude_cli_error() -> str | None: + """Run ``claude --print`` directly and capture its stderr + stdout. + + Used as a fallback when the claude-agent-sdk raises a bare ``Exception`` + with the swallowed "Check stderr output for details" placeholder — that + happens when the SDK wraps a stream error from the CLI subprocess and + loses both the ``.stderr`` attribute and the exit code. At that point + the only way to see the real failure reason (rate limit, auth error, + network outage, missing token) is to run the CLI ourselves. + + Bounded by a 30s timeout so a hung CLI can't stall the error path. + Returns None if the probe itself failed (wrong invariant — don't + corrupt the main error message with probe noise). + """ + try: + import subprocess + # --print reads stdin, prints response, exits. Empty stdin gives the + # CLI something to work with without triggering an actual model call + # when it's going to fail anyway. + proc = subprocess.run( + ["claude", "--print"], + input="probe", + capture_output=True, + text=True, + timeout=30, + ) + if proc.returncode == 0: + # CLI succeeded — the original error was a transient state that + # resolved between the SDK failure and our probe. Signal that. + return "" + raw = (proc.stderr or "") + (proc.stdout or "") + raw = raw.strip() + if not raw: + return f"" + if len(raw) > _PROCESS_ERROR_STDERR_MAX_CHARS: + raw = raw[:_PROCESS_ERROR_STDERR_MAX_CHARS] + "... [truncated]" + return raw + except Exception as probe_exc: # pragma: no cover — best-effort diagnostic + return f"" + + def _format_process_error(exc: BaseException) -> str: """Render a Claude-SDK ProcessError (or any ClaudeSDKError) with its full captured context — exit code, stderr, exception type. Plain strings for @@ -92,6 +136,15 @@ def _format_process_error(exc: BaseException) -> str: ProcessError carries `.stderr`/`.exit_code` attributes that the previous code silently discarded, leaving every CLI crash with an identical "Check stderr output for details" message in the workspace log). + + Fixes #160: when the SDK raises a bare ``Exception`` containing the + "Check stderr output for details" placeholder (which happens when the + CLI subprocess emits a stream error the SDK can't categorize — rate + limit, auth, network), there's no ``.stderr``/``.exit_code`` to read. + In that case we fall back to running the CLI ourselves via + ``_probe_claude_cli_error`` so the operator sees the real failure + reason (e.g. ``You've hit your limit · resets Apr 17``) instead of + chasing ghosts in the workspace logs. """ parts = [f"{type(exc).__name__}: {exc}"] exit_code = getattr(exc, "exit_code", None) @@ -103,6 +156,13 @@ def _format_process_error(exc: BaseException) -> str: if len(stderr) > _PROCESS_ERROR_STDERR_MAX_CHARS: trimmed += f"... [{len(stderr) - _PROCESS_ERROR_STDERR_MAX_CHARS} more chars truncated]" parts.append(f"stderr={trimmed!r}") + elif exit_code is None and _SWALLOWED_STDERR_MARKER in str(exc): + # #160: generic exception with the swallowed-stderr placeholder. + # Probe the CLI directly — this is the only way to surface the real + # error when the SDK lost it in translation. + probed = _probe_claude_cli_error() + if probed: + parts.append(f"probed_cli_error={probed!r}") return " | ".join(parts) diff --git a/workspace-template/tests/test_claude_sdk_executor.py b/workspace-template/tests/test_claude_sdk_executor.py index 948fbd64..8a549cec 100644 --- a/workspace-template/tests/test_claude_sdk_executor.py +++ b/workspace-template/tests/test_claude_sdk_executor.py @@ -837,6 +837,87 @@ def test_format_process_error_no_stderr_but_has_exit_code(): assert "stderr" not in out +# --------------------------------------------------------------------------- +# _format_process_error — #160: probe CLI when SDK swallowed the real stderr +# --------------------------------------------------------------------------- + + +def test_format_process_error_probes_cli_when_stderr_swallowed(monkeypatch): + """Generic Exception with "Check stderr output for details" → probe the + claude CLI directly so the real error (rate limit, auth, etc.) surfaces + in the log instead of the useless placeholder text. Fixes #160.""" + from claude_sdk_executor import _format_process_error + + def fake_probe(): + return "You've hit your limit · resets Apr 17, 11pm (UTC)" + + # Patch the probe helper so we don't actually spawn a subprocess in tests + monkeypatch.setattr( + "claude_sdk_executor._probe_claude_cli_error", + fake_probe, + ) + + swallowed = Exception( + "Command failed with exit code 1 (exit code: 1)\n" + "Error output: Check stderr output for details" + ) + out = _format_process_error(swallowed) + assert "probed_cli_error" in out + assert "hit your limit" in out + + +def test_format_process_error_does_not_probe_when_stderr_already_present(): + """If the exception already carries .stderr, we DO NOT run the probe — + that'd be wasted subprocess work and could mask the real stderr with + probe-time state.""" + from claude_sdk_executor import _format_process_error + + # Track whether the probe was called + called = {"value": False} + + def fake_probe(): + called["value"] = True + return "probe ran" + + import claude_sdk_executor + original = claude_sdk_executor._probe_claude_cli_error + claude_sdk_executor._probe_claude_cli_error = fake_probe + try: + class FakeProcessError(Exception): + def __init__(self): + super().__init__("real error") + self.stderr = "real stderr content" + self.exit_code = 1 + + out = _format_process_error(FakeProcessError()) + assert "real stderr content" in out + assert called["value"] is False, "probe should not have been called" + finally: + claude_sdk_executor._probe_claude_cli_error = original + + +def test_format_process_error_does_not_probe_without_swallowed_marker(monkeypatch): + """Only probe when the exception message matches the specific swallowed- + stderr marker. Unrelated plain exceptions (e.g. 'generic failure') should + not trigger a CLI probe — that'd make every error path 30s slower.""" + from claude_sdk_executor import _format_process_error + + called = {"value": False} + + def fake_probe(): + called["value"] = True + return "probe ran" + + monkeypatch.setattr( + "claude_sdk_executor._probe_claude_cli_error", + fake_probe, + ) + + out = _format_process_error(RuntimeError("generic failure")) + assert called["value"] is False + assert "probed_cli_error" not in out + + def test_process_error_reaches_logs_via_execute(caplog): """End-to-end: a ProcessError in query() → executor logs both the formatted summary and the full traceback. Fixes #66 — previously no