From 2ee4b67cab181cce883cd9d8151053ea4d90946f Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Sat, 25 Apr 2026 08:52:32 -0700 Subject: [PATCH] =?UTF-8?q?chore:=20third-pass=20review=20polish=20?= =?UTF-8?q?=E2=80=94=20empty-stream=20gate=20test=20+=20Callable=20type?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass 3 review came back Approve with two optional polish items. Both taken to fully converge the loop: 1. Regression test for the empty-stream wedge-clear gate (added in 3c4eef49). A degenerate stream that iterates without raising but emits NEITHER an AssistantMessage NOR a ResultMessage must NOT clear the wedge flag — pre-set wedge persists, the next heartbeat still reports runtime_state="wedged". Pins the gate against future regression. 2. Replaced the type annotation `"dict[str, callable[[dict], str]]"` (lowercase `callable`, string-quoted) with the proper `dict[str, Callable[[dict], str]]` using `Callable` from `collections.abc`. Benign before (`from __future__ import annotations` makes the annotation a string Python never evaluates), but pyright/mypy may flag the lowercase form. 65 Python tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- workspace/claude_sdk_executor.py | 4 +-- workspace/tests/test_claude_sdk_executor.py | 35 +++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/workspace/claude_sdk_executor.py b/workspace/claude_sdk_executor.py index a0a89c5d..a05823c8 100644 --- a/workspace/claude_sdk_executor.py +++ b/workspace/claude_sdk_executor.py @@ -29,7 +29,7 @@ import asyncio import logging import os import sys -from collections.abc import AsyncIterator +from collections.abc import AsyncIterator, Callable from dataclasses import dataclass from typing import TYPE_CHECKING, Any @@ -160,7 +160,7 @@ def _reset_sdk_wedge_for_test() -> None: # table falls through to a generic "🛠 (…)" line. Order keys by # tool frequency so a future contributor can see the high-traffic # tools first. -_TOOL_USE_SUMMARIZERS: "dict[str, callable[[dict], str]]" = { +_TOOL_USE_SUMMARIZERS: dict[str, Callable[[dict], str]] = { "Read": lambda i: f"📄 Read {i.get('file_path', '?')}", "Write": lambda i: f"✍️ Write {i.get('file_path', '?')}", "Edit": lambda i: f"✏️ Edit {i.get('file_path', '?')}", diff --git a/workspace/tests/test_claude_sdk_executor.py b/workspace/tests/test_claude_sdk_executor.py index aff8d264..4fa9f1d9 100644 --- a/workspace/tests/test_claude_sdk_executor.py +++ b/workspace/tests/test_claude_sdk_executor.py @@ -1354,3 +1354,38 @@ async def test_execute_clears_wedge_on_successful_query(): assert _executor_mod.wedge_reason() == "" finally: _executor_mod._reset_sdk_wedge_for_test() + + +@pytest.mark.asyncio +async def test_execute_does_not_clear_wedge_on_empty_stream(): + """Regression for the gate added in 3c4eef49: a stream that + iterates without raising but emits NEITHER an AssistantMessage + NOR a ResultMessage (degenerate or stub-driven shape) must NOT + clear the wedge flag. A real successful query yields at least + one of those; treating an empty stream as "recovered" would + falsely flip the workspace back to online without any evidence + the SDK is actually working.""" + _executor_mod._reset_sdk_wedge_for_test() + _executor_mod._mark_sdk_wedged("pre-existing wedge — must not clear on empty stream") + assert _executor_mod.is_wedged() is True + + e = _make_executor() + ctx = _make_context(["test prompt"]) + eq = _make_event_queue() + + async def empty_query(prompt, options): + # Iterator returns without yielding — the degenerate case. + if False: + yield # pragma: no cover + + with patch("claude_sdk_executor.recall_memories", new=AsyncMock(return_value="")), \ + patch("claude_sdk_executor.read_delegation_results", return_value=""), \ + patch("claude_sdk_executor.commit_memory", new=AsyncMock()), \ + patch("claude_sdk_executor.set_current_task", new=AsyncMock()), \ + patch("claude_agent_sdk.query", new=empty_query): + try: + await e.execute(ctx, eq) + assert _executor_mod.is_wedged() is True, \ + "wedge must persist when the stream emitted no content" + finally: + _executor_mod._reset_sdk_wedge_for_test()