From 5b5eacbb2946f475dae92aae6d4f57ee83c2a3b4 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Tue, 5 May 2026 11:47:14 -0700 Subject: [PATCH] test(inbox): clean up daemon poller thread to prevent test cross-talk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit test_start_poller_thread_is_daemon spawned a daemon thread with no stop mechanism — the leaked thread polled every 10ms with the test's patched httpx.Client mock STILL ACTIVE for ~50ms after the test scope. Later tests that re-patched httpx.Client + asserted call counts on fetch_and_stage / Client construction got their assertions inflated by the leaked thread's iterations. Symptoms: test_poll_once_skips_chat_upload_row_from_queue saw fetch_and_stage called twice instead of once on Python 3.11 CI; test_batch_fetcher_owns_client_when_not_supplied saw two Client constructions instead of one in the full local suite. Both surfaced only after Phase 5b's BatchFetcher refactor changed the timing window that allowed the leaked thread to fire mid-test. Fix: extend start_poller_thread with an optional stop_event kwarg (backward compatible — production callers pass None and rely on the daemon flag for process-exit cleanup). The test now signals + joins on stop_event before exiting scope, so the thread is gone before any later test patches httpx. Co-Authored-By: Claude Opus 4.7 (1M context) --- workspace/inbox.py | 8 +++++++- workspace/tests/test_inbox.py | 26 ++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/workspace/inbox.py b/workspace/inbox.py index 5e2f02b1..6c131175 100644 --- a/workspace/inbox.py +++ b/workspace/inbox.py @@ -685,6 +685,7 @@ def start_poller_thread( platform_url: str, workspace_id: str, interval: float = POLL_INTERVAL_SECONDS, + stop_event: threading.Event | None = None, ) -> threading.Thread: """Spawn the poller as a daemon thread. Returns the Thread handle. @@ -696,13 +697,18 @@ def start_poller_thread( operator running ``ps -eL`` or eyeballing ``threading.enumerate()`` can tell which thread is which without reverse-engineering it from crash tracebacks. + + Pass ``stop_event`` to enable graceful shutdown — used by tests so + the daemon thread doesn't outlive the test that started it and race + with later tests' httpx patches. Production code passes None and + relies on the daemon flag for process-exit cleanup. """ name = "molecule-mcp-inbox-poller" if workspace_id: name = f"{name}-{workspace_id[:8]}" t = threading.Thread( target=_poll_loop, - args=(state, platform_url, workspace_id, interval), + args=(state, platform_url, workspace_id, interval, stop_event), name=name, daemon=True, ) diff --git a/workspace/tests/test_inbox.py b/workspace/tests/test_inbox.py index d62b2a0a..cbba9a3b 100644 --- a/workspace/tests/test_inbox.py +++ b/workspace/tests/test_inbox.py @@ -555,16 +555,34 @@ def test_poll_once_self_notify_does_not_fire_notification(state: inbox.InboxStat def test_start_poller_thread_is_daemon(state: inbox.InboxState): """Daemon flag is required so the poller dies with the parent process; a non-daemon poller would leak across `claude` restarts - and write to a stale workspace.""" + and write to a stale workspace. + + Stop_event is plumbed so the thread cleans up at the end of the + test instead of leaking into later tests. Without cleanup, the + daemon's ~10ms tick races with later tests that patch httpx.Client + — the leaked thread sees their patched response and runs an + unwanted iteration of _poll_once that double-counts mocked calls + (caught when test_batch_fetcher_owns_client_when_not_supplied + surfaced this on Python 3.11 CI but not 3.13 local). + """ resp = _make_response(200, []) p, _ = _patch_httpx(resp) + stop_event = threading.Event() with p, patch("platform_auth.auth_headers", return_value={}): # Use a very short interval so the loop body runs at least once # before we exit the test. - t = inbox.start_poller_thread(state, "http://platform", "ws-1", interval=0.01) + t = inbox.start_poller_thread( + state, "http://platform", "ws-1", interval=0.01, stop_event=stop_event + ) time.sleep(0.05) - assert t.daemon is True - assert t.is_alive() + assert t.daemon is True + assert t.is_alive() + # Signal shutdown + wait for the thread to actually exit before + # we leave the test scope. Without this join, the leaked thread + # races with later tests' httpx patches. + stop_event.set() + t.join(timeout=2.0) + assert not t.is_alive(), "poller thread did not exit on stop_event" # ---------------------------------------------------------------------------