From fd0796947f675fd3f1a48f65433e135d8bf2ae79 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 30 Apr 2026 01:34:08 -0700 Subject: [PATCH] =?UTF-8?q?fix:=20stabilize=20CI=20=E2=80=94=20TS=20widen,?= =?UTF-8?q?=20sys.modules=20restore,=20WS=20subscriber=20race=20(#17836)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three narrow fixes targeting the remaining red checks after #17828: 1. ui-tui/src/app/slash/commands/ops.ts (Docker Build): /reload-mcp's local params type annotated session_id: string while ctx.sid is string | null. Widen to string | null — matches every other rpc call site and the test harness which passes { session_id: null }. Fixes TS2322 on line 86. The rpc signature itself is Record, so this is purely a local typing fix, no behavioral change. 2. tests/plugins/test_achievements_plugin.py (13 cascading test failures): _install_fake_session_db did a raw sys.modules['hermes_state'] = fake_module without restoration, leaking the fake across xdist worker boundaries. Downstream tests doing from hermes_state import SessionDB got a module whose SessionDB was lambda: fake_db — 6 test_hermes_state.py tests failed with AttributeError: 'function' object has no attribute '_sanitize_fts5_query' / _contains_cjk, and 7 test_860_dedup.py tests failed with TypeError: got unexpected keyword argument 'db_path' (real code calls SessionDB(db_path=...)). Fix: stash monkeypatch on the plugin_api module object in the fixture, and have the helper do monkeypatch.setitem(sys.modules, 'hermes_state', fake_module) for auto-restoration at test teardown. 3. tests/hermes_cli/test_web_server.py (WS race): TestPtyWebSocket::test_pub_broadcasts_to_events_subscribers hit the 30s test timeout on CI. websocket_connect returns after ws.accept() — but /api/events registers the subscriber in _event_channels on the NEXT await (inside _event_lock). A publish immediately after connect could race ahead of registration and be dropped, and the subsequent receive_text() blocked until SIGALRM killed the test. Fix: poll _event_channels after the subscriber connects, before publishing. Validation: scripts/run_tests.sh tests/plugins/test_achievements_plugin.py tests/run_agent/test_860_dedup.py tests/test_hermes_state.py tests/hermes_cli/test_web_server.py 338 passed cd ui-tui && npm run type-check clean cd ui-tui && npm run build clean Remaining red checks are pure infra (Nix ubuntu hits TwirpErrorResponse ResourceExhausted on the GH Actions cache API; Nix macos bounces between npm build openssl-legacy and cache rate-limits) and cannot be fixed in the codebase. --- tests/hermes_cli/test_web_server.py | 17 +++++++++++++++++ tests/plugins/test_achievements_plugin.py | 15 +++++++++++++-- ui-tui/src/app/slash/commands/ops.ts | 2 +- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/tests/hermes_cli/test_web_server.py b/tests/hermes_cli/test_web_server.py index 1b32934c..0093dfb9 100644 --- a/tests/hermes_cli/test_web_server.py +++ b/tests/hermes_cli/test_web_server.py @@ -1957,13 +1957,30 @@ class TestPtyWebSocket: def test_pub_broadcasts_to_events_subscribers(self, monkeypatch): """Frame written to /api/pub is rebroadcast verbatim to every /api/events subscriber on the same channel.""" + import time from urllib.parse import urlencode + from hermes_cli import web_server as ws_mod qs = urlencode({"token": self.token, "channel": "broadcast-test"}) pub_path = f"/api/pub?{qs}" sub_path = f"/api/events?{qs}" with self.client.websocket_connect(sub_path) as sub: + # Wait for the subscriber to be registered on the server side. + # websocket_connect returns when ws.accept() completes, but the + # server adds us to ``_event_channels`` in a follow-up await, + # so a publish immediately after connect can race ahead of the + # subscriber registration and the message is dropped. + deadline = time.monotonic() + 5.0 + while time.monotonic() < deadline: + if ws_mod._event_channels.get("broadcast-test"): + break + time.sleep(0.01) + else: + raise AssertionError( + "subscriber did not register on channel within 5s" + ) + with self.client.websocket_connect(pub_path) as pub: pub.send_text('{"type":"tool.start","payload":{"tool_id":"t1"}}') received = sub.receive_text() diff --git a/tests/plugins/test_achievements_plugin.py b/tests/plugins/test_achievements_plugin.py index c12503ac..782aea7b 100644 --- a/tests/plugins/test_achievements_plugin.py +++ b/tests/plugins/test_achievements_plugin.py @@ -50,6 +50,12 @@ def plugin_api(tmp_path, monkeypatch): ) module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) + # Stash monkeypatch so ``_install_fake_session_db`` can use it to + # swap ``sys.modules['hermes_state']`` with auto-restoration. Without + # this, a raw ``sys.modules[...] = fake`` assignment would leak the + # fake into later tests in the same xdist worker — breaking every + # test that does ``from hermes_state import SessionDB``. + module._test_monkeypatch = monkeypatch yield module @@ -107,10 +113,15 @@ class _FakeSessionDB: def _install_fake_session_db(plugin_api, fake_db): - """Inject a fake SessionDB so ``scan_sessions`` finds it via its local import.""" + """Inject a fake SessionDB so ``scan_sessions`` finds it via its local import. + + Uses the monkeypatch stashed on ``plugin_api`` by the fixture, so the + ``sys.modules['hermes_state']`` swap is auto-restored at test teardown + and cannot leak into unrelated tests in the same xdist worker. + """ fake_module = type(sys)("hermes_state") fake_module.SessionDB = lambda: fake_db - sys.modules["hermes_state"] = fake_module + plugin_api._test_monkeypatch.setitem(sys.modules, "hermes_state", fake_module) def test_scan_sessions_default_scans_all_history_not_first_200(plugin_api): diff --git a/ui-tui/src/app/slash/commands/ops.ts b/ui-tui/src/app/slash/commands/ops.ts index 2bf262ee..ad9f3e94 100644 --- a/ui-tui/src/app/slash/commands/ops.ts +++ b/ui-tui/src/app/slash/commands/ops.ts @@ -82,7 +82,7 @@ export const opsCommands: SlashCommand[] = [ // Parse arg: `now` / `always` skip the confirmation gate. // `always` additionally persists approvals.mcp_reload_confirm=false. const a = (arg || '').trim().toLowerCase() - const params: { session_id: string; confirm?: boolean; always?: boolean } = { + const params: { session_id: string | null; confirm?: boolean; always?: boolean } = { session_id: ctx.sid } if (a === 'now' || a === 'approve' || a === 'once' || a === 'yes') {