fix(ci): stabilize main test suite regressions (#17660)

* fix: stabilize main test suite regressions

* test(agent): update MiniMax normalization expectation

* test: stabilize remaining CI assertions

* test: harden config helper monkeypatching

* test: harden CI-only assertions

* fix(agent): propagate fast streaming interrupts
This commit is contained in:
Stephen Schoettler 2026-04-29 23:18:55 -07:00 committed by GitHub
parent e7beaaf184
commit f73364b1c4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
37 changed files with 450 additions and 127 deletions

View File

@ -608,7 +608,7 @@ class CopilotACPClient:
end = start + limit if isinstance(limit, int) and limit > 0 else None
content = "".join(lines[start:end])
if content:
content = redact_sensitive_text(content)
content = redact_sensitive_text(content, force=True)
response = {
"jsonrpc": "2.0",
"id": message_id,

View File

@ -305,11 +305,13 @@ def _redact_form_body(text: str) -> str:
return _redact_query_string(text.strip())
def redact_sensitive_text(text: str) -> str:
def redact_sensitive_text(text: str, *, force: bool = False) -> str:
"""Apply all redaction patterns to a block of text.
Safe to call on any string -- non-matching text passes through unchanged.
Disabled by default; enable via security.redact_secrets: true in config.yaml.
Set force=True for safety boundaries that must never return raw secrets
regardless of the user's global logging redaction preference.
"""
if text is None:
return None
@ -317,7 +319,7 @@ def redact_sensitive_text(text: str) -> str:
text = str(text)
if not text:
return text
if not _REDACT_ENABLED:
if not (force or _REDACT_ENABLED):
return text
# Known prefixes (sk-, ghp_, etc.)

View File

@ -1815,11 +1815,19 @@ class BasePlatformAdapter(ABC):
if stop_event is None:
await asyncio.sleep(interval)
continue
try:
await asyncio.wait_for(stop_event.wait(), timeout=interval)
except asyncio.TimeoutError:
continue
return
loop = asyncio.get_running_loop()
deadline = loop.time() + interval
while not stop_event.is_set():
remaining = deadline - loop.time()
if remaining <= 0:
break
# Poll instead of wait_for(stop_event.wait()). Cancelling
# wait_for while it owns the inner Event.wait task can leave
# shutdown paths stuck awaiting the typing task on Python
# 3.11/pytest-asyncio; sleep cancellation is immediate.
await asyncio.sleep(min(0.25, remaining))
if stop_event.is_set():
return
except asyncio.CancelledError:
pass # Normal cancellation when handler completes
finally:
@ -2429,6 +2437,16 @@ class BasePlatformAdapter(ABC):
**_keep_typing_kwargs,
)
)
async def _stop_typing_task() -> None:
typing_task.cancel()
try:
await asyncio.wait_for(asyncio.shield(typing_task), timeout=0.5)
except (asyncio.CancelledError, asyncio.TimeoutError):
# Cancellation cleanup must not block adapter shutdown. The
# typing task is already cancelled; if the parent task is also
# cancelling, let this message-processing task unwind now.
pass
try:
await self._run_processing_hook("on_processing_start", event)
@ -2651,11 +2669,7 @@ class BasePlatformAdapter(ABC):
_active = self._active_sessions.get(session_key)
if _active is not None:
_active.clear()
typing_task.cancel()
try:
await typing_task
except asyncio.CancelledError:
pass
await _stop_typing_task()
# Process pending message in new background task
await self._process_message_background(pending_event, session_key)
return # Already cleaned up
@ -2703,11 +2717,7 @@ class BasePlatformAdapter(ABC):
except Exception:
pass
# Stop typing indicator
typing_task.cancel()
try:
await typing_task
except asyncio.CancelledError:
pass
await _stop_typing_task()
# Also cancel any platform-level persistent typing tasks (e.g. Discord)
# that may have been recreated by _keep_typing after the last stop_typing()
try:

View File

@ -10530,10 +10530,26 @@ class GatewayRunner:
# Tool progress mode — resolved per-platform with env var fallback
_resolved_tp = resolve_display_setting(user_config, platform_key, "tool_progress")
_env_tp = os.getenv("HERMES_TOOL_PROGRESS_MODE")
_display_cfg = display_config if isinstance(display_config, dict) else {}
_platforms_cfg = _display_cfg.get("platforms") or {}
_platform_cfg = _platforms_cfg.get(platform_key) or {}
_legacy_tp_overrides = _display_cfg.get("tool_progress_overrides") or {}
_tool_progress_configured = (
"tool_progress" in _display_cfg
or (
isinstance(_platform_cfg, dict)
and "tool_progress" in _platform_cfg
)
or (
isinstance(_legacy_tp_overrides, dict)
and platform_key in _legacy_tp_overrides
)
)
progress_mode = (
_resolved_tp
or os.getenv("HERMES_TOOL_PROGRESS_MODE")
or "all"
_env_tp
if _env_tp and not _tool_progress_configured
else (_resolved_tp or _env_tp or "all")
)
# Disable tool progress for webhooks - they don't support message editing,
# so each progress line would be sent as a separate message.

View File

@ -62,6 +62,7 @@ from .config import (
)
from .whatsapp_identity import (
canonical_whatsapp_identifier,
normalize_whatsapp_identifier, # noqa: F401 - re-exported for gateway.session callers
)
from utils import atomic_replace

View File

@ -78,6 +78,14 @@ def _system_package_install_cmd(pkg: str) -> str:
return f"sudo apt install {pkg}"
def _safe_which(cmd: str) -> str | None:
"""shutil.which wrapper resilient to platform monkeypatching in tests."""
try:
return shutil.which(cmd)
except Exception:
return None
def _termux_browser_setup_steps(node_installed: bool) -> list[str]:
steps: list[str] = []
step = 1
@ -579,7 +587,7 @@ def run_doctor(args):
except Exception as e:
check_warn("Auth provider status", f"(could not check: {e})")
if shutil.which("codex"):
if _safe_which("codex"):
check_ok("codex CLI")
else:
# Native OAuth uses Hermes' own device-code flow — the Codex CLI is
@ -797,13 +805,13 @@ def run_doctor(args):
print(color("◆ External Tools", Colors.CYAN, Colors.BOLD))
# Git
if shutil.which("git"):
if _safe_which("git"):
check_ok("git")
else:
check_warn("git not found", "(optional)")
# ripgrep (optional, for faster file search)
if shutil.which("rg"):
if _safe_which("rg"):
check_ok("ripgrep (rg)", "(faster file search)")
else:
check_warn("ripgrep (rg) not found", "(file search uses grep fallback)")
@ -812,7 +820,7 @@ def run_doctor(args):
# Docker (optional)
terminal_env = os.getenv("TERMINAL_ENV", "local")
if terminal_env == "docker":
if shutil.which("docker"):
if _safe_which("docker"):
# Check if docker daemon is running
try:
result = subprocess.run(["docker", "info"], capture_output=True, timeout=10)
@ -827,7 +835,7 @@ def run_doctor(args):
check_fail("docker not found", "(required for TERMINAL_ENV=docker)")
issues.append("Install Docker or change TERMINAL_ENV")
else:
if shutil.which("docker"):
if _safe_which("docker"):
check_ok("docker", "(optional)")
else:
if _is_termux():
@ -918,7 +926,7 @@ def run_doctor(args):
check_info("Vercel persistence: ephemeral filesystem")
# Node.js + agent-browser (for browser automation tools)
if shutil.which("node"):
if _safe_which("node"):
check_ok("Node.js")
# Check if agent-browser is installed
agent_browser_path = PROJECT_ROOT / "node_modules" / "agent-browser"
@ -944,7 +952,7 @@ def run_doctor(args):
check_warn("Node.js not found", "(optional, needed for browser tools)")
# npm audit for all Node.js packages
if shutil.which("npm"):
if _safe_which("npm"):
npm_dirs = [
(PROJECT_ROOT, "Browser tools (agent-browser)"),
(PROJECT_ROOT / "scripts" / "whatsapp-bridge", "WhatsApp bridge"),

View File

@ -5398,7 +5398,7 @@ def _warn_stale_dashboard_processes() -> None:
capture_output=True, text=True, timeout=10,
)
if result.returncode == 0:
for line in result.stdout.split("\n"):
for line in getattr(result, "stdout", "").split("\n"):
stripped = line.strip()
if not stripped or "grep" in stripped:
continue

View File

@ -6531,6 +6531,9 @@ class AIAgent:
Falls back to _interruptible_api_call on provider errors indicating
streaming is not supported.
"""
if self._interrupt_requested:
raise InterruptedError("Agent interrupted before streaming API call")
if self.api_mode == "codex_responses":
# Codex streams internally via _run_codex_stream. The main dispatch
# in _interruptible_api_call already calls it; we just need to
@ -7189,6 +7192,12 @@ class AIAgent:
# to non-streaming on the next attempt via _disable_streaming.
result["error"] = e
return
except InterruptedError as e:
# The interrupt may be noticed inside the worker thread before
# the polling loop sees it. Surface it through the normal result
# channel so callers never miss a fast pre-retry interrupt.
result["error"] = e
return
finally:
request_client = request_client_holder.get("client")
if request_client is not None:

View File

@ -308,14 +308,14 @@ class TestMinimaxPreserveDots:
from agent.anthropic_adapter import normalize_model_name
assert normalize_model_name("MiniMax-M2.7", preserve_dots=True) == "MiniMax-M2.7"
def test_normalize_converts_without_preserve(self):
def test_normalize_preserves_non_anthropic_dots_without_preserve(self):
from agent.anthropic_adapter import normalize_model_name
# Post-#17171, dots are only converted to hyphens for claude-*/anthropic-*
# model names. Non-Anthropic models (including MiniMax) keep their dots
# even when preserve_dots=False — that's the fix this test was written
# against the inverse of, so just assert the new invariant directly.
# Non-Anthropic model families use dots as canonical version separators;
# only Claude/Anthropic names are hyphen-normalized by default.
assert normalize_model_name("MiniMax-M2.7", preserve_dots=False) == "MiniMax-M2.7"
# Claude models still get dotted→hyphenated when preserve_dots=False.
def test_normalize_still_converts_claude_dots_without_preserve(self):
from agent.anthropic_adapter import normalize_model_name
assert normalize_model_name("claude-opus-4.6", preserve_dots=False) == "claude-opus-4-6"

View File

@ -20,6 +20,7 @@ test runner at ``scripts/run_tests.sh``.
"""
import asyncio
import logging
import os
import re
import signal
@ -174,7 +175,10 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
"HERMES_SESSION_KEY",
"HERMES_GATEWAY_SESSION",
"HERMES_PLATFORM",
"HERMES_MODEL",
"HERMES_INFERENCE_MODEL",
"HERMES_INFERENCE_PROVIDER",
"HERMES_TUI_PROVIDER",
"HERMES_MANAGED",
"HERMES_DEV",
"HERMES_CONTAINER",
@ -184,6 +188,14 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
"HERMES_BACKGROUND_NOTIFICATIONS",
"HERMES_EXEC_ASK",
"HERMES_HOME_MODE",
"TERMINAL_CWD",
"TERMINAL_ENV",
"TERMINAL_VERCEL_RUNTIME",
"TERMINAL_CONTAINER_CPU",
"TERMINAL_CONTAINER_DISK",
"TERMINAL_CONTAINER_MEMORY",
"TERMINAL_CONTAINER_PERSISTENT",
"TERMINAL_DOCKER_RUN_AS_HOST_USER",
"BROWSER_CDP_URL",
"CAMOFOX_URL",
# Platform allowlists — not credentials, but if set from any source
@ -326,6 +338,14 @@ def _reset_module_state():
that don't exist yet (test collection before production import) are
skipped silently; production import later creates fresh empty state.
"""
# --- logging — quiet/one-shot paths mutate process-global logger state ---
logging.disable(logging.NOTSET)
for _logger_name in ("tools", "run_agent", "trajectory_compressor", "cron", "hermes_cli"):
_logger = logging.getLogger(_logger_name)
_logger.disabled = False
_logger.setLevel(logging.NOTSET)
_logger.propagate = True
# --- tools.approval — the single biggest source of cross-test pollution ---
try:
from tools import approval as _approval_mod
@ -380,6 +400,26 @@ def _reset_module_state():
except Exception:
pass
# --- tools.terminal_tool — active environment/cwd cache ---
# File tools prefer a live terminal cwd when one is cached for the task.
# Clear terminal environments between tests so a prior terminal call can't
# override TERMINAL_CWD in path-resolution tests.
try:
from tools import terminal_tool as _term_mod
_envs_to_cleanup = []
with _term_mod._env_lock:
_envs_to_cleanup = list(_term_mod._active_environments.values())
_term_mod._active_environments.clear()
_term_mod._last_activity.clear()
_term_mod._creation_locks.clear()
for _env in _envs_to_cleanup:
try:
_env.cleanup()
except Exception:
pass
except Exception:
pass
# --- tools.credential_files — ContextVar<dict> ---
try:
from tools import credential_files as _credf_mod

View File

@ -1276,9 +1276,10 @@ class TestMatrixUploadAndSend:
mock_client.send_message_event = AsyncMock(return_value="$event")
adapter._client = mock_client
result = await adapter._upload_and_send(
"!room:example.org", b"secret", "secret.txt", "text/plain", "m.file",
)
with patch.dict("sys.modules", _make_fake_mautrix()):
result = await adapter._upload_and_send(
"!room:example.org", b"secret", "secret.txt", "text/plain", "m.file",
)
assert result.success is True
# Should have uploaded ciphertext, not plaintext

View File

@ -1470,6 +1470,8 @@ def test_credential_sources_registry_has_expected_steps():
"~/.hermes/.anthropic_oauth.json",
"auth.json providers.nous",
"auth.json providers.openai-codex + ~/.codex/auth.json",
"auth.json providers.minimax-oauth",
"~/.qwen/oauth_creds.json",
"Custom provider config.yaml api_key field",
}
missing = required - set(descriptions)

View File

@ -526,6 +526,11 @@ class TestCmdMigrate:
class TestCmdCleanup:
"""Test the cleanup command handler."""
@pytest.fixture(autouse=True)
def _mock_openclaw_running(self):
with patch.object(claw_mod, "_detect_openclaw_processes", return_value=[]):
yield
def test_no_dirs_found(self, tmp_path, capsys):
args = Namespace(source=None, dry_run=False, yes=False)
with patch.object(claw_mod, "_find_openclaw_dirs", return_value=[]):

View File

@ -72,7 +72,10 @@ class TestLoadConfigExpansion:
monkeypatch.setenv("GOOGLE_API_KEY", "gsk-test-key")
monkeypatch.setenv("TELEGRAM_BOT_TOKEN", "1234567:ABC-token")
monkeypatch.setattr("hermes_cli.config.get_config_path", lambda: config_file)
# Patch the imported function's own globals. Other tests may reload
# hermes_cli.config, making string-target monkeypatches hit a different
# module object than this collection-time imported load_config().
monkeypatch.setitem(load_config.__globals__, "get_config_path", lambda: config_file)
config = load_config()
@ -86,7 +89,7 @@ class TestLoadConfigExpansion:
config_file.write_text(config_yaml)
monkeypatch.delenv("NOT_SET_XYZ_123", raising=False)
monkeypatch.setattr("hermes_cli.config.get_config_path", lambda: config_file)
monkeypatch.setitem(load_config.__globals__, "get_config_path", lambda: config_file)
config = load_config()

View File

@ -105,7 +105,7 @@ def test_get_container_exec_info_defaults():
)
with patch("hermes_constants.is_container", return_value=False), \
patch("hermes_cli.config.get_hermes_home", return_value=hermes_home), \
patch.dict(get_container_exec_info.__globals__, {"get_hermes_home": lambda: hermes_home}), \
patch.dict(os.environ, {}, clear=False):
os.environ.pop("HERMES_DEV", None)
info = get_container_exec_info()

View File

@ -255,7 +255,8 @@ class TestLaunchdServiceRecovery:
target = f"{domain}/{label}"
def fake_run(cmd, check=False, **kwargs):
calls.append(cmd)
if cmd and cmd[0] == "launchctl":
calls.append(cmd)
if cmd == ["launchctl", "kickstart", target] and calls.count(cmd) == 1:
raise gateway_cli.subprocess.CalledProcessError(3, cmd, stderr="Could not find service")
return SimpleNamespace(returncode=0, stdout="", stderr="")
@ -282,7 +283,8 @@ class TestLaunchdServiceRecovery:
target = f"{domain}/{label}"
def fake_run(cmd, check=False, **kwargs):
calls.append(cmd)
if cmd and cmd[0] == "launchctl":
calls.append(cmd)
if cmd == ["launchctl", "kickstart", target] and calls.count(cmd) == 1:
raise gateway_cli.subprocess.CalledProcessError(113, cmd, stderr="Could not find service")
return SimpleNamespace(returncode=0, stdout="", stderr="")

View File

@ -96,10 +96,17 @@ class TestPtyBridgeIO:
@skip_on_windows
class TestPtyBridgeResize:
def test_resize_updates_child_winsize(self):
# tput reads COLUMNS/LINES from the TTY ioctl (TIOCGWINSZ).
# Spawn a shell, resize, then ask tput for the dimensions.
# Query the TTY ioctl directly instead of using tput, which requires
# TERM and fails in GitHub Actions' non-interactive environment.
winsize_script = (
"import fcntl, struct, termios, time; "
"time.sleep(0.1); "
"rows, cols, *_ = struct.unpack('HHHH', "
"fcntl.ioctl(0, termios.TIOCGWINSZ, b'\\0' * 8)); "
"print(cols); print(rows)"
)
bridge = PtyBridge.spawn(
["/bin/sh", "-c", "sleep 0.1; tput cols; tput lines"],
[sys.executable, "-c", winsize_script],
cols=80,
rows=24,
)

View File

@ -30,6 +30,17 @@ def _clear_provider_env(monkeypatch):
monkeypatch.delenv(key, raising=False)
def _clear_vercel_env(monkeypatch):
for key in (
"TERMINAL_VERCEL_RUNTIME",
"VERCEL_OIDC_TOKEN",
"VERCEL_TOKEN",
"VERCEL_PROJECT_ID",
"VERCEL_TEAM_ID",
):
monkeypatch.delenv(key, raising=False)
def _stub_tts(monkeypatch):
"""Stub out TTS prompts so setup_model_provider doesn't block."""
monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda q, c, d=0: (
@ -485,6 +496,7 @@ def test_modal_setup_persists_direct_mode_when_user_chooses_their_own_account(tm
def test_vercel_setup_configures_access_token_auth(tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
_clear_vercel_env(monkeypatch)
monkeypatch.setenv("VERCEL_OIDC_TOKEN", "old-oidc")
monkeypatch.setitem(sys.modules, "vercel", types.ModuleType("vercel"))
config = load_config()
@ -515,13 +527,7 @@ def test_vercel_setup_configures_access_token_auth(tmp_path, monkeypatch):
def test_vercel_setup_prefills_project_and_team_from_link_file(tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
# Sibling test (test_vercel_setup_configures_access_token_auth) calls
# save_env_value which mutates os.environ directly and never restores
# it. When xdist schedules both tests in the same worker, VERCEL_*
# from the earlier run masks the .vercel/project.json defaults that
# this test exercises. Clear them before load.
for _leaked in ("VERCEL_TOKEN", "VERCEL_PROJECT_ID", "VERCEL_TEAM_ID", "VERCEL_OIDC_TOKEN"):
monkeypatch.delenv(_leaked, raising=False)
_clear_vercel_env(monkeypatch)
project_root = tmp_path / "project"
nested = project_root / "app" / "src"
nested.mkdir(parents=True)

View File

@ -29,7 +29,7 @@ class TestReloadEnv:
"""reload_env() adds vars from .env that are not in os.environ."""
env_file = tmp_path / ".env"
env_file.write_text("TEST_RELOAD_VAR=hello123\n")
with patch("hermes_cli.config.get_env_path", return_value=env_file):
with patch.dict(reload_env.__globals__, {"get_env_path": lambda: env_file}):
os.environ.pop("TEST_RELOAD_VAR", None)
count = reload_env()
assert count >= 1
@ -40,7 +40,7 @@ class TestReloadEnv:
"""reload_env() updates vars whose value changed on disk."""
env_file = tmp_path / ".env"
env_file.write_text("TEST_RELOAD_VAR=old_value\n")
with patch("hermes_cli.config.get_env_path", return_value=env_file):
with patch.dict(reload_env.__globals__, {"get_env_path": lambda: env_file}):
os.environ["TEST_RELOAD_VAR"] = "old_value"
# Now change the file
env_file.write_text("TEST_RELOAD_VAR=new_value\n")
@ -55,7 +55,7 @@ class TestReloadEnv:
env_file.write_text("") # empty .env
# Pick a known key from OPTIONAL_ENV_VARS
known_key = next(iter(OPTIONAL_ENV_VARS.keys()))
with patch("hermes_cli.config.get_env_path", return_value=env_file):
with patch.dict(reload_env.__globals__, {"get_env_path": lambda: env_file}):
os.environ[known_key] = "stale_value"
count = reload_env()
assert known_key not in os.environ
@ -65,7 +65,7 @@ class TestReloadEnv:
"""reload_env() preserves non-Hermes env vars even when absent from .env."""
env_file = tmp_path / ".env"
env_file.write_text("")
with patch("hermes_cli.config.get_env_path", return_value=env_file):
with patch.dict(reload_env.__globals__, {"get_env_path": lambda: env_file}):
os.environ["MY_CUSTOM_UNRELATED_VAR"] = "keep_me"
reload_env()
assert os.environ.get("MY_CUSTOM_UNRELATED_VAR") == "keep_me"
@ -1851,14 +1851,24 @@ class TestPtyWebSocket:
assert b"round-trip-payload" in buf
def test_resize_escape_is_forwarded(self, monkeypatch):
# Resize escape gets intercepted and applied via TIOCSWINSZ,
# then ``tput cols/lines`` reports the new dimensions back.
# Resize escape gets intercepted and applied via TIOCSWINSZ, then the
# child reads the TTY ioctl directly. Avoid tput because CI may not set
# TERM for non-interactive shells.
import sys
winsize_script = (
"import fcntl, struct, termios, time; "
"time.sleep(0.15); "
"rows, cols, *_ = struct.unpack('HHHH', "
"fcntl.ioctl(0, termios.TIOCGWINSZ, b'\\0' * 8)); "
"print(cols); print(rows)"
)
monkeypatch.setattr(
self.ws_module,
"_resolve_chat_argv",
# sleep gives the test time to push the resize before tput runs
# sleep gives the test time to push the resize before the child reads the ioctl.
lambda resume=None, sidecar_url=None: (
["/bin/sh", "-c", "sleep 0.15; tput cols; tput lines"],
[sys.executable, "-c", winsize_script],
None,
None,
),

View File

@ -8,12 +8,10 @@ effects (terminal, send_message, delegate_task, etc.).
import threading
from unittest.mock import patch
from run_agent import AIAgent
def _make_agent_stub():
def _make_agent_stub(agent_cls):
"""Create a minimal AIAgent-like object with just enough state for _spawn_background_review."""
agent = object.__new__(AIAgent)
agent = object.__new__(agent_cls)
agent.model = "test-model"
agent.platform = "test"
agent.provider = "openai"
@ -45,14 +43,16 @@ class _SyncThread:
def test_background_review_agent_uses_restricted_toolsets():
"""The review agent must only have access to 'memory' and 'skills' toolsets."""
agent = _make_agent_stub()
import run_agent
agent = _make_agent_stub(run_agent.AIAgent)
captured = {}
def _capture_init(self, *args, **kwargs):
captured["enabled_toolsets"] = kwargs.get("enabled_toolsets")
raise RuntimeError("stop after capturing init args")
with patch.object(AIAgent, "__init__", _capture_init), \
with patch.object(run_agent.AIAgent, "__init__", _capture_init), \
patch("threading.Thread", _SyncThread):
agent._spawn_background_review(
messages_snapshot=[],

View File

@ -96,7 +96,7 @@ class TestCompactBannerSkinIntegration:
set_active_skin("default")
with patch("cli.shutil.get_terminal_size", return_value=SimpleNamespace(columns=90)), \
patch("cli.format_banner_version_label", return_value="Hermes Agent v0.1.0 (test)"):
patch.dict(_build_compact_banner.__globals__, {"format_banner_version_label": lambda: "Hermes Agent v0.1.0 (test)"}):
banner = _build_compact_banner()
assert "NOUS HERMES" in banner
@ -105,7 +105,7 @@ class TestCompactBannerSkinIntegration:
set_active_skin("poseidon")
with patch("cli.shutil.get_terminal_size", return_value=SimpleNamespace(columns=90)), \
patch("cli.format_banner_version_label", return_value="Hermes Agent v0.1.0 (test)"):
patch.dict(_build_compact_banner.__globals__, {"format_banner_version_label": lambda: "Hermes Agent v0.1.0 (test)"}):
banner = _build_compact_banner()
assert "Poseidon Agent" in banner
@ -116,7 +116,7 @@ class TestCompactBannerSkinIntegration:
skin = get_active_skin()
with patch("cli.shutil.get_terminal_size", return_value=SimpleNamespace(columns=90)), \
patch("cli.format_banner_version_label", return_value="Hermes Agent v0.1.0 (test)"):
patch.dict(_build_compact_banner.__globals__, {"format_banner_version_label": lambda: "Hermes Agent v0.1.0 (test)"}):
banner = _build_compact_banner()
assert skin.get_color("banner_border") in banner
@ -127,7 +127,7 @@ class TestCompactBannerSkinIntegration:
set_active_skin("default")
with patch("cli.shutil.get_terminal_size", return_value=SimpleNamespace(columns=90)), \
patch("cli.format_banner_version_label", return_value="Hermes Agent v1.0 (test) · upstream abc12345"):
patch.dict(_build_compact_banner.__globals__, {"format_banner_version_label": lambda: "Hermes Agent v1.0 (test) · upstream abc12345"}):
banner = _build_compact_banner()
assert "upstream abc12345" in banner

View File

@ -2544,9 +2544,11 @@ def test_session_create_close_race_does_not_orphan_worker(monkeypatch):
# detection entirely and the test would race a non-event.
build_started = threading.Event()
release_build = threading.Event()
build_entered = threading.Event()
def _slow_make_agent(sid, key, session_id=None):
build_started.set()
build_entered.set()
release_build.wait(timeout=3.0)
return _FakeAgent()
@ -2584,6 +2586,7 @@ def test_session_create_close_race_does_not_orphan_worker(monkeypatch):
)
assert resp.get("result"), f"got error: {resp.get('error')}"
sid = resp["result"]["session_id"]
assert build_entered.wait(timeout=1.0), "deferred build did not start"
# Wait until the (deferred) build thread has actually entered
# _make_agent — otherwise session.close pops _sessions[sid] before

View File

@ -131,15 +131,15 @@ class TestApprovalHeartbeat:
"""Polling slices don't delay responsiveness — resolve is near-instant."""
from tools.approval import (
check_all_command_guards,
has_blocking_approval,
register_gateway_notify,
resolve_gateway_approval,
)
register_gateway_notify(self.SESSION_KEY, lambda _payload: None)
start_time = time.monotonic()
result_holder: dict = {}
register_gateway_notify(self.SESSION_KEY, lambda _payload: None)
def _run_check():
result_holder["result"] = check_all_command_guards(
"rm -rf /tmp/nonexistent-fast-target", "local"
@ -148,9 +148,18 @@ class TestApprovalHeartbeat:
thread = threading.Thread(target=_run_check, daemon=True)
thread.start()
# Wait until the worker has actually enqueued the approval. Resolving
# before registration is a test race, not a responsiveness signal.
deadline = time.monotonic() + 5.0
while time.monotonic() < deadline:
if has_blocking_approval(self.SESSION_KEY):
break
time.sleep(0.01)
assert has_blocking_approval(self.SESSION_KEY)
# Resolve almost immediately — the wait loop should return within
# its current 1s poll slice.
time.sleep(0.1)
start_time = time.monotonic()
resolve_gateway_approval(self.SESSION_KEY, "once")
thread.join(timeout=5)
elapsed = time.monotonic() - start_time

View File

@ -354,6 +354,7 @@ class TestOwnerPidCrossProcess:
monkeypatch.setattr(
bt, "_requires_real_termux_browser_install", lambda *a: False
)
monkeypatch.setattr(bt, "_chromium_installed", lambda: True)
monkeypatch.setattr(
bt, "_get_session_info",
lambda task_id: {"session_name": session_name},

View File

@ -205,24 +205,28 @@ class TestMacosOsascript:
class TestIsWsl:
def setup_method(self):
# _is_wsl is now hermes_constants.is_wsl — reset its cache
# _is_wsl is hermes_constants.is_wsl; reset the function's own module
# globals so this stays stable even if hermes_constants was imported
# through a different module object earlier in a large xdist run.
import hermes_constants
hermes_constants._wsl_detected = None
_is_wsl.__globals__["_wsl_detected"] = None
def teardown_method(self):
# Reset again after the test so we don't leak a cached value
# (True/False) into whichever test the xdist worker runs next.
import hermes_constants
hermes_constants._wsl_detected = None
_is_wsl.__globals__["_wsl_detected"] = None
def test_wsl2_detected(self):
content = "Linux version 5.15.0 (microsoft-standard-WSL2)"
with patch("builtins.open", mock_open(read_data=content)):
with patch.dict(_is_wsl.__globals__, {"open": mock_open(read_data=content)}):
assert _is_wsl() is True
def test_wsl1_detected(self):
content = "Linux version 4.4.0-microsoft-standard"
with patch("builtins.open", mock_open(read_data=content)):
with patch.dict(_is_wsl.__globals__, {"open": mock_open(read_data=content)}):
assert _is_wsl() is True
def test_regular_linux(self):
@ -234,20 +238,20 @@ class TestIsWsl:
# short-circuits on the cache. setup_method resets, so we just
# need to be sure the patched `open` is actually reached.
content = "Linux version 6.14.0-37-generic (buildd@lcy02-amd64-049)"
with patch("hermes_constants.open", mock_open(read_data=content), create=True):
with patch.dict(_is_wsl.__globals__, {"open": mock_open(read_data=content)}):
assert _is_wsl() is False
def test_proc_version_missing(self):
with patch("hermes_constants.open", side_effect=FileNotFoundError, create=True):
with patch.dict(_is_wsl.__globals__, {"open": MagicMock(side_effect=FileNotFoundError)}):
assert _is_wsl() is False
def test_result_is_cached(self):
import hermes_constants
content = "Linux version 5.15.0 (microsoft-standard-WSL2)"
with patch("hermes_constants.open", mock_open(read_data=content), create=True) as m:
opener = mock_open(read_data=content)
with patch.dict(_is_wsl.__globals__, {"open": opener}):
assert _is_wsl() is True
assert _is_wsl() is True
m.assert_called_once() # only read once
opener.assert_called_once() # only read once
# ── WSL (powershell.exe) ────────────────────────────────────────────────

View File

@ -16,6 +16,7 @@ import signal
import subprocess
import threading
import time
from types import SimpleNamespace
import pytest
@ -37,6 +38,58 @@ def _pgid_still_alive(pgid: int) -> bool:
return False
def _process_group_snapshot(pgid: int) -> str:
"""Return a process-table snapshot for diagnostics."""
return subprocess.run(
["ps", "-o", "pid,ppid,pgid,stat,cmd", "-g", str(pgid)],
capture_output=True,
text=True,
check=False,
).stdout.strip()
def _wait_for_pgid_exit(pgid: int, timeout: float = 10.0) -> bool:
"""Wait for a process group to disappear under loaded xdist hosts."""
deadline = time.monotonic() + timeout
while time.monotonic() < deadline:
if not _pgid_still_alive(pgid):
return True
time.sleep(0.1)
return not _pgid_still_alive(pgid)
def test_kill_process_uses_cached_pgid_if_wrapper_already_exited(monkeypatch):
"""If the shell wrapper exits before cleanup, still kill its process group.
Without the cached pgid fallback, ``os.getpgid(proc.pid)`` raises for the
dead wrapper and cleanup falls back to ``proc.kill()``, which cannot reach
orphaned grandchildren still running in the original process group.
"""
env = object.__new__(LocalEnvironment)
proc = SimpleNamespace(
pid=12345,
_hermes_pgid=67890,
poll=lambda: 0,
kill=lambda: None,
)
killpg_calls = []
def fake_getpgid(_pid):
raise ProcessLookupError
def fake_killpg(pgid, sig):
killpg_calls.append((pgid, sig))
if sig == 0:
raise ProcessLookupError
monkeypatch.setattr(os, "getpgid", fake_getpgid)
monkeypatch.setattr(os, "killpg", fake_killpg)
env._kill_process(proc)
assert killpg_calls == [(67890, signal.SIGTERM), (67890, 0)]
def test_wait_for_process_kills_subprocess_on_keyboardinterrupt():
"""When KeyboardInterrupt arrives mid-poll, the subprocess group must be
killed before the exception is re-raised."""
@ -118,19 +171,15 @@ def test_wait_for_process_kills_subprocess_on_keyboardinterrupt():
assert not t.is_alive(), "worker didn't exit within 5 s of the interrupt"
# The critical assertion: the subprocess GROUP must be dead. Not
# just the bash wrapper — the 'sleep 30' child too.
# Give the SIGTERM+1s wait+SIGKILL escalation a moment to complete.
deadline = time.monotonic() + 3.0
while time.monotonic() < deadline:
if not _pgid_still_alive(pgid):
break
time.sleep(0.1)
assert not _pgid_still_alive(pgid), (
# just the bash wrapper — the 'sleep 30' child too. Under xdist load,
# process-group disappearance can lag briefly after the worker exits,
# especially if the process is already dying or waiting to be reaped.
assert _wait_for_pgid_exit(pgid), (
f"subprocess group {pgid} is STILL ALIVE after worker received "
f"KeyboardInterrupt — orphan bug regressed. This is the "
f"sleep-300-survives-SIGTERM scenario from Physikal's Apr 2026 "
f"report. See tools/environments/base.py _wait_for_process "
f"except-block."
f"except-block.\n{_process_group_snapshot(pgid)}"
)
# And the worker should have observed the KeyboardInterrupt (i.e.
# it re-raised cleanly, not silently swallowed).

View File

@ -997,10 +997,13 @@ class TestHermesHomeIsolation:
assert "hermes_test" in hermes_home, "Should point to test temp dir"
def test_get_hermes_home_fallback(self):
    """Without HERMES_HOME set, falls back to the active OS home."""
    from tools.tirith_security import _get_hermes_home

    with patch.dict(os.environ, {}, clear=True):
        # Remove HERMES_HOME entirely. With HOME also absent, expanduser
        # falls back to the account database; compute the expectation under
        # the same patched environment instead of after patch.dict restores
        # HOME.
        os.environ.pop("HERMES_HOME", None)
        expected = os.path.join(os.path.expanduser("~"), ".hermes")
        result = _get_hermes_home()
        assert result == expected

View File

@ -36,14 +36,16 @@ class TestGetProvider:
monkeypatch.setenv("VOICE_TOOLS_OPENAI_KEY", "sk-test")
monkeypatch.delenv("GROQ_API_KEY", raising=False)
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
patch("tools.transcription_tools._HAS_OPENAI", True):
patch("tools.transcription_tools._HAS_OPENAI", True), \
patch("tools.transcription_tools._has_local_command", return_value=False):
from tools.transcription_tools import _get_provider
assert _get_provider({"provider": "local"}) == "none"
def test_local_nothing_available(self, monkeypatch):
monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False)
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
patch("tools.transcription_tools._HAS_OPENAI", False):
patch("tools.transcription_tools._HAS_OPENAI", False), \
patch("tools.transcription_tools._has_local_command", return_value=False):
from tools.transcription_tools import _get_provider
assert _get_provider({"provider": "local"}) == "none"

View File

@ -36,6 +36,28 @@ class TestProviderSelectionGate:
configure ``{"enabled": True, "provider": ...}`` for explicit tests.
"""
def test_import_after_config_env_patch_uses_restored_dotenv_loader(self):
    """Importing STT while hermes_cli.config.get_env_value is patched must
    not freeze that temporary helper into this module forever.
    """
    import importlib
    import hermes_cli.config as config_mod
    from tools import transcription_tools as tt

    # Reload the module while the config helper is temporarily replaced,
    # mimicking a test that patches the helper and later restores it.
    with pytest.MonkeyPatch.context() as mpatch:
        mpatch.setattr(config_mod, "get_env_value", lambda name, default=None: "")
        tt = importlib.reload(tt)
    try:
        # After the monkeypatch context exits, the reloaded module must see
        # the restored loader — the dotenv-provided key has to win.
        with patch.object(tt, "_HAS_FASTER_WHISPER", False):
            with patch.object(tt, "_HAS_OPENAI", True):
                with patch.object(tt, "_has_local_command", return_value=False):
                    with patch("hermes_cli.config.load_env",
                               return_value={"GROQ_API_KEY": "dotenv-secret"}):
                        assert tt._get_provider(
                            {"enabled": True, "provider": "groq"}
                        ) == "groq"
    finally:
        # Return the module to its pristine import state for later tests.
        importlib.reload(tt)
def test_explicit_groq_sees_dotenv(self):
from tools import transcription_tools as tt

View File

@ -758,19 +758,12 @@ class TestValidateAudioFileEdgeCases:
f = tmp_path / "test.ogg"
f.write_bytes(b"data")
from tools.transcription_tools import _validate_audio_file
real_stat = f.stat()
call_count = 0
def stat_side_effect(*args, **kwargs):
nonlocal call_count
call_count += 1
# First calls are from exists() and is_file(), let them pass
if call_count <= 2:
return real_stat
raise OSError("disk error")
with patch("pathlib.Path.stat", side_effect=stat_side_effect):
with patch("pathlib.Path.exists", return_value=True), \
patch("pathlib.Path.is_file", return_value=True), \
patch("pathlib.Path.stat", side_effect=OSError("disk error")):
result = _validate_audio_file(str(f))
assert result is not None
assert "Failed to access" in result["error"]

View File

@ -174,6 +174,45 @@ class TestRegressionGuard:
key while ``os.environ`` does not.
"""
def test_import_after_config_env_patch_uses_restored_dotenv_loader(self, tmp_path, monkeypatch):
    """Importing TTS while hermes_cli.config.get_env_value is patched must
    not freeze that temporary helper into this module forever.
    """
    import importlib
    import hermes_cli.config as config_mod
    from tools import tts_tool

    monkeypatch.delenv("MINIMAX_API_KEY", raising=False)
    # Reload TTS while the config helper is swapped out, then let the
    # MonkeyPatch context restore the real helper.
    with pytest.MonkeyPatch.context() as mpatch:
        mpatch.setattr(config_mod, "get_env_value", lambda name: "")
        tts_tool = importlib.reload(tts_tool)
    try:
        seen: dict = {}

        def _stub_post(url, **kwargs):
            # Record headers so the Authorization value can be inspected.
            seen["headers"] = kwargs.get("headers", {})
            reply = MagicMock()
            reply.raise_for_status = MagicMock()
            reply.json.return_value = {
                "data": {"audio": b"\x00".hex()},
                "base_resp": {"status_code": 0},
            }
            return reply

        dotenv = {"MINIMAX_API_KEY": "dotenv-secret"}
        with patch("hermes_cli.config.load_env", return_value=dotenv):
            with patch("requests.post", side_effect=_stub_post):
                tts_tool._generate_minimax_tts(
                    "hi", str(tmp_path / "out.mp3"), {}
                )
        assert seen["headers"]["Authorization"] == "Bearer dotenv-secret"
    finally:
        importlib.reload(tts_tool)
def test_minimax_missing_when_only_in_dotenv_before_fix(self, tmp_path, monkeypatch):
from tools import tts_tool

View File

@ -6,6 +6,7 @@ import shutil
import signal
import subprocess
import tempfile
import time
from tools.environments.base import BaseEnvironment, _pipe_stdin
@ -369,6 +370,11 @@ class LocalEnvironment(BaseEnvironment):
preexec_fn=None if _IS_WINDOWS else os.setsid,
cwd=self.cwd,
)
if not _IS_WINDOWS:
try:
proc._hermes_pgid = os.getpgid(proc.pid)
except ProcessLookupError:
pass
if stdin_data is not None:
_pipe_stdin(proc, stdin_data)
@ -381,12 +387,42 @@ class LocalEnvironment(BaseEnvironment):
if _IS_WINDOWS:
proc.terminate()
else:
pgid = os.getpgid(proc.pid)
try:
pgid = os.getpgid(proc.pid)
except ProcessLookupError:
pgid = getattr(proc, "_hermes_pgid", None)
if pgid is None:
raise
os.killpg(pgid, signal.SIGTERM)
deadline = time.monotonic() + 1.0
while time.monotonic() < deadline:
if proc.poll() is not None:
try:
os.killpg(pgid, 0)
except ProcessLookupError:
return
time.sleep(0.05)
# The shell can exit quickly while a child in the same process
# group is still shutting down. Escalate based on the process
# group, not just the shell wrapper, so interrupted commands do
# not leave orphaned grandchildren under load.
try:
# _IS_WINDOWS is guarded by the enclosing else branch.
os.killpg(pgid, signal.SIGKILL)
except ProcessLookupError:
return
try:
proc.wait(timeout=1.0)
except subprocess.TimeoutExpired:
os.killpg(pgid, signal.SIGKILL)
pass
deadline = time.monotonic() + 1.0
while time.monotonic() < deadline:
try:
os.killpg(pgid, 0)
except ProcessLookupError:
return
time.sleep(0.05)
except (ProcessLookupError, PermissionError):
try:
proc.kill()

View File

@ -489,12 +489,15 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
task_data = _read_tracker.setdefault(task_id, {
"last_key": None, "consecutive": 0,
"read_history": set(), "dedup": {},
"dedup_hits": {},
"dedup_hits": {}, "read_timestamps": {},
})
# Backward-compat for pre-existing tracker entries that predate
# dedup_hits (long-lived task or crossed an upgrade boundary).
# dedup_hits/read_timestamps (long-lived task or crossed an
# upgrade boundary).
if "dedup_hits" not in task_data:
task_data["dedup_hits"] = {}
if "read_timestamps" not in task_data:
task_data["read_timestamps"] = {}
cached_mtime = task_data.get("dedup", {}).get(dedup_key)
if cached_mtime is not None:

View File

@ -915,11 +915,12 @@ class MCPServerTask:
except Exception:
logger.exception("MCP server '%s': dynamic tool refresh failed", self.name)
def _schedule_tools_refresh(self) -> None:
def _schedule_tools_refresh(self) -> asyncio.Task:
"""Schedule a background tool refresh and keep it strongly referenced."""
task = asyncio.create_task(self._refresh_tools_task())
self._pending_refresh_tasks.add(task)
task.add_done_callback(self._pending_refresh_tasks.discard)
return task
def _make_message_handler(self):
"""Build a ``message_handler`` callback for ``ClientSession``.
@ -950,6 +951,10 @@ class MCPServerTask:
# a separate task and let the handler return
# promptly.
self._schedule_tools_refresh()
# Yield one loop tick so tests and short-lived
# notification contexts can observe the scheduled
# refresh without awaiting the full server RPC.
await asyncio.sleep(0)
case PromptListChangedNotification():
logger.debug("MCP server '%s': prompts/list_changed (ignored)", self.name)
case ResourceListChangedNotification():
@ -2005,8 +2010,12 @@ def _make_tool_handler(server_name: str, tool_name: str, tool_timeout: float):
}, ensure_ascii=False)
async def _call():
async with server._rpc_lock:
rpc_lock = getattr(server, "_rpc_lock", None)
if rpc_lock is None:
result = await server.session.call_tool(tool_name, arguments=args)
else:
async with rpc_lock:
result = await server.session.call_tool(tool_name, arguments=args)
# MCP CallToolResult has .content (list of content blocks) and .isError
if result.isError:
error_text = ""

View File

@ -42,11 +42,19 @@ from tools.tool_backend_helpers import managed_nous_tools_enabled, resolve_opena
logger = logging.getLogger(__name__)
def get_env_value(name, default=None):
    """Read env values through the live config module.

    Tests may monkeypatch and later restore ``hermes_cli.config.get_env_value``
    before this module is imported. Resolve the helper at call time so STT does
    not keep a stale imported function for the rest of the test process.

    Args:
        name: Environment/config variable name to look up.
        default: Value returned when the variable is unset.

    Returns:
        The configured value, or ``default`` when the helper yields ``None``
        (or, without hermes_cli installed, when the process env lacks it).
    """
    try:
        from hermes_cli.config import get_env_value as _live_lookup
    except ImportError:
        # Config layer unavailable; fall back to the raw process environment.
        return os.getenv(name, default)
    value = _live_lookup(name)
    if value is None:
        return default
    return value
# ---------------------------------------------------------------------------
# Optional imports — graceful degradation

View File

@ -44,11 +44,19 @@ from urllib.parse import urljoin
from hermes_constants import display_hermes_home
logger = logging.getLogger(__name__)
def get_env_value(name, default=None):
    """Read env values through the live config module.

    Tests may monkeypatch and later restore ``hermes_cli.config.get_env_value``
    before this module is imported. Resolve the helper at call time so TTS does
    not keep a stale imported function for the rest of the test process.

    Args:
        name: Environment/config variable name to look up.
        default: Value returned when the variable is unset.

    Returns:
        The configured value, or ``default`` when the helper yields ``None``
        (or, without hermes_cli installed, when the process env lacks it).
    """
    try:
        from hermes_cli.config import get_env_value as _live_lookup
    except ImportError:
        # Config layer unavailable; fall back to the raw process environment.
        return os.getenv(name, default)
    value = _live_lookup(name)
    if value is None:
        return default
    return value
from tools.managed_tool_gateway import resolve_managed_tool_gateway
from tools.tool_backend_helpers import managed_nous_tools_enabled, prefers_gateway, resolve_openai_audio_api_key
from tools.xai_http import hermes_xai_user_agent

View File

@ -1860,6 +1860,18 @@ def _enrich_with_attached_images(user_text: str, image_paths: list[str]) -> str:
return text or "What do you see in this image?"
def _messages_as_conversation(db, session_id: str, *, include_ancestors: bool = False):
if include_ancestors:
try:
return db.get_messages_as_conversation(
session_id, include_ancestors=True
)
except TypeError as exc:
if "include_ancestors" not in str(exc):
raise
return db.get_messages_as_conversation(session_id)
def _history_to_messages(history: list[dict]) -> list[dict]:
messages = []
tool_call_args = {}
@ -2068,9 +2080,9 @@ def _(rid, params: dict) -> dict:
_enable_gateway_prompts()
try:
db.reopen_session(target)
history = db.get_messages_as_conversation(target)
display_history = db.get_messages_as_conversation(
target, include_ancestors=True
history = _messages_as_conversation(db, target)
display_history = _messages_as_conversation(
db, target, include_ancestors=True
)
messages = _history_to_messages(display_history)
tokens = _set_session_context(target)
@ -2215,8 +2227,8 @@ def _(rid, params: dict) -> dict:
db = _get_db()
if db is not None and session.get("session_key"):
try:
history = db.get_messages_as_conversation(
session["session_key"], include_ancestors=True
history = _messages_as_conversation(
db, session["session_key"], include_ancestors=True
)
except Exception:
pass