2026-05-08 21:11:13 +00:00
6 changed files with 134 additions and 60 deletions
--- a/tests/acp/test_server.py
+++ b/tests/acp/test_server.py
@ -200,6 +200,8 @@ class TestSessionOps:
            "context",
            "reset",
            "compact",
+            "steer",
+            "queue",
            "version",
        ]
        model_cmd = next(
--- a/tests/gateway/test_teams.py
+++ b/tests/gateway/test_teams.py
@ -397,13 +397,22 @@ class TestTeamsSend:
        assert "Network error" in result.error

    @pytest.mark.asyncio
-    async def test_send_typing(self):
+    async def test_send_typing(self, monkeypatch):
        adapter = TeamsAdapter(_make_config(
            client_id="id", client_secret="secret", tenant_id="tenant",
        ))
        mock_app = MagicMock()
        mock_app.send = AsyncMock()
        adapter._app = mock_app
+        # The adapter module imports TypingActivityInput at load time; if
+        # the real microsoft_teams package isn'"'"'t installed, that local
+        # binding is None even though the test fixture registers a mock
+        # in sys.modules. Force a non-None local binding so the call to
+        # TypingActivityInput() inside send_typing succeeds and we actually
+        # reach self._app.send.
+        class _StubTypingActivityInput:
+            pass
+        monkeypatch.setattr(_teams_mod, "TypingActivityInput", _StubTypingActivityInput)

        await adapter.send_typing("conv-id")
        mock_app.send.assert_awaited_once()
--- a/tests/plugins/test_kanban_dashboard_plugin.py
+++ b/tests/plugins/test_kanban_dashboard_plugin.py
@ -478,9 +478,19 @@ def test_ws_events_rejects_when_token_required(tmp_path, monkeypatch):
    kb.init_db()

    # Stub web_server so _check_ws_token has a token to compare against.
+    # NOTE: monkeypatch.setitem(sys.modules, ...) alone is not enough.
+    # If another test in the same xdist worker has already imported
+    # hermes_cli.web_server, the parent package `hermes_cli` has the real
+    # module bound as an attribute. `from hermes_cli import web_server`
+    # then resolves via the attribute, NOT sys.modules — so the stub is
+    # bypassed and _check_ws_token compares against the real (random)
+    # _SESSION_TOKEN, rejecting our "secret-xyz" branch with 1008.
+    # Patching the parent package attribute keeps both lookup paths in sync.
    import types
+    import hermes_cli
    stub = types.SimpleNamespace(_SESSION_TOKEN="secret-xyz")
    monkeypatch.setitem(sys.modules, "hermes_cli.web_server", stub)
+    monkeypatch.setattr(hermes_cli, "web_server", stub, raising=False)

    app = FastAPI()
    app.include_router(_load_plugin_router(), prefix="/api/plugins/kanban")
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@ -753,57 +753,63 @@ def test_session_title_set_errors_when_row_lookup_fails_after_noop(monkeypatch):
        server._sessions.pop("sid", None)


-def test_session_create_drops_pending_title_on_valueerror(monkeypatch):
-    unblock_agent = threading.Event()
-
-    class _FakeWorker:
-        def __init__(self, key, model):
-            self.key = key
-
-        def close(self):
-            return None
-
-    class _FakeAgent:
-        model = "x"
-        provider = "openrouter"
-        base_url = ""
-        api_key = ""
-
-    class _FakeDB:
-        def create_session(self, _key, source="tui", model=None):
-            return None
+def test_apply_pending_session_title_drops_on_valueerror():
+    """ValueError from set_session_title (e.g. duplicate title) must drop
+    the pending_title so a stuck title doesn't keep retrying forever.

+    Originally tested via the eager-apply path in _start_agent_build, which
+    was removed by c5b4c48 (#18370, lazy session creation) and replaced by
+    a post-message-complete apply that only `except Exception: pass`'d —
+    losing the ValueError-specific drop semantics. The helper restores
+    them; this test asserts that.
+    """
+    class _RaisingDB:
        def set_session_title(self, _key, _title):
            raise ValueError("Title already in use")

-    def _make_agent(_sid, _key):
-        unblock_agent.wait(timeout=2.0)
-        return _FakeAgent()
-
-    monkeypatch.setattr(server, "_make_agent", _make_agent)
-    monkeypatch.setattr(server, "_SlashWorker", _FakeWorker)
-    monkeypatch.setattr(server, "_get_db", lambda: _FakeDB())
-    monkeypatch.setattr(server, "_session_info", lambda _a: {"model": "x"})
-    monkeypatch.setattr(server, "_probe_credentials", lambda _a: None)
-    monkeypatch.setattr(server, "_wire_callbacks", lambda _sid: None)
-    monkeypatch.setattr(server, "_emit", lambda *a, **kw: None)
-
-    import tools.approval as _approval
-
-    monkeypatch.setattr(_approval, "register_gateway_notify", lambda key, cb: None)
-    monkeypatch.setattr(_approval, "load_permanent_allowlist", lambda: None)
-
-    resp = server.handle_request(
-        {"id": "1", "method": "session.create", "params": {"cols": 80}}
-    )
-    sid = resp["result"]["session_id"]
-    session = server._sessions[sid]
-    session["pending_title"] = "duplicate title"
-    unblock_agent.set()
-    session["agent_ready"].wait(timeout=2.0)
+    session = {"session_key": "k1", "pending_title": "duplicate title"}
+    server._apply_pending_session_title(session, "sid-1", _RaisingDB())
+
+    assert session["pending_title"] is None
+
+
+def test_apply_pending_session_title_clears_on_success():
+    class _OkDB:
+        def set_session_title(self, _key, _title):
+            return True
+
+    session = {"session_key": "k2", "pending_title": "Real title"}
+    server._apply_pending_session_title(session, "sid-2", _OkDB())
+
+    assert session["pending_title"] is None
+
+
+def test_apply_pending_session_title_retains_on_transient_exception():
+    """A transient (non-ValueError) DB failure should keep the pending
+    title queued so the next message-complete can retry. Without this
+    behaviour, a single flaky DB call would silently lose the title."""
+    class _FlakyDB:
+        def set_session_title(self, _key, _title):
+            raise RuntimeError("transient db blip")
+
+    session = {"session_key": "k3", "pending_title": "Keep retrying"}
+    server._apply_pending_session_title(session, "sid-3", _FlakyDB())
+
+    assert session["pending_title"] == "Keep retrying"
+
+
+def test_apply_pending_session_title_no_op_without_pending():
+    """Helper must be a no-op when pending_title is None — most calls
+    look like this (every message-complete on a session that already has
+    a title applied)."""
+    class _ShouldNotBeCalledDB:
+        def set_session_title(self, _key, _title):
+            raise AssertionError("DB must not be touched when no pending title")
+
+    session = {"session_key": "k4", "pending_title": None}
+    server._apply_pending_session_title(session, "sid-4", _ShouldNotBeCalledDB())

    assert session["pending_title"] is None
-    server._sessions.pop(sid, None)


 def test_config_set_yolo_toggles_session_scope():
--- a/tests/tools/test_credential_pool_env_fallback.py
+++ b/tests/tools/test_credential_pool_env_fallback.py
@ -106,10 +106,20 @@ class TestCredentialPoolSeedsFromDotEnv:
        assert active_sources == set()
        assert entries == []

-    def test_os_environ_still_wins_over_dotenv(self, isolated_hermes_home, monkeypatch):
-        """get_env_value checks os.environ first — verify seeding picks that up."""
-        _write_env_file(isolated_hermes_home, DEEPSEEK_API_KEY="sk-dotenv-stale")
-        monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-env-fresh-xyz")
+    def test_dotenv_wins_over_stale_os_environ(self, isolated_hermes_home, monkeypatch):
+        """.env should win over a stale os.environ value.
+
+        Inverted from the pre-#18254 behaviour. Stale env vars inherited
+        from parent shells (Codex CLI, test harnesses) used to shadow
+        deliberate updates to ~/.hermes/.env, causing auth.json to cache
+        an outdated key and silent 401 errors. The invariant now is:
+        when a key appears in both sources, .env wins.
+
+        Sister coverage in tests/agent/test_credential_pool.py exercises
+        the load_pool path; this case exercises _seed_from_env directly.
+        """
+        _write_env_file(isolated_hermes_home, DEEPSEEK_API_KEY="sk-dotenv-fresh")
+        monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-env-stale-xyz")

        from agent.credential_pool import _seed_from_env
        entries = []
@ -118,7 +128,7 @@ class TestCredentialPoolSeedsFromDotEnv:
        assert changed is True
        seeded = [e for e in entries if e.source == "env:DEEPSEEK_API_KEY"]
        assert len(seeded) == 1
-        assert seeded[0].access_token == "sk-env-fresh-xyz"
+        assert seeded[0].access_token == "sk-dotenv-fresh"


 class TestAuthResolvesFromDotEnv:
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@ -515,6 +515,50 @@ def _wait_agent(session: dict, rid: str, timeout: float = 30.0) -> dict | None:
    return _err(rid, 5032, err) if err else None


+def _apply_pending_session_title(
+    session: dict, sid: str, db: object | None
+) -> None:
+    """Apply session["pending_title"] to the DB via db.set_session_title.
+
+    Pending titles are queued during session.create (before the DB row
+    exists, since c5b4c48 deferred row creation to first message) and
+    flushed here once a message-complete event lands.
+
+    Outcome by branch:
+      - set_session_title returns truthy: pending_title cleared.
+      - ValueError (title invalid / duplicate): pending_title dropped,
+        because retrying with the same value will fail the same way.
+        Auto-title later picks a fresh title from message content.
+      - other Exception: pending_title retained — likely a transient DB
+        failure worth retrying on the next message-complete.
+
+    No-ops when there is no pending title or no DB.
+
+    Pre-c5b4c48 (#18370) the same semantics lived inline in
+    _start_agent_build. Extracting them here both restores the lost
+    ValueError handling and makes the invariant testable without
+    simulating a full message turn.
+    """
+    pending = session.get("pending_title")
+    if not pending or db is None:
+        return
+    key = session.get("session_key") or sid
+    try:
+        if db.set_session_title(key, pending):
+            session["pending_title"] = None
+    except ValueError as exc:
+        # Title invalid / duplicate — retrying is futile; drop and let
+        # auto-title pick something.
+        session["pending_title"] = None
+        logger.info("Dropping pending title for session %s: %s", sid, exc)
+    except Exception:
+        # Likely transient — keep pending_title so the next
+        # message-complete can retry. Auto-title is the eventual fallback.
+        logger.warning(
+            "Failed to apply pending title for session %s", sid, exc_info=True,
+        )
+
+
 def _start_agent_build(sid: str, session: dict) -> None:
    """Start building the real AIAgent for a TUI session, once.

@ -2982,15 +3026,8 @@ def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None:
            _emit("message.complete", sid, payload)

            # Apply pending_title now that the DB row exists.
-            _pending = session.get("pending_title")
-            if _pending and status == "complete":
-                _pdb = _get_db()
-                if _pdb:
-                    try:
-                        if _pdb.set_session_title(session.get("session_key") or sid, _pending):
-                            session["pending_title"] = None
-                    except Exception:
-                        pass  # Best effort — auto-title will handle it below
+            if status == "complete":
+                _apply_pending_session_title(session, sid, _get_db())

            if (
                status == "complete"