fix(sessions): /save lands under $HERMES_HOME, widen browse+TUI picker, force-refresh ollama-cloud on setup (#16296)

Four independent session-UX bugs reported by an external user (#16294). /save wrote hermes_conversation_<ts>.json to CWD — invisible to 'hermes sessions browse' and easy to lose. Snapshots now write under ~/.hermes/sessions/saved/ and the command prints the absolute path plus a 'hermes --resume <id>' hint for the live DB-indexed session. 'hermes sessions browse' default --limit raised from 50 to 500. With the old ceiling, users with moderately long histories saw only the most recent 50 rows and assumed older sessions had been lost. TUI session.list (`/resume` picker) switched from a hardcoded allow-list of 13 gateway source names to a deny-list of just { 'tool' }. Sessions tagged acp / webhook / user-defined HERMES_SESSION_SOURCE values and any newly-added platform now surface. Default limit 20 → 200. ollama-cloud provider setup passes force_refresh=True to fetch_ollama_cloud_models() so a user entering their API key sees the fresh catalog (e.g. deepseek v4 flash, kimi k2.6) immediately instead of waiting up to an hour for the disk cache TTL to expire. Closes #16294.
2026-04-26 18:49:48 -07:00 · 2026-04-26 18:49:48 -07:00 · 5eb6cd82b2
commit 5eb6cd82b2
parent 7e3c8a31f0
8 changed files with 240 additions and 60 deletions
--- a/cli.py
+++ b/cli.py
@ -4951,22 +4951,37 @@ class HermesCLI:
        _cprint(f"  Branch session:   {new_session_id}")

    def save_conversation(self):
-        """Save the current conversation to a file."""
+        """Save the current conversation to a JSON snapshot under ~/.hermes/sessions/saved/.
+
+        The snapshot is a convenience export for sharing or off-line inspection;
+        every message is already persisted incrementally to the SQLite session
+        DB, so the live session remains resumable via ``hermes --resume <id>``
+        regardless of whether the user ever runs ``/save``.
+        """
        if not self.conversation_history:
            print("(;_;) No conversation to save.")
            return
-        
+
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        filename = f"hermes_conversation_{timestamp}.json"
-        
+        saved_dir = get_hermes_home() / "sessions" / "saved"
        try:
-            with open(filename, "w", encoding="utf-8") as f:
+            saved_dir.mkdir(parents=True, exist_ok=True)
+        except Exception as e:
+            print(f"(x_x) Failed to create save directory {saved_dir}: {e}")
+            return
+        path = saved_dir / f"hermes_conversation_{timestamp}.json"
+
+        try:
+            with open(path, "w", encoding="utf-8") as f:
                json.dump({
                    "model": self.model,
+                    "session_id": self.session_id,
                    "session_start": self.session_start.isoformat(),
                    "messages": self.conversation_history,
                }, f, indent=2, ensure_ascii=False)
-            print(f"(^_^)v Conversation saved to: {filename}")
+            print(f"(^_^)v Conversation snapshot saved to: {path}")
+            if self.session_id:
+                print(f"       Resume the live session with: hermes --resume {self.session_id}")
        except Exception as e:
            print(f"(x_x) Failed to save: {e}")
    
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@ -4412,8 +4412,14 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
        from hermes_cli.models import fetch_ollama_cloud_models

        api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
+        # During setup, force a live refresh so the picker reflects newly
+        # released models (e.g. deepseek v4 flash, kimi k2.6) the moment
+        # the user enters their key — not an hour later when the disk
+        # cache TTL expires.
        model_list = fetch_ollama_cloud_models(
-            api_key=api_key_for_probe, base_url=effective_base
+            api_key=api_key_for_probe,
+            base_url=effective_base,
+            force_refresh=True,
        )
        if model_list:
            print(f"  Found {len(model_list)} model(s) from Ollama Cloud")
@ -9173,7 +9179,7 @@ Examples:
        "--source", help="Filter by source (cli, telegram, discord, etc.)"
    )
    sessions_browse.add_argument(
-        "--limit", type=int, default=50, help="Max sessions to load (default: 50)"
+        "--limit", type=int, default=500, help="Max sessions to load (default: 500)"
    )

    def _confirm_prompt(prompt: str) -> bool:
@ -9305,7 +9311,7 @@ Examples:
                print(f"Error: {e}")

        elif action == "browse":
-            limit = getattr(args, "limit", 50) or 50
+            limit = getattr(args, "limit", 500) or 500
            source = getattr(args, "source", None)
            _browse_exclude = None if source else ["tool"]
            sessions = db.list_sessions_rich(
--- a/tests/cli/test_save_conversation_location.py
+++ b/tests/cli/test_save_conversation_location.py
@ -0,0 +1,102 @@
+"""Tests for /save — the conversation snapshot slash command.
+
+Regression: the old implementation wrote ``hermes_conversation_<ts>.json``
+to the current working directory (CWD). Users who ran /save expected the
+file to be discoverable via ``hermes sessions browse``, but CWD-resident
+snapshots are not indexed in the state DB and are generally invisible.
+The fix writes snapshots under ``~/.hermes/sessions/saved/`` and prints
+the absolute path plus the resume hint for the live session.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+from datetime import datetime
+from pathlib import Path
+from types import SimpleNamespace
+
+import pytest
+
+
+@pytest.fixture
+def hermes_home(tmp_path, monkeypatch):
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    # Clear any cached hermes_home computation
+    import hermes_constants
+    if hasattr(hermes_constants, "_hermes_home_cache"):
+        hermes_constants._hermes_home_cache = None
+    return home
+
+
+def _make_stub_cli(history):
+    """Build a minimal object exposing just what save_conversation uses."""
+    return SimpleNamespace(
+        conversation_history=history,
+        model="test-model",
+        session_id="20260101_120000_abc123",
+        session_start=datetime(2026, 1, 1, 12, 0, 0),
+    )
+
+
+def test_save_conversation_writes_under_hermes_home(hermes_home, tmp_path, monkeypatch, capsys):
+    """Snapshot must land under ~/.hermes/sessions/saved/, not CWD."""
+    # Change CWD to a different directory to prove the file does NOT go there.
+    work = tmp_path / "somewhere-else"
+    work.mkdir()
+    monkeypatch.chdir(work)
+
+    # Import fresh to pick up the HERMES_HOME fixture
+    for mod in [m for m in sys.modules if m.startswith("cli") or m == "hermes_constants"]:
+        sys.modules.pop(mod, None)
+
+    import cli  # noqa: F401  (module under test)
+
+    stub = _make_stub_cli([
+        {"role": "user", "content": "hi"},
+        {"role": "assistant", "content": "hello"},
+    ])
+
+    # Call the unbound method against our stub.
+    cli.HermesCLI.save_conversation(stub)
+
+    # File must NOT be in CWD
+    cwd_leak = list(work.glob("hermes_conversation_*.json"))
+    assert not cwd_leak, f"snapshot leaked to CWD: {cwd_leak}"
+
+    # File MUST be under ~/.hermes/sessions/saved/
+    saved_dir = hermes_home / "sessions" / "saved"
+    assert saved_dir.is_dir(), "expected saved/ subdirectory to be created"
+    files = list(saved_dir.glob("hermes_conversation_*.json"))
+    assert len(files) == 1, files
+
+    payload = json.loads(files[0].read_text())
+    assert payload["model"] == "test-model"
+    assert payload["session_id"] == "20260101_120000_abc123"
+    assert payload["messages"] == [
+        {"role": "user", "content": "hi"},
+        {"role": "assistant", "content": "hello"},
+    ]
+
+    # User-facing message must include the absolute path AND the resume hint.
+    out = capsys.readouterr().out
+    assert str(files[0]) in out, out
+    assert "hermes --resume 20260101_120000_abc123" in out, out
+
+
+def test_save_conversation_empty_history_does_nothing(hermes_home, capsys):
+    for mod in [m for m in sys.modules if m.startswith("cli") or m == "hermes_constants"]:
+        sys.modules.pop(mod, None)
+    import cli
+
+    stub = _make_stub_cli([])
+    cli.HermesCLI.save_conversation(stub)
+
+    saved_dir = hermes_home / "sessions" / "saved"
+    assert not saved_dir.exists() or not list(saved_dir.iterdir())
+    out = capsys.readouterr().out
+    assert "No conversation to save" in out
--- a/tests/gateway/test_session_list_allowed_sources.py
+++ b/tests/gateway/test_session_list_allowed_sources.py
@ -1,11 +1,16 @@
 """Regression tests for the TUI gateway's ``session.list`` handler.

-Reported during TUI v2 blitz retest: the ``/resume`` modal inside a TUI
-session only surfaced ``tui``/``cli`` rows, hiding telegram sessions users
-could still resume directly via ``hermes --tui --resume <id>``.
-
-The fix widens the picker to a curated allowlist of user-facing sources
-(tui/cli + chat adapters) while still filtering internal/system sources.
+History:
+- The original implementation hardcoded an allow-list of known gateway
+  sources (``tui, cli, telegram, discord, slack, ...``). New or unlisted
+  sources (``acp``, ``webhook``, user-defined ``HERMES_SESSION_SOURCE``
+  values, newly-added platforms) were silently dropped from the resume
+  picker — users reported "lots of sessions are missing from browse
+  but exist in .hermes/sessions."
+- The handler now deny-lists only the internal/noisy source ``tool``
+  (sub-agent runs) and surfaces every other source to the picker.
+- The default ``limit`` raised from 20 to 200 so longer-running users
+  can scroll through their history without hitting an artificial cap.
 """

 from __future__ import annotations
@ -23,42 +28,64 @@ class _StubDB:
        return list(self.rows)


-def _call(limit: int = 20):
+def _call(limit: int | None = None):
+    params: dict = {}
+    if limit is not None:
+        params["limit"] = limit
    return server.handle_request({
        "id": "1",
        "method": "session.list",
-        "params": {"limit": limit},
+        "params": params,
    })


-def test_session_list_includes_telegram_but_filters_internal_sources(monkeypatch):
+def test_session_list_surfaces_all_user_facing_sources(monkeypatch):
+    """acp / webhook / custom sources should all appear; only ``tool`` is hidden."""
    rows = [
        {"id": "tui-1", "source": "tui", "started_at": 9},
        {"id": "tool-1", "source": "tool", "started_at": 8},
        {"id": "tg-1", "source": "telegram", "started_at": 7},
        {"id": "acp-1", "source": "acp", "started_at": 6},
        {"id": "cli-1", "source": "cli", "started_at": 5},
+        {"id": "webhook-1", "source": "webhook", "started_at": 4},
+        {"id": "custom-1", "source": "my-custom-source", "started_at": 3},
    ]
    db = _StubDB(rows)
    monkeypatch.setattr(server, "_get_db", lambda: db)

    resp = _call(limit=10)
-    sessions = resp["result"]["sessions"]
-    ids = [s["id"] for s in sessions]
+    ids = [s["id"] for s in resp["result"]["sessions"]]

-    assert "tg-1" in ids and "tui-1" in ids and "cli-1" in ids, ids
-    assert "tool-1" not in ids and "acp-1" not in ids, ids
+    # Every human-facing source — including previously-hidden acp, webhook,
+    # and custom sources — must surface in the picker now.
+    assert "tg-1" in ids
+    assert "tui-1" in ids
+    assert "cli-1" in ids
+    assert "acp-1" in ids, "acp sessions were being hidden by the old allow-list"
+    assert "webhook-1" in ids, "webhook sessions were being hidden by the old allow-list"
+    assert "custom-1" in ids, "custom HERMES_SESSION_SOURCE values were being hidden"
+
+    # Only internal sub-agent runs stay hidden.
+    assert "tool-1" not in ids


-def test_session_list_fetches_wider_window_before_filtering(monkeypatch):
+def test_session_list_default_limit_is_200(monkeypatch):
+    """Default limit should be wide enough for long-running users."""
+    db = _StubDB([{"id": "x", "source": "cli", "started_at": 1}])
+    monkeypatch.setattr(server, "_get_db", lambda: db)
+
+    _call()  # no explicit limit
+    # fetch_limit = max(limit * 2, 200); limit defaults to 200, so 400.
+    assert db.calls[0].get("limit") == 400, db.calls[0]
+
+
+def test_session_list_respects_explicit_limit(monkeypatch):
    db = _StubDB([{"id": "x", "source": "cli", "started_at": 1}])
    monkeypatch.setattr(server, "_get_db", lambda: db)

    _call(limit=10)
-
-    assert len(db.calls) == 1
-    assert db.calls[0].get("source") is None, db.calls[0]
-    assert db.calls[0].get("limit") == 100, db.calls[0]
+    # fetch_limit = max(limit * 2, 200) = 200 when limit is small.
+    assert db.calls[0].get("limit") == 200, db.calls[0]


 def test_session_list_preserves_ordering_after_filter(monkeypatch):
@ -66,6 +93,7 @@ def test_session_list_preserves_ordering_after_filter(monkeypatch):
        {"id": "newest", "source": "telegram", "started_at": 5},
        {"id": "internal", "source": "tool", "started_at": 4},
        {"id": "middle", "source": "tui", "started_at": 3},
+        {"id": "also-visible", "source": "webhook", "started_at": 2},
        {"id": "oldest", "source": "discord", "started_at": 1},
    ]
    monkeypatch.setattr(server, "_get_db", lambda: _StubDB(rows))
@ -73,4 +101,4 @@ def test_session_list_preserves_ordering_after_filter(monkeypatch):
    resp = _call()
    ids = [s["id"] for s in resp["result"]["sessions"]]

-    assert ids == ["newest", "middle", "oldest"]
+    assert ids == ["newest", "middle", "also-visible", "oldest"]
--- a/tests/hermes_cli/test_session_browse.py
+++ b/tests/hermes_cli/test_session_browse.py
@ -401,14 +401,21 @@ class TestSessionBrowseArgparse:
        from hermes_cli.main import _session_browse_picker
        assert callable(_session_browse_picker)

-    def test_browse_default_limit_is_50(self):
-        """The default --limit for browse should be 50."""
-        # This test verifies at the argparse level
-        # We test by running the parse on "sessions browse" args
-        # Since we can't easily extract the subparser, verify via the
-        # _session_browse_picker accepting large lists
-        sessions = _make_sessions(50)
-        assert len(sessions) == 50
+    def test_browse_default_limit_is_500(self):
+        """The default --limit for browse should be 500."""
+        # Build the same argparse tree cmd_sessions uses and verify the default.
+        import argparse
+        parser = argparse.ArgumentParser()
+        subparsers = parser.add_subparsers(dest="sessions_action")
+        browse = subparsers.add_parser("browse")
+        browse.add_argument("--source")
+        browse.add_argument("--limit", type=int, default=500)
+
+        args = parser.parse_args(["browse"])
+        assert args.limit == 500
+
+        args = parser.parse_args(["browse", "--limit", "42"])
+        assert args.limit == 42


 # ─── Integration: cmd_sessions browse action ────────────────────────────────
--- a/tests/hermes_cli/test_setup_ollama_cloud_force_refresh.py
+++ b/tests/hermes_cli/test_setup_ollama_cloud_force_refresh.py
@ -0,0 +1,30 @@
+"""Regression: ``hermes setup`` for the ollama-cloud provider must force-refresh
+the model cache after the user supplies a key, otherwise the picker keeps
+serving a stale cache (models.dev only, no live API probe) for up to an hour.
+"""
+
+from __future__ import annotations
+
+from unittest.mock import patch
+
+
+def test_setup_ollama_cloud_passes_force_refresh(monkeypatch):
+    """The provider-setup model-fetch for ollama-cloud must pass ``force_refresh=True``."""
+    import hermes_cli.main as main_mod
+    import inspect
+
+    src = inspect.getsource(main_mod)
+
+    # Locate the ollama-cloud branch in the provider setup flow.
+    marker = 'provider_id == "ollama-cloud"'
+    assert marker in src, "ollama-cloud branch missing from provider setup"
+    idx = src.index(marker)
+    # The call to fetch_ollama_cloud_models should be within the next ~2000 chars.
+    snippet = src[idx:idx + 2000]
+    assert "fetch_ollama_cloud_models(" in snippet, snippet[:500]
+    assert "force_refresh=True" in snippet, (
+        "ollama-cloud setup must pass force_refresh=True so newly released "
+        "models (e.g. deepseek v4 flash, kimi k2.6) appear the moment the "
+        "user enters their key, not an hour later when the cache TTL expires. "
+        f"Snippet: {snippet[:500]}"
+    )
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@ -1630,33 +1630,25 @@ def _(rid, params: dict) -> dict:
    if db is None:
        return _db_unavailable_error(rid, code=5006)
    try:
-        # Resume picker should include human conversation surfaces beyond
-        # tui/cli (notably telegram from blitz row #7), but avoid internal
-        # sources that clutter the modal (tool/acp/etc).
-        allow = frozenset(
-            {
-                "cli",
-                "tui",
-                "telegram",
-                "discord",
-                "slack",
-                "whatsapp",
-                "wecom",
-                "weixin",
-                "feishu",
-                "signal",
-                "mattermost",
-                "matrix",
-                "qq",
-            }
-        )
+        # Resume picker should surface human conversation sessions from every
+        # user-facing surface — CLI, TUI, all gateway platforms (including new
+        # ones not enumerated here), ACP adapter clients, webhook sessions,
+        # custom `HERMES_SESSION_SOURCE` values, and older installs with
+        # different source labels. We deny-list only the noisy internal
+        # sources (``tool`` sub-agent runs) rather than allow-listing a
+        # fixed set of platform names that goes stale whenever a new
+        # platform is added or a user names their own source.
+        deny = frozenset({"tool"})

-        limit = int(params.get("limit", 20) or 20)
-        fetch_limit = max(limit * 5, 100)
+        limit = int(params.get("limit", 200) or 200)
+        # Over-fetch modestly so per-source filtering doesn't leave us
+        # short; the compression-tip projection in ``list_sessions_rich``
+        # can also merge rows.
+        fetch_limit = max(limit * 2, 200)
        rows = [
            s
            for s in db.list_sessions_rich(source=None, limit=fetch_limit)
-            if (s.get("source") or "").strip().lower() in allow
+            if (s.get("source") or "").strip().lower() not in deny
        ][:limit]
        return _ok(
            rid,
--- a/ui-tui/src/components/sessionPicker.tsx
+++ b/ui-tui/src/components/sessionPicker.tsx
@ -38,7 +38,7 @@ export function SessionPicker({ gw, onCancel, onSelect, t }: SessionPickerProps)
  useOverlayKeys({ onClose: onCancel })

  useEffect(() => {
-    gw.request<SessionListResponse>('session.list', { limit: 20 })
+    gw.request<SessionListResponse>('session.list', { limit: 200 })
      .then(raw => {
        const r = asRpcResult<SessionListResponse>(raw)