fix(sessions): /save lands under $HERMES_HOME, widen browse+TUI picker, force-refresh ollama-cloud on setup (#16296)

Four independent session-UX bugs reported by an external user (#16294).

/save wrote hermes_conversation_<ts>.json to CWD — invisible to
'hermes sessions browse' and easy to lose. Snapshots now write under
~/.hermes/sessions/saved/ and the command prints the absolute path plus
a 'hermes --resume <id>' hint for the live DB-indexed session.

'hermes sessions browse' default --limit raised from 50 to 500. With the
old ceiling, users with moderately long histories saw only the most
recent 50 rows and assumed older sessions had been lost.

TUI session.list (`/resume` picker) switched from a hardcoded allow-list
of 13 gateway source names to a deny-list of just { 'tool' }. Sessions
tagged acp / webhook / user-defined HERMES_SESSION_SOURCE values and
any newly-added platform now surface. Default limit 20 → 200.

ollama-cloud provider setup passes force_refresh=True to
fetch_ollama_cloud_models() so a user entering their API key sees the
fresh catalog (e.g. deepseek v4 flash, kimi k2.6) immediately instead
of waiting up to an hour for the disk cache TTL to expire.

Closes #16294.
This commit is contained in:
Teknium 2026-04-26 18:49:48 -07:00 committed by GitHub
parent 7e3c8a31f0
commit 5eb6cd82b2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 240 additions and 60 deletions

27
cli.py
View File

@ -4951,22 +4951,37 @@ class HermesCLI:
_cprint(f" Branch session: {new_session_id}")
def save_conversation(self):
"""Save the current conversation to a file."""
"""Save the current conversation to a JSON snapshot under ~/.hermes/sessions/saved/.
The snapshot is a convenience export for sharing or off-line inspection;
every message is already persisted incrementally to the SQLite session
DB, so the live session remains resumable via ``hermes --resume <id>``
regardless of whether the user ever runs ``/save``.
"""
if not self.conversation_history:
print("(;_;) No conversation to save.")
return
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"hermes_conversation_{timestamp}.json"
saved_dir = get_hermes_home() / "sessions" / "saved"
try:
with open(filename, "w", encoding="utf-8") as f:
saved_dir.mkdir(parents=True, exist_ok=True)
except Exception as e:
print(f"(x_x) Failed to create save directory {saved_dir}: {e}")
return
path = saved_dir / f"hermes_conversation_{timestamp}.json"
try:
with open(path, "w", encoding="utf-8") as f:
json.dump({
"model": self.model,
"session_id": self.session_id,
"session_start": self.session_start.isoformat(),
"messages": self.conversation_history,
}, f, indent=2, ensure_ascii=False)
print(f"(^_^)v Conversation saved to: {filename}")
print(f"(^_^)v Conversation snapshot saved to: {path}")
if self.session_id:
print(f" Resume the live session with: hermes --resume {self.session_id}")
except Exception as e:
print(f"(x_x) Failed to save: {e}")

View File

@ -4412,8 +4412,14 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
from hermes_cli.models import fetch_ollama_cloud_models
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
# During setup, force a live refresh so the picker reflects newly
# released models (e.g. deepseek v4 flash, kimi k2.6) the moment
# the user enters their key — not an hour later when the disk
# cache TTL expires.
model_list = fetch_ollama_cloud_models(
api_key=api_key_for_probe, base_url=effective_base
api_key=api_key_for_probe,
base_url=effective_base,
force_refresh=True,
)
if model_list:
print(f" Found {len(model_list)} model(s) from Ollama Cloud")
@ -9173,7 +9179,7 @@ Examples:
"--source", help="Filter by source (cli, telegram, discord, etc.)"
)
sessions_browse.add_argument(
"--limit", type=int, default=50, help="Max sessions to load (default: 50)"
"--limit", type=int, default=500, help="Max sessions to load (default: 500)"
)
def _confirm_prompt(prompt: str) -> bool:
@ -9305,7 +9311,7 @@ Examples:
print(f"Error: {e}")
elif action == "browse":
limit = getattr(args, "limit", 50) or 50
limit = getattr(args, "limit", 500) or 500
source = getattr(args, "source", None)
_browse_exclude = None if source else ["tool"]
sessions = db.list_sessions_rich(

View File

@ -0,0 +1,102 @@
"""Tests for /save — the conversation snapshot slash command.
Regression: the old implementation wrote ``hermes_conversation_<ts>.json``
to the current working directory (CWD). Users who ran /save expected the
file to be discoverable via ``hermes sessions browse``, but CWD-resident
snapshots are not indexed in the state DB and are generally invisible.
The fix writes snapshots under ``~/.hermes/sessions/saved/`` and prints
the absolute path plus the resume hint for the live session.
"""
from __future__ import annotations
import json
import os
import sys
from datetime import datetime
from pathlib import Path
from types import SimpleNamespace
import pytest
@pytest.fixture
def hermes_home(tmp_path, monkeypatch):
home = tmp_path / ".hermes"
home.mkdir()
monkeypatch.setattr(Path, "home", lambda: tmp_path)
monkeypatch.setenv("HERMES_HOME", str(home))
# Clear any cached hermes_home computation
import hermes_constants
if hasattr(hermes_constants, "_hermes_home_cache"):
hermes_constants._hermes_home_cache = None
return home
def _make_stub_cli(history):
"""Build a minimal object exposing just what save_conversation uses."""
return SimpleNamespace(
conversation_history=history,
model="test-model",
session_id="20260101_120000_abc123",
session_start=datetime(2026, 1, 1, 12, 0, 0),
)
def test_save_conversation_writes_under_hermes_home(hermes_home, tmp_path, monkeypatch, capsys):
"""Snapshot must land under ~/.hermes/sessions/saved/, not CWD."""
# Change CWD to a different directory to prove the file does NOT go there.
work = tmp_path / "somewhere-else"
work.mkdir()
monkeypatch.chdir(work)
# Import fresh to pick up the HERMES_HOME fixture
for mod in [m for m in sys.modules if m.startswith("cli") or m == "hermes_constants"]:
sys.modules.pop(mod, None)
import cli # noqa: F401 (module under test)
stub = _make_stub_cli([
{"role": "user", "content": "hi"},
{"role": "assistant", "content": "hello"},
])
# Call the unbound method against our stub.
cli.HermesCLI.save_conversation(stub)
# File must NOT be in CWD
cwd_leak = list(work.glob("hermes_conversation_*.json"))
assert not cwd_leak, f"snapshot leaked to CWD: {cwd_leak}"
# File MUST be under ~/.hermes/sessions/saved/
saved_dir = hermes_home / "sessions" / "saved"
assert saved_dir.is_dir(), "expected saved/ subdirectory to be created"
files = list(saved_dir.glob("hermes_conversation_*.json"))
assert len(files) == 1, files
payload = json.loads(files[0].read_text())
assert payload["model"] == "test-model"
assert payload["session_id"] == "20260101_120000_abc123"
assert payload["messages"] == [
{"role": "user", "content": "hi"},
{"role": "assistant", "content": "hello"},
]
# User-facing message must include the absolute path AND the resume hint.
out = capsys.readouterr().out
assert str(files[0]) in out, out
assert "hermes --resume 20260101_120000_abc123" in out, out
def test_save_conversation_empty_history_does_nothing(hermes_home, capsys):
for mod in [m for m in sys.modules if m.startswith("cli") or m == "hermes_constants"]:
sys.modules.pop(mod, None)
import cli
stub = _make_stub_cli([])
cli.HermesCLI.save_conversation(stub)
saved_dir = hermes_home / "sessions" / "saved"
assert not saved_dir.exists() or not list(saved_dir.iterdir())
out = capsys.readouterr().out
assert "No conversation to save" in out

View File

@ -1,11 +1,16 @@
"""Regression tests for the TUI gateway's ``session.list`` handler.
Reported during TUI v2 blitz retest: the ``/resume`` modal inside a TUI
session only surfaced ``tui``/``cli`` rows, hiding telegram sessions users
could still resume directly via ``hermes --tui --resume <id>``.
The fix widens the picker to a curated allowlist of user-facing sources
(tui/cli + chat adapters) while still filtering internal/system sources.
History:
- The original implementation hardcoded an allow-list of known gateway
sources (``tui, cli, telegram, discord, slack, ...``). New or unlisted
sources (``acp``, ``webhook``, user-defined ``HERMES_SESSION_SOURCE``
values, newly-added platforms) were silently dropped from the resume
picker users reported "lots of sessions are missing from browse
but exist in .hermes/sessions."
- The handler now deny-lists only the internal/noisy source ``tool``
(sub-agent runs) and surfaces every other source to the picker.
- The default ``limit`` raised from 20 to 200 so longer-running users
can scroll through their history without hitting an artificial cap.
"""
from __future__ import annotations
@ -23,42 +28,64 @@ class _StubDB:
return list(self.rows)
def _call(limit: int = 20):
def _call(limit: int | None = None):
params: dict = {}
if limit is not None:
params["limit"] = limit
return server.handle_request({
"id": "1",
"method": "session.list",
"params": {"limit": limit},
"params": params,
})
def test_session_list_includes_telegram_but_filters_internal_sources(monkeypatch):
def test_session_list_surfaces_all_user_facing_sources(monkeypatch):
"""acp / webhook / custom sources should all appear; only ``tool`` is hidden."""
rows = [
{"id": "tui-1", "source": "tui", "started_at": 9},
{"id": "tool-1", "source": "tool", "started_at": 8},
{"id": "tg-1", "source": "telegram", "started_at": 7},
{"id": "acp-1", "source": "acp", "started_at": 6},
{"id": "cli-1", "source": "cli", "started_at": 5},
{"id": "webhook-1", "source": "webhook", "started_at": 4},
{"id": "custom-1", "source": "my-custom-source", "started_at": 3},
]
db = _StubDB(rows)
monkeypatch.setattr(server, "_get_db", lambda: db)
resp = _call(limit=10)
sessions = resp["result"]["sessions"]
ids = [s["id"] for s in sessions]
ids = [s["id"] for s in resp["result"]["sessions"]]
assert "tg-1" in ids and "tui-1" in ids and "cli-1" in ids, ids
assert "tool-1" not in ids and "acp-1" not in ids, ids
# Every human-facing source — including previously-hidden acp, webhook,
# and custom sources — must surface in the picker now.
assert "tg-1" in ids
assert "tui-1" in ids
assert "cli-1" in ids
assert "acp-1" in ids, "acp sessions were being hidden by the old allow-list"
assert "webhook-1" in ids, "webhook sessions were being hidden by the old allow-list"
assert "custom-1" in ids, "custom HERMES_SESSION_SOURCE values were being hidden"
# Only internal sub-agent runs stay hidden.
assert "tool-1" not in ids
def test_session_list_fetches_wider_window_before_filtering(monkeypatch):
def test_session_list_default_limit_is_200(monkeypatch):
"""Default limit should be wide enough for long-running users."""
db = _StubDB([{"id": "x", "source": "cli", "started_at": 1}])
monkeypatch.setattr(server, "_get_db", lambda: db)
_call() # no explicit limit
# fetch_limit = max(limit * 2, 200); limit defaults to 200, so 400.
assert db.calls[0].get("limit") == 400, db.calls[0]
def test_session_list_respects_explicit_limit(monkeypatch):
db = _StubDB([{"id": "x", "source": "cli", "started_at": 1}])
monkeypatch.setattr(server, "_get_db", lambda: db)
_call(limit=10)
assert len(db.calls) == 1
assert db.calls[0].get("source") is None, db.calls[0]
assert db.calls[0].get("limit") == 100, db.calls[0]
# fetch_limit = max(limit * 2, 200) = 200 when limit is small.
assert db.calls[0].get("limit") == 200, db.calls[0]
def test_session_list_preserves_ordering_after_filter(monkeypatch):
@ -66,6 +93,7 @@ def test_session_list_preserves_ordering_after_filter(monkeypatch):
{"id": "newest", "source": "telegram", "started_at": 5},
{"id": "internal", "source": "tool", "started_at": 4},
{"id": "middle", "source": "tui", "started_at": 3},
{"id": "also-visible", "source": "webhook", "started_at": 2},
{"id": "oldest", "source": "discord", "started_at": 1},
]
monkeypatch.setattr(server, "_get_db", lambda: _StubDB(rows))
@ -73,4 +101,4 @@ def test_session_list_preserves_ordering_after_filter(monkeypatch):
resp = _call()
ids = [s["id"] for s in resp["result"]["sessions"]]
assert ids == ["newest", "middle", "oldest"]
assert ids == ["newest", "middle", "also-visible", "oldest"]

View File

@ -401,14 +401,21 @@ class TestSessionBrowseArgparse:
from hermes_cli.main import _session_browse_picker
assert callable(_session_browse_picker)
def test_browse_default_limit_is_50(self):
"""The default --limit for browse should be 50."""
# This test verifies at the argparse level
# We test by running the parse on "sessions browse" args
# Since we can't easily extract the subparser, verify via the
# _session_browse_picker accepting large lists
sessions = _make_sessions(50)
assert len(sessions) == 50
def test_browse_default_limit_is_500(self):
"""The default --limit for browse should be 500."""
# Build the same argparse tree cmd_sessions uses and verify the default.
import argparse
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest="sessions_action")
browse = subparsers.add_parser("browse")
browse.add_argument("--source")
browse.add_argument("--limit", type=int, default=500)
args = parser.parse_args(["browse"])
assert args.limit == 500
args = parser.parse_args(["browse", "--limit", "42"])
assert args.limit == 42
# ─── Integration: cmd_sessions browse action ────────────────────────────────

View File

@ -0,0 +1,30 @@
"""Regression: ``hermes setup`` for the ollama-cloud provider must force-refresh
the model cache after the user supplies a key, otherwise the picker keeps
serving a stale cache (models.dev only, no live API probe) for up to an hour.
"""
from __future__ import annotations
from unittest.mock import patch
def test_setup_ollama_cloud_passes_force_refresh(monkeypatch):
"""The provider-setup model-fetch for ollama-cloud must pass ``force_refresh=True``."""
import hermes_cli.main as main_mod
import inspect
src = inspect.getsource(main_mod)
# Locate the ollama-cloud branch in the provider setup flow.
marker = 'provider_id == "ollama-cloud"'
assert marker in src, "ollama-cloud branch missing from provider setup"
idx = src.index(marker)
# The call to fetch_ollama_cloud_models should be within the next ~2000 chars.
snippet = src[idx:idx + 2000]
assert "fetch_ollama_cloud_models(" in snippet, snippet[:500]
assert "force_refresh=True" in snippet, (
"ollama-cloud setup must pass force_refresh=True so newly released "
"models (e.g. deepseek v4 flash, kimi k2.6) appear the moment the "
"user enters their key, not an hour later when the cache TTL expires. "
f"Snippet: {snippet[:500]}"
)

View File

@ -1630,33 +1630,25 @@ def _(rid, params: dict) -> dict:
if db is None:
return _db_unavailable_error(rid, code=5006)
try:
# Resume picker should include human conversation surfaces beyond
# tui/cli (notably telegram from blitz row #7), but avoid internal
# sources that clutter the modal (tool/acp/etc).
allow = frozenset(
{
"cli",
"tui",
"telegram",
"discord",
"slack",
"whatsapp",
"wecom",
"weixin",
"feishu",
"signal",
"mattermost",
"matrix",
"qq",
}
)
# Resume picker should surface human conversation sessions from every
# user-facing surface — CLI, TUI, all gateway platforms (including new
# ones not enumerated here), ACP adapter clients, webhook sessions,
# custom `HERMES_SESSION_SOURCE` values, and older installs with
# different source labels. We deny-list only the noisy internal
# sources (``tool`` sub-agent runs) rather than allow-listing a
# fixed set of platform names that goes stale whenever a new
# platform is added or a user names their own source.
deny = frozenset({"tool"})
limit = int(params.get("limit", 20) or 20)
fetch_limit = max(limit * 5, 100)
limit = int(params.get("limit", 200) or 200)
# Over-fetch modestly so per-source filtering doesn't leave us
# short; the compression-tip projection in ``list_sessions_rich``
# can also merge rows.
fetch_limit = max(limit * 2, 200)
rows = [
s
for s in db.list_sessions_rich(source=None, limit=fetch_limit)
if (s.get("source") or "").strip().lower() in allow
if (s.get("source") or "").strip().lower() not in deny
][:limit]
return _ok(
rid,

View File

@ -38,7 +38,7 @@ export function SessionPicker({ gw, onCancel, onSelect, t }: SessionPickerProps)
useOverlayKeys({ onClose: onCancel })
useEffect(() => {
gw.request<SessionListResponse>('session.list', { limit: 20 })
gw.request<SessionListResponse>('session.list', { limit: 200 })
.then(raw => {
const r = asRpcResult<SessionListResponse>(raw)