fix(gateway): read /status token totals from SessionDB (#17158)

/status was reading session_entry.total_tokens from the in-memory
SessionStore (gateway/session.py), which the agent never writes to —
so the token count was always 0.

The agent already persists token deltas to the SQLite SessionDB
(run_agent.py:11497) for every platform with a session_id. Route
/status through that single source of truth instead of duplicating
token writes into a second store.

Fix:
- gateway/run.py: _handle_status_command now calls
  self._session_db.get_session(session_id) and sums the five token
  component columns (input/output/cache_read/cache_write/reasoning).
  Falls back to 0 when no SessionDB is configured, no row exists,
  or the lookup raises.
- Two new regression tests covering the populated-row and
  missing-row paths.

Co-authored-by: Hermes <127238744+teknium1@users.noreply.github.com>
This commit is contained in:
Jezza Hehn 2026-04-30 20:26:54 -07:00 committed by Teknium
parent a178081468
commit 7abc9ce4df
2 changed files with 81 additions and 1 deletion

View File

@ -6568,11 +6568,30 @@ class GatewayRunner:
queue_depth = self._queue_depth(session_key, adapter=adapter)
title = None
# Pull token totals from the SQLite session DB rather than the
# in-memory SessionStore. The agent's per-turn token deltas are
# persisted into sessions_db (run_agent.py), not into SessionEntry,
# so session_entry.total_tokens is always 0. SessionDB is the
# single source of truth; reading it here keeps /status accurate
# without duplicating token writes into two stores.
db_total_tokens = 0
if self._session_db:
try:
title = self._session_db.get_session_title(session_entry.session_id)
except Exception:
title = None
try:
row = self._session_db.get_session(session_entry.session_id)
if row:
db_total_tokens = (
(row.get("input_tokens") or 0)
+ (row.get("output_tokens") or 0)
+ (row.get("cache_read_tokens") or 0)
+ (row.get("cache_write_tokens") or 0)
+ (row.get("reasoning_tokens") or 0)
)
except Exception:
db_total_tokens = 0
lines = [
"📊 **Hermes Gateway Status**",
@ -6584,7 +6603,7 @@ class GatewayRunner:
lines.extend([
f"**Created:** {session_entry.created_at.strftime('%Y-%m-%d %H:%M')}",
f"**Last Activity:** {session_entry.updated_at.strftime('%Y-%m-%d %H:%M')}",
f"**Tokens:** {session_entry.total_tokens:,}",
f"**Tokens:** {db_total_tokens:,}",
f"**Agent Running:** {'Yes ⚡' if is_running else 'No'}",
])
if queue_depth:

View File

@ -55,6 +55,9 @@ def _make_runner(session_entry: SessionEntry, *, platform: Platform = Platform.T
runner._pending_approvals = {}
runner._session_db = MagicMock()
runner._session_db.get_session_title.return_value = None
# Default: no DB row → /status reports 0 tokens. Tests that exercise
# the populated path override this.
runner._session_db.get_session.return_value = None
runner._reasoning_config = None
runner._provider_routing = {}
runner._fallback_model = None
@ -80,6 +83,14 @@ async def test_status_command_reports_running_agent_without_interrupt(monkeypatc
total_tokens=321,
)
runner = _make_runner(session_entry)
# Token total comes from the SQLite SessionDB, not SessionEntry.
runner._session_db.get_session.return_value = {
"input_tokens": 200,
"output_tokens": 121,
"cache_read_tokens": 0,
"cache_write_tokens": 0,
"reasoning_tokens": 0,
}
running_agent = MagicMock()
runner._running_agents[build_session_key(_make_source())] = running_agent
@ -113,6 +124,56 @@ async def test_status_command_includes_session_title_when_present():
assert "**Title:** My titled session" in result
@pytest.mark.asyncio
async def test_status_command_reads_token_totals_from_session_db():
    """Regression test for #17158.

    The token total shown by /status must be the sum of the component
    counters in the SessionDB row, not SessionEntry.total_tokens.
    """
    # Every component is non-zero so that dropping any one of them from the
    # sum changes the reported total.
    db_row = {
        "input_tokens": 1000,
        "output_tokens": 250,
        "cache_read_tokens": 500,
        "cache_write_tokens": 100,
        "reasoning_tokens": 50,
    }
    entry = SessionEntry(
        session_key=build_session_key(_make_source()),
        session_id="sess-1",
        created_at=datetime.now(),
        updated_at=datetime.now(),
        platform=Platform.TELEGRAM,
        chat_type="dm",
        # The in-memory entry stays at 0; only the DB row carries the counts.
        total_tokens=0,
    )

    runner = _make_runner(entry)
    runner._session_db.get_session.return_value = db_row

    reply = await runner._handle_message(_make_event("/status"))

    # 1000 + 250 + 500 + 100 + 50 = 1,900
    assert "**Tokens:** 1,900" in reply
@pytest.mark.asyncio
async def test_status_command_tokens_zero_when_session_db_row_missing():
    """/status reports 0 tokens, without raising, when SessionDB has no row.

    The SessionEntry carries a non-zero total on purpose: if /status still
    read from it, the reply would show 999 instead of 0.
    """
    entry = SessionEntry(
        session_key=build_session_key(_make_source()),
        session_id="sess-1",
        created_at=datetime.now(),
        updated_at=datetime.now(),
        platform=Platform.TELEGRAM,
        chat_type="dm",
        # Decoy value; must not appear in the reply.
        total_tokens=999,
    )

    runner = _make_runner(entry)
    # Explicitly model "no row stored for this session".
    runner._session_db.get_session.return_value = None

    reply = await runner._handle_message(_make_event("/status"))

    assert "**Tokens:** 0" in reply
@pytest.mark.asyncio
async def test_agents_command_reports_active_agents_and_processes(monkeypatch):
session_key = build_session_key(_make_source())