fix(gateway): read /status token totals from SessionDB (#17158)
/status was reading session_entry.total_tokens from the in-memory SessionStore (gateway/session.py), which the agent never writes to — so the token count was always 0.

The agent already persists token deltas to the SQLite SessionDB (run_agent.py:11497) for every platform with a session_id. Route /status through that single source of truth instead of duplicating token writes into a second store.

Fix:
- gateway/run.py: _handle_status_command now calls self._session_db.get_session(session_id) and sums the five token component columns (input/output/cache_read/cache_write/reasoning). Falls back to 0 when no SessionDB is configured or no row exists.
- Two new regression tests covering the populated-row and missing-row paths.

Co-authored-by: Hermes <127238744+teknium1@users.noreply.github.com>
This commit is contained in:
parent
a178081468
commit
7abc9ce4df
@ -6568,11 +6568,30 @@ class GatewayRunner:
|
||||
queue_depth = self._queue_depth(session_key, adapter=adapter)
|
||||
|
||||
title = None
|
||||
# Pull token totals from the SQLite session DB rather than the
|
||||
# in-memory SessionStore. The agent's per-turn token deltas are
|
||||
# persisted into sessions_db (run_agent.py), not into SessionEntry,
|
||||
# so session_entry.total_tokens is always 0. SessionDB is the
|
||||
# single source of truth; reading it here keeps /status accurate
|
||||
# without duplicating token writes into two stores.
|
||||
db_total_tokens = 0
|
||||
if self._session_db:
|
||||
try:
|
||||
title = self._session_db.get_session_title(session_entry.session_id)
|
||||
except Exception:
|
||||
title = None
|
||||
try:
|
||||
row = self._session_db.get_session(session_entry.session_id)
|
||||
if row:
|
||||
db_total_tokens = (
|
||||
(row.get("input_tokens") or 0)
|
||||
+ (row.get("output_tokens") or 0)
|
||||
+ (row.get("cache_read_tokens") or 0)
|
||||
+ (row.get("cache_write_tokens") or 0)
|
||||
+ (row.get("reasoning_tokens") or 0)
|
||||
)
|
||||
except Exception:
|
||||
db_total_tokens = 0
|
||||
|
||||
lines = [
|
||||
"📊 **Hermes Gateway Status**",
|
||||
@ -6584,7 +6603,7 @@ class GatewayRunner:
|
||||
lines.extend([
|
||||
f"**Created:** {session_entry.created_at.strftime('%Y-%m-%d %H:%M')}",
|
||||
f"**Last Activity:** {session_entry.updated_at.strftime('%Y-%m-%d %H:%M')}",
|
||||
f"**Tokens:** {session_entry.total_tokens:,}",
|
||||
f"**Tokens:** {db_total_tokens:,}",
|
||||
f"**Agent Running:** {'Yes ⚡' if is_running else 'No'}",
|
||||
])
|
||||
if queue_depth:
|
||||
|
||||
@ -55,6 +55,9 @@ def _make_runner(session_entry: SessionEntry, *, platform: Platform = Platform.T
|
||||
runner._pending_approvals = {}
|
||||
runner._session_db = MagicMock()
|
||||
runner._session_db.get_session_title.return_value = None
|
||||
# Default: no DB row → /status reports 0 tokens. Tests that exercise
|
||||
# the populated path override this.
|
||||
runner._session_db.get_session.return_value = None
|
||||
runner._reasoning_config = None
|
||||
runner._provider_routing = {}
|
||||
runner._fallback_model = None
|
||||
@ -80,6 +83,14 @@ async def test_status_command_reports_running_agent_without_interrupt(monkeypatc
|
||||
total_tokens=321,
|
||||
)
|
||||
runner = _make_runner(session_entry)
|
||||
# Token total comes from the SQLite SessionDB, not SessionEntry.
|
||||
runner._session_db.get_session.return_value = {
|
||||
"input_tokens": 200,
|
||||
"output_tokens": 121,
|
||||
"cache_read_tokens": 0,
|
||||
"cache_write_tokens": 0,
|
||||
"reasoning_tokens": 0,
|
||||
}
|
||||
running_agent = MagicMock()
|
||||
runner._running_agents[build_session_key(_make_source())] = running_agent
|
||||
|
||||
@ -113,6 +124,56 @@ async def test_status_command_includes_session_title_when_present():
|
||||
assert "**Title:** My titled session" in result
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_status_command_reads_token_totals_from_session_db():
|
||||
"""Regression test for #17158: /status must source token totals from the
|
||||
SQLite SessionDB (where run_agent.py persists them) and sum all component
|
||||
counts, not from SessionEntry (which the agent never writes)."""
|
||||
session_entry = SessionEntry(
|
||||
session_key=build_session_key(_make_source()),
|
||||
session_id="sess-1",
|
||||
created_at=datetime.now(),
|
||||
updated_at=datetime.now(),
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_type="dm",
|
||||
total_tokens=0, # SessionEntry never gets written to — always 0.
|
||||
)
|
||||
runner = _make_runner(session_entry)
|
||||
runner._session_db.get_session.return_value = {
|
||||
"input_tokens": 1000,
|
||||
"output_tokens": 250,
|
||||
"cache_read_tokens": 500,
|
||||
"cache_write_tokens": 100,
|
||||
"reasoning_tokens": 50,
|
||||
}
|
||||
|
||||
result = await runner._handle_message(_make_event("/status"))
|
||||
|
||||
# 1000 + 250 + 500 + 100 + 50 = 1,900
|
||||
assert "**Tokens:** 1,900" in result
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_status_command_tokens_zero_when_session_db_row_missing():
|
||||
"""When the SessionDB has no row for the current session yet (fresh
|
||||
session, no agent calls), /status reports 0 without raising."""
|
||||
session_entry = SessionEntry(
|
||||
session_key=build_session_key(_make_source()),
|
||||
session_id="sess-1",
|
||||
created_at=datetime.now(),
|
||||
updated_at=datetime.now(),
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_type="dm",
|
||||
total_tokens=999, # This should be ignored.
|
||||
)
|
||||
runner = _make_runner(session_entry)
|
||||
runner._session_db.get_session.return_value = None
|
||||
|
||||
result = await runner._handle_message(_make_event("/status"))
|
||||
|
||||
assert "**Tokens:** 0" in result
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_agents_command_reports_active_agents_and_processes(monkeypatch):
|
||||
session_key = build_session_key(_make_source())
|
||||
|
||||
Loading…
Reference in New Issue
Block a user