fix(sessions): delete on-disk transcript files during prune and delete (#3015)
`delete_session()` and `prune_sessions()` only removed SQLite records,
leaving .json/.jsonl transcript files on disk forever. Over time this
causes unbounded disk growth (~27MB/day observed).
Changes:
- Add `_remove_session_files()` static helper that cleans up
`{session_id}.json`, `.jsonl`, and `request_dump_{session_id}_*.json`
- `delete_session()` accepts optional `sessions_dir` param and removes
files for the deleted session and its children
- `prune_sessions()` accepts optional `sessions_dir` param and removes
files for all pruned sessions after the DB transaction
- Wire up CLI `hermes sessions delete` and `hermes sessions prune` to
pass `sessions_dir`
- File cleanup is best-effort (OSError silenced) so DB operations are
never blocked by filesystem issues
- Fully backward-compatible: `sessions_dir=None` (default) preserves
existing behavior
This commit is contained in:
parent
0ba6471dd1
commit
3b60abb6bb
2
cli.py
2
cli.py
@ -974,6 +974,7 @@ def _run_state_db_auto_maintenance(session_db) -> None:
|
||||
return
|
||||
try:
|
||||
from hermes_cli.config import load_config as _load_full_config
|
||||
from hermes_constants import get_hermes_home as _get_hermes_home
|
||||
cfg = (_load_full_config().get("sessions") or {})
|
||||
if not cfg.get("auto_prune", False):
|
||||
return
|
||||
@ -981,6 +982,7 @@ def _run_state_db_auto_maintenance(session_db) -> None:
|
||||
retention_days=int(cfg.get("retention_days", 90)),
|
||||
min_interval_hours=int(cfg.get("min_interval_hours", 24)),
|
||||
vacuum=bool(cfg.get("vacuum_after_prune", True)),
|
||||
sessions_dir=_get_hermes_home() / "sessions",
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("state.db auto-maintenance skipped: %s", exc)
|
||||
|
||||
@ -763,6 +763,7 @@ class GatewayRunner:
|
||||
retention_days=int(_sess_cfg.get("retention_days", 90)),
|
||||
min_interval_hours=int(_sess_cfg.get("min_interval_hours", 24)),
|
||||
vacuum=bool(_sess_cfg.get("vacuum_after_prune", True)),
|
||||
sessions_dir=self.config.sessions_dir,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("state.db auto-maintenance skipped: %s", exc)
|
||||
|
||||
@ -9230,7 +9230,8 @@ Examples:
|
||||
):
|
||||
print("Cancelled.")
|
||||
return
|
||||
if db.delete_session(resolved_session_id):
|
||||
sessions_dir = get_hermes_home() / "sessions"
|
||||
if db.delete_session(resolved_session_id, sessions_dir=sessions_dir):
|
||||
print(f"Deleted session '{resolved_session_id}'.")
|
||||
else:
|
||||
print(f"Session '{args.session_id}' not found.")
|
||||
@ -9244,7 +9245,9 @@ Examples:
|
||||
):
|
||||
print("Cancelled.")
|
||||
return
|
||||
count = db.prune_sessions(older_than_days=days, source=args.source)
|
||||
sessions_dir = get_hermes_home() / "sessions"
|
||||
count = db.prune_sessions(older_than_days=days, source=args.source,
|
||||
sessions_dir=sessions_dir)
|
||||
print(f"Pruned {count} session(s).")
|
||||
|
||||
elif action == "rename":
|
||||
|
||||
@ -1512,12 +1512,45 @@ class SessionDB:
|
||||
)
|
||||
self._execute_write(_do)
|
||||
|
||||
def delete_session(self, session_id: str) -> bool:
|
||||
@staticmethod
|
||||
def _remove_session_files(sessions_dir: Optional[Path], session_id: str) -> None:
|
||||
"""Remove on-disk transcript files for a session.
|
||||
|
||||
Cleans up ``{session_id}.json``, ``{session_id}.jsonl``, and any
|
||||
``request_dump_{session_id}_*.json`` files left by the gateway.
|
||||
Silently skips files that don't exist and swallows OSError so a
|
||||
filesystem hiccup never blocks a DB operation.
|
||||
"""
|
||||
if sessions_dir is None:
|
||||
return
|
||||
for suffix in (".json", ".jsonl"):
|
||||
p = sessions_dir / f"{session_id}{suffix}"
|
||||
try:
|
||||
p.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
# request_dump files use session_id as a prefix component
|
||||
try:
|
||||
for p in sessions_dir.glob(f"request_dump_{session_id}_*.json"):
|
||||
try:
|
||||
p.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
def delete_session(
|
||||
self,
|
||||
session_id: str,
|
||||
sessions_dir: Optional[Path] = None,
|
||||
) -> bool:
|
||||
"""Delete a session and all its messages.
|
||||
|
||||
Child sessions are orphaned (parent_session_id set to NULL) rather
|
||||
than cascade-deleted, so they remain accessible independently.
|
||||
Returns True if the session was found and deleted.
|
||||
When *sessions_dir* is provided, also removes on-disk transcript
|
||||
files (``.json`` / ``.jsonl`` / ``request_dump_*``) for the deleted
|
||||
session. Returns True if the session was found and deleted.
|
||||
"""
|
||||
def _do(conn):
|
||||
cursor = conn.execute(
|
||||
@ -1534,16 +1567,29 @@ class SessionDB:
|
||||
conn.execute("DELETE FROM messages WHERE session_id = ?", (session_id,))
|
||||
conn.execute("DELETE FROM sessions WHERE id = ?", (session_id,))
|
||||
return True
|
||||
return self._execute_write(_do)
|
||||
|
||||
def prune_sessions(self, older_than_days: int = 90, source: str = None) -> int:
|
||||
deleted = self._execute_write(_do)
|
||||
if deleted:
|
||||
self._remove_session_files(sessions_dir, session_id)
|
||||
return deleted
|
||||
|
||||
def prune_sessions(
|
||||
self,
|
||||
older_than_days: int = 90,
|
||||
source: str = None,
|
||||
sessions_dir: Optional[Path] = None,
|
||||
) -> int:
|
||||
"""Delete sessions older than N days. Returns count of deleted sessions.
|
||||
|
||||
Only prunes ended sessions (not active ones). Child sessions outside
|
||||
the prune window are orphaned (parent_session_id set to NULL) rather
|
||||
than cascade-deleted.
|
||||
than cascade-deleted. When *sessions_dir* is provided, also removes
|
||||
on-disk transcript files (``.json`` / ``.jsonl`` /
|
||||
``request_dump_*``) for every pruned session, outside the DB
|
||||
transaction.
|
||||
"""
|
||||
cutoff = time.time() - (older_than_days * 86400)
|
||||
removed_ids: list[str] = []
|
||||
|
||||
def _do(conn):
|
||||
if source:
|
||||
@ -1573,9 +1619,14 @@ class SessionDB:
|
||||
for sid in session_ids:
|
||||
conn.execute("DELETE FROM messages WHERE session_id = ?", (sid,))
|
||||
conn.execute("DELETE FROM sessions WHERE id = ?", (sid,))
|
||||
removed_ids.append(sid)
|
||||
return len(session_ids)
|
||||
|
||||
return self._execute_write(_do)
|
||||
count = self._execute_write(_do)
|
||||
# Clean up on-disk files outside the DB transaction
|
||||
for sid in removed_ids:
|
||||
self._remove_session_files(sessions_dir, sid)
|
||||
return count
|
||||
|
||||
# ── Meta key/value (for scheduler bookkeeping) ──
|
||||
|
||||
@ -1629,6 +1680,7 @@ class SessionDB:
|
||||
retention_days: int = 90,
|
||||
min_interval_hours: int = 24,
|
||||
vacuum: bool = True,
|
||||
sessions_dir: Optional[Path] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Idempotent auto-maintenance: prune old sessions + optional VACUUM.
|
||||
|
||||
@ -1636,6 +1688,10 @@ class SessionDB:
|
||||
within ``min_interval_hours`` no-op. Designed to be called once at
|
||||
startup from long-lived entrypoints (CLI, gateway, cron scheduler).
|
||||
|
||||
When *sessions_dir* is provided, on-disk transcript files
|
||||
(``.json`` / ``.jsonl`` / ``request_dump_*``) for pruned sessions
|
||||
are removed as part of the same sweep (issue #3015).
|
||||
|
||||
Never raises. On any failure, logs a warning and returns a dict
|
||||
with ``"error"`` set.
|
||||
|
||||
@ -1659,7 +1715,10 @@ class SessionDB:
|
||||
except (TypeError, ValueError):
|
||||
pass # corrupt meta; treat as no prior run
|
||||
|
||||
pruned = self.prune_sessions(older_than_days=retention_days)
|
||||
pruned = self.prune_sessions(
|
||||
older_than_days=retention_days,
|
||||
sessions_dir=sessions_dir,
|
||||
)
|
||||
result["pruned"] = pruned
|
||||
|
||||
# Only VACUUM if we actually freed rows — VACUUM on a tight DB
|
||||
|
||||
Loading…
Reference in New Issue
Block a user