fix(constants): warn once when get_hermes_home() falls back under an active profile (#18746)

When HERMES_HOME is unset but ~/.hermes/active_profile names a non-default
profile, any data this process writes lands in the default profile — not the
one the operator expects. Before this change the fallback was silent, so
cross-profile contamination (#18594) was invisible until a user noticed
their memory/state ended up in the wrong place.

Now we emit a one-shot warning to stderr the first time this happens in
a process. No raise — there are 30+ module-level callers of get_hermes_home()
and raising from any of them would brick import. Behavior is otherwise
unchanged; subprocess spawners (systemd template, kanban dispatcher, docker
entrypoint) already propagate HERMES_HOME correctly.

Bypasses logging.getLogger() because this runs before logging is configured
in a significant fraction of callers (module import time).

Refs #18594. Credit to @liuhao1024 for surfacing the silent-fallback case
in PR #18600; we kept the diagnostic signal without the import-time raise.
This commit is contained in:
Teknium 2026-05-02 01:49:55 -07:00 committed by GitHub
parent 98c98821ff
commit 699b3679bc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 167 additions and 1 deletions

View File

@ -8,14 +8,64 @@ import os
from pathlib import Path
_profile_fallback_warned: bool = False
def get_hermes_home() -> Path:
"""Return the Hermes home directory (default: ~/.hermes).
Reads HERMES_HOME env var, falls back to ~/.hermes.
This is the single source of truth all other copies should import this.
When ``HERMES_HOME`` is unset but an ``active_profile`` file indicates
a non-default profile is active, logs a loud one-shot warning to
``errors.log`` so cross-profile data corruption is diagnosable instead
of silent. Behavior is unchanged otherwise we still return
``~/.hermes`` because raising here would brick 30+ module-level
callers that import this at load time. Subprocess spawners are
expected to propagate ``HERMES_HOME`` explicitly (see the systemd
template in ``hermes_cli/gateway.py`` and the kanban dispatcher in
``hermes_cli/kanban_db.py``). See https://github.com/NousResearch/hermes-agent/issues/18594.
"""
val = os.environ.get("HERMES_HOME", "").strip()
return Path(val) if val else Path.home() / ".hermes"
if val:
return Path(val)
# Guard: if a non-default profile is sticky-active, warn once that
# the fallback to the default profile is almost certainly wrong.
global _profile_fallback_warned
if not _profile_fallback_warned:
try:
# Inline the default-root resolution from get_default_hermes_root()
# to stay import-safe (this function is called from module scope
# in 30+ files; we cannot afford to trigger logging setup here).
active_path = (Path.home() / ".hermes" / "active_profile")
active = active_path.read_text().strip() if active_path.exists() else ""
except (UnicodeDecodeError, OSError):
active = ""
if active and active != "default":
_profile_fallback_warned = True
# Write directly to stderr. We intentionally do NOT route this
# through ``logging`` because (a) this function is called at
# module-import time from 30+ sites, often before logging is
# configured, and (b) root-logger propagation would double-emit
# on consoles where a StreamHandler is already attached.
import sys
msg = (
f"[HERMES_HOME fallback] HERMES_HOME is unset but active "
f"profile is {active!r}. Falling back to ~/.hermes, which "
f"is the DEFAULT profile — not {active!r}. Any data this "
f"process writes will land in the wrong profile. The "
f"subprocess spawner should pass HERMES_HOME explicitly "
f"(see issue #18594)."
)
try:
sys.stderr.write(msg + "\n")
sys.stderr.flush()
except Exception:
pass
return Path.home() / ".hermes"
def get_default_hermes_root() -> Path:

View File

@ -0,0 +1,116 @@
"""Tests for get_hermes_home() profile-mode fallback warning.
Regression test for https://github.com/NousResearch/hermes-agent/issues/18594.
When HERMES_HOME is unset but an active_profile file indicates a non-default
profile is active, get_hermes_home() should:
1. STILL return ~/.hermes (raising would brick 30+ module-level callers)
2. Emit a loud one-shot warning to stderr so operators can diagnose
cross-profile data contamination after the fact.
The warning goes to stderr directly (not through logging) because this
function is called at module-import time from 30+ sites, often before the
logging subsystem has been configured.
"""
from pathlib import Path
import pytest
@pytest.fixture
def fresh_constants(monkeypatch, tmp_path):
"""Import hermes_constants fresh and reset the one-shot warn flag."""
import importlib
import hermes_constants
importlib.reload(hermes_constants)
monkeypatch.setattr(Path, "home", lambda: tmp_path)
monkeypatch.delenv("HERMES_HOME", raising=False)
return hermes_constants
class TestGetHermesHomeProfileWarning:
def test_classic_mode_no_active_profile_no_warning(
self, fresh_constants, tmp_path, capsys
):
"""Classic mode: no active_profile file → silent, returns ~/.hermes."""
result = fresh_constants.get_hermes_home()
assert result == tmp_path / ".hermes"
assert "HERMES_HOME fallback" not in capsys.readouterr().err
def test_default_active_profile_no_warning(
self, fresh_constants, tmp_path, capsys
):
"""active_profile=default → still no warning, returns ~/.hermes."""
hermes_dir = tmp_path / ".hermes"
hermes_dir.mkdir()
(hermes_dir / "active_profile").write_text("default\n")
result = fresh_constants.get_hermes_home()
assert result == tmp_path / ".hermes"
assert "HERMES_HOME fallback" not in capsys.readouterr().err
def test_named_profile_unset_home_warns_once(
self, fresh_constants, tmp_path, capsys
):
"""active_profile=coder + HERMES_HOME unset → warn loudly, still return fallback."""
hermes_dir = tmp_path / ".hermes"
hermes_dir.mkdir()
(hermes_dir / "active_profile").write_text("coder\n")
result = fresh_constants.get_hermes_home()
# 1. Still returns the fallback — no import-time crash
assert result == tmp_path / ".hermes"
# 2. Stderr got the warning exactly once
err = capsys.readouterr().err
assert err.count("HERMES_HOME fallback") == 1
assert "'coder'" in err
assert "#18594" in err
# 3. One-shot: second and third calls don't re-warn
fresh_constants.get_hermes_home()
fresh_constants.get_hermes_home()
err2 = capsys.readouterr().err
assert "HERMES_HOME fallback" not in err2
def test_hermes_home_set_suppresses_warning(
self, fresh_constants, tmp_path, capsys, monkeypatch
):
"""Even if active_profile is 'coder', setting HERMES_HOME suppresses warning."""
profile_dir = tmp_path / ".hermes" / "profiles" / "coder"
profile_dir.mkdir(parents=True)
(tmp_path / ".hermes" / "active_profile").write_text("coder\n")
monkeypatch.setenv("HERMES_HOME", str(profile_dir))
result = fresh_constants.get_hermes_home()
assert result == profile_dir
assert "HERMES_HOME fallback" not in capsys.readouterr().err
def test_unreadable_active_profile_no_crash(
self, fresh_constants, tmp_path, capsys
):
"""active_profile that can't be decoded → fall through silently."""
hermes_dir = tmp_path / ".hermes"
hermes_dir.mkdir()
# Write bytes that aren't valid utf-8
(hermes_dir / "active_profile").write_bytes(b"\xff\xfe\x00\x00")
result = fresh_constants.get_hermes_home()
assert result == tmp_path / ".hermes"
# Shouldn't crash; shouldn't warn either (can't tell what profile was intended)
assert "HERMES_HOME fallback" not in capsys.readouterr().err
def test_empty_active_profile_no_warning(
self, fresh_constants, tmp_path, capsys
):
"""Empty active_profile file → treated as default, no warning."""
hermes_dir = tmp_path / ".hermes"
hermes_dir.mkdir()
(hermes_dir / "active_profile").write_text("")
result = fresh_constants.get_hermes_home()
assert result == tmp_path / ".hermes"
assert "HERMES_HOME fallback" not in capsys.readouterr().err