fix(ci): stabilize main test suite regressions (#17660)

* fix: stabilize main test suite regressions

* test(agent): update MiniMax normalization expectation

* test: stabilize remaining CI assertions

* test: harden config helper monkeypatching

* test: harden CI-only assertions

* fix(agent): propagate fast streaming interrupts
This commit is contained in:
Stephen Schoettler 2026-04-29 23:18:55 -07:00 committed by GitHub
parent e7beaaf184
commit f73364b1c4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
37 changed files with 450 additions and 127 deletions

View File

@ -608,7 +608,7 @@ class CopilotACPClient:
end = start + limit if isinstance(limit, int) and limit > 0 else None
content = "".join(lines[start:end])
if content:
content = redact_sensitive_text(content)
content = redact_sensitive_text(content, force=True)
response = {
"jsonrpc": "2.0",
"id": message_id,

View File

@ -305,11 +305,13 @@ def _redact_form_body(text: str) -> str:
return _redact_query_string(text.strip())
def redact_sensitive_text(text: str) -> str:
def redact_sensitive_text(text: str, *, force: bool = False) -> str:
"""Apply all redaction patterns to a block of text.
Safe to call on any string -- non-matching text passes through unchanged.
Disabled by default; enable via security.redact_secrets: true in config.yaml.
Set force=True for safety boundaries that must never return raw secrets
regardless of the user's global logging redaction preference.
"""
if text is None:
return None
@ -317,7 +319,7 @@ def redact_sensitive_text(text: str) -> str:
text = str(text)
if not text:
return text
if not _REDACT_ENABLED:
if not (force or _REDACT_ENABLED):
return text
# Known prefixes (sk-, ghp_, etc.)

View File

@ -1815,11 +1815,19 @@ class BasePlatformAdapter(ABC):
if stop_event is None:
await asyncio.sleep(interval)
continue
try:
await asyncio.wait_for(stop_event.wait(), timeout=interval)
except asyncio.TimeoutError:
continue
return
loop = asyncio.get_running_loop()
deadline = loop.time() + interval
while not stop_event.is_set():
remaining = deadline - loop.time()
if remaining <= 0:
break
# Poll instead of wait_for(stop_event.wait()). Cancelling
# wait_for while it owns the inner Event.wait task can leave
# shutdown paths stuck awaiting the typing task on Python
# 3.11/pytest-asyncio; sleep cancellation is immediate.
await asyncio.sleep(min(0.25, remaining))
if stop_event.is_set():
return
except asyncio.CancelledError:
pass # Normal cancellation when handler completes
finally:
@ -2429,6 +2437,16 @@ class BasePlatformAdapter(ABC):
**_keep_typing_kwargs,
)
)
async def _stop_typing_task() -> None:
typing_task.cancel()
try:
await asyncio.wait_for(asyncio.shield(typing_task), timeout=0.5)
except (asyncio.CancelledError, asyncio.TimeoutError):
# Cancellation cleanup must not block adapter shutdown. The
# typing task is already cancelled; if the parent task is also
# cancelling, let this message-processing task unwind now.
pass
try:
await self._run_processing_hook("on_processing_start", event)
@ -2651,11 +2669,7 @@ class BasePlatformAdapter(ABC):
_active = self._active_sessions.get(session_key)
if _active is not None:
_active.clear()
typing_task.cancel()
try:
await typing_task
except asyncio.CancelledError:
pass
await _stop_typing_task()
# Process pending message in new background task
await self._process_message_background(pending_event, session_key)
return # Already cleaned up
@ -2703,11 +2717,7 @@ class BasePlatformAdapter(ABC):
except Exception:
pass
# Stop typing indicator
typing_task.cancel()
try:
await typing_task
except asyncio.CancelledError:
pass
await _stop_typing_task()
# Also cancel any platform-level persistent typing tasks (e.g. Discord)
# that may have been recreated by _keep_typing after the last stop_typing()
try:

View File

@ -10530,10 +10530,26 @@ class GatewayRunner:
# Tool progress mode — resolved per-platform with env var fallback
_resolved_tp = resolve_display_setting(user_config, platform_key, "tool_progress")
_env_tp = os.getenv("HERMES_TOOL_PROGRESS_MODE")
_display_cfg = display_config if isinstance(display_config, dict) else {}
_platforms_cfg = _display_cfg.get("platforms") or {}
_platform_cfg = _platforms_cfg.get(platform_key) or {}
_legacy_tp_overrides = _display_cfg.get("tool_progress_overrides") or {}
_tool_progress_configured = (
"tool_progress" in _display_cfg
or (
isinstance(_platform_cfg, dict)
and "tool_progress" in _platform_cfg
)
or (
isinstance(_legacy_tp_overrides, dict)
and platform_key in _legacy_tp_overrides
)
)
progress_mode = (
_resolved_tp
or os.getenv("HERMES_TOOL_PROGRESS_MODE")
or "all"
_env_tp
if _env_tp and not _tool_progress_configured
else (_resolved_tp or _env_tp or "all")
)
# Disable tool progress for webhooks - they don't support message editing,
# so each progress line would be sent as a separate message.

View File

@ -62,6 +62,7 @@ from .config import (
)
from .whatsapp_identity import (
canonical_whatsapp_identifier,
normalize_whatsapp_identifier, # noqa: F401 - re-exported for gateway.session callers
)
from utils import atomic_replace

View File

@ -78,6 +78,14 @@ def _system_package_install_cmd(pkg: str) -> str:
return f"sudo apt install {pkg}"
def _safe_which(cmd: str) -> str | None:
"""shutil.which wrapper resilient to platform monkeypatching in tests."""
try:
return shutil.which(cmd)
except Exception:
return None
def _termux_browser_setup_steps(node_installed: bool) -> list[str]:
steps: list[str] = []
step = 1
@ -579,7 +587,7 @@ def run_doctor(args):
except Exception as e:
check_warn("Auth provider status", f"(could not check: {e})")
if shutil.which("codex"):
if _safe_which("codex"):
check_ok("codex CLI")
else:
# Native OAuth uses Hermes' own device-code flow — the Codex CLI is
@ -797,13 +805,13 @@ def run_doctor(args):
print(color("◆ External Tools", Colors.CYAN, Colors.BOLD))
# Git
if shutil.which("git"):
if _safe_which("git"):
check_ok("git")
else:
check_warn("git not found", "(optional)")
# ripgrep (optional, for faster file search)
if shutil.which("rg"):
if _safe_which("rg"):
check_ok("ripgrep (rg)", "(faster file search)")
else:
check_warn("ripgrep (rg) not found", "(file search uses grep fallback)")
@ -812,7 +820,7 @@ def run_doctor(args):
# Docker (optional)
terminal_env = os.getenv("TERMINAL_ENV", "local")
if terminal_env == "docker":
if shutil.which("docker"):
if _safe_which("docker"):
# Check if docker daemon is running
try:
result = subprocess.run(["docker", "info"], capture_output=True, timeout=10)
@ -827,7 +835,7 @@ def run_doctor(args):
check_fail("docker not found", "(required for TERMINAL_ENV=docker)")
issues.append("Install Docker or change TERMINAL_ENV")
else:
if shutil.which("docker"):
if _safe_which("docker"):
check_ok("docker", "(optional)")
else:
if _is_termux():
@ -918,7 +926,7 @@ def run_doctor(args):
check_info("Vercel persistence: ephemeral filesystem")
# Node.js + agent-browser (for browser automation tools)
if shutil.which("node"):
if _safe_which("node"):
check_ok("Node.js")
# Check if agent-browser is installed
agent_browser_path = PROJECT_ROOT / "node_modules" / "agent-browser"
@ -944,7 +952,7 @@ def run_doctor(args):
check_warn("Node.js not found", "(optional, needed for browser tools)")
# npm audit for all Node.js packages
if shutil.which("npm"):
if _safe_which("npm"):
npm_dirs = [
(PROJECT_ROOT, "Browser tools (agent-browser)"),
(PROJECT_ROOT / "scripts" / "whatsapp-bridge", "WhatsApp bridge"),

View File

@ -5398,7 +5398,7 @@ def _warn_stale_dashboard_processes() -> None:
capture_output=True, text=True, timeout=10,
)
if result.returncode == 0:
for line in result.stdout.split("\n"):
for line in getattr(result, "stdout", "").split("\n"):
stripped = line.strip()
if not stripped or "grep" in stripped:
continue

View File

@ -6531,6 +6531,9 @@ class AIAgent:
Falls back to _interruptible_api_call on provider errors indicating
streaming is not supported.
"""
if self._interrupt_requested:
raise InterruptedError("Agent interrupted before streaming API call")
if self.api_mode == "codex_responses":
# Codex streams internally via _run_codex_stream. The main dispatch
# in _interruptible_api_call already calls it; we just need to
@ -7189,6 +7192,12 @@ class AIAgent:
# to non-streaming on the next attempt via _disable_streaming.
result["error"] = e
return
except InterruptedError as e:
# The interrupt may be noticed inside the worker thread before
# the polling loop sees it. Surface it through the normal result
# channel so callers never miss a fast pre-retry interrupt.
result["error"] = e
return
finally:
request_client = request_client_holder.get("client")
if request_client is not None:

View File

@ -308,14 +308,14 @@ class TestMinimaxPreserveDots:
from agent.anthropic_adapter import normalize_model_name
assert normalize_model_name("MiniMax-M2.7", preserve_dots=True) == "MiniMax-M2.7"
def test_normalize_converts_without_preserve(self):
def test_normalize_preserves_non_anthropic_dots_without_preserve(self):
from agent.anthropic_adapter import normalize_model_name
# Post-#17171, dots are only converted to hyphens for claude-*/anthropic-*
# model names. Non-Anthropic models (including MiniMax) keep their dots
# even when preserve_dots=False — that's the fix this test was written
# against the inverse of, so just assert the new invariant directly.
# Non-Anthropic model families use dots as canonical version separators;
# only Claude/Anthropic names are hyphen-normalized by default.
assert normalize_model_name("MiniMax-M2.7", preserve_dots=False) == "MiniMax-M2.7"
# Claude models still get dotted→hyphenated when preserve_dots=False.
def test_normalize_still_converts_claude_dots_without_preserve(self):
from agent.anthropic_adapter import normalize_model_name
assert normalize_model_name("claude-opus-4.6", preserve_dots=False) == "claude-opus-4-6"

View File

@ -20,6 +20,7 @@ test runner at ``scripts/run_tests.sh``.
"""
import asyncio
import logging
import os
import re
import signal
@ -174,7 +175,10 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
"HERMES_SESSION_KEY",
"HERMES_GATEWAY_SESSION",
"HERMES_PLATFORM",
"HERMES_MODEL",
"HERMES_INFERENCE_MODEL",
"HERMES_INFERENCE_PROVIDER",
"HERMES_TUI_PROVIDER",
"HERMES_MANAGED",
"HERMES_DEV",
"HERMES_CONTAINER",
@ -184,6 +188,14 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
"HERMES_BACKGROUND_NOTIFICATIONS",
"HERMES_EXEC_ASK",
"HERMES_HOME_MODE",
"TERMINAL_CWD",
"TERMINAL_ENV",
"TERMINAL_VERCEL_RUNTIME",
"TERMINAL_CONTAINER_CPU",
"TERMINAL_CONTAINER_DISK",
"TERMINAL_CONTAINER_MEMORY",
"TERMINAL_CONTAINER_PERSISTENT",
"TERMINAL_DOCKER_RUN_AS_HOST_USER",
"BROWSER_CDP_URL",
"CAMOFOX_URL",
# Platform allowlists — not credentials, but if set from any source
@ -326,6 +338,14 @@ def _reset_module_state():
that don't exist yet (test collection before production import) are
skipped silently; production import later creates fresh empty state.
"""
# --- logging — quiet/one-shot paths mutate process-global logger state ---
logging.disable(logging.NOTSET)
for _logger_name in ("tools", "run_agent", "trajectory_compressor", "cron", "hermes_cli"):
_logger = logging.getLogger(_logger_name)
_logger.disabled = False
_logger.setLevel(logging.NOTSET)
_logger.propagate = True
# --- tools.approval — the single biggest source of cross-test pollution ---
try:
from tools import approval as _approval_mod
@ -380,6 +400,26 @@ def _reset_module_state():
except Exception:
pass
# --- tools.terminal_tool — active environment/cwd cache ---
# File tools prefer a live terminal cwd when one is cached for the task.
# Clear terminal environments between tests so a prior terminal call can't
# override TERMINAL_CWD in path-resolution tests.
try:
from tools import terminal_tool as _term_mod
_envs_to_cleanup = []
with _term_mod._env_lock:
_envs_to_cleanup = list(_term_mod._active_environments.values())
_term_mod._active_environments.clear()
_term_mod._last_activity.clear()
_term_mod._creation_locks.clear()
for _env in _envs_to_cleanup:
try:
_env.cleanup()
except Exception:
pass
except Exception:
pass
# --- tools.credential_files — ContextVar<dict> ---
try:
from tools import credential_files as _credf_mod

View File

@ -1276,9 +1276,10 @@ class TestMatrixUploadAndSend:
mock_client.send_message_event = AsyncMock(return_value="$event")
adapter._client = mock_client
result = await adapter._upload_and_send(
"!room:example.org", b"secret", "secret.txt", "text/plain", "m.file",
)
with patch.dict("sys.modules", _make_fake_mautrix()):
result = await adapter._upload_and_send(
"!room:example.org", b"secret", "secret.txt", "text/plain", "m.file",
)
assert result.success is True
# Should have uploaded ciphertext, not plaintext

View File

@ -1470,6 +1470,8 @@ def test_credential_sources_registry_has_expected_steps():
"~/.hermes/.anthropic_oauth.json",
"auth.json providers.nous",
"auth.json providers.openai-codex + ~/.codex/auth.json",
"auth.json providers.minimax-oauth",
"~/.qwen/oauth_creds.json",
"Custom provider config.yaml api_key field",
}
missing = required - set(descriptions)

View File

@ -526,6 +526,11 @@ class TestCmdMigrate:
class TestCmdCleanup:
"""Test the cleanup command handler."""
@pytest.fixture(autouse=True)
def _mock_openclaw_running(self):
with patch.object(claw_mod, "_detect_openclaw_processes", return_value=[]):
yield
def test_no_dirs_found(self, tmp_path, capsys):
args = Namespace(source=None, dry_run=False, yes=False)
with patch.object(claw_mod, "_find_openclaw_dirs", return_value=[]):

View File

@ -72,7 +72,10 @@ class TestLoadConfigExpansion:
monkeypatch.setenv("GOOGLE_API_KEY", "gsk-test-key")
monkeypatch.setenv("TELEGRAM_BOT_TOKEN", "1234567:ABC-token")
monkeypatch.setattr("hermes_cli.config.get_config_path", lambda: config_file)
# Patch the imported function's own globals. Other tests may reload
# hermes_cli.config, making string-target monkeypatches hit a different
# module object than this collection-time imported load_config().
monkeypatch.setitem(load_config.__globals__, "get_config_path", lambda: config_file)
config = load_config()
@ -86,7 +89,7 @@ class TestLoadConfigExpansion:
config_file.write_text(config_yaml)
monkeypatch.delenv("NOT_SET_XYZ_123", raising=False)
monkeypatch.setattr("hermes_cli.config.get_config_path", lambda: config_file)
monkeypatch.setitem(load_config.__globals__, "get_config_path", lambda: config_file)
config = load_config()

View File

@ -105,7 +105,7 @@ def test_get_container_exec_info_defaults():
)
with patch("hermes_constants.is_container", return_value=False), \
patch("hermes_cli.config.get_hermes_home", return_value=hermes_home), \
patch.dict(get_container_exec_info.__globals__, {"get_hermes_home": lambda: hermes_home}), \
patch.dict(os.environ, {}, clear=False):
os.environ.pop("HERMES_DEV", None)
info = get_container_exec_info()

View File

@ -255,7 +255,8 @@ class TestLaunchdServiceRecovery:
target = f"{domain}/{label}"
def fake_run(cmd, check=False, **kwargs):
calls.append(cmd)
if cmd and cmd[0] == "launchctl":
calls.append(cmd)
if cmd == ["launchctl", "kickstart", target] and calls.count(cmd) == 1:
raise gateway_cli.subprocess.CalledProcessError(3, cmd, stderr="Could not find service")
return SimpleNamespace(returncode=0, stdout="", stderr="")
@ -282,7 +283,8 @@ class TestLaunchdServiceRecovery:
target = f"{domain}/{label}"
def fake_run(cmd, check=False, **kwargs):
calls.append(cmd)
if cmd and cmd[0] == "launchctl":
calls.append(cmd)
if cmd == ["launchctl", "kickstart", target] and calls.count(cmd) == 1:
raise gateway_cli.subprocess.CalledProcessError(113, cmd, stderr="Could not find service")
return SimpleNamespace(returncode=0, stdout="", stderr="")

View File

@ -96,10 +96,17 @@ class TestPtyBridgeIO:
@skip_on_windows
class TestPtyBridgeResize:
def test_resize_updates_child_winsize(self):
# tput reads COLUMNS/LINES from the TTY ioctl (TIOCGWINSZ).
# Spawn a shell, resize, then ask tput for the dimensions.
# Query the TTY ioctl directly instead of using tput, which requires
# TERM and fails in GitHub Actions' non-interactive environment.
winsize_script = (
"import fcntl, struct, termios, time; "
"time.sleep(0.1); "
"rows, cols, *_ = struct.unpack('HHHH', "
"fcntl.ioctl(0, termios.TIOCGWINSZ, b'\\0' * 8)); "
"print(cols); print(rows)"
)
bridge = PtyBridge.spawn(
["/bin/sh", "-c", "sleep 0.1; tput cols; tput lines"],
[sys.executable, "-c", winsize_script],
cols=80,
rows=24,
)

View File

@ -30,6 +30,17 @@ def _clear_provider_env(monkeypatch):
monkeypatch.delenv(key, raising=False)
def _clear_vercel_env(monkeypatch):
for key in (
"TERMINAL_VERCEL_RUNTIME",
"VERCEL_OIDC_TOKEN",
"VERCEL_TOKEN",
"VERCEL_PROJECT_ID",
"VERCEL_TEAM_ID",
):
monkeypatch.delenv(key, raising=False)
def _stub_tts(monkeypatch):
"""Stub out TTS prompts so setup_model_provider doesn't block."""
monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda q, c, d=0: (
@ -485,6 +496,7 @@ def test_modal_setup_persists_direct_mode_when_user_chooses_their_own_account(tm
def test_vercel_setup_configures_access_token_auth(tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
_clear_vercel_env(monkeypatch)
monkeypatch.setenv("VERCEL_OIDC_TOKEN", "old-oidc")
monkeypatch.setitem(sys.modules, "vercel", types.ModuleType("vercel"))
config = load_config()
@ -515,13 +527,7 @@ def test_vercel_setup_configures_access_token_auth(tmp_path, monkeypatch):
def test_vercel_setup_prefills_project_and_team_from_link_file(tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
# Sibling test (test_vercel_setup_configures_access_token_auth) calls
# save_env_value which mutates os.environ directly and never restores
# it. When xdist schedules both tests in the same worker, VERCEL_*
# from the earlier run masks the .vercel/project.json defaults that
# this test exercises. Clear them before load.
for _leaked in ("VERCEL_TOKEN", "VERCEL_PROJECT_ID", "VERCEL_TEAM_ID", "VERCEL_OIDC_TOKEN"):
monkeypatch.delenv(_leaked, raising=False)
_clear_vercel_env(monkeypatch)
project_root = tmp_path / "project"
nested = project_root / "app" / "src"
nested.mkdir(parents=True)

View File

@ -29,7 +29,7 @@ class TestReloadEnv:
"""reload_env() adds vars from .env that are not in os.environ."""
env_file = tmp_path / ".env"
env_file.write_text("TEST_RELOAD_VAR=hello123\n")
with patch("hermes_cli.config.get_env_path", return_value=env_file):
with patch.dict(reload_env.__globals__, {"get_env_path": lambda: env_file}):
os.environ.pop("TEST_RELOAD_VAR", None)
count = reload_env()
assert count >= 1
@ -40,7 +40,7 @@ class TestReloadEnv:
"""reload_env() updates vars whose value changed on disk."""
env_file = tmp_path / ".env"
env_file.write_text("TEST_RELOAD_VAR=old_value\n")
with patch("hermes_cli.config.get_env_path", return_value=env_file):
with patch.dict(reload_env.__globals__, {"get_env_path": lambda: env_file}):
os.environ["TEST_RELOAD_VAR"] = "old_value"
# Now change the file
env_file.write_text("TEST_RELOAD_VAR=new_value\n")
@ -55,7 +55,7 @@ class TestReloadEnv:
env_file.write_text("") # empty .env
# Pick a known key from OPTIONAL_ENV_VARS
known_key = next(iter(OPTIONAL_ENV_VARS.keys()))
with patch("hermes_cli.config.get_env_path", return_value=env_file):
with patch.dict(reload_env.__globals__, {"get_env_path": lambda: env_file}):
os.environ[known_key] = "stale_value"
count = reload_env()
assert known_key not in os.environ
@ -65,7 +65,7 @@ class TestReloadEnv:
"""reload_env() preserves non-Hermes env vars even when absent from .env."""
env_file = tmp_path / ".env"
env_file.write_text("")
with patch("hermes_cli.config.get_env_path", return_value=env_file):
with patch.dict(reload_env.__globals__, {"get_env_path": lambda: env_file}):
os.environ["MY_CUSTOM_UNRELATED_VAR"] = "keep_me"
reload_env()
assert os.environ.get("MY_CUSTOM_UNRELATED_VAR") == "keep_me"
@ -1851,14 +1851,24 @@ class TestPtyWebSocket:
assert b"round-trip-payload" in buf
def test_resize_escape_is_forwarded(self, monkeypatch):
# Resize escape gets intercepted and applied via TIOCSWINSZ,
# then ``tput cols/lines`` reports the new dimensions back.
# Resize escape gets intercepted and applied via TIOCSWINSZ, then the
# child reads the TTY ioctl directly. Avoid tput because CI may not set
# TERM for non-interactive shells.
import sys
winsize_script = (
"import fcntl, struct, termios, time; "
"time.sleep(0.15); "
"rows, cols, *_ = struct.unpack('HHHH', "
"fcntl.ioctl(0, termios.TIOCGWINSZ, b'\\0' * 8)); "
"print(cols); print(rows)"
)
monkeypatch.setattr(
self.ws_module,
"_resolve_chat_argv",
# sleep gives the test time to push the resize before tput runs
# sleep gives the test time to push the resize before the child reads the ioctl.
lambda resume=None, sidecar_url=None: (
["/bin/sh", "-c", "sleep 0.15; tput cols; tput lines"],
[sys.executable, "-c", winsize_script],
None,
None,
),

View File

@ -8,12 +8,10 @@ effects (terminal, send_message, delegate_task, etc.).
import threading
from unittest.mock import patch
from run_agent import AIAgent
def _make_agent_stub():
def _make_agent_stub(agent_cls):
"""Create a minimal AIAgent-like object with just enough state for _spawn_background_review."""
agent = object.__new__(AIAgent)
agent = object.__new__(agent_cls)
agent.model = "test-model"
agent.platform = "test"
agent.provider = "openai"
@ -45,14 +43,16 @@ class _SyncThread:
def test_background_review_agent_uses_restricted_toolsets():
"""The review agent must only have access to 'memory' and 'skills' toolsets."""
agent = _make_agent_stub()
import run_agent
agent = _make_agent_stub(run_agent.AIAgent)
captured = {}
def _capture_init(self, *args, **kwargs):
captured["enabled_toolsets"] = kwargs.get("enabled_toolsets")
raise RuntimeError("stop after capturing init args")
with patch.object(AIAgent, "__init__", _capture_init), \
with patch.object(run_agent.AIAgent, "__init__", _capture_init), \
patch("threading.Thread", _SyncThread):
agent._spawn_background_review(
messages_snapshot=[],

View File

@ -96,7 +96,7 @@ class TestCompactBannerSkinIntegration:
set_active_skin("default")
with patch("cli.shutil.get_terminal_size", return_value=SimpleNamespace(columns=90)), \
patch("cli.format_banner_version_label", return_value="Hermes Agent v0.1.0 (test)"):
patch.dict(_build_compact_banner.__globals__, {"format_banner_version_label": lambda: "Hermes Agent v0.1.0 (test)"}):
banner = _build_compact_banner()
assert "NOUS HERMES" in banner
@ -105,7 +105,7 @@ class TestCompactBannerSkinIntegration:
set_active_skin("poseidon")
with patch("cli.shutil.get_terminal_size", return_value=SimpleNamespace(columns=90)), \
patch("cli.format_banner_version_label", return_value="Hermes Agent v0.1.0 (test)"):
patch.dict(_build_compact_banner.__globals__, {"format_banner_version_label": lambda: "Hermes Agent v0.1.0 (test)"}):
banner = _build_compact_banner()
assert "Poseidon Agent" in banner
@ -116,7 +116,7 @@ class TestCompactBannerSkinIntegration:
skin = get_active_skin()
with patch("cli.shutil.get_terminal_size", return_value=SimpleNamespace(columns=90)), \
patch("cli.format_banner_version_label", return_value="Hermes Agent v0.1.0 (test)"):
patch.dict(_build_compact_banner.__globals__, {"format_banner_version_label": lambda: "Hermes Agent v0.1.0 (test)"}):
banner = _build_compact_banner()
assert skin.get_color("banner_border") in banner
@ -127,7 +127,7 @@ class TestCompactBannerSkinIntegration:
set_active_skin("default")
with patch("cli.shutil.get_terminal_size", return_value=SimpleNamespace(columns=90)), \
patch("cli.format_banner_version_label", return_value="Hermes Agent v1.0 (test) · upstream abc12345"):
patch.dict(_build_compact_banner.__globals__, {"format_banner_version_label": lambda: "Hermes Agent v1.0 (test) · upstream abc12345"}):
banner = _build_compact_banner()
assert "upstream abc12345" in banner

View File

@ -2544,9 +2544,11 @@ def test_session_create_close_race_does_not_orphan_worker(monkeypatch):
# detection entirely and the test would race a non-event.
build_started = threading.Event()
release_build = threading.Event()
build_entered = threading.Event()
def _slow_make_agent(sid, key, session_id=None):
build_started.set()
build_entered.set()
release_build.wait(timeout=3.0)
return _FakeAgent()
@ -2584,6 +2586,7 @@ def test_session_create_close_race_does_not_orphan_worker(monkeypatch):
)
assert resp.get("result"), f"got error: {resp.get('error')}"
sid = resp["result"]["session_id"]
assert build_entered.wait(timeout=1.0), "deferred build did not start"
# Wait until the (deferred) build thread has actually entered
# _make_agent — otherwise session.close pops _sessions[sid] before

View File

@ -131,15 +131,15 @@ class TestApprovalHeartbeat:
"""Polling slices don't delay responsiveness — resolve is near-instant."""
from tools.approval import (
check_all_command_guards,
has_blocking_approval,
register_gateway_notify,
resolve_gateway_approval,
)
register_gateway_notify(self.SESSION_KEY, lambda _payload: None)
start_time = time.monotonic()
result_holder: dict = {}
register_gateway_notify(self.SESSION_KEY, lambda _payload: None)
def _run_check():
result_holder["result"] = check_all_command_guards(
"rm -rf /tmp/nonexistent-fast-target", "local"
@ -148,9 +148,18 @@ class TestApprovalHeartbeat:
thread = threading.Thread(target=_run_check, daemon=True)
thread.start()
# Wait until the worker has actually enqueued the approval. Resolving
# before registration is a test race, not a responsiveness signal.
deadline = time.monotonic() + 5.0
while time.monotonic() < deadline:
if has_blocking_approval(self.SESSION_KEY):
break
time.sleep(0.01)
assert has_blocking_approval(self.SESSION_KEY)
# Resolve almost immediately — the wait loop should return within
# its current 1s poll slice.
time.sleep(0.1)
start_time = time.monotonic()
resolve_gateway_approval(self.SESSION_KEY, "once")
thread.join(timeout=5)
elapsed = time.monotonic() - start_time

View File

@ -354,6 +354,7 @@ class TestOwnerPidCrossProcess:
monkeypatch.setattr(
bt, "_requires_real_termux_browser_install", lambda *a: False
)
monkeypatch.setattr(bt, "_chromium_installed", lambda: True)
monkeypatch.setattr(
bt, "_get_session_info",
lambda task_id: {"session_name": session_name},

View File

@ -205,24 +205,28 @@ class TestMacosOsascript:
class TestIsWsl:
def setup_method(self):
# _is_wsl is now hermes_constants.is_wsl — reset its cache
# _is_wsl is hermes_constants.is_wsl; reset the function's own module
# globals so this stays stable even if hermes_constants was imported
# through a different module object earlier in a large xdist run.
import hermes_constants
hermes_constants._wsl_detected = None
_is_wsl.__globals__["_wsl_detected"] = None
def teardown_method(self):
# Reset again after the test so we don't leak a cached value
# (True/False) into whichever test the xdist worker runs next.
import hermes_constants
hermes_constants._wsl_detected = None
_is_wsl.__globals__["_wsl_detected"] = None
def test_wsl2_detected(self):
content = "Linux version 5.15.0 (microsoft-standard-WSL2)"
with patch("builtins.open", mock_open(read_data=content)):
with patch.dict(_is_wsl.__globals__, {"open": mock_open(read_data=content)}):
assert _is_wsl() is True
def test_wsl1_detected(self):
content = "Linux version 4.4.0-microsoft-standard"
with patch("builtins.open", mock_open(read_data=content)):
with patch.dict(_is_wsl.__globals__, {"open": mock_open(read_data=content)}):
assert _is_wsl() is True
def test_regular_linux(self):
@ -234,20 +238,20 @@ class TestIsWsl:
# short-circuits on the cache. setup_method resets, so we just
# need to be sure the patched `open` is actually reached.
content = "Linux version 6.14.0-37-generic (buildd@lcy02-amd64-049)"
with patch("hermes_constants.open", mock_open(read_data=content), create=True):
with patch.dict(_is_wsl.__globals__, {"open": mock_open(read_data=content)}):
assert _is_wsl() is False
def test_proc_version_missing(self):
with patch("hermes_constants.open", side_effect=FileNotFoundError, create=True):
with patch.dict(_is_wsl.__globals__, {"open": MagicMock(side_effect=FileNotFoundError)}):
assert _is_wsl() is False
def test_result_is_cached(self):
import hermes_constants
content = "Linux version 5.15.0 (microsoft-standard-WSL2)"
with patch("hermes_constants.open", mock_open(read_data=content), create=True) as m:
opener = mock_open(read_data=content)
with patch.dict(_is_wsl.__globals__, {"open": opener}):
assert _is_wsl() is True
assert _is_wsl() is True
m.assert_called_once() # only read once
opener.assert_called_once() # only read once
# ── WSL (powershell.exe) ────────────────────────────────────────────────

View File

@ -16,6 +16,7 @@ import signal
import subprocess
import threading
import time
from types import SimpleNamespace
import pytest
@ -37,6 +38,58 @@ def _pgid_still_alive(pgid: int) -> bool:
return False
def _process_group_snapshot(pgid: int) -> str:
"""Return a process-table snapshot for diagnostics."""
return subprocess.run(
["ps", "-o", "pid,ppid,pgid,stat,cmd", "-g", str(pgid)],
capture_output=True,
text=True,
check=False,
).stdout.strip()
def _wait_for_pgid_exit(pgid: int, timeout: float = 10.0) -> bool:
"""Wait for a process group to disappear under loaded xdist hosts."""
deadline = time.monotonic() + timeout
while time.monotonic() < deadline:
if not _pgid_still_alive(pgid):
return True
time.sleep(0.1)
return not _pgid_still_alive(pgid)
def test_kill_process_uses_cached_pgid_if_wrapper_already_exited(monkeypatch):
"""If the shell wrapper exits before cleanup, still kill its process group.
Without the cached pgid fallback, ``os.getpgid(proc.pid)`` raises for the
dead wrapper and cleanup falls back to ``proc.kill()``, which cannot reach
orphaned grandchildren still running in the original process group.
"""
env = object.__new__(LocalEnvironment)
proc = SimpleNamespace(
pid=12345,
_hermes_pgid=67890,
poll=lambda: 0,
kill=lambda: None,
)
killpg_calls = []
def fake_getpgid(_pid):
raise ProcessLookupError
def fake_killpg(pgid, sig):
killpg_calls.append((pgid, sig))
if sig == 0:
raise ProcessLookupError
monkeypatch.setattr(os, "getpgid", fake_getpgid)
monkeypatch.setattr(os, "killpg", fake_killpg)
env._kill_process(proc)
assert killpg_calls == [(67890, signal.SIGTERM), (67890, 0)]
def test_wait_for_process_kills_subprocess_on_keyboardinterrupt():
"""When KeyboardInterrupt arrives mid-poll, the subprocess group must be
killed before the exception is re-raised."""
@ -118,19 +171,15 @@ def test_wait_for_process_kills_subprocess_on_keyboardinterrupt():
assert not t.is_alive(), "worker didn't exit within 5 s of the interrupt"
# The critical assertion: the subprocess GROUP must be dead. Not
# just the bash wrapper — the 'sleep 30' child too.
# Give the SIGTERM+1s wait+SIGKILL escalation a moment to complete.
deadline = time.monotonic() + 3.0
while time.monotonic() < deadline:
if not _pgid_still_alive(pgid):
break
time.sleep(0.1)
assert not _pgid_still_alive(pgid), (
# just the bash wrapper — the 'sleep 30' child too. Under xdist load,
# process-group disappearance can lag briefly after the worker exits,
# especially if the process is already dying or waiting to be reaped.
assert _wait_for_pgid_exit(pgid), (
f"subprocess group {pgid} is STILL ALIVE after worker received "
f"KeyboardInterrupt — orphan bug regressed. This is the "
f"sleep-300-survives-SIGTERM scenario from Physikal's Apr 2026 "
f"report. See tools/environments/base.py _wait_for_process "
f"except-block."
f"except-block.\n{_process_group_snapshot(pgid)}"
)
# And the worker should have observed the KeyboardInterrupt (i.e.
# it re-raised cleanly, not silently swallowed).

View File

@ -997,10 +997,13 @@ class TestHermesHomeIsolation:
assert "hermes_test" in hermes_home, "Should point to test temp dir"
def test_get_hermes_home_fallback(self):
    """Without HERMES_HOME set, falls back to the active OS home."""
    from tools.tirith_security import _get_hermes_home

    with patch.dict(os.environ, {}, clear=True):
        # Remove HERMES_HOME entirely. With HOME also absent, expanduser
        # falls back to the account database; compute the expectation under
        # the same patched environment instead of after patch.dict restores
        # HOME.
        os.environ.pop("HERMES_HOME", None)
        expected = os.path.join(os.path.expanduser("~"), ".hermes")
        result = _get_hermes_home()
        assert result == expected

View File

@ -36,14 +36,16 @@ class TestGetProvider:
monkeypatch.setenv("VOICE_TOOLS_OPENAI_KEY", "sk-test")
monkeypatch.delenv("GROQ_API_KEY", raising=False)
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
patch("tools.transcription_tools._HAS_OPENAI", True):
patch("tools.transcription_tools._HAS_OPENAI", True), \
patch("tools.transcription_tools._has_local_command", return_value=False):
from tools.transcription_tools import _get_provider
assert _get_provider({"provider": "local"}) == "none"
def test_local_nothing_available(self, monkeypatch):
monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False)
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
patch("tools.transcription_tools._HAS_OPENAI", False):
patch("tools.transcription_tools._HAS_OPENAI", False), \
patch("tools.transcription_tools._has_local_command", return_value=False):
from tools.transcription_tools import _get_provider
assert _get_provider({"provider": "local"}) == "none"

View File

@ -36,6 +36,28 @@ class TestProviderSelectionGate:
configure ``{"enabled": True, "provider": ...}`` for explicit tests.
"""
def test_import_after_config_env_patch_uses_restored_dotenv_loader(self):
    """Importing STT while hermes_cli.config.get_env_value is patched must
    not freeze that temporary helper into this module forever.
    """
    import importlib
    import hermes_cli.config as config_mod
    from tools import transcription_tools as tt

    # Reload the module while the config helper is temporarily replaced,
    # mimicking a test that patches the helper and later restores it.
    with pytest.MonkeyPatch.context() as mpatch:
        mpatch.setattr(config_mod, "get_env_value", lambda name, default=None: "")
        tt = importlib.reload(tt)
    try:
        # After the monkeypatch context exits, the reloaded module must see
        # the restored loader — the dotenv-provided key has to win.
        with patch.object(tt, "_HAS_FASTER_WHISPER", False):
            with patch.object(tt, "_HAS_OPENAI", True):
                with patch.object(tt, "_has_local_command", return_value=False):
                    with patch("hermes_cli.config.load_env",
                               return_value={"GROQ_API_KEY": "dotenv-secret"}):
                        assert tt._get_provider(
                            {"enabled": True, "provider": "groq"}
                        ) == "groq"
    finally:
        # Return the module to its pristine import state for later tests.
        importlib.reload(tt)
def test_explicit_groq_sees_dotenv(self):
from tools import transcription_tools as tt

View File

@ -758,19 +758,12 @@ class TestValidateAudioFileEdgeCases:
f = tmp_path / "test.ogg"
f.write_bytes(b"data")
from tools.transcription_tools import _validate_audio_file
real_stat = f.stat()
call_count = 0
def stat_side_effect(*args, **kwargs):
nonlocal call_count
call_count += 1
# First calls are from exists() and is_file(), let them pass
if call_count <= 2:
return real_stat
raise OSError("disk error")
with patch("pathlib.Path.stat", side_effect=stat_side_effect):
with patch("pathlib.Path.exists", return_value=True), \
patch("pathlib.Path.is_file", return_value=True), \
patch("pathlib.Path.stat", side_effect=OSError("disk error")):
result = _validate_audio_file(str(f))
assert result is not None
assert "Failed to access" in result["error"]

View File

@ -174,6 +174,45 @@ class TestRegressionGuard:
key while ``os.environ`` does not.
"""
def test_import_after_config_env_patch_uses_restored_dotenv_loader(self, tmp_path, monkeypatch):
    """Importing TTS while hermes_cli.config.get_env_value is patched must
    not freeze that temporary helper into this module forever.
    """
    import importlib
    import hermes_cli.config as config_mod
    from tools import tts_tool

    monkeypatch.delenv("MINIMAX_API_KEY", raising=False)
    # Reload TTS while the config helper is swapped out, then let the
    # MonkeyPatch context restore the real helper.
    with pytest.MonkeyPatch.context() as mpatch:
        mpatch.setattr(config_mod, "get_env_value", lambda name: "")
        tts_tool = importlib.reload(tts_tool)
    try:
        seen: dict = {}

        def _stub_post(url, **kwargs):
            # Record headers so the Authorization value can be inspected.
            seen["headers"] = kwargs.get("headers", {})
            reply = MagicMock()
            reply.raise_for_status = MagicMock()
            reply.json.return_value = {
                "data": {"audio": b"\x00".hex()},
                "base_resp": {"status_code": 0},
            }
            return reply

        dotenv = {"MINIMAX_API_KEY": "dotenv-secret"}
        with patch("hermes_cli.config.load_env", return_value=dotenv):
            with patch("requests.post", side_effect=_stub_post):
                tts_tool._generate_minimax_tts(
                    "hi", str(tmp_path / "out.mp3"), {}
                )
        assert seen["headers"]["Authorization"] == "Bearer dotenv-secret"
    finally:
        importlib.reload(tts_tool)
def test_minimax_missing_when_only_in_dotenv_before_fix(self, tmp_path, monkeypatch):
from tools import tts_tool

View File

@ -6,6 +6,7 @@ import shutil
import signal
import subprocess
import tempfile
import time
from tools.environments.base import BaseEnvironment, _pipe_stdin
@ -369,6 +370,11 @@ class LocalEnvironment(BaseEnvironment):
preexec_fn=None if _IS_WINDOWS else os.setsid,
cwd=self.cwd,
)
if not _IS_WINDOWS:
try:
proc._hermes_pgid = os.getpgid(proc.pid)
except ProcessLookupError:
pass
if stdin_data is not None:
_pipe_stdin(proc, stdin_data)
@ -381,12 +387,42 @@ class LocalEnvironment(BaseEnvironment):
if _IS_WINDOWS:
proc.terminate()
else:
pgid = os.getpgid(proc.pid)
try:
pgid = os.getpgid(proc.pid)
except ProcessLookupError:
pgid = getattr(proc, "_hermes_pgid", None)
if pgid is None:
raise
os.killpg(pgid, signal.SIGTERM)
deadline = time.monotonic() + 1.0
while time.monotonic() < deadline:
if proc.poll() is not None:
try:
os.killpg(pgid, 0)
except ProcessLookupError:
return
time.sleep(0.05)
# The shell can exit quickly while a child in the same process
# group is still shutting down. Escalate based on the process
# group, not just the shell wrapper, so interrupted commands do
# not leave orphaned grandchildren under load.
try:
# _IS_WINDOWS is guarded by the enclosing else branch.
os.killpg(pgid, signal.SIGKILL)
except ProcessLookupError:
return
try:
proc.wait(timeout=1.0)
except subprocess.TimeoutExpired:
os.killpg(pgid, signal.SIGKILL)
pass
deadline = time.monotonic() + 1.0
while time.monotonic() < deadline:
try:
os.killpg(pgid, 0)
except ProcessLookupError:
return
time.sleep(0.05)
except (ProcessLookupError, PermissionError):
try:
proc.kill()

View File

@ -489,12 +489,15 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
task_data = _read_tracker.setdefault(task_id, {
"last_key": None, "consecutive": 0,
"read_history": set(), "dedup": {},
"dedup_hits": {},
"dedup_hits": {}, "read_timestamps": {},
})
# Backward-compat for pre-existing tracker entries that predate
# dedup_hits (long-lived task or crossed an upgrade boundary).
# dedup_hits/read_timestamps (long-lived task or crossed an
# upgrade boundary).
if "dedup_hits" not in task_data:
task_data["dedup_hits"] = {}
if "read_timestamps" not in task_data:
task_data["read_timestamps"] = {}
cached_mtime = task_data.get("dedup", {}).get(dedup_key)
if cached_mtime is not None:

View File

@ -915,11 +915,12 @@ class MCPServerTask:
except Exception:
logger.exception("MCP server '%s': dynamic tool refresh failed", self.name)
def _schedule_tools_refresh(self) -> None:
def _schedule_tools_refresh(self) -> asyncio.Task:
"""Schedule a background tool refresh and keep it strongly referenced."""
task = asyncio.create_task(self._refresh_tools_task())
self._pending_refresh_tasks.add(task)
task.add_done_callback(self._pending_refresh_tasks.discard)
return task
def _make_message_handler(self):
"""Build a ``message_handler`` callback for ``ClientSession``.
@ -950,6 +951,10 @@ class MCPServerTask:
# a separate task and let the handler return
# promptly.
self._schedule_tools_refresh()
# Yield one loop tick so tests and short-lived
# notification contexts can observe the scheduled
# refresh without awaiting the full server RPC.
await asyncio.sleep(0)
case PromptListChangedNotification():
logger.debug("MCP server '%s': prompts/list_changed (ignored)", self.name)
case ResourceListChangedNotification():
@ -2005,8 +2010,12 @@ def _make_tool_handler(server_name: str, tool_name: str, tool_timeout: float):
}, ensure_ascii=False)
async def _call():
async with server._rpc_lock:
rpc_lock = getattr(server, "_rpc_lock", None)
if rpc_lock is None:
result = await server.session.call_tool(tool_name, arguments=args)
else:
async with rpc_lock:
result = await server.session.call_tool(tool_name, arguments=args)
# MCP CallToolResult has .content (list of content blocks) and .isError
if result.isError:
error_text = ""

View File

@ -42,11 +42,19 @@ from tools.tool_backend_helpers import managed_nous_tools_enabled, resolve_opena
logger = logging.getLogger(__name__)
def get_env_value(name, default=None):
    """Read env values through the live config module.

    Tests may monkeypatch and later restore ``hermes_cli.config.get_env_value``
    before this module is imported. Resolve the helper at call time so STT does
    not keep a stale imported function for the rest of the test process.

    Args:
        name: Environment/config variable name to look up.
        default: Value returned when the variable is unset.

    Returns:
        The configured value, or ``default`` when the helper yields ``None``
        (or, without hermes_cli installed, when the process env lacks it).
    """
    try:
        from hermes_cli.config import get_env_value as _live_lookup
    except ImportError:
        # Config layer unavailable; fall back to the raw process environment.
        return os.getenv(name, default)
    value = _live_lookup(name)
    if value is None:
        return default
    return value
# ---------------------------------------------------------------------------
# Optional imports — graceful degradation

View File

@ -44,11 +44,19 @@ from urllib.parse import urljoin
from hermes_constants import display_hermes_home
logger = logging.getLogger(__name__)
def get_env_value(name, default=None):
    """Read env values through the live config module.

    Tests may monkeypatch and later restore ``hermes_cli.config.get_env_value``
    before this module is imported. Resolve the helper at call time so TTS does
    not keep a stale imported function for the rest of the test process.

    Args:
        name: Environment/config variable name to look up.
        default: Value returned when the variable is unset.

    Returns:
        The configured value, or ``default`` when the helper yields ``None``
        (or, without hermes_cli installed, when the process env lacks it).
    """
    try:
        from hermes_cli.config import get_env_value as _live_lookup
    except ImportError:
        # Config layer unavailable; fall back to the raw process environment.
        return os.getenv(name, default)
    value = _live_lookup(name)
    if value is None:
        return default
    return value
from tools.managed_tool_gateway import resolve_managed_tool_gateway
from tools.tool_backend_helpers import managed_nous_tools_enabled, prefers_gateway, resolve_openai_audio_api_key
from tools.xai_http import hermes_xai_user_agent

View File

@ -1860,6 +1860,18 @@ def _enrich_with_attached_images(user_text: str, image_paths: list[str]) -> str:
return text or "What do you see in this image?"
def _messages_as_conversation(db, session_id: str, *, include_ancestors: bool = False):
if include_ancestors:
try:
return db.get_messages_as_conversation(
session_id, include_ancestors=True
)
except TypeError as exc:
if "include_ancestors" not in str(exc):
raise
return db.get_messages_as_conversation(session_id)
def _history_to_messages(history: list[dict]) -> list[dict]:
messages = []
tool_call_args = {}
@ -2068,9 +2080,9 @@ def _(rid, params: dict) -> dict:
_enable_gateway_prompts()
try:
db.reopen_session(target)
history = db.get_messages_as_conversation(target)
display_history = db.get_messages_as_conversation(
target, include_ancestors=True
history = _messages_as_conversation(db, target)
display_history = _messages_as_conversation(
db, target, include_ancestors=True
)
messages = _history_to_messages(display_history)
tokens = _set_session_context(target)
@ -2215,8 +2227,8 @@ def _(rid, params: dict) -> dict:
db = _get_db()
if db is not None and session.get("session_key"):
try:
history = db.get_messages_as_conversation(
session["session_key"], include_ancestors=True
history = _messages_as_conversation(
db, session["session_key"], include_ancestors=True
)
except Exception:
pass