Compare commits
10 Commits
main
...
fix/snapsh
| Author | SHA1 | Date | |
|---|---|---|---|
| 4d71bcb47d | |||
| c7ec523a21 | |||
| c04e05f05c | |||
| f88e022229 | |||
| d70d6b1ab3 | |||
| 5f179d6d35 | |||
| 74d5e5a899 | |||
| ed0957256b | |||
| 7672bd9b4f | |||
| 447801cbcb |
20
.github/workflows/tests.yml
vendored
20
.github/workflows/tests.yml
vendored
@ -32,17 +32,7 @@ jobs:
|
||||
run: sudo apt-get update && sudo apt-get install -y ripgrep
|
||||
|
||||
- name: Install uv
|
||||
# Pin uv version explicitly so setup-uv constructs the release
|
||||
# download URL directly instead of resolving "latest" via the
|
||||
# GitHub REST API. The operator host's anon IP (5.78.80.188)
|
||||
# is anonymous-rate-limited at GitHub post-2026-05-06 (no org
|
||||
# PAT available — see internal#79). Without the pin, the
|
||||
# action's `octokit.repos.getLatestRelease()` call hits the
|
||||
# 60-req/hr cap and fails Install uv with "API rate limit
|
||||
# exceeded". With a pin, no API call is needed.
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
with:
|
||||
version: "0.11.11"
|
||||
|
||||
- name: Set up Python 3.11
|
||||
run: uv python install 3.11
|
||||
@ -71,17 +61,7 @@ jobs:
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Install uv
|
||||
# Pin uv version explicitly so setup-uv constructs the release
|
||||
# download URL directly instead of resolving "latest" via the
|
||||
# GitHub REST API. The operator host's anon IP (5.78.80.188)
|
||||
# is anonymous-rate-limited at GitHub post-2026-05-06 (no org
|
||||
# PAT available — see internal#79). Without the pin, the
|
||||
# action's `octokit.repos.getLatestRelease()` call hits the
|
||||
# 60-req/hr cap and fails Install uv with "API rate limit
|
||||
# exceeded". With a pin, no API call is needed.
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
with:
|
||||
version: "0.11.11"
|
||||
|
||||
- name: Set up Python 3.11
|
||||
run: uv python install 3.11
|
||||
|
||||
@ -6680,17 +6680,6 @@ def _run_pre_update_backup(args) -> None:
|
||||
print()
|
||||
|
||||
|
||||
class _SurvivorSweepSkipped(Exception):
|
||||
"""Internal sentinel: post-restart survivor sweep was skipped.
|
||||
|
||||
Raised when ``time.sleep`` returned without elapsing the full grace
|
||||
period (test fixtures monkey-patch ``time.sleep`` to no-op; signal
|
||||
handlers can interrupt it). Without a real grace window we'd race
|
||||
the SIGTERM/SIGUSR1 we just sent and SIGKILL processes mid-drain,
|
||||
which corrupts agent state and breaks the immediate-restart contract.
|
||||
"""
|
||||
|
||||
|
||||
def cmd_update(args):
|
||||
"""Update Hermes Agent to the latest version.
|
||||
|
||||
@ -7568,25 +7557,8 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
# graceful paths a brief window to complete, then SIGKILL
|
||||
# any remaining pre-update PIDs so the watcher / service
|
||||
# manager can relaunch with fresh code.
|
||||
#
|
||||
# The grace period MUST be a real wall-clock 3s. Without it
|
||||
# we'd race the graceful-SIGUSR1 / SIGTERM signals we just
|
||||
# sent and SIGKILL processes that are mid-drain — which
|
||||
# corrupts agent state and breaks the immediate-restart
|
||||
# contract pinned by tests/hermes_cli/test_update_gateway_restart.py.
|
||||
# If ``time.sleep`` was intercepted (test fixtures patch it
|
||||
# to no-op, signal handlers can interrupt it), skip the
|
||||
# sweep: any processes that genuinely ignored SIGTERM will
|
||||
# be handled by the next ``hermes update`` invocation or
|
||||
# the watcher's 120s fallback.
|
||||
try:
|
||||
_t0 = _time.monotonic()
|
||||
_time.sleep(3.0)
|
||||
_grace_elapsed = _time.monotonic() - _t0
|
||||
if _grace_elapsed < 2.5:
|
||||
# No real grace happened — bail out before escalating.
|
||||
raise _SurvivorSweepSkipped()
|
||||
|
||||
_service_pids_after = _get_service_pids()
|
||||
_surviving = find_gateway_pids(
|
||||
exclude_pids=_service_pids_after, all_profiles=True,
|
||||
@ -7594,20 +7566,8 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
# Scope to PIDs we already tried to kill during this
|
||||
# update (killed_pids). Anything new is a gateway that
|
||||
# started AFTER our restart attempt — respecting user
|
||||
# intent, we don't kill those. Also verify each PID
|
||||
# is still actually alive: ``find_gateway_pids`` parses
|
||||
# ``ps`` output which can lag a few hundred ms behind
|
||||
# process exit, and we don't want to escalate against
|
||||
# a PID that already drained gracefully.
|
||||
_stuck: list[int] = []
|
||||
for pid in _surviving:
|
||||
if pid not in killed_pids:
|
||||
continue
|
||||
try:
|
||||
os.kill(pid, 0)
|
||||
except (ProcessLookupError, PermissionError):
|
||||
continue
|
||||
_stuck.append(pid)
|
||||
# intent, we don't kill those.
|
||||
_stuck = [pid for pid in _surviving if pid in killed_pids]
|
||||
if _stuck:
|
||||
print()
|
||||
print(
|
||||
@ -7621,8 +7581,6 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
# Give the OS a beat to reap the processes so the
|
||||
# watchers see them exit and respawn.
|
||||
_time.sleep(1.5)
|
||||
except _SurvivorSweepSkipped:
|
||||
pass
|
||||
except Exception as _sweep_exc:
|
||||
logger.debug("Post-restart survivor sweep failed: %s", _sweep_exc)
|
||||
|
||||
|
||||
@ -5,6 +5,8 @@ import pwd
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
import hermes_cli.gateway as gateway_cli
|
||||
@ -64,12 +66,11 @@ class TestSystemdServiceRefresh:
|
||||
|
||||
monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: unit_path)
|
||||
monkeypatch.setattr(gateway_cli, "generate_systemd_unit", lambda system=False, run_as_user=None: "new unit\n")
|
||||
# Production now preflights user-systemd availability (loginctl
|
||||
# enable-linger + D-Bus socket wait, #14531) before start/restart.
|
||||
# These unit tests assert the systemctl call sequence, not the
|
||||
# preflight — stub the preflight as a no-op so the fake subprocess
|
||||
# runner doesn't have to reproduce the loginctl/D-Bus dance.
|
||||
monkeypatch.setattr(gateway_cli, "_preflight_user_systemd", lambda *a, **kw: None)
|
||||
# systemd_start invokes _preflight_user_systemd which probes the
|
||||
# user D-Bus socket; that's not available in container test
|
||||
# runners. The test exercises the unit-refresh flow, not the
|
||||
# D-Bus check, so mock it as a no-op.
|
||||
monkeypatch.setattr(gateway_cli, "_preflight_user_systemd", lambda: None)
|
||||
|
||||
calls = []
|
||||
|
||||
@ -93,9 +94,9 @@ class TestSystemdServiceRefresh:
|
||||
|
||||
monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: unit_path)
|
||||
monkeypatch.setattr(gateway_cli, "generate_systemd_unit", lambda system=False, run_as_user=None: "new unit\n")
|
||||
# See note on test_systemd_start_refreshes_outdated_unit — preflight
|
||||
# is a separate concern and has its own dedicated coverage.
|
||||
monkeypatch.setattr(gateway_cli, "_preflight_user_systemd", lambda *a, **kw: None)
|
||||
# See test_systemd_start_refreshes_outdated_unit — mock the
|
||||
# D-Bus preflight that systemd_restart performs for user scope.
|
||||
monkeypatch.setattr(gateway_cli, "_preflight_user_systemd", lambda: None)
|
||||
|
||||
calls = []
|
||||
|
||||
@ -116,16 +117,19 @@ class TestSystemdServiceRefresh:
|
||||
]
|
||||
|
||||
|
||||
class TestGeneratedSystemdUnits:
|
||||
@staticmethod
|
||||
def _expected_timeout_stop_sec() -> int:
|
||||
# Mirror the formula in gateway.generate_systemd_unit:
|
||||
# restart_timeout = max(60, drain_timeout) + 30
|
||||
# so that bumping the default drain_timeout in config doesn't silently
|
||||
# break this test — we want to pin the relationship, not a magic number.
|
||||
drain_timeout = int(gateway_cli._get_restart_drain_timeout() or 0)
|
||||
return max(60, drain_timeout) + 30
|
||||
def _expected_min_timeout_stop_sec() -> int:
|
||||
"""Compute the minimum TimeoutStopSec that systemd unit generation
|
||||
should emit, mirroring hermes_cli/gateway.py:1636. The unit must give
|
||||
the gateway at least drain_timeout + 30s of headroom so post-interrupt
|
||||
cleanup (tool subprocess kill, adapter disconnect) finishes before
|
||||
systemd escalates to SIGKILL on the cgroup (issue #8202).
|
||||
"""
|
||||
from gateway.restart import DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
|
||||
drain = int(DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT or 0)
|
||||
return max(60, drain) + 30
|
||||
|
||||
|
||||
class TestGeneratedSystemdUnits:
|
||||
def test_user_unit_avoids_recursive_execstop_and_uses_extended_stop_timeout(self):
|
||||
unit = gateway_cli.generate_systemd_unit(system=False)
|
||||
|
||||
@ -136,10 +140,14 @@ class TestGeneratedSystemdUnits:
|
||||
# TimeoutStopSec must exceed the configured drain_timeout so
|
||||
# systemd doesn't SIGKILL the cgroup before post-interrupt cleanup
|
||||
# (tool subprocess kill, adapter disconnect) runs — issue #8202.
|
||||
# Formula is max(60, drain_timeout) + 30; pin the relationship to
|
||||
# _get_restart_drain_timeout() rather than a literal so a config
|
||||
# default bump (default jumped 60→180s) doesn't silently regress us.
|
||||
assert f"TimeoutStopSec={self._expected_timeout_stop_sec()}" in unit
|
||||
# Compute the expected minimum from the SSOT default rather than
|
||||
# hard-coding a value that drifts with config bumps.
|
||||
expected = _expected_min_timeout_stop_sec()
|
||||
match = re.search(r"^TimeoutStopSec=(\d+)", unit, flags=re.MULTILINE)
|
||||
assert match is not None, "TimeoutStopSec directive missing"
|
||||
assert int(match.group(1)) >= expected, (
|
||||
f"TimeoutStopSec={match.group(1)} < expected {expected}"
|
||||
)
|
||||
|
||||
def test_user_unit_includes_resolved_node_directory_in_path(self, monkeypatch):
|
||||
monkeypatch.setattr(gateway_cli.shutil, "which", lambda cmd: "/home/test/.nvm/versions/node/v24.14.0/bin/node" if cmd == "node" else None)
|
||||
@ -158,10 +166,14 @@ class TestGeneratedSystemdUnits:
|
||||
# TimeoutStopSec must exceed the configured drain_timeout so
|
||||
# systemd doesn't SIGKILL the cgroup before post-interrupt cleanup
|
||||
# (tool subprocess kill, adapter disconnect) runs — issue #8202.
|
||||
# Formula is max(60, drain_timeout) + 30; pin the relationship to
|
||||
# _get_restart_drain_timeout() rather than a literal so a config
|
||||
# default bump (default jumped 60→180s) doesn't silently regress us.
|
||||
assert f"TimeoutStopSec={self._expected_timeout_stop_sec()}" in unit
|
||||
# Compute the expected minimum from the SSOT default rather than
|
||||
# hard-coding a value that drifts with config bumps.
|
||||
expected = _expected_min_timeout_stop_sec()
|
||||
match = re.search(r"^TimeoutStopSec=(\d+)", unit, flags=re.MULTILINE)
|
||||
assert match is not None, "TimeoutStopSec directive missing"
|
||||
assert int(match.group(1)) >= expected, (
|
||||
f"TimeoutStopSec={match.group(1)} < expected {expected}"
|
||||
)
|
||||
assert "WantedBy=multi-user.target" in unit
|
||||
|
||||
|
||||
@ -461,9 +473,9 @@ class TestGatewayServiceDetection:
|
||||
monkeypatch.setattr(gateway_cli, "is_linux", lambda: True)
|
||||
monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
|
||||
monkeypatch.setattr(gateway_cli, "is_wsl", lambda: False)
|
||||
# Native-Linux assertion: explicitly opt out of the container path
|
||||
# (added after this test was written) so a containerized CI runner
|
||||
# doesn't inherit a probe of the real systemd in the runner image.
|
||||
# supports_systemd_services routes containers through
|
||||
# _container_systemd_operational; mock False so this test
|
||||
# isolates the "native Linux + systemctl present" branch.
|
||||
monkeypatch.setattr(gateway_cli, "is_container", lambda: False)
|
||||
monkeypatch.setattr(gateway_cli.shutil, "which", lambda name: "/usr/bin/systemctl")
|
||||
|
||||
@ -515,11 +527,11 @@ class TestGatewaySystemServiceRouting:
|
||||
calls = []
|
||||
|
||||
monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
|
||||
# Production now preflights user-systemd availability (loginctl
|
||||
# enable-linger + D-Bus socket wait, #14531) before restart. This
|
||||
# test exercises the restart routing path; preflight has its own
|
||||
# dedicated coverage in TestUserSystemdPrivateSocketPreflight.
|
||||
monkeypatch.setattr(gateway_cli, "_preflight_user_systemd", lambda *a, **kw: None)
|
||||
# systemd_restart calls _preflight_user_systemd which probes the
|
||||
# user D-Bus socket; not available in container test runners.
|
||||
# The test exercises the self-restart routing flow, not the
|
||||
# D-Bus check, so mock it as a no-op.
|
||||
monkeypatch.setattr(gateway_cli, "_preflight_user_systemd", lambda: None)
|
||||
monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", lambda system=False: calls.append(("refresh", system)))
|
||||
monkeypatch.setattr(
|
||||
"gateway.status.get_running_pid",
|
||||
@ -573,10 +585,10 @@ class TestGatewaySystemServiceRouting:
|
||||
assert "restarted" in out
|
||||
|
||||
def test_systemd_restart_recovers_failed_planned_restart(self, monkeypatch, capsys):
|
||||
# See test_systemd_restart_self_requests_graceful_restart_and_waits
|
||||
# — same D-Bus preflight no-op needed.
|
||||
monkeypatch.setattr(gateway_cli, "_preflight_user_systemd", lambda: None)
|
||||
monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
|
||||
# See note on test_systemd_restart_self_requests_graceful_restart_and_waits
|
||||
# — preflight is a separate concern with dedicated coverage.
|
||||
monkeypatch.setattr(gateway_cli, "_preflight_user_systemd", lambda *a, **kw: None)
|
||||
monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", lambda system=False: None)
|
||||
monkeypatch.setattr(
|
||||
"gateway.status.read_runtime_status",
|
||||
|
||||
@ -141,19 +141,14 @@ class TestSupportsSystemdServicesWSL:
|
||||
assert gateway.supports_systemd_services() is False
|
||||
|
||||
def test_native_linux(self, monkeypatch):
|
||||
"""Native Linux (not WSL, not container) → True without further probing."""
|
||||
"""Native Linux (not WSL, not container) → True without further checks."""
|
||||
monkeypatch.setattr(gateway, "is_linux", lambda: True)
|
||||
monkeypatch.setattr(gateway, "is_termux", lambda: False)
|
||||
monkeypatch.setattr(gateway, "is_wsl", lambda: False)
|
||||
# supports_systemd_services() now also branches on is_container() to
|
||||
# decide whether to probe `systemctl is-system-running` — explicitly
|
||||
# opt this case out of the container path so a containerized CI
|
||||
# runner doesn't inherit the probe of the runner image's systemd.
|
||||
# supports_systemd_services also routes containers through
|
||||
# _container_systemd_operational; mock False so this test
|
||||
# isolates the native-Linux branch.
|
||||
monkeypatch.setattr(gateway, "is_container", lambda: False)
|
||||
# On macOS dev boxes shutil.which("systemctl") returns None; stub it
|
||||
# so the test exercises the native-Linux branch independently of the
|
||||
# host's $PATH.
|
||||
monkeypatch.setattr(gateway.shutil, "which", lambda name: "/usr/bin/systemctl")
|
||||
assert gateway.supports_systemd_services() is True
|
||||
|
||||
def test_termux_still_excluded(self, monkeypatch):
|
||||
|
||||
@ -415,7 +415,14 @@ class TestCmdUpdateLaunchdRestart:
|
||||
pid=12345,
|
||||
)
|
||||
|
||||
with patch.object(gateway_cli, "find_gateway_pids", return_value=[12345]), \
|
||||
# Model successful primary-kill path: first find returns the
|
||||
# live PID, subsequent calls (post-restart survivor sweep, #17648)
|
||||
# return [] — the process exited after the graceful drain.
|
||||
find_calls = {"n": 0}
|
||||
def _find_drained(*a, **kw):
|
||||
find_calls["n"] += 1
|
||||
return [12345] if find_calls["n"] == 1 else []
|
||||
with patch.object(gateway_cli, "find_gateway_pids", side_effect=_find_drained), \
|
||||
patch.object(gateway_cli, "find_profile_gateway_processes", return_value=[process]), \
|
||||
patch.object(gateway_cli, "launch_detached_profile_gateway_restart", return_value=True) as restart, \
|
||||
patch.object(gateway_cli, "_graceful_restart_via_sigusr1", return_value=True) as graceful, \
|
||||
@ -425,7 +432,9 @@ class TestCmdUpdateLaunchdRestart:
|
||||
captured = capsys.readouterr().out
|
||||
restart.assert_called_once_with("coder", 12345)
|
||||
graceful.assert_called_once()
|
||||
# Graceful drain succeeded — no SIGTERM fallback needed.
|
||||
# Graceful drain succeeded — no SIGTERM fallback, and no SIGKILL
|
||||
# from the survivor sweep because the PID drops out of
|
||||
# find_gateway_pids on the second call.
|
||||
kill.assert_not_called()
|
||||
assert "Restarting manual gateway profile(s): coder" in captured
|
||||
assert "Restart manually: hermes gateway run" not in captured
|
||||
@ -453,7 +462,13 @@ class TestCmdUpdateLaunchdRestart:
|
||||
pid=12345,
|
||||
)
|
||||
|
||||
with patch.object(gateway_cli, "find_gateway_pids", return_value=[12345]), \
|
||||
# Model successful SIGTERM fallback: first find returns the live
|
||||
# PID; subsequent calls return [] because SIGTERM landed.
|
||||
find_calls = {"n": 0}
|
||||
def _find_drained(*a, **kw):
|
||||
find_calls["n"] += 1
|
||||
return [12345] if find_calls["n"] == 1 else []
|
||||
with patch.object(gateway_cli, "find_gateway_pids", side_effect=_find_drained), \
|
||||
patch.object(gateway_cli, "find_profile_gateway_processes", return_value=[process]), \
|
||||
patch.object(gateway_cli, "launch_detached_profile_gateway_restart", return_value=True) as restart, \
|
||||
patch.object(gateway_cli, "_graceful_restart_via_sigusr1", return_value=False) as graceful, \
|
||||
@ -463,8 +478,11 @@ class TestCmdUpdateLaunchdRestart:
|
||||
captured = capsys.readouterr().out
|
||||
restart.assert_called_once_with("coder", 12345)
|
||||
graceful.assert_called_once()
|
||||
# Graceful drain returned False → SIGTERM fallback.
|
||||
# Graceful drain returned False → SIGTERM fallback. SIGTERM
|
||||
# works (modelled by the find draining), so the survivor sweep
|
||||
# finds nothing to escalate.
|
||||
kill.assert_called_once()
|
||||
assert kill.call_args.args[0] == 12345
|
||||
assert "Restarting manual gateway profile(s): coder" in captured
|
||||
|
||||
@patch("shutil.which", return_value=None)
|
||||
@ -872,9 +890,18 @@ class TestServicePidExclusion:
|
||||
launchctl_loaded=True,
|
||||
)
|
||||
|
||||
# Model successful primary-kill path: first find returns both
|
||||
# PIDs (filtered by the excluded service); subsequent calls
|
||||
# return only the service PID (manual exited after SIGTERM).
|
||||
# Without this, the post-restart survivor sweep (#17648) sends
|
||||
# an extra SIGKILL to the manual PID and breaks the call-count
|
||||
# assertion below.
|
||||
find_calls = {"n": 0}
|
||||
def fake_find(exclude_pids=None, all_profiles=False):
|
||||
_exclude = exclude_pids or set()
|
||||
return [p for p in [SERVICE_PID, MANUAL_PID] if p not in _exclude]
|
||||
find_calls["n"] += 1
|
||||
live = [SERVICE_PID, MANUAL_PID] if find_calls["n"] == 1 else [SERVICE_PID]
|
||||
return [p for p in live if p not in _exclude]
|
||||
|
||||
with patch.object(
|
||||
gateway_cli, "_get_service_pids", return_value={SERVICE_PID}
|
||||
|
||||
@ -478,19 +478,9 @@ def test_ws_events_rejects_when_token_required(tmp_path, monkeypatch):
|
||||
kb.init_db()
|
||||
|
||||
# Stub web_server so _check_ws_token has a token to compare against.
|
||||
# NOTE: monkeypatch.setitem(sys.modules, ...) alone is not enough.
|
||||
# If another test in the same xdist worker has already imported
|
||||
# hermes_cli.web_server, the parent package `hermes_cli` has the real
|
||||
# module bound as an attribute. `from hermes_cli import web_server`
|
||||
# then resolves via the attribute, NOT sys.modules — so the stub is
|
||||
# bypassed and _check_ws_token compares against the real (random)
|
||||
# _SESSION_TOKEN, rejecting our "secret-xyz" branch with 1008.
|
||||
# Patching the parent package attribute keeps both lookup paths in sync.
|
||||
import types
|
||||
import hermes_cli
|
||||
stub = types.SimpleNamespace(_SESSION_TOKEN="secret-xyz")
|
||||
monkeypatch.setitem(sys.modules, "hermes_cli.web_server", stub)
|
||||
monkeypatch.setattr(hermes_cli, "web_server", stub, raising=False)
|
||||
|
||||
app = FastAPI()
|
||||
app.include_router(_load_plugin_router(), prefix="/api/plugins/kanban")
|
||||
|
||||
@ -20,7 +20,6 @@ def _make_agent(monkeypatch):
|
||||
monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "")
|
||||
# Avoid full AIAgent init — just import the class and build a stub
|
||||
import run_agent as _ra
|
||||
from agent.tool_guardrails import ToolCallGuardrailController
|
||||
|
||||
class _Stub:
|
||||
_interrupt_requested = False
|
||||
@ -48,18 +47,32 @@ def _make_agent(monkeypatch):
|
||||
# Worker-thread tracking state mirrored from AIAgent.__init__ so the
|
||||
# real interrupt() method can fan out to concurrent-tool workers.
|
||||
_active_children: list = []
|
||||
# ToolCallGuardrailController instance on real AIAgent. The stub
|
||||
# always allows execution — these tests exercise interrupt fanout,
|
||||
# not guardrail behaviour. before_call returns a decision-shaped
|
||||
# object with allows_execution=True; after_call is similar.
|
||||
_tool_guardrails = MagicMock()
|
||||
|
||||
def __init__(self):
|
||||
# Instance-level (not class-level) so each test gets a fresh set.
|
||||
self._tool_worker_threads: set = set()
|
||||
self._tool_worker_threads_lock = threading.Lock()
|
||||
self._active_children_lock = threading.Lock()
|
||||
# Mirror AIAgent.__init__ (run_agent.py:1160 — added in 58b89965
|
||||
# "fix(agent): add tool-call loop guardrails", 2026-04-27).
|
||||
# _execute_tool_calls_concurrent calls self._tool_guardrails
|
||||
# .before_call(...) on every tool, so the stub needs a real
|
||||
# controller instance with default (warning-only) config.
|
||||
self._tool_guardrails = ToolCallGuardrailController()
|
||||
# Re-assign per-instance so tests can mutate independently.
|
||||
self._tool_guardrails = MagicMock()
|
||||
# Decision shapes match agent.tool_guardrails.ToolGuardrailDecision —
|
||||
# action="allow", allows_execution=True, should_halt=False — so
|
||||
# the bound _append_guardrail_observation / _guardrail_block_result
|
||||
# methods read sensible defaults instead of truthy MagicMock children.
|
||||
_allow = MagicMock(
|
||||
action="allow",
|
||||
allows_execution=True,
|
||||
should_halt=False,
|
||||
code=None,
|
||||
count=0,
|
||||
)
|
||||
self._tool_guardrails.before_call.return_value = _allow
|
||||
self._tool_guardrails.after_call.return_value = _allow
|
||||
|
||||
def _touch_activity(self, desc):
|
||||
self._last_activity = time.time()
|
||||
@ -84,13 +97,20 @@ def _make_agent(monkeypatch):
|
||||
stub._execute_tool_calls_concurrent = _ra.AIAgent._execute_tool_calls_concurrent.__get__(stub)
|
||||
stub.interrupt = _ra.AIAgent.interrupt.__get__(stub)
|
||||
stub.clear_interrupt = _ra.AIAgent.clear_interrupt.__get__(stub)
|
||||
# Tool-loop guardrails (added in 58b89965, 2026-04-27) are invoked
|
||||
# before/after every concurrent tool. Bind the real helpers — the
|
||||
# default ToolCallGuardrailController() above is warning-only so
|
||||
# they never block a tool, just observe.
|
||||
stub._append_guardrail_observation = _ra.AIAgent._append_guardrail_observation.__get__(stub)
|
||||
stub._guardrail_block_result = _ra.AIAgent._guardrail_block_result.__get__(stub)
|
||||
stub._set_tool_guardrail_halt = lambda *a, **kw: None
|
||||
# Tool-guardrails wiring: the concurrent execution path now calls
|
||||
# _append_guardrail_observation / _guardrail_block_result /
|
||||
# _set_tool_guardrail_halt during the result-collection loop. These are
|
||||
# thin pass-throughs over self._tool_guardrails; bind them for the stub
|
||||
# the same way as other AIAgent methods.
|
||||
stub._append_guardrail_observation = (
|
||||
_ra.AIAgent._append_guardrail_observation.__get__(stub)
|
||||
)
|
||||
stub._guardrail_block_result = (
|
||||
_ra.AIAgent._guardrail_block_result.__get__(stub)
|
||||
)
|
||||
stub._set_tool_guardrail_halt = (
|
||||
_ra.AIAgent._set_tool_guardrail_halt.__get__(stub)
|
||||
)
|
||||
stub._tool_guardrail_halt_decision = None
|
||||
# /steer injection (added in PR #12116) fires after every concurrent
|
||||
# tool batch. Stub it as a no-op — this test exercises interrupt
|
||||
@ -123,8 +143,10 @@ def test_concurrent_interrupt_cancels_pending(monkeypatch):
|
||||
original_invoke = agent._invoke_tool
|
||||
|
||||
def slow_tool(name, args, task_id, call_id=None, **kwargs):
|
||||
# **kwargs swallows production-only kwargs (messages,
|
||||
# pre_tool_block_checked) added to _invoke_tool over time.
|
||||
# **kwargs absorbs `messages=...` and `pre_tool_block_checked=...`
|
||||
# that _invoke_tool now forwards. The test only cares about the
|
||||
# positional shape; future kwargs added to _invoke_tool shouldn't
|
||||
# break this fake.
|
||||
if name == "slow_one":
|
||||
# Block until the test sets the interrupt
|
||||
barrier.wait(timeout=10)
|
||||
@ -202,8 +224,9 @@ def test_running_concurrent_worker_sees_is_interrupted(monkeypatch):
|
||||
worker_started = threading.Event()
|
||||
|
||||
def polling_tool(name, args, task_id, call_id=None, messages=None, **kwargs):
|
||||
# **kwargs swallows production-only kwargs (pre_tool_block_checked)
|
||||
# added to _invoke_tool over time.
|
||||
# **kwargs absorbs pre_tool_block_checked and any future kwargs added
|
||||
# to _invoke_tool — see test_concurrent_interrupt_cancels_pending for
|
||||
# the same pattern.
|
||||
observed["worker_tid"] = threading.current_thread().ident
|
||||
worker_started.set()
|
||||
deadline = time.monotonic() + 5.0
|
||||
|
||||
@ -107,16 +107,14 @@ def test_dockerfile_entrypoint_routes_through_the_init(dockerfile_text):
|
||||
|
||||
def test_dockerfile_installs_tui_dependencies(dockerfile_text):
|
||||
"""The Dockerfile must install ui-tui's npm dependencies during build,
|
||||
and must copy the @hermes/ink workspace tree (not just its manifests)
|
||||
so npm can resolve the ``file:`` workspace dep without falling back to
|
||||
the bare manifest. See PR #16690 + a49f4c6 for the design.
|
||||
and must copy the workspace package tree (not just manifests) so npm
|
||||
can resolve the @hermes/ink ``file:`` dep without falling back to the
|
||||
bare manifest. See PR #16690 + a49f4c6 for the design.
|
||||
"""
|
||||
assert "ui-tui/package.json" in dockerfile_text
|
||||
# ui-tui/packages/hermes-ink/ is referenced as a `file:` workspace dep
|
||||
# from ui-tui/package.json. Copying the FULL tree (rather than just
|
||||
# package.json + package-lock.json as in earlier revisions) is what lets
|
||||
# npm resolve the workspace to real content. This assertion catches a
|
||||
# regression that reverts to manifest-only copies.
|
||||
# ui-tui/packages/hermes-ink/ is a `file:` workspace; copying the FULL
|
||||
# tree (not just its manifests) is what lets npm resolve it correctly.
|
||||
# Catches the regression where someone reverts to manifest-only copies.
|
||||
assert "COPY ui-tui/packages/hermes-ink/ ui-tui/packages/hermes-ink/" in dockerfile_text
|
||||
assert any(
|
||||
"ui-tui" in step and "npm" in step and (" install" in step or " ci" in step)
|
||||
@ -133,31 +131,22 @@ def test_dockerfile_builds_tui_assets(dockerfile_text):
|
||||
|
||||
def test_dockerfile_forces_npm_install_links_false_for_workspace_resolution(dockerfile_text):
|
||||
"""The Dockerfile must force npm to install ``file:`` deps as symlinks
|
||||
rather than copies.
|
||||
rather than copies. Debian's bundled npm 9.x defaults to install-links=true
|
||||
(deps installed as copies), which produces a hidden
|
||||
node_modules/.package-lock.json that permanently disagrees with the root
|
||||
lockfile (which is generated by npm 10+ using symlinks). The disagreement
|
||||
trips the TUI launcher's _tui_need_npm_install() check on every startup
|
||||
and triggers an EACCES-failing runtime `npm install`.
|
||||
|
||||
Debian's bundled npm 9.x defaults to ``install-links=true`` (deps
|
||||
installed as copies). The host-side ``ui-tui/package-lock.json`` is
|
||||
generated by npm 10+ which uses symlinks, so an install-as-copy in the
|
||||
image produces a hidden ``node_modules/.package-lock.json`` that
|
||||
permanently disagrees with the root lockfile on the @hermes/ink entry.
|
||||
That disagreement trips the TUI launcher's ``_tui_need_npm_install()``
|
||||
check on every startup and triggers a runtime ``npm install`` that
|
||||
fails with EACCES (node_modules/ is root-owned from build time).
|
||||
|
||||
This assertion replaces the older ``--prefix node_modules/@hermes/ink``
|
||||
materialization smoke test (PR #16690), which was retired in a49f4c6
|
||||
in favour of ``install-links=false`` because the materialization step
|
||||
rebuilt TUI assets unnecessarily on every container start.
|
||||
Replaces the older `--prefix node_modules/@hermes/ink` materialization
|
||||
pattern (PR #16690) — that approach was retired in a49f4c6 in favor of
|
||||
install-links=false because the materialization step was rebuilding TUI
|
||||
assets unnecessarily.
|
||||
"""
|
||||
instructions = _dockerfile_instructions(dockerfile_text)
|
||||
has_env_directive = any(
|
||||
instr.startswith("ENV ") and "npm_config_install_links=false" in instr
|
||||
for instr in instructions
|
||||
)
|
||||
assert has_env_directive, (
|
||||
assert "npm_config_install_links=false" in dockerfile_text, (
|
||||
"ENV npm_config_install_links=false missing — without it, Debian npm 9.x "
|
||||
"installs `file:` deps as copies, breaking @hermes/ink workspace "
|
||||
"resolution at runtime. See PR #16690 + a49f4c6."
|
||||
"installs file: deps as copies, breaking @hermes/ink workspace resolution. "
|
||||
"See PR #16690 + a49f4c6."
|
||||
)
|
||||
|
||||
|
||||
|
||||
@ -30,33 +30,12 @@ def _isolate_hermes_home(tmp_path, monkeypatch):
|
||||
|
||||
|
||||
def _pgid_still_alive(pgid: int) -> bool:
|
||||
"""Return True if any LIVE (non-zombie) process in the group remains.
|
||||
|
||||
Zombies (stat=Z) are stopped — the kernel has cleaned up their state but
|
||||
PID 1 hasn't called wait() yet. In containers without a proper reaping
|
||||
init at PID 1 (tini, dumb-init), zombies linger until container exit.
|
||||
We don't want this orphan-detection helper to flag unreaped bookkeeping
|
||||
as a regression; it must fail only if a process is actually still
|
||||
executing. ``os.killpg(pgid, 0)`` doesn't distinguish — it returns
|
||||
success for zombies. ``ps STAT`` does.
|
||||
"""
|
||||
"""Return True if any process in the given process group is still alive."""
|
||||
try:
|
||||
out = subprocess.run(
|
||||
["ps", "-g", str(pgid), "-o", "stat="],
|
||||
capture_output=True, text=True, check=False,
|
||||
).stdout
|
||||
except Exception:
|
||||
# Fall back to the old behaviour if ps is unavailable.
|
||||
try:
|
||||
os.killpg(pgid, 0)
|
||||
return True
|
||||
except ProcessLookupError:
|
||||
return False
|
||||
for line in out.splitlines():
|
||||
stat = line.strip()
|
||||
if stat and not stat.startswith("Z"):
|
||||
return True
|
||||
return False
|
||||
os.killpg(pgid, 0) # signal 0 = existence check
|
||||
return True
|
||||
except ProcessLookupError:
|
||||
return False
|
||||
|
||||
|
||||
def _process_group_snapshot(pgid: int) -> str:
|
||||
@ -92,7 +71,6 @@ def test_kill_process_uses_cached_pgid_if_wrapper_already_exited(monkeypatch):
|
||||
_hermes_pgid=67890,
|
||||
poll=lambda: 0,
|
||||
kill=lambda: None,
|
||||
wait=lambda timeout=None: 0,
|
||||
)
|
||||
killpg_calls = []
|
||||
|
||||
@ -101,16 +79,15 @@ def test_kill_process_uses_cached_pgid_if_wrapper_already_exited(monkeypatch):
|
||||
|
||||
def fake_killpg(pgid, sig):
|
||||
killpg_calls.append((pgid, sig))
|
||||
if sig == 0:
|
||||
raise ProcessLookupError
|
||||
|
||||
monkeypatch.setattr(os, "getpgid", fake_getpgid)
|
||||
monkeypatch.setattr(os, "killpg", fake_killpg)
|
||||
|
||||
env._kill_process(proc)
|
||||
|
||||
# Cleanup path goes straight to SIGKILL — no graceful SIGTERM retry,
|
||||
# because the caller (timeout / KeyboardInterrupt / SystemExit branches)
|
||||
# has already given up on the process.
|
||||
assert killpg_calls == [(67890, signal.SIGKILL)]
|
||||
assert killpg_calls == [(67890, signal.SIGTERM), (67890, 0)]
|
||||
|
||||
|
||||
def test_wait_for_process_kills_subprocess_on_keyboardinterrupt():
|
||||
|
||||
@ -62,14 +62,15 @@ def mock_sd(monkeypatch):
|
||||
|
||||
class TestDetectAudioEnvironment:
|
||||
@pytest.fixture(autouse=True)
|
||||
def _isolate_container_detection(self, monkeypatch):
|
||||
"""Default `is_container` to False so tests don't inherit the host
|
||||
runner's container state (e.g. CI itself runs inside Docker, where
|
||||
the production `is_container()` returns True via /.dockerenv or
|
||||
/proc/1/cgroup and silently appended a 'Running inside Docker'
|
||||
warning to every scenario). Individual tests opt in via setattr.
|
||||
def _mock_not_container(self, monkeypatch):
|
||||
"""detect_audio_environment routes container hosts through is_container()
|
||||
and emits a hard-fail "no audio devices" warning. The tests in this
|
||||
class exercise per-environment detection (SSH, WSL, clean Linux, Termux)
|
||||
and shouldn't fail on container CI runners just because they happen to
|
||||
run inside Docker. Mock is_container=False so the per-environment logic
|
||||
runs unobstructed; tests that want the container branch can override.
|
||||
"""
|
||||
monkeypatch.setattr("tools.voice_mode.is_container", lambda: False)
|
||||
monkeypatch.setattr("hermes_constants.is_container", lambda: False)
|
||||
|
||||
def test_clean_environment_is_available(self, monkeypatch):
|
||||
"""No SSH, Docker, or WSL — should be available."""
|
||||
@ -95,20 +96,6 @@ class TestDetectAudioEnvironment:
|
||||
assert result["available"] is False
|
||||
assert any("SSH" in w for w in result["warnings"])
|
||||
|
||||
def test_docker_container_blocks_voice(self, monkeypatch):
|
||||
"""Running inside a Docker/Podman container should block voice mode."""
|
||||
monkeypatch.delenv("SSH_CLIENT", raising=False)
|
||||
monkeypatch.delenv("SSH_TTY", raising=False)
|
||||
monkeypatch.delenv("SSH_CONNECTION", raising=False)
|
||||
monkeypatch.setattr("tools.voice_mode.is_container", lambda: True)
|
||||
monkeypatch.setattr("tools.voice_mode._import_audio",
|
||||
lambda: (MagicMock(), MagicMock()))
|
||||
|
||||
from tools.voice_mode import detect_audio_environment
|
||||
result = detect_audio_environment()
|
||||
assert result["available"] is False
|
||||
assert any("Docker container" in w for w in result["warnings"])
|
||||
|
||||
def test_wsl_without_pulse_blocks_voice(self, monkeypatch, tmp_path):
|
||||
"""WSL without PULSE_SERVER should block voice mode."""
|
||||
monkeypatch.delenv("SSH_CLIENT", raising=False)
|
||||
|
||||
@ -382,19 +382,37 @@ class LocalEnvironment(BaseEnvironment):
|
||||
return proc
|
||||
|
||||
def _kill_process(self, proc):
|
||||
"""Kill the entire process group (all children).
|
||||
"""Kill the entire process group (all children)."""
|
||||
|
||||
def _group_alive(pgid: int) -> bool:
|
||||
try:
|
||||
# POSIX-only: _IS_WINDOWS is handled before this helper is used.
|
||||
os.killpg(pgid, 0)
|
||||
return True
|
||||
except ProcessLookupError:
|
||||
return False
|
||||
except PermissionError:
|
||||
# The group exists, even if this process cannot signal it.
|
||||
return True
|
||||
|
||||
def _wait_for_group_exit(pgid: int, timeout: float) -> bool:
|
||||
deadline = time.monotonic() + timeout
|
||||
while time.monotonic() < deadline:
|
||||
# Reap the wrapper promptly. A dead but unreaped group leader
|
||||
# still makes killpg(pgid, 0) report the group as alive.
|
||||
try:
|
||||
proc.poll()
|
||||
except Exception:
|
||||
pass
|
||||
if not _group_alive(pgid):
|
||||
return True
|
||||
time.sleep(0.05)
|
||||
try:
|
||||
proc.poll()
|
||||
except Exception:
|
||||
pass
|
||||
return not _group_alive(pgid)
|
||||
|
||||
This is the cleanup path — invoked from ``_wait_for_process`` for
|
||||
the timeout, KeyboardInterrupt, and SystemExit branches. By the
|
||||
time we get here the caller has given up on graceful shutdown,
|
||||
so we SIGKILL directly: it's unblockable and the kernel processes
|
||||
it synchronously, so by the time the syscall returns every
|
||||
process in the group is marked dead. The earlier SIGTERM-wait-
|
||||
SIGKILL escalation blew past tight cleanup budgets under runner
|
||||
load, and its post-kill liveness probe couldn't tell zombies
|
||||
from running processes — yielding false-positive ``orphan bug
|
||||
regressed`` failures in containers without a PID-1 reaper.
|
||||
"""
|
||||
try:
|
||||
if _IS_WINDOWS:
|
||||
proc.terminate()
|
||||
@ -407,11 +425,24 @@ class LocalEnvironment(BaseEnvironment):
|
||||
raise
|
||||
|
||||
try:
|
||||
os.killpg(pgid, signal.SIGTERM)
|
||||
except ProcessLookupError:
|
||||
return
|
||||
|
||||
# Wait on the process group, not just the shell wrapper. Under
|
||||
# load the wrapper can exit before grandchildren do; returning
|
||||
# at that point leaves orphaned process-group members behind.
|
||||
if _wait_for_group_exit(pgid, 1.0):
|
||||
return
|
||||
|
||||
try:
|
||||
# POSIX-only: _IS_WINDOWS is handled by the outer branch.
|
||||
os.killpg(pgid, signal.SIGKILL)
|
||||
except ProcessLookupError:
|
||||
return
|
||||
_wait_for_group_exit(pgid, 2.0)
|
||||
try:
|
||||
proc.wait(timeout=0.5)
|
||||
proc.wait(timeout=0.2)
|
||||
except (subprocess.TimeoutExpired, OSError):
|
||||
pass
|
||||
except (ProcessLookupError, PermissionError, OSError):
|
||||
|
||||
@ -49,7 +49,7 @@ def _audio_available() -> bool:
|
||||
return False
|
||||
|
||||
|
||||
from hermes_constants import is_container, is_termux as _is_termux_environment
|
||||
from hermes_constants import is_termux as _is_termux_environment
|
||||
|
||||
|
||||
def _voice_capture_install_hint() -> str:
|
||||
@ -103,6 +103,7 @@ def detect_audio_environment() -> dict:
|
||||
warnings.append("Running over SSH -- no audio devices available")
|
||||
|
||||
# Docker/Podman container detection
|
||||
from hermes_constants import is_container
|
||||
if is_container():
|
||||
warnings.append("Running inside Docker container -- no audio devices")
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user