fix(systemd): align tests with production contract — D-Bus stubs + TimeoutStopSec (partial close hermes-agent#9) #16

Closed
claude-ceo-assistant wants to merge 1 commits from fix/systemd-tests-align-with-production-contract into main
2 changed files with 49 additions and 8 deletions

View File

@ -64,6 +64,10 @@ class TestSystemdServiceRefresh:
monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: unit_path)
monkeypatch.setattr(gateway_cli, "generate_systemd_unit", lambda system=False, run_as_user=None: "new unit\n")
# Test fixtures don't have a real user D-Bus session — stub the preflight
# so the test exercises the unit-refresh + systemctl call sequence rather
# than the linger / loginctl probe (covered by TestPreflightUserSystemd).
monkeypatch.setattr(gateway_cli, "_preflight_user_systemd", lambda *a, **kw: None)
calls = []
@ -87,6 +91,9 @@ class TestSystemdServiceRefresh:
monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: unit_path)
monkeypatch.setattr(gateway_cli, "generate_systemd_unit", lambda system=False, run_as_user=None: "new unit\n")
# Same rationale as test_systemd_start_refreshes_outdated_unit: this test
# validates the unit-refresh + systemctl call shape, not the D-Bus probe.
monkeypatch.setattr(gateway_cli, "_preflight_user_systemd", lambda *a, **kw: None)
calls = []
@ -115,10 +122,20 @@ class TestGeneratedSystemdUnits:
assert "ExecStop=" not in unit
assert "ExecReload=/bin/kill -USR1 $MAINPID" in unit
assert f"RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}" in unit
# TimeoutStopSec must exceed the default drain_timeout (60s) so
# systemd doesn't SIGKILL the cgroup before post-interrupt cleanup
# (tool subprocess kill, adapter disconnect) runs — issue #8202.
assert "TimeoutStopSec=90" in unit
# TimeoutStopSec must exceed the default drain_timeout so systemd
# doesn't SIGKILL the cgroup before post-interrupt cleanup (tool
# subprocess kill, adapter disconnect) runs — issue #8202.
# gateway.py: restart_timeout = max(60, drain_timeout) + 30s headroom.
# PR #18761 (2026-05-02) raised the default drain_timeout from 60s
# to 180s after a /restart force-interrupted three mid-API-call
# agents inside the old 60s budget, so the expected TimeoutStopSec
# is now 180 + 30 = 210s.
expected_timeout = max(60, int(DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT)) + 30
assert f"TimeoutStopSec={expected_timeout}" in unit
assert expected_timeout == 210, (
"drain_timeout default changed; update DEFAULT_CONFIG and this "
"test in lockstep — see PR #18761 commit message for rationale."
)
def test_user_unit_includes_resolved_node_directory_in_path(self, monkeypatch):
monkeypatch.setattr(gateway_cli.shutil, "which", lambda cmd: "/home/test/.nvm/versions/node/v24.14.0/bin/node" if cmd == "node" else None)
@ -134,10 +151,16 @@ class TestGeneratedSystemdUnits:
assert "ExecStop=" not in unit
assert "ExecReload=/bin/kill -USR1 $MAINPID" in unit
assert f"RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}" in unit
# TimeoutStopSec must exceed the default drain_timeout (60s) so
# systemd doesn't SIGKILL the cgroup before post-interrupt cleanup
# (tool subprocess kill, adapter disconnect) runs — issue #8202.
assert "TimeoutStopSec=90" in unit
# See test_user_unit_avoids_recursive_execstop_and_uses_extended_stop_timeout
# for the TimeoutStopSec rationale; both unit generators share the same
# max(60, drain_timeout) + 30s formula so the user and system units stay
# in lockstep on this contract.
expected_timeout = max(60, int(DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT)) + 30
assert f"TimeoutStopSec={expected_timeout}" in unit
assert expected_timeout == 210, (
"drain_timeout default changed; update DEFAULT_CONFIG and this "
"test in lockstep — see PR #18761 commit message for rationale."
)
assert "WantedBy=multi-user.target" in unit
@ -437,6 +460,11 @@ class TestGatewayServiceDetection:
monkeypatch.setattr(gateway_cli, "is_linux", lambda: True)
monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
monkeypatch.setattr(gateway_cli, "is_wsl", lambda: False)
# supports_systemd_services() also branches on is_container() — without
# this stub the test runs the container fallback which calls real
# systemctl, returning False on a CI runner that has /.dockerenv but
# no working systemd. The test models a native-Linux host, so pin it.
monkeypatch.setattr(gateway_cli, "is_container", lambda: False)
monkeypatch.setattr(gateway_cli.shutil, "which", lambda name: "/usr/bin/systemctl")
assert gateway_cli.supports_systemd_services() is True
@ -487,6 +515,10 @@ class TestGatewaySystemServiceRouting:
calls = []
monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
# systemd_restart() calls _preflight_user_systemd() before refreshing
# the unit; the test fixture has no real D-Bus session so stub it. The
# preflight contract itself is covered by TestPreflightUserSystemd below.
monkeypatch.setattr(gateway_cli, "_preflight_user_systemd", lambda *a, **kw: None)
monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", lambda system=False: calls.append(("refresh", system)))
monkeypatch.setattr(
"gateway.status.get_running_pid",
@ -541,6 +573,8 @@ class TestGatewaySystemServiceRouting:
def test_systemd_restart_recovers_failed_planned_restart(self, monkeypatch, capsys):
monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
# See sibling test for preflight stub rationale.
monkeypatch.setattr(gateway_cli, "_preflight_user_systemd", lambda *a, **kw: None)
monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", lambda system=False: None)
monkeypatch.setattr(
"gateway.status.read_runtime_status",

View File

@ -145,6 +145,13 @@ class TestSupportsSystemdServicesWSL:
monkeypatch.setattr(gateway, "is_linux", lambda: True)
monkeypatch.setattr(gateway, "is_termux", lambda: False)
monkeypatch.setattr(gateway, "is_wsl", lambda: False)
# Pin is_container() too: CI runners often have /.dockerenv which would
# send supports_systemd_services() down the _container_systemd_operational()
# branch (calls real systemctl) and return False here.
monkeypatch.setattr(gateway, "is_container", lambda: False)
# Make sure a systemctl binary appears present so we don't trip the
# which() short-circuit in supports_systemd_services().
monkeypatch.setattr(gateway.shutil, "which", lambda name: "/usr/bin/systemctl")
assert gateway.supports_systemd_services() is True
def test_termux_still_excluded(self, monkeypatch):