diff --git a/tests/hermes_cli/test_gateway_service.py b/tests/hermes_cli/test_gateway_service.py index f2bfa8b8..7011b1c0 100644 --- a/tests/hermes_cli/test_gateway_service.py +++ b/tests/hermes_cli/test_gateway_service.py @@ -5,6 +5,8 @@ import pwd from pathlib import Path from types import SimpleNamespace +import re + import pytest import hermes_cli.gateway as gateway_cli @@ -107,6 +109,18 @@ class TestSystemdServiceRefresh: ] +def _expected_min_timeout_stop_sec() -> int: + """Compute the minimum TimeoutStopSec that systemd unit generation + should emit, mirroring hermes_cli/gateway.py:1636. The unit must give + the gateway at least drain_timeout + 30s of headroom so post-interrupt + cleanup (tool subprocess kill, adapter disconnect) finishes before + systemd escalates to SIGKILL on the cgroup (issue #8202). + """ + from gateway.restart import DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT + drain = int(DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT or 0) + return max(60, drain) + 30 + + class TestGeneratedSystemdUnits: def test_user_unit_avoids_recursive_execstop_and_uses_extended_stop_timeout(self): unit = gateway_cli.generate_systemd_unit(system=False) @@ -115,10 +129,17 @@ class TestGeneratedSystemdUnits: assert "ExecStop=" not in unit assert "ExecReload=/bin/kill -USR1 $MAINPID" in unit assert f"RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}" in unit - # TimeoutStopSec must exceed the default drain_timeout (60s) so + # TimeoutStopSec must exceed the configured drain_timeout so # systemd doesn't SIGKILL the cgroup before post-interrupt cleanup # (tool subprocess kill, adapter disconnect) runs — issue #8202. - assert "TimeoutStopSec=90" in unit + # Compute the expected minimum from the SSOT default rather than + # hard-coding a value that drifts with config bumps. + expected = _expected_min_timeout_stop_sec() + match = re.search(r"^TimeoutStopSec=(\d+)", unit, flags=re.MULTILINE) + assert match is not None, "TimeoutStopSec directive missing" + assert int(match.group(1)) >= expected, ( + f"TimeoutStopSec={match.group(1)} < expected {expected}" + ) def test_user_unit_includes_resolved_node_directory_in_path(self, monkeypatch): monkeypatch.setattr(gateway_cli.shutil, "which", lambda cmd: "/home/test/.nvm/versions/node/v24.14.0/bin/node" if cmd == "node" else None) @@ -134,10 +155,17 @@ class TestGeneratedSystemdUnits: assert "ExecStop=" not in unit assert "ExecReload=/bin/kill -USR1 $MAINPID" in unit assert f"RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}" in unit - # TimeoutStopSec must exceed the default drain_timeout (60s) so + # TimeoutStopSec must exceed the configured drain_timeout so # systemd doesn't SIGKILL the cgroup before post-interrupt cleanup # (tool subprocess kill, adapter disconnect) runs — issue #8202. - assert "TimeoutStopSec=90" in unit + # Compute the expected minimum from the SSOT default rather than + # hard-coding a value that drifts with config bumps. + expected = _expected_min_timeout_stop_sec() + match = re.search(r"^TimeoutStopSec=(\d+)", unit, flags=re.MULTILINE) + assert match is not None, "TimeoutStopSec directive missing" + assert int(match.group(1)) >= expected, ( + f"TimeoutStopSec={match.group(1)} < expected {expected}" + ) assert "WantedBy=multi-user.target" in unit