diff --git a/tests/hermes_cli/test_update_gateway_restart.py b/tests/hermes_cli/test_update_gateway_restart.py index 721149dd..7577c922 100644 --- a/tests/hermes_cli/test_update_gateway_restart.py +++ b/tests/hermes_cli/test_update_gateway_restart.py @@ -415,7 +415,14 @@ class TestCmdUpdateLaunchdRestart: pid=12345, ) - with patch.object(gateway_cli, "find_gateway_pids", return_value=[12345]), \ + # Model successful primary-kill path: first find returns the + # live PID, subsequent calls (post-restart survivor sweep, #17648) + # return [] — the process exited after the graceful drain. + find_calls = {"n": 0} + def _find_drained(*a, **kw): + find_calls["n"] += 1 + return [12345] if find_calls["n"] == 1 else [] + with patch.object(gateway_cli, "find_gateway_pids", side_effect=_find_drained), \ patch.object(gateway_cli, "find_profile_gateway_processes", return_value=[process]), \ patch.object(gateway_cli, "launch_detached_profile_gateway_restart", return_value=True) as restart, \ patch.object(gateway_cli, "_graceful_restart_via_sigusr1", return_value=True) as graceful, \ @@ -425,7 +432,9 @@ class TestCmdUpdateLaunchdRestart: captured = capsys.readouterr().out restart.assert_called_once_with("coder", 12345) graceful.assert_called_once() - # Graceful drain succeeded — no SIGTERM fallback needed. + # Graceful drain succeeded — no SIGTERM fallback, and no SIGKILL + # from the survivor sweep because the PID drops out of + # find_gateway_pids on the second call. kill.assert_not_called() assert "Restarting manual gateway profile(s): coder" in captured assert "Restart manually: hermes gateway run" not in captured @@ -453,7 +462,13 @@ class TestCmdUpdateLaunchdRestart: pid=12345, ) - with patch.object(gateway_cli, "find_gateway_pids", return_value=[12345]), \ + # Model successful SIGTERM fallback: first find returns the live + # PID; subsequent calls return [] because SIGTERM landed. + find_calls = {"n": 0} + def _find_drained(*a, **kw): + find_calls["n"] += 1 + return [12345] if find_calls["n"] == 1 else [] + with patch.object(gateway_cli, "find_gateway_pids", side_effect=_find_drained), \ patch.object(gateway_cli, "find_profile_gateway_processes", return_value=[process]), \ patch.object(gateway_cli, "launch_detached_profile_gateway_restart", return_value=True) as restart, \ patch.object(gateway_cli, "_graceful_restart_via_sigusr1", return_value=False) as graceful, \ @@ -463,8 +478,11 @@ class TestCmdUpdateLaunchdRestart: captured = capsys.readouterr().out restart.assert_called_once_with("coder", 12345) graceful.assert_called_once() - # Graceful drain returned False → SIGTERM fallback. + # Graceful drain returned False → SIGTERM fallback. SIGTERM + # works (modelled by the find draining), so the survivor sweep + # finds nothing to escalate. kill.assert_called_once() + assert kill.call_args.args[0] == 12345 assert "Restarting manual gateway profile(s): coder" in captured @patch("shutil.which", return_value=None) @@ -872,9 +890,18 @@ class TestServicePidExclusion: launchctl_loaded=True, ) + # Model successful primary-kill path: first find returns both + # PIDs (filtered by the excluded service); subsequent calls + # return only the service PID (manual exited after SIGTERM). + # Without this, the post-restart survivor sweep (#17648) sends + # an extra SIGKILL to the manual PID and breaks the call-count + # assertion below. + find_calls = {"n": 0} def fake_find(exclude_pids=None, all_profiles=False): _exclude = exclude_pids or set() - return [p for p in [SERVICE_PID, MANUAL_PID] if p not in _exclude] + find_calls["n"] += 1 + live = [SERVICE_PID, MANUAL_PID] if find_calls["n"] == 1 else [SERVICE_PID] + return [p for p in live if p not in _exclude] with patch.object( gateway_cli, "_get_service_pids", return_value={SERVICE_PID}