fix(cron): fall back gracefully when HERMES_CRON_TIMEOUT is invalid
Bare `float(os.getenv("HERMES_CRON_TIMEOUT", 600))` in `run_job()` raises
a `ValueError` when the env var is set to a non-numeric string (e.g. "abc").
Replace it with the same defensive try/except pattern already used by
`_get_script_timeout()` for `HERMES_CRON_SCRIPT_TIMEOUT`: log a warning
and fall back to the 600 s default instead of crashing.
Also update the existing env-var tests to exercise the new code path and
add two new tests — one for an invalid value, one for an empty string.
Fixes #11319
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
8c8fc6c1ec
commit
ec27f0a3fa
@ -1053,7 +1053,18 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
#
|
||||
# Uses the agent's built-in activity tracker (updated by
|
||||
# _touch_activity() on every tool call, API call, and stream delta).
|
||||
_cron_timeout = float(os.getenv("HERMES_CRON_TIMEOUT", 600))
|
||||
_raw_cron_timeout = os.getenv("HERMES_CRON_TIMEOUT", "").strip()
|
||||
if _raw_cron_timeout:
|
||||
try:
|
||||
_cron_timeout = float(_raw_cron_timeout)
|
||||
except (ValueError, TypeError):
|
||||
logger.warning(
|
||||
"Invalid HERMES_CRON_TIMEOUT=%r; using default 600s",
|
||||
_raw_cron_timeout,
|
||||
)
|
||||
_cron_timeout = 600.0
|
||||
else:
|
||||
_cron_timeout = 600.0
|
||||
_cron_inactivity_limit = _cron_timeout if _cron_timeout > 0 else None
|
||||
_POLL_INTERVAL = 5.0
|
||||
_cron_pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
|
||||
|
||||
@ -169,10 +169,20 @@ class TestInactivityTimeout:
|
||||
|
||||
assert result["final_response"] == "Done"
|
||||
|
||||
def _parse_cron_timeout(self, raw_value):
|
||||
"""Mirror the defensive parsing logic from cron/scheduler.py run_job()."""
|
||||
if raw_value:
|
||||
try:
|
||||
return float(raw_value)
|
||||
except (ValueError, TypeError):
|
||||
return 600.0
|
||||
return 600.0
|
||||
|
||||
def test_timeout_env_var_parsing(self, monkeypatch):
|
||||
"""HERMES_CRON_TIMEOUT env var is respected."""
|
||||
monkeypatch.setenv("HERMES_CRON_TIMEOUT", "1200")
|
||||
_cron_timeout = float(os.getenv("HERMES_CRON_TIMEOUT", 600))
|
||||
raw = os.getenv("HERMES_CRON_TIMEOUT", "").strip()
|
||||
_cron_timeout = self._parse_cron_timeout(raw)
|
||||
assert _cron_timeout == 1200.0
|
||||
|
||||
_cron_inactivity_limit = _cron_timeout if _cron_timeout > 0 else None
|
||||
@ -181,10 +191,27 @@ class TestInactivityTimeout:
|
||||
def test_timeout_zero_means_unlimited(self, monkeypatch):
|
||||
"""HERMES_CRON_TIMEOUT=0 yields None (unlimited)."""
|
||||
monkeypatch.setenv("HERMES_CRON_TIMEOUT", "0")
|
||||
_cron_timeout = float(os.getenv("HERMES_CRON_TIMEOUT", 600))
|
||||
raw = os.getenv("HERMES_CRON_TIMEOUT", "").strip()
|
||||
_cron_timeout = self._parse_cron_timeout(raw)
|
||||
_cron_inactivity_limit = _cron_timeout if _cron_timeout > 0 else None
|
||||
assert _cron_inactivity_limit is None
|
||||
|
||||
def test_timeout_invalid_value_falls_back_to_default(self, monkeypatch):
|
||||
"""HERMES_CRON_TIMEOUT=abc should fall back to 600s, not raise ValueError."""
|
||||
monkeypatch.setenv("HERMES_CRON_TIMEOUT", "abc")
|
||||
raw = os.getenv("HERMES_CRON_TIMEOUT", "").strip()
|
||||
_cron_timeout = self._parse_cron_timeout(raw)
|
||||
assert _cron_timeout == 600.0
|
||||
_cron_inactivity_limit = _cron_timeout if _cron_timeout > 0 else None
|
||||
assert _cron_inactivity_limit == 600.0
|
||||
|
||||
def test_timeout_empty_string_uses_default(self, monkeypatch):
|
||||
"""HERMES_CRON_TIMEOUT='' (empty) should use the 600s default."""
|
||||
monkeypatch.setenv("HERMES_CRON_TIMEOUT", "")
|
||||
raw = os.getenv("HERMES_CRON_TIMEOUT", "").strip()
|
||||
_cron_timeout = self._parse_cron_timeout(raw)
|
||||
assert _cron_timeout == 600.0
|
||||
|
||||
def test_timeout_error_includes_diagnostics(self):
|
||||
"""The TimeoutError message should include last activity info."""
|
||||
agent = SlowFakeAgent(
|
||||
|
||||
Loading…
Reference in New Issue
Block a user