# Patch summary (gateway/run.py):
#   * Adds a module-level helper `_float_env(name, default)` that reads
#     `os.environ.get(name)` and returns it as a float. It returns
#     `float(default)` when the variable is unset, is the empty string, or
#     makes `float()` raise TypeError/ValueError.
#   * Replaces four inline `try: float(os.getenv(...))` /
#     `except (ValueError, TypeError)` blocks in `GatewayRunner` with calls
#     to that helper, keeping the same fallback values:
#       - HERMES_AGENT_TIMEOUT          (1800) -> _raw_stale_timeout
#       - HERMES_AGENT_NOTIFY_INTERVAL  (180)  -> _NOTIFY_INTERVAL_RAW
#       - HERMES_AGENT_TIMEOUT          (1800) -> _agent_timeout_raw
#       - HERMES_AGENT_TIMEOUT_WARNING  (900)  -> _agent_warning_raw
#     The last three call sites still map non-positive values to None.
# NOTE(review): `float()` also parses strings such as "nan" and "inf"; the
#   helper returns those unchanged rather than falling back. "nan" fails the
#   `> 0` checks at the call sites and so becomes None there -- confirm this
#   is acceptable.
# NOTE(review): the hunk bodies below appear to have had their newlines
#   collapsed into spaces. Presumably the original line structure must be
#   restored from the upstream commit before this can be applied -- confirm.
diff --git a/gateway/run.py b/gateway/run.py index 8c6c4a51..af654021 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -122,6 +122,21 @@ def _auto_continue_freshness_window() -> float: return float(_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT) +def _float_env(name: str, default: float) -> float: + """Read an env var as float, falling back to ``default`` on typos/empty. + + A misconfigured env var (e.g. ``HERMES_AGENT_TIMEOUT=abc``) must not + crash the gateway or an agent turn. Unset/empty also falls back. + """ + raw = os.environ.get(name) + if raw is None or raw == "": + return float(default) + try: + return float(raw) + except (TypeError, ValueError): + return float(default) + + def _is_fresh_gateway_interruption( value: Any, *, @@ -3948,10 +3963,7 @@ class GatewayRunner: # wall-clock age alone isn't sufficient. Evict only when the agent # has been *idle* beyond the inactivity threshold (or when the agent # object has no activity tracker and wall-clock age is extreme). - try: - _raw_stale_timeout = float(os.getenv("HERMES_AGENT_TIMEOUT", 1800)) - except (ValueError, TypeError): - _raw_stale_timeout = 1800.0 + _raw_stale_timeout = _float_env("HERMES_AGENT_TIMEOUT", 1800) _stale_ts = self._running_agents_ts.get(_quick_key, 0) if _quick_key in self._running_agents and _stale_ts: _stale_age = time.time() - _stale_ts @@ -11758,10 +11770,7 @@ class GatewayRunner: # Config: agent.gateway_notify_interval in config.yaml, or # HERMES_AGENT_NOTIFY_INTERVAL env var. Default 180s (3 min). # 0 = disable notifications. 
- try: - _NOTIFY_INTERVAL_RAW = float(os.getenv("HERMES_AGENT_NOTIFY_INTERVAL", 180)) - except (ValueError, TypeError): - _NOTIFY_INTERVAL_RAW = 180.0 + _NOTIFY_INTERVAL_RAW = _float_env("HERMES_AGENT_NOTIFY_INTERVAL", 180) _NOTIFY_INTERVAL = _NOTIFY_INTERVAL_RAW if _NOTIFY_INTERVAL_RAW > 0 else None _notify_start = time.time() @@ -11809,15 +11818,9 @@ class GatewayRunner: # Config: agent.gateway_timeout in config.yaml, or # HERMES_AGENT_TIMEOUT env var (env var takes precedence). # Default 1800s (30 min inactivity). 0 = unlimited. - try: - _agent_timeout_raw = float(os.getenv("HERMES_AGENT_TIMEOUT", 1800)) - except (ValueError, TypeError): - _agent_timeout_raw = 1800.0 + _agent_timeout_raw = _float_env("HERMES_AGENT_TIMEOUT", 1800) _agent_timeout = _agent_timeout_raw if _agent_timeout_raw > 0 else None - try: - _agent_warning_raw = float(os.getenv("HERMES_AGENT_TIMEOUT_WARNING", 900)) - except (ValueError, TypeError): - _agent_warning_raw = 900.0 + _agent_warning_raw = _float_env("HERMES_AGENT_TIMEOUT_WARNING", 900) _agent_warning = _agent_warning_raw if _agent_warning_raw > 0 else None _warning_fired = False _executor_task = asyncio.ensure_future(