feat(providers): add per-provider and per-model request_timeout_seconds config

Adds optional providers.<id>.request_timeout_seconds and
providers.<id>.models.<model>.timeout_seconds config keys, resolved via
a new hermes_cli/timeouts.py helper and applied where client_kwargs is
built in run_agent.py. No change to default behavior: when both keys
are unset, the openai SDK's default timeout still applies.
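
Resolution precedence, sketched with illustrative provider and model ids:

    providers:
      ollama-local:
        request_timeout_seconds: 300   # provider-wide default
        models:
          "qwen3:32b":
            timeout_seconds: 600       # per-model override wins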

Mirrors the existing _get_task_timeout pattern in agent/auxiliary_client.py
for auxiliary tasks; the primary turn path simply never had the
equivalent knob.

Cross-project demand: openclaw/openclaw#43946 (17 reactions) asks for
exactly this config, and specifically calls out Ollama cold-starts
hanging the client.
Matt Van Horn authored 2026-04-18 21:38:31 -07:00; committed by Teknium
parent fd119a1c4a
commit 3143d32330
5 changed files with 141 additions and 1 deletion

cli-config.yaml.example

@@ -63,7 +63,19 @@ model:
# Leave unset to use the model's native output ceiling (recommended).
# Set only if you want to deliberately limit individual response length.
#
# max_tokens: 8192

# Named provider overrides (optional)
# Use this for per-provider request timeouts and per-model exceptions.
#
# providers:
# ollama-local:
# request_timeout_seconds: 300 # Longer timeout for local cold-starts
# anthropic:
# request_timeout_seconds: 30 # Fast-fail cloud requests
# models:
# claude-opus-4.6:
# timeout_seconds: 600 # Longer timeout for extended-thinking Opus calls
# =============================================================================
# OpenRouter Provider Routing (only applies when using OpenRouter)

hermes_cli/timeouts.py (new file)

@@ -0,0 +1,42 @@
from __future__ import annotations


def _coerce_timeout(raw: object) -> float | None:
    """Coerce a raw config value to a positive float; None if invalid."""
    try:
        timeout = float(raw)
    except (TypeError, ValueError):
        return None
    if timeout <= 0:
        return None
    return timeout


def get_provider_request_timeout(
    provider_id: str, model: str | None = None
) -> float | None:
    """Return a configured provider request timeout in seconds, if any.

    A per-model timeout_seconds takes precedence over the provider-wide
    request_timeout_seconds; missing or invalid values yield None.
    """
    if not provider_id:
        return None
    try:
        from hermes_cli.config import load_config
    except ImportError:
        return None
    config = load_config()
    providers = config.get("providers", {}) if isinstance(config, dict) else {}
    provider_config = (
        providers.get(provider_id, {}) if isinstance(providers, dict) else {}
    )
    if not isinstance(provider_config, dict):
        return None
    if model:
        models = provider_config.get("models", {})
        model_config = models.get(model, {}) if isinstance(models, dict) else {}
        if isinstance(model_config, dict):
            timeout = _coerce_timeout(model_config.get("timeout_seconds"))
            if timeout is not None:
                return timeout
    return _coerce_timeout(provider_config.get("request_timeout_seconds"))
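
For example, with the commented config from cli-config.yaml.example above (values illustrative):

```python
from hermes_cli.timeouts import get_provider_request_timeout

# Per-model timeout_seconds wins over the provider-wide value.
get_provider_request_timeout("anthropic", "claude-opus-4.6")    # -> 600.0
# No model override: fall back to request_timeout_seconds.
get_provider_request_timeout("anthropic", "claude-sonnet-4.5")  # -> 30.0
# Unknown provider or unset keys: None, so the SDK default applies.
get_provider_request_timeout("unknown-provider")                # -> None
```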

run_agent.py

@@ -48,6 +48,7 @@ from hermes_constants import get_hermes_home
# Load .env from ~/.hermes/.env first, then project root as dev fallback.
# User-managed env files should override stale shell exports on restart.
from hermes_cli.env_loader import load_hermes_dotenv
from hermes_cli.timeouts import get_provider_request_timeout
_hermes_home = get_hermes_home()
_project_env = Path(__file__).parent / '.env'
@@ -1034,6 +1035,9 @@ class AIAgent:
        # Explicit credentials from CLI/gateway — construct directly.
        # The runtime provider resolver already handled auth for us.
        client_kwargs = {"api_key": api_key, "base_url": base_url}
        _provider_timeout = get_provider_request_timeout(self.provider, self.model)
        if _provider_timeout is not None:
            client_kwargs["timeout"] = _provider_timeout
        if self.provider == "copilot-acp":
            client_kwargs["command"] = self.acp_command
            client_kwargs["args"] = self.acp_args

tests for hermes_cli/timeouts.py (new file)

@@ -0,0 +1,78 @@
from __future__ import annotations

import textwrap

from hermes_cli.timeouts import get_provider_request_timeout


def _write_config(tmp_path, body: str) -> None:
    (tmp_path / "config.yaml").write_text(textwrap.dedent(body), encoding="utf-8")


def test_model_timeout_override_wins(monkeypatch, tmp_path):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _write_config(
        tmp_path,
        """\
        providers:
          anthropic:
            request_timeout_seconds: 30
            models:
              claude-opus-4.6:
                timeout_seconds: 120
        """,
    )
    assert get_provider_request_timeout("anthropic", "claude-opus-4.6") == 120.0


def test_provider_timeout_used_when_no_model_override(monkeypatch, tmp_path):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _write_config(
        tmp_path,
        """\
        providers:
          ollama-local:
            request_timeout_seconds: 300
        """,
    )
    assert get_provider_request_timeout("ollama-local", "qwen3:32b") == 300.0


def test_missing_timeout_returns_none(monkeypatch, tmp_path):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _write_config(
        tmp_path,
        """\
        providers:
          anthropic:
            models:
              claude-opus-4.6:
                context_length: 200000
        """,
    )
    assert get_provider_request_timeout("anthropic", "claude-opus-4.6") is None
    assert get_provider_request_timeout("missing-provider", "claude-opus-4.6") is None


def test_invalid_timeout_values_return_none(monkeypatch, tmp_path):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _write_config(
        tmp_path,
        """\
        providers:
          anthropic:
            request_timeout_seconds: "fast"
            models:
              claude-opus-4.6:
                timeout_seconds: -5
          ollama-local:
            request_timeout_seconds: -1
        """,
    )
    assert get_provider_request_timeout("anthropic", "claude-opus-4.6") is None
    assert get_provider_request_timeout("anthropic", "claude-sonnet-4.5") is None
    assert get_provider_request_timeout("ollama-local") is None

docs (configuration page)

@@ -73,6 +73,10 @@ Multiple references in a single value work: `url: "${HOST}:${PORT}"`. If a refer
For AI provider setup (OpenRouter, Anthropic, Copilot, custom endpoints, self-hosted LLMs, fallback models, etc.), see [AI Providers](/docs/integrations/providers).
### Provider Request Timeouts
You can set `providers.<id>.request_timeout_seconds` for a provider-wide timeout, plus `providers.<id>.models.<model>.timeout_seconds` for a model-specific override. See the commented example in [`cli-config.yaml.example`](https://github.com/NousResearch/hermes-agent/blob/main/cli-config.yaml.example).
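A sketch mirroring that commented example (provider and model ids illustrative):

```yaml
providers:
  ollama-local:
    request_timeout_seconds: 300   # longer timeout for local cold-starts
  anthropic:
    request_timeout_seconds: 30    # fast-fail cloud requests
    models:
      claude-opus-4.6:
        timeout_seconds: 600       # per-model override wins
```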
## Terminal Backend Configuration
Hermes supports six terminal backends. Each determines where the agent's shell commands actually execute — your local machine, a Docker container, a remote server via SSH, a Modal cloud sandbox, a Daytona workspace, or a Singularity/Apptainer container.