feat(providers): add per-provider and per-model request_timeout_seconds config

Adds optional providers.<id>.request_timeout_seconds and
providers.<id>.models.<model>.timeout_seconds config keys, resolved via
a new hermes_cli/timeouts.py helper and applied where client_kwargs is
built in run_agent.py. No change to default behavior: when both keys
are unset, the openai SDK's default timeout still applies.
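
Resolution precedence, sketched with illustrative provider and model ids:

    providers:
      ollama-local:
        request_timeout_seconds: 300   # provider-wide default
        models:
          "qwen3:32b":
            timeout_seconds: 600       # per-model override wins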

Mirrors the existing _get_task_timeout pattern in agent/auxiliary_client.py
for auxiliary tasks; the primary turn path simply never had the
equivalent knob.

Cross-project demand: openclaw/openclaw#43946 (17 reactions) asks for
exactly this config, and specifically calls out Ollama cold-starts
hanging the client.
Matt Van Horn authored 2026-04-18 21:38:31 -07:00; committed by Teknium
parent fd119a1c4a
commit 3143d32330
5 changed files with 141 additions and 1 deletion

cli-config.yaml.example

@@ -63,7 +63,19 @@ model:
# Leave unset to use the model's native output ceiling (recommended).
# Set only if you want to deliberately limit individual response length.
#
# max_tokens: 8192

# Named provider overrides (optional)
# Use this for per-provider request timeouts and per-model exceptions.
#
# providers:
# ollama-local:
# request_timeout_seconds: 300 # Longer timeout for local cold-starts
# anthropic:
# request_timeout_seconds: 30 # Fast-fail cloud requests
# models:
# claude-opus-4.6:
# timeout_seconds: 600 # Longer timeout for extended-thinking Opus calls
# =============================================================================
# OpenRouter Provider Routing (only applies when using OpenRouter)

hermes_cli/timeouts.py (new file)

@@ -0,0 +1,42 @@
from __future__ import annotations


def _coerce_timeout(raw: object) -> float | None:
    """Coerce a raw config value to a positive float; None if invalid."""
    try:
        timeout = float(raw)
    except (TypeError, ValueError):
        return None
    if timeout <= 0:
        return None
    return timeout


def get_provider_request_timeout(
    provider_id: str, model: str | None = None
) -> float | None:
    """Return a configured provider request timeout in seconds, if any.

    A per-model timeout_seconds takes precedence over the provider-wide
    request_timeout_seconds; missing or invalid values yield None.
    """
    if not provider_id:
        return None
    try:
        from hermes_cli.config import load_config
    except ImportError:
        return None
    config = load_config()
    providers = config.get("providers", {}) if isinstance(config, dict) else {}
    provider_config = (
        providers.get(provider_id, {}) if isinstance(providers, dict) else {}
    )
    if not isinstance(provider_config, dict):
        return None
    if model:
        models = provider_config.get("models", {})
        model_config = models.get(model, {}) if isinstance(models, dict) else {}
        if isinstance(model_config, dict):
            timeout = _coerce_timeout(model_config.get("timeout_seconds"))
            if timeout is not None:
                return timeout
    return _coerce_timeout(provider_config.get("request_timeout_seconds"))
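
For example, with the commented config from cli-config.yaml.example above (values illustrative):

```python
from hermes_cli.timeouts import get_provider_request_timeout

# Per-model timeout_seconds wins over the provider-wide value.
get_provider_request_timeout("anthropic", "claude-opus-4.6")    # -> 600.0
# No model override: fall back to request_timeout_seconds.
get_provider_request_timeout("anthropic", "claude-sonnet-4.5")  # -> 30.0
# Unknown provider or unset keys: None, so the SDK default applies.
get_provider_request_timeout("unknown-provider")                # -> None
```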

run_agent.py

@@ -48,6 +48,7 @@ from hermes_constants import get_hermes_home
# Load .env from ~/.hermes/.env first, then project root as dev fallback.
# User-managed env files should override stale shell exports on restart.
from hermes_cli.env_loader import load_hermes_dotenv
from hermes_cli.timeouts import get_provider_request_timeout
_hermes_home = get_hermes_home()
_project_env = Path(__file__).parent / '.env'
@@ -1034,6 +1035,9 @@ class AIAgent:
        # Explicit credentials from CLI/gateway — construct directly.
        # The runtime provider resolver already handled auth for us.
        client_kwargs = {"api_key": api_key, "base_url": base_url}
        _provider_timeout = get_provider_request_timeout(self.provider, self.model)
        if _provider_timeout is not None:
            client_kwargs["timeout"] = _provider_timeout
        if self.provider == "copilot-acp":
            client_kwargs["command"] = self.acp_command
            client_kwargs["args"] = self.acp_args

tests for hermes_cli/timeouts.py (new file)

@@ -0,0 +1,78 @@
from __future__ import annotations

import textwrap

from hermes_cli.timeouts import get_provider_request_timeout


def _write_config(tmp_path, body: str) -> None:
    (tmp_path / "config.yaml").write_text(textwrap.dedent(body), encoding="utf-8")


def test_model_timeout_override_wins(monkeypatch, tmp_path):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _write_config(
        tmp_path,
        """\
        providers:
          anthropic:
            request_timeout_seconds: 30
            models:
              claude-opus-4.6:
                timeout_seconds: 120
        """,
    )
    assert get_provider_request_timeout("anthropic", "claude-opus-4.6") == 120.0


def test_provider_timeout_used_when_no_model_override(monkeypatch, tmp_path):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _write_config(
        tmp_path,
        """\
        providers:
          ollama-local:
            request_timeout_seconds: 300
        """,
    )
    assert get_provider_request_timeout("ollama-local", "qwen3:32b") == 300.0


def test_missing_timeout_returns_none(monkeypatch, tmp_path):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _write_config(
        tmp_path,
        """\
        providers:
          anthropic:
            models:
              claude-opus-4.6:
                context_length: 200000
        """,
    )
    assert get_provider_request_timeout("anthropic", "claude-opus-4.6") is None
    assert get_provider_request_timeout("missing-provider", "claude-opus-4.6") is None


def test_invalid_timeout_values_return_none(monkeypatch, tmp_path):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _write_config(
        tmp_path,
        """\
        providers:
          anthropic:
            request_timeout_seconds: "fast"
            models:
              claude-opus-4.6:
                timeout_seconds: -5
          ollama-local:
            request_timeout_seconds: -1
        """,
    )
    assert get_provider_request_timeout("anthropic", "claude-opus-4.6") is None
    assert get_provider_request_timeout("anthropic", "claude-sonnet-4.5") is None
    assert get_provider_request_timeout("ollama-local") is None

docs (configuration page)

@@ -73,6 +73,10 @@ Multiple references in a single value work: `url: "${HOST}:${PORT}"`. If a refer
For AI provider setup (OpenRouter, Anthropic, Copilot, custom endpoints, self-hosted LLMs, fallback models, etc.), see [AI Providers](/docs/integrations/providers).
### Provider Request Timeouts
You can set `providers.<id>.request_timeout_seconds` for a provider-wide timeout, plus `providers.<id>.models.<model>.timeout_seconds` for a model-specific override. See the commented example in [`cli-config.yaml.example`](https://github.com/NousResearch/hermes-agent/blob/main/cli-config.yaml.example).
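A sketch mirroring that commented example (provider and model ids illustrative):

```yaml
providers:
  ollama-local:
    request_timeout_seconds: 300   # longer timeout for local cold-starts
  anthropic:
    request_timeout_seconds: 30    # fast-fail cloud requests
    models:
      claude-opus-4.6:
        timeout_seconds: 600       # per-model override wins
```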
## Terminal Backend Configuration
Hermes supports six terminal backends. Each determines where the agent's shell commands actually execute — your local machine, a Docker container, a remote server via SSH, a Modal cloud sandbox, a Daytona workspace, or a Singularity/Apptainer container.