feat(providers): add per-provider and per-model request_timeout_seconds config
Adds optional providers.<id>.request_timeout_seconds and providers.<id>.models.<model>.timeout_seconds config keys, resolved by a new hermes_cli/timeouts.py helper and applied where client_kwargs is built in run_agent.py. A model-level value takes precedence over the provider-level one. No change to default behavior: when both keys are unset, the OpenAI SDK default timeout applies. This mirrors the existing _get_task_timeout pattern in agent/auxiliary_client.py for auxiliary tasks; the primary turn path simply never got the equivalent knob. Cross-project demand: openclaw/openclaw#43946 (17 reactions) asks for exactly this config and specifically calls out Ollama cold starts hanging the client.
This commit is contained in:
parent
fd119a1c4a
commit
3143d32330
@ -63,7 +63,19 @@ model:
|
||||
# Leave unset to use the model's native output ceiling (recommended).
|
||||
# Set only if you want to deliberately limit individual response length.
|
||||
#
|
||||
# max_tokens: 8192
|
||||
# max_tokens: 8192
|
||||
|
||||
# Named provider overrides (optional)
|
||||
# Use this for per-provider request timeouts and per-model overrides.
|
||||
#
|
||||
# providers:
|
||||
# ollama-local:
|
||||
# request_timeout_seconds: 300 # Longer timeout for local cold-starts
|
||||
# anthropic:
|
||||
# request_timeout_seconds: 30 # Fast-fail cloud requests
|
||||
# models:
|
||||
# claude-opus-4.6:
|
||||
# timeout_seconds: 600 # Longer timeout for extended-thinking Opus calls
|
||||
|
||||
# =============================================================================
|
||||
# OpenRouter Provider Routing (only applies when using OpenRouter)
|
||||
|
||||
42
hermes_cli/timeouts.py
Normal file
42
hermes_cli/timeouts.py
Normal file
@ -0,0 +1,42 @@
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
def _coerce_timeout(raw: object) -> float | None:
|
||||
try:
|
||||
timeout = float(raw)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
if timeout <= 0:
|
||||
return None
|
||||
return timeout
|
||||
|
||||
|
||||
def get_provider_request_timeout(
    provider_id: str, model: str | None = None
) -> float | None:
    """Look up the request timeout (in seconds) configured for a provider.

    A usable ``timeout_seconds`` under
    ``providers.<provider_id>.models.<model>`` takes precedence; otherwise the
    provider-wide ``request_timeout_seconds`` is used.

    Args:
        provider_id: Key of the provider under the top-level ``providers``
            mapping. A falsy value short-circuits to ``None``.
        model: Optional model name used to look up a per-model override.

    Returns:
        The resolved timeout, or ``None`` when nothing usable is configured
        or when ``hermes_cli.config`` cannot be imported.
    """
    if not provider_id:
        return None

    # Imported lazily; a missing config module just means "no timeout".
    try:
        from hermes_cli.config import load_config
    except ImportError:
        return None

    settings = load_config()

    # Each level falls back to an empty mapping when it has the wrong type.
    provider_table = {}
    if isinstance(settings, dict):
        provider_table = settings.get("providers", {})

    provider_entry = {}
    if isinstance(provider_table, dict):
        provider_entry = provider_table.get(provider_id, {})
    if not isinstance(provider_entry, dict):
        return None

    if model:
        model_table = provider_entry.get("models", {})
        if isinstance(model_table, dict):
            model_entry = model_table.get(model, {})
        else:
            model_entry = {}
        if isinstance(model_entry, dict):
            override = _coerce_timeout(model_entry.get("timeout_seconds"))
            if override is not None:
                return override

    # No valid per-model override: use the provider-wide value, if any.
    return _coerce_timeout(provider_entry.get("request_timeout_seconds"))
|
||||
@ -48,6 +48,7 @@ from hermes_constants import get_hermes_home
|
||||
# Load .env from ~/.hermes/.env first, then project root as dev fallback.
|
||||
# User-managed env files should override stale shell exports on restart.
|
||||
from hermes_cli.env_loader import load_hermes_dotenv
|
||||
from hermes_cli.timeouts import get_provider_request_timeout
|
||||
|
||||
_hermes_home = get_hermes_home()
|
||||
_project_env = Path(__file__).parent / '.env'
|
||||
@ -1034,6 +1035,9 @@ class AIAgent:
|
||||
# Explicit credentials from CLI/gateway — construct directly.
|
||||
# The runtime provider resolver already handled auth for us.
|
||||
client_kwargs = {"api_key": api_key, "base_url": base_url}
|
||||
_provider_timeout = get_provider_request_timeout(self.provider, self.model)
|
||||
if _provider_timeout is not None:
|
||||
client_kwargs["timeout"] = _provider_timeout
|
||||
if self.provider == "copilot-acp":
|
||||
client_kwargs["command"] = self.acp_command
|
||||
client_kwargs["args"] = self.acp_args
|
||||
|
||||
78
tests/hermes_cli/test_timeouts.py
Normal file
78
tests/hermes_cli/test_timeouts.py
Normal file
@ -0,0 +1,78 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import textwrap
|
||||
|
||||
from hermes_cli.timeouts import get_provider_request_timeout
|
||||
|
||||
|
||||
def _write_config(tmp_path, body: str) -> None:
    """Write the dedented *body* to ``config.yaml`` inside *tmp_path*."""
    config_file = tmp_path / "config.yaml"
    yaml_text = textwrap.dedent(body)
    config_file.write_text(yaml_text, encoding="utf-8")
|
||||
|
||||
|
||||
def test_model_timeout_override_wins(monkeypatch, tmp_path):
    """A model-level ``timeout_seconds`` beats the provider-wide value."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    config_body = """\
        providers:
          anthropic:
            request_timeout_seconds: 30
            models:
              claude-opus-4.6:
                timeout_seconds: 120
        """
    _write_config(tmp_path, config_body)

    resolved = get_provider_request_timeout("anthropic", "claude-opus-4.6")
    assert resolved == 120.0
|
||||
|
||||
|
||||
def test_provider_timeout_used_when_no_model_override(monkeypatch, tmp_path):
    """The provider-wide timeout applies when the model has no override."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    config_body = """\
        providers:
          ollama-local:
            request_timeout_seconds: 300
        """
    _write_config(tmp_path, config_body)

    resolved = get_provider_request_timeout("ollama-local", "qwen3:32b")
    assert resolved == 300.0
|
||||
|
||||
|
||||
def test_missing_timeout_returns_none(monkeypatch, tmp_path):
    """No timeout keys, or an unknown provider, resolves to ``None``."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    config_body = """\
        providers:
          anthropic:
            models:
              claude-opus-4.6:
                context_length: 200000
        """
    _write_config(tmp_path, config_body)

    # Provider and model exist but carry no timeout settings.
    configured = get_provider_request_timeout("anthropic", "claude-opus-4.6")
    assert configured is None

    # Provider is absent from the config altogether.
    unknown = get_provider_request_timeout("missing-provider", "claude-opus-4.6")
    assert unknown is None
|
||||
|
||||
|
||||
def test_invalid_timeout_values_return_none(monkeypatch, tmp_path):
    """Non-numeric and non-positive timeout values are treated as unset."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    config_body = """\
        providers:
          anthropic:
            request_timeout_seconds: "fast"
            models:
              claude-opus-4.6:
                timeout_seconds: -5
          ollama-local:
            request_timeout_seconds: -1
        """
    _write_config(tmp_path, config_body)

    # Negative model override and non-numeric provider value: both rejected.
    opus = get_provider_request_timeout("anthropic", "claude-opus-4.6")
    assert opus is None

    # Model without an override falls through to the invalid provider value.
    sonnet = get_provider_request_timeout("anthropic", "claude-sonnet-4.5")
    assert sonnet is None

    # Negative provider-wide value with no model supplied.
    local = get_provider_request_timeout("ollama-local")
    assert local is None
|
||||
@ -73,6 +73,10 @@ Multiple references in a single value work: `url: "${HOST}:${PORT}"`. If a refer
|
||||
|
||||
For AI provider setup (OpenRouter, Anthropic, Copilot, custom endpoints, self-hosted LLMs, fallback models, etc.), see [AI Providers](/docs/integrations/providers).
|
||||
|
||||
### Provider Request Timeouts
|
||||
|
||||
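You can set `providers.<id>.request_timeout_seconds` for a provider-wide timeout (in seconds), plus `providers.<id>.models.<model>.timeout_seconds` for a model-specific override, which takes precedence when both are set. Values that are not positive numbers are ignored, and when neither key is set the SDK's default timeout applies. See the commented example in [`cli-config.yaml.example`](https://github.com/NousResearch/hermes-agent/blob/main/cli-config.yaml.example).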
|
||||
|
||||
## Terminal Backend Configuration
|
||||
|
||||
Hermes supports six terminal backends. Each determines where the agent's shell commands actually execute — your local machine, a Docker container, a remote server via SSH, a Modal cloud sandbox, a Daytona workspace, or a Singularity/Apptainer container.
|
||||
|
||||
Loading…
Reference in New Issue
Block a user