feat(providers): add GMI Cloud as a first-class API-key provider (#11955)

Add GMI Cloud (api.gmi-serving.com) as a full first-class API-key provider
with built-in auth, aliases, model catalog, CLI entry points, auxiliary client
routing, context length resolution, doctor checks, env var tracking, and docs.

- auth.py: ProviderConfig for 'gmi' (api_key, GMI_API_KEY / GMI_BASE_URL)
- providers.py: HermesOverlay with extra_env_vars for models.dev detection
- models.py: curated slash-form model catalog; live /v1/models fetch
- main.py: 'gmi' in _named_custom_provider_map and --provider choices
- model_metadata.py: _URL_TO_PROVIDER, _PROVIDER_PREFIXES, dedicated
  context-length probe block (GMI's /models has authoritative data)
- auxiliary_client.py: alias entries; _compat_model fix for slash-form
  models on cached aggregator-style clients; gmi aux default model
- doctor.py: GMI in provider connectivity checks
- config.py: GMI_API_KEY / GMI_BASE_URL in OPTIONAL_ENV_VARS
- conftest.py: explicit GMI_BASE_URL clearing (not caught by _API_KEY suffix)
- docs: providers.md, environment-variables.md, fallback-providers.md,
  configuration.md, quickstart.md (expands provider table)

Co-authored-by: Isaac Huang <isaachuang@Isaacs-MacBook-Pro.local>
This commit is contained in:
Isaac Huang 2026-04-17 11:19:56 -07:00 committed by kshitij
parent 41f70e6fc4
commit c53fcb0173
18 changed files with 700 additions and 38 deletions

1
.gitignore vendored
View File

@ -69,3 +69,4 @@ mini-swe-agent/
.nix-stamps/
result
website/static/api/skills-index.json
models-dev-upstream/

View File

@ -82,6 +82,8 @@ _PROVIDER_ALIASES = {
"moonshot": "kimi-coding",
"kimi-cn": "kimi-coding-cn",
"moonshot-cn": "kimi-coding-cn",
"gmi-cloud": "gmi",
"gmicloud": "gmi",
"minimax-china": "minimax-cn",
"minimax_cn": "minimax-cn",
"claude": "anthropic",
@ -155,6 +157,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
"kimi-coding": "kimi-k2-turbo-preview",
"stepfun": "step-3.5-flash",
"kimi-coding-cn": "kimi-k2-turbo-preview",
"gmi": "anthropic/claude-opus-4.6",
"minimax": "MiniMax-M2.7",
"minimax-cn": "MiniMax-M2.7",
"anthropic": "claude-haiku-4-5-20251001",
@ -2558,12 +2561,19 @@ def _is_openrouter_client(client: Any) -> bool:
return False
def _cached_client_accepts_slash_models(client: Any, cached_default: Optional[str]) -> bool:
    """Heuristic: does this cached client tolerate ``vendor/model`` slugs?

    OpenRouter-style clients always accept slash-form IDs; for anything else
    support is inferred from the cached default model itself carrying a slash.
    """
    if not _is_openrouter_client(client):
        return bool(cached_default) and "/" in cached_default
    return True
def _compat_model(client: Any, model: Optional[str], cached_default: Optional[str]) -> Optional[str]:
    """Keep slash-bearing model IDs only for cached clients that support them.

    Mirrors the guard in resolve_provider_client() which is skipped on cache
    hits.  Returns ``cached_default`` when ``model`` is a ``vendor/model`` slug
    that the cached client cannot accept, otherwise the requested model (or
    the cached default when no model was requested).
    """
    # NOTE: the rendered diff interleaved the pre-change docstring/condition
    # with the post-change ones; this is the reconstructed post-change body.
    if model and "/" in model and not _cached_client_accepts_slash_models(client, cached_default):
        return cached_default
    return model or cached_default

View File

@ -51,6 +51,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
"qwen-oauth",
"xiaomi",
"arcee",
"gmi",
"custom", "local",
# Common aliases
"google", "google-gemini", "google-ai-studio",
@ -60,6 +61,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
"stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
"mimo", "xiaomi-mimo",
"arcee-ai", "arceeai",
"gmi-cloud", "gmicloud",
"xai", "x-ai", "x.ai", "grok",
"nvidia", "nim", "nvidia-nim", "nemotron",
"qwen-portal",
@ -307,6 +309,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
"integrate.api.nvidia.com": "nvidia",
"api.xiaomimimo.com": "xiaomi",
"xiaomimimo.com": "xiaomi",
"api.gmi-serving.com": "gmi",
"ollama.com": "ollama-cloud",
}
@ -702,6 +705,29 @@ def fetch_endpoint_model_metadata(
return {}
def _resolve_endpoint_context_length(
    model: str,
    base_url: str,
    api_key: str = "",
) -> Optional[int]:
    """Resolve context length from an endpoint's live ``/models`` metadata.

    Matching order: exact model ID; then, for single-model servers, the only
    entry; finally a substring match in either direction.  Returns ``None``
    when no entry carries an integer ``context_length``.
    """
    catalog = fetch_endpoint_model_metadata(base_url, api_key=api_key)
    entry = catalog.get(model)
    if not entry:
        if len(catalog) == 1:
            # Single-model servers: the one loaded model is the answer.
            entry = next(iter(catalog.values()))
        else:
            # Fuzzy fallback: substring containment either way around.
            entry = next(
                (meta for name, meta in catalog.items() if model in name or name in model),
                None,
            )
    if entry:
        length = entry.get("context_length")
        if isinstance(length, int):
            return length
    return None
def _get_context_cache_path() -> Path:
"""Return path to the persistent context length cache file."""
from hermes_constants import get_hermes_home
@ -1295,21 +1321,8 @@ def get_model_context_length(
# returns 128k) instead of the model's full context (400k). models.dev
# has the correct per-provider values and is checked at step 5+.
if _is_custom_endpoint(base_url) and not _is_known_provider_base_url(base_url):
endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
matched = endpoint_metadata.get(model)
if not matched:
# Single-model servers: if only one model is loaded, use it
if len(endpoint_metadata) == 1:
matched = next(iter(endpoint_metadata.values()))
else:
# Fuzzy match: substring in either direction
for key, entry in endpoint_metadata.items():
if model in key or key in model:
matched = entry
break
if matched:
context_length = matched.get("context_length")
if isinstance(context_length, int):
context_length = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
if context_length is not None:
return context_length
if not _is_known_provider_base_url(base_url):
# 3. Try querying local server directly
@ -1374,6 +1387,12 @@ def get_model_context_length(
if base_url:
save_context_length(model, base_url, codex_ctx)
return codex_ctx
if effective_provider == "gmi" and base_url:
# GMI exposes authoritative context_length via /models, but it is not
# in models.dev yet. Preserve that higher-fidelity endpoint lookup.
ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
if ctx is not None:
return ctx
if effective_provider:
from agent.models_dev import lookup_models_dev_context
ctx = lookup_models_dev_context(effective_provider, model)

View File

@ -224,6 +224,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
api_key_env_vars=("ARCEEAI_API_KEY",),
base_url_env_var="ARCEE_BASE_URL",
),
"gmi": ProviderConfig(
id="gmi",
name="GMI Cloud",
auth_type="api_key",
inference_base_url="https://api.gmi-serving.com/v1",
api_key_env_vars=("GMI_API_KEY",),
base_url_env_var="GMI_BASE_URL",
),
"minimax": ProviderConfig(
id="minimax",
name="MiniMax",
@ -1120,6 +1128,7 @@ def resolve_provider(
"kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
"step": "stepfun", "stepfun-coding-plan": "stepfun",
"arcee-ai": "arcee", "arceeai": "arcee",
"gmi-cloud": "gmi", "gmicloud": "gmi",
"minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
"alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan",
"alibaba_coding_plan": "alibaba-coding-plan",

View File

@ -1082,6 +1082,7 @@ ENV_VARS_BY_VERSION: Dict[int, List[str]] = {
"SLACK_BOT_TOKEN", "SLACK_APP_TOKEN", "SLACK_ALLOWED_USERS"],
10: ["TAVILY_API_KEY"],
11: ["TERMINAL_MODAL_MODE"],
17: ["GMI_API_KEY", "GMI_BASE_URL"],
}
# Required environment variables with metadata for migration prompts.
@ -1254,6 +1255,22 @@ OPTIONAL_ENV_VARS = {
"category": "provider",
"advanced": True,
},
"GMI_API_KEY": {
"description": "GMI Cloud API key",
"prompt": "GMI Cloud API key",
"url": "https://www.gmicloud.ai/",
"password": True,
"category": "provider",
"advanced": True,
},
"GMI_BASE_URL": {
"description": "GMI Cloud base URL override",
"prompt": "GMI Cloud base URL (leave empty for default)",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
"MINIMAX_API_KEY": {
"description": "MiniMax API key (international)",
"prompt": "MiniMax API key",

View File

@ -46,6 +46,7 @@ _PROVIDER_ENV_HINTS = (
"Z_AI_API_KEY",
"KIMI_API_KEY",
"KIMI_CN_API_KEY",
"GMI_API_KEY",
"MINIMAX_API_KEY",
"MINIMAX_CN_API_KEY",
"KILOCODE_API_KEY",
@ -937,6 +938,7 @@ def run_doctor(args):
("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True),
("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
("GMI Cloud", ("GMI_API_KEY",), "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True),
("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),

View File

@ -1768,6 +1768,7 @@ def select_provider_and_model(args=None):
"huggingface",
"xiaomi",
"arcee",
"gmi",
"nvidia",
"ollama-cloud",
):
@ -7782,6 +7783,7 @@ For more help on a command:
"kilocode",
"xiaomi",
"arcee",
"gmi",
"nvidia",
],
default=None,

View File

@ -278,6 +278,14 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"trinity-large-preview",
"trinity-mini",
],
"gmi": [
"zai-org/GLM-5.1-FP8",
"deepseek-ai/DeepSeek-V3.2",
"moonshotai/Kimi-K2.5",
"google/gemini-3.1-flash-lite-preview",
"anthropic/claude-sonnet-4.6",
"openai/gpt-5.4",
],
"opencode-zen": [
"kimi-k2.5",
"gpt-5.4-pro",
@ -709,7 +717,6 @@ class ProviderEntry(NamedTuple):
label: str
tui_desc: str # detailed description for `hermes model` TUI
CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("nous", "Nous Portal", "Nous Portal (Nous Research subscription)"),
ProviderEntry("openrouter", "OpenRouter", "OpenRouter (100+ models, pay-per-use)"),
@ -735,6 +742,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
ProviderEntry("ollama-cloud", "Ollama Cloud", "Ollama Cloud (cloud-hosted open models — ollama.com)"),
ProviderEntry("arcee", "Arcee AI", "Arcee AI (Trinity models — direct API)"),
ProviderEntry("gmi", "GMI Cloud", "GMI Cloud (multi-model direct API)"),
ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"),
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
@ -769,6 +777,8 @@ _PROVIDER_ALIASES = {
"stepfun-coding-plan": "stepfun",
"arcee-ai": "arcee",
"arceeai": "arcee",
"gmi-cloud": "gmi",
"gmicloud": "gmi",
"minimax-china": "minimax-cn",
"minimax_cn": "minimax-cn",
"claude": "anthropic",
@ -1849,6 +1859,19 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
return live
except Exception:
pass
if normalized == "gmi":
try:
from hermes_cli.auth import resolve_api_key_provider_credentials
creds = resolve_api_key_provider_credentials("gmi")
api_key = str(creds.get("api_key") or "").strip()
base_url = str(creds.get("base_url") or "").strip()
if api_key and base_url:
live = fetch_api_models(api_key, base_url)
if live:
return live
except Exception:
pass
if normalized == "custom":
base_url = _get_custom_base_url()
if base_url:

View File

@ -163,6 +163,12 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
base_url_override="https://api.arcee.ai/api/v1",
base_url_env_var="ARCEE_BASE_URL",
),
"gmi": HermesOverlay(
transport="openai_chat",
extra_env_vars=("GMI_API_KEY",),
base_url_override="https://api.gmi-serving.com/v1",
base_url_env_var="GMI_BASE_URL",
),
"ollama-cloud": HermesOverlay(
transport="openai_chat",
base_url_env_var="OLLAMA_BASE_URL",
@ -297,6 +303,10 @@ ALIASES: Dict[str, str] = {
"arcee-ai": "arcee",
"arceeai": "arcee",
# gmi
"gmi-cloud": "gmi",
"gmicloud": "gmi",
# Local server aliases → virtual "local" concept (resolved via user config)
"lmstudio": "lmstudio",
"lm-studio": "lmstudio",
@ -319,6 +329,7 @@ _LABEL_OVERRIDES: Dict[str, str] = {
"copilot-acp": "GitHub Copilot ACP",
"stepfun": "StepFun Step Plan",
"xiaomi": "Xiaomi MiMo",
"gmi": "GMI Cloud",
"local": "Local endpoint",
"bedrock": "AWS Bedrock",
"ollama-cloud": "Ollama Cloud",

View File

@ -516,19 +516,82 @@ class TestGetTextAuxiliaryClient:
assert isinstance(client, CodexAuxiliaryClient)
assert model == "gpt-5.2-codex"
def test_returns_none_when_nothing_available(self, monkeypatch):
monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
client, model = get_text_auxiliary_client()
assert client is None
assert model is None
class TestNousAuxiliaryRefresh:
def test_try_nous_prefers_runtime_credentials(self):
fresh_base = "https://inference-api.nousresearch.com/v1"
def test_custom_endpoint_uses_codex_wrapper_when_runtime_requests_responses_api(self):
with patch("agent.auxiliary_client._resolve_custom_runtime",
return_value=("https://api.openai.com/v1", "sk-test", "codex_responses")), \
patch("agent.auxiliary_client._read_main_model", return_value="gpt-5.3-codex"), \
patch("agent.auxiliary_client.OpenAI") as mock_openai:
client, model = get_text_auxiliary_client()
from agent.auxiliary_client import CodexAuxiliaryClient
assert isinstance(client, CodexAuxiliaryClient)
assert model == "gpt-5.3-codex"
assert mock_openai.call_args.kwargs["base_url"] == "https://api.openai.com/v1"
assert mock_openai.call_args.kwargs["api_key"] == "sk-test"
class TestVisionClientFallback:
"""Vision client auto mode resolves known-good multimodal backends."""
def test_vision_auto_includes_active_provider_when_configured(self, monkeypatch):
"""Active provider appears in available backends when credentials exist."""
monkeypatch.setenv("ANTHROPIC_API_KEY", "***")
with (
patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "stale-token"}),
patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
patch("hermes_cli.models.get_nous_recommended_aux_model", return_value=None),
patch("agent.auxiliary_client._read_nous_auth", return_value=None),
patch("agent.auxiliary_client._read_main_provider", return_value="anthropic"),
patch("agent.auxiliary_client._read_main_model", return_value="claude-sonnet-4"),
patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="***"),
):
backends = get_available_vision_backends()
assert "anthropic" in backends
def test_resolve_provider_client_returns_native_anthropic_wrapper(self, monkeypatch):
monkeypatch.setenv("ANTHROPIC_API_KEY", "***")
with (
patch("agent.auxiliary_client._read_nous_auth", return_value=None),
patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="***"),
):
client, model = resolve_provider_client("anthropic")
assert client is not None
assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
assert model == "claude-haiku-4-5-20251001"
class TestAuxiliaryPoolAwareness:
def test_try_nous_uses_pool_entry(self):
class _Entry:
access_token = "pooled-access-token"
agent_key = "pooled-agent-key"
inference_base_url = "https://inference.pool.example/v1"
class _Pool:
def has_credentials(self):
return True
def select(self):
return _Entry()
with (
patch("agent.auxiliary_client.load_pool", return_value=_Pool()),
patch("agent.auxiliary_client.OpenAI") as mock_openai,
):
from agent.auxiliary_client import _try_nous
mock_openai.return_value = MagicMock()
client, model = _try_nous()
assert client is not None
@ -643,6 +706,67 @@ class TestNousAuxiliaryRefresh:
assert stale_client.chat.completions.create.await_count == 1
assert fresh_async_client.chat.completions.create.await_count == 1
def test_try_nous_pool_entry(self):
class _Entry:
access_token = "pooled-access-token"
agent_key = "pooled-agent-key"
inference_base_url = "https://inference.pool.example/v1"
class _Pool:
def has_credentials(self):
return True
def select(self):
return _Entry()
with (
patch("agent.auxiliary_client.load_pool", return_value=_Pool()),
patch("agent.auxiliary_client.OpenAI") as mock_openai,
):
from agent.auxiliary_client import _try_nous
client, model = _try_nous()
assert client is not None
assert model == "gemini-3-flash"
call_kwargs = mock_openai.call_args.kwargs
assert call_kwargs["api_key"] == "pooled-agent-key"
assert call_kwargs["base_url"] == "https://inference.pool.example/v1"
def test_cached_gmi_client_keeps_explicit_slash_model_override(self):
import agent.auxiliary_client as aux
fake_client = MagicMock()
with patch(
"agent.auxiliary_client.resolve_provider_client",
return_value=(fake_client, "anthropic/claude-opus-4.6"),
) as mock_resolve:
aux.shutdown_cached_clients()
try:
client, model = aux._get_cached_client(
"gmi",
"anthropic/claude-opus-4.6",
base_url="https://api.gmi-serving.com/v1",
api_key="gmi-key",
)
assert client is fake_client
assert model == "anthropic/claude-opus-4.6"
client, model = aux._get_cached_client(
"gmi",
"openai/gpt-5.4-mini",
base_url="https://api.gmi-serving.com/v1",
api_key="gmi-key",
)
finally:
aux.shutdown_cached_clients()
assert client is fake_client
assert model == "openai/gpt-5.4-mini"
assert mock_resolve.call_count == 1
# ── Payment / credit exhaustion fallback ─────────────────────────────────

View File

@ -288,6 +288,10 @@ def _hermetic_environment(tmp_path, monkeypatch):
monkeypatch.setattr(_plugins_mod, "_plugin_manager", None)
except Exception:
pass
# Explicitly clear provider-specific base URL overrides that don't match
# the generic credential-shaped env-var filter above.
monkeypatch.delenv("GMI_API_KEY", raising=False)
monkeypatch.delenv("GMI_BASE_URL", raising=False)
# Backward-compat alias — old tests reference this fixture name. Keep it

View File

@ -42,6 +42,7 @@ class TestProviderRegistry:
("minimax-cn", "MiniMax (China)", "api_key"),
("ai-gateway", "Vercel AI Gateway", "api_key"),
("kilocode", "Kilo Code", "api_key"),
("gmi", "GMI Cloud", "api_key"),
])
def test_provider_registered(self, provider_id, name, auth_type):
assert provider_id in PROVIDER_REGISTRY
@ -106,6 +107,11 @@ class TestProviderRegistry:
assert pconfig.api_key_env_vars == ("KILOCODE_API_KEY",)
assert pconfig.base_url_env_var == "KILOCODE_BASE_URL"
def test_gmi_env_vars(self):
pconfig = PROVIDER_REGISTRY["gmi"]
assert pconfig.api_key_env_vars == ("GMI_API_KEY",)
assert pconfig.base_url_env_var == "GMI_BASE_URL"
def test_huggingface_env_vars(self):
pconfig = PROVIDER_REGISTRY["huggingface"]
assert pconfig.api_key_env_vars == ("HF_TOKEN",)
@ -121,6 +127,7 @@ class TestProviderRegistry:
assert PROVIDER_REGISTRY["minimax-cn"].inference_base_url == "https://api.minimaxi.com/anthropic"
assert PROVIDER_REGISTRY["ai-gateway"].inference_base_url == "https://ai-gateway.vercel.sh/v1"
assert PROVIDER_REGISTRY["kilocode"].inference_base_url == "https://api.kilo.ai/api/gateway"
assert PROVIDER_REGISTRY["gmi"].inference_base_url == "https://api.gmi-serving.com/v1"
assert PROVIDER_REGISTRY["huggingface"].inference_base_url == "https://router.huggingface.co/v1"
def test_oauth_providers_unchanged(self):
@ -143,6 +150,7 @@ PROVIDER_ENV_VARS = (
"MINIMAX_API_KEY", "MINIMAX_CN_API_KEY",
"AI_GATEWAY_API_KEY", "AI_GATEWAY_BASE_URL",
"KILOCODE_API_KEY", "KILOCODE_BASE_URL",
"GMI_API_KEY", "GMI_BASE_URL",
"DASHSCOPE_API_KEY", "OPENCODE_ZEN_API_KEY", "OPENCODE_GO_API_KEY",
"NOUS_API_KEY", "GITHUB_TOKEN", "GH_TOKEN",
"OPENAI_BASE_URL", "HERMES_COPILOT_ACP_COMMAND", "COPILOT_CLI_PATH",
@ -178,6 +186,9 @@ class TestResolveProvider:
def test_explicit_ai_gateway(self):
assert resolve_provider("ai-gateway") == "ai-gateway"
def test_explicit_gmi(self):
assert resolve_provider("gmi") == "gmi"
def test_alias_glm(self):
assert resolve_provider("glm") == "zai"
@ -205,6 +216,9 @@ class TestResolveProvider:
def test_alias_vercel(self):
assert resolve_provider("vercel") == "ai-gateway"
def test_alias_gmi_cloud(self):
assert resolve_provider("gmi-cloud") == "gmi"
def test_explicit_kilocode(self):
assert resolve_provider("kilocode") == "kilocode"
@ -280,6 +294,10 @@ class TestResolveProvider:
monkeypatch.setenv("AI_GATEWAY_API_KEY", "test-gw-key")
assert resolve_provider("auto") == "ai-gateway"
def test_auto_detects_gmi_key(self, monkeypatch):
monkeypatch.setenv("GMI_API_KEY", "test-gmi-key")
assert resolve_provider("auto") == "gmi"
def test_auto_detects_kilocode_key(self, monkeypatch):
monkeypatch.setenv("KILOCODE_API_KEY", "test-kilo-key")
assert resolve_provider("auto") == "kilocode"
@ -497,6 +515,19 @@ class TestResolveApiKeyProviderCredentials:
assert creds["api_key"] == "kilo-secret-key"
assert creds["base_url"] == "https://api.kilo.ai/api/gateway"
def test_resolve_gmi_with_key(self, monkeypatch):
monkeypatch.setenv("GMI_API_KEY", "gmi-secret-key")
creds = resolve_api_key_provider_credentials("gmi")
assert creds["provider"] == "gmi"
assert creds["api_key"] == "gmi-secret-key"
assert creds["base_url"] == "https://api.gmi-serving.com/v1"
def test_resolve_gmi_custom_base_url(self, monkeypatch):
monkeypatch.setenv("GMI_API_KEY", "gmi-key")
monkeypatch.setenv("GMI_BASE_URL", "https://custom.gmi.example/v1")
creds = resolve_api_key_provider_credentials("gmi")
assert creds["base_url"] == "https://custom.gmi.example/v1"
def test_resolve_kilocode_custom_base_url(self, monkeypatch):
monkeypatch.setenv("KILOCODE_API_KEY", "kilo-key")
monkeypatch.setenv("KILOCODE_BASE_URL", "https://custom.kilo.example/v1")
@ -594,6 +625,15 @@ class TestRuntimeProviderResolution:
assert result["api_key"] == "kilo-key"
assert "kilo.ai" in result["base_url"]
def test_runtime_gmi(self, monkeypatch):
monkeypatch.setenv("GMI_API_KEY", "gmi-key")
from hermes_cli.runtime_provider import resolve_runtime_provider
result = resolve_runtime_provider(requested="gmi")
assert result["provider"] == "gmi"
assert result["api_mode"] == "chat_completions"
assert result["api_key"] == "gmi-key"
assert result["base_url"] == "https://api.gmi-serving.com/v1"
def test_runtime_auto_detects_api_key_provider(self, monkeypatch):
monkeypatch.setenv("KIMI_API_KEY", "auto-kimi-key")
from hermes_cli.runtime_provider import resolve_runtime_provider

View File

@ -0,0 +1,363 @@
"""Focused tests for GMI Cloud first-class provider wiring."""
from __future__ import annotations
import contextlib
import io
import sys
import types
from argparse import Namespace
from unittest.mock import patch
import pytest
# Install a no-op ``dotenv`` stub when python-dotenv is absent, so the hermes
# imports below do not fail in a minimal test environment.
# NOTE(review): assumes the imported modules only use ``load_dotenv`` from
# dotenv — confirm if hermes starts using other dotenv APIs.
if "dotenv" not in sys.modules:
    fake_dotenv = types.ModuleType("dotenv")
    fake_dotenv.load_dotenv = lambda *args, **kwargs: None
    sys.modules["dotenv"] = fake_dotenv
from hermes_cli.auth import resolve_provider
from hermes_cli.config import load_config
from hermes_cli.models import (
CANONICAL_PROVIDERS,
_PROVIDER_LABELS,
_PROVIDER_MODELS,
normalize_provider,
provider_model_ids,
)
from agent.auxiliary_client import resolve_provider_client
from agent.model_metadata import get_model_context_length
@pytest.fixture(autouse=True)
def _clear_provider_env(monkeypatch):
    """Strip provider credentials from the environment for every test here."""
    cleared = (
        "OPENROUTER_API_KEY",
        "OPENAI_API_KEY",
        "ANTHROPIC_API_KEY",
        "GOOGLE_API_KEY",
        "GLM_API_KEY",
        "KIMI_API_KEY",
        "MINIMAX_API_KEY",
        "GMI_API_KEY",
        "GMI_BASE_URL",
    )
    for name in cleared:
        monkeypatch.delenv(name, raising=False)
class TestGmiAliases:
    """Every accepted spelling of the provider resolves to canonical ``gmi``."""

    @pytest.mark.parametrize("alias", ["gmi", "gmi-cloud", "gmicloud"])
    def test_alias_resolves(self, alias, monkeypatch):
        monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")
        assert resolve_provider(alias) == "gmi"

    def test_models_normalize_provider(self):
        for spelling in ("gmi-cloud", "gmicloud"):
            assert normalize_provider(spelling) == "gmi"

    def test_providers_normalize_provider(self):
        # hermes_cli.providers ships its own normalizer; check it agrees.
        from hermes_cli.providers import normalize_provider as providers_normalize
        for spelling in ("gmi-cloud", "gmicloud"):
            assert providers_normalize(spelling) == "gmi"
class TestGmiConfigRegistry:
    """GMI env vars are registered for settings prompts and migration tracking."""

    def test_optional_env_vars_include_gmi(self):
        from hermes_cli.config import ENV_VARS_BY_VERSION, OPTIONAL_ENV_VARS

        assert "GMI_API_KEY" in OPTIONAL_ENV_VARS
        key_meta = OPTIONAL_ENV_VARS["GMI_API_KEY"]
        assert key_meta["category"] == "provider"
        assert key_meta["password"] is True
        assert key_meta["url"] == "https://www.gmicloud.ai/"

        assert "GMI_BASE_URL" in OPTIONAL_ENV_VARS
        url_meta = OPTIONAL_ENV_VARS["GMI_BASE_URL"]
        assert url_meta["category"] == "provider"
        assert url_meta["password"] is False

        # Both vars belong to migration version 17.
        assert "GMI_API_KEY" in ENV_VARS_BY_VERSION[17]
        assert "GMI_BASE_URL" in ENV_VARS_BY_VERSION[17]
class TestGmiModelCatalog:
    """Model catalog wiring: static fallback list and live /v1/models fetch."""

    def test_static_model_fallback_exists(self):
        # Curated slash-form model IDs ship as the offline fallback catalog.
        assert "gmi" in _PROVIDER_MODELS
        models = _PROVIDER_MODELS["gmi"]
        assert "zai-org/GLM-5.1-FP8" in models
        assert "deepseek-ai/DeepSeek-V3.2" in models
        assert "moonshotai/Kimi-K2.5" in models
        assert "anthropic/claude-sonnet-4.6" in models

    def test_canonical_provider_entry(self):
        slugs = [p.slug for p in CANONICAL_PROVIDERS]
        assert "gmi" in slugs

    def test_provider_model_ids_prefers_live_api(self, monkeypatch):
        # With resolvable credentials and a successful live fetch, the live
        # list wins over the static catalog.
        monkeypatch.setattr(
            "hermes_cli.auth.resolve_api_key_provider_credentials",
            lambda provider_id: {
                "provider": provider_id,
                "api_key": "gmi-live-key",
                "base_url": "https://api.gmi-serving.com/v1",
                "source": "GMI_API_KEY",
            },
        )
        monkeypatch.setattr(
            "hermes_cli.models.fetch_api_models",
            lambda api_key, base_url: [
                "openai/gpt-5.4-mini",
                "zai-org/GLM-5.1-FP8",
            ],
        )
        assert provider_model_ids("gmi") == [
            "openai/gpt-5.4-mini",
            "zai-org/GLM-5.1-FP8",
        ]

    def test_provider_model_ids_falls_back_to_static_models(self, monkeypatch):
        # A failed live fetch (None) falls back to the static catalog.
        monkeypatch.setattr(
            "hermes_cli.auth.resolve_api_key_provider_credentials",
            lambda provider_id: {
                "provider": provider_id,
                "api_key": "gmi-live-key",
                "base_url": "https://api.gmi-serving.com/v1",
                "source": "GMI_API_KEY",
            },
        )
        monkeypatch.setattr("hermes_cli.models.fetch_api_models", lambda api_key, base_url: None)
        assert provider_model_ids("gmi") == list(_PROVIDER_MODELS["gmi"])
class TestGmiProvidersModule:
    """Overlay and label registration for GMI in hermes_cli.providers."""

    def test_overlay_exists(self):
        from hermes_cli.providers import HERMES_OVERLAYS

        assert "gmi" in HERMES_OVERLAYS
        overlay = HERMES_OVERLAYS["gmi"]
        expected = {
            "transport": "openai_chat",
            "extra_env_vars": ("GMI_API_KEY",),
            "base_url_override": "https://api.gmi-serving.com/v1",
            "base_url_env_var": "GMI_BASE_URL",
        }
        for attr, value in expected.items():
            assert getattr(overlay, attr) == value
        assert not overlay.is_aggregator

    def test_provider_label(self):
        assert _PROVIDER_LABELS["gmi"] == "GMI Cloud"
class TestGmiDoctor:
    """Doctor checks cover the GMI Cloud /models connectivity probe."""

    def test_provider_env_hints_include_gmi(self):
        from hermes_cli.doctor import _PROVIDER_ENV_HINTS
        assert "GMI_API_KEY" in _PROVIDER_ENV_HINTS

    def test_run_doctor_checks_gmi_models_endpoint(self, monkeypatch, tmp_path):
        from hermes_cli import doctor as doctor_mod

        # Minimal hermes home whose .env configures only a GMI key.
        home = tmp_path / ".hermes"
        home.mkdir(parents=True, exist_ok=True)
        (home / "config.yaml").write_text("memory: {}\n", encoding="utf-8")
        (home / ".env").write_text("GMI_API_KEY=gmi-test-key\n", encoding="utf-8")
        project = tmp_path / "project"
        project.mkdir(exist_ok=True)
        monkeypatch.setattr(doctor_mod, "HERMES_HOME", home)
        monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", project)
        monkeypatch.setattr(doctor_mod, "_DHH", str(home))
        monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")
        # Clear every other provider credential so only GMI's check fires.
        for env_name in (
            "OPENROUTER_API_KEY",
            "OPENAI_API_KEY",
            "ANTHROPIC_API_KEY",
            "ANTHROPIC_TOKEN",
            "GLM_API_KEY",
            "ZAI_API_KEY",
            "Z_AI_API_KEY",
            "KIMI_API_KEY",
            "KIMI_CN_API_KEY",
            "ARCEEAI_API_KEY",
            "DEEPSEEK_API_KEY",
            "HF_TOKEN",
            "DASHSCOPE_API_KEY",
            "MINIMAX_API_KEY",
            "MINIMAX_CN_API_KEY",
            "AI_GATEWAY_API_KEY",
            "KILOCODE_API_KEY",
            "OPENCODE_ZEN_API_KEY",
            "OPENCODE_GO_API_KEY",
            "XIAOMI_API_KEY",
        ):
            monkeypatch.delenv(env_name, raising=False)
        # Stub the model_tools module so tooling checks become no-ops.
        fake_model_tools = types.SimpleNamespace(
            check_tool_availability=lambda *a, **kw: ([], []),
            TOOLSET_REQUIREMENTS={},
        )
        monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
        # Best-effort: silence OAuth status lookups if the auth module loads.
        try:
            from hermes_cli import auth as _auth_mod
            monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {})
            monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
        except Exception:
            pass
        # Record outbound HTTP calls instead of hitting the network.
        calls = []

        def fake_get(url, headers=None, timeout=None):
            calls.append((url, headers, timeout))
            return types.SimpleNamespace(status_code=200)

        import httpx
        monkeypatch.setattr(httpx, "get", fake_get)
        buf = io.StringIO()
        with contextlib.redirect_stdout(buf):
            doctor_mod.run_doctor(Namespace(fix=False))
        out = buf.getvalue()
        assert "API key or custom endpoint configured" in out
        assert "GMI Cloud" in out
        assert any(url == "https://api.gmi-serving.com/v1/models" for url, _, _ in calls)
class TestGmiModelMetadata:
    """model_metadata wiring: URL mapping, prefixes, context-length probe."""

    def test_url_to_provider(self):
        from agent.model_metadata import _URL_TO_PROVIDER
        assert _URL_TO_PROVIDER.get("api.gmi-serving.com") == "gmi"

    def test_provider_prefixes(self):
        from agent.model_metadata import _PROVIDER_PREFIXES
        assert "gmi" in _PROVIDER_PREFIXES
        assert "gmi-cloud" in _PROVIDER_PREFIXES
        assert "gmicloud" in _PROVIDER_PREFIXES

    def test_infer_from_url(self):
        from agent.model_metadata import _infer_provider_from_url
        assert _infer_provider_from_url("https://api.gmi-serving.com/v1") == "gmi"

    def test_known_gmi_endpoint_still_uses_endpoint_metadata(self):
        # With the cache, models.dev, and static metadata all empty, the
        # live endpoint /models payload must still supply the context length.
        with patch(
            "agent.model_metadata.get_cached_context_length",
            return_value=None,
        ), patch(
            "agent.model_metadata.fetch_endpoint_model_metadata",
            return_value={"anthropic/claude-opus-4.6": {"context_length": 409600}},
        ), patch(
            "agent.models_dev.lookup_models_dev_context",
            return_value=None,
        ), patch(
            "agent.model_metadata.fetch_model_metadata",
            return_value={},
        ):
            result = get_model_context_length(
                "anthropic/claude-opus-4.6",
                base_url="https://api.gmi-serving.com/v1",
                api_key="gmi-test-key",
                provider="custom",
            )
            assert result == 409600
class TestGmiAuxiliary:
    """Auxiliary-client routing uses the GMI default aux model."""

    def test_aux_default_model(self):
        from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
        assert _API_KEY_PROVIDER_AUX_MODELS["gmi"] == "anthropic/claude-opus-4.6"

    def test_resolve_provider_client_uses_gmi_aux_default(self, monkeypatch):
        monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            mock_openai.return_value = object()
            client, model = resolve_provider_client("gmi")
            assert client is not None
            assert model == "anthropic/claude-opus-4.6"
            kwargs = mock_openai.call_args.kwargs
            assert kwargs["api_key"] == "gmi-test-key"
            assert kwargs["base_url"] == "https://api.gmi-serving.com/v1"

    def test_resolve_provider_client_accepts_gmi_alias(self, monkeypatch):
        monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            mock_openai.return_value = object()
            client, model = resolve_provider_client("gmi-cloud")
            assert client is not None
            assert model == "anthropic/claude-opus-4.6"
class TestGmiMainFlow:
    """CLI entry points accept and persist the gmi provider."""

    def test_chat_parser_accepts_gmi_provider(self, monkeypatch):
        # Capture the parsed --provider value instead of starting a chat.
        recorded: dict[str, str] = {}
        monkeypatch.setattr("hermes_cli.config.get_container_exec_info", lambda: None)
        monkeypatch.setattr(
            "hermes_cli.main.cmd_chat",
            lambda args: recorded.setdefault("provider", args.provider),
        )
        monkeypatch.setattr(sys, "argv", ["hermes", "chat", "--provider", "gmi"])
        from hermes_cli.main import main
        main()
        assert recorded["provider"] == "gmi"

    def test_select_provider_and_model_routes_gmi_to_generic_flow(self, monkeypatch):
        recorded: dict[str, str] = {}
        monkeypatch.setattr("hermes_cli.auth.resolve_provider", lambda *args, **kwargs: None)

        # Simulate the user picking the "GMI Cloud" entry from the prompt.
        def fake_prompt_provider_choice(choices, default=0):
            return next(i for i, label in enumerate(choices) if label.startswith("GMI Cloud"))

        def fake_model_flow_api_key_provider(config, provider_id, current_model=""):
            recorded["provider_id"] = provider_id

        monkeypatch.setattr("hermes_cli.main._prompt_provider_choice", fake_prompt_provider_choice)
        monkeypatch.setattr("hermes_cli.main._model_flow_api_key_provider", fake_model_flow_api_key_provider)
        from hermes_cli.main import select_provider_and_model
        select_provider_and_model()
        assert recorded["provider_id"] == "gmi"

    def test_model_flow_api_key_provider_persists_gmi_selection(self, monkeypatch):
        monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")
        with patch(
            "hermes_cli.models.fetch_api_models",
            return_value=["zai-org/GLM-5.1-FP8", "openai/gpt-5.4-mini"],
        ), patch(
            "hermes_cli.auth._prompt_model_selection",
            return_value="openai/gpt-5.4-mini",
        ), patch(
            "hermes_cli.auth.deactivate_provider",
        ), patch(
            "builtins.input",
            return_value="",
        ):
            from hermes_cli.main import _model_flow_api_key_provider
            _model_flow_api_key_provider(load_config(), "gmi", "old-model")
        # The selection must be written back to config.yaml on disk.
        import yaml
        from hermes_constants import get_hermes_home
        config = yaml.safe_load((get_hermes_home() / "config.yaml").read_text()) or {}
        model_cfg = config.get("model")
        assert isinstance(model_cfg, dict)
        assert model_cfg["provider"] == "gmi"
        assert model_cfg["default"] == "openai/gpt-5.4-mini"
        assert model_cfg["base_url"] == "https://api.gmi-serving.com/v1"

View File

@ -66,13 +66,30 @@ hermes model
Good defaults:
| Situation | Recommended path |
|---|---|
| Least friction | Nous Portal or OpenRouter |
| You already have Claude or Codex auth | Anthropic or OpenAI Codex |
| You want local/private inference | Ollama or any custom OpenAI-compatible endpoint |
| You want multi-provider routing | OpenRouter |
| You have a custom GPU server | vLLM, SGLang, LiteLLM, or any OpenAI-compatible endpoint |
| Provider | What it is | How to set up |
|----------|-----------|---------------|
| **Nous Portal** | Subscription-based, zero-config | OAuth login via `hermes model` |
| **OpenAI Codex** | ChatGPT OAuth, uses Codex models | Device code auth via `hermes model` |
| **Anthropic** | Claude models directly (Pro/Max or API key) | `hermes model` with Claude Code auth, or an Anthropic API key |
| **OpenRouter** | Multi-provider routing across many models | Enter your API key |
| **Z.AI** | GLM / Zhipu-hosted models | Set `GLM_API_KEY` / `ZAI_API_KEY` |
| **Kimi / Moonshot** | Moonshot-hosted coding and chat models | Set `KIMI_API_KEY` |
| **Kimi / Moonshot China** | China-region Moonshot endpoint | Set `KIMI_CN_API_KEY` |
| **Arcee AI** | Trinity models | Set `ARCEEAI_API_KEY` |
| **GMI Cloud** | Multi-model direct API | Set `GMI_API_KEY` |
| **MiniMax** | International MiniMax endpoint | Set `MINIMAX_API_KEY` |
| **MiniMax China** | China-region MiniMax endpoint | Set `MINIMAX_CN_API_KEY` |
| **Alibaba Cloud** | Qwen models via DashScope | Set `DASHSCOPE_API_KEY` |
| **Hugging Face** | 20+ open models via unified router (Qwen, DeepSeek, Kimi, etc.) | Set `HF_TOKEN` |
| **Kilo Code** | KiloCode-hosted models | Set `KILOCODE_API_KEY` |
| **OpenCode Zen** | Pay-as-you-go access to curated models | Set `OPENCODE_ZEN_API_KEY` |
| **OpenCode Go** | $10/month subscription for open models | Set `OPENCODE_GO_API_KEY` |
| **DeepSeek** | Direct DeepSeek API access | Set `DEEPSEEK_API_KEY` |
| **NVIDIA NIM** | Nemotron models via build.nvidia.com or local NIM | Set `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) |
| **GitHub Copilot** | GitHub Copilot subscription (GPT-5.x, Claude, Gemini, etc.) | OAuth via `hermes model`, or `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` |
| **GitHub Copilot ACP** | Copilot ACP agent backend (spawns local `copilot` CLI) | `hermes model` (requires `copilot` CLI + `copilot login`) |
| **Vercel AI Gateway** | Vercel AI Gateway routing | Set `AI_GATEWAY_API_KEY` |
| **Custom Endpoint** | VLLM, SGLang, Ollama, or any OpenAI-compatible API | Set base URL + API key |
For most first-time users: choose a provider, accept the defaults unless you know why you're changing them. The full provider catalog with env vars and setup steps lives on the [Providers](../integrations/providers.md) page.

View File

@ -25,6 +25,7 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro
| **Kimi / Moonshot** | `KIMI_API_KEY` in `~/.hermes/.env` (provider: `kimi-coding`) |
| **Kimi / Moonshot (China)** | `KIMI_CN_API_KEY` in `~/.hermes/.env` (provider: `kimi-coding-cn`; aliases: `kimi-cn`, `moonshot-cn`) |
| **Arcee AI** | `ARCEEAI_API_KEY` in `~/.hermes/.env` (provider: `arcee`; aliases: `arcee-ai`, `arceeai`) |
| **GMI Cloud** | `GMI_API_KEY` in `~/.hermes/.env` (provider: `gmi`; aliases: `gmi-cloud`, `gmicloud`) |
| **MiniMax** | `MINIMAX_API_KEY` in `~/.hermes/.env` (provider: `minimax`) |
| **MiniMax China** | `MINIMAX_CN_API_KEY` in `~/.hermes/.env` (provider: `minimax-cn`) |
| **Alibaba Cloud** | `DASHSCOPE_API_KEY` in `~/.hermes/.env` (provider: `alibaba`, aliases: `dashscope`, `qwen`) |
@ -250,7 +251,7 @@ model:
| `HERMES_COPILOT_ACP_COMMAND` | Override the Copilot CLI binary path (default: `copilot`) |
| `HERMES_COPILOT_ACP_ARGS` | Override ACP args (default: `--acp --stdio`) |
### First-Class Chinese AI Providers
### First-Class API-Key Providers
These providers have built-in support with dedicated provider IDs. Set the API key and use `--provider` to select:
@ -286,16 +287,21 @@ hermes chat --provider xiaomi --model mimo-v2-pro
# Arcee AI (Trinity models)
hermes chat --provider arcee --model trinity-large-thinking
# Requires: ARCEEAI_API_KEY in ~/.hermes/.env
# GMI Cloud
# Use the exact model ID returned by GMI's /v1/models endpoint.
hermes chat --provider gmi --model zai-org/GLM-5.1-FP8
# Requires: GMI_API_KEY in ~/.hermes/.env
```
Or set the provider permanently in `config.yaml`:
```yaml
model:
provider: "zai" # or: kimi-coding, kimi-coding-cn, minimax, minimax-cn, alibaba, xiaomi, arcee
default: "glm-5"
provider: "gmi"
default: "zai-org/GLM-5.1-FP8"
```
Base URLs can be overridden with `GLM_BASE_URL`, `KIMI_BASE_URL`, `MINIMAX_BASE_URL`, `MINIMAX_CN_BASE_URL`, `DASHSCOPE_BASE_URL`, or `XIAOMI_BASE_URL` environment variables.
Base URLs can be overridden with `GLM_BASE_URL`, `KIMI_BASE_URL`, `MINIMAX_BASE_URL`, `MINIMAX_CN_BASE_URL`, `DASHSCOPE_BASE_URL`, `XIAOMI_BASE_URL`, or `GMI_BASE_URL` environment variables.
:::note Z.AI Endpoint Auto-Detection
When using the Z.AI / GLM provider, Hermes automatically probes multiple endpoints (global, China, coding variants) to find one that accepts your API key. You don't need to set `GLM_BASE_URL` manually — the working endpoint is detected and cached automatically.

View File

@ -36,6 +36,8 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
| `KIMI_CN_API_KEY` | Kimi / Moonshot China API key ([moonshot.cn](https://platform.moonshot.cn)) |
| `ARCEEAI_API_KEY` | Arcee AI API key ([chat.arcee.ai](https://chat.arcee.ai/)) |
| `ARCEE_BASE_URL` | Override Arcee base URL (default: `https://api.arcee.ai/api/v1`) |
| `GMI_API_KEY` | GMI Cloud API key ([gmicloud.ai](https://www.gmicloud.ai/)) |
| `GMI_BASE_URL` | Override GMI Cloud base URL (default: `https://api.gmi-serving.com/v1`) |
| `MINIMAX_API_KEY` | MiniMax API key — global endpoint ([minimax.io](https://www.minimax.io)) |
| `MINIMAX_BASE_URL` | Override MiniMax base URL (default: `https://api.minimax.io/anthropic` — Hermes uses MiniMax's Anthropic Messages-compatible endpoint) |
| `MINIMAX_CN_API_KEY` | MiniMax API key — China endpoint ([minimaxi.com](https://www.minimaxi.com)) |
@ -89,7 +91,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
| Variable | Description |
|----------|-------------|
| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway` (default: `auto`) |
| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `custom`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `gemini`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway` (default: `auto`) |
| `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) |
| `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL |
| `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) |

View File

@ -801,6 +801,17 @@ These options apply to **auxiliary task configs** (`auxiliary:`, `compression:`,
| `"codex"` | Force Codex OAuth (ChatGPT account). Supports vision (gpt-5.3-codex). | `hermes model` → Codex |
| `"main"` | Use your active custom/main endpoint. This can come from `OPENAI_BASE_URL` + `OPENAI_API_KEY` or from a custom endpoint saved via `hermes model` / `config.yaml`. Works with OpenAI, local models, or any OpenAI-compatible API. **Auxiliary tasks only — not valid for `model.provider`.** | Custom endpoint credentials + base URL |
Direct API-key providers from the main provider catalog also work here when you want side tasks to bypass your default router. `gmi` is valid once `GMI_API_KEY` is configured:
```yaml
auxiliary:
compression:
provider: "gmi"
model: "anthropic/claude-opus-4.6"
```
For GMI auxiliary routing, use the exact model ID returned by GMI's `/v1/models` endpoint.
### Common Setups
**Using a direct custom endpoint** (clearer than `provider: "main"` for local/self-hosted APIs):

View File

@ -59,6 +59,7 @@ Both `provider` and `model` are **required**. If either is missing, the fallback
| Kilo Code | `kilocode` | `KILOCODE_API_KEY` |
| Xiaomi MiMo | `xiaomi` | `XIAOMI_API_KEY` |
| Arcee AI | `arcee` | `ARCEEAI_API_KEY` |
| GMI Cloud | `gmi` | `GMI_API_KEY` |
| Alibaba / DashScope | `alibaba` | `DASHSCOPE_API_KEY` |
| Hugging Face | `huggingface` | `HF_TOKEN` |
| Custom endpoint | `custom` | `base_url` + `key_env` (see below) |