feat(providers): add GMI Cloud as a first-class API-key provider (#11955)
Add GMI Cloud (api.gmi-serving.com) as a full first-class API-key provider with built-in auth, aliases, model catalog, CLI entry points, auxiliary client routing, context-length resolution, doctor checks, env var tracking, and docs.

- auth.py: ProviderConfig for 'gmi' (api_key, GMI_API_KEY / GMI_BASE_URL)
- providers.py: HermesOverlay with extra_env_vars for models.dev detection
- models.py: curated slash-form model catalog; live /v1/models fetch
- main.py: 'gmi' in _named_custom_provider_map and --provider choices
- model_metadata.py: _URL_TO_PROVIDER, _PROVIDER_PREFIXES, dedicated context-length probe block (GMI's /models has authoritative data)
- auxiliary_client.py: alias entries; _compat_model fix for slash-form models on cached aggregator-style clients; gmi aux default model
- doctor.py: GMI in provider connectivity checks
- config.py: GMI_API_KEY / GMI_BASE_URL in OPTIONAL_ENV_VARS
- conftest.py: explicit GMI_BASE_URL clearing (not caught by _API_KEY suffix)
- docs: providers.md, environment-variables.md, fallback-providers.md, configuration.md, quickstart.md (expands provider table)

Co-authored-by: Isaac Huang <isaachuang@Isaacs-MacBook-Pro.local>
parent 41f70e6fc4
commit c53fcb0173

.gitignore (vendored, +1)
@@ -69,3 +69,4 @@ mini-swe-agent/
.nix-stamps/
result
website/static/api/skills-index.json
models-dev-upstream/
@@ -82,6 +82,8 @@ _PROVIDER_ALIASES = {
    "moonshot": "kimi-coding",
    "kimi-cn": "kimi-coding-cn",
    "moonshot-cn": "kimi-coding-cn",
    "gmi-cloud": "gmi",
    "gmicloud": "gmi",
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
    "claude": "anthropic",
@@ -155,6 +157,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "kimi-coding": "kimi-k2-turbo-preview",
    "stepfun": "step-3.5-flash",
    "kimi-coding-cn": "kimi-k2-turbo-preview",
    "gmi": "anthropic/claude-opus-4.6",
    "minimax": "MiniMax-M2.7",
    "minimax-cn": "MiniMax-M2.7",
    "anthropic": "claude-haiku-4-5-20251001",
@@ -2558,12 +2561,19 @@ def _is_openrouter_client(client: Any) -> bool:
    return False


def _cached_client_accepts_slash_models(client: Any, cached_default: Optional[str]) -> bool:
    """Best-effort check for cached clients that accept ``vendor/model`` IDs."""
    if _is_openrouter_client(client):
        return True
    return bool(cached_default and "/" in cached_default)


def _compat_model(client: Any, model: Optional[str], cached_default: Optional[str]) -> Optional[str]:
    """Drop OpenRouter-format model slugs (with '/') for non-OpenRouter clients.
    """Keep slash-bearing model IDs only for cached clients that support them.

    Mirrors the guard in resolve_provider_client() which is skipped on cache hits.
    """
    if model and "/" in model and not _is_openrouter_client(client):
    if model and "/" in model and not _cached_client_accepts_slash_models(client, cached_default):
        return cached_default
    return model or cached_default

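The `_compat_model` change is the behavioral fix in this file: on cache hits, any `vendor/model` slug used to be dropped unless the client was OpenRouter, which broke GMI's slash-form catalog. A minimal standalone sketch of the new guard (the client objects here are stand-ins; only the two helper names come from the diff):

```python
from typing import Any, Optional

def _is_openrouter_client(client: Any) -> bool:
    # Stand-in for the real check, which inspects the client's configuration.
    return getattr(client, "is_openrouter", False)

def _cached_client_accepts_slash_models(client: Any, cached_default: Optional[str]) -> bool:
    # A cached GMI client's default is slash-form (e.g. "anthropic/claude-opus-4.6"),
    # so the cached default doubles as evidence that vendor/model IDs pass through.
    if _is_openrouter_client(client):
        return True
    return bool(cached_default and "/" in cached_default)

def _compat_model(client: Any, model: Optional[str], cached_default: Optional[str]) -> Optional[str]:
    if model and "/" in model and not _cached_client_accepts_slash_models(client, cached_default):
        return cached_default
    return model or cached_default

# A cached GMI-style client (slash-form default) keeps an explicit slash override:
assert _compat_model(object(), "openai/gpt-5.4-mini", "anthropic/claude-opus-4.6") == "openai/gpt-5.4-mini"
# A non-OpenRouter client with a plain default still drops the slash-form slug:
assert _compat_model(object(), "openai/gpt-5.4-mini", "gpt-5.2") == "gpt-5.2"
```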
@@ -51,6 +51,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "qwen-oauth",
    "xiaomi",
    "arcee",
    "gmi",
    "custom", "local",
    # Common aliases
    "google", "google-gemini", "google-ai-studio",
@@ -60,6 +61,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
    "mimo", "xiaomi-mimo",
    "arcee-ai", "arceeai",
    "gmi-cloud", "gmicloud",
    "xai", "x-ai", "x.ai", "grok",
    "nvidia", "nim", "nvidia-nim", "nemotron",
    "qwen-portal",
@@ -307,6 +309,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "integrate.api.nvidia.com": "nvidia",
    "api.xiaomimimo.com": "xiaomi",
    "xiaomimimo.com": "xiaomi",
    "api.gmi-serving.com": "gmi",
    "ollama.com": "ollama-cloud",
}

@@ -702,6 +705,29 @@ def fetch_endpoint_model_metadata(
    return {}


def _resolve_endpoint_context_length(
    model: str,
    base_url: str,
    api_key: str = "",
) -> Optional[int]:
    """Resolve context length from an endpoint's live ``/models`` metadata."""
    endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
    matched = endpoint_metadata.get(model)
    if not matched:
        if len(endpoint_metadata) == 1:
            matched = next(iter(endpoint_metadata.values()))
        else:
            for key, entry in endpoint_metadata.items():
                if model in key or key in model:
                    matched = entry
                    break
    if matched:
        context_length = matched.get("context_length")
        if isinstance(context_length, int):
            return context_length
    return None


def _get_context_cache_path() -> Path:
    """Return path to the persistent context length cache file."""
    from hermes_constants import get_hermes_home
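A toy illustration of the matching order the extracted helper applies — exact ID, then single-model fallback, then substring in either direction. The metadata dict below is invented; real entries come from the endpoint's `/models` response:

```python
from typing import Optional

metadata = {
    "anthropic/claude-opus-4.6": {"context_length": 409600},
    "zai-org/GLM-5.1-FP8": {"context_length": 202752},
}

def resolve(model: str) -> Optional[int]:
    matched = metadata.get(model)              # 1. exact model ID
    if not matched:
        if len(metadata) == 1:                 # 2. single-model server: use it
            matched = next(iter(metadata.values()))
        else:                                  # 3. substring in either direction
            for key, entry in metadata.items():
                if model in key or key in model:
                    matched = entry
                    break
    if matched and isinstance(matched.get("context_length"), int):
        return matched["context_length"]
    return None

assert resolve("anthropic/claude-opus-4.6") == 409600  # exact
assert resolve("GLM-5.1-FP8") == 202752                # substring match
assert resolve("mystery-model") is None                # no match: caller falls through
```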
@@ -1295,21 +1321,8 @@ def get_model_context_length(
    # returns 128k) instead of the model's full context (400k). models.dev
    # has the correct per-provider values and is checked at step 5+.
    if _is_custom_endpoint(base_url) and not _is_known_provider_base_url(base_url):
        endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
        matched = endpoint_metadata.get(model)
        if not matched:
            # Single-model servers: if only one model is loaded, use it
            if len(endpoint_metadata) == 1:
                matched = next(iter(endpoint_metadata.values()))
            else:
                # Fuzzy match: substring in either direction
                for key, entry in endpoint_metadata.items():
                    if model in key or key in model:
                        matched = entry
                        break
        if matched:
            context_length = matched.get("context_length")
            if isinstance(context_length, int):
        context_length = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
        if context_length is not None:
            return context_length
    if not _is_known_provider_base_url(base_url):
        # 3. Try querying local server directly
@@ -1374,6 +1387,12 @@
        if base_url:
            save_context_length(model, base_url, codex_ctx)
        return codex_ctx
    if effective_provider == "gmi" and base_url:
        # GMI exposes authoritative context_length via /models, but it is not
        # in models.dev yet. Preserve that higher-fidelity endpoint lookup.
        ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
        if ctx is not None:
            return ctx
    if effective_provider:
        from agent.models_dev import lookup_models_dev_context
        ctx = lookup_models_dev_context(effective_provider, model)

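Condensed, the GMI branch gives the live `/models` probe priority over models.dev, since GMI's endpoint reports authoritative context lengths and models.dev does not list GMI yet. A self-contained sketch of that ordering with the two probes passed in as callables (the context-length numbers are invented):

```python
from typing import Callable, Optional

def gmi_context_length(
    model: str,
    probe_endpoint: Callable[[str], Optional[int]],
    probe_models_dev: Callable[[str, str], Optional[int]],
) -> Optional[int]:
    ctx = probe_endpoint(model)            # live /models probe: authoritative for GMI
    if ctx is not None:
        return ctx
    return probe_models_dev("gmi", model)  # generic models.dev fallback

# With a working endpoint, the live value wins over any models.dev entry:
assert gmi_context_length("moonshotai/Kimi-K2.5", lambda m: 262144, lambda p, m: 131072) == 262144
# If the probe yields nothing, models.dev (once GMI is listed) backs it up:
assert gmi_context_length("moonshotai/Kimi-K2.5", lambda m: None, lambda p, m: 131072) == 131072
```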
@@ -224,6 +224,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("ARCEEAI_API_KEY",),
        base_url_env_var="ARCEE_BASE_URL",
    ),
    "gmi": ProviderConfig(
        id="gmi",
        name="GMI Cloud",
        auth_type="api_key",
        inference_base_url="https://api.gmi-serving.com/v1",
        api_key_env_vars=("GMI_API_KEY",),
        base_url_env_var="GMI_BASE_URL",
    ),
    "minimax": ProviderConfig(
        id="minimax",
        name="MiniMax",
@@ -1120,6 +1128,7 @@ def resolve_provider(
        "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
        "step": "stepfun", "stepfun-coding-plan": "stepfun",
        "arcee-ai": "arcee", "arceeai": "arcee",
        "gmi-cloud": "gmi", "gmicloud": "gmi",
        "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
        "alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan",
        "alibaba_coding_plan": "alibaba-coding-plan",
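Based on the registry fields above and the credential dicts asserted in this diff's tests, resolution for `gmi` behaves roughly like the following sketch (the real resolver in auth.py may do more):

```python
import os
from typing import Dict

def resolve_gmi_credentials() -> Dict[str, str]:
    return {
        "provider": "gmi",
        "api_key": os.environ.get("GMI_API_KEY", ""),
        # GMI_BASE_URL overrides the registry's inference_base_url default.
        "base_url": os.environ.get("GMI_BASE_URL") or "https://api.gmi-serving.com/v1",
        "source": "GMI_API_KEY",
    }

os.environ["GMI_API_KEY"] = "gmi-secret-key"
assert resolve_gmi_credentials()["base_url"] == "https://api.gmi-serving.com/v1"
os.environ["GMI_BASE_URL"] = "https://custom.gmi.example/v1"
assert resolve_gmi_credentials()["base_url"] == "https://custom.gmi.example/v1"
```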
@@ -1082,6 +1082,7 @@ ENV_VARS_BY_VERSION: Dict[int, List[str]] = {
        "SLACK_BOT_TOKEN", "SLACK_APP_TOKEN", "SLACK_ALLOWED_USERS"],
    10: ["TAVILY_API_KEY"],
    11: ["TERMINAL_MODAL_MODE"],
    17: ["GMI_API_KEY", "GMI_BASE_URL"],
}

# Required environment variables with metadata for migration prompts.
@@ -1254,6 +1255,22 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
    "GMI_API_KEY": {
        "description": "GMI Cloud API key",
        "prompt": "GMI Cloud API key",
        "url": "https://www.gmicloud.ai/",
        "password": True,
        "category": "provider",
        "advanced": True,
    },
    "GMI_BASE_URL": {
        "description": "GMI Cloud base URL override",
        "prompt": "GMI Cloud base URL (leave empty for default)",
        "url": None,
        "password": False,
        "category": "provider",
        "advanced": True,
    },
    "MINIMAX_API_KEY": {
        "description": "MiniMax API key (international)",
        "prompt": "MiniMax API key",
@@ -46,6 +46,7 @@ _PROVIDER_ENV_HINTS = (
    "Z_AI_API_KEY",
    "KIMI_API_KEY",
    "KIMI_CN_API_KEY",
    "GMI_API_KEY",
    "MINIMAX_API_KEY",
    "MINIMAX_CN_API_KEY",
    "KILOCODE_API_KEY",
@@ -937,6 +938,7 @@ def run_doctor(args):
        ("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
        ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True),
        ("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
        ("GMI Cloud", ("GMI_API_KEY",), "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True),
        ("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
        ("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
        ("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
@@ -1768,6 +1768,7 @@ def select_provider_and_model(args=None):
        "huggingface",
        "xiaomi",
        "arcee",
        "gmi",
        "nvidia",
        "ollama-cloud",
    ):
@@ -7782,6 +7783,7 @@ For more help on a command:
            "kilocode",
            "xiaomi",
            "arcee",
            "gmi",
            "nvidia",
        ],
        default=None,

@@ -278,6 +278,14 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "trinity-large-preview",
        "trinity-mini",
    ],
    "gmi": [
        "zai-org/GLM-5.1-FP8",
        "deepseek-ai/DeepSeek-V3.2",
        "moonshotai/Kimi-K2.5",
        "google/gemini-3.1-flash-lite-preview",
        "anthropic/claude-sonnet-4.6",
        "openai/gpt-5.4",
    ],
    "opencode-zen": [
        "kimi-k2.5",
        "gpt-5.4-pro",
@@ -709,7 +717,6 @@ class ProviderEntry(NamedTuple):
    label: str
    tui_desc: str  # detailed description for `hermes model` TUI


CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("nous", "Nous Portal", "Nous Portal (Nous Research subscription)"),
    ProviderEntry("openrouter", "OpenRouter", "OpenRouter (100+ models, pay-per-use)"),
@@ -735,6 +742,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("alibaba", "Alibaba Cloud (DashScope)", "Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
    ProviderEntry("ollama-cloud", "Ollama Cloud", "Ollama Cloud (cloud-hosted open models — ollama.com)"),
    ProviderEntry("arcee", "Arcee AI", "Arcee AI (Trinity models — direct API)"),
    ProviderEntry("gmi", "GMI Cloud", "GMI Cloud (multi-model direct API)"),
    ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"),
    ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
    ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
@@ -769,6 +777,8 @@ _PROVIDER_ALIASES = {
    "stepfun-coding-plan": "stepfun",
    "arcee-ai": "arcee",
    "arceeai": "arcee",
    "gmi-cloud": "gmi",
    "gmicloud": "gmi",
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
    "claude": "anthropic",
@@ -1849,6 +1859,19 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
                return live
        except Exception:
            pass
    if normalized == "gmi":
        try:
            from hermes_cli.auth import resolve_api_key_provider_credentials

            creds = resolve_api_key_provider_credentials("gmi")
            api_key = str(creds.get("api_key") or "").strip()
            base_url = str(creds.get("base_url") or "").strip()
            if api_key and base_url:
                live = fetch_api_models(api_key, base_url)
                if live:
                    return live
        except Exception:
            pass
    if normalized == "custom":
        base_url = _get_custom_base_url()
        if base_url:

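The `provider_model_ids()` hunk prefers the live listing and falls back to the curated catalog. The same pattern, condensed and stubbed so the sketch runs standalone (`fetch_api_models` and `resolve_api_key_provider_credentials` are the diff's names; the stub bodies are invented):

```python
def fetch_api_models(api_key: str, base_url: str):
    return None  # stand-in: pretend the live /v1/models call returned nothing

def resolve_api_key_provider_credentials(provider: str) -> dict:
    return {"api_key": "gmi-key", "base_url": "https://api.gmi-serving.com/v1"}

_PROVIDER_MODELS = {"gmi": ["zai-org/GLM-5.1-FP8", "deepseek-ai/DeepSeek-V3.2"]}  # abridged

def gmi_model_ids() -> list[str]:
    try:
        creds = resolve_api_key_provider_credentials("gmi")
        api_key = str(creds.get("api_key") or "").strip()
        base_url = str(creds.get("base_url") or "").strip()
        if api_key and base_url:
            live = fetch_api_models(api_key, base_url)
            if live:
                return live  # prefer the endpoint's current listing
    except Exception:
        pass
    return list(_PROVIDER_MODELS["gmi"])  # curated slash-form fallback

assert gmi_model_ids() == ["zai-org/GLM-5.1-FP8", "deepseek-ai/DeepSeek-V3.2"]
```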
@@ -163,6 +163,12 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        base_url_override="https://api.arcee.ai/api/v1",
        base_url_env_var="ARCEE_BASE_URL",
    ),
    "gmi": HermesOverlay(
        transport="openai_chat",
        extra_env_vars=("GMI_API_KEY",),
        base_url_override="https://api.gmi-serving.com/v1",
        base_url_env_var="GMI_BASE_URL",
    ),
    "ollama-cloud": HermesOverlay(
        transport="openai_chat",
        base_url_env_var="OLLAMA_BASE_URL",
@@ -297,6 +303,10 @@ ALIASES: Dict[str, str] = {
    "arcee-ai": "arcee",
    "arceeai": "arcee",

    # gmi
    "gmi-cloud": "gmi",
    "gmicloud": "gmi",

    # Local server aliases → virtual "local" concept (resolved via user config)
    "lmstudio": "lmstudio",
    "lm-studio": "lmstudio",
@@ -319,6 +329,7 @@ _LABEL_OVERRIDES: Dict[str, str] = {
    "copilot-acp": "GitHub Copilot ACP",
    "stepfun": "StepFun Step Plan",
    "xiaomi": "Xiaomi MiMo",
    "gmi": "GMI Cloud",
    "local": "Local endpoint",
    "bedrock": "AWS Bedrock",
    "ollama-cloud": "Ollama Cloud",
@@ -516,19 +516,82 @@ class TestGetTextAuxiliaryClient:
        assert isinstance(client, CodexAuxiliaryClient)
        assert model == "gpt-5.2-codex"

    def test_returns_none_when_nothing_available(self, monkeypatch):
        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
                patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
            client, model = get_text_auxiliary_client()
        assert client is None
        assert model is None

class TestNousAuxiliaryRefresh:
    def test_try_nous_prefers_runtime_credentials(self):
        fresh_base = "https://inference-api.nousresearch.com/v1"

    def test_custom_endpoint_uses_codex_wrapper_when_runtime_requests_responses_api(self):
        with patch("agent.auxiliary_client._resolve_custom_runtime",
                   return_value=("https://api.openai.com/v1", "sk-test", "codex_responses")), \
                patch("agent.auxiliary_client._read_main_model", return_value="gpt-5.3-codex"), \
                patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client()

        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)
        assert model == "gpt-5.3-codex"
        assert mock_openai.call_args.kwargs["base_url"] == "https://api.openai.com/v1"
        assert mock_openai.call_args.kwargs["api_key"] == "sk-test"


class TestVisionClientFallback:
    """Vision client auto mode resolves known-good multimodal backends."""

    def test_vision_auto_includes_active_provider_when_configured(self, monkeypatch):
        """Active provider appears in available backends when credentials exist."""
        monkeypatch.setenv("ANTHROPIC_API_KEY", "***")
        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "stale-token"}),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
            patch("hermes_cli.models.get_nous_recommended_aux_model", return_value=None),
            patch("agent.auxiliary_client._read_nous_auth", return_value=None),
            patch("agent.auxiliary_client._read_main_provider", return_value="anthropic"),
            patch("agent.auxiliary_client._read_main_model", return_value="claude-sonnet-4"),
            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="***"),
        ):
            backends = get_available_vision_backends()

        assert "anthropic" in backends

    def test_resolve_provider_client_returns_native_anthropic_wrapper(self, monkeypatch):
        monkeypatch.setenv("ANTHROPIC_API_KEY", "***")
        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value=None),
            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="***"),
        ):
            client, model = resolve_provider_client("anthropic")

        assert client is not None
        assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
        assert model == "claude-haiku-4-5-20251001"


class TestAuxiliaryPoolAwareness:
    def test_try_nous_uses_pool_entry(self):
        class _Entry:
            access_token = "pooled-access-token"
            agent_key = "pooled-agent-key"
            inference_base_url = "https://inference.pool.example/v1"

        class _Pool:
            def has_credentials(self):
                return True

            def select(self):
                return _Entry()

        with (
            patch("agent.auxiliary_client.load_pool", return_value=_Pool()),
            patch("agent.auxiliary_client.OpenAI") as mock_openai,
        ):
            from agent.auxiliary_client import _try_nous

            mock_openai.return_value = MagicMock()
            client, model = _try_nous()

        assert client is not None
@@ -643,6 +706,67 @@ class TestNousAuxiliaryRefresh:
        assert stale_client.chat.completions.create.await_count == 1
        assert fresh_async_client.chat.completions.create.await_count == 1

    def test_try_nous_pool_entry(self):
        class _Entry:
            access_token = "pooled-access-token"
            agent_key = "pooled-agent-key"
            inference_base_url = "https://inference.pool.example/v1"

        class _Pool:
            def has_credentials(self):
                return True

            def select(self):
                return _Entry()

        with (
            patch("agent.auxiliary_client.load_pool", return_value=_Pool()),
            patch("agent.auxiliary_client.OpenAI") as mock_openai,
        ):
            from agent.auxiliary_client import _try_nous

            client, model = _try_nous()

        assert client is not None
        assert model == "gemini-3-flash"
        call_kwargs = mock_openai.call_args.kwargs
        assert call_kwargs["api_key"] == "pooled-agent-key"
        assert call_kwargs["base_url"] == "https://inference.pool.example/v1"

    def test_cached_gmi_client_keeps_explicit_slash_model_override(self):
        import agent.auxiliary_client as aux

        fake_client = MagicMock()

        with patch(
            "agent.auxiliary_client.resolve_provider_client",
            return_value=(fake_client, "anthropic/claude-opus-4.6"),
        ) as mock_resolve:
            aux.shutdown_cached_clients()
            try:
                client, model = aux._get_cached_client(
                    "gmi",
                    "anthropic/claude-opus-4.6",
                    base_url="https://api.gmi-serving.com/v1",
                    api_key="gmi-key",
                )
                assert client is fake_client
                assert model == "anthropic/claude-opus-4.6"

                client, model = aux._get_cached_client(
                    "gmi",
                    "openai/gpt-5.4-mini",
                    base_url="https://api.gmi-serving.com/v1",
                    api_key="gmi-key",
                )
            finally:
                aux.shutdown_cached_clients()

        assert client is fake_client
        assert model == "openai/gpt-5.4-mini"
        assert mock_resolve.call_count == 1


# ── Payment / credit exhaustion fallback ─────────────────────────────────

@@ -288,6 +288,10 @@ def _hermetic_environment(tmp_path, monkeypatch):
        monkeypatch.setattr(_plugins_mod, "_plugin_manager", None)
    except Exception:
        pass
    # Explicitly clear provider-specific base URL overrides that don't match
    # the generic credential-shaped env-var filter above.
    monkeypatch.delenv("GMI_API_KEY", raising=False)
    monkeypatch.delenv("GMI_BASE_URL", raising=False)


# Backward-compat alias — old tests reference this fixture name. Keep it
@@ -42,6 +42,7 @@ class TestProviderRegistry:
        ("minimax-cn", "MiniMax (China)", "api_key"),
        ("ai-gateway", "Vercel AI Gateway", "api_key"),
        ("kilocode", "Kilo Code", "api_key"),
        ("gmi", "GMI Cloud", "api_key"),
    ])
    def test_provider_registered(self, provider_id, name, auth_type):
        assert provider_id in PROVIDER_REGISTRY
@@ -106,6 +107,11 @@ class TestProviderRegistry:
        assert pconfig.api_key_env_vars == ("KILOCODE_API_KEY",)
        assert pconfig.base_url_env_var == "KILOCODE_BASE_URL"

    def test_gmi_env_vars(self):
        pconfig = PROVIDER_REGISTRY["gmi"]
        assert pconfig.api_key_env_vars == ("GMI_API_KEY",)
        assert pconfig.base_url_env_var == "GMI_BASE_URL"

    def test_huggingface_env_vars(self):
        pconfig = PROVIDER_REGISTRY["huggingface"]
        assert pconfig.api_key_env_vars == ("HF_TOKEN",)
@@ -121,6 +127,7 @@ class TestProviderRegistry:
        assert PROVIDER_REGISTRY["minimax-cn"].inference_base_url == "https://api.minimaxi.com/anthropic"
        assert PROVIDER_REGISTRY["ai-gateway"].inference_base_url == "https://ai-gateway.vercel.sh/v1"
        assert PROVIDER_REGISTRY["kilocode"].inference_base_url == "https://api.kilo.ai/api/gateway"
        assert PROVIDER_REGISTRY["gmi"].inference_base_url == "https://api.gmi-serving.com/v1"
        assert PROVIDER_REGISTRY["huggingface"].inference_base_url == "https://router.huggingface.co/v1"

    def test_oauth_providers_unchanged(self):
@@ -143,6 +150,7 @@ PROVIDER_ENV_VARS = (
    "MINIMAX_API_KEY", "MINIMAX_CN_API_KEY",
    "AI_GATEWAY_API_KEY", "AI_GATEWAY_BASE_URL",
    "KILOCODE_API_KEY", "KILOCODE_BASE_URL",
    "GMI_API_KEY", "GMI_BASE_URL",
    "DASHSCOPE_API_KEY", "OPENCODE_ZEN_API_KEY", "OPENCODE_GO_API_KEY",
    "NOUS_API_KEY", "GITHUB_TOKEN", "GH_TOKEN",
    "OPENAI_BASE_URL", "HERMES_COPILOT_ACP_COMMAND", "COPILOT_CLI_PATH",
@@ -178,6 +186,9 @@ class TestResolveProvider:
    def test_explicit_ai_gateway(self):
        assert resolve_provider("ai-gateway") == "ai-gateway"

    def test_explicit_gmi(self):
        assert resolve_provider("gmi") == "gmi"

    def test_alias_glm(self):
        assert resolve_provider("glm") == "zai"

@@ -205,6 +216,9 @@ class TestResolveProvider:
    def test_alias_vercel(self):
        assert resolve_provider("vercel") == "ai-gateway"

    def test_alias_gmi_cloud(self):
        assert resolve_provider("gmi-cloud") == "gmi"

    def test_explicit_kilocode(self):
        assert resolve_provider("kilocode") == "kilocode"

@@ -280,6 +294,10 @@ class TestResolveProvider:
        monkeypatch.setenv("AI_GATEWAY_API_KEY", "test-gw-key")
        assert resolve_provider("auto") == "ai-gateway"

    def test_auto_detects_gmi_key(self, monkeypatch):
        monkeypatch.setenv("GMI_API_KEY", "test-gmi-key")
        assert resolve_provider("auto") == "gmi"

    def test_auto_detects_kilocode_key(self, monkeypatch):
        monkeypatch.setenv("KILOCODE_API_KEY", "test-kilo-key")
        assert resolve_provider("auto") == "kilocode"
@@ -497,6 +515,19 @@ class TestResolveApiKeyProviderCredentials:
        assert creds["api_key"] == "kilo-secret-key"
        assert creds["base_url"] == "https://api.kilo.ai/api/gateway"

    def test_resolve_gmi_with_key(self, monkeypatch):
        monkeypatch.setenv("GMI_API_KEY", "gmi-secret-key")
        creds = resolve_api_key_provider_credentials("gmi")
        assert creds["provider"] == "gmi"
        assert creds["api_key"] == "gmi-secret-key"
        assert creds["base_url"] == "https://api.gmi-serving.com/v1"

    def test_resolve_gmi_custom_base_url(self, monkeypatch):
        monkeypatch.setenv("GMI_API_KEY", "gmi-key")
        monkeypatch.setenv("GMI_BASE_URL", "https://custom.gmi.example/v1")
        creds = resolve_api_key_provider_credentials("gmi")
        assert creds["base_url"] == "https://custom.gmi.example/v1"

    def test_resolve_kilocode_custom_base_url(self, monkeypatch):
        monkeypatch.setenv("KILOCODE_API_KEY", "kilo-key")
        monkeypatch.setenv("KILOCODE_BASE_URL", "https://custom.kilo.example/v1")
@@ -594,6 +625,15 @@ class TestRuntimeProviderResolution:
        assert result["api_key"] == "kilo-key"
        assert "kilo.ai" in result["base_url"]

    def test_runtime_gmi(self, monkeypatch):
        monkeypatch.setenv("GMI_API_KEY", "gmi-key")
        from hermes_cli.runtime_provider import resolve_runtime_provider
        result = resolve_runtime_provider(requested="gmi")
        assert result["provider"] == "gmi"
        assert result["api_mode"] == "chat_completions"
        assert result["api_key"] == "gmi-key"
        assert result["base_url"] == "https://api.gmi-serving.com/v1"

    def test_runtime_auto_detects_api_key_provider(self, monkeypatch):
        monkeypatch.setenv("KIMI_API_KEY", "auto-kimi-key")
        from hermes_cli.runtime_provider import resolve_runtime_provider

tests/hermes_cli/test_gmi_provider.py (new file, 363 lines)
@@ -0,0 +1,363 @@
"""Focused tests for GMI Cloud first-class provider wiring."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import io
|
||||
import sys
|
||||
import types
|
||||
from argparse import Namespace
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
if "dotenv" not in sys.modules:
|
||||
fake_dotenv = types.ModuleType("dotenv")
|
||||
fake_dotenv.load_dotenv = lambda *args, **kwargs: None
|
||||
sys.modules["dotenv"] = fake_dotenv
|
||||
|
||||
from hermes_cli.auth import resolve_provider
|
||||
from hermes_cli.config import load_config
|
||||
from hermes_cli.models import (
|
||||
CANONICAL_PROVIDERS,
|
||||
_PROVIDER_LABELS,
|
||||
_PROVIDER_MODELS,
|
||||
normalize_provider,
|
||||
provider_model_ids,
|
||||
)
|
||||
from agent.auxiliary_client import resolve_provider_client
|
||||
from agent.model_metadata import get_model_context_length
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _clear_provider_env(monkeypatch):
|
||||
for key in (
|
||||
"OPENROUTER_API_KEY",
|
||||
"OPENAI_API_KEY",
|
||||
"ANTHROPIC_API_KEY",
|
||||
"GOOGLE_API_KEY",
|
||||
"GLM_API_KEY",
|
||||
"KIMI_API_KEY",
|
||||
"MINIMAX_API_KEY",
|
||||
"GMI_API_KEY",
|
||||
"GMI_BASE_URL",
|
||||
):
|
||||
monkeypatch.delenv(key, raising=False)
|
||||
|
||||
|
||||
class TestGmiAliases:
|
||||
@pytest.mark.parametrize("alias", ["gmi", "gmi-cloud", "gmicloud"])
|
||||
def test_alias_resolves(self, alias, monkeypatch):
|
||||
monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")
|
||||
assert resolve_provider(alias) == "gmi"
|
||||
|
||||
def test_models_normalize_provider(self):
|
||||
assert normalize_provider("gmi-cloud") == "gmi"
|
||||
assert normalize_provider("gmicloud") == "gmi"
|
||||
|
||||
def test_providers_normalize_provider(self):
|
||||
from hermes_cli.providers import normalize_provider as normalize_provider_in_providers
|
||||
|
||||
assert normalize_provider_in_providers("gmi-cloud") == "gmi"
|
||||
assert normalize_provider_in_providers("gmicloud") == "gmi"
|
||||
|
||||
|
||||
class TestGmiConfigRegistry:
|
||||
def test_optional_env_vars_include_gmi(self):
|
||||
from hermes_cli.config import ENV_VARS_BY_VERSION, OPTIONAL_ENV_VARS
|
||||
|
||||
assert "GMI_API_KEY" in OPTIONAL_ENV_VARS
|
||||
assert OPTIONAL_ENV_VARS["GMI_API_KEY"]["category"] == "provider"
|
||||
assert OPTIONAL_ENV_VARS["GMI_API_KEY"]["password"] is True
|
||||
assert OPTIONAL_ENV_VARS["GMI_API_KEY"]["url"] == "https://www.gmicloud.ai/"
|
||||
|
||||
assert "GMI_BASE_URL" in OPTIONAL_ENV_VARS
|
||||
assert OPTIONAL_ENV_VARS["GMI_BASE_URL"]["category"] == "provider"
|
||||
assert OPTIONAL_ENV_VARS["GMI_BASE_URL"]["password"] is False
|
||||
|
||||
assert "GMI_API_KEY" in ENV_VARS_BY_VERSION[17]
|
||||
assert "GMI_BASE_URL" in ENV_VARS_BY_VERSION[17]
|
||||
|
||||
|
||||
class TestGmiModelCatalog:
|
||||
def test_static_model_fallback_exists(self):
|
||||
assert "gmi" in _PROVIDER_MODELS
|
||||
models = _PROVIDER_MODELS["gmi"]
|
||||
assert "zai-org/GLM-5.1-FP8" in models
|
||||
assert "deepseek-ai/DeepSeek-V3.2" in models
|
||||
assert "moonshotai/Kimi-K2.5" in models
|
||||
assert "anthropic/claude-sonnet-4.6" in models
|
||||
|
||||
def test_canonical_provider_entry(self):
|
||||
slugs = [p.slug for p in CANONICAL_PROVIDERS]
|
||||
assert "gmi" in slugs
|
||||
|
||||
def test_provider_model_ids_prefers_live_api(self, monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.auth.resolve_api_key_provider_credentials",
|
||||
lambda provider_id: {
|
||||
"provider": provider_id,
|
||||
"api_key": "gmi-live-key",
|
||||
"base_url": "https://api.gmi-serving.com/v1",
|
||||
"source": "GMI_API_KEY",
|
||||
},
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.models.fetch_api_models",
|
||||
lambda api_key, base_url: [
|
||||
"openai/gpt-5.4-mini",
|
||||
"zai-org/GLM-5.1-FP8",
|
||||
],
|
||||
)
|
||||
|
||||
assert provider_model_ids("gmi") == [
|
||||
"openai/gpt-5.4-mini",
|
||||
"zai-org/GLM-5.1-FP8",
|
||||
]
|
||||
|
||||
def test_provider_model_ids_falls_back_to_static_models(self, monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.auth.resolve_api_key_provider_credentials",
|
||||
lambda provider_id: {
|
||||
"provider": provider_id,
|
||||
"api_key": "gmi-live-key",
|
||||
"base_url": "https://api.gmi-serving.com/v1",
|
||||
"source": "GMI_API_KEY",
|
||||
},
|
||||
)
|
||||
monkeypatch.setattr("hermes_cli.models.fetch_api_models", lambda api_key, base_url: None)
|
||||
|
||||
assert provider_model_ids("gmi") == list(_PROVIDER_MODELS["gmi"])
|
||||
|
||||
|
||||
class TestGmiProvidersModule:
|
||||
def test_overlay_exists(self):
|
||||
from hermes_cli.providers import HERMES_OVERLAYS
|
||||
|
||||
assert "gmi" in HERMES_OVERLAYS
|
||||
overlay = HERMES_OVERLAYS["gmi"]
|
||||
assert overlay.transport == "openai_chat"
|
||||
assert overlay.extra_env_vars == ("GMI_API_KEY",)
|
||||
assert overlay.base_url_override == "https://api.gmi-serving.com/v1"
|
||||
assert overlay.base_url_env_var == "GMI_BASE_URL"
|
||||
assert not overlay.is_aggregator
|
||||
|
||||
def test_provider_label(self):
|
||||
assert _PROVIDER_LABELS["gmi"] == "GMI Cloud"
|
||||
|
||||
|
||||
class TestGmiDoctor:
|
||||
def test_provider_env_hints_include_gmi(self):
|
||||
from hermes_cli.doctor import _PROVIDER_ENV_HINTS
|
||||
|
||||
assert "GMI_API_KEY" in _PROVIDER_ENV_HINTS
|
||||
|
||||
def test_run_doctor_checks_gmi_models_endpoint(self, monkeypatch, tmp_path):
|
||||
from hermes_cli import doctor as doctor_mod
|
||||
|
||||
home = tmp_path / ".hermes"
|
||||
home.mkdir(parents=True, exist_ok=True)
|
||||
(home / "config.yaml").write_text("memory: {}\n", encoding="utf-8")
|
||||
(home / ".env").write_text("GMI_API_KEY=gmi-test-key\n", encoding="utf-8")
|
||||
project = tmp_path / "project"
|
||||
project.mkdir(exist_ok=True)
|
||||
|
||||
monkeypatch.setattr(doctor_mod, "HERMES_HOME", home)
|
||||
monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", project)
|
||||
monkeypatch.setattr(doctor_mod, "_DHH", str(home))
|
||||
monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")
|
||||
|
||||
for env_name in (
|
||||
"OPENROUTER_API_KEY",
|
||||
"OPENAI_API_KEY",
|
||||
"ANTHROPIC_API_KEY",
|
||||
"ANTHROPIC_TOKEN",
|
||||
"GLM_API_KEY",
|
||||
"ZAI_API_KEY",
|
||||
"Z_AI_API_KEY",
|
||||
"KIMI_API_KEY",
|
||||
"KIMI_CN_API_KEY",
|
||||
"ARCEEAI_API_KEY",
|
||||
"DEEPSEEK_API_KEY",
|
||||
"HF_TOKEN",
|
||||
"DASHSCOPE_API_KEY",
|
||||
"MINIMAX_API_KEY",
|
||||
"MINIMAX_CN_API_KEY",
|
||||
"AI_GATEWAY_API_KEY",
|
||||
"KILOCODE_API_KEY",
|
||||
"OPENCODE_ZEN_API_KEY",
|
||||
"OPENCODE_GO_API_KEY",
|
||||
"XIAOMI_API_KEY",
|
||||
):
|
||||
monkeypatch.delenv(env_name, raising=False)
|
||||
|
||||
fake_model_tools = types.SimpleNamespace(
|
||||
check_tool_availability=lambda *a, **kw: ([], []),
|
||||
TOOLSET_REQUIREMENTS={},
|
||||
)
|
||||
monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
|
||||
|
||||
try:
|
||||
from hermes_cli import auth as _auth_mod
|
||||
|
||||
monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {})
|
||||
monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
calls = []
|
||||
|
||||
def fake_get(url, headers=None, timeout=None):
|
||||
calls.append((url, headers, timeout))
|
||||
return types.SimpleNamespace(status_code=200)
|
||||
|
||||
import httpx
|
||||
|
||||
monkeypatch.setattr(httpx, "get", fake_get)
|
||||
|
||||
buf = io.StringIO()
|
||||
with contextlib.redirect_stdout(buf):
|
||||
doctor_mod.run_doctor(Namespace(fix=False))
|
||||
out = buf.getvalue()
|
||||
|
||||
assert "API key or custom endpoint configured" in out
|
||||
assert "GMI Cloud" in out
|
||||
assert any(url == "https://api.gmi-serving.com/v1/models" for url, _, _ in calls)
|
||||
|
||||
|
||||
class TestGmiModelMetadata:
|
||||
def test_url_to_provider(self):
|
||||
from agent.model_metadata import _URL_TO_PROVIDER
|
||||
|
||||
assert _URL_TO_PROVIDER.get("api.gmi-serving.com") == "gmi"
|
||||
|
||||
def test_provider_prefixes(self):
|
||||
from agent.model_metadata import _PROVIDER_PREFIXES
|
||||
|
||||
assert "gmi" in _PROVIDER_PREFIXES
|
||||
assert "gmi-cloud" in _PROVIDER_PREFIXES
|
||||
assert "gmicloud" in _PROVIDER_PREFIXES
|
||||
|
||||
def test_infer_from_url(self):
|
||||
from agent.model_metadata import _infer_provider_from_url
|
||||
|
||||
assert _infer_provider_from_url("https://api.gmi-serving.com/v1") == "gmi"
|
||||
|
||||
def test_known_gmi_endpoint_still_uses_endpoint_metadata(self):
|
||||
with patch(
|
||||
"agent.model_metadata.get_cached_context_length",
|
||||
return_value=None,
|
||||
), patch(
|
||||
"agent.model_metadata.fetch_endpoint_model_metadata",
|
||||
return_value={"anthropic/claude-opus-4.6": {"context_length": 409600}},
|
||||
), patch(
|
||||
"agent.models_dev.lookup_models_dev_context",
|
||||
return_value=None,
|
||||
), patch(
|
||||
"agent.model_metadata.fetch_model_metadata",
|
||||
return_value={},
|
||||
):
|
||||
result = get_model_context_length(
|
||||
"anthropic/claude-opus-4.6",
|
||||
base_url="https://api.gmi-serving.com/v1",
|
||||
api_key="gmi-test-key",
|
||||
provider="custom",
|
||||
)
|
||||
|
||||
assert result == 409600
|
||||
|
||||
|
||||
class TestGmiAuxiliary:
|
||||
def test_aux_default_model(self):
|
||||
from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
|
||||
|
||||
assert _API_KEY_PROVIDER_AUX_MODELS["gmi"] == "anthropic/claude-opus-4.6"
|
||||
|
||||
def test_resolve_provider_client_uses_gmi_aux_default(self, monkeypatch):
|
||||
monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")
|
||||
|
||||
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
mock_openai.return_value = object()
|
||||
client, model = resolve_provider_client("gmi")
|
||||
|
||||
assert client is not None
|
||||
assert model == "anthropic/claude-opus-4.6"
|
||||
assert mock_openai.call_args.kwargs["api_key"] == "gmi-test-key"
|
||||
assert mock_openai.call_args.kwargs["base_url"] == "https://api.gmi-serving.com/v1"
|
||||
|
||||
def test_resolve_provider_client_accepts_gmi_alias(self, monkeypatch):
|
||||
monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")
|
||||
|
||||
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
mock_openai.return_value = object()
|
||||
client, model = resolve_provider_client("gmi-cloud")
|
||||
|
||||
assert client is not None
|
||||
assert model == "anthropic/claude-opus-4.6"
|
||||
|
||||
|
||||
class TestGmiMainFlow:
|
||||
def test_chat_parser_accepts_gmi_provider(self, monkeypatch):
|
||||
recorded: dict[str, str] = {}
|
||||
|
||||
monkeypatch.setattr("hermes_cli.config.get_container_exec_info", lambda: None)
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.main.cmd_chat",
|
||||
lambda args: recorded.setdefault("provider", args.provider),
|
||||
)
|
||||
monkeypatch.setattr(sys, "argv", ["hermes", "chat", "--provider", "gmi"])
|
||||
|
||||
from hermes_cli.main import main
|
||||
|
||||
main()
|
||||
|
||||
assert recorded["provider"] == "gmi"
|
||||
|
||||
def test_select_provider_and_model_routes_gmi_to_generic_flow(self, monkeypatch):
|
||||
recorded: dict[str, str] = {}
|
||||
|
||||
monkeypatch.setattr("hermes_cli.auth.resolve_provider", lambda *args, **kwargs: None)
|
||||
|
||||
def fake_prompt_provider_choice(choices, default=0):
|
||||
return next(i for i, label in enumerate(choices) if label.startswith("GMI Cloud"))
|
||||
|
||||
def fake_model_flow_api_key_provider(config, provider_id, current_model=""):
|
||||
recorded["provider_id"] = provider_id
|
||||
|
||||
monkeypatch.setattr("hermes_cli.main._prompt_provider_choice", fake_prompt_provider_choice)
|
||||
monkeypatch.setattr("hermes_cli.main._model_flow_api_key_provider", fake_model_flow_api_key_provider)
|
||||
|
||||
from hermes_cli.main import select_provider_and_model
|
||||
|
||||
select_provider_and_model()
|
||||
|
||||
assert recorded["provider_id"] == "gmi"
|
||||
|
||||
def test_model_flow_api_key_provider_persists_gmi_selection(self, monkeypatch):
|
||||
monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")
|
||||
|
||||
with patch(
|
||||
"hermes_cli.models.fetch_api_models",
|
||||
return_value=["zai-org/GLM-5.1-FP8", "openai/gpt-5.4-mini"],
|
||||
), patch(
|
||||
"hermes_cli.auth._prompt_model_selection",
|
||||
return_value="openai/gpt-5.4-mini",
|
||||
), patch(
|
||||
"hermes_cli.auth.deactivate_provider",
|
||||
), patch(
|
||||
"builtins.input",
|
||||
return_value="",
|
||||
):
|
||||
from hermes_cli.main import _model_flow_api_key_provider
|
||||
|
||||
_model_flow_api_key_provider(load_config(), "gmi", "old-model")
|
||||
|
||||
import yaml
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
config = yaml.safe_load((get_hermes_home() / "config.yaml").read_text()) or {}
|
||||
model_cfg = config.get("model")
|
||||
assert isinstance(model_cfg, dict)
|
||||
assert model_cfg["provider"] == "gmi"
|
||||
assert model_cfg["default"] == "openai/gpt-5.4-mini"
|
||||
assert model_cfg["base_url"] == "https://api.gmi-serving.com/v1"
|
||||
@@ -66,13 +66,30 @@ hermes model

Good defaults:

| Situation | Recommended path |
|---|---|
| Least friction | Nous Portal or OpenRouter |
| You already have Claude or Codex auth | Anthropic or OpenAI Codex |
| You want local/private inference | Ollama or any custom OpenAI-compatible endpoint |
| You want multi-provider routing | OpenRouter |
| You have a custom GPU server | vLLM, SGLang, LiteLLM, or any OpenAI-compatible endpoint |
| Provider | What it is | How to set up |
|----------|-----------|---------------|
| **Nous Portal** | Subscription-based, zero-config | OAuth login via `hermes model` |
| **OpenAI Codex** | ChatGPT OAuth, uses Codex models | Device code auth via `hermes model` |
| **Anthropic** | Claude models directly (Pro/Max or API key) | `hermes model` with Claude Code auth, or an Anthropic API key |
| **OpenRouter** | Multi-provider routing across many models | Enter your API key |
| **Z.AI** | GLM / Zhipu-hosted models | Set `GLM_API_KEY` / `ZAI_API_KEY` |
| **Kimi / Moonshot** | Moonshot-hosted coding and chat models | Set `KIMI_API_KEY` |
| **Kimi / Moonshot China** | China-region Moonshot endpoint | Set `KIMI_CN_API_KEY` |
| **Arcee AI** | Trinity models | Set `ARCEEAI_API_KEY` |
| **GMI Cloud** | Multi-model direct API | Set `GMI_API_KEY` |
| **MiniMax** | International MiniMax endpoint | Set `MINIMAX_API_KEY` |
| **MiniMax China** | China-region MiniMax endpoint | Set `MINIMAX_CN_API_KEY` |
| **Alibaba Cloud** | Qwen models via DashScope | Set `DASHSCOPE_API_KEY` |
| **Hugging Face** | 20+ open models via unified router (Qwen, DeepSeek, Kimi, etc.) | Set `HF_TOKEN` |
| **Kilo Code** | KiloCode-hosted models | Set `KILOCODE_API_KEY` |
| **OpenCode Zen** | Pay-as-you-go access to curated models | Set `OPENCODE_ZEN_API_KEY` |
| **OpenCode Go** | $10/month subscription for open models | Set `OPENCODE_GO_API_KEY` |
| **DeepSeek** | Direct DeepSeek API access | Set `DEEPSEEK_API_KEY` |
| **NVIDIA NIM** | Nemotron models via build.nvidia.com or local NIM | Set `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) |
| **GitHub Copilot** | GitHub Copilot subscription (GPT-5.x, Claude, Gemini, etc.) | OAuth via `hermes model`, or `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` |
| **GitHub Copilot ACP** | Copilot ACP agent backend (spawns local `copilot` CLI) | `hermes model` (requires `copilot` CLI + `copilot login`) |
| **Vercel AI Gateway** | Vercel AI Gateway routing | Set `AI_GATEWAY_API_KEY` |
| **Custom Endpoint** | VLLM, SGLang, Ollama, or any OpenAI-compatible API | Set base URL + API key |

For most first-time users: choose a provider, accept the defaults unless you know why you're changing them. The full provider catalog with env vars and setup steps lives on the [Providers](../integrations/providers.md) page.

@@ -25,6 +25,7 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro
| **Kimi / Moonshot** | `KIMI_API_KEY` in `~/.hermes/.env` (provider: `kimi-coding`) |
| **Kimi / Moonshot (China)** | `KIMI_CN_API_KEY` in `~/.hermes/.env` (provider: `kimi-coding-cn`; aliases: `kimi-cn`, `moonshot-cn`) |
| **Arcee AI** | `ARCEEAI_API_KEY` in `~/.hermes/.env` (provider: `arcee`; aliases: `arcee-ai`, `arceeai`) |
| **GMI Cloud** | `GMI_API_KEY` in `~/.hermes/.env` (provider: `gmi`; aliases: `gmi-cloud`, `gmicloud`) |
| **MiniMax** | `MINIMAX_API_KEY` in `~/.hermes/.env` (provider: `minimax`) |
| **MiniMax China** | `MINIMAX_CN_API_KEY` in `~/.hermes/.env` (provider: `minimax-cn`) |
| **Alibaba Cloud** | `DASHSCOPE_API_KEY` in `~/.hermes/.env` (provider: `alibaba`, aliases: `dashscope`, `qwen`) |
@@ -250,7 +251,7 @@ model:
| `HERMES_COPILOT_ACP_COMMAND` | Override the Copilot CLI binary path (default: `copilot`) |
| `HERMES_COPILOT_ACP_ARGS` | Override ACP args (default: `--acp --stdio`) |

### First-Class Chinese AI Providers
### First-Class API-Key Providers

These providers have built-in support with dedicated provider IDs. Set the API key and use `--provider` to select:

@@ -286,16 +287,21 @@ hermes chat --provider xiaomi --model mimo-v2-pro
# Arcee AI (Trinity models)
hermes chat --provider arcee --model trinity-large-thinking
# Requires: ARCEEAI_API_KEY in ~/.hermes/.env

# GMI Cloud
# Use the exact model ID returned by GMI's /v1/models endpoint.
hermes chat --provider gmi --model zai-org/GLM-5.1-FP8
# Requires: GMI_API_KEY in ~/.hermes/.env
```

Or set the provider permanently in `config.yaml`:
```yaml
model:
  provider: "zai"  # or: kimi-coding, kimi-coding-cn, minimax, minimax-cn, alibaba, xiaomi, arcee
  default: "glm-5"
  provider: "gmi"
  default: "zai-org/GLM-5.1-FP8"
```

Base URLs can be overridden with `GLM_BASE_URL`, `KIMI_BASE_URL`, `MINIMAX_BASE_URL`, `MINIMAX_CN_BASE_URL`, `DASHSCOPE_BASE_URL`, or `XIAOMI_BASE_URL` environment variables.
Base URLs can be overridden with `GLM_BASE_URL`, `KIMI_BASE_URL`, `MINIMAX_BASE_URL`, `MINIMAX_CN_BASE_URL`, `DASHSCOPE_BASE_URL`, `XIAOMI_BASE_URL`, or `GMI_BASE_URL` environment variables.

:::note Z.AI Endpoint Auto-Detection
When using the Z.AI / GLM provider, Hermes automatically probes multiple endpoints (global, China, coding variants) to find one that accepts your API key. You don't need to set `GLM_BASE_URL` manually — the working endpoint is detected and cached automatically.
@@ -36,6 +36,8 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
| `KIMI_CN_API_KEY` | Kimi / Moonshot China API key ([moonshot.cn](https://platform.moonshot.cn)) |
| `ARCEEAI_API_KEY` | Arcee AI API key ([chat.arcee.ai](https://chat.arcee.ai/)) |
| `ARCEE_BASE_URL` | Override Arcee base URL (default: `https://api.arcee.ai/api/v1`) |
| `GMI_API_KEY` | GMI Cloud API key ([gmicloud.ai](https://www.gmicloud.ai/)) |
| `GMI_BASE_URL` | Override GMI Cloud base URL (default: `https://api.gmi-serving.com/v1`) |
| `MINIMAX_API_KEY` | MiniMax API key — global endpoint ([minimax.io](https://www.minimax.io)) |
| `MINIMAX_BASE_URL` | Override MiniMax base URL (default: `https://api.minimax.io/anthropic` — Hermes uses MiniMax's Anthropic Messages-compatible endpoint) |
| `MINIMAX_CN_API_KEY` | MiniMax API key — China endpoint ([minimaxi.com](https://www.minimaxi.com)) |
@@ -89,7 +91,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe

| Variable | Description |
|----------|-------------|
| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway` (default: `auto`) |
| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `custom`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `gemini`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway` (default: `auto`) |
| `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) |
| `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL |
| `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) |
@@ -801,6 +801,17 @@ These options apply to **auxiliary task configs** (`auxiliary:`, `compression:`,
| `"codex"` | Force Codex OAuth (ChatGPT account). Supports vision (gpt-5.3-codex). | `hermes model` → Codex |
| `"main"` | Use your active custom/main endpoint. This can come from `OPENAI_BASE_URL` + `OPENAI_API_KEY` or from a custom endpoint saved via `hermes model` / `config.yaml`. Works with OpenAI, local models, or any OpenAI-compatible API. **Auxiliary tasks only — not valid for `model.provider`.** | Custom endpoint credentials + base URL |

Direct API-key providers from the main provider catalog also work here when you want side tasks to bypass your default router. `gmi` is valid once `GMI_API_KEY` is configured:

```yaml
auxiliary:
  compression:
    provider: "gmi"
    model: "anthropic/claude-opus-4.6"
```

For GMI auxiliary routing, use the exact model ID returned by GMI's `/v1/models` endpoint.

### Common Setups

**Using a direct custom endpoint** (clearer than `provider: "main"` for local/self-hosted APIs):
@@ -59,6 +59,7 @@ Both `provider` and `model` are **required**. If either is missing, the fallback
| Kilo Code | `kilocode` | `KILOCODE_API_KEY` |
| Xiaomi MiMo | `xiaomi` | `XIAOMI_API_KEY` |
| Arcee AI | `arcee` | `ARCEEAI_API_KEY` |
| GMI Cloud | `gmi` | `GMI_API_KEY` |
| Alibaba / DashScope | `alibaba` | `DASHSCOPE_API_KEY` |
| Hugging Face | `huggingface` | `HF_TOKEN` |
| Custom endpoint | `custom` | `base_url` + `key_env` (see below) |