feat(providers): add GMI Cloud as a first-class API-key provider (#11955)
Add GMI Cloud (api.gmi-serving.com) as a full first-class API-key provider with built-in auth, aliases, model catalog, CLI entry points, auxiliary client routing, context length resolution, doctor checks, env var tracking, and docs.

- auth.py: ProviderConfig for 'gmi' (api_key, GMI_API_KEY / GMI_BASE_URL)
- providers.py: HermesOverlay with extra_env_vars for models.dev detection
- models.py: curated slash-form model catalog; live /v1/models fetch
- main.py: 'gmi' in _named_custom_provider_map and --provider choices
- model_metadata.py: _URL_TO_PROVIDER, _PROVIDER_PREFIXES, dedicated context-length probe block (GMI's /models has authoritative data)
- auxiliary_client.py: alias entries; _compat_model fix for slash-form models on cached aggregator-style clients; gmi aux default model
- doctor.py: GMI in provider connectivity checks
- config.py: GMI_API_KEY / GMI_BASE_URL in OPTIONAL_ENV_VARS
- conftest.py: explicit GMI_BASE_URL clearing (not caught by _API_KEY suffix)
- docs: providers.md, environment-variables.md, fallback-providers.md, configuration.md, quickstart.md (expands provider table)

Co-authored-by: Isaac Huang <isaachuang@Isaacs-MacBook-Pro.local>
This commit is contained in:
parent
41f70e6fc4
commit
c53fcb0173
1
.gitignore
vendored
1
.gitignore
vendored
@ -69,3 +69,4 @@ mini-swe-agent/
|
|||||||
.nix-stamps/
|
.nix-stamps/
|
||||||
result
|
result
|
||||||
website/static/api/skills-index.json
|
website/static/api/skills-index.json
|
||||||
|
models-dev-upstream/
|
||||||
|
|||||||
@ -82,6 +82,8 @@ _PROVIDER_ALIASES = {
|
|||||||
"moonshot": "kimi-coding",
|
"moonshot": "kimi-coding",
|
||||||
"kimi-cn": "kimi-coding-cn",
|
"kimi-cn": "kimi-coding-cn",
|
||||||
"moonshot-cn": "kimi-coding-cn",
|
"moonshot-cn": "kimi-coding-cn",
|
||||||
|
"gmi-cloud": "gmi",
|
||||||
|
"gmicloud": "gmi",
|
||||||
"minimax-china": "minimax-cn",
|
"minimax-china": "minimax-cn",
|
||||||
"minimax_cn": "minimax-cn",
|
"minimax_cn": "minimax-cn",
|
||||||
"claude": "anthropic",
|
"claude": "anthropic",
|
||||||
@ -155,6 +157,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
|||||||
"kimi-coding": "kimi-k2-turbo-preview",
|
"kimi-coding": "kimi-k2-turbo-preview",
|
||||||
"stepfun": "step-3.5-flash",
|
"stepfun": "step-3.5-flash",
|
||||||
"kimi-coding-cn": "kimi-k2-turbo-preview",
|
"kimi-coding-cn": "kimi-k2-turbo-preview",
|
||||||
|
"gmi": "anthropic/claude-opus-4.6",
|
||||||
"minimax": "MiniMax-M2.7",
|
"minimax": "MiniMax-M2.7",
|
||||||
"minimax-cn": "MiniMax-M2.7",
|
"minimax-cn": "MiniMax-M2.7",
|
||||||
"anthropic": "claude-haiku-4-5-20251001",
|
"anthropic": "claude-haiku-4-5-20251001",
|
||||||
@ -2558,12 +2561,19 @@ def _is_openrouter_client(client: Any) -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _cached_client_accepts_slash_models(client: Any, cached_default: Optional[str]) -> bool:
    """Report whether a cached client should be handed ``vendor/model`` IDs.

    This is a heuristic rather than a capability probe: OpenRouter clients
    always qualify, and any other client qualifies only when the default
    model cached alongside it is itself written in slash form.

    Args:
        client: The cached client object to inspect.
        cached_default: The default model stored with the cached client, or
            ``None`` / empty when there is none.

    Returns:
        ``True`` when slash-bearing model IDs may be passed through unchanged.
    """
    if _is_openrouter_client(client):
        return True
    if not cached_default:
        # No default to inspect, so there is no evidence of slash support.
        return False
    return "/" in cached_default
|
||||||
|
|
||||||
|
|
||||||
def _compat_model(client: Any, model: Optional[str], cached_default: Optional[str]) -> Optional[str]:
|
def _compat_model(client: Any, model: Optional[str], cached_default: Optional[str]) -> Optional[str]:
|
||||||
"""Drop OpenRouter-format model slugs (with '/') for non-OpenRouter clients.
|
"""Keep slash-bearing model IDs only for cached clients that support them.
|
||||||
|
|
||||||
Mirrors the guard in resolve_provider_client() which is skipped on cache hits.
|
Mirrors the guard in resolve_provider_client() which is skipped on cache hits.
|
||||||
"""
|
"""
|
||||||
if model and "/" in model and not _is_openrouter_client(client):
|
if model and "/" in model and not _cached_client_accepts_slash_models(client, cached_default):
|
||||||
return cached_default
|
return cached_default
|
||||||
return model or cached_default
|
return model or cached_default
|
||||||
|
|
||||||
|
|||||||
@ -51,6 +51,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
|||||||
"qwen-oauth",
|
"qwen-oauth",
|
||||||
"xiaomi",
|
"xiaomi",
|
||||||
"arcee",
|
"arcee",
|
||||||
|
"gmi",
|
||||||
"custom", "local",
|
"custom", "local",
|
||||||
# Common aliases
|
# Common aliases
|
||||||
"google", "google-gemini", "google-ai-studio",
|
"google", "google-gemini", "google-ai-studio",
|
||||||
@ -60,6 +61,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
|||||||
"stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
|
"stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
|
||||||
"mimo", "xiaomi-mimo",
|
"mimo", "xiaomi-mimo",
|
||||||
"arcee-ai", "arceeai",
|
"arcee-ai", "arceeai",
|
||||||
|
"gmi-cloud", "gmicloud",
|
||||||
"xai", "x-ai", "x.ai", "grok",
|
"xai", "x-ai", "x.ai", "grok",
|
||||||
"nvidia", "nim", "nvidia-nim", "nemotron",
|
"nvidia", "nim", "nvidia-nim", "nemotron",
|
||||||
"qwen-portal",
|
"qwen-portal",
|
||||||
@ -307,6 +309,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
|
|||||||
"integrate.api.nvidia.com": "nvidia",
|
"integrate.api.nvidia.com": "nvidia",
|
||||||
"api.xiaomimimo.com": "xiaomi",
|
"api.xiaomimimo.com": "xiaomi",
|
||||||
"xiaomimimo.com": "xiaomi",
|
"xiaomimimo.com": "xiaomi",
|
||||||
|
"api.gmi-serving.com": "gmi",
|
||||||
"ollama.com": "ollama-cloud",
|
"ollama.com": "ollama-cloud",
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -702,6 +705,29 @@ def fetch_endpoint_model_metadata(
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_endpoint_context_length(
    model: str,
    base_url: str,
    api_key: str = "",
) -> Optional[int]:
    """Resolve context length from an endpoint's live ``/models`` metadata.

    The metadata returned by ``fetch_endpoint_model_metadata`` is searched in
    this order:

    1. An entry keyed exactly by ``model``.
    2. The only entry, when the endpoint reports exactly one model.
    3. The first entry (in iteration order) whose key contains ``model`` or
       is contained in it.

    Args:
        model: Model identifier to look up.
        base_url: Endpoint base URL, forwarded to the metadata fetch.
        api_key: Credential forwarded to the metadata fetch.

    Returns:
        The matched entry's ``context_length`` when it is a positive integer,
        otherwise ``None`` (no match, missing field, or unusable value).
    """
    endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
    matched = endpoint_metadata.get(model)
    if not matched:
        if len(endpoint_metadata) == 1:
            # Single-model servers: whatever is loaded is what will answer.
            matched = next(iter(endpoint_metadata.values()))
        elif model:
            # Fuzzy match: substring in either direction.  An empty string is
            # a substring of every string, so an empty model or an empty key
            # must not take part or it would "match" the first entry.
            for key, entry in endpoint_metadata.items():
                if key and (model in key or key in model):
                    matched = entry
                    break
    if not matched:
        return None

    context_length = matched.get("context_length")
    # bool is a subclass of int; a JSON true/false must not be mistaken for a
    # token count.
    if isinstance(context_length, bool) or not isinstance(context_length, int):
        return None
    # Zero or negative is never a real window size; report "unknown" so the
    # caller can fall through to its other lookups.
    if context_length <= 0:
        return None
    return context_length
|
||||||
|
|
||||||
|
|
||||||
def _get_context_cache_path() -> Path:
|
def _get_context_cache_path() -> Path:
|
||||||
"""Return path to the persistent context length cache file."""
|
"""Return path to the persistent context length cache file."""
|
||||||
from hermes_constants import get_hermes_home
|
from hermes_constants import get_hermes_home
|
||||||
@ -1295,22 +1321,9 @@ def get_model_context_length(
|
|||||||
# returns 128k) instead of the model's full context (400k). models.dev
|
# returns 128k) instead of the model's full context (400k). models.dev
|
||||||
# has the correct per-provider values and is checked at step 5+.
|
# has the correct per-provider values and is checked at step 5+.
|
||||||
if _is_custom_endpoint(base_url) and not _is_known_provider_base_url(base_url):
|
if _is_custom_endpoint(base_url) and not _is_known_provider_base_url(base_url):
|
||||||
endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
|
context_length = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
|
||||||
matched = endpoint_metadata.get(model)
|
if context_length is not None:
|
||||||
if not matched:
|
return context_length
|
||||||
# Single-model servers: if only one model is loaded, use it
|
|
||||||
if len(endpoint_metadata) == 1:
|
|
||||||
matched = next(iter(endpoint_metadata.values()))
|
|
||||||
else:
|
|
||||||
# Fuzzy match: substring in either direction
|
|
||||||
for key, entry in endpoint_metadata.items():
|
|
||||||
if model in key or key in model:
|
|
||||||
matched = entry
|
|
||||||
break
|
|
||||||
if matched:
|
|
||||||
context_length = matched.get("context_length")
|
|
||||||
if isinstance(context_length, int):
|
|
||||||
return context_length
|
|
||||||
if not _is_known_provider_base_url(base_url):
|
if not _is_known_provider_base_url(base_url):
|
||||||
# 3. Try querying local server directly
|
# 3. Try querying local server directly
|
||||||
if is_local_endpoint(base_url):
|
if is_local_endpoint(base_url):
|
||||||
@ -1374,6 +1387,12 @@ def get_model_context_length(
|
|||||||
if base_url:
|
if base_url:
|
||||||
save_context_length(model, base_url, codex_ctx)
|
save_context_length(model, base_url, codex_ctx)
|
||||||
return codex_ctx
|
return codex_ctx
|
||||||
|
if effective_provider == "gmi" and base_url:
|
||||||
|
# GMI exposes authoritative context_length via /models, but it is not
|
||||||
|
# in models.dev yet. Preserve that higher-fidelity endpoint lookup.
|
||||||
|
ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
|
||||||
|
if ctx is not None:
|
||||||
|
return ctx
|
||||||
if effective_provider:
|
if effective_provider:
|
||||||
from agent.models_dev import lookup_models_dev_context
|
from agent.models_dev import lookup_models_dev_context
|
||||||
ctx = lookup_models_dev_context(effective_provider, model)
|
ctx = lookup_models_dev_context(effective_provider, model)
|
||||||
|
|||||||
@ -224,6 +224,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
|||||||
api_key_env_vars=("ARCEEAI_API_KEY",),
|
api_key_env_vars=("ARCEEAI_API_KEY",),
|
||||||
base_url_env_var="ARCEE_BASE_URL",
|
base_url_env_var="ARCEE_BASE_URL",
|
||||||
),
|
),
|
||||||
|
"gmi": ProviderConfig(
|
||||||
|
id="gmi",
|
||||||
|
name="GMI Cloud",
|
||||||
|
auth_type="api_key",
|
||||||
|
inference_base_url="https://api.gmi-serving.com/v1",
|
||||||
|
api_key_env_vars=("GMI_API_KEY",),
|
||||||
|
base_url_env_var="GMI_BASE_URL",
|
||||||
|
),
|
||||||
"minimax": ProviderConfig(
|
"minimax": ProviderConfig(
|
||||||
id="minimax",
|
id="minimax",
|
||||||
name="MiniMax",
|
name="MiniMax",
|
||||||
@ -1120,6 +1128,7 @@ def resolve_provider(
|
|||||||
"kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
|
"kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
|
||||||
"step": "stepfun", "stepfun-coding-plan": "stepfun",
|
"step": "stepfun", "stepfun-coding-plan": "stepfun",
|
||||||
"arcee-ai": "arcee", "arceeai": "arcee",
|
"arcee-ai": "arcee", "arceeai": "arcee",
|
||||||
|
"gmi-cloud": "gmi", "gmicloud": "gmi",
|
||||||
"minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
|
"minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
|
||||||
"alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan",
|
"alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan",
|
||||||
"alibaba_coding_plan": "alibaba-coding-plan",
|
"alibaba_coding_plan": "alibaba-coding-plan",
|
||||||
|
|||||||
@ -1082,6 +1082,7 @@ ENV_VARS_BY_VERSION: Dict[int, List[str]] = {
|
|||||||
"SLACK_BOT_TOKEN", "SLACK_APP_TOKEN", "SLACK_ALLOWED_USERS"],
|
"SLACK_BOT_TOKEN", "SLACK_APP_TOKEN", "SLACK_ALLOWED_USERS"],
|
||||||
10: ["TAVILY_API_KEY"],
|
10: ["TAVILY_API_KEY"],
|
||||||
11: ["TERMINAL_MODAL_MODE"],
|
11: ["TERMINAL_MODAL_MODE"],
|
||||||
|
17: ["GMI_API_KEY", "GMI_BASE_URL"],
|
||||||
}
|
}
|
||||||
|
|
||||||
# Required environment variables with metadata for migration prompts.
|
# Required environment variables with metadata for migration prompts.
|
||||||
@ -1254,6 +1255,22 @@ OPTIONAL_ENV_VARS = {
|
|||||||
"category": "provider",
|
"category": "provider",
|
||||||
"advanced": True,
|
"advanced": True,
|
||||||
},
|
},
|
||||||
|
"GMI_API_KEY": {
|
||||||
|
"description": "GMI Cloud API key",
|
||||||
|
"prompt": "GMI Cloud API key",
|
||||||
|
"url": "https://www.gmicloud.ai/",
|
||||||
|
"password": True,
|
||||||
|
"category": "provider",
|
||||||
|
"advanced": True,
|
||||||
|
},
|
||||||
|
"GMI_BASE_URL": {
|
||||||
|
"description": "GMI Cloud base URL override",
|
||||||
|
"prompt": "GMI Cloud base URL (leave empty for default)",
|
||||||
|
"url": None,
|
||||||
|
"password": False,
|
||||||
|
"category": "provider",
|
||||||
|
"advanced": True,
|
||||||
|
},
|
||||||
"MINIMAX_API_KEY": {
|
"MINIMAX_API_KEY": {
|
||||||
"description": "MiniMax API key (international)",
|
"description": "MiniMax API key (international)",
|
||||||
"prompt": "MiniMax API key",
|
"prompt": "MiniMax API key",
|
||||||
|
|||||||
@ -46,6 +46,7 @@ _PROVIDER_ENV_HINTS = (
|
|||||||
"Z_AI_API_KEY",
|
"Z_AI_API_KEY",
|
||||||
"KIMI_API_KEY",
|
"KIMI_API_KEY",
|
||||||
"KIMI_CN_API_KEY",
|
"KIMI_CN_API_KEY",
|
||||||
|
"GMI_API_KEY",
|
||||||
"MINIMAX_API_KEY",
|
"MINIMAX_API_KEY",
|
||||||
"MINIMAX_CN_API_KEY",
|
"MINIMAX_CN_API_KEY",
|
||||||
"KILOCODE_API_KEY",
|
"KILOCODE_API_KEY",
|
||||||
@ -937,6 +938,7 @@ def run_doctor(args):
|
|||||||
("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
|
("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
|
||||||
("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True),
|
("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True),
|
||||||
("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
|
("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
|
||||||
|
("GMI Cloud", ("GMI_API_KEY",), "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True),
|
||||||
("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
|
("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
|
||||||
("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
|
("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
|
||||||
("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
|
("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
|
||||||
|
|||||||
@ -1768,6 +1768,7 @@ def select_provider_and_model(args=None):
|
|||||||
"huggingface",
|
"huggingface",
|
||||||
"xiaomi",
|
"xiaomi",
|
||||||
"arcee",
|
"arcee",
|
||||||
|
"gmi",
|
||||||
"nvidia",
|
"nvidia",
|
||||||
"ollama-cloud",
|
"ollama-cloud",
|
||||||
):
|
):
|
||||||
@ -7782,6 +7783,7 @@ For more help on a command:
|
|||||||
"kilocode",
|
"kilocode",
|
||||||
"xiaomi",
|
"xiaomi",
|
||||||
"arcee",
|
"arcee",
|
||||||
|
"gmi",
|
||||||
"nvidia",
|
"nvidia",
|
||||||
],
|
],
|
||||||
default=None,
|
default=None,
|
||||||
|
|||||||
@ -278,6 +278,14 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
|||||||
"trinity-large-preview",
|
"trinity-large-preview",
|
||||||
"trinity-mini",
|
"trinity-mini",
|
||||||
],
|
],
|
||||||
|
"gmi": [
|
||||||
|
"zai-org/GLM-5.1-FP8",
|
||||||
|
"deepseek-ai/DeepSeek-V3.2",
|
||||||
|
"moonshotai/Kimi-K2.5",
|
||||||
|
"google/gemini-3.1-flash-lite-preview",
|
||||||
|
"anthropic/claude-sonnet-4.6",
|
||||||
|
"openai/gpt-5.4",
|
||||||
|
],
|
||||||
"opencode-zen": [
|
"opencode-zen": [
|
||||||
"kimi-k2.5",
|
"kimi-k2.5",
|
||||||
"gpt-5.4-pro",
|
"gpt-5.4-pro",
|
||||||
@ -709,7 +717,6 @@ class ProviderEntry(NamedTuple):
|
|||||||
label: str
|
label: str
|
||||||
tui_desc: str # detailed description for `hermes model` TUI
|
tui_desc: str # detailed description for `hermes model` TUI
|
||||||
|
|
||||||
|
|
||||||
CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
||||||
ProviderEntry("nous", "Nous Portal", "Nous Portal (Nous Research subscription)"),
|
ProviderEntry("nous", "Nous Portal", "Nous Portal (Nous Research subscription)"),
|
||||||
ProviderEntry("openrouter", "OpenRouter", "OpenRouter (100+ models, pay-per-use)"),
|
ProviderEntry("openrouter", "OpenRouter", "OpenRouter (100+ models, pay-per-use)"),
|
||||||
@ -735,6 +742,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
|||||||
ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
|
ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
|
||||||
ProviderEntry("ollama-cloud", "Ollama Cloud", "Ollama Cloud (cloud-hosted open models — ollama.com)"),
|
ProviderEntry("ollama-cloud", "Ollama Cloud", "Ollama Cloud (cloud-hosted open models — ollama.com)"),
|
||||||
ProviderEntry("arcee", "Arcee AI", "Arcee AI (Trinity models — direct API)"),
|
ProviderEntry("arcee", "Arcee AI", "Arcee AI (Trinity models — direct API)"),
|
||||||
|
ProviderEntry("gmi", "GMI Cloud", "GMI Cloud (multi-model direct API)"),
|
||||||
ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"),
|
ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"),
|
||||||
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
|
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
|
||||||
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
|
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
|
||||||
@ -769,6 +777,8 @@ _PROVIDER_ALIASES = {
|
|||||||
"stepfun-coding-plan": "stepfun",
|
"stepfun-coding-plan": "stepfun",
|
||||||
"arcee-ai": "arcee",
|
"arcee-ai": "arcee",
|
||||||
"arceeai": "arcee",
|
"arceeai": "arcee",
|
||||||
|
"gmi-cloud": "gmi",
|
||||||
|
"gmicloud": "gmi",
|
||||||
"minimax-china": "minimax-cn",
|
"minimax-china": "minimax-cn",
|
||||||
"minimax_cn": "minimax-cn",
|
"minimax_cn": "minimax-cn",
|
||||||
"claude": "anthropic",
|
"claude": "anthropic",
|
||||||
@ -1849,6 +1859,19 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
|
|||||||
return live
|
return live
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
if normalized == "gmi":
|
||||||
|
try:
|
||||||
|
from hermes_cli.auth import resolve_api_key_provider_credentials
|
||||||
|
|
||||||
|
creds = resolve_api_key_provider_credentials("gmi")
|
||||||
|
api_key = str(creds.get("api_key") or "").strip()
|
||||||
|
base_url = str(creds.get("base_url") or "").strip()
|
||||||
|
if api_key and base_url:
|
||||||
|
live = fetch_api_models(api_key, base_url)
|
||||||
|
if live:
|
||||||
|
return live
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
if normalized == "custom":
|
if normalized == "custom":
|
||||||
base_url = _get_custom_base_url()
|
base_url = _get_custom_base_url()
|
||||||
if base_url:
|
if base_url:
|
||||||
|
|||||||
@ -163,6 +163,12 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
|
|||||||
base_url_override="https://api.arcee.ai/api/v1",
|
base_url_override="https://api.arcee.ai/api/v1",
|
||||||
base_url_env_var="ARCEE_BASE_URL",
|
base_url_env_var="ARCEE_BASE_URL",
|
||||||
),
|
),
|
||||||
|
"gmi": HermesOverlay(
|
||||||
|
transport="openai_chat",
|
||||||
|
extra_env_vars=("GMI_API_KEY",),
|
||||||
|
base_url_override="https://api.gmi-serving.com/v1",
|
||||||
|
base_url_env_var="GMI_BASE_URL",
|
||||||
|
),
|
||||||
"ollama-cloud": HermesOverlay(
|
"ollama-cloud": HermesOverlay(
|
||||||
transport="openai_chat",
|
transport="openai_chat",
|
||||||
base_url_env_var="OLLAMA_BASE_URL",
|
base_url_env_var="OLLAMA_BASE_URL",
|
||||||
@ -297,6 +303,10 @@ ALIASES: Dict[str, str] = {
|
|||||||
"arcee-ai": "arcee",
|
"arcee-ai": "arcee",
|
||||||
"arceeai": "arcee",
|
"arceeai": "arcee",
|
||||||
|
|
||||||
|
# gmi
|
||||||
|
"gmi-cloud": "gmi",
|
||||||
|
"gmicloud": "gmi",
|
||||||
|
|
||||||
# Local server aliases → virtual "local" concept (resolved via user config)
|
# Local server aliases → virtual "local" concept (resolved via user config)
|
||||||
"lmstudio": "lmstudio",
|
"lmstudio": "lmstudio",
|
||||||
"lm-studio": "lmstudio",
|
"lm-studio": "lmstudio",
|
||||||
@ -319,6 +329,7 @@ _LABEL_OVERRIDES: Dict[str, str] = {
|
|||||||
"copilot-acp": "GitHub Copilot ACP",
|
"copilot-acp": "GitHub Copilot ACP",
|
||||||
"stepfun": "StepFun Step Plan",
|
"stepfun": "StepFun Step Plan",
|
||||||
"xiaomi": "Xiaomi MiMo",
|
"xiaomi": "Xiaomi MiMo",
|
||||||
|
"gmi": "GMI Cloud",
|
||||||
"local": "Local endpoint",
|
"local": "Local endpoint",
|
||||||
"bedrock": "AWS Bedrock",
|
"bedrock": "AWS Bedrock",
|
||||||
"ollama-cloud": "Ollama Cloud",
|
"ollama-cloud": "Ollama Cloud",
|
||||||
|
|||||||
@ -516,19 +516,82 @@ class TestGetTextAuxiliaryClient:
|
|||||||
assert isinstance(client, CodexAuxiliaryClient)
|
assert isinstance(client, CodexAuxiliaryClient)
|
||||||
assert model == "gpt-5.2-codex"
|
assert model == "gpt-5.2-codex"
|
||||||
|
|
||||||
|
def test_returns_none_when_nothing_available(self, monkeypatch):
|
||||||
|
monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
|
||||||
|
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
|
||||||
|
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
|
||||||
|
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
||||||
|
patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
|
||||||
|
patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
|
||||||
|
client, model = get_text_auxiliary_client()
|
||||||
|
assert client is None
|
||||||
|
assert model is None
|
||||||
|
|
||||||
class TestNousAuxiliaryRefresh:
|
def test_custom_endpoint_uses_codex_wrapper_when_runtime_requests_responses_api(self):
|
||||||
def test_try_nous_prefers_runtime_credentials(self):
|
with patch("agent.auxiliary_client._resolve_custom_runtime",
|
||||||
fresh_base = "https://inference-api.nousresearch.com/v1"
|
return_value=("https://api.openai.com/v1", "sk-test", "codex_responses")), \
|
||||||
|
patch("agent.auxiliary_client._read_main_model", return_value="gpt-5.3-codex"), \
|
||||||
|
patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||||
|
client, model = get_text_auxiliary_client()
|
||||||
|
|
||||||
|
from agent.auxiliary_client import CodexAuxiliaryClient
|
||||||
|
assert isinstance(client, CodexAuxiliaryClient)
|
||||||
|
assert model == "gpt-5.3-codex"
|
||||||
|
assert mock_openai.call_args.kwargs["base_url"] == "https://api.openai.com/v1"
|
||||||
|
assert mock_openai.call_args.kwargs["api_key"] == "sk-test"
|
||||||
|
|
||||||
|
|
||||||
|
class TestVisionClientFallback:
|
||||||
|
"""Vision client auto mode resolves known-good multimodal backends."""
|
||||||
|
|
||||||
|
def test_vision_auto_includes_active_provider_when_configured(self, monkeypatch):
|
||||||
|
"""Active provider appears in available backends when credentials exist."""
|
||||||
|
monkeypatch.setenv("ANTHROPIC_API_KEY", "***")
|
||||||
with (
|
with (
|
||||||
patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "stale-token"}),
|
patch("agent.auxiliary_client._read_nous_auth", return_value=None),
|
||||||
patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
|
patch("agent.auxiliary_client._read_main_provider", return_value="anthropic"),
|
||||||
patch("hermes_cli.models.get_nous_recommended_aux_model", return_value=None),
|
patch("agent.auxiliary_client._read_main_model", return_value="claude-sonnet-4"),
|
||||||
|
patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
|
||||||
|
patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="***"),
|
||||||
|
):
|
||||||
|
backends = get_available_vision_backends()
|
||||||
|
|
||||||
|
assert "anthropic" in backends
|
||||||
|
|
||||||
|
def test_resolve_provider_client_returns_native_anthropic_wrapper(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("ANTHROPIC_API_KEY", "***")
|
||||||
|
with (
|
||||||
|
patch("agent.auxiliary_client._read_nous_auth", return_value=None),
|
||||||
|
patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
|
||||||
|
patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="***"),
|
||||||
|
):
|
||||||
|
client, model = resolve_provider_client("anthropic")
|
||||||
|
|
||||||
|
assert client is not None
|
||||||
|
assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
|
||||||
|
assert model == "claude-haiku-4-5-20251001"
|
||||||
|
|
||||||
|
|
||||||
|
class TestAuxiliaryPoolAwareness:
|
||||||
|
def test_try_nous_uses_pool_entry(self):
|
||||||
|
class _Entry:
|
||||||
|
access_token = "pooled-access-token"
|
||||||
|
agent_key = "pooled-agent-key"
|
||||||
|
inference_base_url = "https://inference.pool.example/v1"
|
||||||
|
|
||||||
|
class _Pool:
|
||||||
|
def has_credentials(self):
|
||||||
|
return True
|
||||||
|
|
||||||
|
def select(self):
|
||||||
|
return _Entry()
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch("agent.auxiliary_client.load_pool", return_value=_Pool()),
|
||||||
patch("agent.auxiliary_client.OpenAI") as mock_openai,
|
patch("agent.auxiliary_client.OpenAI") as mock_openai,
|
||||||
):
|
):
|
||||||
from agent.auxiliary_client import _try_nous
|
from agent.auxiliary_client import _try_nous
|
||||||
|
|
||||||
mock_openai.return_value = MagicMock()
|
|
||||||
client, model = _try_nous()
|
client, model = _try_nous()
|
||||||
|
|
||||||
assert client is not None
|
assert client is not None
|
||||||
@ -643,6 +706,67 @@ class TestNousAuxiliaryRefresh:
|
|||||||
assert stale_client.chat.completions.create.await_count == 1
|
assert stale_client.chat.completions.create.await_count == 1
|
||||||
assert fresh_async_client.chat.completions.create.await_count == 1
|
assert fresh_async_client.chat.completions.create.await_count == 1
|
||||||
|
|
||||||
|
def test_try_nous_pool_entry(self):
    """``_try_nous`` builds its client from the selected pool entry."""

    class _PooledEntry:
        access_token = "pooled-access-token"
        agent_key = "pooled-agent-key"
        inference_base_url = "https://inference.pool.example/v1"

    class _StubPool:
        def has_credentials(self):
            return True

        def select(self):
            return _PooledEntry()

    with patch("agent.auxiliary_client.load_pool", return_value=_StubPool()), \
            patch("agent.auxiliary_client.OpenAI") as mock_openai:
        from agent.auxiliary_client import _try_nous

        result = _try_nous()

    client, model = result
    assert client is not None
    assert model == "gemini-3-flash"

    # The OpenAI constructor must have received the pooled credentials.
    openai_kwargs = mock_openai.call_args.kwargs
    assert openai_kwargs["api_key"] == "pooled-agent-key"
    assert openai_kwargs["base_url"] == "https://inference.pool.example/v1"
|
||||||
|
|
||||||
|
def test_cached_gmi_client_keeps_explicit_slash_model_override(self):
    """A cache hit for GMI returns the caller's slash-form model override.

    The first lookup populates the cache; the second asks for a different
    ``vendor/model`` ID and must get that ID back while the provider
    resolver is still only called once.
    """
    import agent.auxiliary_client as aux

    stub_client = MagicMock()
    endpoint = {
        "base_url": "https://api.gmi-serving.com/v1",
        "api_key": "gmi-key",
    }

    with patch(
        "agent.auxiliary_client.resolve_provider_client",
        return_value=(stub_client, "anthropic/claude-opus-4.6"),
    ) as mock_resolve:
        # Start from an empty cache so the first call is a guaranteed miss.
        aux.shutdown_cached_clients()
        try:
            client, model = aux._get_cached_client(
                "gmi", "anthropic/claude-opus-4.6", **endpoint
            )
            assert client is stub_client
            assert model == "anthropic/claude-opus-4.6"

            client, model = aux._get_cached_client(
                "gmi", "openai/gpt-5.4-mini", **endpoint
            )
        finally:
            # Leave no cached clients behind for later tests.
            aux.shutdown_cached_clients()

    assert client is stub_client
    assert model == "openai/gpt-5.4-mini"
    assert mock_resolve.call_count == 1
|
||||||
|
|
||||||
|
|
||||||
# ── Payment / credit exhaustion fallback ─────────────────────────────────
|
# ── Payment / credit exhaustion fallback ─────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -288,6 +288,10 @@ def _hermetic_environment(tmp_path, monkeypatch):
|
|||||||
monkeypatch.setattr(_plugins_mod, "_plugin_manager", None)
|
monkeypatch.setattr(_plugins_mod, "_plugin_manager", None)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
# Explicitly clear provider-specific base URL overrides that don't match
|
||||||
|
# the generic credential-shaped env-var filter above.
|
||||||
|
monkeypatch.delenv("GMI_API_KEY", raising=False)
|
||||||
|
monkeypatch.delenv("GMI_BASE_URL", raising=False)
|
||||||
|
|
||||||
|
|
||||||
# Backward-compat alias — old tests reference this fixture name. Keep it
|
# Backward-compat alias — old tests reference this fixture name. Keep it
|
||||||
|
|||||||
@ -42,6 +42,7 @@ class TestProviderRegistry:
|
|||||||
("minimax-cn", "MiniMax (China)", "api_key"),
|
("minimax-cn", "MiniMax (China)", "api_key"),
|
||||||
("ai-gateway", "Vercel AI Gateway", "api_key"),
|
("ai-gateway", "Vercel AI Gateway", "api_key"),
|
||||||
("kilocode", "Kilo Code", "api_key"),
|
("kilocode", "Kilo Code", "api_key"),
|
||||||
|
("gmi", "GMI Cloud", "api_key"),
|
||||||
])
|
])
|
||||||
def test_provider_registered(self, provider_id, name, auth_type):
|
def test_provider_registered(self, provider_id, name, auth_type):
|
||||||
assert provider_id in PROVIDER_REGISTRY
|
assert provider_id in PROVIDER_REGISTRY
|
||||||
@ -106,6 +107,11 @@ class TestProviderRegistry:
|
|||||||
assert pconfig.api_key_env_vars == ("KILOCODE_API_KEY",)
|
assert pconfig.api_key_env_vars == ("KILOCODE_API_KEY",)
|
||||||
assert pconfig.base_url_env_var == "KILOCODE_BASE_URL"
|
assert pconfig.base_url_env_var == "KILOCODE_BASE_URL"
|
||||||
|
|
||||||
|
def test_gmi_env_vars(self):
    """The ``gmi`` registry entry names its API-key and base-URL env vars."""
    gmi_config = PROVIDER_REGISTRY["gmi"]
    expected_key_vars = ("GMI_API_KEY",)
    assert gmi_config.api_key_env_vars == expected_key_vars
    assert gmi_config.base_url_env_var == "GMI_BASE_URL"
|
||||||
|
|
||||||
def test_huggingface_env_vars(self):
|
def test_huggingface_env_vars(self):
|
||||||
pconfig = PROVIDER_REGISTRY["huggingface"]
|
pconfig = PROVIDER_REGISTRY["huggingface"]
|
||||||
assert pconfig.api_key_env_vars == ("HF_TOKEN",)
|
assert pconfig.api_key_env_vars == ("HF_TOKEN",)
|
||||||
@ -121,6 +127,7 @@ class TestProviderRegistry:
|
|||||||
assert PROVIDER_REGISTRY["minimax-cn"].inference_base_url == "https://api.minimaxi.com/anthropic"
|
assert PROVIDER_REGISTRY["minimax-cn"].inference_base_url == "https://api.minimaxi.com/anthropic"
|
||||||
assert PROVIDER_REGISTRY["ai-gateway"].inference_base_url == "https://ai-gateway.vercel.sh/v1"
|
assert PROVIDER_REGISTRY["ai-gateway"].inference_base_url == "https://ai-gateway.vercel.sh/v1"
|
||||||
assert PROVIDER_REGISTRY["kilocode"].inference_base_url == "https://api.kilo.ai/api/gateway"
|
assert PROVIDER_REGISTRY["kilocode"].inference_base_url == "https://api.kilo.ai/api/gateway"
|
||||||
|
assert PROVIDER_REGISTRY["gmi"].inference_base_url == "https://api.gmi-serving.com/v1"
|
||||||
assert PROVIDER_REGISTRY["huggingface"].inference_base_url == "https://router.huggingface.co/v1"
|
assert PROVIDER_REGISTRY["huggingface"].inference_base_url == "https://router.huggingface.co/v1"
|
||||||
|
|
||||||
def test_oauth_providers_unchanged(self):
|
def test_oauth_providers_unchanged(self):
|
||||||
@ -143,6 +150,7 @@ PROVIDER_ENV_VARS = (
|
|||||||
"MINIMAX_API_KEY", "MINIMAX_CN_API_KEY",
|
"MINIMAX_API_KEY", "MINIMAX_CN_API_KEY",
|
||||||
"AI_GATEWAY_API_KEY", "AI_GATEWAY_BASE_URL",
|
"AI_GATEWAY_API_KEY", "AI_GATEWAY_BASE_URL",
|
||||||
"KILOCODE_API_KEY", "KILOCODE_BASE_URL",
|
"KILOCODE_API_KEY", "KILOCODE_BASE_URL",
|
||||||
|
"GMI_API_KEY", "GMI_BASE_URL",
|
||||||
"DASHSCOPE_API_KEY", "OPENCODE_ZEN_API_KEY", "OPENCODE_GO_API_KEY",
|
"DASHSCOPE_API_KEY", "OPENCODE_ZEN_API_KEY", "OPENCODE_GO_API_KEY",
|
||||||
"NOUS_API_KEY", "GITHUB_TOKEN", "GH_TOKEN",
|
"NOUS_API_KEY", "GITHUB_TOKEN", "GH_TOKEN",
|
||||||
"OPENAI_BASE_URL", "HERMES_COPILOT_ACP_COMMAND", "COPILOT_CLI_PATH",
|
"OPENAI_BASE_URL", "HERMES_COPILOT_ACP_COMMAND", "COPILOT_CLI_PATH",
|
||||||
@ -178,6 +186,9 @@ class TestResolveProvider:
|
|||||||
def test_explicit_ai_gateway(self):
    """Requesting ``ai-gateway`` by its canonical id resolves to that id."""
    resolved = resolve_provider("ai-gateway")
    assert resolved == "ai-gateway"
|
||||||
|
|
||||||
|
def test_explicit_gmi(self):
    """Requesting ``gmi`` by its canonical id resolves to that id."""
    resolved = resolve_provider("gmi")
    assert resolved == "gmi"
|
||||||
|
|
||||||
def test_alias_glm(self):
    """The ``glm`` alias resolves to the ``zai`` provider id."""
    resolved = resolve_provider("glm")
    assert resolved == "zai"
|
||||||
|
|
||||||
@ -205,6 +216,9 @@ class TestResolveProvider:
|
|||||||
def test_alias_vercel(self):
    """The ``vercel`` alias resolves to the ``ai-gateway`` provider id."""
    resolved = resolve_provider("vercel")
    assert resolved == "ai-gateway"
|
||||||
|
|
||||||
|
def test_alias_gmi_cloud(self):
    """The ``gmi-cloud`` alias resolves to the ``gmi`` provider id."""
    resolved = resolve_provider("gmi-cloud")
    assert resolved == "gmi"
|
||||||
|
|
||||||
def test_explicit_kilocode(self):
    """Requesting ``kilocode`` by its canonical id resolves to that id."""
    resolved = resolve_provider("kilocode")
    assert resolved == "kilocode"
|
||||||
|
|
||||||
@ -280,6 +294,10 @@ class TestResolveProvider:
|
|||||||
monkeypatch.setenv("AI_GATEWAY_API_KEY", "test-gw-key")
|
monkeypatch.setenv("AI_GATEWAY_API_KEY", "test-gw-key")
|
||||||
assert resolve_provider("auto") == "ai-gateway"
|
assert resolve_provider("auto") == "ai-gateway"
|
||||||
|
|
||||||
|
def test_auto_detects_gmi_key(self, monkeypatch):
    """With ``GMI_API_KEY`` set, ``auto`` resolution selects ``gmi``."""
    monkeypatch.setenv("GMI_API_KEY", "test-gmi-key")
    detected = resolve_provider("auto")
    assert detected == "gmi"
|
||||||
|
|
||||||
def test_auto_detects_kilocode_key(self, monkeypatch):
|
def test_auto_detects_kilocode_key(self, monkeypatch):
|
||||||
monkeypatch.setenv("KILOCODE_API_KEY", "test-kilo-key")
|
monkeypatch.setenv("KILOCODE_API_KEY", "test-kilo-key")
|
||||||
assert resolve_provider("auto") == "kilocode"
|
assert resolve_provider("auto") == "kilocode"
|
||||||
@ -497,6 +515,19 @@ class TestResolveApiKeyProviderCredentials:
|
|||||||
assert creds["api_key"] == "kilo-secret-key"
|
assert creds["api_key"] == "kilo-secret-key"
|
||||||
assert creds["base_url"] == "https://api.kilo.ai/api/gateway"
|
assert creds["base_url"] == "https://api.kilo.ai/api/gateway"
|
||||||
|
|
||||||
|
def test_resolve_gmi_with_key(self, monkeypatch):
    """Credentials for ``gmi`` carry the env key and the default base URL."""
    monkeypatch.setenv("GMI_API_KEY", "gmi-secret-key")
    resolved = resolve_api_key_provider_credentials("gmi")

    # Checked in the same order as the individual assertions they replace.
    expected_fields = (
        ("provider", "gmi"),
        ("api_key", "gmi-secret-key"),
        ("base_url", "https://api.gmi-serving.com/v1"),
    )
    for field, expected in expected_fields:
        assert resolved[field] == expected
|
||||||
|
|
||||||
|
def test_resolve_gmi_custom_base_url(self, monkeypatch):
    """``GMI_BASE_URL`` overrides the base URL in resolved credentials."""
    custom_url = "https://custom.gmi.example/v1"
    monkeypatch.setenv("GMI_API_KEY", "gmi-key")
    monkeypatch.setenv("GMI_BASE_URL", custom_url)

    resolved = resolve_api_key_provider_credentials("gmi")

    assert resolved["base_url"] == custom_url
|
||||||
|
|
||||||
def test_resolve_kilocode_custom_base_url(self, monkeypatch):
|
def test_resolve_kilocode_custom_base_url(self, monkeypatch):
|
||||||
monkeypatch.setenv("KILOCODE_API_KEY", "kilo-key")
|
monkeypatch.setenv("KILOCODE_API_KEY", "kilo-key")
|
||||||
monkeypatch.setenv("KILOCODE_BASE_URL", "https://custom.kilo.example/v1")
|
monkeypatch.setenv("KILOCODE_BASE_URL", "https://custom.kilo.example/v1")
|
||||||
@ -594,6 +625,15 @@ class TestRuntimeProviderResolution:
|
|||||||
assert result["api_key"] == "kilo-key"
|
assert result["api_key"] == "kilo-key"
|
||||||
assert "kilo.ai" in result["base_url"]
|
assert "kilo.ai" in result["base_url"]
|
||||||
|
|
||||||
|
def test_runtime_gmi(self, monkeypatch):
    """Runtime resolution for ``gmi`` yields chat-completions settings."""
    monkeypatch.setenv("GMI_API_KEY", "gmi-key")
    from hermes_cli.runtime_provider import resolve_runtime_provider

    runtime = resolve_runtime_provider(requested="gmi")

    # Checked in the same order as the individual assertions they replace.
    expected_fields = (
        ("provider", "gmi"),
        ("api_mode", "chat_completions"),
        ("api_key", "gmi-key"),
        ("base_url", "https://api.gmi-serving.com/v1"),
    )
    for field, expected in expected_fields:
        assert runtime[field] == expected
|
||||||
|
|
||||||
def test_runtime_auto_detects_api_key_provider(self, monkeypatch):
|
def test_runtime_auto_detects_api_key_provider(self, monkeypatch):
|
||||||
monkeypatch.setenv("KIMI_API_KEY", "auto-kimi-key")
|
monkeypatch.setenv("KIMI_API_KEY", "auto-kimi-key")
|
||||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||||
|
|||||||
363
tests/hermes_cli/test_gmi_provider.py
Normal file
363
tests/hermes_cli/test_gmi_provider.py
Normal file
@ -0,0 +1,363 @@
|
|||||||
|
"""Focused tests for GMI Cloud first-class provider wiring."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import contextlib
|
||||||
|
import io
|
||||||
|
import sys
|
||||||
|
import types
|
||||||
|
from argparse import Namespace
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
# Register a stand-in ``dotenv`` module whose ``load_dotenv`` does nothing,
# but only when no ``dotenv`` entry is present in ``sys.modules`` yet.
if "dotenv" not in sys.modules:
    fake_dotenv = types.ModuleType("dotenv")
    setattr(fake_dotenv, "load_dotenv", lambda *args, **kwargs: None)
    sys.modules[fake_dotenv.__name__] = fake_dotenv
|
||||||
|
|
||||||
|
from hermes_cli.auth import resolve_provider
|
||||||
|
from hermes_cli.config import load_config
|
||||||
|
from hermes_cli.models import (
|
||||||
|
CANONICAL_PROVIDERS,
|
||||||
|
_PROVIDER_LABELS,
|
||||||
|
_PROVIDER_MODELS,
|
||||||
|
normalize_provider,
|
||||||
|
provider_model_ids,
|
||||||
|
)
|
||||||
|
from agent.auxiliary_client import resolve_provider_client
|
||||||
|
from agent.model_metadata import get_model_context_length
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
def _clear_provider_env(monkeypatch):
    """Remove provider credential variables from the environment for each test.

    Runs automatically for every test in this module; variables that are
    not set are ignored.
    """
    provider_env_names = (
        "OPENROUTER_API_KEY",
        "OPENAI_API_KEY",
        "ANTHROPIC_API_KEY",
        "GOOGLE_API_KEY",
        "GLM_API_KEY",
        "KIMI_API_KEY",
        "MINIMAX_API_KEY",
        "GMI_API_KEY",
        "GMI_BASE_URL",
    )
    for env_name in provider_env_names:
        monkeypatch.delenv(env_name, raising=False)
|
||||||
|
|
||||||
|
|
||||||
|
class TestGmiAliases:
    """Every accepted spelling of GMI Cloud maps to the ``gmi`` provider id."""

    @pytest.mark.parametrize("alias", ["gmi", "gmi-cloud", "gmicloud"])
    def test_alias_resolves(self, alias, monkeypatch):
        """``resolve_provider`` returns ``gmi`` for the id and both aliases."""
        monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")
        resolved = resolve_provider(alias)
        assert resolved == "gmi"

    def test_models_normalize_provider(self):
        """``hermes_cli.models.normalize_provider`` maps both aliases."""
        for alias in ("gmi-cloud", "gmicloud"):
            assert normalize_provider(alias) == "gmi"

    def test_providers_normalize_provider(self):
        """``hermes_cli.providers.normalize_provider`` maps both aliases."""
        from hermes_cli.providers import normalize_provider as normalize_provider_in_providers

        for alias in ("gmi-cloud", "gmicloud"):
            assert normalize_provider_in_providers(alias) == "gmi"
|
||||||
|
|
||||||
|
|
||||||
|
class TestGmiConfigRegistry:
    """The GMI environment variables are registered in the config metadata."""

    def test_optional_env_vars_include_gmi(self):
        """Both GMI variables are optional provider vars listed under version 17."""
        from hermes_cli.config import ENV_VARS_BY_VERSION, OPTIONAL_ENV_VARS

        assert "GMI_API_KEY" in OPTIONAL_ENV_VARS
        api_key_meta = OPTIONAL_ENV_VARS["GMI_API_KEY"]
        assert api_key_meta["category"] == "provider"
        assert api_key_meta["password"] is True
        assert api_key_meta["url"] == "https://www.gmicloud.ai/"

        assert "GMI_BASE_URL" in OPTIONAL_ENV_VARS
        base_url_meta = OPTIONAL_ENV_VARS["GMI_BASE_URL"]
        assert base_url_meta["category"] == "provider"
        assert base_url_meta["password"] is False

        version_17_vars = ENV_VARS_BY_VERSION[17]
        assert "GMI_API_KEY" in version_17_vars
        assert "GMI_BASE_URL" in version_17_vars
|
||||||
|
|
||||||
|
|
||||||
|
class TestGmiModelCatalog:
    """The GMI model list comes from the live API, else the static catalog."""

    def test_static_model_fallback_exists(self):
        """The static catalog has a ``gmi`` entry with the expected model ids."""
        assert "gmi" in _PROVIDER_MODELS
        static_models = _PROVIDER_MODELS["gmi"]
        expected_ids = (
            "zai-org/GLM-5.1-FP8",
            "deepseek-ai/DeepSeek-V3.2",
            "moonshotai/Kimi-K2.5",
            "anthropic/claude-sonnet-4.6",
        )
        for model_id in expected_ids:
            assert model_id in static_models

    def test_canonical_provider_entry(self):
        """``gmi`` appears among the canonical provider slugs."""
        canonical_slugs = tuple(entry.slug for entry in CANONICAL_PROVIDERS)
        assert "gmi" in canonical_slugs

    def test_provider_model_ids_prefers_live_api(self, monkeypatch):
        """When the live fetch returns ids, exactly those ids are returned."""
        live_ids = ["openai/gpt-5.4-mini", "zai-org/GLM-5.1-FP8"]

        def fake_credentials(provider_id):
            return {
                "provider": provider_id,
                "api_key": "gmi-live-key",
                "base_url": "https://api.gmi-serving.com/v1",
                "source": "GMI_API_KEY",
            }

        def fake_fetch(api_key, base_url):
            # Fresh list per call, as a lambda returning a list literal would give.
            return list(live_ids)

        monkeypatch.setattr(
            "hermes_cli.auth.resolve_api_key_provider_credentials", fake_credentials
        )
        monkeypatch.setattr("hermes_cli.models.fetch_api_models", fake_fetch)

        assert provider_model_ids("gmi") == [
            "openai/gpt-5.4-mini",
            "zai-org/GLM-5.1-FP8",
        ]

    def test_provider_model_ids_falls_back_to_static_models(self, monkeypatch):
        """When the live fetch returns ``None``, the static catalog is used."""

        def fake_credentials(provider_id):
            return {
                "provider": provider_id,
                "api_key": "gmi-live-key",
                "base_url": "https://api.gmi-serving.com/v1",
                "source": "GMI_API_KEY",
            }

        def fake_fetch(api_key, base_url):
            return None

        monkeypatch.setattr(
            "hermes_cli.auth.resolve_api_key_provider_credentials", fake_credentials
        )
        monkeypatch.setattr("hermes_cli.models.fetch_api_models", fake_fetch)

        static_models = list(_PROVIDER_MODELS["gmi"])
        assert provider_model_ids("gmi") == static_models
|
||||||
|
|
||||||
|
|
||||||
|
class TestGmiProvidersModule:
    """The providers module exposes a GMI overlay and display label."""

    def test_overlay_exists(self):
        """The ``gmi`` overlay carries the expected transport and endpoint settings."""
        from hermes_cli.providers import HERMES_OVERLAYS

        assert "gmi" in HERMES_OVERLAYS
        gmi_overlay = HERMES_OVERLAYS["gmi"]

        assert gmi_overlay.transport == "openai_chat"
        assert gmi_overlay.extra_env_vars == ("GMI_API_KEY",)
        assert gmi_overlay.base_url_override == "https://api.gmi-serving.com/v1"
        assert gmi_overlay.base_url_env_var == "GMI_BASE_URL"
        assert not gmi_overlay.is_aggregator

    def test_provider_label(self):
        """The label registered for ``gmi`` is ``GMI Cloud``."""
        label = _PROVIDER_LABELS["gmi"]
        assert label == "GMI Cloud"
|
||||||
|
|
||||||
|
|
||||||
|
class TestGmiDoctor:
    """``hermes doctor`` recognizes GMI credentials and probes its endpoint."""

    def test_provider_env_hints_include_gmi(self):
        """``GMI_API_KEY`` is one of the doctor's provider env hints."""
        from hermes_cli.doctor import _PROVIDER_ENV_HINTS

        assert "GMI_API_KEY" in _PROVIDER_ENV_HINTS

    def test_run_doctor_checks_gmi_models_endpoint(self, monkeypatch, tmp_path):
        """With only a GMI key configured, doctor requests GMI's ``/models`` URL."""
        from hermes_cli import doctor as doctor_mod

        # Throwaway Hermes home and project root for the doctor run.
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
        (hermes_home / "config.yaml").write_text("memory: {}\n", encoding="utf-8")
        (hermes_home / ".env").write_text("GMI_API_KEY=gmi-test-key\n", encoding="utf-8")
        project_root = tmp_path / "project"
        project_root.mkdir(exist_ok=True)

        monkeypatch.setattr(doctor_mod, "HERMES_HOME", hermes_home)
        monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", project_root)
        monkeypatch.setattr(doctor_mod, "_DHH", str(hermes_home))
        monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")

        # Drop every other provider credential; presumably so GMI is the only
        # provider doctor finds configured.
        other_provider_env_names = (
            "OPENROUTER_API_KEY",
            "OPENAI_API_KEY",
            "ANTHROPIC_API_KEY",
            "ANTHROPIC_TOKEN",
            "GLM_API_KEY",
            "ZAI_API_KEY",
            "Z_AI_API_KEY",
            "KIMI_API_KEY",
            "KIMI_CN_API_KEY",
            "ARCEEAI_API_KEY",
            "DEEPSEEK_API_KEY",
            "HF_TOKEN",
            "DASHSCOPE_API_KEY",
            "MINIMAX_API_KEY",
            "MINIMAX_CN_API_KEY",
            "AI_GATEWAY_API_KEY",
            "KILOCODE_API_KEY",
            "OPENCODE_ZEN_API_KEY",
            "OPENCODE_GO_API_KEY",
            "XIAOMI_API_KEY",
        )
        for env_name in other_provider_env_names:
            monkeypatch.delenv(env_name, raising=False)

        # Swap in a stub ``model_tools`` module reporting no tools.
        stub_model_tools = types.SimpleNamespace(
            check_tool_availability=lambda *a, **kw: ([], []),
            TOOLSET_REQUIREMENTS={},
        )
        monkeypatch.setitem(sys.modules, "model_tools", stub_model_tools)

        # Best effort: failures while stubbing the auth status helpers are
        # deliberately ignored, as in the original test.
        try:
            from hermes_cli import auth as _auth_mod

            monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {})
            monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
        except Exception:
            pass

        calls = []

        def fake_get(url, headers=None, timeout=None):
            # Record the request and report HTTP 200 without touching the network.
            calls.append((url, headers, timeout))
            return types.SimpleNamespace(status_code=200)

        import httpx

        monkeypatch.setattr(httpx, "get", fake_get)

        captured = io.StringIO()
        with contextlib.redirect_stdout(captured):
            doctor_mod.run_doctor(Namespace(fix=False))
        out = captured.getvalue()

        assert "API key or custom endpoint configured" in out
        assert "GMI Cloud" in out
        requested_urls = [recorded_call[0] for recorded_call in calls]
        assert "https://api.gmi-serving.com/v1/models" in requested_urls
|
||||||
|
|
||||||
|
|
||||||
|
class TestGmiModelMetadata:
    """Model-metadata lookups recognize the GMI host and its aliases."""

    def test_url_to_provider(self):
        """The GMI API host maps to the ``gmi`` provider."""
        from agent.model_metadata import _URL_TO_PROVIDER

        mapped_provider = _URL_TO_PROVIDER.get("api.gmi-serving.com")
        assert mapped_provider == "gmi"

    def test_provider_prefixes(self):
        """The canonical id and both aliases are registered provider prefixes."""
        from agent.model_metadata import _PROVIDER_PREFIXES

        for prefix in ("gmi", "gmi-cloud", "gmicloud"):
            assert prefix in _PROVIDER_PREFIXES

    def test_infer_from_url(self):
        """A GMI base URL is inferred as the ``gmi`` provider."""
        from agent.model_metadata import _infer_provider_from_url

        inferred = _infer_provider_from_url("https://api.gmi-serving.com/v1")
        assert inferred == "gmi"

    def test_known_gmi_endpoint_still_uses_endpoint_metadata(self):
        """Endpoint metadata supplies the context length for a GMI base URL.

        The cache, models.dev lookup, and ``fetch_model_metadata`` are patched
        to return nothing, so only the endpoint metadata carries a value.
        """
        no_cached_length = patch(
            "agent.model_metadata.get_cached_context_length",
            return_value=None,
        )
        endpoint_metadata = patch(
            "agent.model_metadata.fetch_endpoint_model_metadata",
            return_value={"anthropic/claude-opus-4.6": {"context_length": 409600}},
        )
        no_models_dev = patch(
            "agent.models_dev.lookup_models_dev_context",
            return_value=None,
        )
        empty_model_metadata = patch(
            "agent.model_metadata.fetch_model_metadata",
            return_value={},
        )

        with no_cached_length, endpoint_metadata, no_models_dev, empty_model_metadata:
            context_length = get_model_context_length(
                "anthropic/claude-opus-4.6",
                base_url="https://api.gmi-serving.com/v1",
                api_key="gmi-test-key",
                provider="custom",
            )

        assert context_length == 409600
|
||||||
|
|
||||||
|
|
||||||
|
class TestGmiAuxiliary:
    """The auxiliary client resolves GMI with its default auxiliary model."""

    def test_aux_default_model(self):
        """The default auxiliary model registered for ``gmi`` is Claude Opus 4.6."""
        from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS

        default_model = _API_KEY_PROVIDER_AUX_MODELS["gmi"]
        assert default_model == "anthropic/claude-opus-4.6"

    def test_resolve_provider_client_uses_gmi_aux_default(self, monkeypatch):
        """Resolving ``gmi`` builds a client with the env key and GMI base URL."""
        monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")

        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            mock_openai.return_value = object()
            client, model = resolve_provider_client("gmi")

        assert client is not None
        assert model == "anthropic/claude-opus-4.6"
        constructor_kwargs = mock_openai.call_args.kwargs
        assert constructor_kwargs["api_key"] == "gmi-test-key"
        assert constructor_kwargs["base_url"] == "https://api.gmi-serving.com/v1"

    def test_resolve_provider_client_accepts_gmi_alias(self, monkeypatch):
        """The ``gmi-cloud`` alias resolves to a client with the same default model."""
        monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")

        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            mock_openai.return_value = object()
            client, model = resolve_provider_client("gmi-cloud")

        assert client is not None
        assert model == "anthropic/claude-opus-4.6"
|
||||||
|
|
||||||
|
|
||||||
|
class TestGmiMainFlow:
    """CLI entry points accept GMI and persist a GMI model selection."""

    def test_chat_parser_accepts_gmi_provider(self, monkeypatch):
        """``hermes chat --provider gmi`` reaches ``cmd_chat`` with provider ``gmi``."""
        recorded: dict[str, str] = {}

        def fake_cmd_chat(args):
            return recorded.setdefault("provider", args.provider)

        monkeypatch.setattr("hermes_cli.config.get_container_exec_info", lambda: None)
        monkeypatch.setattr("hermes_cli.main.cmd_chat", fake_cmd_chat)
        monkeypatch.setattr(sys, "argv", ["hermes", "chat", "--provider", "gmi"])

        from hermes_cli.main import main

        main()

        assert recorded["provider"] == "gmi"

    def test_select_provider_and_model_routes_gmi_to_generic_flow(self, monkeypatch):
        """Choosing the "GMI Cloud" entry calls the API-key flow with ``gmi``."""
        recorded: dict[str, str] = {}

        monkeypatch.setattr("hermes_cli.auth.resolve_provider", lambda *args, **kwargs: None)

        def fake_prompt_provider_choice(choices, default=0):
            # Pick the first menu entry whose label starts with "GMI Cloud".
            gmi_positions = (
                position
                for position, label in enumerate(choices)
                if label.startswith("GMI Cloud")
            )
            return next(gmi_positions)

        def fake_model_flow_api_key_provider(config, provider_id, current_model=""):
            recorded["provider_id"] = provider_id

        monkeypatch.setattr("hermes_cli.main._prompt_provider_choice", fake_prompt_provider_choice)
        monkeypatch.setattr("hermes_cli.main._model_flow_api_key_provider", fake_model_flow_api_key_provider)

        from hermes_cli.main import select_provider_and_model

        select_provider_and_model()

        assert recorded["provider_id"] == "gmi"

    def test_model_flow_api_key_provider_persists_gmi_selection(self, monkeypatch):
        """The API-key flow writes provider, model, and base URL to ``config.yaml``."""
        monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")

        live_models = patch(
            "hermes_cli.models.fetch_api_models",
            return_value=["zai-org/GLM-5.1-FP8", "openai/gpt-5.4-mini"],
        )
        chosen_model = patch(
            "hermes_cli.auth._prompt_model_selection",
            return_value="openai/gpt-5.4-mini",
        )
        no_deactivate = patch("hermes_cli.auth.deactivate_provider")
        empty_input = patch("builtins.input", return_value="")

        with live_models, chosen_model, no_deactivate, empty_input:
            from hermes_cli.main import _model_flow_api_key_provider

            _model_flow_api_key_provider(load_config(), "gmi", "old-model")

        import yaml
        from hermes_constants import get_hermes_home

        config_path = get_hermes_home() / "config.yaml"
        config = yaml.safe_load(config_path.read_text()) or {}
        model_cfg = config.get("model")

        assert isinstance(model_cfg, dict)
        assert model_cfg["provider"] == "gmi"
        assert model_cfg["default"] == "openai/gpt-5.4-mini"
        assert model_cfg["base_url"] == "https://api.gmi-serving.com/v1"
|
||||||
@ -66,13 +66,30 @@ hermes model
|
|||||||
|
|
||||||
Good defaults:
|
Good defaults:
|
||||||
|
|
||||||
| Situation | Recommended path |
|
| Provider | What it is | How to set up |
|
||||||
|---|---|
|
|----------|-----------|---------------|
|
||||||
| Least friction | Nous Portal or OpenRouter |
|
| **Nous Portal** | Subscription-based, zero-config | OAuth login via `hermes model` |
|
||||||
| You already have Claude or Codex auth | Anthropic or OpenAI Codex |
|
| **OpenAI Codex** | ChatGPT OAuth, uses Codex models | Device code auth via `hermes model` |
|
||||||
| You want local/private inference | Ollama or any custom OpenAI-compatible endpoint |
|
| **Anthropic** | Claude models directly (Pro/Max or API key) | `hermes model` with Claude Code auth, or an Anthropic API key |
|
||||||
| You want multi-provider routing | OpenRouter |
|
| **OpenRouter** | Multi-provider routing across many models | Enter your API key |
|
||||||
| You have a custom GPU server | vLLM, SGLang, LiteLLM, or any OpenAI-compatible endpoint |
|
| **Z.AI** | GLM / Zhipu-hosted models | Set `GLM_API_KEY` / `ZAI_API_KEY` |
|
||||||
|
| **Kimi / Moonshot** | Moonshot-hosted coding and chat models | Set `KIMI_API_KEY` |
|
||||||
|
| **Kimi / Moonshot China** | China-region Moonshot endpoint | Set `KIMI_CN_API_KEY` |
|
||||||
|
| **Arcee AI** | Trinity models | Set `ARCEEAI_API_KEY` |
|
||||||
|
| **GMI Cloud** | Multi-model direct API | Set `GMI_API_KEY` |
|
||||||
|
| **MiniMax** | International MiniMax endpoint | Set `MINIMAX_API_KEY` |
|
||||||
|
| **MiniMax China** | China-region MiniMax endpoint | Set `MINIMAX_CN_API_KEY` |
|
||||||
|
| **Alibaba Cloud** | Qwen models via DashScope | Set `DASHSCOPE_API_KEY` |
|
||||||
|
| **Hugging Face** | 20+ open models via unified router (Qwen, DeepSeek, Kimi, etc.) | Set `HF_TOKEN` |
|
||||||
|
| **Kilo Code** | KiloCode-hosted models | Set `KILOCODE_API_KEY` |
|
||||||
|
| **OpenCode Zen** | Pay-as-you-go access to curated models | Set `OPENCODE_ZEN_API_KEY` |
|
||||||
|
| **OpenCode Go** | $10/month subscription for open models | Set `OPENCODE_GO_API_KEY` |
|
||||||
|
| **DeepSeek** | Direct DeepSeek API access | Set `DEEPSEEK_API_KEY` |
|
||||||
|
| **NVIDIA NIM** | Nemotron models via build.nvidia.com or local NIM | Set `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) |
|
||||||
|
| **GitHub Copilot** | GitHub Copilot subscription (GPT-5.x, Claude, Gemini, etc.) | OAuth via `hermes model`, or `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` |
|
||||||
|
| **GitHub Copilot ACP** | Copilot ACP agent backend (spawns local `copilot` CLI) | `hermes model` (requires `copilot` CLI + `copilot login`) |
|
||||||
|
| **Vercel AI Gateway** | Vercel AI Gateway routing | Set `AI_GATEWAY_API_KEY` |
|
||||||
|
| **Custom Endpoint** | vLLM, SGLang, Ollama, or any OpenAI-compatible API | Set base URL + API key |
|
||||||
|
|
||||||
For most first-time users: choose a provider, accept the defaults unless you know why you're changing them. The full provider catalog with env vars and setup steps lives on the [Providers](../integrations/providers.md) page.
|
For most first-time users: choose a provider, accept the defaults unless you know why you're changing them. The full provider catalog with env vars and setup steps lives on the [Providers](../integrations/providers.md) page.
|
||||||
|
|
||||||
|
|||||||
@ -25,6 +25,7 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro
|
|||||||
| **Kimi / Moonshot** | `KIMI_API_KEY` in `~/.hermes/.env` (provider: `kimi-coding`) |
|
| **Kimi / Moonshot** | `KIMI_API_KEY` in `~/.hermes/.env` (provider: `kimi-coding`) |
|
||||||
| **Kimi / Moonshot (China)** | `KIMI_CN_API_KEY` in `~/.hermes/.env` (provider: `kimi-coding-cn`; aliases: `kimi-cn`, `moonshot-cn`) |
|
| **Kimi / Moonshot (China)** | `KIMI_CN_API_KEY` in `~/.hermes/.env` (provider: `kimi-coding-cn`; aliases: `kimi-cn`, `moonshot-cn`) |
|
||||||
| **Arcee AI** | `ARCEEAI_API_KEY` in `~/.hermes/.env` (provider: `arcee`; aliases: `arcee-ai`, `arceeai`) |
|
| **Arcee AI** | `ARCEEAI_API_KEY` in `~/.hermes/.env` (provider: `arcee`; aliases: `arcee-ai`, `arceeai`) |
|
||||||
|
| **GMI Cloud** | `GMI_API_KEY` in `~/.hermes/.env` (provider: `gmi`; aliases: `gmi-cloud`, `gmicloud`) |
|
||||||
| **MiniMax** | `MINIMAX_API_KEY` in `~/.hermes/.env` (provider: `minimax`) |
|
| **MiniMax** | `MINIMAX_API_KEY` in `~/.hermes/.env` (provider: `minimax`) |
|
||||||
| **MiniMax China** | `MINIMAX_CN_API_KEY` in `~/.hermes/.env` (provider: `minimax-cn`) |
|
| **MiniMax China** | `MINIMAX_CN_API_KEY` in `~/.hermes/.env` (provider: `minimax-cn`) |
|
||||||
| **Alibaba Cloud** | `DASHSCOPE_API_KEY` in `~/.hermes/.env` (provider: `alibaba`, aliases: `dashscope`, `qwen`) |
|
| **Alibaba Cloud** | `DASHSCOPE_API_KEY` in `~/.hermes/.env` (provider: `alibaba`, aliases: `dashscope`, `qwen`) |
|
||||||
@ -250,7 +251,7 @@ model:
|
|||||||
| `HERMES_COPILOT_ACP_COMMAND` | Override the Copilot CLI binary path (default: `copilot`) |
|
| `HERMES_COPILOT_ACP_COMMAND` | Override the Copilot CLI binary path (default: `copilot`) |
|
||||||
| `HERMES_COPILOT_ACP_ARGS` | Override ACP args (default: `--acp --stdio`) |
|
| `HERMES_COPILOT_ACP_ARGS` | Override ACP args (default: `--acp --stdio`) |
|
||||||
|
|
||||||
### First-Class Chinese AI Providers
|
### First-Class API-Key Providers
|
||||||
|
|
||||||
These providers have built-in support with dedicated provider IDs. Set the API key and use `--provider` to select:
|
These providers have built-in support with dedicated provider IDs. Set the API key and use `--provider` to select:
|
||||||
|
|
||||||
@ -286,16 +287,21 @@ hermes chat --provider xiaomi --model mimo-v2-pro
|
|||||||
# Arcee AI (Trinity models)
|
# Arcee AI (Trinity models)
|
||||||
hermes chat --provider arcee --model trinity-large-thinking
|
hermes chat --provider arcee --model trinity-large-thinking
|
||||||
# Requires: ARCEEAI_API_KEY in ~/.hermes/.env
|
# Requires: ARCEEAI_API_KEY in ~/.hermes/.env
|
||||||
|
|
||||||
|
# GMI Cloud
|
||||||
|
# Use the exact model ID returned by GMI's /v1/models endpoint.
|
||||||
|
hermes chat --provider gmi --model zai-org/GLM-5.1-FP8
|
||||||
|
# Requires: GMI_API_KEY in ~/.hermes/.env
|
||||||
```
|
```
|
||||||
|
|
||||||
Or set the provider permanently in `config.yaml`:
|
Or set the provider permanently in `config.yaml`:
|
||||||
```yaml
|
```yaml
|
||||||
model:
|
model:
|
||||||
provider: "zai" # or: kimi-coding, kimi-coding-cn, minimax, minimax-cn, alibaba, xiaomi, arcee
|
provider: "gmi"
|
||||||
default: "glm-5"
|
default: "zai-org/GLM-5.1-FP8"
|
||||||
```
|
```
|
||||||
|
|
||||||
Base URLs can be overridden with `GLM_BASE_URL`, `KIMI_BASE_URL`, `MINIMAX_BASE_URL`, `MINIMAX_CN_BASE_URL`, `DASHSCOPE_BASE_URL`, or `XIAOMI_BASE_URL` environment variables.
|
Base URLs can be overridden with `GLM_BASE_URL`, `KIMI_BASE_URL`, `MINIMAX_BASE_URL`, `MINIMAX_CN_BASE_URL`, `DASHSCOPE_BASE_URL`, `XIAOMI_BASE_URL`, or `GMI_BASE_URL` environment variables.
|
||||||
|
|
||||||
:::note Z.AI Endpoint Auto-Detection
|
:::note Z.AI Endpoint Auto-Detection
|
||||||
When using the Z.AI / GLM provider, Hermes automatically probes multiple endpoints (global, China, coding variants) to find one that accepts your API key. You don't need to set `GLM_BASE_URL` manually — the working endpoint is detected and cached automatically.
|
When using the Z.AI / GLM provider, Hermes automatically probes multiple endpoints (global, China, coding variants) to find one that accepts your API key. You don't need to set `GLM_BASE_URL` manually — the working endpoint is detected and cached automatically.
|
||||||
|
|||||||
@ -36,6 +36,8 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
|
|||||||
| `KIMI_CN_API_KEY` | Kimi / Moonshot China API key ([moonshot.cn](https://platform.moonshot.cn)) |
|
| `KIMI_CN_API_KEY` | Kimi / Moonshot China API key ([moonshot.cn](https://platform.moonshot.cn)) |
|
||||||
| `ARCEEAI_API_KEY` | Arcee AI API key ([chat.arcee.ai](https://chat.arcee.ai/)) |
|
| `ARCEEAI_API_KEY` | Arcee AI API key ([chat.arcee.ai](https://chat.arcee.ai/)) |
|
||||||
| `ARCEE_BASE_URL` | Override Arcee base URL (default: `https://api.arcee.ai/api/v1`) |
|
| `ARCEE_BASE_URL` | Override Arcee base URL (default: `https://api.arcee.ai/api/v1`) |
|
||||||
|
| `GMI_API_KEY` | GMI Cloud API key ([gmicloud.ai](https://www.gmicloud.ai/)) |
|
||||||
|
| `GMI_BASE_URL` | Override GMI Cloud base URL (default: `https://api.gmi-serving.com/v1`) |
|
||||||
| `MINIMAX_API_KEY` | MiniMax API key — global endpoint ([minimax.io](https://www.minimax.io)) |
|
| `MINIMAX_API_KEY` | MiniMax API key — global endpoint ([minimax.io](https://www.minimax.io)) |
|
||||||
| `MINIMAX_BASE_URL` | Override MiniMax base URL (default: `https://api.minimax.io/anthropic` — Hermes uses MiniMax's Anthropic Messages-compatible endpoint) |
|
| `MINIMAX_BASE_URL` | Override MiniMax base URL (default: `https://api.minimax.io/anthropic` — Hermes uses MiniMax's Anthropic Messages-compatible endpoint) |
|
||||||
| `MINIMAX_CN_API_KEY` | MiniMax API key — China endpoint ([minimaxi.com](https://www.minimaxi.com)) |
|
| `MINIMAX_CN_API_KEY` | MiniMax API key — China endpoint ([minimaxi.com](https://www.minimaxi.com)) |
|
||||||
@ -89,7 +91,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
|
|||||||
|
|
||||||
| Variable | Description |
|
| Variable | Description |
|
||||||
|----------|-------------|
|
|----------|-------------|
|
||||||
| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway` (default: `auto`) |
|
| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `custom`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `gemini`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway` (default: `auto`) |
|
||||||
| `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) |
|
| `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) |
|
||||||
| `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL |
|
| `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL |
|
||||||
| `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) |
|
| `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) |
|
||||||
|
|||||||
@ -801,6 +801,17 @@ These options apply to **auxiliary task configs** (`auxiliary:`, `compression:`,
|
|||||||
| `"codex"` | Force Codex OAuth (ChatGPT account). Supports vision (gpt-5.3-codex). | `hermes model` → Codex |
|
| `"codex"` | Force Codex OAuth (ChatGPT account). Supports vision (gpt-5.3-codex). | `hermes model` → Codex |
|
||||||
| `"main"` | Use your active custom/main endpoint. This can come from `OPENAI_BASE_URL` + `OPENAI_API_KEY` or from a custom endpoint saved via `hermes model` / `config.yaml`. Works with OpenAI, local models, or any OpenAI-compatible API. **Auxiliary tasks only — not valid for `model.provider`.** | Custom endpoint credentials + base URL |
|
| `"main"` | Use your active custom/main endpoint. This can come from `OPENAI_BASE_URL` + `OPENAI_API_KEY` or from a custom endpoint saved via `hermes model` / `config.yaml`. Works with OpenAI, local models, or any OpenAI-compatible API. **Auxiliary tasks only — not valid for `model.provider`.** | Custom endpoint credentials + base URL |
|
||||||
|
|
||||||
|
Direct API-key providers from the main provider catalog also work here when you want side tasks to bypass your default router. `gmi` is valid once `GMI_API_KEY` is configured:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
auxiliary:
|
||||||
|
compression:
|
||||||
|
provider: "gmi"
|
||||||
|
model: "anthropic/claude-opus-4.6"
|
||||||
|
```
|
||||||
|
|
||||||
|
For GMI auxiliary routing, use the exact model ID returned by GMI's `/v1/models` endpoint.
|
||||||
|
|
||||||
### Common Setups
|
### Common Setups
|
||||||
|
|
||||||
**Using a direct custom endpoint** (clearer than `provider: "main"` for local/self-hosted APIs):
|
**Using a direct custom endpoint** (clearer than `provider: "main"` for local/self-hosted APIs):
|
||||||
|
|||||||
@ -59,6 +59,7 @@ Both `provider` and `model` are **required**. If either is missing, the fallback
|
|||||||
| Kilo Code | `kilocode` | `KILOCODE_API_KEY` |
|
| Kilo Code | `kilocode` | `KILOCODE_API_KEY` |
|
||||||
| Xiaomi MiMo | `xiaomi` | `XIAOMI_API_KEY` |
|
| Xiaomi MiMo | `xiaomi` | `XIAOMI_API_KEY` |
|
||||||
| Arcee AI | `arcee` | `ARCEEAI_API_KEY` |
|
| Arcee AI | `arcee` | `ARCEEAI_API_KEY` |
|
||||||
|
| GMI Cloud | `gmi` | `GMI_API_KEY` |
|
||||||
| Alibaba / DashScope | `alibaba` | `DASHSCOPE_API_KEY` |
|
| Alibaba / DashScope | `alibaba` | `DASHSCOPE_API_KEY` |
|
||||||
| Hugging Face | `huggingface` | `HF_TOKEN` |
|
| Hugging Face | `huggingface` | `HF_TOKEN` |
|
||||||
| Custom endpoint | `custom` | `base_url` + `key_env` (see below) |
|
| Custom endpoint | `custom` | `base_url` + `key_env` (see below) |
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user