From 7e7871875cc3d3b39ea2d7e99af470df9ca70988 Mon Sep 17 00:00:00 2001
From: Hongming Wang
Date: Wed, 22 Apr 2026 12:05:36 -0700
Subject: [PATCH] =?UTF-8?q?fix(v2.1.0):=20startup=20bugs=20+=20full=20prov?=
 =?UTF-8?q?ider=20matrix=20=E2=80=94=20proven=20end-to-end=20(#6)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Stacked follow-up on the v2.0.0 rewrite. The merged v2.0.0 template had
three latent issues that only surfaced during local E2E testing:

1) sudo → gosu. python:3.11-slim ships neither, and only gosu was
   installed in the Dockerfile; start.sh was calling sudo, which would
   have broken every container boot.
2) PATH pointed at /home/agent/.hermes/bin, which doesn't exist —
   install.sh symlinks ~/.local/bin/hermes. The installer is also
   interactive by default; it needs --skip-setup to run inside
   docker build.
3) start.sh wrote ~/.hermes/cli-config.yaml, but hermes-agent reads
   ~/.hermes/config.yaml. cli-config.yaml.example is just a starter
   file — install.sh copies it to config.yaml on first boot. Without
   our overwrite, the template inherited the example default
   (anthropic/claude-opus-4.6 + provider: auto) instead of the
   workspace's chosen model. We now rewrite config.yaml on every boot
   from the HERMES_DEFAULT_MODEL + HERMES_INFERENCE_PROVIDER env vars.

Also:

- Added xz-utils + build-essential to the image (the hermes installer
  extracts a Node 22 .tar.xz, and some Python deps in the `.[all]`
  extra build from source).
- Forward every provider key hermes-agent knows about, not just the 6
  from v2.0.0.
  All ~22 providers documented in the official
  website/docs/integrations/providers.md are now wired: HERMES_API_KEY,
  NOUS_API_KEY, OPENROUTER_API_KEY, OPENAI_API_KEY, ANTHROPIC_API_KEY,
  GEMINI_API_KEY, GOOGLE_API_KEY, DEEPSEEK_API_KEY, GLM_API_KEY,
  KIMI_API_KEY, KIMI_CN_API_KEY, MINIMAX_API_KEY, MINIMAX_CN_API_KEY,
  DASHSCOPE_API_KEY, XIAOMI_API_KEY, ARCEEAI_API_KEY, NVIDIA_API_KEY,
  OLLAMA_API_KEY, HF_TOKEN, AI_GATEWAY_API_KEY, KILOCODE_API_KEY,
  OPENCODE_ZEN_API_KEY, OPENCODE_GO_API_KEY, COPILOT_GITHUB_TOKEN,
  GH_TOKEN.
- config.yaml models[] list expanded to 30+ entries covering every
  provider family (Hermes 3/4, Anthropic direct, OpenAI via OpenRouter,
  Gemini direct, DeepSeek, GLM, Kimi, MiniMax global+CN, Qwen/DashScope,
  Xiaomi MiMo, Arcee Trinity, NVIDIA NIM, Ollama Cloud, Hugging Face
  catch-all, Vercel AI Gateway, OpenCode Zen+Go, Kilo Code, OpenRouter
  catch-all, custom/local).
- Top-level required_env: [] — hermes supports too many providers for a
  single hardcoded requirement; per-model required_env in the canvas
  Config tab drives the real UX. hermes-agent itself errors loudly at
  request time if zero providers are configured.
- HERMES_CUSTOM_BASE_URL / HERMES_CUSTOM_API_KEY env support in
  start.sh — lets operators point hermes at OpenAI direct, LM Studio,
  LiteLLM, or any other OpenAI-compatible endpoint without exec-ing
  into the container.
- HERMES_INFERENCE_PROVIDER env — forces a specific provider,
  overriding hermes' auto-detection (which routes OPENAI_API_KEY to the
  openai-codex OAuth path → 401 Missing Authentication header).
- docs/CONFIGURATION.md rewritten with the full provider matrix, the
  OAuth flow, forcing a provider, the auxiliary model, the persistence
  layout, and the common routing gotchas surfaced during testing.
- docs/ARCHITECTURE.md adds a "Provider routing (how keys become
  inference)" section.
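The key-forwarding change in start.sh boils down to an allowlist loop
over the env var names above. A minimal sketch of the pattern, for
reviewers (illustrative only: the allowlist here is truncated, the real
script targets ~/.hermes/.env rather than a temp file, and the demo
export stands in for a key injected by the platform):

```shell
#!/bin/sh
# Sketch of the provider-key forwarding pattern — not the literal
# start.sh code. Pretend the container was launched with one key set:
unset HERMES_API_KEY ANTHROPIC_API_KEY GEMINI_API_KEY \
      DEEPSEEK_API_KEY MINIMAX_API_KEY
export OPENROUTER_API_KEY="sk-or-demo"

ENV_FILE="$(mktemp)"
for key in HERMES_API_KEY OPENROUTER_API_KEY ANTHROPIC_API_KEY \
           GEMINI_API_KEY DEEPSEEK_API_KEY MINIMAX_API_KEY; do
  val="$(printenv "$key" || true)"
  # Forward only the keys that are actually set in the container env.
  if [ -n "$val" ]; then
    printf '%s=%s\n' "$key" "$val" >>"$ENV_FILE"
  fi
done
cat "$ENV_FILE"
```

Unset keys are skipped rather than written as empty values, so the
generated .env only ever lists providers the operator configured.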
Proved end-to-end on local Docker:

  [start.sh] hermes gateway ready on :8642 (pid 22)
  Uvicorn running on http://0.0.0.0:8000
  → A2A message/send "Respond with HERMES BRIDGE WORKING END TO END"
  ← HERMES BRIDGE WORKING END TO END — (via OpenAI Responses API)
  → "Run uname -a && whoami && pwd using your terminal tool"
  ← Linux 094f72... aarch64 GNU/Linux / agent / /home/agent
    (real tool call — not a chat response)

Co-authored-by: Claude Opus 4.7 (1M context)
---
 Dockerfile            |  27 +++++--
 config.yaml           | 179 ++++++++++++++++++++++++++++++++++--------
 docs/ARCHITECTURE.md  |  36 +++++++
 docs/CONFIGURATION.md | 161 +++++++++++++++++++++++++++----------
 start.sh              | 104 ++++++++++++++++++++----
 5 files changed, 410 insertions(+), 97 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index deb0fdf..0b55ac3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,9 +1,14 @@
 FROM python:3.11-slim

-# System deps: curl for the hermes installer, git for the agent's file/repo
-# tools, gosu so start.sh can drop privileges, ca-certificates for TLS.
+# System deps:
+#   curl            — hermes installer + loopback health probe in start.sh
+#   ca-certificates — TLS for all the outbound installs
+#   git             — hermes installer clones the repo; also used by agent tools
+#   gosu            — drop privileges in start.sh (single-process friendly)
+#   xz-utils        — hermes installer extracts a Node 22 tarball (.tar.xz)
+#   build-essential — some python deps in the hermes `.[all]` extra compile from src
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    curl ca-certificates git gosu \
+    curl ca-certificates git gosu xz-utils build-essential \
     && rm -rf /var/lib/apt/lists/*

 # Non-root agent user. hermes-agent writes its state into ~/.hermes so
@@ -26,13 +31,19 @@ COPY start.sh /usr/local/bin/start.sh
 RUN chmod +x /usr/local/bin/start.sh

 # --- Install the real Nous Research hermes-agent as the agent user ---
-# The installer lives under the agent's home (~/.hermes, PATH update in
-# .bashrc). Running as root would place it in /root and break discovery.
+# The installer lives under the agent's home (~/.hermes, symlinks the
+# `hermes` entrypoint into ~/.local/bin/). Running as root would place
+# it in /root and break discovery.
+# --skip-setup → no interactive wizard (curl|bash is non-tty anyway,
+#               but the installer treats that as "run anyway" by
+#               default; passing the flag explicitly avoids surprises).
 USER agent
 WORKDIR /home/agent
-RUN curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash
-# Make `hermes` available in non-interactive shells (start.sh).
-ENV PATH="/home/agent/.local/bin:/home/agent/.hermes/bin:${PATH}"
+RUN curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh \
+    | bash -s -- --skip-setup
+# The hermes installer symlinks ~/.hermes/hermes-agent/venv/bin/hermes into
+# ~/.local/bin/hermes, so ~/.local/bin is the only PATH entry we need.
+ENV PATH="/home/agent/.local/bin:${PATH}"

 USER root
 WORKDIR /app
diff --git a/config.yaml b/config.yaml
index 6a214e5..57ef6df 100644
--- a/config.yaml
+++ b/config.yaml
@@ -6,79 +6,186 @@ description: >-
   behind an A2A bridge. The agent exposes its OpenAI-compatible API on
   localhost:8642 and molecule_runtime proxies messages to it on :8000.

-  Any model `hermes-agent` supports is selectable at runtime via
-  `hermes model` — Nous Portal, OpenRouter, OpenAI, Anthropic, Gemini,
-  xAI, MiniMax, Qwen, DeepSeek, Groq, NVIDIA NIM, Kimi, Mistral, and more.
-version: 2.0.0
+  Every provider hermes-agent supports is available — see
+  docs/CONFIGURATION.md for the full list. Any model hermes-agent
+  accepts is selectable at runtime via `hermes model`, or from the
+  canvas Config tab.
+version: 2.1.0
 tier: 2
 runtime: hermes

 runtime_config:
-  # Default model. `hermes-agent` resolves provider + auth from its own
-  # config — canvas just passes the model ID through so the user can switch
-  # without leaving the UI. Override per-workspace in the Config tab.
+  # Default model. hermes-agent owns provider resolution via the
+  # HERMES_INFERENCE_PROVIDER env or its own `hermes model` wizard.
+  # Override per-workspace in the canvas Config tab.
   model: nousresearch/hermes-4-70b

   # Canvas surfaces this list as a Model dropdown and auto-populates
-  # Required Env Vars based on the selected entry. hermes-agent itself
-  # accepts any model string and picks the provider by prefix/scheme.
+  # Required Env Vars from the selected entry's required_env. Provider
+  # names in parentheses mirror hermes-agent's CLI provider field; see
+  # docs/CONFIGURATION.md#provider-matrix for the full mapping.
   models:
-    # --- Hermes 4 (Nous Research) ---
+    # ── Nous Research (Hermes 4) via Nous Portal ──────────────────────
     - id: nousresearch/hermes-4-70b
       name: Hermes 4 70B (Nous Portal)
       required_env: [HERMES_API_KEY]
     - id: nousresearch/hermes-4-405b
       name: Hermes 4 405B (Nous Portal)
       required_env: [HERMES_API_KEY]
-    # --- Hermes 3 (Nous Research, legacy) ---
+    - id: nousresearch/hermes-4-14b
+      name: Hermes 4 14B (Nous Portal)
+      required_env: [HERMES_API_KEY]
+
+    # ── Hermes 3 family (via OpenRouter) ─────────────────────────────
     - id: nousresearch/hermes-3-llama-3.1-70b
-      name: Hermes 3 70B (via OpenRouter)
+      name: Hermes 3 70B (OpenRouter)
       required_env: [OPENROUTER_API_KEY]
     - id: nousresearch/hermes-3-llama-3.1-405b
-      name: Hermes 3 405B (via OpenRouter)
+      name: Hermes 3 405B (OpenRouter)
       required_env: [OPENROUTER_API_KEY]

-    # --- Anthropic (direct, native SDK in hermes-agent) ---
+    # ── Anthropic (native SDK inside hermes-agent) ────────────────────
     - id: anthropic/claude-sonnet-4-5
       name: Claude Sonnet 4.5 (direct)
       required_env: [ANTHROPIC_API_KEY]
     - id: anthropic/claude-opus-4-1
       name: Claude Opus 4.1 (direct)
       required_env: [ANTHROPIC_API_KEY]
+    - id: anthropic/claude-haiku-4-5
+      name: Claude Haiku 4.5 (direct)
+      required_env: [ANTHROPIC_API_KEY]

-    # --- OpenAI (direct) ---
+    # ── OpenAI (via OpenRouter — hermes has no direct openai provider;
+    #    openai-codex is OAuth-only for Codex models) ──────────────────
     - id: openai/gpt-5
-      name: GPT-5 (direct)
-      required_env: [OPENAI_API_KEY]
+      name: GPT-5 (via OpenRouter)
+      required_env: [OPENROUTER_API_KEY]
+    - id: openai/gpt-5-mini
+      name: GPT-5 mini (via OpenRouter)
+      required_env: [OPENROUTER_API_KEY]
+    - id: openai/gpt-4o
+      name: GPT-4o (via OpenRouter)
+      required_env: [OPENROUTER_API_KEY]
+    - id: openai/gpt-4o-mini
+      name: GPT-4o mini (via OpenRouter)
+      required_env: [OPENROUTER_API_KEY]

-    # --- Gemini (direct, native SDK in hermes-agent) ---
+    # ── Google Gemini (native SDK) ────────────────────────────────────
     - id: gemini/gemini-2.5-pro
       name: Gemini 2.5 Pro (direct)
       required_env: [GEMINI_API_KEY]
+    - id: gemini/gemini-2.5-flash
+      name: Gemini 2.5 Flash (direct)
+      required_env: [GEMINI_API_KEY]

-    # --- MiniMax (direct) — reused from prior template version ---
-    - id: MiniMax-M2.7
-      name: MiniMax M2.7 (direct, ~197K ctx, coding-tuned)
+    # ── DeepSeek (direct) ─────────────────────────────────────────────
+    - id: deepseek/deepseek-v3.2
+      name: DeepSeek V3.2 (direct)
+      required_env: [DEEPSEEK_API_KEY]
+    - id: deepseek/deepseek-r1
+      name: DeepSeek R1 reasoning (direct)
+      required_env: [DEEPSEEK_API_KEY]
+
+    # ── z.ai / GLM ────────────────────────────────────────────────────
+    - id: zai/glm-4.6
+      name: GLM 4.6 (z.ai)
+      required_env: [GLM_API_KEY]
+
+    # ── Kimi / Moonshot ───────────────────────────────────────────────
+    - id: kimi-coding/kimi-k2
+      name: Kimi K2 (Moonshot)
+      required_env: [KIMI_API_KEY]
+
+    # ── MiniMax (global + China) ─────────────────────────────────────
+    - id: minimax/MiniMax-M2.7
+      name: MiniMax M2.7 (direct, coding-tuned)
       required_env: [MINIMAX_API_KEY]
-    - id: MiniMax-M2.7-highspeed
+    - id: minimax/MiniMax-M2.7-highspeed
       name: MiniMax M2.7 highspeed (Token Plan only)
       required_env: [MINIMAX_API_KEY]
+    - id: minimax/MiniMax-M1
+      name: MiniMax M1 (1M ctx)
+      required_env: [MINIMAX_API_KEY]
+    - id: minimax-cn/abab6.5-chat
+      name: MiniMax China (abab6.5)
+      required_env: [MINIMAX_CN_API_KEY]

-    # --- Any OpenRouter-exposed model (catch-all) ---
+    # ── Alibaba Cloud / Qwen (DashScope) ─────────────────────────────
+    - id: alibaba/qwen3-max
+      name: Qwen 3 Max (Alibaba Cloud)
+      required_env: [DASHSCOPE_API_KEY]
+    - id: alibaba/qwen3-coder
+      name: Qwen 3 Coder (Alibaba Cloud)
+      required_env: [DASHSCOPE_API_KEY]
+
+    # ── Xiaomi MiMo ──────────────────────────────────────────────────
+    - id: xiaomi/mimo-v1
+      name: Xiaomi MiMo v1
+      required_env: [XIAOMI_API_KEY]
+
+    # ── Arcee Trinity ────────────────────────────────────────────────
+    - id: arcee/trinity-70b
+      name: Arcee Trinity 70B
+      required_env: [ARCEEAI_API_KEY]
+
+    # ── NVIDIA NIM ────────────────────────────────────────────────────
+    - id: nvidia/nemotron-70b
+      name: Nemotron 70B (NVIDIA NIM)
+      required_env: [NVIDIA_API_KEY]
+
+    # ── Ollama Cloud ─────────────────────────────────────────────────
+    - id: ollama-cloud/llama-3.3-70b
+      name: Llama 3.3 70B (Ollama Cloud)
+      required_env: [OLLAMA_API_KEY]
+
+    # ── Hugging Face Inference ───────────────────────────────────────
+    - id: huggingface/*
+      name: Any Hugging Face model (set ID per workspace)
+      required_env: [HF_TOKEN]
+
+    # ── Vercel AI Gateway ────────────────────────────────────────────
+    - id: ai-gateway/*
+      name: Any Vercel AI Gateway model
+      required_env: [AI_GATEWAY_API_KEY]
+
+    # ── OpenCode Zen / Go ────────────────────────────────────────────
+    - id: opencode-zen/*
+      name: OpenCode Zen (set model per workspace)
+      required_env: [OPENCODE_ZEN_API_KEY]
+    - id: opencode-go/*
+      name: OpenCode Go (set model per workspace)
+      required_env: [OPENCODE_GO_API_KEY]
+
+    # ── Kilo Code ────────────────────────────────────────────────────
+    - id: kilocode/*
+      name: Kilo Code (set model per workspace)
+      required_env: [KILOCODE_API_KEY]
+
+    # ── OpenRouter catch-all ─────────────────────────────────────────
     - id: openrouter/*
-      name: Any OpenRouter model (set ID per workspace)
+      name: Any OpenRouter model (200+ available)
       required_env: [OPENROUTER_API_KEY]

-  # Required env is driven by the selected model above.
-  required_env:
-    - HERMES_API_KEY
+    # ── Custom endpoint (LM Studio / Ollama local / vLLM / llama.cpp) ─
+    - id: custom/*
+      name: Self-hosted OpenAI-compat endpoint (configure base_url in ~/.hermes/config)
+      required_env: []
+
+  # No single required env — hermes-agent supports 20+ providers and
+  # customers pick any one via the canvas Config tab (per-model
+  # required_env above drives the real UX). Molecule-runtime's
+  # preflight enforces AND-semantics on this list, so a non-empty
+  # value here would block workspaces using any non-default provider.
+  # hermes-agent itself errors loudly at request time if zero providers
+  # are configured — that's the safety net.
+  required_env: []

   # 0 = no timeout; hermes-agent sessions can run long when tool-using.
   timeout: 0

-# Tools hermes-agent ships natively — see .hermes/config.yaml inside the
-# container for the full list. These are informational; hermes-agent owns
-# tool selection. Use `hermes tools` to adjust at runtime.
+# Tools hermes-agent ships natively. See `hermes tools` inside the
+# container for the interactive toggle — by default all 17 built-in
+# tool families are available (terminal, file read/write/edit,
+# web fetch+search, memory, skills, subagent spawn, etc.).
 skills: []

 a2a:
@@ -86,14 +193,20 @@ a2a:
   streaming: true
   push_notifications: true

-# Bridge config — consumed by executor.py.
+# Bridge config — consumed by executor.py and start.sh.
 bridge:
-  # Where the in-container hermes-agent API server listens. Do not change
-  # unless you also change start.sh and API_SERVER_PORT env.
+  # Where the in-container hermes-agent API server listens. Do NOT
+  # change unless you also change start.sh / API_SERVER_PORT.
   hermes_api_base: http://127.0.0.1:8642/v1
   # Bearer token is injected by start.sh as API_SERVER_KEY and read by
   # executor.py from the env at request time — not stored in config.
   hermes_api_key_env: API_SERVER_KEY
+  # Optional: force a specific hermes provider instead of relying on
+  # hermes-agent's `auto` detection. Useful when multiple keys are
+  # set and you want deterministic routing. Set via the
+  # HERMES_INFERENCE_PROVIDER env on the container OR hardcode it here.
+  # Valid values: see docs/CONFIGURATION.md#provider-matrix.
+  provider: ""

 delegation:
   retry_attempts: 3
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index a6ed928..6e76672 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -92,6 +92,42 @@ canvas ─── POST /a2a/... ───▶ molecule_runtime (:8000)
   that's a regression to v1.x.
 - **Tool routing.** Tools are hermes-agent's job. Our bridge sees only
   the final assistant text.
+
+## Provider routing (how keys become inference)
+
+Provider resolution happens inside hermes-agent, driven by:
+
+1. **`~/.hermes/config.yaml`** — the `model.provider` field. start.sh
+   rewrites this file on every boot (`auto` by default, or whatever
+   `HERMES_INFERENCE_PROVIDER` specifies).
+2. **`~/.hermes/.env`** — every provider key we forward from the
+   container env (see start.sh for the full list; see
+   `CONFIGURATION.md#provider-matrix` for the mapping).
+3. **Auto-detection** — when `provider: auto`, hermes walks its
+   internal resolution order and picks the first provider whose
+   credential is present. When multiple keys are set, prefer an
+   explicit `HERMES_INFERENCE_PROVIDER` to avoid surprises.
+
+### Common routing gotcha
+
+With only `OPENAI_API_KEY` set and `provider: auto`, hermes-agent will
+route to `openai-codex` (the Codex API, OAuth-only) and return:
+
+```
+401 - Missing Authentication header
+```
+
+The fix is to set `HERMES_INFERENCE_PROVIDER=openrouter` — hermes'
+openrouter provider accepts `OPENAI_API_KEY` as alt-auth and routes
+OpenAI-format Chat Completions correctly. This is documented in
+`CONFIGURATION.md#forcing-a-provider`.
+
+### Auxiliary model
+
+Vision, web summarization, and MoA use a separate auxiliary model —
+it defaults to Gemini Flash via OpenRouter. If `OPENROUTER_API_KEY` is
+absent, these capabilities break silently (the primary path still
+works). Set `HERMES_AUXILIARY_PROVIDER` to override.
 - **Streaming.** `stream: false` in the request payload. A later
   revision can upgrade to SSE by subscribing to
   `GET /v1/runs/{run_id}/events` and pushing partial messages into
diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md
index 886c392..240d788 100644
--- a/docs/CONFIGURATION.md
+++ b/docs/CONFIGURATION.md
@@ -10,36 +10,116 @@
 list in `config.yaml`. When you pick one, canvas writes the selection
 into the workspace's runtime_config; molecule_runtime constructs
 `AdapterConfig.model` from that; the bridge sends it verbatim as the
 `model` field in the OpenAI-compat request payload. hermes-agent
-resolves provider + auth from the string.
+resolves provider + auth from the string (see the provider matrix below).

 **Via `hermes` CLI** — open the workspace's Terminal tab and run
 `hermes model`. This updates `~/.hermes/config.yaml` inside the
 container and affects any subsequent A2A request.

 **Which wins** — today the CLI and the bridge are independent.
-If you set the model in the canvas AND in the CLI, each request
+If you set the model in the canvas AND in the CLI, each A2A request
 uses the one the bridge sends (the canvas value). An upcoming PR
 will sync the two; see `ARCHITECTURE.md#future-work`.

-## Provider keys
+## Provider matrix

-Set one or more of these as workspace-level secrets via
-`POST /settings/secrets` (see monorepo `docs/runbooks/saas-secrets.md`).
-All are forwarded into `~/.hermes/.env` at container boot.
+hermes-agent supports every provider below. Set the corresponding env
+var as a workspace secret (`POST /settings/secrets` — see monorepo
+`docs/runbooks/saas-secrets.md`). start.sh forwards it into
+`~/.hermes/.env` at container boot.

-| Env var              | Activates provider                                |
-|----------------------|---------------------------------------------------|
-| `HERMES_API_KEY`     | Nous Portal (Hermes 3, Hermes 4 direct)           |
-| `OPENROUTER_API_KEY` | OpenRouter (200+ models)                          |
-| `ANTHROPIC_API_KEY`  | Claude direct via Anthropic Messages API          |
-| `OPENAI_API_KEY`     | GPT direct                                        |
-| `GEMINI_API_KEY`     | Gemini direct via `google-genai`                  |
-| `MINIMAX_API_KEY`    | MiniMax direct (sk-api-* or sk-cp-* accepted)     |
+### OAuth-based providers

-You don't pick the provider yourself. hermes-agent resolves it from
-the `model` string prefix — `anthropic/` → Anthropic, `gemini/` →
-Gemini, `nousresearch/` → Nous Portal (if `HERMES_API_KEY` present)
-falling back to OpenRouter, etc.
+These require `hermes model` to be run interactively (Terminal tab,
+non-piped). Set up once; tokens are stored at `~/.hermes/auth/`.
+
+| Provider | How to set up |
+|-------------------------|---------------------------------------------------------------------------------|
+| **Nous Portal** | `hermes model` → Nous Portal OAuth (subscription) |
+| **OpenAI Codex** | `hermes model` → ChatGPT OAuth (uses the GPT-5-Codex family) |
+| **GitHub Copilot** | `hermes model` → OAuth device code, or set `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` |
+| **Anthropic (Claude Pro/Max)** | `hermes model` → Claude Code auth, or set `ANTHROPIC_API_KEY` for API-key mode |
+| **Google Gemini OAuth** | `hermes model` → "Google Gemini (OAuth)". Free tier, PKCE. See the provider-routing docs for GCP-project caveats |
+
+### API-key providers
+
+Just set the env var; hermes-agent picks up the key at boot.
+
+| Provider | Env var | Example model IDs |
+|--------------------|------------------------|-------------------------------------------------------|
+| **Nous Portal API** | `HERMES_API_KEY` (or `NOUS_API_KEY`) | `nousresearch/hermes-4-70b`, `nousresearch/hermes-4-405b`, `nousresearch/hermes-4-14b` |
+| **OpenRouter** | `OPENROUTER_API_KEY` | Anything on openrouter.ai (`openai/gpt-5`, `anthropic/claude-sonnet-4-5`, 200+ others) |
+| **OpenAI (via OpenRouter)** | `OPENAI_API_KEY` alt-auth on openrouter | `openai/gpt-5`, `openai/gpt-4o`, `openai/gpt-4o-mini` |
+| **Anthropic** | `ANTHROPIC_API_KEY` | `anthropic/claude-sonnet-4-5`, `anthropic/claude-opus-4-1`, `anthropic/claude-haiku-4-5` |
+| **Google Gemini** | `GEMINI_API_KEY` or `GOOGLE_API_KEY` | `gemini/gemini-2.5-pro`, `gemini/gemini-2.5-flash` |
+| **DeepSeek** | `DEEPSEEK_API_KEY` | `deepseek/deepseek-v3.2`, `deepseek/deepseek-r1` |
+| **z.ai / GLM** | `GLM_API_KEY` | `zai/glm-4.6` |
+| **Kimi / Moonshot** | `KIMI_API_KEY` (global), `KIMI_CN_API_KEY` (China) | `kimi-coding/kimi-k2` |
+| **MiniMax** | `MINIMAX_API_KEY` (global), `MINIMAX_CN_API_KEY` (China) | `minimax/MiniMax-M2.7`, `minimax-cn/abab6.5-chat` |
+| **Alibaba / Qwen** | `DASHSCOPE_API_KEY` | `alibaba/qwen3-max`, `alibaba/qwen3-coder` |
+| **Xiaomi MiMo** | `XIAOMI_API_KEY` | `xiaomi/mimo-v1` |
+| **Arcee Trinity** | `ARCEEAI_API_KEY` | `arcee/trinity-70b` |
+| **NVIDIA NIM** | `NVIDIA_API_KEY` | `nvidia/nemotron-70b` |
+| **Ollama Cloud** | `OLLAMA_API_KEY` | `ollama-cloud/llama-3.3-70b` |
+| **Hugging Face** | `HF_TOKEN` | `huggingface/*` (any HF inference model) |
+| **Vercel AI Gateway** | `AI_GATEWAY_API_KEY` | `ai-gateway/*` |
+| **Kilo Code** | `KILOCODE_API_KEY` | `kilocode/*` |
+| **OpenCode Zen** | `OPENCODE_ZEN_API_KEY` | `opencode-zen/*` |
+| **OpenCode Go** | `OPENCODE_GO_API_KEY` | `opencode-go/*` |
+
+### Self-hosted / local
+
+`hermes model` → "Custom endpoint" — any OpenAI-compatible HTTP API.
+Aliases for quick setup: `lmstudio`, `ollama`, `vllm`, `llamacpp`.
+
+```yaml
+# example ~/.hermes/config.yaml override
+model:
+  default: "llama-3.3-70b-instruct"
+  provider: "lmstudio"
+  base_url: "http://host.docker.internal:1234/v1"
+```
+
+No API key needed — local servers typically ignore auth.
+
+## Forcing a provider
+
+By default hermes-agent's provider selection is `auto` — it walks its
+internal resolution order and picks the first available credential.
+This can route in surprising ways when multiple keys are set (e.g. an
+`OPENAI_API_KEY` alone will fall to `openai-codex`, which is OAuth-only
+and returns 401 on API-key auth).
+
+To force a specific provider, set `HERMES_INFERENCE_PROVIDER` on the
+workspace container. start.sh writes it into `~/.hermes/config.yaml`
+and `~/.hermes/.env` at boot. Valid values (from hermes-agent's
+`cli-config.yaml.example`):
+
+```
+auto | openrouter | nous | nous-api | anthropic | openai-codex
+copilot | gemini | google-gemini-cli | zai | kimi-coding | kimi-coding-cn
+minimax | minimax-cn | alibaba (aliases: dashscope, qwen)
+arcee | nvidia | xiaomi | huggingface | ollama-cloud
+ai-gateway | kilocode | opencode-zen | opencode-go | deepseek | custom
+```
+
+**Most common choices when multiple keys are present:**
+
+- `OPENAI_API_KEY` only → `HERMES_INFERENCE_PROVIDER=openrouter`
+  (hermes' openrouter provider accepts OPENAI_API_KEY as alt auth)
+- `ANTHROPIC_API_KEY` only → `anthropic`
+- Mixed keys → `auto` usually works
+
+## Auxiliary model (vision / MoA / summarization)
+
+hermes-agent uses a second, smaller model for vision, web-page
+summarization, and mixture-of-agents tool calls. It defaults to
+**Gemini Flash via OpenRouter**. Having `OPENROUTER_API_KEY` set is
+enough; otherwise vision + web-summarize + MoA break silently.
+
+Override the auxiliary path with the `HERMES_AUXILIARY_PROVIDER` env —
+start.sh forwards it. See the hermes-agent
+[Auxiliary Models docs](https://github.com/NousResearch/hermes-agent/blob/main/website/docs/user-guide/configuration.md)
+for the full field set.

 ## Persisting skills + memory

@@ -47,30 +127,29 @@ falling back to OpenRouter, etc.
 ```
 /home/agent/.hermes/
-├── .env          ← provider keys + API_SERVER_* (regenerated per boot)
-├── config.yaml   ← model, tools, gateway settings
-├── skills/       ← self-improvement loop writes here
-├── sessions/     ← conversation history (FTS5-indexed)
-├── memory/       ← long-lived user model (Honcho + custom)
+├── .env           ← provider keys + API_SERVER_* (regenerated per boot)
+├── config.yaml    ← model + provider selection (rewritten by start.sh each boot)
+├── hermes-agent/  ← the installed project; venv, source, upstream repo
+├── auth/          ← OAuth tokens (Google Gemini OAuth, Copilot, Codex, etc.)
+├── skills/        ← self-improvement loop writes here
+├── sessions/      ← conversation history (FTS5-indexed)
+├── memory/        ← long-lived user model (Honcho + custom)
 └── logs/
 ```

-For these to survive a workspace container restart, the platform
-needs a Docker volume mounted at `/home/agent/.hermes`. The default
-provisioner config already handles this — verify with:
+For these to survive a container restart, mount a Docker volume at
+`/home/agent/.hermes`. The platform's default provisioner config does
+this already — verify with:

 ```bash
 docker inspect --format='{{json .Mounts}}'
 ```

-If `/home/agent/.hermes` is not in the Mounts list, edit the
-workspace's provisioner config in the monorepo.
-
 ## Gateway platforms (advanced)

 `hermes-agent` ships with Telegram, Discord, Slack, WhatsApp, Signal,
-and ~10 other platform adapters. v2.0.0 of this template wires only
-the `api_server` platform (required for the A2A bridge).
+and ~10 other platform adapters. v2.x of this template wires only the
+`api_server` platform (required for the A2A bridge).
 To enable another platform, customize `~/.hermes/.env` in the
 workspace:

@@ -86,8 +165,8 @@ EOF
 '
 ```

-This is not yet surfaced in canvas. Follow the issue tracker for
-first-class gateway-platform support.
+Not yet surfaced in canvas. Follow the issue tracker for first-class
+gateway-platform support.

 ## Restarting the gateway

@@ -108,12 +187,12 @@ molecule_runtime on :8000 is unaffected.

 ```bash
 # What model is the CLI pinned to?
-docker exec -u agent hermes model
+docker exec -u agent hermes model show

 # What tools are enabled?
 docker exec -u agent hermes tools

-# How is the agent doing?
+# Doctor report (warnings, missing deps, broken providers):
 docker exec -u agent hermes doctor

 # Last 200 lines of gateway log:
@@ -122,8 +201,8 @@ docker exec tail -200 /var/log/hermes-gateway.log
 ```

 ## Bridge timeouts

-`executor.py` uses a 600-second httpx timeout. If you run agent
-turns that take longer than 10 minutes (large research tasks with
-many tool calls), bump `_REQUEST_TIMEOUT` in `executor.py` and rebuild
-the image. Don't try to configure this at runtime via env — we keep
-it in code so regressions are version-controlled.
+`executor.py` uses a 600-second httpx timeout. If you run agent turns
+that take longer than 10 minutes (large research tasks with many tool
+calls), bump `_REQUEST_TIMEOUT` in `executor.py` and rebuild the
+image. Don't try to configure this at runtime via env — we keep it in
+code so regressions are version-controlled.
diff --git a/start.sh b/start.sh
index 323c539..b7ecb38 100755
--- a/start.sh
+++ b/start.sh
@@ -12,6 +12,7 @@ set -euo pipefail

 HERMES_HOME="/home/agent/.hermes"
 ENV_FILE="${HERMES_HOME}/.env"
+HERMES_CONFIG="${HERMES_HOME}/config.yaml"
 LOG_FILE="/var/log/hermes-gateway.log"

 mkdir -p "$(dirname "$LOG_FILE")"
@@ -27,37 +28,110 @@ if [ -z "${API_SERVER_KEY:-}" ]; then
   export API_SERVER_KEY
 fi

+install -d -o agent -g agent "$HERMES_HOME"
+
 # --- Write hermes-agent's .env ---
-# API_SERVER_ENABLED must be true and the bearer must match. Provider
-# keys (HERMES_API_KEY / OPENROUTER_API_KEY / ANTHROPIC_API_KEY /
-# OPENAI_API_KEY / GEMINI_API_KEY / MINIMAX_API_KEY) are forwarded from
-# the container env — hermes-agent will pick the right one based on the
-# model selected via `hermes model`.
-sudo -u agent mkdir -p "$HERMES_HOME"
-sudo -u agent tee "$ENV_FILE" >/dev/null <"$ENV_FILE" <"$HERMES_CONFIG"
+chown agent:agent "$HERMES_CONFIG"

 # --- Start hermes gateway in the background ---
 # `hermes gateway` reads ~/.hermes/.env at startup. We run it as the
-# agent user so memory/skills land in the agent-owned home.
-nohup sudo -u agent -E bash -lc "hermes gateway" >>"$LOG_FILE" 2>&1 &
+# agent user via gosu so memory/skills land in the agent-owned home.
+# `bash -lc` forces a login shell so .profile / .bashrc add ~/.local/bin
+# to PATH (that's where install.sh symlinks the hermes binary).
+nohup gosu agent bash -lc "cd /home/agent && hermes gateway" \
+    >>"$LOG_FILE" 2>&1 &
 GATEWAY_PID=$!

 # --- Wait for :8642 readiness ---
-# Max 60s — enough for a cold gateway boot including first-time DB
-# migrations. Longer waits should surface as a provisioning failure
-# upstream rather than silently holding the container.
-for _ in $(seq 1 60); do
+# Max 120s — enough for a cold gateway boot including first-time DB
+# migrations and session-store init. Longer waits should surface as a
+# provisioning failure upstream rather than silently holding the container.
+READY_TIMEOUT=120
+for _ in $(seq 1 $READY_TIMEOUT); do
   if curl -fsS "http://127.0.0.1:${API_SERVER_PORT:-8642}/health" >/dev/null 2>&1; then
     break
   fi
@@ -70,7 +144,7 @@ for _ in $(seq 1 60); do
 done

 if ! curl -fsS "http://127.0.0.1:${API_SERVER_PORT:-8642}/health" >/dev/null 2>&1; then
-  echo "[start.sh] hermes gateway failed to reach /health within 60s." >&2
+  echo "[start.sh] hermes gateway failed to reach /health within ${READY_TIMEOUT}s." >&2
   tail -80 "$LOG_FILE" >&2
   exit 1
 fi