Three small but real cleanups against hermes-agent v0.12.0 (NousResearch/hermes-agent, 2026-04-30): 1. Rename HERMES_DEFAULT_MODEL -> HERMES_INFERENCE_MODEL (upstream's actual env name). Reads BOTH for one release cycle so workspace-server (which still writes the legacy name) doesn't break — drop the legacy fallback after workspace-server is updated in a follow-up PR. 2. Drop HERMES_API_KEY from start.sh's .env heredoc. That var only feeds hermes-agent's TUI gateway bridge, NOT any LLM provider. Provider credentials go through OPENROUTER_API_KEY / OPENAI_API_KEY / etc. 3. Add 12 missing provider prefixes to derive-provider.sh so model slugs like xai/grok-4, bedrock/anthropic.claude-sonnet-4, lmstudio/local, copilot/gpt-4o, etc., route to the correct provider instead of falling through to "auto". New tests/test_derive_provider.sh — 26 sh-style assertions covering the legacy fallback, the precedence rule, all 12 new providers, and a few regression cases for adjacent prefixes (minimax vs minimax-oauth, qwen vs qwen-oauth, alibaba vs alibaba-coding-plan). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
294 lines
15 KiB
Bash
Executable File
294 lines
15 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Launch the two workspace-container processes:
#
#   1. The real hermes-agent gateway with its OpenAI-compat API server
#      platform enabled, listening on 127.0.0.1:8642.
#   2. molecule-runtime (our A2A server + bridge adapter) on :8000.
#
# The two talk over loopback. The platform only exposes :8000 — the
# hermes-agent API is an internal implementation detail of the
# container and is never reachable from outside it.

set -euo pipefail

# Boot-smoke contract (molecule-core#2275): the publish-image gate runs
# the runtime with stub creds and no network so it can exercise lazy
# imports inside executor.execute(). The hermes gateway needs valid
# creds plus a writable log file — neither exists in the smoke env — so
# jump straight to molecule-runtime. Its smoke_mode short-circuit fires
# right after create_executor() returns and exits before any A2A
# traffic is attempted. Real production boots never take this path.
case "${MOLECULE_SMOKE_MODE:-0}" in
  1)
    echo "[start.sh] MOLECULE_SMOKE_MODE=1 — skipping hermes gateway spawn"
    exec molecule-runtime
    ;;
esac
|
|
|
# Runtime layout for the hermes gateway. HOME is pinned to /tmp when
# the gateway is spawned, so hermes resolves ~/.hermes to this dir.
HERMES_HOME="/tmp/.hermes"
ENV_FILE="${HERMES_HOME}/.env"
HERMES_CONFIG="${HERMES_HOME}/config.yaml"
LOG_FILE="/tmp/hermes-gateway.log"

# The gateway log must exist and be writable by the unprivileged user
# before the background-spawn redirect opens it.
mkdir -p "$(dirname "$LOG_FILE")"
touch "$LOG_FILE"
chown agent:agent "$LOG_FILE"

# --- Generate a per-container API_SERVER_KEY ---
# hermes-agent requires a bearer token on the api-server platform.
# Mint a random value per boot unless the operator supplied one, and
# export it so both processes see the same var — molecule_runtime's
# executor reads it at request time.
[ -n "${API_SERVER_KEY:-}" ] || {
  API_SERVER_KEY="$(head -c 32 /dev/urandom | base64 | tr -d '/+=' | head -c 40)"
  export API_SERVER_KEY
}

install -d -o agent -g agent "$HERMES_HOME"
|
|
|
# --- Write hermes-agent's .env ---
# API_SERVER_ENABLED must be true and the bearer must match. Every
# provider key hermes-agent knows about is forwarded from the container
# env IF it's set — see docs/CONFIGURATION.md#provider-matrix for the
# authoritative list. Adding a new key here also needs a matching
# required_env entry in config.yaml.
#
# The ${VAR:+VAR=${VAR}} idiom emits the assignment only when VAR is
# set and non-empty; an unset var leaves a blank line in the file.
# NOTE(review): values are interpolated unescaped — a value containing
# a newline or quote would corrupt the .env. Assumed impossible for
# real API keys; confirm against upstream's .env parser if that ever
# changes.
cat >"$ENV_FILE" <<EOF
API_SERVER_ENABLED=true
API_SERVER_KEY=${API_SERVER_KEY}
API_SERVER_HOST=${API_SERVER_HOST:-127.0.0.1}
API_SERVER_PORT=${API_SERVER_PORT:-8642}
# Provider-selection override (optional; empty = hermes auto-detect).
${HERMES_INFERENCE_PROVIDER:+HERMES_INFERENCE_PROVIDER=${HERMES_INFERENCE_PROVIDER}}
# Auxiliary model defaults — used by vision, web summarization, MoA.
${HERMES_AUXILIARY_PROVIDER:+HERMES_AUXILIARY_PROVIDER=${HERMES_AUXILIARY_PROVIDER}}
# ── Primary inference providers (keyed) ───────────────────────
# NOTE: HERMES_API_KEY intentionally NOT forwarded — upstream uses it only for
# the TUI gateway bridge, not as an LLM credential. Provider keys go below
# (NOUS_API_KEY, OPENROUTER_API_KEY, OPENAI_API_KEY, …).
${NOUS_API_KEY:+NOUS_API_KEY=${NOUS_API_KEY}}
${OPENROUTER_API_KEY:+OPENROUTER_API_KEY=${OPENROUTER_API_KEY}}
${OPENAI_API_KEY:+OPENAI_API_KEY=${OPENAI_API_KEY}}
${ANTHROPIC_API_KEY:+ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}}
${GEMINI_API_KEY:+GEMINI_API_KEY=${GEMINI_API_KEY}}
${GOOGLE_API_KEY:+GOOGLE_API_KEY=${GOOGLE_API_KEY}}
${DEEPSEEK_API_KEY:+DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY}}
${GLM_API_KEY:+GLM_API_KEY=${GLM_API_KEY}}
${KIMI_API_KEY:+KIMI_API_KEY=${KIMI_API_KEY}}
${KIMI_CN_API_KEY:+KIMI_CN_API_KEY=${KIMI_CN_API_KEY}}
${MINIMAX_API_KEY:+MINIMAX_API_KEY=${MINIMAX_API_KEY}}
${MINIMAX_CN_API_KEY:+MINIMAX_CN_API_KEY=${MINIMAX_CN_API_KEY}}
${DASHSCOPE_API_KEY:+DASHSCOPE_API_KEY=${DASHSCOPE_API_KEY}}
${XIAOMI_API_KEY:+XIAOMI_API_KEY=${XIAOMI_API_KEY}}
${ARCEEAI_API_KEY:+ARCEEAI_API_KEY=${ARCEEAI_API_KEY}}
${NVIDIA_API_KEY:+NVIDIA_API_KEY=${NVIDIA_API_KEY}}
${OLLAMA_API_KEY:+OLLAMA_API_KEY=${OLLAMA_API_KEY}}
${HF_TOKEN:+HF_TOKEN=${HF_TOKEN}}
${AI_GATEWAY_API_KEY:+AI_GATEWAY_API_KEY=${AI_GATEWAY_API_KEY}}
${KILOCODE_API_KEY:+KILOCODE_API_KEY=${KILOCODE_API_KEY}}
${OPENCODE_ZEN_API_KEY:+OPENCODE_ZEN_API_KEY=${OPENCODE_ZEN_API_KEY}}
${OPENCODE_GO_API_KEY:+OPENCODE_GO_API_KEY=${OPENCODE_GO_API_KEY}}
# GitHub Copilot (OAuth or token)
${COPILOT_GITHUB_TOKEN:+COPILOT_GITHUB_TOKEN=${COPILOT_GITHUB_TOKEN}}
${GH_TOKEN:+GH_TOKEN=${GH_TOKEN}}
EOF
# The file holds live credentials — owner-only perms for the agent user.
chown agent:agent "$ENV_FILE"
chmod 600 "$ENV_FILE"
|
|
|
# --- Seed a minimal ~/.hermes/config.yaml ---
# The container image runs install.sh with --skip-setup, so no config
# is generated at build time. Without an explicit provider, hermes
# errors at request time with "No LLM provider configured" even when a
# provider key is present in .env — config.yaml is the primary source
# of truth; .env only holds keys.
#
# Writing an explicit provider also stops auto-detect from falling
# through to openai-codex (OAuth-only) when OPENAI_API_KEY is set but
# OPENROUTER_API_KEY isn't — the source of the 401 "Missing
# Authentication header" seen in early testing.
#
# Always overwrite: the hermes installer drops its
# `cli-config.yaml.example` in place as `~/.hermes/config.yaml`
# (defaulting to anthropic/claude-opus-4.6 + provider:auto), which
# doesn't match the workspace's intended model. Our template owns the
# selection; operators override via HERMES_INFERENCE_PROVIDER +
# HERMES_INFERENCE_MODEL env, or by editing config.yaml at runtime
# inside the container.

# Pull HERMES_INFERENCE_MODEL/HERMES_DEFAULT_MODEL +
# HERMES_INFERENCE_PROVIDER out of /configs/config.yaml (canvas Config
# tab values, written by CP user-data per task #197). Env-var overrides
# still win — the helper only sets vars that aren't already set, and it
# is sourced so it can mutate this shell's env. Dockerfile COPYs
# scripts/ to /app/scripts; /scripts is the fallback for dev
# environments that run start.sh with a different WORKDIR.
#
# Runs BEFORE the API-key-based auto-selection block below so a
# canvas-set provider/model wins over a key-presence guess. Operators
# who explicitly picked GLM-4.6 in the UI shouldn't get bumped to
# anthropic/* just because ANTHROPIC_API_KEY happens to be in env too.
for LOAD_CONFIG_SCRIPT in /app/scripts/load-workspace-config.sh /scripts/load-workspace-config.sh; do
  if [ -f "$LOAD_CONFIG_SCRIPT" ]; then
    . "$LOAD_CONFIG_SCRIPT"
    break
  fi
done
|
|
|
# --- Pick a default model ---
# The fallback used to be `nousresearch/hermes-4-70b` unconditionally,
# which derives PROVIDER=openrouter when no Nous key is present — and
# if OPENROUTER_API_KEY isn't set either, hermes-agent boots with a
# config that points at a provider with no usable key, then 500s at
# request time with "No LLM provider configured". Surfaced whenever a
# workspace was provisioned with a single provider key (e.g. just
# MINIMAX_API_KEY) but no explicit model selection — the canvas's
# "set key, save, send" flow.
#
# Fix: when neither model env var is set and HERMES_INFERENCE_PROVIDER
# is unset, pick the default model based on which API key is actually
# present in env. Behaviour when everything is set is unchanged. Order
# below is rough preference (direct providers preferred over OR routing
# for the same model family).
#
# We accept BOTH HERMES_INFERENCE_MODEL (upstream's actual env var, see
# NousResearch/hermes-agent website/docs/reference/environment-variables.md)
# AND HERMES_DEFAULT_MODEL (legacy name we invented before 2026-05).
# Workspace-server still writes the legacy name during the migration
# window — accepting both keeps boots green until that's fixed. Once
# workspace-server switches over, drop the HERMES_DEFAULT_MODEL fallback.
if [ -z "${HERMES_INFERENCE_MODEL:-}" ] && [ -z "${HERMES_DEFAULT_MODEL:-}" ] && [ -z "${HERMES_INFERENCE_PROVIDER:-}" ]; then
  # NOTE(review): the .env section above says HERMES_API_KEY is only the
  # TUI gateway-bridge token, not a provider credential — confirm
  # whether it should really imply Nous model access here. Kept for
  # backward compatibility with existing deployments.
  if [ -n "${HERMES_API_KEY:-}" ] || [ -n "${NOUS_API_KEY:-}" ]; then
    HERMES_DEFAULT_MODEL="nousresearch/hermes-4-70b"
  elif [ -n "${ANTHROPIC_API_KEY:-}" ]; then
    HERMES_DEFAULT_MODEL="anthropic/claude-sonnet-4-5"
  elif [ -n "${OPENAI_API_KEY:-}" ]; then
    HERMES_DEFAULT_MODEL="openai/gpt-4o"
  elif [ -n "${MINIMAX_API_KEY:-}" ]; then
    HERMES_DEFAULT_MODEL="minimax/MiniMax-M2.7-highspeed"
  elif [ -n "${MINIMAX_CN_API_KEY:-}" ]; then
    HERMES_DEFAULT_MODEL="minimax-cn/abab6.5-chat"
  elif [ -n "${GEMINI_API_KEY:-}" ] || [ -n "${GOOGLE_API_KEY:-}" ]; then
    HERMES_DEFAULT_MODEL="gemini/gemini-2.0-flash"
  elif [ -n "${DEEPSEEK_API_KEY:-}" ]; then
    HERMES_DEFAULT_MODEL="deepseek/deepseek-chat"
  elif [ -n "${KIMI_API_KEY:-}" ]; then
    HERMES_DEFAULT_MODEL="kimi/kimi-k2"
  elif [ -n "${OPENROUTER_API_KEY:-}" ]; then
    HERMES_DEFAULT_MODEL="nousresearch/hermes-4-70b" # routes via OR
  else
    # No provider key at all — keep the historical fallback so the
    # error surfaces as the same "No LLM provider configured" message
    # that operators are familiar with (rather than swapping it for a
    # different obscure error).
    HERMES_DEFAULT_MODEL="nousresearch/hermes-4-70b"
  fi
  echo "[start.sh] no model env was set; auto-selected '${HERMES_DEFAULT_MODEL}' from available API keys"
fi

# BUG FIX: if HERMES_INFERENCE_PROVIDER is set but neither model env
# var is, the auto-select block above is skipped and
# HERMES_DEFAULT_MODEL stays unset — under `set -u` the previous bare
# ${HERMES_DEFAULT_MODEL} fallback aborted the boot with an
# "unbound variable" error. Guard the inner expansion and fall back to
# the historical default model in that case.
DEFAULT_MODEL="${HERMES_INFERENCE_MODEL:-${HERMES_DEFAULT_MODEL:-nousresearch/hermes-4-70b}}"
|
# Derive provider from model slug prefix — shared with install.sh via
# scripts/derive-provider.sh so Docker + bare-host paths match.
# Dockerfile COPYs scripts/ to /app/scripts; fall back to /scripts
# for dev environments that run start.sh with a different WORKDIR.
#
# Unlike the load-workspace-config sourcing above, this source is NOT
# guarded with [ -f ]: the config template below needs PROVIDER, so a
# missing derive script should abort the boot via `set -e` rather than
# continue with PROVIDER unset.
# NOTE(review): with `VAR=value . script`, whether the temporary
# assignment persists after the source returns differs between bash's
# POSIX and default modes — confirm derive-provider.sh doesn't rely on
# HERMES_INFERENCE_MODEL remaining set afterwards.
DERIVE_SCRIPT="/app/scripts/derive-provider.sh"
[ -f "$DERIVE_SCRIPT" ] || DERIVE_SCRIPT="/scripts/derive-provider.sh"
HERMES_INFERENCE_MODEL="${DEFAULT_MODEL}" . "$DERIVE_SCRIPT"
|
|
|
# --- OpenAI bridge: custom provider + chat_completions api_mode ---
# Symmetric with install.sh (see there for the full explanation).
# hermes has NO native "openai" provider — the bridge must use custom +
# api_mode=chat_completions to hit the OpenAI-compat
# /v1/chat/completions path (not /v1/responses with encrypted_content,
# which 400s on gpt-4o). Only kicks in when the operator hasn't already
# pointed the custom provider somewhere via env.
if [[ "${PROVIDER}" == "custom" && -n "${OPENAI_API_KEY:-}" && -z "${HERMES_CUSTOM_BASE_URL:-}" && -z "${HERMES_CUSTOM_API_KEY:-}" ]]; then
  HERMES_CUSTOM_BASE_URL="https://api.openai.com/v1"
  HERMES_CUSTOM_API_KEY="${OPENAI_API_KEY}"
  HERMES_CUSTOM_API_MODE="chat_completions"
  export HERMES_CUSTOM_BASE_URL HERMES_CUSTOM_API_KEY HERMES_CUSTOM_API_MODE
  # The custom endpoint expects the bare model id, not the openai/ slug.
  DEFAULT_MODEL="${DEFAULT_MODEL#openai/}"
  echo "[start.sh] bridged OPENAI_API_KEY → custom provider @ api.openai.com (api_mode=chat_completions, model=${DEFAULT_MODEL})"
fi
|
|
|
# Render the seeded config.yaml in one grouped redirect so the file is
# written in a single pass.
{
  # Banner fix: the seeded comment previously advertised only the
  # legacy HERMES_DEFAULT_MODEL name — stale since the rename to
  # HERMES_INFERENCE_MODEL. Name both while the migration window lasts.
  echo "# Seeded by molecule template-hermes start.sh. Customize via"
  echo "# \`hermes config edit\` or by editing this file directly."
  echo "# start.sh rewrites model.default + model.provider on every boot"
  echo "# from HERMES_INFERENCE_MODEL (legacy: HERMES_DEFAULT_MODEL) /"
  echo "# HERMES_INFERENCE_PROVIDER env."
  echo "model:"
  echo "  default: \"${DEFAULT_MODEL}\""
  echo "  provider: \"${PROVIDER}\""
  # For custom provider (or its aliases lmstudio/ollama/vllm/llamacpp),
  # let operators pipe the base_url and api_key through env. Useful for
  # pointing at a non-OpenRouter OpenAI-compat endpoint (OpenAI direct,
  # LiteLLM gateway, LM Studio, local vLLM, etc.).
  if [ -n "${HERMES_CUSTOM_BASE_URL:-}" ]; then
    echo "  base_url: \"${HERMES_CUSTOM_BASE_URL}\""
  fi
  if [ -n "${HERMES_CUSTOM_API_KEY:-}" ]; then
    echo "  api_key: \"${HERMES_CUSTOM_API_KEY}\""
  fi
  # api_mode gates hermes custom-provider request shape:
  #   chat_completions → /v1/chat/completions (OpenAI-compat)
  #   codex_responses  → /v1/responses + encrypted_content (o1 only)
  if [ -n "${HERMES_CUSTOM_API_MODE:-}" ]; then
    echo "  api_mode: \"${HERMES_CUSTOM_API_MODE}\""
  fi
  # --- Molecule A2A platform plugin ---
  # Loaded into hermes via the hermes_agent.plugins entry point baked
  # into the image (see Dockerfile). When enabled, hermes opens a
  # localhost HTTP listener on MOLECULE_A2A_PLATFORM_PORT;
  # molecule-runtime POSTs A2A peer messages there and gets agent
  # replies back via the callback_url. Independent of the OpenAI-compat
  # api-server platform on :8642 — both run side-by-side. The runtime
  # adapter still uses the api-server bridge today; switching to the
  # plugin path is a separate adapter.py change (post-demo).
  if [ "${MOLECULE_A2A_PLATFORM_ENABLED:-true}" = "true" ]; then
    # Default the plugin's callback URL to the executor's reply server
    # (started by adapter.create_executor → executor.start()).
    # Operators can pin a custom URL via env if molecule-runtime is
    # extended to host /a2a/reply itself.
    DEFAULT_CALLBACK="http://${MOLECULE_A2A_CALLBACK_HOST:-127.0.0.1}:${MOLECULE_A2A_CALLBACK_PORT:-8646}/a2a/reply"
    echo "platforms:"
    echo "  molecule-a2a:"
    echo "    enabled: true"
    echo "    extra:"
    echo "      host: \"${MOLECULE_A2A_PLATFORM_HOST:-127.0.0.1}\""
    echo "      port: ${MOLECULE_A2A_PLATFORM_PORT:-8645}"
    echo "      callback_url: \"${MOLECULE_A2A_PLATFORM_CALLBACK_URL:-${DEFAULT_CALLBACK}}\""
    if [ -n "${MOLECULE_A2A_PLATFORM_SHARED_SECRET:-}" ]; then
      echo "      shared_secret: \"${MOLECULE_A2A_PLATFORM_SHARED_SECRET}\""
    fi
  fi
} >"$HERMES_CONFIG"
chown agent:agent "$HERMES_CONFIG"
|
|
|
# --- Start hermes gateway in the background ---
# `hermes gateway` reads ~/.hermes/.env at startup. HOME is overridden
# to /tmp so the lookup resolves to /tmp/.hermes/.env (writable; matches
# HERMES_HOME above). The hermes binary is on PATH via
# /home/agent/.local/bin (set in Dockerfile) — that location is
# read-only under T1 sandbox, but binary lookup only needs read.
#
# bash -c (not -lc) on purpose: a login shell would re-derive HOME and
# PATH from its profile; both are pinned explicitly inline instead.
# gosu drops privileges to the agent user; nohup + '&' detach the
# gateway so start.sh can continue to the readiness probe below.
# stdout+stderr append to $LOG_FILE (created and chown'd earlier).
nohup gosu agent env HOME=/tmp PATH="/home/agent/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" \
bash -c "cd /tmp && hermes gateway" \
>>"$LOG_FILE" 2>&1 &
# PID of the backgrounded wrapper — the readiness loop uses it to
# detect a gateway that dies during boot.
GATEWAY_PID=$!
|
|
|
# --- Wait for :8642 readiness ---
# Max 120s — enough for a cold gateway boot including first-time DB
# migrations and session-store init. Anything longer should surface as
# a provisioning failure upstream rather than silently holding the
# container.
READY_TIMEOUT=120

# Probe the gateway's health endpoint: succeeds only on an HTTP
# success status (curl -f), silent otherwise.
gateway_healthy() {
  curl -fsS "http://127.0.0.1:${API_SERVER_PORT:-8642}/health" >/dev/null 2>&1
}

for (( waited = 0; waited < READY_TIMEOUT; waited++ )); do
  if gateway_healthy; then
    break
  fi
  # Bail out early if the gateway process died, instead of burning the
  # whole timeout against a dead process.
  if ! kill -0 "$GATEWAY_PID" 2>/dev/null; then
    echo "[start.sh] hermes gateway exited during boot. Last log lines:" >&2
    tail -40 "$LOG_FILE" >&2
    exit 1
  fi
  sleep 1
done

# Final verdict — the loop can also finish by exhausting the timeout.
if ! gateway_healthy; then
  echo "[start.sh] hermes gateway failed to reach /health within ${READY_TIMEOUT}s." >&2
  tail -80 "$LOG_FILE" >&2
  exit 1
fi

echo "[start.sh] hermes gateway ready on :${API_SERVER_PORT:-8642} (pid ${GATEWAY_PID})"

# --- Exec molecule-runtime on :8000 ---
# From here on, every A2A message the platform sends gets proxied
# through executor.py → :8642 → hermes-agent.
exec molecule-runtime
|