fix(workspace-server): vendor upstream derive-provider.sh + close 12-prefix drift

The drift gate's monorepoRoot walk-up looked for workspace-configs-templates/
which is gitignored locally and doesn't exist in this repo at all (the
canonical script lives in molecule-ai-workspace-template-hermes). Test
failed on CI from day one with "could not find monorepo root".

Two layered fixes in one PR:

1. Vendor upstream derive-provider.sh as testdata/ + drop monorepoRoot.
   The vendored copy has a header pointing operators at the upstream
   source and a one-line cp command for refresh. Test now reads two
   files (vendored shell + workspace_provision.go) via package-relative
   paths — Go test sets cwd to the package dir, so this is hermetic
   without any walk-up gymnastics.

2. Update the case-statement regex to match upstream's renamed variable
   (${_HERMES_MODEL} since v0.12.0, the resolved value of
   HERMES_INFERENCE_MODEL with a HERMES_DEFAULT_MODEL legacy fallback).
   Regex now accepts either spelling so a future rename fails loudly
   on the parser-sanity check rather than silently returning empty.

Vendoring upstream surfaced real drift the gate was designed to catch:
upstream v0.12.0 added 12 provider case clauses (16 prefixes counting
aliases) that deriveProviderFromModelSlug didn't handle (xai/grok,
bedrock/aws, tencent/tencent-tokenhub, gmi, qwen-oauth, lmstudio/lm-studio,
minimax-oauth, alibaba-coding-plan, google-gemini-cli, openai-codex,
copilot-acp, copilot). Without these,
Save+Restart on a workspace using one of those prefixes would persist
LLM_PROVIDER="" and the next boot would fall back to derive-provider.sh's
runtime *=auto branch — losing the user's explicit choice on every restart.

Added all 12 case clauses + 16 new table-driven test cases (covering
both canonical and aliased forms). Drift gate now passes; future
upstream additions will fail loudly with a "DRIFT: ..." message
pointing the engineer at the missing case.

Task: #242
This commit is contained in:
Hongming Wang 2026-05-02 23:51:20 -07:00
parent 284012a768
commit dfeefb0acc
4 changed files with 229 additions and 44 deletions

View File

@ -7,10 +7,12 @@ package handlers
// workspace-server can persist LLM_PROVIDER into workspace_secrets at
// provision time. That created two sources of truth:
//
// 1. workspace-configs-templates/hermes/scripts/derive-provider.sh —
// 1. molecule-ai-workspace-template-hermes/scripts/derive-provider.sh —
// runs inside the container at boot, has the final say on which
// provider hermes targets (writes ~/.hermes/config.yaml's
// model.provider field).
// model.provider field). The shell script lives in a separate
// OSS repo, so we vendor a snapshot at testdata/derive-provider.sh
// to keep this gate hermetic.
// 2. workspace-server/internal/handlers/workspace_provision.go's
// deriveProviderFromModelSlug — runs at provision time on the
// platform side so LLM_PROVIDER lands in workspace_secrets and
@ -41,17 +43,19 @@ package handlers
// which pins the *values*; this test pins the *coverage* of the
// prefix set itself.
//
// Hermetic: only reads two files from the monorepo + parses them
// in-process. No network, no docker, no DB.
// Hermetic: reads two files (vendored shell script + Go source) from
// paths relative to the test package directory and parses them
// in-process. No network, no docker, no DB. The vendored shell script
// at testdata/derive-provider.sh is a snapshot of the upstream OSS
// template repo's script — refresh it via the cp command in that file's
// header when upstream changes.
import (
"go/ast"
"go/parser"
"go/token"
"os"
"path/filepath"
"regexp"
"runtime"
"sort"
"strconv"
"strings"
@ -184,30 +188,15 @@ func TestDeriveProviderDrift_ShellAndGoStayInSync(t *testing.T) {
}
}
// monorepoRoot returns the directory that contains
// workspace-configs-templates/, found by climbing from the directory
// of this source file. At most six directories are inspected (the
// starting one included). The lookup is anchored on the compiled-in
// file path, so it depends on neither the process CWD nor any env var.
// The test is failed via t.Fatalf if the caller's file cannot be
// determined or no inspected directory holds the marker.
func monorepoRoot(t *testing.T) string {
	t.Helper()

	_, self, _, found := runtime.Caller(0)
	if !found {
		t.Fatalf("runtime.Caller failed — cannot locate test file path")
	}

	// Expected start: .../workspace-server/internal/handlers/
	candidate := filepath.Dir(self)
	for depth := 0; depth < 6; depth++ {
		_, statErr := os.Stat(filepath.Join(candidate, "workspace-configs-templates"))
		if statErr == nil {
			return candidate
		}

		// filepath.Dir is a fixed point at the filesystem root; stop
		// there instead of spinning on the same directory.
		up := filepath.Dir(candidate)
		if up == candidate {
			break
		}
		candidate = up
	}

	t.Fatalf("could not find monorepo root (looked for workspace-configs-templates/) walking up from %s", self)
	return ""
}
// Fixture locations for the drift gate. Both are relative to the test
// package directory, which is the working directory `go test` runs in.
const (
	// vendoredShellPath points at the testdata snapshot of upstream
	// derive-provider.sh. The refresh procedure for when upstream
	// changes is described in that file's header.
	vendoredShellPath = "testdata/derive-provider.sh"

	// goSourcePath names the Go file that holds
	// deriveProviderFromModelSlug.
	goSourcePath = "workspace_provision.go"
)
// loadShellPrefixMap parses derive-provider.sh and returns a
// map[prefix]provider for every case clause. Aliases inside a single
@ -228,26 +217,27 @@ func monorepoRoot(t *testing.T) string {
// hardcode.
func loadShellPrefixMap(t *testing.T) map[string]string {
t.Helper()
root := monorepoRoot(t)
shellPath := filepath.Join(root, "workspace-configs-templates", "hermes", "scripts", "derive-provider.sh")
raw, err := os.ReadFile(shellPath)
raw, err := os.ReadFile(vendoredShellPath)
if err != nil {
t.Fatalf("read %s: %v", shellPath, err)
t.Fatalf("read %s: %v (refresh from upstream — see file header)", vendoredShellPath, err)
}
// Locate the case statement body so we don't accidentally match
// PROVIDER= assignments above the case (the HERMES_INFERENCE_PROVIDER
// override + HERMES_DEFAULT_MODEL empty fallback both write PROVIDER=
// before the case).
caseStart := regexp.MustCompile(`(?m)^case\s+"\$\{HERMES_DEFAULT_MODEL\}"\s+in\s*$`)
// override + the empty-model fallback both write PROVIDER= before
// the case). Upstream renamed the case variable to ${_HERMES_MODEL}
// in v0.12.0 (the resolved value of HERMES_INFERENCE_MODEL with a
// HERMES_DEFAULT_MODEL legacy fallback); accept either spelling so
// this test survives a future rename.
caseStart := regexp.MustCompile(`(?m)^case\s+"\$\{(_?HERMES(?:_DEFAULT|_INFERENCE)?_MODEL)\}"\s+in\s*$`)
startLoc := caseStart.FindIndex(raw)
if startLoc == nil {
t.Fatalf("could not locate `case \"${HERMES_DEFAULT_MODEL}\" in` in %s — shell file shape changed; rebuild parser", shellPath)
t.Fatalf("could not locate `case \"${...HERMES...MODEL}\" in` in %s — shell file shape changed; rebuild parser", vendoredShellPath)
}
caseEnd := regexp.MustCompile(`(?m)^esac\s*$`)
endLoc := caseEnd.FindIndex(raw[startLoc[1]:])
if endLoc == nil {
t.Fatalf("could not locate `esac` after the case statement in %s — shell file shape changed", shellPath)
t.Fatalf("could not locate `esac` after the case statement in %s — shell file shape changed", vendoredShellPath)
}
body := string(raw[startLoc[1] : startLoc[1]+endLoc[0]])
@ -336,13 +326,11 @@ func loadShellPrefixMap(t *testing.T) map[string]string {
// in the function.
func loadGoPrefixMap(t *testing.T) map[string]string {
t.Helper()
root := monorepoRoot(t)
goPath := filepath.Join(root, "workspace-server", "internal", "handlers", "workspace_provision.go")
fset := token.NewFileSet()
file, err := parser.ParseFile(fset, goPath, nil, parser.ParseComments)
file, err := parser.ParseFile(fset, goSourcePath, nil, parser.ParseComments)
if err != nil {
t.Fatalf("parse %s: %v", goPath, err)
t.Fatalf("parse %s: %v", goSourcePath, err)
}
var fn *ast.FuncDecl
@ -357,7 +345,7 @@ func loadGoPrefixMap(t *testing.T) map[string]string {
}
}
if fn == nil {
t.Fatalf("could not find deriveProviderFromModelSlug in %s — function renamed/removed; this gate's invariant has been violated", goPath)
t.Fatalf("could not find deriveProviderFromModelSlug in %s — function renamed/removed; this gate's invariant has been violated", goSourcePath)
}
// Walk the function body for the SwitchStmt.

View File

@ -0,0 +1,150 @@
#!/usr/bin/env bash
# VENDORED COPY — DO NOT EDIT THIS FILE BY HAND.
#
# Source of truth:
# github.com/Molecule-AI/molecule-ai-workspace-template-hermes
# scripts/derive-provider.sh
#
# This snapshot is read by derive_provider_drift_test.go so the AST
# drift gate stays hermetic (no network, no submodule, no path-walk).
# When upstream changes, refresh via:
#
# cp ~/path/to/molecule-ai-workspace-template-hermes/scripts/derive-provider.sh \
# workspace-server/internal/handlers/testdata/derive-provider.sh
#
# (and re-add this VENDORED COPY header at the top of the refreshed
# file.) The drift test will
# fail loudly if upstream adds prefixes that deriveProviderFromModelSlug
# doesn't handle — fix it by adding the missing case to the Go function,
# not by silently widening acceptedDivergences.
#
# derive-provider.sh — map a hermes-agent model slug to its provider
# name. Sourced by both install.sh (SaaS bare-host path) and start.sh
# (Docker path) so the two entry-points stay consistent.
#
# Contract:
# Reads: $HERMES_INFERENCE_PROVIDER (if already set, we respect it)
# $HERMES_INFERENCE_MODEL (preferred — matches upstream env name)
# $HERMES_DEFAULT_MODEL (legacy fallback — name we invented before
# 2026-05; workspace-server still writes
# it during the migration window)
# $HERMES_API_KEY / $NOUS_API_KEY (affect the nousresearch/* branch)
# $OPENAI_API_KEY (affects the openai/* branch)
# Writes: $PROVIDER — the derived provider name, or "auto" if unknown.
#
# Upstream's actual env var is $HERMES_INFERENCE_MODEL (see
# website/docs/reference/environment-variables.md in NousResearch/hermes-agent).
# We accept both for one release cycle so workspaces booting under the legacy
# control-plane don't break — drop $HERMES_DEFAULT_MODEL once workspace-server
# is updated to write the upstream name.
#
# Why the per-template sub-script (vs doing this in CP): every runtime
# has its own provider taxonomy. Keeping the logic inside the template
# repo means CP stays runtime-agnostic and adding a new runtime with
# different provider semantics doesn't require a CP edit.
#
# Hermes-specific quirks encoded here:
# - `openai/...` resolves to `custom` (direct OpenAI) when
# OPENAI_API_KEY is set, otherwise to `openrouter` (hermes has no
# direct openai provider; openai-codex is OAuth-only for Codex models)
# - `nousresearch/...` prefers direct `nous` if HERMES_API_KEY or
# NOUS_API_KEY is set, else falls back to `openrouter` (which also
# serves Hermes 3)
# - chinese-region variants (minimax-cn, kimi-coding-cn) keep their
# full prefix as the provider name
#
# See molecule-controlplane/docs/canary-tenants.md and the hermes-agent
# providers.md docs for the full taxonomy.
# Honour an explicit override. `return` succeeds when this file is
# sourced; if it fails (file executed directly) its error is silenced
# and `exit 0` ends the script instead.
if [ -n "${HERMES_INFERENCE_PROVIDER:-}" ]; then
PROVIDER="${HERMES_INFERENCE_PROVIDER}"
return 0 2>/dev/null || exit 0
fi
# Resolve the model slug — prefer the upstream env name, fall back to legacy.
# With neither variable set the slug is empty and PROVIDER becomes "auto".
_HERMES_MODEL="${HERMES_INFERENCE_MODEL:-${HERMES_DEFAULT_MODEL:-}}"
if [ -z "${_HERMES_MODEL}" ]; then
PROVIDER="auto"
return 0 2>/dev/null || exit 0
fi
case "${_HERMES_MODEL}" in
# Keep full CN-suffix as provider so chinese-region keys route right
minimax-cn/*) PROVIDER="minimax-cn" ;;
kimi-coding-cn/*) PROVIDER="kimi-coding-cn" ;;
# Direct-SDK providers (clean 1:1 prefix→provider mapping)
minimax/*) PROVIDER="minimax" ;;
anthropic/*) PROVIDER="anthropic" ;;
gemini/*) PROVIDER="gemini" ;;
deepseek/*) PROVIDER="deepseek" ;;
zai/*) PROVIDER="zai" ;;
kimi-coding/*) PROVIDER="kimi-coding" ;;
alibaba/*|dashscope/*|qwen/*) PROVIDER="alibaba" ;;
xiaomi/*|mimo/*) PROVIDER="xiaomi" ;;
arcee/*|arcee-ai/*) PROVIDER="arcee" ;;
nvidia/*|nim/*) PROVIDER="nvidia" ;;
ollama-cloud/*) PROVIDER="ollama-cloud" ;;
huggingface/*|hf/*) PROVIDER="huggingface" ;;
ai-gateway/*|aigateway/*) PROVIDER="ai-gateway" ;;
kilocode/*) PROVIDER="kilocode" ;;
opencode-zen/*) PROVIDER="opencode-zen" ;;
opencode-go/*) PROVIDER="opencode-go" ;;
# Hermes-specific routing quirks. `openai/*` has two valid targets:
# 1. hermes's "custom" provider pointed at api.openai.com — requires
# OPENAI_API_KEY. install.sh sees this case and auto-populates
# HERMES_CUSTOM_{BASE_URL,API_KEY} so the direct-OpenAI path works
# without the user having to set HERMES_CUSTOM_* explicitly.
# 2. OpenRouter (hermes's built-in path — requires OPENROUTER_API_KEY).
#
# Priority: prefer **custom** (direct OpenAI) when OPENAI_API_KEY is set.
# The operator supplying OPENAI_API_KEY for an openai/* model is an
# explicit intent signal to hit OpenAI directly. The previous "prefer
# OR if any OR key exists" rule silently hijacked that intent whenever
# a tenant-global OPENROUTER_API_KEY was present (even if stale/empty
# enough to 401), which is exactly what bit the 2026-04-23 E2E (surfaced
# as OpenRouter's `401 Missing Authentication header` in the agent reply).
#
# To explicitly route openai/* through OR, set HERMES_INFERENCE_PROVIDER=openrouter
# (handled at the top of this file) or use an openrouter/* model slug.
openai/*)
if [ -n "${OPENAI_API_KEY:-}" ]; then
PROVIDER="custom"
elif [ -n "${OPENROUTER_API_KEY:-}" ]; then
PROVIDER="openrouter"
else
PROVIDER="openrouter" # no-key fallback — hermes will error clearly
fi
;;
nousresearch/*)
# Prefer direct Nous Portal if Nous credentials present, else OR.
if [ -n "${HERMES_API_KEY:-}" ] || [ -n "${NOUS_API_KEY:-}" ]; then
PROVIDER="nous"
else
PROVIDER="openrouter"
fi
;;
# Explicit passthrough prefixes (slug already names the provider)
openrouter/*) PROVIDER="openrouter" ;;
custom/*) PROVIDER="custom" ;;
# Additional 1:1 prefix→provider mappings — kept aligned with upstream's
# HERMES_INFERENCE_PROVIDER list (website/docs/reference/environment-variables.md
# in NousResearch/hermes-agent, v0.12.0 / 2026-04-30). Place these BEFORE the
# catch-all so they win.
xai/*|grok/*) PROVIDER="xai" ;;
bedrock/*|aws/*) PROVIDER="bedrock" ;;
tencent/*|tencent-tokenhub/*) PROVIDER="tencent-tokenhub" ;;
gmi/*) PROVIDER="gmi" ;;
qwen-oauth/*) PROVIDER="qwen-oauth" ;;
lmstudio/*|lm-studio/*) PROVIDER="lmstudio" ;;
minimax-oauth/*) PROVIDER="minimax-oauth" ;;
alibaba-coding-plan/*) PROVIDER="alibaba-coding-plan" ;;
google-gemini-cli/*) PROVIDER="google-gemini-cli" ;;
openai-codex/*) PROVIDER="openai-codex" ;;
copilot-acp/*) PROVIDER="copilot-acp" ;;
copilot/*) PROVIDER="copilot" ;;
# Unknown prefix → let hermes auto-detect
*) PROVIDER="auto" ;;
esac

View File

@ -662,6 +662,34 @@ func deriveProviderFromModelSlug(model string) string {
// extra config) and let the script upgrade to nous/custom at runtime.
case "nousresearch", "openai":
return "openrouter"
// Additional 1:1 prefix→provider mappings — kept aligned with upstream's
// HERMES_INFERENCE_PROVIDER list (NousResearch/hermes-agent v0.12.0,
// 2026-04-30) and the additional case clauses in derive-provider.sh.
// The drift gate in derive_provider_drift_test.go enforces parity.
case "xai", "grok":
return "xai"
case "bedrock", "aws":
return "bedrock"
case "tencent", "tencent-tokenhub":
return "tencent-tokenhub"
case "gmi":
return "gmi"
case "qwen-oauth":
return "qwen-oauth"
case "lmstudio", "lm-studio":
return "lmstudio"
case "minimax-oauth":
return "minimax-oauth"
case "alibaba-coding-plan":
return "alibaba-coding-plan"
case "google-gemini-cli":
return "google-gemini-cli"
case "openai-codex":
return "openai-codex"
case "copilot-acp":
return "copilot-acp"
case "copilot":
return "copilot"
}
// Unknown prefix → don't persist a guess. derive-provider.sh's
// *=auto fallback handles it at runtime.

View File

@ -437,6 +437,25 @@ func TestDeriveProviderFromModelSlug(t *testing.T) {
// boot if HERMES_API_KEY/OPENAI_API_KEY are present).
{"nousresearch defaults to openrouter at provision time", "nousresearch/hermes-4-70b", "openrouter"},
{"openai defaults to openrouter at provision time", "openai/gpt-5", "openrouter"},
// hermes-agent v0.12.0 / 2026-04-30 provider list — the drift gate
// in derive_provider_drift_test.go pins parity with the shell case
// statement.
{"xai", "xai/grok-4", "xai"},
{"xai via grok alias", "grok/grok-4", "xai"},
{"bedrock", "bedrock/anthropic.claude-sonnet-4-6", "bedrock"},
{"bedrock via aws alias", "aws/anthropic.claude-sonnet-4-6", "bedrock"},
{"tencent", "tencent/hunyuan-coder", "tencent-tokenhub"},
{"tencent-tokenhub passthrough", "tencent-tokenhub/hunyuan-coder", "tencent-tokenhub"},
{"gmi", "gmi/gmi-coder-1", "gmi"},
{"qwen-oauth", "qwen-oauth/qwen3-coder", "qwen-oauth"},
{"lmstudio", "lmstudio/qwen3-coder", "lmstudio"},
{"lmstudio via lm-studio alias", "lm-studio/qwen3-coder", "lmstudio"},
{"minimax-oauth", "minimax-oauth/MiniMax-M2.7", "minimax-oauth"},
{"alibaba-coding-plan", "alibaba-coding-plan/qwen3-coder", "alibaba-coding-plan"},
{"google-gemini-cli", "google-gemini-cli/gemini-2.5-pro", "google-gemini-cli"},
{"openai-codex", "openai-codex/gpt-5-codex", "openai-codex"},
{"copilot-acp", "copilot-acp/claude-sonnet-4-6", "copilot-acp"},
{"copilot", "copilot/claude-sonnet-4-6", "copilot"},
// Unknowns return "" so the caller skips the LLM_PROVIDER write
// and lets derive-provider.sh's *=auto branch decide at runtime.
{"unknown prefix returns empty", "totally-unknown-model/foo", ""},