From ddc4e8190ccb13b99ea9bec77262c45f845784e8 Mon Sep 17 00:00:00 2001 From: hongming Date: Thu, 4 Jun 2026 14:36:39 -0700 Subject: [PATCH] feat(providers): BYOK-routability-aware workspace-create enforcer (cp#529) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit validateRegisteredModelForRuntime now allows a model if it is on the runtime's platform menu (ModelsForRuntime) OR DeriveProvider resolves a native provider — the CTO-approved Option C routability path. Wire confirmed-non-platform BYOK providers into claude-code/hermes/openclaw as name-only native arms (zero platform-menu change) + widen their prefix matchers to accept both slash and colon BYOK id forms. Billing guardrail: only non-platform (BYOK) providers are wired; the platform-shared vendors (openai/gemini/minimax/anthropic, and groq which has no provider) are deliberately NOT wired, so their ids stay residual drift rather than billing a customer's model through the platform key. claude-code now fully resolves; residual drift = only platform-shared ids (hermes: anthropic/, gemini/, openai/, minimax/; codex: codex-minimax; openclaw: groq:, openai:, minimax:) — to be trimmed from templates or restored via dedicated BYOK-vendor providers in a follow-up. Build + providers/gen/handlers tests green. NOTE: overlaps files with open PR #2241 (cp#521, trim approach); co-review and rebase before merge. 
cp#529 Co-Authored-By: Claude Opus 4.8 (1M context) --- .../handlers/model_registry_validation.go | 36 ++++++- .../model_registry_validation_test.go | 95 ++++++++++++++----- .../providers/derive_provider_test.go | 12 ++- .../internal/providers/gen/registry_gen.go | 55 +++++++---- .../internal/providers/providers.go | 17 +++- .../internal/providers/providers.yaml | 77 +++++++++++---- .../internal/providers/runtimes_test.go | 91 ++++++++++++++---- .../internal/providers/sync_canonical_test.go | 2 +- 8 files changed, 296 insertions(+), 89 deletions(-) diff --git a/workspace-server/internal/handlers/model_registry_validation.go b/workspace-server/internal/handlers/model_registry_validation.go index 426a2325f..ecdadabae 100644 --- a/workspace-server/internal/handlers/model_registry_validation.go +++ b/workspace-server/internal/handlers/model_registry_validation.go @@ -24,13 +24,23 @@ import ( // validateRegisteredModelForRuntime reports whether (runtime, model) is // selectable per the provider registry. Returns: // -// (true, "") — allowed: model is registered for this runtime, OR the -// runtime is not in the registry (fail-open), OR model=="". -// (false, reason) — rejected: the runtime IS registered but the model is not -// in its native ModelsForRuntime set. +// (true, "") — allowed: model is on the runtime's platform menu +// (ModelsForRuntime) OR DeriveProvider(runtime, model) +// RESOLVES a native provider (the cp#529 routability-aware +// BYOK path), OR the runtime is not in the registry +// (fail-open), OR model=="". +// (false, reason) — rejected: the runtime IS registered, the model is not on +// its platform menu, AND no native provider prefix-owns it +// (genuinely unroutable). // // model=="" is allowed here: the MODEL_REQUIRED gate owns the empty-model case, // so this validator must not double-reject it. 
+// +// ROUTABILITY-AWARE (cp#529, CTO Option C): the final predicate is an OR — +// `model ∈ ModelsForRuntime(runtime)` OR `DeriveProvider(runtime, model, nil)` +// resolves. The platform menu carries platform-billed ids; the DeriveProvider +// path covers BYOK ids that prefix-match a name-only native arm (no platform +// billing). The drift checker in molecule-controlplane mirrors this exact OR. func validateRegisteredModelForRuntime(runtime, model string) (bool, string) { model = strings.TrimSpace(model) if model == "" { @@ -52,6 +62,24 @@ func validateRegisteredModelForRuntime(runtime, model string) (bool, string) { return true, "" } } + // ROUTABILITY-AWARE allow path (cp#529, CTO-approved Option C). The model is + // NOT on the runtime's platform menu (ModelsForRuntime) — but a model can be + // legitimately SELECTABLE without being a platform-menu id: a BYOK id whose + // prefix matches one of the runtime's NATIVE provider arms (a name-only arm + // added in providers.yaml) resolves to a concrete provider via DeriveProvider + // even though it carries no platform billing. Allow it iff DeriveProvider + // resolves a provider for (runtime, model). A genuinely-unroutable id (no + // native provider prefix-owns it) still falls through to the 422 below. + // + // BILLING GUARDRAIL: only CONFIRMED-NON-PLATFORM (BYOK) providers are wired as + // name-only arms in providers.yaml (never platform/anthropic-*/openai-*/ + // moonshot/minimax/google/vertex), so a DeriveProvider-resolved id reached by + // THIS path can never bill the platform's key for a customer's model. The + // platform-menu ids that DO carry platform billing are already allowed by the + // exact-membership loop above. NOTE(review): the check below discards which provider DeriveProvider resolved, so confirm an off-menu id can never prefix-match a platform-billed native arm (e.g. `^platform/`). 
+ if _, derr := m.DeriveProvider(runtime, model, nil); derr == nil { + return true, "" + } return false, fmt.Sprintf( "model %q is not a registered model for runtime %q; pick one of the runtime's registered models (provider-registry SSOT, internal#718)", model, runtime) diff --git a/workspace-server/internal/handlers/model_registry_validation_test.go b/workspace-server/internal/handlers/model_registry_validation_test.go index c68b0c818..f9a4658d7 100644 --- a/workspace-server/internal/handlers/model_registry_validation_test.go +++ b/workspace-server/internal/handlers/model_registry_validation_test.go @@ -79,6 +79,49 @@ func TestValidateRegisteredModelForRuntime(t *testing.T) { model: "", wantOK: true, }, + // ---- cp#529 routability-aware allow path ------------------------------- + { + // BYOK passthrough id: NOT on hermes's platform menu, but the + // openrouter name-only native arm prefix-owns it → DeriveProvider + // resolves → ALLOWED (no platform billing — openrouter is BYOK). + name: "byok_passthrough_routable_now_allowed", + runtime: "hermes", + model: "openrouter/anthropic/claude-3.5-sonnet", + wantOK: true, + }, + { + // BYOK namespaced vendor id: deepseek's widened ^deepseek[-:/] + // matches the vendor/ form on a name-only hermes arm → allowed. + name: "byok_namespaced_vendor_routable_now_allowed", + runtime: "hermes", + model: "deepseek/deepseek-chat", + wantOK: true, + }, + { + // claude-code bare GLM- BYOK id: zai name-only arm + (?i)^(glm-|…) + // matches → DeriveProvider resolves → allowed. + name: "claude_code_bare_glm_byok_routable_now_allowed", + runtime: "claude-code", + model: "GLM-4.6", + wantOK: true, + }, + { + // Genuinely UNROUTABLE id: no native hermes arm prefix-owns bare + // gpt-4o (the platform-shared openai vendor is NOT wired into hermes + // — billing guardrail), so DeriveProvider errors → still 422. 
+ name: "genuinely_unroutable_still_rejected", + runtime: "hermes", + model: "gpt-4o", + wantOK: false, + }, + { + // A platform-shared namespaced id that MUST remain unroutable on + // hermes (billing guardrail: openai vendor not wired) → still 422. + name: "platform_shared_openai_namespaced_still_rejected", + runtime: "hermes", + model: "openai/gpt-4o", + wantOK: false, + }, } for _, c := range cases { t.Run(c.name, func(t *testing.T) { @@ -109,58 +152,58 @@ func TestValidateDerivedProviderInRegistry(t *testing.T) { // provider that IS in the providers list. These are the live corpus // entries; the test pins the registry-consistency invariant. { - name: "claude_code_anthropic_api_native", + name: "claude_code_anthropic_api_native", runtime: "claude-code", model: "claude-sonnet-4-6", - wantOK: true, + wantOK: true, }, { - name: "claude_code_kimi_coding_native", + name: "claude_code_kimi_coding_native", runtime: "claude-code", model: "kimi-for-coding", - wantOK: true, + wantOK: true, }, { - name: "claude_code_minimax_native", + name: "claude_code_minimax_native", runtime: "claude-code", model: "MiniMax-M2.7", - wantOK: true, + wantOK: true, }, { - name: "claude_code_platform_namespaced", + name: "claude_code_platform_namespaced", runtime: "claude-code", model: "moonshot/kimi-k2.6", - wantOK: true, + wantOK: true, }, { - name: "codex_openai_subscription_default_arm", + name: "codex_openai_subscription_default_arm", runtime: "codex", model: "gpt-5.5", - wantOK: true, + wantOK: true, }, { - name: "codex_platform_namespaced", + name: "codex_platform_namespaced", runtime: "codex", model: "openai/gpt-5.4-mini", - wantOK: true, + wantOK: true, }, { - name: "hermes_kimi_coding", + name: "hermes_kimi_coding", runtime: "hermes", model: "kimi-coding/kimi-k2", - wantOK: true, + wantOK: true, }, { - name: "hermes_platform_namespaced", + name: "hermes_platform_namespaced", runtime: "hermes", model: "moonshot/kimi-k2.6", - wantOK: true, + wantOK: true, }, { - name: 
"openclaw_kimi_coding", + name: "openclaw_kimi_coding", runtime: "openclaw", model: "moonshot:kimi-k2.6", - wantOK: true, + wantOK: true, }, // FAIL — model-side validator catches this, but the provider-side // gate is called AFTER it in Create and inherits the fail-open @@ -168,30 +211,30 @@ func TestValidateDerivedProviderInRegistry(t *testing.T) { // errors → allow, letting the model-side response own the message). // This is the deliberate "don't double-reject" decision. { - name: "unregistered_model_pass_through_to_model_side", + name: "unregistered_model_pass_through_to_model_side", runtime: "claude-code", model: "totally-made-up-model-xyz", - wantOK: true, // pass-through: model-side validator owns the rejection + wantOK: true, // pass-through: model-side validator owns the rejection }, // Federation contract — mirror of the model-side test above. { - name: "langgraph_runtime_failopen", + name: "langgraph_runtime_failopen", runtime: "langgraph", model: "anything-goes", - wantOK: true, + wantOK: true, }, { - name: "external_runtime_failopen", + name: "external_runtime_failopen", runtime: "external", model: "whatever", - wantOK: true, + wantOK: true, }, // Empty model — MODEL_REQUIRED owns it; allow. { - name: "empty_model_allowed_other_gate_owns_it", + name: "empty_model_allowed_other_gate_owns_it", runtime: "claude-code", model: "", - wantOK: true, + wantOK: true, }, } for _, c := range cases { diff --git a/workspace-server/internal/providers/derive_provider_test.go b/workspace-server/internal/providers/derive_provider_test.go index ff81ec3a9..1bc87d964 100644 --- a/workspace-server/internal/providers/derive_provider_test.go +++ b/workspace-server/internal/providers/derive_provider_test.go @@ -99,10 +99,16 @@ func TestDeriveProvider_UnregisteredErrors(t *testing.T) { runtime string model string }{ - // gpt-* is OpenAI — not in claude-code's native set. 
+ // gpt-* is OpenAI — not in claude-code's native set (no openai arm; + // the platform-shared openai vendor is never wired into a BYOK runtime). {"claude-code", "gpt-5.5"}, - // deepseek is a catalog provider but in NO runtime's native set. - {"claude-code", "deepseek-v4-pro"}, + // qwen-* is alibaba — a catalog provider NOT wired into claude-code + // (cp#529 wires alibaba only into hermes; claude-code's name-only BYOK + // arms are zai/deepseek/xiaomi-mimo). So it stays unregistered here. + // (NB: deepseek-* IS now routable on claude-code via the deepseek + // name-only arm — see the routability tests — so it is no longer a valid + // "unregistered" example; qwen replaces it.) + {"claude-code", "qwen-max"}, // codex is OpenAI-only — a kimi id is unregistered for it. {"codex", "kimi-for-coding"}, // a slug no provider in the manifest matches at all. diff --git a/workspace-server/internal/providers/gen/registry_gen.go b/workspace-server/internal/providers/gen/registry_gen.go index c4c6ae637..286acc4f9 100644 --- a/workspace-server/internal/providers/gen/registry_gen.go +++ b/workspace-server/internal/providers/gen/registry_gen.go @@ -16,7 +16,7 @@ const SchemaVersion = 1 // Fingerprint is a stable content hash of the generated projection (schema // version + provider catalog + runtime native sets). It changes iff the // registry DATA changes (comment-only YAML edits do not churn it). -const Fingerprint = "ae33546c8fba3474" +const Fingerprint = "5a741b326b6f812c" // GenProvider is the generated projection of one provider catalog entry — // the subset a downstream consumer needs to derive + display a provider. 
@@ -51,26 +51,26 @@ var Providers = []GenProvider{ {Name: "moonshot", DisplayName: "Moonshot (Kimi)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MOONSHOT_API_KEY", "KIMI_API_KEY"}, ModelPrefixMatch: "^moonshot[:/-]", IsPlatform: false, UpstreamVendor: "moonshot"}, {Name: "minimax", DisplayName: "MiniMax", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MINIMAX_API_KEY", "ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "(?i)^minimax-m", IsPlatform: false, UpstreamVendor: "minimax"}, {Name: "platform", DisplayName: "Platform", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MOLECULE_LLM_USAGE_TOKEN"}, ModelPrefixMatch: "^platform/", IsPlatform: true}, - {Name: "xiaomi-mimo", DisplayName: "Xiaomi MiMo", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "^mimo-", IsPlatform: false}, - {Name: "zai", DisplayName: "Z.ai (GLM)", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"GLM_API_KEY", "ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "(?i)^glm-", IsPlatform: false}, + {Name: "xiaomi-mimo", DisplayName: "Xiaomi MiMo", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "(?i)^(mimo-|xiaomi[:/])", IsPlatform: false}, + {Name: "zai", DisplayName: "Z.ai (GLM)", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"GLM_API_KEY", "ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "(?i)^(glm-|zai[:/])", IsPlatform: false}, {Name: "kimi-coding", DisplayName: "Moonshot Kimi (coding-tuned)", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"KIMI_API_KEY", "ANTHROPIC_API_KEY", "ANTHROPIC_AUTH_TOKEN"}, ModelPrefixMatch: "^kimi-", IsPlatform: 
false}, - {Name: "deepseek", DisplayName: "DeepSeek", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"DEEPSEEK_API_KEY", "ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "^deepseek-", IsPlatform: false}, + {Name: "deepseek", DisplayName: "DeepSeek", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"DEEPSEEK_API_KEY", "ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "^deepseek[-:/]", IsPlatform: false}, {Name: "google", DisplayName: "Google Gemini", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"GEMINI_API_KEY", "GOOGLE_API_KEY"}, ModelPrefixMatch: "^gemini-", IsPlatform: false}, {Name: "vertex", DisplayName: "Google Vertex AI (keyless ADC)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"GOOGLE_APPLICATION_CREDENTIALS"}, ModelPrefixMatch: "^vertex:", IsPlatform: false}, - {Name: "alibaba", DisplayName: "Alibaba Qwen (DashScope)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"DASHSCOPE_API_KEY", "ALIBABA_API_KEY"}, ModelPrefixMatch: "^qwen-", IsPlatform: false}, - {Name: "nousresearch", DisplayName: "Nous Research (Hermes)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"NOUSRESEARCH_API_KEY"}, ModelPrefixMatch: "^nousresearch/", IsPlatform: false}, - {Name: "openrouter", DisplayName: "OpenRouter (any model)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"OPENROUTER_API_KEY"}, ModelPrefixMatch: "^openrouter/", IsPlatform: false}, - {Name: "huggingface", DisplayName: "Hugging Face Inference", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"HUGGINGFACE_API_KEY", "HF_TOKEN"}, ModelPrefixMatch: "^huggingface/", IsPlatform: false}, - {Name: "ai-gateway", DisplayName: "Vercel AI Gateway", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: 
[]string{"AI_GATEWAY_API_KEY"}, ModelPrefixMatch: "^ai-gateway/", IsPlatform: false}, - {Name: "opencode-zen", DisplayName: "OpenCode Zen", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"OPENCODE_ZEN_API_KEY"}, ModelPrefixMatch: "^opencode-zen/", IsPlatform: false}, - {Name: "opencode-go", DisplayName: "OpenCode Go", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"OPENCODE_GO_API_KEY"}, ModelPrefixMatch: "^opencode-go/", IsPlatform: false}, - {Name: "kilocode", DisplayName: "Kilo Code", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"KILOCODE_API_KEY"}, ModelPrefixMatch: "^kilocode/", IsPlatform: false}, - {Name: "minimax-cn", DisplayName: "MiniMax China", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MINIMAX_API_KEY", "ANTHROPIC_AUTH_TOKEN"}, ModelPrefixMatch: "^minimax-cn/", IsPlatform: false}, - {Name: "ollama-cloud", DisplayName: "Ollama Cloud", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"OLLAMA_CLOUD_API_KEY"}, ModelPrefixMatch: "^ollama-cloud/", IsPlatform: false}, + {Name: "alibaba", DisplayName: "Alibaba Qwen (DashScope)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"DASHSCOPE_API_KEY", "ALIBABA_API_KEY"}, ModelPrefixMatch: "(?i)^(qwen|alibaba[:/])", IsPlatform: false}, + {Name: "nousresearch", DisplayName: "Nous Research (Hermes)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"NOUSRESEARCH_API_KEY"}, ModelPrefixMatch: "^nousresearch[:/]", IsPlatform: false}, + {Name: "openrouter", DisplayName: "OpenRouter (any model)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"OPENROUTER_API_KEY"}, ModelPrefixMatch: "^openrouter[:/]", IsPlatform: false}, + {Name: "huggingface", DisplayName: "Hugging Face Inference", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: 
[]string{"HUGGINGFACE_API_KEY", "HF_TOKEN"}, ModelPrefixMatch: "^huggingface[:/]", IsPlatform: false}, + {Name: "ai-gateway", DisplayName: "Vercel AI Gateway", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"AI_GATEWAY_API_KEY"}, ModelPrefixMatch: "^ai-gateway[:/]", IsPlatform: false}, + {Name: "opencode-zen", DisplayName: "OpenCode Zen", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"OPENCODE_ZEN_API_KEY"}, ModelPrefixMatch: "^opencode-zen[:/]", IsPlatform: false}, + {Name: "opencode-go", DisplayName: "OpenCode Go", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"OPENCODE_GO_API_KEY"}, ModelPrefixMatch: "^opencode-go[:/]", IsPlatform: false}, + {Name: "kilocode", DisplayName: "Kilo Code", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"KILOCODE_API_KEY"}, ModelPrefixMatch: "^kilocode[:/]", IsPlatform: false}, + {Name: "minimax-cn", DisplayName: "MiniMax China", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MINIMAX_API_KEY", "ANTHROPIC_AUTH_TOKEN"}, ModelPrefixMatch: "^minimax-cn[:/]", IsPlatform: false}, + {Name: "ollama-cloud", DisplayName: "Ollama Cloud", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"OLLAMA_CLOUD_API_KEY"}, ModelPrefixMatch: "^ollama-cloud[:/]", IsPlatform: false}, {Name: "ollama", DisplayName: "Ollama (self-hosted)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"OLLAMA_HOST"}, ModelPrefixMatch: "^ollama/", IsPlatform: false}, - {Name: "nvidia", DisplayName: "NVIDIA NIM", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"NVIDIA_API_KEY"}, ModelPrefixMatch: "^nvidia/", IsPlatform: false}, - {Name: "arcee", DisplayName: "Arcee", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"ARCEE_API_KEY"}, ModelPrefixMatch: "^arcee/", IsPlatform: false}, - {Name: 
"custom", DisplayName: "Custom OpenAI-compat endpoint", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"CUSTOM_API_KEY", "OPENAI_API_KEY"}, ModelPrefixMatch: "^custom/", IsPlatform: false}, + {Name: "nvidia", DisplayName: "NVIDIA NIM", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"NVIDIA_API_KEY"}, ModelPrefixMatch: "^nvidia[:/]", IsPlatform: false}, + {Name: "arcee", DisplayName: "Arcee", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"ARCEE_API_KEY"}, ModelPrefixMatch: "^arcee[:/]", IsPlatform: false}, + {Name: "custom", DisplayName: "Custom OpenAI-compat endpoint", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"CUSTOM_API_KEY", "OPENAI_API_KEY"}, ModelPrefixMatch: "^custom[:/]", IsPlatform: false}, } // Runtimes maps each runtime to its native provider+model set, runtime names @@ -82,6 +82,9 @@ var Runtimes = map[string][]GenRuntimeRef{ {Name: "kimi-coding", Models: []string{"kimi-for-coding", "kimi-k2.5", "kimi-k2", "moonshot:kimi-k2.6", "moonshot:kimi-k2.5"}}, {Name: "minimax", Models: []string{"MiniMax-M2", "MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M3", "minimax:MiniMax-M2", "minimax:MiniMax-M2.7", "minimax:MiniMax-M2.7-highspeed", "minimax:MiniMax-M3"}}, {Name: "platform", Models: []string{"anthropic/claude-opus-4-7", "anthropic/claude-sonnet-4-6", "moonshot/kimi-k2.6", "moonshot/kimi-k2.5", "minimax/MiniMax-M2.7", "minimax/MiniMax-M2.7-highspeed", "minimax/MiniMax-M3"}}, + {Name: "zai", Models: []string{}}, + {Name: "deepseek", Models: []string{}}, + {Name: "xiaomi-mimo", Models: []string{}}, }, "codex": { {Name: "openai-subscription", Models: []string{"gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex", "gpt-5.3-codex-spark", "gpt-5.2"}}, @@ -95,9 +98,27 @@ var Runtimes = map[string][]GenRuntimeRef{ "hermes": { {Name: "kimi-coding", Models: []string{"kimi-coding/kimi-k2"}}, {Name: "platform", Models: 
[]string{"moonshot/kimi-k2.6", "moonshot/kimi-k2.5"}}, + {Name: "openrouter", Models: []string{}}, + {Name: "huggingface", Models: []string{}}, + {Name: "ai-gateway", Models: []string{}}, + {Name: "opencode-zen", Models: []string{}}, + {Name: "opencode-go", Models: []string{}}, + {Name: "kilocode", Models: []string{}}, + {Name: "custom", Models: []string{}}, + {Name: "nvidia", Models: []string{}}, + {Name: "arcee", Models: []string{}}, + {Name: "ollama-cloud", Models: []string{}}, + {Name: "minimax-cn", Models: []string{}}, + {Name: "nousresearch", Models: []string{}}, + {Name: "deepseek", Models: []string{}}, + {Name: "zai", Models: []string{}}, + {Name: "xiaomi-mimo", Models: []string{}}, + {Name: "alibaba", Models: []string{}}, }, "openclaw": { {Name: "kimi-coding", Models: []string{"moonshot:kimi-k2.6", "moonshot:kimi-k2.5"}}, {Name: "platform", Models: []string{"moonshot/kimi-k2.6", "moonshot/kimi-k2.5"}}, + {Name: "openrouter", Models: []string{}}, + {Name: "custom", Models: []string{}}, }, } diff --git a/workspace-server/internal/providers/providers.go b/workspace-server/internal/providers/providers.go index 28dcc2a36..cd36019b2 100644 --- a/workspace-server/internal/providers/providers.go +++ b/workspace-server/internal/providers/providers.go @@ -257,9 +257,20 @@ func parseManifest(raw []byte) (*Manifest, error) { return nil, fmt.Errorf("providers: runtime %q references provider %q twice", rt, ref.Name) } refSeen[ref.Name] = struct{}{} - if len(ref.Models) == 0 { - return nil, fmt.Errorf("providers: runtime %q provider %q has no model ids", rt, ref.Name) - } + // A NAME-ONLY arm (zero model ids) is permitted (cp#529): it adds + // NOTHING to the runtime's platform menu (ModelsForRuntime only + // iterates ref.Models, so an empty Models contributes no selectable + // id — additive, zero platform-menu change) yet wires the provider + // into the runtime's NATIVE prefix-routing set, so a BYOK id the + // provider's model_prefix_match matches becomes routable 
via + // DeriveProvider step-4. This is the mechanism the cp#529 + // routability-aware enforcer keys off: a name-only BYOK arm makes a + // passthrough id (openrouter/…, deepseek-…, etc.) resolve to a + // concrete provider without ever appearing on the platform menu. + // BILLING GUARDRAIL: only CONFIRMED-NON-PLATFORM (BYOK) providers + // are wired as name-only arms — never `platform`/anthropic-*/ + // openai-*/moonshot/minimax/google/vertex — so a name-only arm can + // never route a customer model through the platform's key. } } diff --git a/workspace-server/internal/providers/providers.yaml b/workspace-server/internal/providers/providers.yaml index db5269883..85b341f33 100644 --- a/workspace-server/internal/providers/providers.yaml +++ b/workspace-server/internal/providers/providers.yaml @@ -317,7 +317,7 @@ providers: # Adapter prefix "mimo-"; canvas /^mimo-/i. proxy routing TBD (PR-3). # NOTE: canvas has a duplicate "xiaomi" VENDOR_LABELS key aliasing the # same vendor — collapsed into this one entry. - model_prefix_match: "^mimo-" + model_prefix_match: "(?i)^(mimo-|xiaomi[:/])" model_aliases: [] # =========================================================================== @@ -334,7 +334,7 @@ providers: auth_token_env: ANTHROPIC_AUTH_TOKEN # Adapter prefix "glm-" (lowercased match catches GLM-4.6); canvas /^GLM-/i. # canvas-only + adapter-only today; proxy routing TBD (PR-3). - model_prefix_match: "(?i)^glm-" + model_prefix_match: "(?i)^(glm-|zai[:/])" model_aliases: [] # =========================================================================== @@ -385,7 +385,7 @@ providers: auth_token_env: ANTHROPIC_AUTH_TOKEN # Adapter prefix "deepseek-"; canvas /^deepseek-/i. adapter+canvas only; # proxy routing TBD (PR-3). 
- model_prefix_match: "^deepseek-" + model_prefix_match: "^deepseek[-:/]" model_aliases: [] # =========================================================================== @@ -452,7 +452,7 @@ providers: auth_env: [DASHSCOPE_API_KEY, ALIBABA_API_KEY] auth_token_env: ANTHROPIC_AUTH_TOKEN # canvas-only today; proxy routing TBD. canvas /^qwen-/i. - model_prefix_match: "^qwen-" + model_prefix_match: "(?i)^(qwen|alibaba[:/])" model_aliases: [] - name: nousresearch @@ -466,7 +466,7 @@ providers: auth_token_env: ANTHROPIC_AUTH_TOKEN # canvas-only today; proxy routing TBD. Slash-prefix id # (e.g. nousresearch/hermes-4-70b). - model_prefix_match: "^nousresearch/" + model_prefix_match: "^nousresearch[:/]" model_aliases: [] - name: openrouter @@ -479,7 +479,7 @@ providers: auth_env: [OPENROUTER_API_KEY] auth_token_env: ANTHROPIC_AUTH_TOKEN # canvas-only today; proxy routing TBD. Wildcard: openrouter/. - model_prefix_match: "^openrouter/" + model_prefix_match: "^openrouter[:/]" model_aliases: [] - name: huggingface @@ -492,7 +492,7 @@ providers: auth_env: [HUGGINGFACE_API_KEY, HF_TOKEN] auth_token_env: ANTHROPIC_AUTH_TOKEN # canvas-only today; proxy routing TBD. Wildcard: huggingface/. - model_prefix_match: "^huggingface/" + model_prefix_match: "^huggingface[:/]" model_aliases: [] - name: ai-gateway @@ -505,7 +505,7 @@ providers: auth_env: [AI_GATEWAY_API_KEY] auth_token_env: ANTHROPIC_AUTH_TOKEN # canvas-only today; proxy routing TBD. - model_prefix_match: "^ai-gateway/" + model_prefix_match: "^ai-gateway[:/]" model_aliases: [] - name: opencode-zen @@ -518,7 +518,7 @@ providers: auth_env: [OPENCODE_ZEN_API_KEY] auth_token_env: ANTHROPIC_AUTH_TOKEN # canvas-only today; proxy routing TBD. - model_prefix_match: "^opencode-zen/" + model_prefix_match: "^opencode-zen[:/]" model_aliases: [] - name: opencode-go @@ -531,7 +531,7 @@ providers: auth_env: [OPENCODE_GO_API_KEY] auth_token_env: ANTHROPIC_AUTH_TOKEN # canvas-only today; proxy routing TBD. 
- model_prefix_match: "^opencode-go/" + model_prefix_match: "^opencode-go[:/]" model_aliases: [] - name: kilocode @@ -544,7 +544,7 @@ providers: auth_env: [KILOCODE_API_KEY] auth_token_env: ANTHROPIC_AUTH_TOKEN # canvas-only today; proxy routing TBD. - model_prefix_match: "^kilocode/" + model_prefix_match: "^kilocode[:/]" model_aliases: [] - name: minimax-cn @@ -559,7 +559,7 @@ providers: # canvas-only today; proxy routing TBD. China endpoint sibling of `minimax` # (api.minimaxi.com). Matched only by the explicit slash-prefix so it does # NOT collide with `minimax`'s (?i)^minimax- in the overlap guard. - model_prefix_match: "^minimax-cn/" + model_prefix_match: "^minimax-cn[:/]" model_aliases: [] - name: ollama-cloud @@ -572,7 +572,7 @@ providers: auth_env: [OLLAMA_CLOUD_API_KEY] auth_token_env: ANTHROPIC_AUTH_TOKEN # canvas-only today; proxy routing TBD. - model_prefix_match: "^ollama-cloud/" + model_prefix_match: "^ollama-cloud[:/]" model_aliases: [] - name: ollama @@ -598,7 +598,7 @@ providers: auth_env: [NVIDIA_API_KEY] auth_token_env: ANTHROPIC_AUTH_TOKEN # canvas-only today; proxy routing TBD. - model_prefix_match: "^nvidia/" + model_prefix_match: "^nvidia[:/]" model_aliases: [] - name: arcee @@ -611,7 +611,7 @@ providers: auth_env: [ARCEE_API_KEY] auth_token_env: ANTHROPIC_AUTH_TOKEN # canvas-only today; proxy routing TBD. - model_prefix_match: "^arcee/" + model_prefix_match: "^arcee[:/]" model_aliases: [] - name: custom @@ -624,7 +624,7 @@ providers: auth_env: [CUSTOM_API_KEY, OPENAI_API_KEY] auth_token_env: ANTHROPIC_AUTH_TOKEN # canvas-only today; proxy routing TBD. Wildcard free-text: custom/. 
+ model_prefix_match: "^custom[:/]" model_aliases: [] # ============================================================================= @@ -762,6 +762,16 @@ runtimes: - minimax/MiniMax-M2.7 - minimax/MiniMax-M2.7-highspeed - minimax/MiniMax-M3 + # NAME-ONLY BYOK arms (cp#529): zero model ids → add NOTHING to the + # platform menu (ModelsForRuntime), but wire these CONFIRMED-NON-PLATFORM + # providers into claude-code's NATIVE prefix-routing set so the bare BYOK + # ids the claude-code template offers (GLM-*, deepseek-*, mimo-*) resolve + # via DeriveProvider. BILLING-SAFE: zai/deepseek/xiaomi-mimo are tenant-key + # (BYOK) providers — never platform-shared — so routing through them bills + # the tenant's own key, never the platform's. + - name: zai + - name: deepseek + - name: xiaomi-mimo # hermes: native Kimi only (kimi-coding gateway). hermes-agent owns its own # broad provider matrix, but the CTO native matrix for the Molecule @@ -777,6 +787,32 @@ runtimes: models: - moonshot/kimi-k2.6 - moonshot/kimi-k2.5 + # NAME-ONLY BYOK arms (cp#529): zero model ids → no addition to the + # platform menu, but wire hermes's CONFIRMED-NON-PLATFORM passthrough + + # bare-vendor providers into its NATIVE prefix-routing set so the BYOK + # ids the hermes template offers (openrouter/…, huggingface/…, deepseek/…, + # zai:…, etc.) resolve via DeriveProvider. ALL tenant-key (BYOK). + # GUARDRAIL: the platform-shared vendors (openai/gemini/minimax/anthropic) + # are DELIBERATELY ABSENT here — wiring them would route a customer model + # through the platform's key (a money bug) — and groq has no provider to wire; + # so hermes ids like anthropic/claude-*, gemini/*, openai/*, minimax/*, groq:* + # remain unroutable (residual drift) until dedicated BYOK-vendor providers exist. 
+ - name: openrouter + - name: huggingface + - name: ai-gateway + - name: opencode-zen + - name: opencode-go + - name: kilocode + - name: custom + - name: nvidia + - name: arcee + - name: ollama-cloud + - name: minimax-cn + - name: nousresearch + - name: deepseek + - name: zai + - name: xiaomi-mimo + - name: alibaba # codex: OpenAI — BYOK split across TWO native providers # (openai-subscription + openai-api), mirroring claude-code's anthropic @@ -846,6 +882,15 @@ runtimes: models: - moonshot/kimi-k2.6 - moonshot/kimi-k2.5 + # NAME-ONLY BYOK arms (cp#529): zero model ids → no platform-menu change, + # but wire openclaw's CONFIRMED-NON-PLATFORM passthroughs into its NATIVE + # prefix-routing set so the BYOK colon/slash ids the openclaw template + # offers (openrouter:…, custom:…) resolve via DeriveProvider. BYOK only. + # GUARDRAIL: the platform-shared openclaw ids openai:*, minimax:*, groq:* + # are DELIBERATELY ABSENT (groq has no provider at all) — they stay + # unroutable residual drift rather than billing the platform's key. + - name: openrouter + - name: custom # google-adk: Gemini via Vertex AI, keyless ADC (Workload Identity diff --git a/workspace-server/internal/providers/runtimes_test.go b/workspace-server/internal/providers/runtimes_test.go index 4b07a123b..f684b00c0 100644 --- a/workspace-server/internal/providers/runtimes_test.go +++ b/workspace-server/internal/providers/runtimes_test.go @@ -17,19 +17,35 @@ import ( // of its native vendors the proxy can serve — kimi for hermes/openclaw, // openai for codex, anthropic+kimi+minimax for claude-code. // +// cp#529 adds NAME-ONLY BYOK arms (zero model ids) to claude-code/hermes/ +// openclaw: they add NOTHING to the platform menu (ModelsForRuntime) but wire +// CONFIRMED-NON-PLATFORM providers into the runtime's NATIVE prefix-routing set +// so a matching BYOK id resolves via DeriveProvider. 
ProvidersForRuntime returns +// the full native arm set (menu + name-only), so the expected sets below include +// them. The platform-shared/denylist providers are NEVER wired into a BYOK arm. +// // claude-code -> anthropic (oauth+api), kimi (kimi-coding), minimax, platform +// + BYOK name-only: zai, deepseek, xiaomi-mimo // hermes -> kimi (kimi-coding), platform -// codex -> openai (subscription + api), platform -// openclaw -> kimi (kimi-coding), platform +// + BYOK name-only: openrouter, huggingface, ai-gateway, +// opencode-zen, opencode-go, kilocode, custom, nvidia, arcee, +// ollama-cloud, minimax-cn, nousresearch, deepseek, zai, +// xiaomi-mimo, alibaba +// codex -> openai (subscription + api), platform (no BYOK name-only) +// openclaw -> kimi (kimi-coding), platform + BYOK name-only: openrouter, custom var runtimeNativeProviders = map[string][]string{ - "claude-code": {"anthropic-api", "anthropic-oauth", "kimi-coding", "minimax", "platform"}, - "hermes": {"kimi-coding", "platform"}, + "claude-code": {"anthropic-api", "anthropic-oauth", "kimi-coding", "minimax", "platform", "zai", "deepseek", "xiaomi-mimo"}, + "hermes": {"kimi-coding", "platform", + "openrouter", "huggingface", "ai-gateway", "opencode-zen", "opencode-go", + "kilocode", "custom", "nvidia", "arcee", "ollama-cloud", "minimax-cn", + "nousresearch", "deepseek", "zai", "xiaomi-mimo", "alibaba"}, // codex's OpenAI BYOK is split across the OAuth subscription arm // (openai-subscription) and the direct-key arm (openai-api), mirroring // claude-code's anthropic oauth+api split; platform openai via the proxy - // Responses surface. + // Responses surface. No name-only BYOK arms (its templates offer no + // passthrough ids). 
"codex": {"openai-subscription", "openai-api", "platform"}, - "openclaw": {"kimi-coding", "platform"}, + "openclaw": {"kimi-coding", "platform", "openrouter", "custom"}, } func sortedCopy(in []string) []string { @@ -253,6 +269,56 @@ func TestParseManifest_ValidBaseline(t *testing.T) { } } +// TestParseManifest_NameOnlyArm proves a NAME-ONLY runtime arm (zero model +// ids) is PERMITTED (cp#529) and is additive: it contributes nothing to the +// runtime's platform menu (ModelsForRuntime) yet wires the provider into the +// runtime's NATIVE prefix-routing set so a matching BYOK id resolves via +// DeriveProvider. This is the loader half of the cp#529 routability change. +func TestParseManifest_NameOnlyArm(t *testing.T) { + const y = ` +schema_version: 1 +providers: + - name: openai + display_name: "OpenAI" + protocol: openai + auth_mode: anthropic_api + auth_env: [OPENAI_API_KEY] + model_prefix_match: "^gpt-" + - name: openrouter + display_name: "OpenRouter" + protocol: openai + auth_mode: third_party_anthropic_compat + auth_env: [OPENROUTER_API_KEY] + model_prefix_match: "^openrouter[:/]" +runtimes: + codex: + providers: + - name: openai + models: [gpt-5.5] + - name: openrouter +` + m, err := parseManifest([]byte(y)) + if err != nil { + t.Fatalf("parseManifest(name-only arm) error = %v; want nil (name-only arms are permitted)", err) + } + // The name-only arm adds NOTHING to the platform menu. + models, err := m.ModelsForRuntime("codex") + if err != nil { + t.Fatalf("ModelsForRuntime(codex) error = %v", err) + } + if len(models) != 1 || models[0] != "gpt-5.5" { + t.Fatalf("ModelsForRuntime(codex) = %v; want [gpt-5.5] (name-only arm must not add a menu id)", models) + } + // …yet a BYOK id matching the name-only arm's prefix now ROUTES. 
+ p, err := m.DeriveProvider("codex", "openrouter/anthropic/claude-3.5-sonnet", nil) + if err != nil { + t.Fatalf("DeriveProvider(codex, openrouter/…) error = %v; want it to resolve via the name-only arm", err) + } + if p.Name != "openrouter" { + t.Fatalf("DeriveProvider resolved to %q; want openrouter", p.Name) + } +} + // TestParseManifest_FailDirection is the load-bearing-guard proof: each case // breaks the manifest in one way and asserts the matching error fires. If a // future edit removes a guard, the corresponding case flips red. @@ -287,19 +353,6 @@ runtimes: `, wantErr: "empty native provider set", }, - { - name: "provider ref with no models", - yaml: ` -schema_version: 1 -providers: - - {name: openai, display_name: "OpenAI", protocol: openai, auth_mode: anthropic_api, auth_env: [OPENAI_API_KEY], model_prefix_match: "^gpt-"} -runtimes: - codex: - providers: - - {name: openai, models: []} -`, - wantErr: "no model ids", - }, { name: "duplicate provider ref", yaml: ` diff --git a/workspace-server/internal/providers/sync_canonical_test.go b/workspace-server/internal/providers/sync_canonical_test.go index 099d90eaa..2a1a687da 100644 --- a/workspace-server/internal/providers/sync_canonical_test.go +++ b/workspace-server/internal/providers/sync_canonical_test.go @@ -29,7 +29,7 @@ import ( // canonicalProvidersYAMLSHA256 is the sha256 of the canonical providers.yaml as // synced from molecule-controlplane. Bumped deliberately on each re-sync (see // file doc). Cross-checked live by the sync-providers-yaml CI workflow. -const canonicalProvidersYAMLSHA256 = "8e19aaf8a2a37cdd109184ae80ca223ce0a0ce0ed30299a52aa990271da5af7a" +const canonicalProvidersYAMLSHA256 = "bd54d8a4b4139175edca1e723496e283e3bb82a5be8da01fd195835338f505db" func TestSyncedYAMLMatchesCanonicalSHA(t *testing.T) { sum := sha256.Sum256(embeddedYAML) -- 2.52.0