fix(core): sync canonical providers yaml #2017

Closed
devops-engineer wants to merge 1 commit from fix/sync-providers-yaml-openai-split-20260531 into main
8 changed files with 170 additions and 65 deletions
@@ -51,10 +51,10 @@ func (p Provider) IsPlatform() bool {
// never selectable for that runtime, even if its catalog regex matches.
// 3. EXACT model-id match is authoritative (CTO 2026-05-27 "disambiguate by
// exact model id"): if the model id appears verbatim in exactly one
// native provider ref's Models list, that provider wins outright — this
// resolves the kimi namespace split (moonshot/kimi-k2.6 -> platform vs
// bare kimi-for-coding -> kimi-coding) deterministically and overrides
// any broader prefix match.
// native provider ref's Models list, that provider wins outright. If it is
// exact-listed by multiple native providers, available auth env selects the
// single matching provider; with no auth context, the first-declared provider
// is the runtime default. If auth env is supplied but does not single out
// exactly one of those providers, DeriveProvider returns an error.
// 4. Otherwise, fall back to model_prefix_match among the native providers.
// 5. If >1 native provider still matches, disambiguate by auth env: keep
// only the providers whose auth_env intersects availableAuthEnv. If
@@ -85,11 +85,10 @@ func (m *Manifest) DeriveProvider(runtime, model string, availableAuthEnv []stri
}
// Step 3: exact model-id match against each native provider ref's Models.
// Authoritative — a verbatim id beats any prefix. If two native refs both
// list the same id, that is a manifest ambiguity we surface rather than
// silently pick (LoadManifest already forbids a provider ref appearing
// twice in one runtime, but two DIFFERENT providers listing the same id
// is not load-rejected, so guard it here).
// Authoritative — a verbatim id beats any prefix. Some runtimes intentionally
// exact-list the same model id under two auth arms (for example codex
// subscription + OpenAI API key), so auth env or provider order resolves
// those declared defaults.
var exact []Provider
for _, ref := range native.Providers {
for _, mid := range ref.Models {
@@ -105,8 +104,14 @@ func (m *Manifest) DeriveProvider(runtime, model string, availableAuthEnv []stri
return exact[0], nil
}
if len(exact) > 1 {
if p, ok := disambiguateByAuthEnv(exact, availableAuthEnv); ok {
return p, nil
}
if len(availableAuthEnv) == 0 {
return exact[0], nil
}
return Provider{}, fmt.Errorf(
"providers: model %q for runtime %q is exact-listed by %d native providers (%s) — manifest ambiguity",
"providers: model %q for runtime %q is exact-listed by %d native providers (%s) and auth env did not disambiguate — resolve in the registry",
model, runtime, len(exact), strings.Join(providerNames(exact), ", "))
}
@@ -132,26 +137,8 @@ func (m *Manifest) DeriveProvider(runtime, model string, availableAuthEnv []stri
}
// Step 5: >1 prefix match — disambiguate by available auth env.
if len(availableAuthEnv) > 0 {
avail := make(map[string]struct{}, len(availableAuthEnv))
for _, e := range availableAuthEnv {
avail[e] = struct{}{}
}
var byAuth []Provider
for _, p := range matched {
for _, want := range p.AuthEnv {
if _, ok := avail[want]; ok {
byAuth = append(byAuth, p)
break
}
}
}
if len(byAuth) == 1 {
return byAuth[0], nil
}
if len(byAuth) > 1 {
matched = byAuth // narrowed but still ambiguous; report the narrowed set
}
if p, ok := disambiguateByAuthEnv(matched, availableAuthEnv); ok {
return p, nil
}
// Step 6: still ambiguous -> error (never silently pick).
@@ -160,6 +147,29 @@ func (m *Manifest) DeriveProvider(runtime, model string, availableAuthEnv []stri
model, runtime, len(matched), strings.Join(providerNames(matched), ", "))
}
// disambiguateByAuthEnv picks the one candidate whose AuthEnv shares at least
// one entry with availableAuthEnv.
//
// It returns (provider, true) only when exactly one candidate qualifies. When
// availableAuthEnv is empty, when no candidate qualifies, or when more than
// one does, it returns the zero Provider and false.
func disambiguateByAuthEnv(candidates []Provider, availableAuthEnv []string) (Provider, bool) {
	if len(availableAuthEnv) == 0 {
		return Provider{}, false
	}

	// Set of the supplied env names for constant-time membership checks.
	present := make(map[string]struct{}, len(availableAuthEnv))
	for _, name := range availableAuthEnv {
		present[name] = struct{}{}
	}

	var (
		winner Provider
		hits   int
	)
	for _, cand := range candidates {
		usable := false
		for _, env := range cand.AuthEnv {
			if _, found := present[env]; found {
				usable = true
				break
			}
		}
		if !usable {
			continue
		}
		hits++
		if hits > 1 {
			// A second qualifying candidate means auth env cannot decide.
			return Provider{}, false
		}
		winner = cand
	}

	if hits != 1 {
		return Provider{}, false
	}
	return winner, true
}
// Upstream is the result of ResolveUpstream: the proxy's upstream-vendor key
// (the 4-name vocabulary {openai, moonshot, anthropic, minimax} the proxy's
// resolveLLMProviderTarget switch dispatches on to pick the upstream base URL +
@@ -15,7 +15,8 @@ import (
// 2. model_prefix_match among native providers, then
// 3. auth-env disambiguation when >1 native provider still matches.
//
// It ERRORS on overlap (>=2 unresolved) and on none — never silently picks.
// It ERRORS on unresolved prefix overlap and on none. Exact-list overlap may
// use auth env or provider order to choose the runtime's declared default.
func TestDeriveProvider_RealManifest(t *testing.T) {
m, err := LoadManifest()
if err != nil {
@@ -58,7 +59,9 @@ func TestDeriveProvider_RealManifest(t *testing.T) {
{"claude-code api sonnet versioned", "claude-code", "claude-sonnet-4-6", []string{"ANTHROPIC_API_KEY"}, "anthropic-api"},
// --- other runtimes' native sets --------------------------------
{"codex byok gpt-5.5", "codex", "gpt-5.5", []string{"OPENAI_API_KEY"}, "openai"},
{"codex subscription gpt-5.5", "codex", "gpt-5.5", []string{"CODEX_AUTH_JSON"}, "openai-subscription"},
{"codex byok gpt-5.5", "codex", "gpt-5.5", []string{"OPENAI_API_KEY"}, "openai-api"},
{"codex default gpt-5.5", "codex", "gpt-5.5", nil, "openai-subscription"},
{"claude-code minimax", "claude-code", "MiniMax-M2.7", []string{"MINIMAX_API_KEY"}, "minimax"},
{"openclaw byok colon", "openclaw", "moonshot:kimi-k2.6", []string{"KIMI_API_KEY"}, "kimi-coding"},
}
@@ -334,9 +337,9 @@ func TestResolveUpstream_RealManifest(t *testing.T) {
{"platform moonshot colon (openclaw)", "moonshot:kimi-k2.6", "moonshot", "kimi-k2.6", "moonshot", false},
// anthropic namespace resolves to the anthropic-api ENTRY (name != vendor).
{"platform anthropic ns", "anthropic/claude-opus-4-7", "anthropic", "claude-opus-4-7", "anthropic-api", false},
{"platform openai ns", "openai/gpt-5.4", "openai", "gpt-5.4", "openai", false},
{"platform openai ns", "openai/gpt-5.4", "openai", "gpt-5.4", "openai-api", false},
{"platform minimax ns", "minimax/MiniMax-M2.7", "minimax", "MiniMax-M2.7", "minimax", false},
{"openai ns gpt-4o", "openai/gpt-4o", "openai", "gpt-4o", "openai", false},
{"openai ns gpt-4o", "openai/gpt-4o", "openai", "gpt-4o", "openai-api", false},
// --- bare ids are VESTIGIAL at the proxy: ResolveUpstream errors (the
// proxy falls back to its legacy switch for these). No live bare traffic.
{"bare kimi -> err (vestigial, legacy fallback)", "kimi-k2.6", "", "", "", true},
@@ -417,7 +420,7 @@ func TestResolveUpstream_ResolvesToProviderEntry(t *testing.T) {
{"moonshot/kimi-k2.6", "moonshot", "https://api.moonshot.ai/v1", "https://api.moonshot.ai/anthropic/v1", "MOONSHOT_API_KEY"},
{"anthropic/claude-opus-4-7", "anthropic-api", "https://api.anthropic.com/v1", "https://api.anthropic.com/v1", "ANTHROPIC_API_KEY"},
{"minimax/MiniMax-M2.7", "minimax", "https://api.minimax.io/v1", "https://api.minimax.io/anthropic/v1", "MINIMAX_API_KEY"},
{"openai/gpt-5.4", "openai", "https://api.openai.com/v1", "", "OPENAI_API_KEY"},
{"openai/gpt-5.4", "openai-api", "https://api.openai.com/v1", "", "OPENAI_API_KEY"},
}
for _, tc := range cases {
up, err := m.ResolveUpstream(tc.model)
@@ -505,7 +508,7 @@ func TestResolveUpstream_OnlyRoutingEntriesCarryVendor(t *testing.T) {
}
want := map[string]string{
"anthropic": "anthropic-api",
"openai": "openai",
"openai": "openai-api",
"moonshot": "moonshot",
"minimax": "minimax",
}
@@ -16,7 +16,7 @@ const SchemaVersion = 1
// Fingerprint is a stable content hash of the generated projection (schema
// version + provider catalog + runtime native sets). It changes iff the
// registry DATA changes (comment-only YAML edits do not churn it).
const Fingerprint = "cbd39dfe934302e0"
const Fingerprint = "3ff3e3ff03e31c62"
// GenProvider is the generated projection of one provider catalog entry —
// the subset a downstream consumer needs to derive + display a provider.
@@ -46,7 +46,8 @@ type GenRuntimeRef struct {
var Providers = []GenProvider{
{Name: "anthropic-api", DisplayName: "Anthropic API", Protocol: "anthropic", AuthMode: "anthropic_api", AuthEnv: []string{"ANTHROPIC_API_KEY", "ANTHROPIC_AUTH_TOKEN"}, ModelPrefixMatch: "^claude", IsPlatform: false, UpstreamVendor: "anthropic"},
{Name: "anthropic-oauth", DisplayName: "Claude Code subscription", Protocol: "anthropic", AuthMode: "oauth", AuthEnv: []string{"CLAUDE_CODE_OAUTH_TOKEN"}, ModelPrefixMatch: "^(sonnet|opus|haiku)$", IsPlatform: false},
{Name: "openai", DisplayName: "OpenAI", Protocol: "openai", AuthMode: "anthropic_api", AuthEnv: []string{"OPENAI_API_KEY"}, ModelPrefixMatch: "^gpt-", IsPlatform: false, UpstreamVendor: "openai"},
{Name: "openai-subscription", DisplayName: "OpenAI Codex subscription", Protocol: "openai", AuthMode: "oauth", AuthEnv: []string{"CODEX_AUTH_JSON", "CODEX_CHATGPT_AUTH_JSON"}, ModelPrefixMatch: "^gpt-", IsPlatform: false},
{Name: "openai-api", DisplayName: "OpenAI API", Protocol: "openai", AuthMode: "anthropic_api", AuthEnv: []string{"OPENAI_API_KEY"}, ModelPrefixMatch: "^openai-api[:/]", IsPlatform: false, UpstreamVendor: "openai"},
{Name: "moonshot", DisplayName: "Moonshot (Kimi)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MOONSHOT_API_KEY", "KIMI_API_KEY"}, ModelPrefixMatch: "^moonshot[:/-]", IsPlatform: false, UpstreamVendor: "moonshot"},
{Name: "minimax", DisplayName: "MiniMax", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MINIMAX_API_KEY", "ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "(?i)^minimax-m", IsPlatform: false, UpstreamVendor: "minimax"},
{Name: "platform", DisplayName: "Platform", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"}, ModelPrefixMatch: "^platform/", IsPlatform: true},
@@ -82,7 +83,8 @@ var Runtimes = map[string][]GenRuntimeRef{
{Name: "platform", Models: []string{"anthropic/claude-opus-4-7", "anthropic/claude-sonnet-4-6", "moonshot/kimi-k2.6", "moonshot/kimi-k2.5", "minimax/MiniMax-M2.7", "minimax/MiniMax-M2.7-highspeed"}},
},
"codex": {
{Name: "openai", Models: []string{"gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex", "gpt-5.3-codex-spark", "gpt-5.2"}},
{Name: "openai-subscription", Models: []string{"gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex", "gpt-5.3-codex-spark", "gpt-5.2"}},
{Name: "openai-api", Models: []string{"gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex", "gpt-5.3-codex-spark", "gpt-5.2"}},
{Name: "platform", Models: []string{"openai/gpt-5.4", "openai/gpt-5.4-mini"}},
},
"hermes": {
@@ -311,9 +311,16 @@ func (m *Manifest) ModelsForRuntime(rt string) ([]string, error) {
if !ok {
return nil, fmt.Errorf("providers: unknown runtime %q", rt)
}
seen := make(map[string]struct{})
var out []string
for _, ref := range native.Providers {
out = append(out, ref.Models...)
for _, model := range ref.Models {
if _, ok := seen[model]; ok {
continue
}
seen[model] = struct{}{}
out = append(out, model)
}
}
return out, nil
}
@@ -110,30 +110,85 @@ providers:
model_aliases: [sonnet, opus, haiku]
# ===========================================================================
# OpenAI — proxy default arm + DB catalog + canvas. NOT in the adapter
# template (claude-code template is Anthropic-protocol only).
# OpenAI / Codex — SPLIT into two providers, mirroring the anthropic-api /
# anthropic-oauth pair above. The codex runtime authenticates via EITHER a
# ChatGPT/Codex subscription (OAuth-style auth.json blob — the CLI talks to
# OpenAI directly, never the proxy) OR a direct OpenAI API key. The codex
# template/adapter registry
# (git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-codex
# config.yaml `providers:`) uses the SPLIT names `openai-subscription`
# (auth_mode chatgpt_subscription) + `openai-api` (auth_mode openai_api); it
# does NOT accept a bare `openai`. The previous single `openai` SSOT entry
# derived bare `openai` for (codex, gpt-*), which the adapter rejected with
# "picks provider='openai' but it is not in the providers registry. Known
# providers: openai-subscription, openai-api" — leaving codex agents NOT
# CONFIGURED. The split below converges the SSOT onto the adapter's vocabulary.
#
# openai-subscription is the OAuth arm (mirror of anthropic-oauth): auth_mode
# oauth, NO upstream_vendor (OAuth never traverses the proxy — the CLI dials
# OpenAI directly), base_url null, the ChatGPT-OAuth auth.json env. It is the
# codex DEFAULT (the template adapter's resolve-provider precedence #1 picks
# the subscription when CODEX_AUTH_JSON is present), so it owns the bare gpt-*
# family by prefix + exact-list and DeriveProvider(codex, gpt-5.5) -> here.
# ===========================================================================
- name: openai
display_name: "OpenAI"
- name: openai-subscription
display_name: "OpenAI Codex subscription"
vendor_logo: "openai"
protocol: openai
auth_mode: anthropic_api # OpenAI is openai-protocol; auth is a bearer API key.
auth_mode: oauth # ChatGPT/Codex subscription — auth.json blob, not a bearer key.
base_url_template: null # OAuth: the codex CLI talks to OpenAI directly (no proxy).
base_url_anthropic: null # OpenAI exposes only the OpenAI protocol surface.
# The codex template's chatgpt_subscription auth_env, verbatim from the
# deployed config.yaml: CODEX_AUTH_JSON wins over the older
# CODEX_CHATGPT_AUTH_JSON alias when both are set.
auth_env: [CODEX_AUTH_JSON, CODEX_CHATGPT_AUTH_JSON]
auth_token_env: CODEX_AUTH_JSON
# Canvas matches /^gpt-/i. This is the codex DEFAULT arm, so it owns the
# bare gpt-* prefix (the codex runtime exact-lists the gpt-* ids under it).
model_prefix_match: "^gpt-"
model_aliases: []
# NO upstream_vendor — OAuth never traverses the proxy (mirror of
# anthropic-oauth). The proxy's `openai/` namespace token + Responses
# surface resolve to openai-api below (which carries upstream_vendor:
# openai), keeping proxy routing + billing byte-identical.
# ===========================================================================
# OpenAI API key — the BYOK direct-key arm AND the proxy arm (mirror of
# anthropic-api). Carries upstream_vendor: openai so ResolveUpstream still
# maps the `openai/` namespace token to THIS entry and the proxy's OpenAI
# Responses surface (codex platform-managed) routes upstream vendor "openai"
# exactly as before — proxy + billing are byte-identical to the pre-split
# `openai` entry. Its catalog model_prefix_match is a DISJOINT sentinel
# (`^openai-api[:/]`) so the bare gpt-* family stays owned by
# openai-subscription and the catalog overlap guard (no slug may match two
# providers) stays green — exactly as anthropic-oauth's `^(sonnet|opus|
# haiku)$` is disjoint from anthropic-api's `^claude`.
# ===========================================================================
- name: openai-api
display_name: "OpenAI API"
vendor_logo: "openai"
protocol: openai
auth_mode: anthropic_api # openai-protocol; auth is a bearer API key (mirror of anthropic-api).
base_url_template: "https://api.openai.com/v1"
base_url_anthropic: null # OpenAI exposes only the OpenAI protocol surface.
auth_env: [OPENAI_API_KEY]
auth_token_env: OPENAI_API_KEY
# Proxy treats openai as the DEFAULT (catch-all) arm of inferLLMProvider;
# there is no explicit prefix today. Canvas matches /^gpt-/i. Encode the
# canvas prefix so the explicit slugs route; the proxy's catch-all
# behavior is a routing decision for PR-3, not the manifest.
model_prefix_match: "^gpt-"
# DISJOINT sentinel prefix: openai-api is selected explicitly (the
# provisioner's LLM_PROVIDER=openai-api) or via the `openai/` proxy
# namespace (ResolveUpstream uses upstream_vendor, NOT this prefix). The
# bare gpt-* family is owned by openai-subscription (the codex default), so
# this prefix must NOT also claim `^gpt-` or the catalog overlap guard
# (TestNoAmbiguousModelMatch) would flag gpt-5.5 as matching two providers.
model_prefix_match: "^openai-api[:/]"
model_aliases: []
# internal#718 P1 (CONVERGED): the proxy's upstream-vendor key. ResolveUpstream
# maps the `openai/` namespace token to THIS entry. openai is ALSO the proxy's
# historical catch-all (the switch's `default:` arm) for bare/unknown ids
# but the catch-all is a VESTIGIAL bare-id behavior (no live bare traffic), so
# it lives in the retained legacy fallback (inferLLMProviderLegacy), NOT as a
# registry data flag. Live `openai/<m>` ids resolve here by namespace.
# maps the `openai/` namespace token to THIS entry, then dials its
# base_url_template + auth (the SINGLE source). openai is ALSO the proxy's
# historical catch-all (the legacy switch's `default:` arm) for bare/unknown
# ids — a VESTIGIAL bare-id behavior (no live bare traffic) retained in
# inferLLMProviderLegacy, NOT a registry flag. Live `openai/<m>` ids resolve
# here by namespace. The openai-subscription OAuth arm carries NO
# upstream_vendor (OAuth never traverses the proxy).
upstream_vendor: openai
# ===========================================================================
@@ -561,7 +616,8 @@ providers:
# AUTHORITATIVE MATRIX (provider level), encoded EXACTLY below:
# claude-code -> anthropic (oauth + api), kimi (kimi-coding), minimax
# hermes -> kimi (kimi-coding)
# codex -> openai
# codex -> openai (subscription + api — the split openai-subscription /
# openai-api pair, mirroring anthropic oauth+api)
# openclaw -> kimi (kimi-coding)
#
# Each runtime entry lists native provider NAMES (referencing `providers:`
@@ -689,12 +745,39 @@ runtimes:
- moonshot/kimi-k2.6
- moonshot/kimi-k2.5
# codex: OpenAI — BYOK (subscription + API key, both map to the `openai`
# manifest provider) + platform-managed (the `platform` ref below, served
# via the proxy Responses surface).
# codex: OpenAI — BYOK split across TWO native providers
# (openai-subscription + openai-api), mirroring claude-code's anthropic
# oauth+api split, PLUS platform-managed (the `platform` ref below, served via
# the proxy Responses surface).
#
# The split fixes the prod "picks provider='openai' but it is not in the
# providers registry. Known providers: openai-subscription, openai-api" wedge:
# the codex template/adapter registry uses the SPLIT names, never bare
# `openai`, so the SSOT must derive one of them. openai-subscription is the
# DEFAULT (the adapter's resolve-provider precedence #1 picks the ChatGPT/Codex
# subscription when CODEX_AUTH_JSON is present), so it is listed FIRST and owns
# the bare gpt-* family — DeriveProvider(codex, gpt-5.5) -> openai-subscription.
# openai-api is referenced too (the direct-OPENAI_API_KEY BYOK arm); the same
# gpt-* ids are exact-listed under both arms and DeriveProvider disambiguates
# by available auth env (OPENAI_API_KEY -> openai-api; the subscription
# auth.json env or no auth context -> the first-declared default,
# openai-subscription) — the identical oauth-vs-key disambiguation
# claude-code's anthropic pair uses.
codex:
providers:
- name: openai
# DEFAULT arm (listed first): ChatGPT/Codex subscription via OAuth.
- name: openai-subscription
models:
- gpt-5.5
- gpt-5.4
- gpt-5.4-mini
- gpt-5.3-codex
- gpt-5.3-codex-spark
- gpt-5.2
# Direct OpenAI API-key BYOK arm. Same gpt-* family; selected over the
# subscription default when OPENAI_API_KEY is the available auth env (or
# via the explicit provisioner LLM_PROVIDER=openai-api).
- name: openai-api
models:
- gpt-5.5
- gpt-5.4
@@ -114,8 +114,8 @@ func TestMatchesModel(t *testing.T) {
{"MiniMax-M2", "minimax"},
{"minimax-m2.5", "minimax"},
// OpenAI — DB gpt-5.x + canvas /^gpt-/.
{"gpt-5.5", "openai"},
{"gpt-5.4-mini", "openai"},
{"gpt-5.5", "openai-subscription"},
{"gpt-5.4-mini", "openai-subscription"},
// Xiaomi MiMo — adapter mimo- + canvas /^mimo-/.
{"mimo-v2.5-pro", "xiaomi-mimo"},
// Z.ai GLM — adapter glm- + canvas /^GLM-/ (mixed case).
@@ -19,12 +19,12 @@ import (
//
// claude-code -> anthropic (oauth+api), kimi (kimi-coding), minimax, platform
// hermes -> kimi (kimi-coding), platform
// codex -> openai, platform
// codex -> openai-subscription, openai-api, platform
// openclaw -> kimi (kimi-coding), platform
var runtimeNativeProviders = map[string][]string{
"claude-code": {"anthropic-api", "anthropic-oauth", "kimi-coding", "minimax", "platform"},
"hermes": {"kimi-coding", "platform"},
"codex": {"openai", "platform"}, // platform openai via the proxy Responses surface
"codex": {"openai-api", "openai-subscription", "platform"}, // platform openai via the proxy Responses surface
"openclaw": {"kimi-coding", "platform"},
}
@@ -29,7 +29,7 @@ import (
// canonicalProvidersYAMLSHA256 is the sha256 of the canonical providers.yaml as
// synced from molecule-controlplane. Bumped deliberately on each re-sync (see
// file doc). Cross-checked live by the sync-providers-yaml CI workflow.
const canonicalProvidersYAMLSHA256 = "73e8003062edaa4ce75bfb324be615b6e2b380f07487e3af4dc16cb644dc12bc"
const canonicalProvidersYAMLSHA256 = "6200f0a046921b55bcd9fbb7c2367d4162ca1d745283f832d98429042b7a3fc4"
func TestSyncedYAMLMatchesCanonicalSHA(t *testing.T) {
sum := sha256.Sum256(embeddedYAML)