2026-05-31 23:50:53 +00:00
9 changed files with 479 additions and 89 deletions
@@ -24,6 +24,7 @@ var platformManagedDirectLLMBypassKeys = map[string]struct{}{
 	"ANTHROPIC_AUTH_TOKEN":    {},
 	"ARCEEAI_API_KEY":         {},
 	"CLAUDE_CODE_OAUTH_TOKEN": {},
+	"CODEX_AUTH_JSON":         {},
 	"DASHSCOPE_API_KEY":       {},
 	"DEEPSEEK_API_KEY":        {},
 	"GEMINI_API_KEY":          {},
@@ -54,7 +54,17 @@ func (p Provider) IsPlatform() bool {
 //     native provider ref's Models list, that provider wins outright — this
 //     resolves the kimi namespace split (moonshot/kimi-k2.6 -> platform vs
 //     bare kimi-for-coding -> kimi-coding) deterministically and overrides
-//     any broader prefix match.
+//     any broader prefix match. If the SAME id is exact-listed by MORE THAN
+//     ONE native arm — the legitimate "one model id, two auth arms" shape (the
+//     codex gpt-* family is offered on BOTH the openai-subscription OAuth arm
+//     and the openai-api direct-key arm, mirroring claude-code's anthropic
+//     oauth+api split) — disambiguate by available auth env exactly as the
+//     prefix step (5) does: keep the arms whose auth_env intersects
+//     availableAuthEnv; if exactly one survives it wins. With no auth context
+//     (or an unresolved tie), the FIRST-declared native arm wins — the
+//     deterministic default (codex lists openai-subscription first, so a
+//     gpt-* id with no auth context defaults to the subscription, matching the
+//     codex adapter's resolve-provider precedence #1).
 //  4. Otherwise, fall back to model_prefix_match among the native providers.
 //  5. If >1 native provider still matches, disambiguate by auth env: keep
 //     only the providers whose auth_env intersects availableAuthEnv. If
@@ -85,11 +95,14 @@ func (m *Manifest) DeriveProvider(runtime, model string, availableAuthEnv []stri
 	}

 	// Step 3: exact model-id match against each native provider ref's Models.
-	// Authoritative — a verbatim id beats any prefix. If two native refs both
-	// list the same id, that is a manifest ambiguity we surface rather than
-	// silently pick (LoadManifest already forbids a provider ref appearing
-	// twice in one runtime, but two DIFFERENT providers listing the same id
-	// is not load-rejected, so guard it here).
+	// Authoritative — a verbatim id beats any prefix. `exact` is collected in
+	// native-declaration order. When ONE native arm lists the id, it wins
+	// outright. When MORE THAN ONE lists it (the codex oauth-vs-key "one id,
+	// two auth arms" shape), it is disambiguated by available auth env, with
+	// the first-declared arm as the deterministic default (handled below) —
+	// NOT a load error, since a model legitimately offered on two auth arms is
+	// a feature, not a typo. (LoadManifest still forbids the SAME provider ref
+	// appearing twice in one runtime.)
 	var exact []Provider
 	for _, ref := range native.Providers {
 		for _, mid := range ref.Models {
@@ -105,9 +118,19 @@ func (m *Manifest) DeriveProvider(runtime, model string, availableAuthEnv []stri
 		return exact[0], nil
 	}
 	if len(exact) > 1 {
-		return Provider{}, fmt.Errorf(
-			"providers: model %q for runtime %q is exact-listed by %d native providers (%s) — manifest ambiguity",
-			model, runtime, len(exact), strings.Join(providerNames(exact), ", "))
+		// The same id is exact-listed by >1 native arm — the legitimate
+		// "one model id, two auth arms" shape (codex gpt-* on both the
+		// openai-subscription OAuth arm and the openai-api direct-key arm,
+		// mirroring claude-code's anthropic oauth+api split). Disambiguate by
+		// available auth env exactly as the prefix step does. `exact` is in
+		// native-declaration order, so the first-declared arm is the
+		// deterministic default when auth env does not resolve it.
+		if p, ok := disambiguateByAuthEnv(exact, availableAuthEnv); ok {
+			return p, nil
+		}
+		// No auth context (or an unresolved tie): the first-declared native
+		// arm is the default (codex declares openai-subscription first).
+		return exact[0], nil
 	}

 	// Step 4: prefix match among native providers only.
@@ -132,26 +155,11 @@ func (m *Manifest) DeriveProvider(runtime, model string, availableAuthEnv []stri
 	}

 	// Step 5: >1 prefix match — disambiguate by available auth env.
-	if len(availableAuthEnv) > 0 {
-		avail := make(map[string]struct{}, len(availableAuthEnv))
-		for _, e := range availableAuthEnv {
-			avail[e] = struct{}{}
-		}
-		var byAuth []Provider
-		for _, p := range matched {
-			for _, want := range p.AuthEnv {
-				if _, ok := avail[want]; ok {
-					byAuth = append(byAuth, p)
-					break
-				}
-			}
-		}
-		if len(byAuth) == 1 {
-			return byAuth[0], nil
-		}
-		if len(byAuth) > 1 {
-			matched = byAuth // narrowed but still ambiguous; report the narrowed set
-		}
+	if p, ok := disambiguateByAuthEnv(matched, availableAuthEnv); ok {
+		return p, nil
+	}
+	if narrowed := authEnvMatches(matched, availableAuthEnv); len(narrowed) > 1 {
+		matched = narrowed // narrowed but still ambiguous; report the narrowed set
 	}

 	// Step 6: still ambiguous -> error (never silently pick).
@@ -247,6 +255,41 @@ func (m *Manifest) ResolveUpstream(model string) (Upstream, error) {
 		"providers: %q is not an upstream-namespaced model id (vendor/model); bare ids are vestigial at the proxy and resolve via the legacy fallback", model)
 }

+// authEnvMatches returns the candidates whose AuthEnv shares at least one name
+// with availableAuthEnv, each at most once and in input order. It returns nil
+// when availableAuthEnv is empty or nothing matches (the tie-break cannot fire).
+func authEnvMatches(candidates []Provider, availableAuthEnv []string) []Provider {
+	if len(availableAuthEnv) == 0 {
+		return nil
+	}
+	avail := make(map[string]struct{}, len(availableAuthEnv)) // set of available env names
+	for _, e := range availableAuthEnv {
+		avail[e] = struct{}{}
+	}
+	var out []Provider
+	for _, p := range candidates {
+		for _, want := range p.AuthEnv {
+			if _, ok := avail[want]; ok {
+				out = append(out, p)
+				break // one shared env is enough; stops p being appended twice
+			}
+		}
+	}
+	return out
+}
+
+// disambiguateByAuthEnv returns (candidate, true) when EXACTLY one candidate's
+// AuthEnv intersects availableAuthEnv; with zero or several matches it returns
+// (Provider{}, false). Used by both the exact-id step (codex oauth-vs-key arms
+// exact-listing the same gpt-* id) and the prefix step of DeriveProvider.
+func disambiguateByAuthEnv(candidates []Provider, availableAuthEnv []string) (Provider, bool) {
+	byAuth := authEnvMatches(candidates, availableAuthEnv)
+	if len(byAuth) == 1 {
+		return byAuth[0], true
+	}
+	return Provider{}, false
+}
+
 // providerNames returns the sorted names of a provider slice for stable,
 // deterministic error messages (test assertions + operator readability).
 func providerNames(ps []Provider) []string {
@@ -58,7 +58,18 @@ func TestDeriveProvider_RealManifest(t *testing.T) {
 		{"claude-code api sonnet versioned", "claude-code", "claude-sonnet-4-6", []string{"ANTHROPIC_API_KEY"}, "anthropic-api"},

 		// --- other runtimes' native sets --------------------------------
-		{"codex byok gpt-5.5", "codex", "gpt-5.5", []string{"OPENAI_API_KEY"}, "openai"},
+		// codex OpenAI is split across openai-subscription (OAuth — the
+		// DEFAULT) + openai-api (direct key), mirroring the anthropic
+		// oauth+api split. The codex template/adapter registry uses these
+		// SPLIT names, never bare `openai` (the prod "picks provider='openai'
+		// but it is not in the providers registry" wedge this fixes). The
+		// shared gpt-* ids are exact-listed under BOTH arms and disambiguated
+		// by available auth env, defaulting to the first-declared arm
+		// (openai-subscription) when no auth context resolves it.
+		{"codex byok gpt-5.5 with OPENAI_API_KEY -> api", "codex", "gpt-5.5", []string{"OPENAI_API_KEY"}, "openai-api"},
+		{"codex byok gpt-5.5 with CODEX_AUTH_JSON -> subscription", "codex", "gpt-5.5", []string{"CODEX_AUTH_JSON"}, "openai-subscription"},
+		{"codex byok gpt-5.5 no auth -> subscription (default)", "codex", "gpt-5.5", nil, "openai-subscription"},
+		{"codex byok gpt-5.4-mini no auth -> subscription (default)", "codex", "gpt-5.4-mini", nil, "openai-subscription"},
 		{"claude-code minimax", "claude-code", "MiniMax-M2.7", []string{"MINIMAX_API_KEY"}, "minimax"},
 		{"openclaw byok colon", "openclaw", "moonshot:kimi-k2.6", []string{"KIMI_API_KEY"}, "kimi-coding"},
 	}
@@ -334,9 +345,13 @@ func TestResolveUpstream_RealManifest(t *testing.T) {
 		{"platform moonshot colon (openclaw)", "moonshot:kimi-k2.6", "moonshot", "kimi-k2.6", "moonshot", false},
 		// anthropic namespace resolves to the anthropic-api ENTRY (name != vendor).
 		{"platform anthropic ns", "anthropic/claude-opus-4-7", "anthropic", "claude-opus-4-7", "anthropic-api", false},
-		{"platform openai ns", "openai/gpt-5.4", "openai", "gpt-5.4", "openai", false},
+		// openai namespace resolves to the openai-api ENTRY (name != vendor),
+		// mirroring anthropic/ -> anthropic-api: the OAuth subscription arm
+		// carries NO upstream_vendor (OAuth never traverses the proxy), so the
+		// `openai/` namespace + Responses surface route through openai-api.
+		{"platform openai ns", "openai/gpt-5.4", "openai", "gpt-5.4", "openai-api", false},
 		{"platform minimax ns", "minimax/MiniMax-M2.7", "minimax", "MiniMax-M2.7", "minimax", false},
-		{"openai ns gpt-4o", "openai/gpt-4o", "openai", "gpt-4o", "openai", false},
+		{"openai ns gpt-4o", "openai/gpt-4o", "openai", "gpt-4o", "openai-api", false},
 		// --- bare ids are VESTIGIAL at the proxy: ResolveUpstream errors (the
 		//     proxy falls back to its legacy switch for these). No live bare traffic.
 		{"bare kimi -> err (vestigial, legacy fallback)", "kimi-k2.6", "", "", "", true},
@@ -417,7 +432,7 @@ func TestResolveUpstream_ResolvesToProviderEntry(t *testing.T) {
 		{"moonshot/kimi-k2.6", "moonshot", "https://api.moonshot.ai/v1", "https://api.moonshot.ai/anthropic/v1", "MOONSHOT_API_KEY"},
 		{"anthropic/claude-opus-4-7", "anthropic-api", "https://api.anthropic.com/v1", "https://api.anthropic.com/v1", "ANTHROPIC_API_KEY"},
 		{"minimax/MiniMax-M2.7", "minimax", "https://api.minimax.io/v1", "https://api.minimax.io/anthropic/v1", "MINIMAX_API_KEY"},
-		{"openai/gpt-5.4", "openai", "https://api.openai.com/v1", "", "OPENAI_API_KEY"},
+		{"openai/gpt-5.4", "openai-api", "https://api.openai.com/v1", "", "OPENAI_API_KEY"},
 	}
 	for _, tc := range cases {
 		up, err := m.ResolveUpstream(tc.model)
@@ -505,9 +520,12 @@ func TestResolveUpstream_OnlyRoutingEntriesCarryVendor(t *testing.T) {
 	}
 	want := map[string]string{
 		"anthropic": "anthropic-api",
-		"openai":    "openai",
-		"moonshot":  "moonshot",
-		"minimax":   "minimax",
+		// openai's upstream_vendor lives on the openai-api entry (the proxy
+		// arm); the openai-subscription OAuth arm carries none — OAuth never
+		// traverses the proxy (mirror of anthropic-oauth).
+		"openai":   "openai-api",
+		"moonshot": "moonshot",
+		"minimax":  "minimax",
 	}
 	if len(got) != len(want) {
 		t.Fatalf("upstream_vendor entries = %v, want exactly %v", got, want)
@@ -518,3 +536,85 @@ func TestResolveUpstream_OnlyRoutingEntriesCarryVendor(t *testing.T) {
 		}
 	}
 }
+
+// codexTemplateProviderRegistry is the set of provider names the DEPLOYED codex
+// workspace template/adapter accepts in its `providers:` registry
+// (git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-codex
+// config.yaml — the source of truth for the codex adapter's vocabulary). The
+// adapter REJECTS any provider name not in this set with the prod error:
+//
+//	ValueError: codex adapter: workspace config picks provider='openai' but it
+//	is not in the providers registry. Known providers: openai-subscription,
+//	openai-api
+//
+// `minimax-token-plan` + `platform` are also in the template. `minimax-token-plan`
+// is NOT in the codex NATIVE matrix (pruned — the vendor's /v1/responses leg
+// 404s), so the BYOK arms the SSOT derives must be a SUBSET of
+// {openai-subscription, openai-api}. `platform` (core-only billing) is the legitimate
+// platform-managed exception (it is in both the template and the native set).
+var codexTemplateProviderRegistry = map[string]struct{}{
+	"openai-subscription": {},
+	"openai-api":          {},
+	"minimax-token-plan":  {},
+	"platform":            {},
+}
+
+// TestCodexDerivesOnlyTemplateRegistryProviders is the DRIFT GATE for the case
+// the pre-fix gate MISSED: that gate caught the claude-code/kimi SSOT↔template
+// divergence but NOT codex's (the SSOT derived bare `openai`, which the codex
+// adapter rejects). It asserts that for EVERY model the codex runtime natively
+// exposes, every DeriveProvider call that SUCCEEDS yields a provider name the
+// deployed codex template's `providers:` registry accepts — so a future SSOT edit
+// that derives a codex-adapter-unknown provider (a bare `openai` regression, a
+// typo'd arm) fails RED here instead of wedging codex agents as NOT CONFIGURED in prod.
+func TestCodexDerivesOnlyTemplateRegistryProviders(t *testing.T) {
+	m, err := LoadManifest()
+	if err != nil {
+		t.Fatalf("LoadManifest() error = %v", err)
+	}
+	models, err := m.ModelsForRuntime("codex")
+	if err != nil {
+		t.Fatalf("ModelsForRuntime(codex) error = %v", err)
+	}
+	if len(models) == 0 {
+		t.Fatal("codex native model set is empty — nothing to gate")
+	}
+	// Exercise both auth contexts the codex adapter resolves: the OAuth
+	// subscription (CODEX_AUTH_JSON) and the direct key (OPENAI_API_KEY), plus
+	// the no-auth default. Every resulting provider name MUST be one the codex
+	// template registry accepts (never bare `openai`).
+	authContexts := [][]string{
+		nil,                          // no auth -> default (subscription)
+		{"CODEX_AUTH_JSON"},          // ChatGPT/Codex subscription
+		{"OPENAI_API_KEY"},           // direct OpenAI key
+		{"MOLECULE_LLM_USAGE_TOKEN"}, // platform-managed
+	}
+	for _, model := range models {
+		for _, authEnv := range authContexts {
+			p, derr := m.DeriveProvider("codex", model, authEnv)
+			if derr != nil {
+				// Deliberate skip: a platform-managed id may need the platform auth env,
+				// so an unrelated context legitimately can't resolve it. NOTE(review): a model
+				// failing under EVERY context also passes; only the gpt-* loop below fails on error.
+				continue
+			}
+			if _, ok := codexTemplateProviderRegistry[p.Name]; !ok {
+				t.Errorf("codex model %q (authEnv=%v) derived provider %q, which the codex template registry REJECTS (known: openai-subscription, openai-api, minimax-token-plan, platform) — SSOT↔template drift, the exact prod wedge",
+					model, authEnv, p.Name)
+			}
+		}
+	}
+	// And pin the load-bearing default explicitly: the bare gpt-* family with
+	// no auth context defaults to the OAuth subscription (the codex adapter's
+	// resolve-provider precedence #1), never bare `openai`.
+	for _, model := range []string{"gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex", "gpt-5.3-codex-spark", "gpt-5.2"} {
+		p, derr := m.DeriveProvider("codex", model, nil)
+		if derr != nil {
+			t.Errorf("codex default derivation for %q errored: %v", model, derr)
+			continue
+		}
+		if p.Name != "openai-subscription" {
+			t.Errorf("codex default for %q = %q, want openai-subscription (the OAuth subscription default)", model, p.Name)
+		}
+	}
+}
@@ -16,7 +16,7 @@ const SchemaVersion = 1
 // Fingerprint is a stable content hash of the generated projection (schema
 // version + provider catalog + runtime native sets). It changes iff the
 // registry DATA changes (comment-only YAML edits do not churn it).
-const Fingerprint = "cbd39dfe934302e0"
+const Fingerprint = "1100bc3e42a5f425"

 // GenProvider is the generated projection of one provider catalog entry —
 // the subset a downstream consumer needs to derive + display a provider.
@@ -46,7 +46,8 @@ type GenRuntimeRef struct {
 var Providers = []GenProvider{
 	{Name: "anthropic-api", DisplayName: "Anthropic API", Protocol: "anthropic", AuthMode: "anthropic_api", AuthEnv: []string{"ANTHROPIC_API_KEY", "ANTHROPIC_AUTH_TOKEN"}, ModelPrefixMatch: "^claude", IsPlatform: false, UpstreamVendor: "anthropic"},
 	{Name: "anthropic-oauth", DisplayName: "Claude Code subscription", Protocol: "anthropic", AuthMode: "oauth", AuthEnv: []string{"CLAUDE_CODE_OAUTH_TOKEN"}, ModelPrefixMatch: "^(sonnet|opus|haiku)$", IsPlatform: false},
-	{Name: "openai", DisplayName: "OpenAI", Protocol: "openai", AuthMode: "anthropic_api", AuthEnv: []string{"OPENAI_API_KEY"}, ModelPrefixMatch: "^gpt-", IsPlatform: false, UpstreamVendor: "openai"},
+	{Name: "openai-subscription", DisplayName: "OpenAI Codex subscription", Protocol: "openai", AuthMode: "oauth", AuthEnv: []string{"CODEX_AUTH_JSON", "CODEX_CHATGPT_AUTH_JSON"}, ModelPrefixMatch: "^gpt-", IsPlatform: false},
+	{Name: "openai-api", DisplayName: "OpenAI API", Protocol: "openai", AuthMode: "anthropic_api", AuthEnv: []string{"OPENAI_API_KEY"}, ModelPrefixMatch: "^openai-api[:/]", IsPlatform: false, UpstreamVendor: "openai"},
 	{Name: "moonshot", DisplayName: "Moonshot (Kimi)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MOONSHOT_API_KEY", "KIMI_API_KEY"}, ModelPrefixMatch: "^moonshot[:/-]", IsPlatform: false, UpstreamVendor: "moonshot"},
 	{Name: "minimax", DisplayName: "MiniMax", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MINIMAX_API_KEY", "ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "(?i)^minimax-m", IsPlatform: false, UpstreamVendor: "minimax"},
 	{Name: "platform", DisplayName: "Platform", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"}, ModelPrefixMatch: "^platform/", IsPlatform: true},
@@ -55,6 +56,7 @@ var Providers = []GenProvider{
 	{Name: "kimi-coding", DisplayName: "Moonshot Kimi (coding-tuned)", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"KIMI_API_KEY", "ANTHROPIC_API_KEY", "ANTHROPIC_AUTH_TOKEN"}, ModelPrefixMatch: "^kimi-", IsPlatform: false},
 	{Name: "deepseek", DisplayName: "DeepSeek", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"DEEPSEEK_API_KEY", "ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "^deepseek-", IsPlatform: false},
 	{Name: "google", DisplayName: "Google Gemini", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"GEMINI_API_KEY", "GOOGLE_API_KEY"}, ModelPrefixMatch: "^gemini-", IsPlatform: false},
+	{Name: "vertex", DisplayName: "Google Vertex AI (keyless ADC)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"GOOGLE_APPLICATION_CREDENTIALS"}, ModelPrefixMatch: "^vertex:", IsPlatform: false},
 	{Name: "alibaba", DisplayName: "Alibaba Qwen (DashScope)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"DASHSCOPE_API_KEY", "ALIBABA_API_KEY"}, ModelPrefixMatch: "^qwen-", IsPlatform: false},
 	{Name: "nousresearch", DisplayName: "Nous Research (Hermes)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"NOUSRESEARCH_API_KEY"}, ModelPrefixMatch: "^nousresearch/", IsPlatform: false},
 	{Name: "openrouter", DisplayName: "OpenRouter (any model)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"OPENROUTER_API_KEY"}, ModelPrefixMatch: "^openrouter/", IsPlatform: false},
@@ -82,9 +84,14 @@ var Runtimes = map[string][]GenRuntimeRef{
 		{Name: "platform", Models: []string{"anthropic/claude-opus-4-7", "anthropic/claude-sonnet-4-6", "moonshot/kimi-k2.6", "moonshot/kimi-k2.5", "minimax/MiniMax-M2.7", "minimax/MiniMax-M2.7-highspeed"}},
 	},
 	"codex": {
-		{Name: "openai", Models: []string{"gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex", "gpt-5.3-codex-spark", "gpt-5.2"}},
+		{Name: "openai-subscription", Models: []string{"gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex", "gpt-5.3-codex-spark", "gpt-5.2"}},
+		{Name: "openai-api", Models: []string{"gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex", "gpt-5.3-codex-spark", "gpt-5.2"}},
 		{Name: "platform", Models: []string{"openai/gpt-5.4", "openai/gpt-5.4-mini"}},
 	},
+	"google-adk": {
+		{Name: "vertex", Models: []string{"vertex:gemini-2.5-pro"}},
+		{Name: "google", Models: []string{"gemini-2.5-pro"}},
+	},
 	"hermes": {
 		{Name: "kimi-coding", Models: []string{"kimi-coding/kimi-k2"}},
 		{Name: "platform", Models: []string{"moonshot/kimi-k2.6", "moonshot/kimi-k2.5"}},
@@ -1,27 +1,18 @@
-// Package providers is the molecule-core SIDE of the LLM provider registry
-// SSOT (internal#718 P2-A, CTO 2026-05-27 "Distribution = SDK via codegen +
-// verify-CI"). It is a load-time mirror of the canonical loader that lives in
-// molecule-controlplane internal/providers — same parse, same validation, same
-// DeriveProvider/IsPlatform/ResolveUpstream API.
+// Package providers is the SSOT baseline for the LLM provider registry.
 //
-// CANONICAL SSOT = molecule-controlplane internal/providers/providers.yaml.
-// This package embeds a SYNCED COPY of that file (providers.yaml here is a
-// byte-for-byte mirror of the canonical, NOT a second authoring surface). The
-// CTO-decided distribution model for a multi-repo registry is
-// "codegen-checked-into-each-repo + verify-CI": every consumer repo carries the
-// generated projection and a drift gate, so a registry change in CP must be
-// re-synced here (the sync-providers-yaml verify gate goes RED if this copy
-// drifts from the canonical). molecule-core has no Go module dependency on
-// controlplane, so a synced+gated copy is the blessed path (a shared Go module
-// is not viable across the two repos today).
+// RFC: molecule-ai/molecule-controlplane#340 "Canonical Providers
+// Manifest". This package is PR-1: it embeds and parses providers.yaml
+// (the git-tracked baseline that transcribes the union of the proxy
+// switch, the canvas VENDOR_LABELS, the adapter config.yaml `providers:`
+// block, and the DB llm_price_catalog). NOTHING imports it yet — the
+// consumers (internal/handlers/llm_proxy.go, the canvas dropdown, and
+// the workspace-template adapters) are migrated in later PRs. Reverting
+// PR-1 = delete this package; zero runtime behavior change.
 //
-// P2-A is ADDITIVE, ZERO behavior change (the P0 shape mirrored): the loader +
-// DeriveProvider land here, plus the generated artifact (cmd/gen-providers) and
-// the verify-providers-gen drift gate, but NO production code path imports this
-// package yet. P2-B wires the billing/credential decision onto DeriveProvider.
-//
-// Distribution model mirrors molecule-controlplane internal/providers: go:embed
-// the YAML into the binary so a boot-time Load never touches the network.
+// Distribution model mirrors internal/envs (RFC internal#213 §6.5.4
+// Option C): go:embed the YAML into the binary so a boot-time Load never
+// touches the network. A future DB override layer (RFC §3 (c)) can merge
+// on top of the embedded baseline without breaking this package's API.
 package providers

 import (
@@ -311,9 +302,24 @@ func (m *Manifest) ModelsForRuntime(rt string) ([]string, error) {
 	if !ok {
 		return nil, fmt.Errorf("providers: unknown runtime %q", rt)
 	}
+	// De-duplicate while preserving first-seen order. A single model id may be
+	// exact-listed under MORE THAN ONE native arm — the legitimate "one model
+	// id, two auth arms" shape (codex's gpt-* family is offered on both the
+	// openai-subscription OAuth arm and the openai-api direct-key arm, mirroring
+	// claude-code's anthropic oauth+api split). The canvas surfaces each id
+	// once (the auth path is chosen at runtime by which key is present), so the
+	// flattened native model set must not repeat it. A no-op for every runtime
+	// whose arms list disjoint ids.
 	var out []string
+	seen := make(map[string]struct{})
 	for _, ref := range native.Providers {
-		out = append(out, ref.Models...)
+		for _, mid := range ref.Models {
+			if _, dup := seen[mid]; dup {
+				continue
+			}
+			seen[mid] = struct{}{}
+			out = append(out, mid)
+		}
 	}
 	return out, nil
 }
@@ -110,30 +110,85 @@ providers:
    model_aliases: [sonnet, opus, haiku]

  # ===========================================================================
-  # OpenAI — proxy default arm + DB catalog + canvas. NOT in the adapter
-  # template (claude-code template is Anthropic-protocol only).
+  # OpenAI / Codex — SPLIT into two providers, mirroring the anthropic-api /
+  # anthropic-oauth pair above. The codex runtime authenticates via EITHER a
+  # ChatGPT/Codex subscription (OAuth-style auth.json blob — the CLI talks to
+  # OpenAI directly, never the proxy) OR a direct OpenAI API key. The codex
+  # template/adapter registry
+  # (git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-codex
+  # config.yaml `providers:`) uses the SPLIT names `openai-subscription`
+  # (auth_mode chatgpt_subscription) + `openai-api` (auth_mode openai_api); it
+  # does NOT accept a bare `openai`. The previous single `openai` SSOT entry
+  # derived bare `openai` for (codex, gpt-*), which the adapter rejected with
+  # "picks provider='openai' but it is not in the providers registry. Known
+  # providers: openai-subscription, openai-api" — leaving codex agents NOT
+  # CONFIGURED. The split below converges the SSOT onto the adapter's vocabulary.
+  #
+  # openai-subscription is the OAuth arm (mirror of anthropic-oauth): auth_mode
+  # oauth, NO upstream_vendor (OAuth never traverses the proxy — the CLI dials
+  # OpenAI directly), base_url null, the ChatGPT-OAuth auth.json env. It is the
+  # codex DEFAULT (the template adapter's resolve-provider precedence #1 picks
+  # the subscription when CODEX_AUTH_JSON is present), so it owns the bare gpt-*
+  # family by prefix + first exact-list; DeriveProvider(codex, gpt-5.5) -> here unless only OPENAI_API_KEY is available.
  # ===========================================================================
-  - name: openai
-    display_name: "OpenAI"
+  - name: openai-subscription
+    display_name: "OpenAI Codex subscription"
    vendor_logo: "openai"
    protocol: openai
-    auth_mode: anthropic_api  # OpenAI is openai-protocol; auth is a bearer API key.
+    auth_mode: oauth  # ChatGPT/Codex subscription — auth.json blob, not a bearer key.
+    base_url_template: null  # OAuth: the codex CLI talks to OpenAI directly (no proxy).
+    base_url_anthropic: null  # OpenAI exposes only the OpenAI protocol surface.
+    # The codex template's chatgpt_subscription auth_env, verbatim from the
+    # deployed config.yaml: CODEX_AUTH_JSON wins over the older
+    # CODEX_CHATGPT_AUTH_JSON alias when both are set.
+    auth_env: [CODEX_AUTH_JSON, CODEX_CHATGPT_AUTH_JSON]
+    auth_token_env: CODEX_AUTH_JSON
+    # Canvas matches /^gpt-/i. This is the codex DEFAULT arm, so it owns the
+    # bare gpt-* prefix (the codex runtime exact-lists the gpt-* ids under it).
+    model_prefix_match: "^gpt-"
+    model_aliases: []
+    # NO upstream_vendor — OAuth never traverses the proxy (mirror of
+    # anthropic-oauth). The proxy's `openai/` namespace token + Responses
+    # surface resolve to openai-api below (which carries upstream_vendor:
+    # openai), keeping proxy routing + billing byte-identical.
+
+  # ===========================================================================
+  # OpenAI API key — the BYOK direct-key arm AND the proxy arm (mirror of
+  # anthropic-api). Carries upstream_vendor: openai so ResolveUpstream still
+  # maps the `openai/` namespace token to THIS entry and the proxy's OpenAI
+  # Responses surface (codex platform-managed) routes upstream vendor "openai"
+  # exactly as before — proxy + billing are byte-identical to the pre-split
+  # `openai` entry. Its catalog model_prefix_match is a DISJOINT sentinel
+  # (`^openai-api[:/]`) so the bare gpt-* family stays owned by
+  # openai-subscription and the catalog overlap guard (no slug may match two
+  # providers) stays green — exactly as anthropic-oauth's `^(sonnet|opus|
+  # haiku)$` is disjoint from anthropic-api's `^claude`.
+  # ===========================================================================
+  - name: openai-api
+    display_name: "OpenAI API"
+    vendor_logo: "openai"
+    protocol: openai
+    auth_mode: anthropic_api  # openai-protocol; auth is a bearer API key (mirror of anthropic-api).
    base_url_template: "https://api.openai.com/v1"
    base_url_anthropic: null  # OpenAI exposes only the OpenAI protocol surface.
    auth_env: [OPENAI_API_KEY]
    auth_token_env: OPENAI_API_KEY
-    # Proxy treats openai as the DEFAULT (catch-all) arm of inferLLMProvider;
-    # there is no explicit prefix today. Canvas matches /^gpt-/i. Encode the
-    # canvas prefix so the explicit slugs route; the proxy's catch-all
-    # behavior is a routing decision for PR-3, not the manifest.
-    model_prefix_match: "^gpt-"
+    # DISJOINT sentinel prefix: openai-api is selected explicitly (the
+    # provisioner's LLM_PROVIDER=openai-api) or via the `openai/` proxy
+    # namespace (ResolveUpstream uses upstream_vendor, NOT this prefix). The
+    # bare gpt-* family is owned by openai-subscription (the codex default), so
+    # this prefix must NOT also claim `^gpt-` or the catalog overlap guard
+    # (TestNoAmbiguousModelMatch) would flag gpt-5.5 as matching two providers.
+    model_prefix_match: "^openai-api[:/]"
    model_aliases: []
    # internal#718 P1 (CONVERGED): the proxy's upstream-vendor key. ResolveUpstream
-    # maps the `openai/` namespace token to THIS entry. openai is ALSO the proxy's
-    # historical catch-all (the switch's `default:` arm) for bare/unknown ids —
-    # but the catch-all is a VESTIGIAL bare-id behavior (no live bare traffic), so
-    # it lives in the retained legacy fallback (inferLLMProviderLegacy), NOT as a
-    # registry data flag. Live `openai/<m>` ids resolve here by namespace.
+    # maps the `openai/` namespace token to THIS entry, then dials its
+    # base_url_template + auth (the SINGLE source). openai is ALSO the proxy's
+    # historical catch-all (the legacy switch's `default:` arm) for bare/unknown
+    # ids — a VESTIGIAL bare-id behavior (no live bare traffic) retained in
+    # inferLLMProviderLegacy, NOT a registry flag. Live `openai/<m>` ids resolve
+    # here by namespace. The openai-subscription OAuth arm carries NO
+    # upstream_vendor (OAuth never traverses the proxy).
    upstream_vendor: openai

  # ===========================================================================
@@ -357,6 +412,26 @@ providers:
    model_prefix_match: "^gemini-"
    model_aliases: []

+  # Google Vertex AI — KEYLESS arm (mirrors the anthropic-oauth / anthropic-api
+  # and openai-subscription / openai-api split: same vendor, distinct auth).
+  # google-adk serves Gemini via Vertex using Application Default Credentials
+  # over Workload Identity Federation (AWS EC2 role -> GCP STS -> SA), injected
+  # by the provisioner (cp#416 + envs.yaml vertex block) as a NON-SECRET
+  # external_account cred-config at GOOGLE_APPLICATION_CREDENTIALS. No API key.
+  # Distinct `vertex:` model namespace keeps it unambiguous vs the API-key
+  # `google` vendor's ^gemini- (TestNoAmbiguousModelMatch).
+  - name: vertex
+    display_name: "Google Vertex AI (keyless ADC)"
+    vendor_logo: "google"
+    protocol: openai
+    auth_mode: third_party_anthropic_compat
+    base_url_template: null
+    base_url_anthropic: null
+    auth_env: [GOOGLE_APPLICATION_CREDENTIALS]
+    auth_token_env: ANTHROPIC_AUTH_TOKEN  # NOTE(review): not in auth_env above and this arm is keyless — confirm intended
+    model_prefix_match: "^vertex:"
+    model_aliases: []
+
  - name: alibaba
    display_name: "Alibaba Qwen (DashScope)"
    vendor_logo: "alibaba"
@@ -561,7 +636,8 @@ providers:
 # AUTHORITATIVE MATRIX (provider level), encoded EXACTLY below:
 #   claude-code -> anthropic (oauth + api), kimi (kimi-coding), minimax
 #   hermes      -> kimi (kimi-coding)
-#   codex       -> openai
+#   codex       -> openai (subscription + api — the split openai-subscription /
+#                  openai-api pair, mirroring anthropic oauth+api)
 #   openclaw    -> kimi (kimi-coding)
 #
 # Each runtime entry lists native provider NAMES (referencing `providers:`
@@ -689,12 +765,39 @@ runtimes:
          - moonshot/kimi-k2.6
          - moonshot/kimi-k2.5

-  # codex: OpenAI — BYOK (subscription + API key, both map to the `openai`
-  # manifest provider) + platform-managed (the `platform` ref below, served
-  # via the proxy Responses surface).
+  # codex: OpenAI — BYOK split across TWO native providers
+  # (openai-subscription + openai-api), mirroring claude-code's anthropic
+  # oauth+api split, PLUS platform-managed (the `platform` ref below, served via
+  # the proxy Responses surface).
+  #
+  # The split fixes the prod "picks provider='openai' but it is not in the
+  # providers registry. Known providers: openai-subscription, openai-api" wedge:
+  # the codex template/adapter registry uses the SPLIT names, never bare
+  # `openai`, so the SSOT must derive one of them. openai-subscription is the
+  # DEFAULT (the adapter's resolve-provider precedence #1 picks the ChatGPT/Codex
+  # subscription when CODEX_AUTH_JSON is present), so it is listed FIRST and owns
+  # the bare gpt-* family — DeriveProvider(codex, gpt-5.5) -> openai-subscription.
+  # openai-api is referenced too (the direct-OPENAI_API_KEY BYOK arm); the same
+  # gpt-* ids are exact-listed under both arms and DeriveProvider disambiguates
+  # by available auth env (OPENAI_API_KEY -> openai-api; the subscription
+  # auth.json env or no auth context -> the first-declared default,
+  # openai-subscription) — the identical oauth-vs-key disambiguation
+  # claude-code's anthropic pair uses.
  codex:
    providers:
-      - name: openai
+      # DEFAULT arm (listed first): ChatGPT/Codex subscription via OAuth.
+      - name: openai-subscription
+        models:
+          - gpt-5.5
+          - gpt-5.4
+          - gpt-5.4-mini
+          - gpt-5.3-codex
+          - gpt-5.3-codex-spark
+          - gpt-5.2
+      # Direct OpenAI API-key BYOK arm. Same gpt-* family; selected over the
+      # subscription default when OPENAI_API_KEY is the available auth env (or
+      # via the explicit provisioner LLM_PROVIDER=openai-api).
+      - name: openai-api
        models:
          - gpt-5.5
          - gpt-5.4
@@ -730,3 +833,19 @@ runtimes:
        models:
          - moonshot/kimi-k2.6
          - moonshot/kimi-k2.5
+
+
+  # google-adk: Gemini via Vertex AI, keyless ADC (Workload Identity
+  # Federation; provisioner cp#416 + envs.yaml). The google vendor entry
+  # in the top-level providers: list supplies auth/model-prefix metadata;
+  # this runtimes entry declares the selectable model set.
+  google-adk:
+    providers:
+      # Keyless Vertex (org-compliant default): Gemini via Vertex AI + ADC/WIF.
+      - name: vertex
+        models:
+          - vertex:gemini-2.5-pro
+      # API-key BYOK arm: AI Studio GEMINI_API_KEY/GOOGLE_API_KEY.
+      - name: google
+        models:
+          - gemini-2.5-pro
@@ -113,9 +113,13 @@ func TestMatchesModel(t *testing.T) {
 		{"MiniMax-M2.7", "minimax"},
 		{"MiniMax-M2", "minimax"},
 		{"minimax-m2.5", "minimax"},
-		// OpenAI — DB gpt-5.x + canvas /^gpt-/.
-		{"gpt-5.5", "openai"},
-		{"gpt-5.4-mini", "openai"},
+		// OpenAI — the bare gpt-* family is owned by the codex DEFAULT arm
+		// openai-subscription (the OAuth subscription); openai-api uses a
+		// disjoint sentinel prefix so the catalog overlap guard stays green
+		// (mirror of anthropic-oauth's alias-only regex vs anthropic-api's
+		// ^claude). canvas /^gpt-/.
+		{"gpt-5.5", "openai-subscription"},
+		{"gpt-5.4-mini", "openai-subscription"},
 		// Xiaomi MiMo — adapter mimo- + canvas /^mimo-/.
 		{"mimo-v2.5-pro", "xiaomi-mimo"},
 		// Z.ai GLM — adapter glm- + canvas /^GLM-/ (mixed case).
@@ -205,3 +209,109 @@ func TestMatchesModelZeroValue(t *testing.T) {
 		t.Error("Provider with an empty regex must never match")
 	}
 }
+
+// TestGoogleADKRuntimeRegistered locks the providers.yaml SSOT entry for the
+// google-adk runtime (Gemini via Vertex AI, keyless ADC). The runtime picker
+// + GET /templates enrichment read this matrix as SSOT; a missing entry
+// silently degrades the ADK runtime's model/provider surface. See
+// project_canvas_runtime_dropdown_ssot_fix.
+func TestGoogleADKRuntimeRegistered(t *testing.T) {
+	m, err := LoadManifest()
+	if err != nil {
+		t.Fatalf("LoadManifest() error = %v", err)
+	}
+	models, err := m.ModelsForRuntime("google-adk")
+	if err != nil {
+		t.Fatalf("ModelsForRuntime(google-adk) error = %v", err)
+	}
+	hasModel := false
+	for _, id := range models {
+		if id == "gemini-2.5-pro" {
+			hasModel = true
+		}
+	}
+	if !hasModel {
+		t.Errorf("google-adk models missing gemini-2.5-pro; got %v", models)
+	}
+	provs, err := m.ProvidersForRuntime("google-adk")
+	if err != nil {
+		t.Fatalf("ProvidersForRuntime(google-adk) error = %v", err)
+	}
+	hasProv := false
+	for _, p := range provs {
+		if p.Name == "google" {
+			hasProv = true
+		}
+	}
+	if !hasProv {
+		t.Errorf("google-adk providers missing google vendor; got %d providers", len(provs))
+	}
+}
+
+// TestVertexProviderRegistered locks the keyless Vertex provider variant in the
+// providers.yaml SSOT. google-adk serves Gemini via Vertex AI with ADC/WIF
+// (no API key); the registry must model that as a first-class "vertex" provider
+// (auth_env GOOGLE_APPLICATION_CREDENTIALS, ^vertex: namespace) distinct from
+// the API-key "google" vendor, and the google-adk runtime must offer both arms.
+// See project_canvas_runtime_dropdown_ssot_fix.
+//
+// Checks, in order:
+//   - Load() returns a provider named "vertex";
+//   - its AuthEnv contains GOOGLE_APPLICATION_CREDENTIALS;
+//   - it matches "vertex:gemini-2.5-pro" and does NOT match "gemini-2.5-pro";
+//   - ProvidersForRuntime("google-adk") includes both "vertex" and "google";
+//   - ModelsForRuntime("google-adk") includes "vertex:gemini-2.5-pro".
+func TestVertexProviderRegistered(t *testing.T) {
+	ps, err := Load()
+	if err != nil {
+		t.Fatalf("Load() error = %v", err)
+	}
+	// Point into the loaded slice (not at a loop copy) so the assertions
+	// below inspect the registered entry itself.
+	var vertex *Provider
+	for i := range ps {
+		if ps[i].Name == "vertex" {
+			vertex = &ps[i]
+			break
+		}
+	}
+	if vertex == nil {
+		t.Fatal("vertex provider not registered in providers.yaml")
+	}
+	// Keyless: ADC env, not an API key.
+	hasADC := false
+	for _, e := range vertex.AuthEnv {
+		if e == "GOOGLE_APPLICATION_CREDENTIALS" {
+			hasADC = true
+			break
+		}
+	}
+	if !hasADC {
+		t.Errorf("vertex auth_env should be keyless GOOGLE_APPLICATION_CREDENTIALS; got %v", vertex.AuthEnv)
+	}
+	// Owns the vertex: namespace, NOT ^gemini- (which the API-key google vendor owns).
+	if !vertex.MatchesModel("vertex:gemini-2.5-pro") {
+		t.Errorf("vertex provider should match vertex:gemini-2.5-pro")
+	}
+	if vertex.MatchesModel("gemini-2.5-pro") {
+		t.Errorf("vertex provider must NOT claim the bare gemini- namespace (owned by google vendor)")
+	}
+
+	m, err := LoadManifest()
+	if err != nil {
+		t.Fatalf("LoadManifest() error = %v", err)
+	}
+	provs, err := m.ProvidersForRuntime("google-adk")
+	if err != nil {
+		t.Fatalf("ProvidersForRuntime(google-adk) error = %v", err)
+	}
+	// Collect provider names as a set so both arms can be asserted independently.
+	names := map[string]bool{}
+	for _, p := range provs {
+		names[p.Name] = true
+	}
+	if !names["vertex"] {
+		t.Errorf("google-adk runtime should offer the keyless vertex arm; got %v", names)
+	}
+	if !names["google"] {
+		t.Errorf("google-adk runtime should keep the API-key google arm; got %v", names)
+	}
+	// Fail loudly on a lookup error instead of discarding it: an ignored error
+	// would otherwise surface only as a misleading "missing model" failure.
+	models, err := m.ModelsForRuntime("google-adk")
+	if err != nil {
+		t.Fatalf("ModelsForRuntime(google-adk) error = %v", err)
+	}
+	hasVertexModel := false
+	for _, id := range models {
+		if id == "vertex:gemini-2.5-pro" {
+			hasVertexModel = true
+			break
+		}
+	}
+	if !hasVertexModel {
+		t.Errorf("google-adk models should include vertex:gemini-2.5-pro; got %v", models)
+	}
+}
@@ -19,13 +19,17 @@ import (
 //
 //	claude-code -> anthropic (oauth+api), kimi (kimi-coding), minimax, platform
 //	hermes      -> kimi (kimi-coding), platform
-//	codex       -> openai, platform
+//	codex       -> openai (subscription + api), platform
 //	openclaw    -> kimi (kimi-coding), platform
 var runtimeNativeProviders = map[string][]string{
 	"claude-code": {"anthropic-api", "anthropic-oauth", "kimi-coding", "minimax", "platform"},
 	"hermes":      {"kimi-coding", "platform"},
-	"codex":       {"openai", "platform"}, // platform openai via the proxy Responses surface
-	"openclaw":    {"kimi-coding", "platform"},
+	// codex's OpenAI BYOK is split across the OAuth subscription arm
+	// (openai-subscription) and the direct-key arm (openai-api), mirroring
+	// claude-code's anthropic oauth+api split; platform openai via the proxy
+	// Responses surface.
+	"codex":    {"openai-subscription", "openai-api", "platform"},
+	"openclaw": {"kimi-coding", "platform"},
 }

 func sortedCopy(in []string) []string {
@@ -29,7 +29,7 @@ import (
 // canonicalProvidersYAMLSHA256 is the sha256 of the canonical providers.yaml as
 // synced from molecule-controlplane. Bumped deliberately on each re-sync (see
 // file doc). Cross-checked live by the sync-providers-yaml CI workflow.
-const canonicalProvidersYAMLSHA256 = "73e8003062edaa4ce75bfb324be615b6e2b380f07487e3af4dc16cb644dc12bc"
+const canonicalProvidersYAMLSHA256 = "dedbb8ccb00155202a54160b38574f9e6688587509e79ffc676e8e2667f76187"

 func TestSyncedYAMLMatchesCanonicalSHA(t *testing.T) {
 	sum := sha256.Sum256(embeddedYAML)