diff --git a/workspace-server/internal/handlers/model_registry_validation.go b/workspace-server/internal/handlers/model_registry_validation.go index 43bf8fd1c..426a2325f 100644 --- a/workspace-server/internal/handlers/model_registry_validation.go +++ b/workspace-server/internal/handlers/model_registry_validation.go @@ -17,6 +17,7 @@ package handlers import ( "fmt" + "sort" "strings" ) @@ -55,3 +56,95 @@ func validateRegisteredModelForRuntime(runtime, model string) (bool, string) { "model %q is not a registered model for runtime %q; pick one of the runtime's registered models (provider-registry SSOT, internal#718)", model, runtime) } + +// validateDerivedProviderInRegistry (issue #2172) is the provider-side companion +// to validateRegisteredModelForRuntime. The model-side check asks "is this +// (runtime, model) in the registry?"; the provider-side check asks "is the +// provider this model DERIVES to — the same one the adapter will resolve at +// boot — a known provider in providers.yaml?" +// +// Live trigger (adk-demo Assistant, 2026-06-03): workspace config +// `model=moonshot/kimi-k2.6` (claude-code) → adapter derives `provider=moonshot` +// → `ValueError: provider=moonshot not in providers registry` at BOOT. The +// save was accepted (no validation at the API boundary), and the failure only +// surfaced when the agent tried to register. CI never saw it. The drift gate +// (RFC#580) validates TEMPLATES against the registry, NOT per-workspace +// configs; the existing model-side check rejects a model the runtime doesn't +// own but says nothing about the DERIVED provider's registry membership. +// +// Returns: +// +// (true, "") — pass: model is empty (MODEL_REQUIRED owns it), the +// runtime is not in the registry (fail-open for +// federated / non-first-party runtimes — mirror of the +// model-side check's federation contract), the registry +// failed to load (build-time gate owns it), OR the +// derived provider name is a known provider in the +// registry's `providers:` list. +// (false, reason) — reject: a known (runtime, model) pair derives to a +// provider name absent from the providers list. This is +// the structural class the adk-demo boot failure belongs +// to — the registry's `runtimes:` block references a +// provider not declared in `providers:`, which by +// construction is a registry-data bug. Catching it at +// config-SAVE keeps it out of the agent-boot path. +// +// Defense-in-depth: by construction, a model in a runtime's native provider set +// has a provider that IS in the catalog (the runtime ref names a provider from +// the providers list). So the rejection path is primarily a registry-consistency +// guard. The real value is the FAIL-LOUD semantics — any future drift between +// `providers:` and `runtimes:` fails the create call with a clear pointer to +// the missing provider, instead of silently wedging the agent at boot. +func validateDerivedProviderInRegistry(runtime, model string) (bool, string) { + model = strings.TrimSpace(model) + if model == "" { + return true, "" // MODEL_REQUIRED owns this. + } + m, err := providerRegistry() + if err != nil || m == nil { + // Registry unavailable (build-time defect the gates catch). Fail open — + // do not block create on a registry-load failure. + return true, "" + } + // DeriveProvider is fail-closed for unknown runtimes. Mirror the + // model-side check's federation contract: a runtime the registry does + // NOT know (langgraph / external / kimi / mock / federated) is allowed + // to pass through. DeriveProvider's `unknown runtime` error IS that + // signal — treat it as fail-open, identical to ModelsForRuntime's + // not-found behavior above. + p, err := m.DeriveProvider(runtime, model, nil) + if err != nil { + // Either the runtime is unknown (fail-open by contract) OR the model + // is not native to the runtime (the model-side validator already + // rejected this — DeriveProvider's error here means + // validateRegisteredModelForRuntime should have caught it. Don't + // double-reject: pass through and let the model-side response own + // the message). + return true, "" + } + // Defense-in-depth: confirm the DERIVED provider is a known entry in the + // providers list. By construction it should be (DeriveProvider only + // returns a Provider that was looked up by name from `providers:`), but + // a future federation merge could introduce a runtime ref pointing at a + // contributed provider absent from the core catalog. Reject loudly here + // rather than letting the save reach the agent-boot path and wedge with + // "provider=X not in providers registry" (the original adk-demo class). + for _, candidate := range m.Providers { + if candidate.Name == p.Name { + return true, "" + } + } + // Build a sorted, comma-separated list of valid provider names so the + // operator/caller sees the actionable list (the boot-time error message + // the adk-demo class produced does NOT include this — the fix is to + // surface it at the API boundary, where the caller can fix the request + // without a stuck workspace + operator page). + valid := make([]string, 0, len(m.Providers)) + for _, c := range m.Providers { + valid = append(valid, c.Name) + } + sort.Strings(valid) + return false, fmt.Sprintf( + "derived provider %q (for model %q on runtime %q) is not in the providers registry; pick a model whose derived provider is one of: %s", + p.Name, model, runtime, strings.Join(valid, ", ")) +} diff --git a/workspace-server/internal/handlers/model_registry_validation_test.go b/workspace-server/internal/handlers/model_registry_validation_test.go index 92a015b06..c68b0c818 100644 --- a/workspace-server/internal/handlers/model_registry_validation_test.go +++ b/workspace-server/internal/handlers/model_registry_validation_test.go @@ -6,8 +6,17 @@ package handlers // fail OPEN (allow) for a runtime the registry doesn't know yet (federation / // langgraph/etc. not in the first-party registry) so the existing knownRuntimes // gate stays authoritative there. +// +// TestValidateDerivedProviderInRegistry (issue #2172) is the provider-side +// companion: once the model-side check passes, confirm the DERIVED provider +// (the one the adapter will resolve at boot) is a known provider in +// providers.yaml. Catches the adk-demo "provider=X not in providers registry" +// class at config-SAVE time instead of letting it wedge the agent at boot. -import "testing" +import ( + "strings" + "testing" +) func TestValidateRegisteredModelForRuntime(t *testing.T) { type tc struct { @@ -80,3 +89,163 @@ func TestValidateRegisteredModelForRuntime(t *testing.T) { }) } } + +func TestValidateDerivedProviderInRegistry(t *testing.T) { + type tc struct { + name string + runtime string + model string + wantOK bool + // wantReasonContains: a substring the rejection reason must include + // (skipped for OK cases). Pins the actionable list / derivation pointer + // so the caller knows which provider was missing and what the valid + // set looks like — this is the fix that distinguishes the new gate + // from the boot-time "provider=X not in providers registry" string + // it replaces. + wantReasonContains string + } + cases := []tc{ + // PASS — every native (runtime, model) in the catalog derives to a + // provider that IS in the providers list. These are the live corpus + // entries; the test pins the registry-consistency invariant. + { + name: "claude_code_anthropic_api_native", + runtime: "claude-code", + model: "claude-sonnet-4-6", + wantOK: true, + }, + { + name: "claude_code_kimi_coding_native", + runtime: "claude-code", + model: "kimi-for-coding", + wantOK: true, + }, + { + name: "claude_code_minimax_native", + runtime: "claude-code", + model: "MiniMax-M2.7", + wantOK: true, + }, + { + name: "claude_code_platform_namespaced", + runtime: "claude-code", + model: "moonshot/kimi-k2.6", + wantOK: true, + }, + { + name: "codex_openai_subscription_default_arm", + runtime: "codex", + model: "gpt-5.5", + wantOK: true, + }, + { + name: "codex_platform_namespaced", + runtime: "codex", + model: "openai/gpt-5.4-mini", + wantOK: true, + }, + { + name: "hermes_kimi_coding", + runtime: "hermes", + model: "kimi-coding/kimi-k2", + wantOK: true, + }, + { + name: "hermes_platform_namespaced", + runtime: "hermes", + model: "moonshot/kimi-k2.6", + wantOK: true, + }, + { + name: "openclaw_kimi_coding", + runtime: "openclaw", + model: "moonshot:kimi-k2.6", + wantOK: true, + }, + // FAIL — model-side validator catches this, but the provider-side + // gate is called AFTER it in Create and inherits the fail-open + // contract for "model is not native to runtime" (DeriveProvider + // errors → allow, letting the model-side response own the message). + // This is the deliberate "don't double-reject" decision. + { + name: "unregistered_model_pass_through_to_model_side", + runtime: "claude-code", + model: "totally-made-up-model-xyz", + wantOK: true, // pass-through: model-side validator owns the rejection + }, + // Federation contract — mirror of the model-side test above. + { + name: "langgraph_runtime_failopen", + runtime: "langgraph", + model: "anything-goes", + wantOK: true, + }, + { + name: "external_runtime_failopen", + runtime: "external", + model: "whatever", + wantOK: true, + }, + // Empty model — MODEL_REQUIRED owns it; allow. + { + name: "empty_model_allowed_other_gate_owns_it", + runtime: "claude-code", + model: "", + wantOK: true, + }, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + ok, why := validateDerivedProviderInRegistry(c.runtime, c.model) + if ok != c.wantOK { + t.Errorf("validateDerivedProviderInRegistry(%q,%q) ok=%v want %v (reason=%q)", + c.runtime, c.model, ok, c.wantOK, why) + } + if !c.wantOK && c.wantReasonContains != "" && !strings.Contains(why, c.wantReasonContains) { + t.Errorf("rejection reason missing %q: got %q", c.wantReasonContains, why) + } + }) + } +} + +// TestRegistryConsistency_AllNativeModelsDeriveToKnownProvider walks every +// (runtime, model) pair in the registry's native model sets and asserts each +// one derives to a provider that IS in the providers list. This is the +// static regression gate the issue calls for ("a CI test fails if any shipped +// demo/template config references an unregistered provider") — generalized +// to the catalog as a whole: if anyone edits providers.yaml such that a +// `runtimes:` block names a provider absent from `providers:`, this test +// fires before the bad config can reach a customer workspace. +// +// By construction this invariant should always hold (DeriveProvider only +// returns a Provider that was looked up by name from `providers:`), so the +// test primarily guards against future federation merges that introduce a +// runtime ref pointing at a contributed provider absent from the core +// catalog — exactly the failure shape the adk-demo Assistant wedge +// belongs to. +func TestRegistryConsistency_AllNativeModelsDeriveToKnownProvider(t *testing.T) { + m, err := providerRegistry() + if err != nil || m == nil { + t.Skipf("providerRegistry unavailable in test env (err=%v); skipping consistency walk", err) + } + providerNames := make(map[string]struct{}, len(m.Providers)) + for _, p := range m.Providers { + providerNames[p.Name] = struct{}{} + } + for runtimeName, runtime := range m.Runtimes { + for _, ref := range runtime.Providers { + for _, modelID := range ref.Models { + p, err := m.DeriveProvider(runtimeName, modelID, nil) + if err != nil { + t.Errorf("catalog invariant broken: runtime=%q model=%q failed DeriveProvider: %v", + runtimeName, modelID, err) + continue + } + if _, ok := providerNames[p.Name]; !ok { + t.Errorf("catalog invariant broken: runtime=%q model=%q derives to provider %q which is not in the providers list (refs=%q)", + runtimeName, modelID, p.Name, ref.Name) + } + } + } + } +} diff --git a/workspace-server/internal/handlers/workspace.go b/workspace-server/internal/handlers/workspace.go index 30c453aed..19b3b16bf 100644 --- a/workspace-server/internal/handlers/workspace.go +++ b/workspace-server/internal/handlers/workspace.go @@ -474,6 +474,32 @@ func (h *WorkspaceHandler) Create(c *gin.Context) { }) return } + // issue #2172 (provider-side companion): once the (runtime, model) + // pair is known to be registered, confirm the DERIVED provider + // (the one the adapter will resolve at boot) is a known provider + // in the providers.yaml catalog. Live trigger (adk-demo Assistant, + // 2026-06-03): the model-side check passed for a registry-resident + // model whose derived provider name was NOT in the providers list, + // so the save was accepted and the agent wedged at boot with + // "provider=X not in providers registry". This check is a + // defense-in-depth registry-consistency guard: by construction a + // model in a runtime's native set derives to a provider that IS in + // the catalog, so the rejection path is primarily a registry-data + // invariant — any future drift between `providers:` and `runtimes:` + // fails the create with a clear pointer to the missing provider + // rather than silently wedging the agent. Fails open for runtimes + // the registry doesn't know (langgraph/external/kimi/mock/federated + // — the federation contract the model-side check also honors). + if ok, why := validateDerivedProviderInRegistry(payload.Runtime, payload.Model); !ok { + log.Printf("Create: 422 DERIVED_PROVIDER_NOT_IN_REGISTRY (runtime=%q model=%q): %s [issue #2172 hard-reject]", payload.Runtime, payload.Model, why) + c.JSON(http.StatusUnprocessableEntity, gin.H{ + "error": why, + "runtime": payload.Runtime, + "model": payload.Model, + "code": "DERIVED_PROVIDER_NOT_IN_REGISTRY", + }) + return + } } ctx := c.Request.Context()