From 2cf0a6b1f53b1913ed1a532d6e9b8e2d3f8c3dc0 Mon Sep 17 00:00:00 2001 From: "Claude (devops)" Date: Tue, 23 Jun 2026 18:34:30 -0700 Subject: [PATCH] =?UTF-8?q?fix(workspace-server):=20create=5Fworkspace=20c?= =?UTF-8?q?hildren=20born=20NOT=5FCONFIGURED=20=E2=80=94=20pin=20LLM=5FPRO?= =?UTF-8?q?VIDER=3Dplatform=20for=20platform-managed=20models?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A child workspace the concierge spawns via the `create_workspace` management-MCP tool flows through WorkspaceHandler.Create, which persists MODEL (setModelSecret) but — since the internal#718 P4 closure removed the unconditional setProviderSecret write — persisted NO LLM_PROVIDER. The kind=platform concierge gets its pin from ensureConciergeProvider, but that helper runs ONLY on the platform provision path, so children were left with secrets = [MODEL] only. For a platform-managed model id like "moonshot/kimi-k2.6" the on-box runtime re-derives the provider with its own slug-split (_derive_provider_from_model → "moonshot", a model PREFIX, not a registry NAME), so the claude-code adapter fail-closes ("workspace config picks provider='moonshot' but it is not in the providers registry") → the child boots online but NOT_CONFIGURED. Fix: after setModelSecret, Create now calls ensureCreatedWorkspaceProviderPin, mirroring the concierge's IsPlatform gate. It derives the provider via the registry (providers.Manifest.DeriveProvider) from (runtime, model, payload secret keys) and persists LLM_PROVIDER=platform iff the derivation is the closed `platform` provider. This is parent-independent and not moonshot-specific; BYOK/OAuth/self-host children (whose model derives to a real provider entry) are left untouched so the runtime's own derivation is not mis-routed. Non-fatal: registry-unavailable / derive-miss / persist-error log and continue. Adds create_workspace_provider_pin_test.go: platform-managed child gets the pin; the pinned value equals the registry-derived provider (self-consistent config); BYOK child carrying a vendor key is NOT pinned; empty-model and unknown-runtime are no-ops. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../create_workspace_provider_pin_test.go | 111 ++++++++++++++++++ .../internal/handlers/platform_agent.go | 65 ++++++++++ .../internal/handlers/workspace.go | 20 ++++ 3 files changed, 196 insertions(+) create mode 100644 workspace-server/internal/handlers/create_workspace_provider_pin_test.go diff --git a/workspace-server/internal/handlers/create_workspace_provider_pin_test.go b/workspace-server/internal/handlers/create_workspace_provider_pin_test.go new file mode 100644 index 00000000..7ed68cb3 --- /dev/null +++ b/workspace-server/internal/handlers/create_workspace_provider_pin_test.go @@ -0,0 +1,111 @@ +package handlers + +// create_workspace NOT_CONFIGURED regression gate. +// +// A child workspace the concierge spawns via the `create_workspace` +// management-MCP tool flows through WorkspaceHandler.Create, which persists +// MODEL (setModelSecret) but — since the internal#718 P4 closure removed the +// unconditional setProviderSecret write — persisted NO LLM_PROVIDER. For a +// platform-managed model id like "moonshot/kimi-k2.6" the on-box runtime +// re-derives provider="moonshot" (a model PREFIX, not a registry NAME) and the +// claude-code adapter fail-closes ("workspace config picks provider='moonshot' +// but it is not in the providers registry") → the child boots online but +// NOT_CONFIGURED. +// +// ensureCreatedWorkspaceProviderPin (wired into Create after setModelSecret) +// closes that gap: it pins LLM_PROVIDER=platform iff the registry derivation of +// (runtime, model) is the closed `platform` provider, mirroring the concierge's +// ensureConciergeProvider IsPlatform gate. These tests prove a child created via +// create_workspace is born with a COMPLETE, self-consistent (runtime, model, +// provider) config that the registry validates — not a MODEL-without-provider +// the adapter would reject — while BYOK/OAuth children are left untouched. + +import ( + "context" + "testing" + + "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers" + "github.com/DATA-DOG/go-sqlmock" +) + +func TestEnsureCreatedWorkspaceProviderPin(t *testing.T) { + const secretInsert = `INSERT INTO workspace_secrets` + + t.Run("platform-managed child (moonshot/kimi-k2.6) gets LLM_PROVIDER=platform pinned", func(t *testing.T) { + mock := setupTestDB(t) + // setProviderSecret writes the LLM_PROVIDER row directly (no preceding + // existence SELECT — Create owns the fresh row, there is nothing to respect). + mock.ExpectExec(secretInsert). + WithArgs("ws-child", sqlmock.AnyArg(), sqlmock.AnyArg()). + WillReturnResult(sqlmock.NewResult(0, 1)) + + ensureCreatedWorkspaceProviderPin(context.Background(), "ws-child", "claude-code", "moonshot/kimi-k2.6", nil) + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("platform-managed child was NOT pinned LLM_PROVIDER=platform (the create_workspace NOT_CONFIGURED bug): %v", err) + } + }) + + t.Run("pinned provider matches the registry-derived provider (config is self-consistent)", func(t *testing.T) { + // The value the helper persists MUST equal what DeriveProvider resolves for + // the same (runtime, model) — otherwise the config the adapter reads is + // internally inconsistent. Assert the pin equals the derived provider name + // AND that it IS the closed platform provider. + m, err := providerRegistry() + if err != nil || m == nil { + t.Fatalf("provider registry unavailable: %v", err) + } + prov, derr := m.DeriveProvider("claude-code", "moonshot/kimi-k2.6", nil) + if derr != nil { + t.Fatalf("DeriveProvider(claude-code, moonshot/kimi-k2.6) failed: %v", derr) + } + if !prov.IsPlatform() { + t.Fatalf("registry SSOT changed: moonshot/kimi-k2.6 no longer derives to the platform provider (got %q) — the pin logic is keyed off this", prov.Name) + } + if prov.Name != providers.PlatformProviderName { + t.Errorf("derived provider name %q != %q (the value the helper pins)", prov.Name, providers.PlatformProviderName) + } + }) + + t.Run("BYOK child carrying a vendor key derives to its real provider — NOT pinned platform", func(t *testing.T) { + // A create that supplies ANTHROPIC_API_KEY for an anthropic-api model + // derives to anthropic-api (a real provider entry), not platform. The + // helper must NOT write any LLM_PROVIDER row (no INSERT expected): pinning + // would mis-route the runtime's own correct derivation. + mock := setupTestDB(t) + // No ExpectExec — any INSERT is an unmet-expectation failure below. + + // Model whose registry derivation is a real (non-platform) provider when + // the anthropic-api auth env is present. + ensureCreatedWorkspaceProviderPin( + context.Background(), + "ws-byok-child", + "claude-code", + "anthropic:claude-opus-4-7", + []string{"ANTHROPIC_API_KEY"}, + ) + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("BYOK child wrongly got an LLM_PROVIDER write (would mis-route the runtime's own derivation): %v", err) + } + }) + + t.Run("empty model is a no-op (external/registerless create)", func(t *testing.T) { + mock := setupTestDB(t) + // No INSERT expected. + ensureCreatedWorkspaceProviderPin(context.Background(), "ws-empty", "claude-code", "", nil) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("empty model should be a no-op (no provider pin): %v", err) + } + }) + + t.Run("unknown/federated runtime derive-miss is a no-op (fails open, no pin)", func(t *testing.T) { + mock := setupTestDB(t) + // DeriveProvider errors for an unknown runtime → helper returns without a + // write (matches the create-boundary registry gates that fail open here). + ensureCreatedWorkspaceProviderPin(context.Background(), "ws-fed", "some-federated-runtime", "vendor/model", nil) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unknown-runtime derive-miss should be a no-op: %v", err) + } + }) +} diff --git a/workspace-server/internal/handlers/platform_agent.go b/workspace-server/internal/handlers/platform_agent.go index 386cf266..29426c5f 100644 --- a/workspace-server/internal/handlers/platform_agent.go +++ b/workspace-server/internal/handlers/platform_agent.go @@ -34,6 +34,7 @@ import ( "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/crypto" "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db" "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models" + "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers" "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/provisioner" "github.com/gin-gonic/gin" "github.com/google/uuid" @@ -624,6 +625,70 @@ func setProviderSecret(ctx context.Context, workspaceID, provider string) error return err } +// ensureCreatedWorkspaceProviderPin pins LLM_PROVIDER=platform on a freshly +// CREATED workspace (the Create handler / `create_workspace` management-MCP +// path) when, and ONLY when, its (runtime, model) derives to the closed +// `platform` provider via the registry. +// +// Why this exists (the create_workspace NOT_CONFIGURED bug): the platform/root +// concierge gets LLM_PROVIDER=platform from ensureConciergeProvider, but that +// helper runs ONLY on the kind=platform provision path. A CHILD workspace the +// concierge spawns via `create_workspace` goes through WorkspaceHandler.Create, +// which persists MODEL (setModelSecret) but — since the internal#718 P4 closure +// removed the unconditional setProviderSecret write — persists NO LLM_PROVIDER. +// For a platform-managed model id like "moonshot/kimi-k2.6" the on-box runtime +// re-derives the provider with its own slug-split (_derive_provider_from_model → +// "moonshot", a model-PREFIX, NOT a registry provider NAME), so the claude-code +// adapter fail-closes: "workspace config picks provider='moonshot' but it is not +// in the providers registry" → the child boots online but NOT_CONFIGURED. This +// is the exact symptom ensureConciergeProvider was added to cure for the root; +// children created via create_workspace need the same env-level pin. +// +// The pin is gated on the registry-DERIVED provider being `platform` +// (providers.Manifest.DeriveProvider → IsPlatform), NOT on a model-prefix string +// or on the parent being platform-managed. This: +// - is parent-independent and not moonshot-specific (any platform-managed +// model id whose registry derivation is `platform` gets the pin); +// - leaves BYOK / OAuth / self-host children UNTOUCHED — their model derives to +// a real provider entry (anthropic-oauth, minimax, kimi-coding, …) the +// runtime resolves correctly on its own, so pinning would be wrong. Mirrors +// ensureConciergeProvider's `IsPlatform` gate (it only ever pins `platform`). +// +// availableAuthEnv is the auth-env-var NAMES present in the create payload's +// secrets — the same disambiguation input DeriveProvider uses elsewhere — so a +// BYOK create that carries vendor keys derives to its real provider (not +// platform) and is correctly skipped. Best-effort + non-fatal: a derive miss or +// a persist error logs and returns; the workspace row stays consistent and the +// (unchanged) downstream provision validation still applies. +func ensureCreatedWorkspaceProviderPin(ctx context.Context, workspaceID, runtime, model string, secretKeys []string) { + model = strings.TrimSpace(model) + if model == "" { + return + } + m, err := providerRegistry() + if err != nil || m == nil { + log.Printf("Create workspace %s: provider registry unavailable; cannot pin LLM_PROVIDER (non-fatal): %v", workspaceID, err) + return + } + prov, derr := m.DeriveProvider(runtime, model, secretKeys) + if derr != nil { + // Unknown/ambiguous (runtime, model) — the create-boundary registry + // gates already let this through (e.g. federated/unknown runtimes), and + // a non-derivable model is not platform-managed by construction. No pin. + return + } + if !prov.IsPlatform() { + // BYOK / OAuth / self-host: the model derives to a real provider entry + // the runtime resolves on its own. Pinning here would mis-route. + return + } + if setErr := setProviderSecret(ctx, workspaceID, providers.PlatformProviderName); setErr != nil { + log.Printf("Create workspace %s: failed to pin LLM_PROVIDER=%s for platform-managed model %q: %v (non-fatal)", workspaceID, providers.PlatformProviderName, model, setErr) + return + } + log.Printf("Create workspace %s: pinned LLM_PROVIDER=%s for platform-managed model %q (create_workspace child config completeness)", workspaceID, providers.PlatformProviderName, model) +} + // EnsureSelfHostedPlatformAgent installs the org's platform agent (the concierge, // the org root) on a tenant that has no control plane to do it — i.e. self-hosted // or local. In SaaS the CP calls InstallPlatformAgent at org-provision time; this diff --git a/workspace-server/internal/handlers/workspace.go b/workspace-server/internal/handlers/workspace.go index 5baabc54..9fb4e3be 100644 --- a/workspace-server/internal/handlers/workspace.go +++ b/workspace-server/internal/handlers/workspace.go @@ -837,6 +837,26 @@ func (h *WorkspaceHandler) Create(c *gin.Context) { if err := setModelSecret(ctx, id, payload.Model); err != nil { log.Printf("Create workspace %s: failed to persist MODEL_PROVIDER %q: %v (non-fatal)", id, payload.Model, err) } + + // core (create_workspace NOT_CONFIGURED): a child workspace the concierge + // spawns via `create_workspace` lands here with MODEL but — since the + // internal#718 P4 closure removed the unconditional setProviderSecret write + // — NO LLM_PROVIDER. For a platform-managed model id (e.g. + // "moonshot/kimi-k2.6") the on-box runtime re-derives provider="moonshot" + // (a model PREFIX, not a registry NAME) and the adapter fail-closes ("picks + // provider='moonshot' but it is not in the providers registry") → the child + // boots online but NOT_CONFIGURED. Mirror the concierge's ensureConcierge- + // Provider pin here so the child is born with a COMPLETE config: pin + // LLM_PROVIDER=platform iff the registry derivation of (runtime, model) is + // the closed `platform` provider. BYOK/OAuth/self-host children (whose model + // derives to a real provider entry) are left untouched. availableAuthEnv = + // the create payload's secret KEYS so a BYOK create derives to its real + // provider and is correctly skipped. Non-fatal. + createSecretKeys := make([]string, 0, len(payload.Secrets)) + for k := range payload.Secrets { + createSecretKeys = append(createSecretKeys, k) + } + ensureCreatedWorkspaceProviderPin(ctx, id, payload.Runtime, payload.Model, createSecretKeys) } // Insert canvas layout — non-fatal: workspace can be dragged into position later -- 2.52.0