fix(workspace-server): create_workspace children born NOT_CONFIGURED — pin LLM_PROVIDER=platform for platform-managed models #3200
@@ -0,0 +1,111 @@
|
||||
package handlers
|
||||
|
||||
// create_workspace NOT_CONFIGURED regression gate.
|
||||
//
|
||||
// A child workspace the concierge spawns via the `create_workspace`
|
||||
// management-MCP tool flows through WorkspaceHandler.Create, which persists
|
||||
// MODEL (setModelSecret) but — since the internal#718 P4 closure removed the
|
||||
// unconditional setProviderSecret write — persisted NO LLM_PROVIDER. For a
|
||||
// platform-managed model id like "moonshot/kimi-k2.6" the on-box runtime
|
||||
// re-derives provider="moonshot" (a model PREFIX, not a registry NAME) and the
|
||||
// claude-code adapter fail-closes ("workspace config picks provider='moonshot'
|
||||
// but it is not in the providers registry") → the child boots online but
|
||||
// NOT_CONFIGURED.
|
||||
//
|
||||
// ensureCreatedWorkspaceProviderPin (wired into Create after setModelSecret)
|
||||
// closes that gap: it pins LLM_PROVIDER=platform iff the registry derivation of
|
||||
// (runtime, model) is the closed `platform` provider, mirroring the concierge's
|
||||
// ensureConciergeProvider IsPlatform gate. These tests prove a child created via
|
||||
// create_workspace is born with a COMPLETE, self-consistent (runtime, model,
|
||||
// provider) config that the registry validates — not a MODEL-without-provider
|
||||
// the adapter would reject — while BYOK/OAuth children are left untouched.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
)
|
||||
|
||||
func TestEnsureCreatedWorkspaceProviderPin(t *testing.T) {
|
||||
const secretInsert = `INSERT INTO workspace_secrets`
|
||||
|
||||
t.Run("platform-managed child (moonshot/kimi-k2.6) gets LLM_PROVIDER=platform pinned", func(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
// setProviderSecret writes the LLM_PROVIDER row directly (no preceding
|
||||
// existence SELECT — Create owns the fresh row, there is nothing to respect).
|
||||
mock.ExpectExec(secretInsert).
|
||||
WithArgs("ws-child", sqlmock.AnyArg(), sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
ensureCreatedWorkspaceProviderPin(context.Background(), "ws-child", "claude-code", "moonshot/kimi-k2.6", nil)
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("platform-managed child was NOT pinned LLM_PROVIDER=platform (the create_workspace NOT_CONFIGURED bug): %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("pinned provider matches the registry-derived provider (config is self-consistent)", func(t *testing.T) {
|
||||
// The value the helper persists MUST equal what DeriveProvider resolves for
|
||||
// the same (runtime, model) — otherwise the config the adapter reads is
|
||||
// internally inconsistent. Assert the pin equals the derived provider name
|
||||
// AND that it IS the closed platform provider.
|
||||
m, err := providerRegistry()
|
||||
if err != nil || m == nil {
|
||||
t.Fatalf("provider registry unavailable: %v", err)
|
||||
}
|
||||
prov, derr := m.DeriveProvider("claude-code", "moonshot/kimi-k2.6", nil)
|
||||
if derr != nil {
|
||||
t.Fatalf("DeriveProvider(claude-code, moonshot/kimi-k2.6) failed: %v", derr)
|
||||
}
|
||||
if !prov.IsPlatform() {
|
||||
t.Fatalf("registry SSOT changed: moonshot/kimi-k2.6 no longer derives to the platform provider (got %q) — the pin logic is keyed off this", prov.Name)
|
||||
}
|
||||
if prov.Name != providers.PlatformProviderName {
|
||||
t.Errorf("derived provider name %q != %q (the value the helper pins)", prov.Name, providers.PlatformProviderName)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("BYOK child carrying a vendor key derives to its real provider — NOT pinned platform", func(t *testing.T) {
|
||||
// A create that supplies ANTHROPIC_API_KEY for an anthropic-api model
|
||||
// derives to anthropic-api (a real provider entry), not platform. The
|
||||
// helper must NOT write any LLM_PROVIDER row (no INSERT expected): pinning
|
||||
// would mis-route the runtime's own correct derivation.
|
||||
mock := setupTestDB(t)
|
||||
// No ExpectExec — any INSERT is an unmet-expectation failure below.
|
||||
|
||||
// Model whose registry derivation is a real (non-platform) provider when
|
||||
// the anthropic-api auth env is present.
|
||||
ensureCreatedWorkspaceProviderPin(
|
||||
context.Background(),
|
||||
"ws-byok-child",
|
||||
"claude-code",
|
||||
"anthropic:claude-opus-4-7",
|
||||
[]string{"ANTHROPIC_API_KEY"},
|
||||
)
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("BYOK child wrongly got an LLM_PROVIDER write (would mis-route the runtime's own derivation): %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("empty model is a no-op (external/registerless create)", func(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
// No INSERT expected.
|
||||
ensureCreatedWorkspaceProviderPin(context.Background(), "ws-empty", "claude-code", "", nil)
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("empty model should be a no-op (no provider pin): %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("unknown/federated runtime derive-miss is a no-op (fails open, no pin)", func(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
// DeriveProvider errors for an unknown runtime → helper returns without a
|
||||
// write (matches the create-boundary registry gates that fail open here).
|
||||
ensureCreatedWorkspaceProviderPin(context.Background(), "ws-fed", "some-federated-runtime", "vendor/model", nil)
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unknown-runtime derive-miss should be a no-op: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -34,6 +34,7 @@ import (
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/crypto"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/provisioner"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/google/uuid"
|
||||
@@ -624,6 +625,70 @@ func setProviderSecret(ctx context.Context, workspaceID, provider string) error
|
||||
return err
|
||||
}
|
||||
|
||||
// ensureCreatedWorkspaceProviderPin pins LLM_PROVIDER=platform on a freshly
|
||||
// CREATED workspace (the Create handler / `create_workspace` management-MCP
|
||||
// path) when, and ONLY when, its (runtime, model) derives to the closed
|
||||
// `platform` provider via the registry.
|
||||
//
|
||||
// Why this exists (the create_workspace NOT_CONFIGURED bug): the platform/root
|
||||
// concierge gets LLM_PROVIDER=platform from ensureConciergeProvider, but that
|
||||
// helper runs ONLY on the kind=platform provision path. A CHILD workspace the
|
||||
// concierge spawns via `create_workspace` goes through WorkspaceHandler.Create,
|
||||
// which persists MODEL (setModelSecret) but — since the internal#718 P4 closure
|
||||
// removed the unconditional setProviderSecret write — persists NO LLM_PROVIDER.
|
||||
// For a platform-managed model id like "moonshot/kimi-k2.6" the on-box runtime
|
||||
// re-derives the provider with its own slug-split (_derive_provider_from_model →
|
||||
// "moonshot", a model-PREFIX, NOT a registry provider NAME), so the claude-code
|
||||
// adapter fail-closes: "workspace config picks provider='moonshot' but it is not
|
||||
// in the providers registry" → the child boots online but NOT_CONFIGURED. This
|
||||
// is the exact symptom ensureConciergeProvider was added to cure for the root;
|
||||
// children created via create_workspace need the same env-level pin.
|
||||
//
|
||||
// The pin is gated on the registry-DERIVED provider being `platform`
|
||||
// (providers.Manifest.DeriveProvider → IsPlatform), NOT on a model-prefix string
|
||||
// or on the parent being platform-managed. This:
|
||||
// - is parent-independent and not moonshot-specific (any platform-managed
|
||||
// model id whose registry derivation is `platform` gets the pin);
|
||||
// - leaves BYOK / OAuth / self-host children UNTOUCHED — their model derives to
|
||||
// a real provider entry (anthropic-oauth, minimax, kimi-coding, …) the
|
||||
// runtime resolves correctly on its own, so pinning would be wrong. Mirrors
|
||||
// ensureConciergeProvider's `IsPlatform` gate (it only ever pins `platform`).
|
||||
//
|
||||
// availableAuthEnv is the auth-env-var NAMES present in the create payload's
|
||||
// secrets — the same disambiguation input DeriveProvider uses elsewhere — so a
|
||||
// BYOK create that carries vendor keys derives to its real provider (not
|
||||
// platform) and is correctly skipped. Best-effort + non-fatal: a derive miss or
|
||||
// a persist error logs and returns; the workspace row stays consistent and the
|
||||
// (unchanged) downstream provision validation still applies.
|
||||
func ensureCreatedWorkspaceProviderPin(ctx context.Context, workspaceID, runtime, model string, secretKeys []string) {
|
||||
model = strings.TrimSpace(model)
|
||||
if model == "" {
|
||||
return
|
||||
}
|
||||
m, err := providerRegistry()
|
||||
if err != nil || m == nil {
|
||||
log.Printf("Create workspace %s: provider registry unavailable; cannot pin LLM_PROVIDER (non-fatal): %v", workspaceID, err)
|
||||
return
|
||||
}
|
||||
prov, derr := m.DeriveProvider(runtime, model, secretKeys)
|
||||
if derr != nil {
|
||||
// Unknown/ambiguous (runtime, model) — the create-boundary registry
|
||||
// gates already let this through (e.g. federated/unknown runtimes), and
|
||||
// a non-derivable model is not platform-managed by construction. No pin.
|
||||
return
|
||||
}
|
||||
if !prov.IsPlatform() {
|
||||
// BYOK / OAuth / self-host: the model derives to a real provider entry
|
||||
// the runtime resolves on its own. Pinning here would mis-route.
|
||||
return
|
||||
}
|
||||
if setErr := setProviderSecret(ctx, workspaceID, providers.PlatformProviderName); setErr != nil {
|
||||
log.Printf("Create workspace %s: failed to pin LLM_PROVIDER=%s for platform-managed model %q: %v (non-fatal)", workspaceID, providers.PlatformProviderName, model, setErr)
|
||||
return
|
||||
}
|
||||
log.Printf("Create workspace %s: pinned LLM_PROVIDER=%s for platform-managed model %q (create_workspace child config completeness)", workspaceID, providers.PlatformProviderName, model)
|
||||
}
|
||||
|
||||
// EnsureSelfHostedPlatformAgent installs the org's platform agent (the concierge,
|
||||
// the org root) on a tenant that has no control plane to do it — i.e. self-hosted
|
||||
// or local. In SaaS the CP calls InstallPlatformAgent at org-provision time; this
|
||||
|
||||
@@ -837,6 +837,26 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
|
||||
if err := setModelSecret(ctx, id, payload.Model); err != nil {
|
||||
log.Printf("Create workspace %s: failed to persist MODEL_PROVIDER %q: %v (non-fatal)", id, payload.Model, err)
|
||||
}
|
||||
|
||||
// core (create_workspace NOT_CONFIGURED): a child workspace the concierge
|
||||
// spawns via `create_workspace` lands here with MODEL but — since the
|
||||
// internal#718 P4 closure removed the unconditional setProviderSecret write
|
||||
// — NO LLM_PROVIDER. For a platform-managed model id (e.g.
|
||||
// "moonshot/kimi-k2.6") the on-box runtime re-derives provider="moonshot"
|
||||
// (a model PREFIX, not a registry NAME) and the adapter fail-closes ("picks
|
||||
// provider='moonshot' but it is not in the providers registry") → the child
|
||||
// boots online but NOT_CONFIGURED. Mirror the concierge's ensureConcierge-
|
||||
// Provider pin here so the child is born with a COMPLETE config: pin
|
||||
// LLM_PROVIDER=platform iff the registry derivation of (runtime, model) is
|
||||
// the closed `platform` provider. BYOK/OAuth/self-host children (whose model
|
||||
// derives to a real provider entry) are left untouched. availableAuthEnv =
|
||||
// the create payload's secret KEYS so a BYOK create derives to its real
|
||||
// provider and is correctly skipped. Non-fatal.
|
||||
createSecretKeys := make([]string, 0, len(payload.Secrets))
|
||||
for k := range payload.Secrets {
|
||||
createSecretKeys = append(createSecretKeys, k)
|
||||
}
|
||||
ensureCreatedWorkspaceProviderPin(ctx, id, payload.Runtime, payload.Model, createSecretKeys)
|
||||
}
|
||||
|
||||
// Insert canvas layout — non-fatal: workspace can be dragged into position later
|
||||
|
||||
Reference in New Issue
Block a user