fix(provision): platform-managed workspace must fail-closed when CP proxy env absent (#2162) #2164
@@ -372,3 +372,78 @@ func TestApplyPlatformManagedLLMEnv_WorkspaceOriginCredExemptFromStrip(t *testin
|
||||
t.Errorf("sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyPlatformManagedLLMEnv_MissingProxyEnvFailClosed is the #2162
|
||||
// regression guard. A platform-managed workspace whose CP proxy env is absent
|
||||
// must NOT start credential-less. The empty-proxy path must return
|
||||
// HasUsableLLMCred=false so the caller aborts with MISSING_PLATFORM_PROXY.
|
||||
//
|
||||
// Mutation: revert the early-return from HasUsableLLMCred=false to true
|
||||
// → workspace starts with zero credential → "container started but never
|
||||
// called /registry/register" (600s provision-timeout sweep) → this test RED.
|
||||
func TestApplyPlatformManagedLLMEnv_MissingProxyEnvFailClosed(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
const wsID = "29b95be9-811e-4857-be36-1dafdbf4f697" // adk-demo failure workspace
|
||||
|
||||
mock := setupTestDB(t)
|
||||
expectOverrideQuery(mock, wsID, "")
|
||||
|
||||
// No proxy env present — simulates the boot-race / misconfig path.
|
||||
envVars := map[string]string{}
|
||||
res := applyPlatformManagedLLMEnv(ctx, envVars, wsID, "claude-code", "moonshot/kimi-k2.6", nil)
|
||||
|
||||
if res.ResolvedMode != LLMBillingModePlatformManaged {
|
||||
t.Fatalf("platform-managed model must stay platform_managed, got %q (source=%s)", res.ResolvedMode, res.Source)
|
||||
}
|
||||
// THE FIX: must NOT report usable credential when none was injected.
|
||||
if res.HasUsableLLMCred {
|
||||
t.Fatalf("empty proxy env → HasUsableLLMCred must be false (fail-closed), got true — the #2162 dark-wedge class")
|
||||
}
|
||||
// No credential env must be present.
|
||||
if _, present := envVars["ANTHROPIC_API_KEY"]; present {
|
||||
t.Errorf("empty proxy env must NOT inject ANTHROPIC_API_KEY")
|
||||
}
|
||||
if _, present := envVars["MOLECULE_LLM_USAGE_TOKEN"]; present {
|
||||
t.Errorf("empty proxy env must NOT inject MOLECULE_LLM_USAGE_TOKEN")
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyPlatformManagedLLMEnv_ProxyEnvPresentInjectsCredential is the
|
||||
// positive-path pair to the #2162 regression guard: when the CP proxy env IS
|
||||
// present, the platform-managed path must inject ANTHROPIC_API_KEY +
|
||||
// ANTHROPIC_BASE_URL for an Anthropic-native runtime and report
|
||||
// HasUsableLLMCred=true.
|
||||
func TestApplyPlatformManagedLLMEnv_ProxyEnvPresentInjectsCredential(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
const wsID = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"
|
||||
|
||||
mock := setupTestDB(t)
|
||||
expectOverrideQuery(mock, wsID, "")
|
||||
|
||||
envVars := map[string]string{}
|
||||
// Simulate the CP proxy env being present (as it is in production).
|
||||
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.moleculesai.app/api/v1/internal/llm/openai/v1")
|
||||
t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "https://api.moleculesai.app/api/v1/internal/llm/anthropic/v1")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "PLATFORM-PROXY-TOKEN")
|
||||
|
||||
res := applyPlatformManagedLLMEnv(ctx, envVars, wsID, "claude-code", "moonshot/kimi-k2.6", nil)
|
||||
|
||||
if res.ResolvedMode != LLMBillingModePlatformManaged {
|
||||
t.Fatalf("expected platform_managed, got %q", res.ResolvedMode)
|
||||
}
|
||||
if !res.HasUsableLLMCred {
|
||||
t.Fatalf("proxy env present → HasUsableLLMCred must be true, got false")
|
||||
}
|
||||
if envVars["ANTHROPIC_API_KEY"] != "PLATFORM-PROXY-TOKEN" {
|
||||
t.Errorf("ANTHROPIC_API_KEY must be injected with the platform proxy token; got %q", envVars["ANTHROPIC_API_KEY"])
|
||||
}
|
||||
if envVars["ANTHROPIC_BASE_URL"] != "https://api.moleculesai.app/api/v1/internal/llm/anthropic/v1" {
|
||||
t.Errorf("ANTHROPIC_BASE_URL must be injected with the platform anthropic proxy; got %q", envVars["ANTHROPIC_BASE_URL"])
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -93,3 +93,16 @@ func formatMissingBYOKCredentialError(mode string) string {
|
||||
mode,
|
||||
)
|
||||
}
|
||||
|
||||
// formatMissingPlatformProxyError returns the user-facing text for a provision
// failure on a platform-managed workspace whose control-plane proxy
// environment is missing (#2162). That billing path needs
// MOLECULE_LLM_BASE_URL + MOLECULE_LLM_USAGE_TOKEN (or their OPENAI_*
// fallbacks) in order to inject a usable credential; when they are absent the
// workspace must not be started without one.
//
// The message is a fixed string: it takes no inputs and is assembled from
// three parts — what is wrong, which env is missing, and what to do next.
func formatMissingPlatformProxyError() string {
	const (
		cause   = "this workspace is configured for platform-managed LLM billing but the control-plane proxy is not ready. "
		missing = "The required platform proxy env (MOLECULE_LLM_BASE_URL + MOLECULE_LLM_USAGE_TOKEN) is absent. "
		advice  = "This is usually a transient boot-race; retry in 30 seconds. If it persists, verify the platform proxy " +
			"is configured for this tenant/runtime and contact the platform team."
	)
	return cause + missing + advice
}
|
||||
|
||||
@@ -1003,12 +1003,13 @@ func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string,
|
||||
anthropicBaseURL := firstNonEmptyEnv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "ANTHROPIC_BASE_URL")
|
||||
token := firstNonEmptyEnv("MOLECULE_LLM_USAGE_TOKEN", "OPENAI_API_KEY")
|
||||
if baseURL == "" || token == "" {
|
||||
// Proxy not configured (boot race / misconfig). On the platform_managed
|
||||
// path the workspace IS entitled to platform creds, so we do NOT strip
|
||||
// here — but we report HasUsableLLMCred from whatever survived so the
|
||||
// caller's fail-closed branch (non-platform only) is never reached on
|
||||
// this path.
|
||||
return platformLLMEnvResult{ResolvedMode: res.ResolvedMode, HasUsableLLMCred: true, Source: res.Source}
|
||||
// Proxy not configured (boot race / misconfig). The platform_managed
|
||||
// path REQUIRES the CP proxy env to inject a usable credential.
|
||||
// Reporting HasUsableLLMCred=true here would start the workspace
|
||||
// credential-less — the adk-demo dark-wedge class (#2162).
|
||||
// Return false so the caller's fail-closed branch aborts with
|
||||
// MISSING_PLATFORM_PROXY.
|
||||
return platformLLMEnvResult{ResolvedMode: res.ResolvedMode, HasUsableLLMCred: false, Source: res.Source}
|
||||
}
|
||||
stripPlatformManagedLLMBypassEnv(envVars)
|
||||
|
||||
|
||||
@@ -134,6 +134,11 @@ func TestProvisionWorkspaceAuto_NoBackendMarksFailed(t *testing.T) {
|
||||
// This is the regression-prevention test for the Design Director bug
|
||||
// where 7-of-7 sub-agents went down the Docker path on SaaS.
|
||||
func TestProvisionWorkspaceAuto_RoutesToCPWhenSet(t *testing.T) {
|
||||
// Supply the CP proxy env so the platform-managed default does not abort
|
||||
// with MISSING_PLATFORM_PROXY (molecule-core#2162).
|
||||
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
|
||||
|
||||
mock := setupTestDB(t)
|
||||
mock.MatchExpectationsInOrder(false)
|
||||
|
||||
@@ -597,6 +602,11 @@ func TestNoCallSiteCallsBareStop(t *testing.T) {
|
||||
// count without mocking out the retry helper itself, which would
|
||||
// invert the test contract — the retry IS the dispatcher's job here).
|
||||
func TestRestartWorkspaceAuto_RoutesToCPWhenSet(t *testing.T) {
|
||||
// Supply the CP proxy env so the platform-managed default does not abort
|
||||
// with MISSING_PLATFORM_PROXY (molecule-core#2162).
|
||||
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
|
||||
|
||||
rec := &trackingCPProv{}
|
||||
bcast := &concurrentSafeBroadcaster{}
|
||||
h := NewWorkspaceHandler(bcast, nil, "http://localhost:8080", t.TempDir())
|
||||
@@ -795,6 +805,11 @@ func TestResumeHandler_UsesProvisionWorkspaceAuto(t *testing.T) {
|
||||
// the async tests; the absence of `go` semantics is the load-bearing
|
||||
// distinction we're pinning.
|
||||
func TestProvisionWorkspaceAutoSync_RoutesToCPWhenSet(t *testing.T) {
|
||||
// Supply the CP proxy env so the platform-managed default does not abort
|
||||
// with MISSING_PLATFORM_PROXY (molecule-core#2162).
|
||||
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
|
||||
|
||||
mock := setupTestDB(t)
|
||||
mock.MatchExpectationsInOrder(false)
|
||||
// provisionWorkspaceCP runs prepareProvisionContext synchronously, which
|
||||
|
||||
@@ -98,6 +98,11 @@ func (r *recordingCPProv) startedSet() map[string]struct{} {
|
||||
func TestProvisionWorkspaceCP_ConcurrentBurst_NoSilentDrop(t *testing.T) {
|
||||
const numWorkspaces = 7
|
||||
|
||||
// Supply the CP proxy env so the platform-managed default does not abort
|
||||
// with MISSING_PLATFORM_PROXY (molecule-core#2162).
|
||||
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
|
||||
|
||||
mock := setupTestDB(t)
|
||||
|
||||
// Every goroutine runs prepareProvisionContext → mintWorkspaceSecrets
|
||||
|
||||
@@ -230,6 +230,18 @@ func (h *WorkspaceHandler) prepareProvisionContext(
|
||||
Extra: map[string]interface{}{"error": msg, "code": "MISSING_BYOK_CREDENTIAL", "billing_mode": llmRes.ResolvedMode, "issue": "1994"},
|
||||
}
|
||||
}
|
||||
// Fail closed for a platform-managed workspace whose CP proxy env is
|
||||
// absent: do NOT start it credential-less (adk-demo dark-wedge class,
|
||||
// #2162). The platform_managed path requires the proxy injection to
|
||||
// produce a usable credential.
|
||||
if llmRes.ResolvedMode == LLMBillingModePlatformManaged && !llmRes.HasUsableLLMCred {
|
||||
msg := formatMissingPlatformProxyError()
|
||||
log.Printf("Provisioner: ABORT workspace=%s — platform-managed billing mode but CP proxy env absent (MISSING_PLATFORM_PROXY, molecule-core#2162)", workspaceID)
|
||||
return nil, &provisionAbort{
|
||||
Msg: msg,
|
||||
Extra: map[string]interface{}{"error": msg, "code": "MISSING_PLATFORM_PROXY", "billing_mode": llmRes.ResolvedMode, "issue": "2162"},
|
||||
}
|
||||
}
|
||||
applyRuntimeModelEnv(envVars, payload.Runtime, payload.Model)
|
||||
if payload.Role != "" {
|
||||
envVars["MOLECULE_AGENT_ROLE"] = payload.Role
|
||||
|
||||
@@ -264,6 +264,11 @@ func TestPrepareProvisionContext_ParentIDInjection(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
// Supply the CP proxy env so the platform-managed default does not abort
|
||||
// with MISSING_PLATFORM_PROXY (molecule-core#2162).
|
||||
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
@@ -331,6 +336,10 @@ func TestPrepareProvisionContext_InjectsGitHTTPCredsFromPersonaToken(t *testing.
|
||||
}
|
||||
}
|
||||
t.Setenv("MOLECULE_PERSONA_ROOT", root)
|
||||
// Supply the CP proxy env so the platform-managed default does not abort
|
||||
// with MISSING_PLATFORM_PROXY (molecule-core#2162).
|
||||
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
@@ -459,6 +468,10 @@ func TestPrepareProvisionContext_WorkspaceSecretWinsOverPersonaToken(t *testing.
|
||||
t.Fatal(err)
|
||||
}
|
||||
t.Setenv("MOLECULE_PERSONA_ROOT", root)
|
||||
// Supply the CP proxy env so the platform-managed default does not abort
|
||||
// with MISSING_PLATFORM_PROXY (molecule-core#2162).
|
||||
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
|
||||
|
||||
mock := setupTestDB(t)
|
||||
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`).
|
||||
|
||||
@@ -1424,6 +1424,11 @@ func (s *stubFailingCPProv) IsRunning(_ context.Context, _ string) (bool, error)
|
||||
// the broadcast payload would surface every marker; the canned
|
||||
// "provisioning failed" message must surface none of them.
|
||||
func TestProvisionWorkspaceCP_NoInternalErrorsInBroadcast(t *testing.T) {
|
||||
// Supply the CP proxy env so the platform-managed default does not abort
|
||||
// with MISSING_PLATFORM_PROXY (molecule-core#2162).
|
||||
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
|
||||
|
||||
mock := setupTestDB(t)
|
||||
|
||||
// loadWorkspaceSecrets queries global_secrets and workspace_secrets
|
||||
|
||||
Reference in New Issue
Block a user