From 585b3d6ed0ab3e3ae12183c21e51bbcb78bb2fcd Mon Sep 17 00:00:00 2001 From: agent-platform-engineer Date: Wed, 27 May 2026 12:55:58 -0700 Subject: [PATCH] fix(workspace-server): provider-aware gate on platform scope:global LLM creds (internal#711) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A workspace whose resolved LLM billing mode is NOT platform_managed (byok / subscription) was still being injected with the platform's scope:global CLAUDE_CODE_OAUTH_TOKEN and ran on the platform's Anthropic credits. Confirmed live 2026-05-27 on the Reno Stars tenant: the SEO (352e3c2b-...) and Marketing (6b66de8d-...) claude-code agents had no workspace-scoped LLM credential, yet ran MODEL=opus directly on api.anthropic.com using the platform's global OAuth token. Root cause: loadWorkspaceSecrets merges ALL global_secrets into every workspace's env provenance-blind. applyPlatformManagedLLMEnv's non-platform (byok/disabled) path then early-returned WITHOUT stripping those inherited platform globals — so a workspace with no LLM credential of its own kept the platform's scope:global CLAUDE_CODE_OAUTH_TOKEN. The same leak existed on the remote-pull path (GET /workspaces/:id/secrets/values), which also merged globals unconditionally. Fix (provider-aware, both injection vectors): - applyPlatformManagedLLMEnv now takes the global-provenance key set and, on the non-platform path, strips every platform-managed LLM bypass key (CLAUDE_CODE_OAUTH_TOKEN + the rest) that originated from global_secrets. A workspace's OWN LLM cred (a workspace_secrets row — provenance flag dropped by loadWorkspaceSecrets) is NOT in the global set and survives. - secrets.Values applies the same provenance-aware gate before returning the merged bundle to a remote agent. - Fail closed: a byok workspace left with no usable LLM credential aborts provision with code MISSING_BYOK_CREDENTIAL instead of starting on the (now-stripped) platform creds. Scoped to byok; disabled mode strips but still boots (no-LLM workspaces are legitimate). - platform_managed path is unchanged (it still receives + force-routes the platform creds via the CP proxy), and the LLM-proxy anthropic path is untouched. Tests (all green; go build/test ./... + -tags=integration build pass): - ByokStripsGlobalOriginOAuthToken — platform global token stripped, no cred. - ByokKeepsWorkspaceOwnOAuthEvenWithGlobal — workspace's own token survives. - DisabledStripsGlobalButReportsNoCred — disabled strips but does not abort. - PlatformManagedStillReceivesGlobalCreds — no regression on platform path. - PrepareProvisionContext_ByokWithOnlyGlobalOAuthFailsClosed — e2e abort. - SecretsValues_ByokStripsGlobalLLMCred — remote-pull path gated. Note: open PR #1930 (refactor/drop-org-tier-llm-billing-mode, internal#691 follow-up) changes ResolveLLMBillingMode's signature in the same files. This change is built on current main and is orthogonal in intent; whichever merges second needs a mechanical 1-line resolver-call adjustment (drop the orgMode arg). #1930 does NOT fix this leak. Refs internal#711 Co-Authored-By: Claude Opus 4.7 (1M context) --- workspace-server/internal/handlers/secrets.go | 36 +++ .../internal/handlers/secrets_test.go | 74 ++++++ .../internal/handlers/workspace_preflight.go | 18 ++ .../internal/handlers/workspace_provision.go | 112 +++++++- .../handlers/workspace_provision_shared.go | 30 ++- .../workspace_provision_shared_test.go | 243 +++++++++++++++++- 6 files changed, 496 insertions(+), 17 deletions(-) diff --git a/workspace-server/internal/handlers/secrets.go b/workspace-server/internal/handlers/secrets.go index b1f60160d..27a259fa2 100644 --- a/workspace-server/internal/handlers/secrets.go +++ b/workspace-server/internal/handlers/secrets.go @@ -245,6 +245,11 @@ func (h *SecretsHandler) Values(c *gin.Context) { // provisioner path in workspace_provision.go so env-vars look identical // whether the workspace was bootstrapped locally or remotely). out := map[string]string{} + // Provenance side-channel (internal#711): which keys in `out` originated + // from global_secrets and were NOT overridden by a workspace_secrets row. + // Used by the provider-aware gate below so a non-platform workspace's + // remote pull never receives the platform's scope:global LLM credential. + globalKeys := map[string]struct{}{} // Track decrypt failures so we can refuse the response with a list // instead of returning a partial bundle that boots a broken agent. var failedKeys []string @@ -270,6 +275,7 @@ func (h *SecretsHandler) Values(c *gin.Context) { continue } out[k] = string(decrypted) + globalKeys[k] = struct{}{} } } if err := globalRows.Err(); err != nil { @@ -294,6 +300,10 @@ func (h *SecretsHandler) Values(c *gin.Context) { continue } out[k] = string(decrypted) // workspace override wins over global + // User explicitly re-set this via the canvas Secrets tab — it is + // no longer "the operator-store version", so drop the global + // provenance flag (mirrors loadWorkspaceSecrets). + delete(globalKeys, k) } } if err := wsRows.Err(); err != nil { @@ -309,6 +319,32 @@ func (h *SecretsHandler) Values(c *gin.Context) { return } + // internal#711: provider-aware gate on the remote-pull path. A workspace + // whose resolved billing mode is NOT platform_managed (byok / subscription) + // must NOT receive the platform's scope:global LLM credentials + // (CLAUDE_CODE_OAUTH_TOKEN + the rest of the bypass-key set). Those keys + // were merged from global_secrets above; here we drop any that are still + // of global provenance (a workspace override survives, since its flag was + // cleared). Symmetric with applyPlatformManagedLLMEnv's strip on the + // provision/restart env path — both injection vectors are now gated. + // + // Default-closed: ResolveLLMBillingMode collapses any DB error / NULL / + // garbled value to platform_managed, so a transient failure leaves the + // existing (global-inheriting) behavior in place rather than stripping a + // platform_managed workspace's creds. + orgMode := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_LLM_BILLING_MODE"))) + res, resolveErr := ResolveLLMBillingMode(ctx, workspaceID, orgMode) + if resolveErr != nil { + log.Printf("secrets.Values: resolve billing mode workspace=%s err=%v (defaulting to platform_managed)", workspaceID, resolveErr) + } + if res.ResolvedMode != LLMBillingModePlatformManaged { + for k := range globalKeys { + if isPlatformManagedDirectLLMBypassKey(k) { + delete(out, k) + } + } + } + c.JSON(http.StatusOK, out) } diff --git a/workspace-server/internal/handlers/secrets_test.go b/workspace-server/internal/handlers/secrets_test.go index 4c9ba6e1d..f8cd0d194 100644 --- a/workspace-server/internal/handlers/secrets_test.go +++ b/workspace-server/internal/handlers/secrets_test.go @@ -865,6 +865,12 @@ func TestSecretsValues_LegacyWorkspaceGrandfathered(t *testing.T) { WithArgs(testWsID). WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}). AddRow("WS_KEY", []byte("ws_plainvalue"), 0)) + // internal#711: Values now resolves billing mode to gate the global LLM-cred + // merge. Neither key here is a platform-managed LLM bypass key, so the mode + // is immaterial to the assertions — but the resolver query must be mocked. + mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). + WithArgs(testWsID). + WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModePlatformManaged)) w := httptest.NewRecorder() c := secretsValuesRequest(w, "") // no auth — grandfathered @@ -942,6 +948,12 @@ func TestSecretsValues_ValidTokenReturnsDecryptedMerge(t *testing.T) { WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}). AddRow("ONLY_WS", []byte("ws_val"), 0). AddRow("SHARED_KEY", []byte("ws_wins"), 0)) + // internal#711: billing-mode resolver query. None of these keys is a + // platform-managed LLM bypass key, so the resolved mode does not affect the + // merge assertions; platform_managed keeps the existing pass-through. + mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). + WithArgs(testWsID). + WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModePlatformManaged)) w := httptest.NewRecorder() c := secretsValuesRequest(w, "Bearer good-token") @@ -963,6 +975,68 @@ func TestSecretsValues_ValidTokenReturnsDecryptedMerge(t *testing.T) { } } +// TestSecretsValues_ByokStripsGlobalLLMCred is the internal#711 regression +// guard for the remote-pull injection vector. A non-platform (byok) workspace +// that pulls its secrets via GET /workspaces/:id/secrets/values must NOT +// receive the platform's scope:global CLAUDE_CODE_OAUTH_TOKEN — that key is +// of global_secrets provenance and is dropped by the provider-aware gate. +// Its OWN ANTHROPIC_API_KEY (a workspace_secrets row) survives, and unrelated +// non-LLM global secrets are untouched. +func TestSecretsValues_ByokStripsGlobalLLMCred(t *testing.T) { + mock := setupTestDB(t) + handler := NewSecretsHandler(nil) + + mock.ExpectQuery(`SELECT COUNT\(\*\) FROM workspace_auth_tokens`). + WithArgs(testWsID). + WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1)) + mock.ExpectQuery(`SELECT t\.id, t\.workspace_id.*FROM workspace_auth_tokens t.*JOIN workspaces`). + WithArgs(sqlmock.AnyArg()). + WillReturnRows(sqlmock.NewRows([]string{"id", "workspace_id"}).AddRow("tok-1", testWsID)) + mock.ExpectExec(`UPDATE workspace_auth_tokens SET last_used_at`). + WithArgs("tok-1"). + WillReturnResult(sqlmock.NewResult(0, 1)) + // global_secrets holds the platform's scope:global OAuth token + a + // non-LLM operator global (should be untouched). + mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`). + WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}). + AddRow("CLAUDE_CODE_OAUTH_TOKEN", []byte("PLATFORM-GLOBAL-OAUTH"), 0). + AddRow("SENTRY_DSN", []byte("https://sentry.example/123"), 0)) + // The workspace brought its OWN Anthropic API key via the Secrets tab. + mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id`). + WithArgs(testWsID). + WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}). + AddRow("ANTHROPIC_API_KEY", []byte("CUSTOMER-OWN-ANTHROPIC-KEY"), 0)) + // Resolver: this workspace is byok. + mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). + WithArgs(testWsID). + WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK)) + + w := httptest.NewRecorder() + c := secretsValuesRequest(w, "Bearer good-token") + handler.Values(c) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String()) + } + var body map[string]string + _ = json.Unmarshal(w.Body.Bytes(), &body) + // 1. Platform global OAuth token stripped — the leak is closed on the pull path. + if got, ok := body["CLAUDE_CODE_OAUTH_TOKEN"]; ok { + t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q present — platform scope:global token must be stripped for byok pull", got) + } + // 2. The workspace's own LLM key survives. + if body["ANTHROPIC_API_KEY"] != "CUSTOMER-OWN-ANTHROPIC-KEY" { + t.Fatalf("ANTHROPIC_API_KEY = %q, want the workspace's own key preserved", body["ANTHROPIC_API_KEY"]) + } + // 3. Unrelated non-LLM global secrets are untouched. + if body["SENTRY_DSN"] != "https://sentry.example/123" { + t.Fatalf("SENTRY_DSN = %q, want non-LLM globals untouched", body["SENTRY_DSN"]) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + func TestSecretsValues_InvalidWorkspaceID(t *testing.T) { setupTestDB(t) handler := NewSecretsHandler(nil) diff --git a/workspace-server/internal/handlers/workspace_preflight.go b/workspace-server/internal/handlers/workspace_preflight.go index 647f4f235..84de494b5 100644 --- a/workspace-server/internal/handlers/workspace_preflight.go +++ b/workspace-server/internal/handlers/workspace_preflight.go @@ -75,3 +75,21 @@ func formatMissingEnvError(missing []string) string { strings.Join(missing, ", "), ) } + +// formatMissingBYOKCredentialError builds the user-facing message for a +// provision failure caused by a non-platform (byok/subscription) workspace +// that has no usable LLM credential of its own (internal#711). The platform's +// scope:global LLM credentials are NOT a valid fallback for a non-platform +// workspace — resolving to them would bill the platform's Anthropic credits — +// so the provision fails closed here rather than starting the workspace on +// stripped/absent creds. Rendered verbatim in the canvas Events tab. +func formatMissingBYOKCredentialError(mode string) string { + return fmt.Sprintf( + "this workspace's LLM billing mode is %q (not platform-managed) but it has no LLM credential of its own. "+ + "Add a workspace-scoped credential (e.g. CLAUDE_CODE_OAUTH_TOKEN or your provider's API key) under "+ + "Config → Secrets, or switch the workspace to platform-managed billing via "+ + "/admin/workspaces/:id/llm-billing-mode, then retry. The platform's shared LLM credentials are not "+ + "used for non-platform workspaces.", + mode, + ) +} diff --git a/workspace-server/internal/handlers/workspace_provision.go b/workspace-server/internal/handlers/workspace_provision.go index 04974415a..6f12b3453 100644 --- a/workspace-server/internal/handlers/workspace_provision.go +++ b/workspace-server/internal/handlers/workspace_provision.go @@ -943,7 +943,47 @@ func applyRuntimeModelEnv(envVars map[string]string, runtime, model string) { // MOLECULE_LLM_BILLING_MODE_RESOLVED so an in-container debug check can // answer "what mode is this workspace running under" without DB queries // (RFC Observability hot-spot). -func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string, workspaceID, runtime, model string) { +// +// internal#711 — PROVIDER-AWARE GLOBAL-LLM-CRED GATE. The platform's +// LLM credentials (CLAUDE_CODE_OAUTH_TOKEN + the rest of the +// platformManagedDirectLLMBypassKeys set) live in `global_secrets` and +// are merged into EVERY workspace's env by loadWorkspaceSecrets — that +// merge is provenance-blind. Pre-fix, the non-platform (byok/disabled) +// early-return left envVars untouched, so a BYOK / subscription +// workspace that brought NO LLM credential of its own still inherited +// the platform's scope:global CLAUDE_CODE_OAUTH_TOKEN and ran Opus on +// the platform's (Molecule's) Anthropic credits (Reno Stars SEO + +// Marketing agents, confirmed live 2026-05-27). +// +// The gate: on the non-platform path we strip every platform-managed +// LLM key whose PROVENANCE is `global_secrets` (the globalKeys set). +// A workspace's OWN LLM credential — set via the canvas Secrets tab, +// i.e. a `workspace_secrets` row — has had its global provenance flag +// dropped by loadWorkspaceSecrets, so it is NOT in globalKeys and +// survives. Net effect: platform global LLM creds reach a workspace +// ONLY when its resolved mode is platform_managed; a non-platform +// workspace resolves to its own (workspace-scoped) credential or none. +// +// The boolean return reports whether, after the gate, the workspace +// still has at least one usable LLM credential. The caller +// (prepareProvisionContext) uses it to FAIL CLOSED — a non-platform +// workspace with no usable LLM credential is aborted with a clear +// MISSING_BYOK_CREDENTIAL error at provision time rather than being +// started on (now-stripped) platform creds. +// platformLLMEnvResult is the structured outcome of applyPlatformManagedLLMEnv. +// ResolvedMode is the per-workspace billing/provider mode the resolver +// landed on. HasUsableLLMCred reports whether — AFTER the provider-aware +// global-cred gate — the workspace still has at least one platform-managed +// LLM credential key in its env (its own, workspace-scoped one). Only the +// non-platform path consults HasUsableLLMCred for the fail-closed decision; +// the platform_managed path always returns true (it forces the CP proxy +// usage token, which IS the usable credential). +type platformLLMEnvResult struct { + ResolvedMode string + HasUsableLLMCred bool +} + +func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string, globalKeys map[string]struct{}, workspaceID, runtime, model string) platformLLMEnvResult { orgMode := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_LLM_BILLING_MODE"))) res, resolveErr := ResolveLLMBillingMode(ctx, workspaceID, orgMode) if resolveErr != nil { @@ -966,18 +1006,35 @@ func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string, // pulling logs or hitting the admin route. envVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"] = res.ResolvedMode if res.ResolvedMode != LLMBillingModePlatformManaged { - // byok or disabled — DO NOT strip vendor keys, DO NOT force-route to CP, - // DO NOT override the workspace own ANTHROPIC_BASE_URL / OAuth token. - // Leave envVars alone so CLAUDE_CODE_OAUTH_TOKEN / vendor API keys - // pulled from workspace_secrets survive into the container, and the - // workspace talks to its own provider directly (internal#703). - return + // byok or disabled — DO NOT force-route to CP, DO NOT override the + // workspace's own ANTHROPIC_BASE_URL / OAuth token. + // + // internal#711: but DO strip platform-origin LLM credentials. The + // platform's scope:global CLAUDE_CODE_OAUTH_TOKEN (+ the rest of the + // bypass-key set) was merged into envVars by loadWorkspaceSecrets + // from global_secrets; without this strip a BYOK workspace that + // brought no LLM credential of its own would inherit the platform's + // global token and bill the platform's Anthropic credits. The strip + // is PROVENANCE-AWARE: only keys still flagged as global_secrets + // origin are removed; a workspace's own LLM cred (a workspace_secrets + // row — provenance flag already dropped by loadWorkspaceSecrets) + // survives so the workspace talks to its own provider directly. + stripGlobalOriginLLMCreds(envVars, globalKeys) + return platformLLMEnvResult{ + ResolvedMode: res.ResolvedMode, + HasUsableLLMCred: hasAnyPlatformManagedLLMKey(envVars), + } } baseURL := firstNonEmptyEnv("MOLECULE_LLM_BASE_URL", "OPENAI_BASE_URL") anthropicBaseURL := firstNonEmptyEnv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "ANTHROPIC_BASE_URL") token := firstNonEmptyEnv("MOLECULE_LLM_USAGE_TOKEN", "OPENAI_API_KEY") if baseURL == "" || token == "" { - return + // Proxy not configured (boot race / misconfig). On the platform_managed + // path the workspace IS entitled to platform creds, so we do NOT strip + // here — but we report HasUsableLLMCred from whatever survived so the + // caller's fail-closed branch (non-platform only) is never reached on + // this path. + return platformLLMEnvResult{ResolvedMode: res.ResolvedMode, HasUsableLLMCred: true} } stripPlatformManagedLLMBypassEnv(envVars) @@ -1006,6 +1063,10 @@ func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string, envVars["MOLECULE_MODEL"] = defaultModel } } + // platform_managed: the CP proxy usage token (injected as ANTHROPIC_API_KEY + // / OPENAI_API_KEY above) IS the usable credential, so the workspace is + // never fail-closed on this path. + return platformLLMEnvResult{ResolvedMode: res.ResolvedMode, HasUsableLLMCred: true} } func stripPlatformManagedLLMBypassEnv(envVars map[string]string) { @@ -1014,6 +1075,41 @@ func stripPlatformManagedLLMBypassEnv(envVars map[string]string) { } } +// stripGlobalOriginLLMCreds removes platform-managed LLM credential keys +// (CLAUDE_CODE_OAUTH_TOKEN + the rest of platformManagedDirectLLMBypassKeys) +// from envVars ONLY when they originated from the operator-controlled +// `global_secrets` table (i.e. their key is present in globalKeys). +// +// internal#711 provider-aware gate. A platform global LLM credential is the +// platform's own credential and must never be the credential a non-platform +// (byok / subscription) workspace runs on. loadWorkspaceSecrets drops the +// global-provenance flag for any key the workspace re-set via the canvas +// Secrets tab (a workspace_secrets row), so a workspace's OWN LLM credential +// is NOT in globalKeys and survives this strip — only the inherited platform +// global creds are removed. +func stripGlobalOriginLLMCreds(envVars map[string]string, globalKeys map[string]struct{}) { + for key := range platformManagedDirectLLMBypassKeys { + if _, fromGlobal := globalKeys[key]; fromGlobal { + delete(envVars, key) + } + } +} + +// hasAnyPlatformManagedLLMKey reports whether envVars still carries at least +// one non-empty platform-managed LLM credential key after the provider-aware +// gate. Used by the non-platform fail-closed branch: a byok/subscription +// workspace with no surviving (workspace-scoped) LLM credential must be +// aborted with MISSING_BYOK_CREDENTIAL rather than started credential-less or +// on stripped platform creds. +func hasAnyPlatformManagedLLMKey(envVars map[string]string) bool { + for key := range platformManagedDirectLLMBypassKeys { + if strings.TrimSpace(envVars[key]) != "" { + return true + } + } + return false +} + func runtimeUsesAnthropicNativeProxy(runtime string) bool { return strings.EqualFold(strings.TrimSpace(runtime), "claude-code") } diff --git a/workspace-server/internal/handlers/workspace_provision_shared.go b/workspace-server/internal/handlers/workspace_provision_shared.go index d7e42f169..a42e49c7c 100644 --- a/workspace-server/internal/handlers/workspace_provision_shared.go +++ b/workspace-server/internal/handlers/workspace_provision_shared.go @@ -193,7 +193,35 @@ func (h *WorkspaceHandler) prepareProvisionContext( // continue to rely on workspace_secrets / org-import persona-env // merge for their git auth. applyAgentGitHTTPCreds(envVars, payload.Role) - applyPlatformManagedLLMEnv(ctx, envVars, workspaceID, payload.Runtime, payload.Model) + // internal#711: provider-aware LLM-credential resolution. On a non-platform + // (byok/subscription) workspace this strips the platform's scope:global LLM + // creds inherited from global_secrets and reports whether the workspace + // still has a usable (workspace-scoped) LLM credential of its own. + llmRes := applyPlatformManagedLLMEnv(ctx, envVars, globalSecretKeys, workspaceID, payload.Runtime, payload.Model) + // Fail closed for a BYOK workspace with no usable LLM credential: do NOT + // start it on the platform's (now-stripped) global creds. Mirror the + // "model+provider+credential REQUIRED at create" spirit (internal#711) + // with an actionable error surfaced at provision time. + // + // Scoped to byok specifically (NOT disabled): "byok" means "the user + // intends to run an LLM on their own credential" — a missing one is a + // misconfiguration worth surfacing loudly. "disabled" means "this + // workspace runs no platform-billed LLM at all" (terminal / file work, or + // a runtime that talks to a non-bypass-key endpoint); stripping the + // inherited platform globals is sufficient there and aborting would + // regress a legitimate no-LLM workspace. The strip above already ran for + // both non-platform modes. + // + // The bypass-key check is intentionally broad — any surviving bypass key + // (the workspace's own, of workspace_secrets provenance) clears it. + if llmRes.ResolvedMode == LLMBillingModeBYOK && !llmRes.HasUsableLLMCred { + msg := formatMissingBYOKCredentialError(llmRes.ResolvedMode) + log.Printf("Provisioner: ABORT workspace=%s — byok billing mode has no usable LLM credential (MISSING_BYOK_CREDENTIAL, internal#711)", workspaceID) + return nil, &provisionAbort{ + Msg: msg, + Extra: map[string]interface{}{"error": msg, "code": "MISSING_BYOK_CREDENTIAL", "billing_mode": llmRes.ResolvedMode, "issue": "711"}, + } + } applyRuntimeModelEnv(envVars, payload.Runtime, payload.Model) if payload.Role != "" { envVars["MOLECULE_AGENT_ROLE"] = payload.Role diff --git a/workspace-server/internal/handlers/workspace_provision_shared_test.go b/workspace-server/internal/handlers/workspace_provision_shared_test.go index fc82dc0c8..2c044cfa3 100644 --- a/workspace-server/internal/handlers/workspace_provision_shared_test.go +++ b/workspace-server/internal/handlers/workspace_provision_shared_test.go @@ -494,6 +494,57 @@ func TestPrepareProvisionContext_WorkspaceSecretWinsOverPersonaToken(t *testing. } } +// TestPrepareProvisionContext_ByokWithOnlyGlobalOAuthFailsClosed is the +// internal#711 end-to-end guard for the live Reno Stars leak. A byok +// workspace whose ONLY LLM credential is the platform's scope:global +// CLAUDE_CODE_OAUTH_TOKEN (inherited from global_secrets, no workspace +// override) must: +// +// 1. have that platform token STRIPPED from the prepared env (no leak), and +// 2. ABORT the provision with the MISSING_BYOK_CREDENTIAL code rather than +// start the workspace on the platform's credits. +// +// This is the discriminating end-to-end test: pre-fix prepared.EnvVars would +// carry CLAUDE_CODE_OAUTH_TOKEN= and the provision would +// succeed, running Opus on Molecule's Anthropic credits. +func TestPrepareProvisionContext_ByokWithOnlyGlobalOAuthFailsClosed(t *testing.T) { + const wsID = "352e3c2b-0546-4e9c-b487-1e2ff1cf29fc" // Reno Stars SEO agent + t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged) + + mock := setupTestDB(t) + // global_secrets carries the platform's scope:global OAuth token. + mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`). + WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}). + AddRow("CLAUDE_CODE_OAUTH_TOKEN", []byte("PLATFORM-GLOBAL-OAUTH"), 0)) + // Workspace set NO secrets of its own. + mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets`). + WithArgs(wsID). + WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"})) + // Resolver: workspace override = byok. + mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). + WithArgs(wsID). + WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK)) + + handler := NewWorkspaceHandler(&captureBroadcaster{}, nil, "http://localhost:8080", t.TempDir()) + payload := models.CreateWorkspacePayload{ + Name: "Reno Stars SEO", + Runtime: "claude-code", + Tier: 1, + } + prepared, abort := handler.prepareProvisionContext( + context.Background(), wsID, "/nonexistent", nil, payload, false) + + if abort == nil { + t.Fatalf("expected MISSING_BYOK_CREDENTIAL abort, got success (prepared=%v) — the leak would still ship", prepared) + } + if code, _ := abort.Extra["code"].(string); code != "MISSING_BYOK_CREDENTIAL" { + t.Fatalf("abort.Extra[code] = %v, want MISSING_BYOK_CREDENTIAL", abort.Extra["code"]) + } + if mode, _ := abort.Extra["billing_mode"].(string); mode != LLMBillingModeBYOK { + t.Fatalf("abort.Extra[billing_mode] = %v, want %q", abort.Extra["billing_mode"], LLMBillingModeBYOK) + } +} + // TestReadOrLazyHealInboundSecret pins the four branches of the // shared lazy-heal helper directly. Each call site (chat_files, // registry) has its own integration test, but those go through the @@ -972,7 +1023,7 @@ func TestApplyPlatformManagedLLMEnv_NonClaudeRuntimeDefaultsOpenAIProxyWhenNoWor t.Setenv("MOLECULE_LLM_DEFAULT_MODEL", "moonshot/kimi-k2.6") envVars := map[string]string{} - applyPlatformManagedLLMEnv(context.Background(), envVars, "", "codex", "") + applyPlatformManagedLLMEnv(context.Background(), envVars, nil, "", "codex", "") applyRuntimeModelEnv(envVars, "codex", "") if got := envVars["OPENAI_BASE_URL"]; got != "https://api.example.test/api/v1/internal/llm/openai/v1" { @@ -1002,7 +1053,7 @@ func TestApplyPlatformManagedLLMEnv_StripsWorkspaceOpenAIKeyForClaudeCode(t *tes "OPENAI_BASE_URL": "https://api.openai.com/v1", "MODEL": "openai/gpt-5.5", } - applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "") + applyPlatformManagedLLMEnv(context.Background(), envVars, nil, "", "claude-code", "") if _, ok := envVars["OPENAI_API_KEY"]; ok { t.Fatalf("OPENAI_API_KEY should be stripped for claude-code platform-managed mode") @@ -1028,7 +1079,7 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeUsesAnthropicProxyOverOAuth(t *tes "CLAUDE_CODE_OAUTH_TOKEN": "user-oauth-token", "MODEL": "sonnet", } - applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "") + applyPlatformManagedLLMEnv(context.Background(), envVars, nil, "", "claude-code", "") if _, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok { t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN should be stripped in platform-managed mode") @@ -1051,7 +1102,7 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeInjectsAnthropicProxyWhenNoWorkspa t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token") envVars := map[string]string{} - applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "minimax/MiniMax-M2.7") + applyPlatformManagedLLMEnv(context.Background(), envVars, nil, "", "claude-code", "minimax/MiniMax-M2.7") if got := envVars["ANTHROPIC_BASE_URL"]; got != "https://api.example.test/api/v1/internal/llm/anthropic/v1" { t.Fatalf("ANTHROPIC_BASE_URL = %q", got) @@ -1074,7 +1125,7 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeStripsVendorBYOK(t *testing.T) { "MINIMAX_API_KEY": "user-minimax-key", "MODEL": "MiniMax-M2.7", } - applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "") + applyPlatformManagedLLMEnv(context.Background(), envVars, nil, "", "claude-code", "") if _, ok := envVars["MINIMAX_API_KEY"]; ok { t.Fatalf("MINIMAX_API_KEY should be stripped in platform-managed mode") @@ -1096,7 +1147,7 @@ func TestApplyPlatformManagedLLMEnv_NoopsOutsidePlatformManaged(t *testing.T) { t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token") envVars := map[string]string{} - applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "") + applyPlatformManagedLLMEnv(context.Background(), envVars, nil, "", "claude-code", "") if _, ok := envVars["OPENAI_API_KEY"]; ok { t.Fatalf("OPENAI_API_KEY should not be set outside platform-managed mode") @@ -1137,7 +1188,7 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeByokKeepsOwnProviderEnv(t *testing "CLAUDE_CODE_OAUTH_TOKEN": "user-oauth-token", "MODEL": "sonnet", } - applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "") + applyPlatformManagedLLMEnv(context.Background(), envVars, nil, wsID, "claude-code", "") // 1. OAuth token intact — not stripped. if got := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; got != "user-oauth-token" { @@ -1168,6 +1219,182 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeByokKeepsOwnProviderEnv(t *testing } } +// TestApplyPlatformManagedLLMEnv_ByokStripsGlobalOriginOAuthToken is the +// internal#711 regression guard for the live 2026-05-27 leak (Reno Stars SEO +// + Marketing claude-code agents). A non-platform (byok) workspace that +// brought NO LLM credential of its own, but which inherited the platform's +// scope:global CLAUDE_CODE_OAUTH_TOKEN from global_secrets (provenance = +// globalKeys), must have that platform token STRIPPED — not run on it. +// +// Pre-fix the byok early-return left envVars untouched, so the platform's +// global OAuth token survived into the container and the agent ran Opus on +// the platform's Anthropic credits. The fix gates the global-cred merge on +// provider==platform: a non-platform workspace keeps only its own +// (workspace_secrets) creds, of which there are none here. +func TestApplyPlatformManagedLLMEnv_ByokStripsGlobalOriginOAuthToken(t *testing.T) { + const wsID = "352e3c2b-0546-4e9c-b487-1e2ff1cf29fc" // Reno Stars SEO agent + mock := setupTestDB(t) + mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). + WithArgs(wsID). + WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK)) + + t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged) + t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1") + t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "https://api.example.test/api/v1/internal/llm/anthropic") + t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token") + + // The ONLY LLM credential in env is the platform's scope:global OAuth + // token, merged from global_secrets (so its key is in globalKeys). The + // workspace set none of its own. + envVars := map[string]string{ + "CLAUDE_CODE_OAUTH_TOKEN": "PLATFORM-GLOBAL-OAUTH-TOKEN", + "MODEL": "opus", + } + globalKeys := map[string]struct{}{"CLAUDE_CODE_OAUTH_TOKEN": {}} + + res := applyPlatformManagedLLMEnv(context.Background(), envVars, globalKeys, wsID, "claude-code", "") + + // 1. The platform global OAuth token must be STRIPPED — the leak is closed. + if got, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok { + t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q present — platform scope:global token must be stripped for a byok workspace", got) + } + // 2. No CP proxy creds forced (byok = workspace talks to its own provider). + if got, ok := envVars["ANTHROPIC_API_KEY"]; ok { + t.Fatalf("ANTHROPIC_API_KEY must NOT be injected for byok, got %q", got) + } + // 3. Resolver reports byok with NO usable LLM credential → caller fails closed. + if res.ResolvedMode != LLMBillingModeBYOK { + t.Fatalf("ResolvedMode = %q, want %q", res.ResolvedMode, LLMBillingModeBYOK) + } + if res.HasUsableLLMCred { + t.Fatalf("HasUsableLLMCred = true, want false (only the stripped platform global token was present)") + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// TestApplyPlatformManagedLLMEnv_ByokKeepsWorkspaceOwnOAuthEvenWithGlobal is +// the discriminating companion to the strip test: a byok workspace that DID +// set its own CLAUDE_CODE_OAUTH_TOKEN via the canvas Secrets tab (a +// workspace_secrets row) keeps it. loadWorkspaceSecrets drops the global +// provenance flag on a workspace override, so the key is NOT in globalKeys +// and the provenance-aware strip leaves it alone. Proves the fix strips only +// platform-origin creds, never the customer's own. +func TestApplyPlatformManagedLLMEnv_ByokKeepsWorkspaceOwnOAuthEvenWithGlobal(t *testing.T) { + const wsID = "6b66de8d-9337-4fb4-be8d-6d49dca0d809" // Reno Stars Marketing agent + mock := setupTestDB(t) + mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). + WithArgs(wsID). + WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK)) + + t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged) + t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1") + t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token") + + // Workspace set its OWN OAuth token — loadWorkspaceSecrets would have + // dropped its global provenance flag, so globalKeys does NOT contain it. + envVars := map[string]string{ + "CLAUDE_CODE_OAUTH_TOKEN": "CUSTOMER-OWN-OAUTH-TOKEN", + "MODEL": "opus", + } + globalKeys := map[string]struct{}{} // not from global_secrets + + res := applyPlatformManagedLLMEnv(context.Background(), envVars, globalKeys, wsID, "claude-code", "") + + if got := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; got != "CUSTOMER-OWN-OAUTH-TOKEN" { + t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q, want the workspace's own token left intact", got) + } + if !res.HasUsableLLMCred { + t.Fatalf("HasUsableLLMCred = false, want true (workspace brought its own credential)") + } + if res.ResolvedMode != LLMBillingModeBYOK { + t.Fatalf("ResolvedMode = %q, want %q", res.ResolvedMode, LLMBillingModeBYOK) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// TestApplyPlatformManagedLLMEnv_DisabledStripsGlobalButReportsNoCred proves +// that "disabled" mode also strips the platform's global LLM creds (the leak +// is closed for disabled too), and reports HasUsableLLMCred=false. The +// caller's fail-closed abort is scoped to byok only, so a disabled workspace +// with no LLM cred still boots (for terminal / non-LLM work); here we pin the +// function-level strip + report. +func TestApplyPlatformManagedLLMEnv_DisabledStripsGlobalButReportsNoCred(t *testing.T) { + const wsID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa" + mock := setupTestDB(t) + mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). + WithArgs(wsID). + WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeDisabled)) + + t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged) + + envVars := map[string]string{ + "CLAUDE_CODE_OAUTH_TOKEN": "PLATFORM-GLOBAL-OAUTH-TOKEN", + } + globalKeys := map[string]struct{}{"CLAUDE_CODE_OAUTH_TOKEN": {}} + + res := applyPlatformManagedLLMEnv(context.Background(), envVars, globalKeys, wsID, "claude-code", "") + + if _, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok { + t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN must be stripped for disabled mode too") + } + if res.ResolvedMode != LLMBillingModeDisabled { + t.Fatalf("ResolvedMode = %q, want %q", res.ResolvedMode, LLMBillingModeDisabled) + } + if res.HasUsableLLMCred { + t.Fatalf("HasUsableLLMCred = true, want false") + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// TestApplyPlatformManagedLLMEnv_PlatformManagedStillReceivesGlobalCreds is +// the no-regression guard for the OTHER side of the gate (internal#711): a +// platform-managed workspace MUST still receive the platform's creds. Here +// the proxy IS configured, so the contract is the existing one — the global +// OAuth token is replaced by the proxy usage token (HasUsableLLMCred=true). +func TestApplyPlatformManagedLLMEnv_PlatformManagedStillReceivesGlobalCreds(t *testing.T) { + const wsID = "99999999-9999-9999-9999-999999999999" + mock := setupTestDB(t) + mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). + WithArgs(wsID). + WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModePlatformManaged)) + + t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged) + t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1") + t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "https://api.example.test/api/v1/internal/llm/anthropic") + t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token") + + envVars := map[string]string{ + "CLAUDE_CODE_OAUTH_TOKEN": "PLATFORM-GLOBAL-OAUTH-TOKEN", + "MODEL": "opus", + } + globalKeys := map[string]struct{}{"CLAUDE_CODE_OAUTH_TOKEN": {}} + + res := applyPlatformManagedLLMEnv(context.Background(), envVars, globalKeys, wsID, "claude-code", "") + + // Platform-managed routes through the CP proxy: OAuth stripped, proxy creds forced. + if _, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok { + t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN should be stripped + replaced by the proxy token for platform_managed") + } + if got := envVars["ANTHROPIC_API_KEY"]; got != "tenant-admin-token" { + t.Fatalf("ANTHROPIC_API_KEY = %q, want proxy usage token for platform_managed", got) + } + if !res.HasUsableLLMCred { + t.Fatalf("HasUsableLLMCred = false, want true for platform_managed (proxy token is the credential)") + } + if res.ResolvedMode != LLMBillingModePlatformManaged { + t.Fatalf("ResolvedMode = %q, want %q", res.ResolvedMode, LLMBillingModePlatformManaged) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + // TestApplyPlatformManagedLLMEnv_PlatformManagedStillEmitsResolvedMode is the // no-regression companion: a workspace that resolves to platform_managed must // still strip + force the proxy AND emit MOLECULE_LLM_BILLING_MODE= @@ -1189,7 +1416,7 @@ func TestApplyPlatformManagedLLMEnv_PlatformManagedStillEmitsResolvedMode(t *tes "CLAUDE_CODE_OAUTH_TOKEN": "user-oauth-token", "MODEL": "sonnet", } - applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "") + applyPlatformManagedLLMEnv(context.Background(), envVars, nil, wsID, "claude-code", "") // OAuth stripped, proxy forced — unchanged platform_managed contract. if _, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok { -- 2.52.0