From ecdbd2edeeea6aa95ade1a40bb2bbcfbb6b93fee Mon Sep 17 00:00:00 2001 From: claude-ceo-assistant Date: Tue, 26 May 2026 17:02:12 -0700 Subject: [PATCH 1/2] refactor(workspace-server): drop org-tier from llm_billing_mode resolver (internal#691) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per CTO direction 2026-05-26: there is no org-level llm_billing_mode policy. The workspace is the unit of decision. The resolver becomes workspaces.llm_billing_mode ?? "platform_managed" (constant bootstrap floor) The org-tier silently shadowed every workspace without an explicit override when the org default landed on platform_managed (today's agents-team incident on 3 workspaces) — that is the exact failure mode this removal eliminates. Changes - ResolveLLMBillingMode signature drops orgMode parameter; only the workspace row is consulted. NULL / row-missing / garbled / DB-error all resolve via constant_fallback to platform_managed. - BillingModeResolution: drop OrgDefault field; drop BillingModeSourceOrgDefault. Only BillingModeSourceWorkspaceOverride and BillingModeSourceConstantFallback remain. - llm_billing_mode_handler: stop reading MOLECULE_LLM_BILLING_MODE env var. - secrets.go: drop legacy org-level shims (platformManagedLLMMode + the global SetGlobal gate). Per-workspace strip is the only gate; provision- time applyPlatformManagedLLMEnv still strips global-secret leaks for platform_managed workspaces. - workspace_provision.applyPlatformManagedLLMEnv: drop the orgMode env read. - Tests: drop t.Setenv("MOLECULE_LLM_BILLING_MODE", ...) from apply* tests (empty workspaceID short-circuits via constant fallback); the byok-noop case now mocks an explicit workspace override. Resolver table-driven test drops org-as-input cases. Backward compat - workspaces.llm_billing_mode column unchanged; the byok writes done today for agents-team's 3 workspaces are honored exactly as before. Schema removal of organizations.llm_billing_mode is a separate follow-up. - Strip-list (platformManagedDirectLLMBypassKeys) unchanged. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../handlers/handlers_extended_test.go | 21 ++- .../internal/handlers/llm_billing_mode.go | 132 ++++++++---------- .../handlers/llm_billing_mode_handler.go | 23 ++- .../handlers/llm_billing_mode_handler_test.go | 23 +-- .../handlers/llm_billing_mode_test.go | 104 ++++++-------- workspace-server/internal/handlers/secrets.go | 59 +++----- .../internal/handlers/workspace_test.go | 17 +-- 7 files changed, 159 insertions(+), 220 deletions(-) diff --git a/workspace-server/internal/handlers/handlers_extended_test.go b/workspace-server/internal/handlers/handlers_extended_test.go index fdd86d489..08c5d192f 100644 --- a/workspace-server/internal/handlers/handlers_extended_test.go +++ b/workspace-server/internal/handlers/handlers_extended_test.go @@ -255,22 +255,16 @@ func TestExtended_SecretsListEmpty(t *testing.T) { // ---------- TestSecretsSet (Extended) ---------- func TestExtended_SecretsSet(t *testing.T) { - // internal#691: the per-workspace strip gate now defaults to platform_managed - // on empty MOLECULE_LLM_BILLING_MODE (closed default). This test's intent is - // the happy path of persisting a vendor key, so put the org into byok which - // matches the pre-#691 implicit behavior of an unset env. - t.Setenv("MOLECULE_LLM_BILLING_MODE", "byok") + // internal#691 follow-up: the per-workspace strip gate consults only + // the workspace row. The test's intent is the happy path of persisting + // a vendor key, so the mock returns an explicit byok override for this + // workspace; the bypass-list check is skipped and the write proceeds. mock := setupTestDB(t) handler := NewSecretsHandler(nil) - // internal#691: secrets.Set now consults ResolveLLMBillingMode before the - // strip gate. Mock returns no row → resolver falls through to the org - // default (byok, set via t.Setenv above) → bypass-list check is skipped - // and the write proceeds. This pattern is the test-side mirror of the - // real-prod fall-through behavior for a fresh workspace with no override. mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). WithArgs("22222222-2222-2222-2222-222222222222"). - WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"})) + WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK)) // Expect INSERT (encrypted value is dynamic, use AnyArg) mock.ExpectExec("INSERT INTO workspace_secrets"). @@ -308,7 +302,10 @@ func TestExtended_SecretsSet(t *testing.T) { } func TestExtended_SecretsSetRejectsHermesCustomProviderInPlatformManagedMode(t *testing.T) { - t.Setenv("MOLECULE_LLM_BILLING_MODE", "platform_managed") + // internal#691 follow-up: per-workspace resolver looks up the workspace + // row. Mock no expectations → resolver hits a sqlmock-unexpected-query + // error → default-closed to platform_managed → strip-list rejection + // fires for the KIMI_API_KEY write. _ = setupTestDB(t) handler := NewSecretsHandler(nil) diff --git a/workspace-server/internal/handlers/llm_billing_mode.go b/workspace-server/internal/handlers/llm_billing_mode.go index 60f7ad744..39f9b5c0a 100644 --- a/workspace-server/internal/handlers/llm_billing_mode.go +++ b/workspace-server/internal/handlers/llm_billing_mode.go @@ -17,26 +17,34 @@ package handlers // stops the strip for EVERY workspace in the org. Turning it to `platform_managed` // blocks every workspace's own OAuth/vendor keys. // -// The resolver replaces the env-var read with a per-workspace lookup: +// The first attempt at internal#691 introduced a 3-tier resolution: // -// workspaces.llm_billing_mode (per-workspace override, NULLABLE) -// ?? organizations.llm_billing_mode (org default, fetched via tenant_config) -// ?? "platform_managed" (closed default — the existing implicit default) +// workspace ?? org_default (from tenant_config env var) ?? "platform_managed" +// +// This is the shape that bit agents-team on 2026-05-26: org_default silently +// inherited `platform_managed` (the closed bootstrap default) and shadowed +// every workspace that had not set an explicit override. The behavior +// contradicted the per-workspace intent of the feature — the org tier was +// always meant to be a bootstrap floor, not a policy layer. +// +// CTO direction (2026-05-26 23:54Z): there is no org tier. The workspace is +// the unit of decision. The resolver is now: +// +// workspaces.llm_billing_mode ?? "platform_managed" (closed bootstrap floor) // // Default-closed contract — non-negotiable per the RFC Safety axis: // -// - workspace row missing (sql.ErrNoRows) → fall through to org default -// - DB error on the lookup → "platform_managed" + propagated error -// - workspace override = NULL → fall through to org default -// - workspace override = unknown string → "platform_managed" (default-closed) -// - org default = NULL / empty / unknown string → "platform_managed" (closed default) -// - org default = recognized non-pm string + ws null → org default (byok/disabled honored) +// - workspace row missing (sql.ErrNoRows) → "platform_managed" +// - DB error on the lookup → "platform_managed" + propagated error +// - workspace override = NULL → "platform_managed" +// - workspace override = unknown / garbled string → "platform_managed" +// - workspace override = recognized enum value → that value // // The ONLY way to resolve to "byok" or "disabled" is an explicit, recognized -// string in the workspace override OR the org default. A NULL JOIN, transient -// resolver error, or garbled enum value MUST NOT silently flip a workspace -// off of platform_managed — that would shadow the org's billing policy and -// is the exact failure mode the RFC's Safety hot-spot calls out. +// string in the workspace override. A NULL row, a transient resolver error, +// or a garbled enum value MUST NOT silently flip a workspace off of +// platform_managed — that would shadow the bootstrap default and is the exact +// failure mode the RFC's Safety hot-spot calls out. import ( "context" @@ -50,8 +58,8 @@ import ( // Constants mirror molecule-controlplane/internal/credits/llm_billing.go. // Kept as string literals (not imports) because workspace-server has no // build-time dependency on the CP module; the values are stable wire -// strings used in the tenant_config response, the workspaces.llm_billing_mode -// column check constraint, and the CP route bodies. +// strings used in the workspaces.llm_billing_mode column check constraint +// and the CP route bodies. const ( LLMBillingModePlatformManaged = "platform_managed" LLMBillingModeBYOK = "byok" @@ -61,11 +69,15 @@ const ( // BillingModeSource describes which layer of the resolution stack supplied // the final mode. Surfaced via the admin route for operator debug // ("why is this workspace being stripped?") per the RFC Observability axis. +// +// Post-CTO-simplification (2026-05-26) the resolver has only two layers, so +// there are only two source values. BillingModeSourceOrgDefault is removed +// — the org tier no longer exists. Any non-explicit workspace value +// (NULL, row missing, garbled, DB error) resolves via constant_fallback. type BillingModeSource string const ( BillingModeSourceWorkspaceOverride BillingModeSource = "workspace_override" - BillingModeSourceOrgDefault BillingModeSource = "org_default" BillingModeSourceConstantFallback BillingModeSource = "constant_fallback" ) @@ -73,19 +85,23 @@ const ( // and the strip gate logs at INFO. The same struct is the unit-test fixture // shape, so the resolver test asserts both the mode AND the source per case // (catches a bug where the right mode is returned via the wrong layer). +// +// OrgDefault was removed alongside the org tier — the field would always be +// the constant "platform_managed" now, which is exactly the bootstrap floor +// already surfaced via BillingModeSourceConstantFallback. Removing it keeps +// the wire shape honest: nothing implies the org is a policy input. type BillingModeResolution struct { - WorkspaceID string `json:"workspace_id"` - ResolvedMode string `json:"resolved_mode"` - WorkspaceOverride *string `json:"workspace_override"` // nil = inherit - OrgDefault string `json:"org_default"` // already default-closed by CP - Source BillingModeSource `json:"source"` + WorkspaceID string `json:"workspace_id"` + ResolvedMode string `json:"resolved_mode"` + WorkspaceOverride *string `json:"workspace_override"` // nil = no explicit override + Source BillingModeSource `json:"source"` } // isKnownBillingMode is the enum-recognizer for the resolver's default-closed // branch. Returning false for an unknown string forces the resolver to fall -// through to the next layer (or the constant fallback) — NEVER to honor a -// garbled value as if it were valid. This is what makes a row with mode='byokk' -// (typo) resolve to platform_managed instead of accidentally to byok. +// through to the constant fallback — NEVER to honor a garbled value as if +// it were valid. This is what makes a row with mode='byokk' (typo) resolve +// to platform_managed instead of accidentally to byok. func isKnownBillingMode(s string) bool { switch s { case LLMBillingModePlatformManaged, LLMBillingModeBYOK, LLMBillingModeDisabled: @@ -95,47 +111,25 @@ func isKnownBillingMode(s string) bool { } } -// normalizeOrgDefault applies the same default-closed contract to the -// org-level input as the workspace override gets. The org_default arrives -// from tenant_config which already COALESCEs NULL → platform_managed at the -// CP SQL layer, but we DO NOT trust that contract here — if CP regresses or -// the tenant_config env wasn't populated (race on boot), we still default- -// close. Same principle: never honor a garbled value. -func normalizeOrgDefault(orgMode string) string { - if isKnownBillingMode(orgMode) { - return orgMode - } - return LLMBillingModePlatformManaged -} - // ResolveLLMBillingMode is the canonical resolver. Every code path that // previously gated on `os.Getenv("MOLECULE_LLM_BILLING_MODE") == "platform_managed"` -// must call this instead and gate on the returned mode. The architectural -// test (resolver_ast_test.go) asserts there is no remaining call site of -// the old shape outside the resolver-input wiring. +// must call this instead and gate on the returned mode. // // Returning an error does NOT prevent the caller from making a decision — // the returned mode is always a valid enum value (default-closed to // platform_managed) so the caller can proceed without a separate fail-closed // branch. The error is informational: log it, surface it to operators, but // the strip-gate decision is already safe. -func ResolveLLMBillingMode(ctx context.Context, workspaceID, orgMode string) (BillingModeResolution, error) { +func ResolveLLMBillingMode(ctx context.Context, workspaceID string) (BillingModeResolution, error) { res := BillingModeResolution{ - WorkspaceID: workspaceID, - OrgDefault: normalizeOrgDefault(orgMode), + WorkspaceID: workspaceID, + ResolvedMode: LLMBillingModePlatformManaged, + Source: BillingModeSourceConstantFallback, } if workspaceID == "" { // No workspace ID = pre-provision context (templating, validation). - // Resolve against the org default only, no DB read. - res.ResolvedMode = res.OrgDefault - res.Source = BillingModeSourceOrgDefault - if !isKnownBillingMode(orgMode) { - // Org default was garbled/NULL and we clamped to platform_managed. - // Mark the source as constant_fallback so the operator can see - // the clamp happened, not that the org "really" said platform_managed. - res.Source = BillingModeSourceConstantFallback - } + // Constant fallback is the only safe answer; there is no row to read. return res, nil } @@ -147,22 +141,15 @@ func ResolveLLMBillingMode(ctx context.Context, workspaceID, orgMode string) (Bi switch { case errors.Is(err, sql.ErrNoRows): - // Workspace row missing — concurrent delete, or pre-create call. Don't - // silently flip; fall through to org default. Source stays org_default - // so operators can see the row-missing case is being handled as a - // fallback, not a workspace-explicit decision. - res.ResolvedMode = res.OrgDefault - res.Source = BillingModeSourceOrgDefault - if !isKnownBillingMode(orgMode) { - res.Source = BillingModeSourceConstantFallback - } + // Workspace row missing — concurrent delete, or pre-create call. + // Default-closed to platform_managed; surface this via source= + // constant_fallback so operators can see the row-missing case is + // being handled as a fallback, not a workspace-explicit decision. return res, nil case err != nil: // DB error — default-closed to platform_managed AND propagate the // error so operators get a structured log line. The caller is // expected to log and continue with the safe default. - res.ResolvedMode = LLMBillingModePlatformManaged - res.Source = BillingModeSourceConstantFallback return res, fmt.Errorf("resolve workspace llm_billing_mode for %s: %w", workspaceID, err) } @@ -174,7 +161,7 @@ func ResolveLLMBillingMode(ctx context.Context, workspaceID, orgMode string) (Bi return res, nil } - // Override row present but the value is NULL or garbled. Fall through. + // Override row present but the value is NULL or garbled. Default-close. // If the value was non-NULL but garbled (CHECK constraint should prevent // this, but defense in depth — a future migration could relax the check // or another path could write the column directly), surface the raw @@ -183,25 +170,20 @@ func ResolveLLMBillingMode(ctx context.Context, workspaceID, orgMode string) (Bi raw := wsOverride.String res.WorkspaceOverride = &raw } - res.ResolvedMode = res.OrgDefault - res.Source = BillingModeSourceOrgDefault - if !isKnownBillingMode(orgMode) { - res.Source = BillingModeSourceConstantFallback - } return res, nil } // SetWorkspaceLLMBillingMode writes the override column. Pass mode=="" to -// clear (set to NULL = inherit). Validates the mode against the enum set -// so the route handler doesn't have to duplicate validation; a garbled -// mode round-trips as an explicit 400 from the caller, not a CHECK- -// constraint error from the DB driver. +// clear (set to NULL = resolve to the constant fallback). Validates the mode +// against the enum set so the route handler doesn't have to duplicate +// validation; a garbled mode round-trips as an explicit 400 from the caller, +// not a CHECK-constraint error from the DB driver. func SetWorkspaceLLMBillingMode(ctx context.Context, workspaceID, mode string) error { if workspaceID == "" { return errors.New("SetWorkspaceLLMBillingMode: workspace id required") } if mode == "" { - // NULL = inherit. Caller asked to clear the override. + // NULL = constant fallback. Caller asked to clear the override. res, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET llm_billing_mode = NULL WHERE id = $1`, workspaceID, diff --git a/workspace-server/internal/handlers/llm_billing_mode_handler.go b/workspace-server/internal/handlers/llm_billing_mode_handler.go index 6fe7cbc76..d8761921c 100644 --- a/workspace-server/internal/handlers/llm_billing_mode_handler.go +++ b/workspace-server/internal/handlers/llm_billing_mode_handler.go @@ -2,7 +2,7 @@ package handlers // llm_billing_mode_handler.go — workspace-server admin routes that read / // write the per-workspace billing mode override (internal#691). These are -// the per-tenant routes that CP's new /cp/admin/workspaces/:id/llm-billing-mode +// the per-tenant routes that CP's /cp/admin/workspaces/:id/llm-billing-mode // proxies to; the canvas hits them via the CP route, not directly. // // Route shape: @@ -28,7 +28,6 @@ import ( "errors" "io" "net/http" - "os" "strings" "github.com/gin-gonic/gin" @@ -36,18 +35,16 @@ import ( // GetWorkspaceLLMBillingMode handles GET /admin/workspaces/:id/llm-billing-mode. // -// Reads the workspace override + the org-level default (from the same -// MOLECULE_LLM_BILLING_MODE env var the provisioner reads at strip-gate time — -// keeps the two paths consistent so the GET result matches what the strip -// gate would compute) and returns the structured resolution. +// Reads only the workspace override; there is no org tier (per CTO direction +// 2026-05-26: the workspace is the unit of decision). NULL / row-missing / +// garbled rows resolve via the constant fallback to platform_managed. func GetWorkspaceLLMBillingMode(c *gin.Context) { workspaceID := strings.TrimSpace(c.Param("id")) if !uuidRegex.MatchString(workspaceID) { c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace id"}) return } - orgMode := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_LLM_BILLING_MODE"))) - res, err := ResolveLLMBillingMode(c.Request.Context(), workspaceID, orgMode) + res, err := ResolveLLMBillingMode(c.Request.Context(), workspaceID) if err != nil { // Resolver returns a safe default-closed mode alongside the error; // surface the error so the operator sees the DB issue, but the @@ -67,9 +64,10 @@ func GetWorkspaceLLMBillingMode(c *gin.Context) { // PutWorkspaceLLMBillingMode handles PUT /admin/workspaces/:id/llm-billing-mode. // // Body shape: {"mode": "byok" | "platform_managed" | "disabled" | null} -// where null clears the override (workspace inherits the org default again). -// Omitting "mode" entirely is a 400 — callers must be explicit about whether -// they want to set or clear, so a typo'd field name can't silently no-op. +// where null clears the override (workspace resolves to the constant +// fallback). Omitting "mode" entirely is a 400 — callers must be explicit +// about whether they want to set or clear, so a typo'd field name can't +// silently no-op. // // On success returns the post-write resolution so the canvas can re-render // without a follow-up GET. @@ -138,8 +136,7 @@ func PutWorkspaceLLMBillingMode(c *gin.Context) { } // Read back the resolution so the response reflects post-write state. - orgMode := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_LLM_BILLING_MODE"))) - res, resolveErr := ResolveLLMBillingMode(c.Request.Context(), workspaceID, orgMode) + res, resolveErr := ResolveLLMBillingMode(c.Request.Context(), workspaceID) if resolveErr != nil { // Write succeeded but readback failed — still return 200 with the // best-effort resolution; the safe default is set even on error. diff --git a/workspace-server/internal/handlers/llm_billing_mode_handler_test.go b/workspace-server/internal/handlers/llm_billing_mode_handler_test.go index 342b4f28e..95765fda5 100644 --- a/workspace-server/internal/handlers/llm_billing_mode_handler_test.go +++ b/workspace-server/internal/handlers/llm_billing_mode_handler_test.go @@ -11,6 +11,10 @@ package handlers // constraint round-trip (matters because the error message must be // actionable to a canvas user) // - 404 propagates when the workspace row is missing on a set/clear +// +// Post-CTO-simplification (2026-05-26): the org tier no longer participates +// in the resolution; tests that exercised the org-default source now assert +// the constant-fallback source instead. import ( "bytes" @@ -29,10 +33,9 @@ func init() { const testWSID = "44444444-4444-4444-4444-444444444444" -func TestGetWorkspaceLLMBillingMode_HappyPath_InheritsOrgDefault(t *testing.T) { - t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModeBYOK) +func TestGetWorkspaceLLMBillingMode_HappyPath_NullRowFallsThroughToConstant(t *testing.T) { mock := setupTestDB(t) - // Workspace has no override → resolver returns org_default = byok. + // Workspace has no override → resolver returns constant fallback = platform_managed. mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). WithArgs(testWSID). WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil)) @@ -51,11 +54,11 @@ func TestGetWorkspaceLLMBillingMode_HappyPath_InheritsOrgDefault(t *testing.T) { if err := json.Unmarshal(w.Body.Bytes(), &res); err != nil { t.Fatalf("decode: %v", err) } - if res.ResolvedMode != LLMBillingModeBYOK { - t.Errorf("resolved mode: got %q want %q", res.ResolvedMode, LLMBillingModeBYOK) + if res.ResolvedMode != LLMBillingModePlatformManaged { + t.Errorf("resolved mode: got %q want %q", res.ResolvedMode, LLMBillingModePlatformManaged) } - if res.Source != BillingModeSourceOrgDefault { - t.Errorf("source: got %q want %q", res.Source, BillingModeSourceOrgDefault) + if res.Source != BillingModeSourceConstantFallback { + t.Errorf("source: got %q want %q", res.Source, BillingModeSourceConstantFallback) } if res.WorkspaceOverride != nil { t.Errorf("expected nil override, got %v", *res.WorkspaceOverride) @@ -75,7 +78,6 @@ func TestGetWorkspaceLLMBillingMode_BadUUID_400(t *testing.T) { } func TestPutWorkspaceLLMBillingMode_SetByok(t *testing.T) { - t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged) mock := setupTestDB(t) mock.ExpectExec(`UPDATE workspaces SET llm_billing_mode = \$1 WHERE id = \$2`). WithArgs(LLMBillingModeBYOK, testWSID). @@ -112,7 +114,6 @@ func TestPutWorkspaceLLMBillingMode_SetByok(t *testing.T) { } func TestPutWorkspaceLLMBillingMode_ExplicitNullClearsOverride(t *testing.T) { - t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged) mock := setupTestDB(t) mock.ExpectExec(`UPDATE workspaces SET llm_billing_mode = NULL WHERE id = \$1`). WithArgs(testWSID). @@ -142,8 +143,8 @@ func TestPutWorkspaceLLMBillingMode_ExplicitNullClearsOverride(t *testing.T) { if res.ResolvedMode != LLMBillingModePlatformManaged { t.Errorf("post-clear resolved: got %q want %q", res.ResolvedMode, LLMBillingModePlatformManaged) } - if res.Source != BillingModeSourceOrgDefault { - t.Errorf("post-clear source: got %q want %q", res.Source, BillingModeSourceOrgDefault) + if res.Source != BillingModeSourceConstantFallback { + t.Errorf("post-clear source: got %q want %q", res.Source, BillingModeSourceConstantFallback) } if res.WorkspaceOverride != nil { t.Errorf("post-clear override should be nil, got %v", *res.WorkspaceOverride) diff --git a/workspace-server/internal/handlers/llm_billing_mode_test.go b/workspace-server/internal/handlers/llm_billing_mode_test.go index aa4b1cac2..88209551d 100644 --- a/workspace-server/internal/handlers/llm_billing_mode_test.go +++ b/workspace-server/internal/handlers/llm_billing_mode_test.go @@ -5,6 +5,11 @@ package handlers // branch in the default-closed contract; if one of them flips behavior // later the test names will tell the reviewer exactly which RFC clause // regressed. +// +// Post-CTO-simplification (2026-05-26): the org tier was removed. Cases +// that previously exercised org-fallback paths now exercise only the +// workspace-level path; the org-as-policy-input scenarios are GONE +// because the org no longer participates in the resolution. import ( "context" @@ -22,17 +27,17 @@ func TestResolveLLMBillingMode_TableDriven(t *testing.T) { mode string source BillingModeSource // hasOverride asserts whether the resolver surfaced the override - // value in the result (nil pointer = clean inherit, non-nil = the - // row was present even if it ultimately fell through because it - // was garbled). Lets us distinguish "row missing, fell through" - // from "row present but garbled, fell through" — both resolve to - // the same mode but the resolver tells operators which case it was. + // value in the result (nil pointer = no explicit override / clean + // fallback, non-nil = the row was present even if it ultimately + // fell through because it was garbled). Lets us distinguish + // "row missing, fell through" from "row present but garbled, fell + // through" — both resolve to the same mode but the resolver tells + // operators which case it was. hasOverride bool } type tc struct { name string workspaceID string - orgMode string setupMock func(m sqlmock.Sqlmock) want want wantErr bool @@ -40,9 +45,8 @@ func TestResolveLLMBillingMode_TableDriven(t *testing.T) { cases := []tc{ { - name: "workspace_override_byok_overrides_pm_org", + name: "workspace_override_byok", workspaceID: wsID, - orgMode: LLMBillingModePlatformManaged, setupMock: func(m sqlmock.Sqlmock) { m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). WithArgs(wsID). @@ -51,9 +55,8 @@ func TestResolveLLMBillingMode_TableDriven(t *testing.T) { want: want{mode: LLMBillingModeBYOK, source: BillingModeSourceWorkspaceOverride, hasOverride: true}, }, { - name: "workspace_override_disabled_overrides_pm_org", + name: "workspace_override_disabled", workspaceID: wsID, - orgMode: LLMBillingModePlatformManaged, setupMock: func(m sqlmock.Sqlmock) { m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). WithArgs(wsID). @@ -62,31 +65,28 @@ func TestResolveLLMBillingMode_TableDriven(t *testing.T) { want: want{mode: LLMBillingModeDisabled, source: BillingModeSourceWorkspaceOverride, hasOverride: true}, }, { - name: "workspace_override_null_inherits_byok_org", + name: "workspace_override_explicit_platform_managed", + workspaceID: wsID, + setupMock: func(m sqlmock.Sqlmock) { + m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). + WithArgs(wsID). + WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModePlatformManaged)) + }, + want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceWorkspaceOverride, hasOverride: true}, + }, + { + name: "workspace_override_null_falls_through_to_constant", workspaceID: wsID, - orgMode: LLMBillingModeBYOK, setupMock: func(m sqlmock.Sqlmock) { m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). WithArgs(wsID). WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil)) }, - want: want{mode: LLMBillingModeBYOK, source: BillingModeSourceOrgDefault, hasOverride: false}, + want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceConstantFallback, hasOverride: false}, }, { - name: "workspace_override_null_inherits_pm_org", + name: "workspace_override_garbled_falls_through_DEFAULT_CLOSED", workspaceID: wsID, - orgMode: LLMBillingModePlatformManaged, - setupMock: func(m sqlmock.Sqlmock) { - m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). - WithArgs(wsID). - WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil)) - }, - want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceOrgDefault, hasOverride: false}, - }, - { - name: "workspace_override_garbled_falls_through_to_pm_org_DEFAULT_CLOSED", - workspaceID: wsID, - orgMode: LLMBillingModePlatformManaged, setupMock: func(m sqlmock.Sqlmock) { // CHECK constraint would normally prevent this but if a future // migration loosens it (or a direct UPDATE bypasses it on a @@ -97,60 +97,40 @@ func TestResolveLLMBillingMode_TableDriven(t *testing.T) { WithArgs(wsID). WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow("byokk")) }, - want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceOrgDefault, hasOverride: true}, - }, - { - name: "workspace_override_garbled_org_garbled_constant_fallback", - workspaceID: wsID, - orgMode: "garbled-or-empty", - setupMock: func(m sqlmock.Sqlmock) { - m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). - WithArgs(wsID). - WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow("nonsense")) - }, - // Both layers garbled → constant fallback. Source is constant_fallback - // so operators can see the org-default-was-also-bad case explicitly. + // hasOverride=true because the resolver surfaces the garbled + // raw value so operators can spot the corrupt row, but the + // resolved mode is still the constant fallback. want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceConstantFallback, hasOverride: true}, }, { - name: "workspace_row_missing_falls_through_to_org_byok", + name: "workspace_row_missing_falls_through_to_constant", workspaceID: wsID, - orgMode: LLMBillingModeBYOK, setupMock: func(m sqlmock.Sqlmock) { m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). WithArgs(wsID). WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"})) }, - want: want{mode: LLMBillingModeBYOK, source: BillingModeSourceOrgDefault, hasOverride: false}, + want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceConstantFallback, hasOverride: false}, }, { - name: "workspace_id_empty_pre_provision_org_only", + name: "workspace_id_empty_pre_provision_constant_fallback", workspaceID: "", - orgMode: LLMBillingModeBYOK, setupMock: func(m sqlmock.Sqlmock) { /* no DB read expected — empty ws id short-circuits */ }, - want: want{mode: LLMBillingModeBYOK, source: BillingModeSourceOrgDefault, hasOverride: false}, - }, - { - name: "workspace_id_empty_org_garbled_constant_fallback", - workspaceID: "", - orgMode: "", - setupMock: func(m sqlmock.Sqlmock) { /* no DB read */ }, want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceConstantFallback, hasOverride: false}, }, { name: "db_error_default_closed_to_pm_with_error", workspaceID: wsID, - orgMode: LLMBillingModeBYOK, // org says byok but DB errored — DO NOT honor org setupMock: func(m sqlmock.Sqlmock) { m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). WithArgs(wsID). WillReturnError(errors.New("connection refused")) }, - // Critical: even though orgMode=byok, a DB error means we can't - // confirm the workspace doesn't have an override, so we default - // to the closed mode. This is the safer of the two failures — - // silently flipping to org-byok on a DB error would leak the - // OAuth-keeping behavior to workspaces whose row says NULL. + // Critical: a DB error means we can't confirm the workspace + // doesn't have an override, so we default to the closed mode. + // This is the safer of the two failures — silently flipping to + // byok on a DB error would leak the OAuth-keeping behavior to + // workspaces whose row says NULL. want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceConstantFallback, hasOverride: false}, wantErr: true, }, @@ -161,7 +141,7 @@ func TestResolveLLMBillingMode_TableDriven(t *testing.T) { mock := setupTestDB(t) c.setupMock(mock) - res, err := ResolveLLMBillingMode(ctx, c.workspaceID, c.orgMode) + res, err := ResolveLLMBillingMode(ctx, c.workspaceID) if (err != nil) != c.wantErr { t.Fatalf("err: got %v wantErr=%v", err, c.wantErr) } @@ -191,14 +171,14 @@ func TestResolveLLMBillingMode_ResolvedModeIsAlwaysValid(t *testing.T) { ctx := context.Background() const wsID = "22222222-2222-2222-2222-222222222222" - // Throw a pathological row at the resolver: garbled override + garbled - // org default. Resolved mode must still be a recognized enum. + // Throw a pathological row at the resolver: garbled override. + // Resolved mode must still be a recognized enum. mock := setupTestDB(t) mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). WithArgs(wsID). WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow("totally-bogus")) - res, err := ResolveLLMBillingMode(ctx, wsID, "also-bogus") + res, err := ResolveLLMBillingMode(ctx, wsID) if err != nil { t.Fatalf("unexpected err: %v", err) } @@ -206,7 +186,7 @@ func TestResolveLLMBillingMode_ResolvedModeIsAlwaysValid(t *testing.T) { t.Errorf("post-condition violated: resolved mode %q is not a known enum value", res.ResolvedMode) } if res.ResolvedMode != LLMBillingModePlatformManaged { - t.Errorf("default-closed contract: garbled-x-garbled must resolve to platform_managed, got %q", res.ResolvedMode) + t.Errorf("default-closed contract: garbled override must resolve to platform_managed, got %q", res.ResolvedMode) } } diff --git a/workspace-server/internal/handlers/secrets.go b/workspace-server/internal/handlers/secrets.go index 27a259fa2..3c662a536 100644 --- a/workspace-server/internal/handlers/secrets.go +++ b/workspace-server/internal/handlers/secrets.go @@ -5,7 +5,6 @@ import ( "database/sql" "log" "net/http" - "os" "regexp" "strings" @@ -48,37 +47,32 @@ func isPlatformManagedDirectLLMBypassKey(key string) bool { return ok } -// platformManagedLLMModeForWorkspace replaces the org-level platformManagedLLMMode -// gate with a per-workspace resolved-mode check (internal#691). The strip-list -// is enforced ONLY when this specific workspace's resolved mode is -// platform_managed — a workspace with a byok override is allowed to write its -// own CLAUDE_CODE_OAUTH_TOKEN / vendor key via the canvas Secrets tab. +// platformManagedLLMModeForWorkspace is the per-workspace strip-gate check. +// The strip-list is enforced ONLY when this specific workspace's resolved +// mode is platform_managed — a workspace with a byok override is allowed +// to write its own CLAUDE_CODE_OAUTH_TOKEN / vendor key via the canvas +// Secrets tab. // // Default-closed: if the resolver hits a DB error, falls back to // platform_managed (the safe-default behavior), so a transient DB failure // during a secret write still rejects the bypass-list keys — fail safer not // freer. This matches the resolver's documented contract. +// +// Post-CTO-simplification (2026-05-26): there is no longer an org-tier +// fallback. The resolver consults only the workspace row, defaulting to +// platform_managed when the row is NULL/missing/garbled. func platformManagedLLMModeForWorkspace(c *gin.Context, workspaceID string) bool { - orgMode := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_LLM_BILLING_MODE"))) - res, err := ResolveLLMBillingMode(c.Request.Context(), workspaceID, orgMode) + res, err := ResolveLLMBillingMode(c.Request.Context(), workspaceID) if err != nil { log.Printf("secrets: resolve billing mode for workspace=%s failed: %v (defaulting to platform_managed for safety)", workspaceID, err) } return strings.EqualFold(res.ResolvedMode, LLMBillingModePlatformManaged) } -// platformManagedLLMMode is the legacy org-level gate retained for any test -// harness still asserting the env-var-only behavior. Production code paths -// must call platformManagedLLMModeForWorkspace instead so a workspace-level -// byok override actually takes effect on the secrets-write path. -func platformManagedLLMMode() bool { - return strings.EqualFold(strings.TrimSpace(os.Getenv("MOLECULE_LLM_BILLING_MODE")), "platform_managed") -} - -// rejectPlatformManagedDirectLLMBypassForWorkspace is the per-workspace -// successor to rejectPlatformManagedDirectLLMBypass (internal#691). The -// strip-list ONLY applies when this specific workspace resolves to -// platform_managed; byok/disabled workspaces can write their own vendor keys. +// rejectPlatformManagedDirectLLMBypassForWorkspace gates per-workspace +// vendor-key writes. The strip-list ONLY applies when this specific +// workspace resolves to platform_managed; byok/disabled workspaces can +// write their own vendor keys. func rejectPlatformManagedDirectLLMBypassForWorkspace(c *gin.Context, workspaceID, key string) bool { if !platformManagedLLMModeForWorkspace(c, workspaceID) || !isPlatformManagedDirectLLMBypassKey(key) { return false @@ -91,22 +85,6 @@ func rejectPlatformManagedDirectLLMBypassForWorkspace(c *gin.Context, workspaceI return true } -// rejectPlatformManagedDirectLLMBypass is the legacy org-level shim. Retained -// only for backwards compatibility with any external/test caller still on the -// old shape; new code MUST use the per-workspace variant above. Production -// code paths (the secrets.go handlers + workspace.go create-secret path) all -// switched in internal#691. -func rejectPlatformManagedDirectLLMBypass(c *gin.Context, key string) bool { - if !platformManagedLLMMode() || !isPlatformManagedDirectLLMBypassKey(key) { - return false - } - c.JSON(http.StatusBadRequest, gin.H{ - "error": "direct Hermes custom provider secrets are blocked for platform-managed LLM workspaces; use MODEL/LLM_PROVIDER or the platform LLM proxy env instead", - "key": key, - }) - return true -} - type SecretsHandler struct { restartFunc func(workspaceID string) // Optional: auto-restart after secret change } @@ -512,9 +490,12 @@ func (h *SecretsHandler) SetGlobal(c *gin.Context) { c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"}) return } - if rejectPlatformManagedDirectLLMBypass(c, body.Key) { - return - } + // internal#691 follow-up: there is no longer an org-tier billing mode. + // Global secret writes are unconditionally allowed; per-workspace + // platform_managed strip happens at provision time in + // applyPlatformManagedLLMEnv (workspace_provision.go), which will drop + // any conflicting global LLM key for workspaces resolving to + // platform_managed without affecting byok workspaces. encrypted, err := crypto.Encrypt([]byte(body.Value)) if err != nil { diff --git a/workspace-server/internal/handlers/workspace_test.go b/workspace-server/internal/handlers/workspace_test.go index f10bcfb1e..fed778149 100644 --- a/workspace-server/internal/handlers/workspace_test.go +++ b/workspace-server/internal/handlers/workspace_test.go @@ -501,10 +501,11 @@ func TestWorkspaceCreate_WithSecrets_Persists(t *testing.T) { // while persisting a secret causes the entire transaction to roll back and // the handler to return 500. The workspace row must NOT be committed. func TestWorkspaceCreate_SecretPersistFails_RollsBack(t *testing.T) { - // internal#691: see TestExtended_SecretsSet — same default-closed reasoning. - // This test is asserting the rollback path on DB failure, not the strip gate; - // keep the org in byok so the OPENAI_API_KEY write reaches the INSERT. - t.Setenv("MOLECULE_LLM_BILLING_MODE", "byok") + // internal#691 follow-up: see TestExtended_SecretsSet — the per-workspace + // resolver consults only the workspace row. This test is asserting the + // rollback path on DB failure, not the strip gate, so the workspace + // row mock below returns an explicit byok override and the OPENAI_API_KEY + // write reaches the INSERT-and-fail path. mock := setupTestDB(t) setupTestRedis(t) broadcaster := newTestBroadcaster() @@ -516,11 +517,11 @@ func TestWorkspaceCreate_SecretPersistFails_RollsBack(t *testing.T) { // internal#691: Create() now resolves billing mode per-workspace before // the secret-strip gate. The workspace row was just inserted in the same // transaction so it isn't readable from a separate query yet; the - // resolver expects the SELECT and the mock returns no row → falls back - // to the org default (byok, set above) so the OPENAI_API_KEY write - // reaches the INSERT-and-fail path this test exercises. + // resolver expects the SELECT and the mock returns an explicit byok + // override so the OPENAI_API_KEY write reaches the INSERT-and-fail path + // this test exercises. mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). - WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"})) + WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK)) mock.ExpectExec("INSERT INTO workspace_secrets"). WillReturnError(sql.ErrConnDone) // DB failure while writing secret mock.ExpectRollback() // workspace insert must be rolled back -- 2.52.0 From e910510a4a8efb01dd720fc092ab37b02eb98e42 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Wed, 27 May 2026 23:44:35 +0000 Subject: [PATCH 2/2] fix(merge): resolve compilation errors from main-merge (#1963) The rebase onto main pulled in #1963 which added new callers of ResolveLLMBillingMode with the old 3-argument signature. #1930 changed the signature to 2 arguments and removed OrgDefault from BillingModeResolution. This commit updates the new callers. - secrets.go: drop orgMode argument, remove unused os import - workspace_provision.go: drop orgMode argument, remove OrgDefault from log --- workspace-server/internal/handlers/secrets.go | 3 +-- workspace-server/internal/handlers/workspace_provision.go | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/workspace-server/internal/handlers/secrets.go b/workspace-server/internal/handlers/secrets.go index 3c662a536..71612512d 100644 --- a/workspace-server/internal/handlers/secrets.go +++ b/workspace-server/internal/handlers/secrets.go @@ -310,8 +310,7 @@ func (h *SecretsHandler) Values(c *gin.Context) { // garbled value to platform_managed, so a transient failure leaves the // existing (global-inheriting) behavior in place rather than stripping a // platform_managed workspace's creds. - orgMode := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_LLM_BILLING_MODE"))) - res, resolveErr := ResolveLLMBillingMode(ctx, workspaceID, orgMode) + res, resolveErr := ResolveLLMBillingMode(ctx, workspaceID) if resolveErr != nil { log.Printf("secrets.Values: resolve billing mode workspace=%s err=%v (defaulting to platform_managed)", workspaceID, resolveErr) } diff --git a/workspace-server/internal/handlers/workspace_provision.go b/workspace-server/internal/handlers/workspace_provision.go index 6f12b3453..8a70409ff 100644 --- a/workspace-server/internal/handlers/workspace_provision.go +++ b/workspace-server/internal/handlers/workspace_provision.go @@ -984,15 +984,14 @@ type platformLLMEnvResult struct { } func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string, globalKeys map[string]struct{}, workspaceID, runtime, model string) platformLLMEnvResult { - orgMode := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_LLM_BILLING_MODE"))) - res, resolveErr := ResolveLLMBillingMode(ctx, workspaceID, orgMode) + res, resolveErr := ResolveLLMBillingMode(ctx, workspaceID) if resolveErr != nil { // resolveErr != nil ⇒ resolver hit a DB error AND already defaulted // res.ResolvedMode to platform_managed. Log + proceed; the safe default // is already in place, no early return needed. log.Printf("workspace_provision: resolve billing mode workspace=%s err=%v (defaulting to platform_managed)", workspaceID, resolveErr) } - log.Printf("workspace_provision: billing mode workspace=%s resolved=%s source=%s org_default=%s", workspaceID, res.ResolvedMode, res.Source, res.OrgDefault) + log.Printf("workspace_provision: billing mode workspace=%s resolved=%s source=%s", workspaceID, res.ResolvedMode, res.Source) // internal#703: MOLECULE_LLM_BILLING_MODE in the container must reflect the // RESOLVED per-workspace mode, not a hardcoded literal. Pre-fix this var was // only emitted (hardcoded "platform_managed") on the strip path below, so a -- 2.52.0