fix(billing): per-workspace SSOT — remove org-level billing mode entirely (core#2594 follow-up) #2672

Merged
devops-engineer merged 1 commits from fix/2594-followup-per-workspace-byok-ssot into main 2026-06-12 22:46:57 +00:00
13 changed files with 454 additions and 441 deletions
@@ -255,20 +255,24 @@ func TestExtended_SecretsListEmpty(t *testing.T) {
// ---------- TestSecretsSet (Extended) ----------
func TestExtended_SecretsSet(t *testing.T) {
// internal#718 P2-B: the per-workspace strip gate keys off the DERIVED mode
// (org rung retired). This test's intent is the happy path of persisting a
// vendor key on a byok workspace; the realistic way a workspace is byok for
// a direct vendor-key write is an explicit operator override (the escape
// hatch the reject error itself points to: PUT /admin/.../llm-billing-mode).
// The override short-circuits the resolver to byok in a single read, so the
// bypass-list check is skipped and the write proceeds.
t.Setenv("MOLECULE_LLM_BILLING_MODE", "platform_managed") // org env ignored now
// The per-workspace key-write guard keys off the workspace's SELECTED model:
// a VENDOR model (kimi-for-coding → kimi-coding, IsPlatform=false) is a BYOK
// workspace, so writing a vendor key proceeds. Guard query order: runtime →
// secrets (MODEL) → (vendor only) override. (A platform model would block —
// see TestPlatformManagedLLMModeForWorkspace_GatesOnModelNotResolvedMode.)
mock := setupTestDB(t)
handler := NewSecretsHandler(nil)
mock.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`).
WithArgs("22222222-2222-2222-2222-222222222222").
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("claude-code"))
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1`).
WithArgs("22222222-2222-2222-2222-222222222222").
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
AddRow("MODEL", []byte("kimi-for-coding"), 0))
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs("22222222-2222-2222-2222-222222222222").
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK))
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil))
// Expect INSERT (encrypted value is dynamic, use AnyArg)
mock.ExpectExec("INSERT INTO workspace_secrets").
@@ -1,42 +1,33 @@
package handlers
// llm_billing_mode.go — per-workspace LLM billing mode resolution (internal#691).
// llm_billing_mode.go — PER-WORKSPACE LLM billing mode resolution.
//
// The resolver answers a single question at provision time:
// "Should we strip CLAUDE_CODE_OAUTH_TOKEN + every vendor key from this
// workspace's env, force-route to the CP proxy, and bill org credits?"
// workspace's env, force-route to the CP proxy, and bill platform credits?"
//
// That question used to be a single env-var read inside applyPlatformManagedLLMEnv:
// SSOT (CTO 2026-06-12 — "no org default; it's all per workspace; a workspace
// defaults to platform but I can switch it to BYOK anytime"). There is NO
// org-level billing mode anywhere: the old org-env MOLECULE_LLM_BILLING_MODE
// input, its org_default machinery, and the org_default response field are all
// retired. Resolution is purely per-workspace, in precedence order:
//
// os.Getenv("MOLECULE_LLM_BILLING_MODE") == "platform_managed" → strip
// 1. workspaces.llm_billing_mode — explicit per-WORKSPACE operator override.
// 2. DERIVE from the workspace's (runtime, model) via the provider registry
// (the one SSOT, providers.DeriveProvider):
// - model → closed `platform` provider → platform_managed
// - model → a vendor AND the workspace holds its OWN
// matching key (availableAuthEnv) → byok
// - model → a vendor but NO matching key → the DEPLOYMENT
// default (PlatformManagedProxyConfigured: proxy wired → platform_managed,
// self-host → byok). This is a DEPLOY fact, NOT an org setting, and is
// what makes a keyless workspace "default to platform".
// 3. no model / unknown runtime / unregistered / ambiguous → deployment default.
//
// where MOLECULE_LLM_BILLING_MODE was an ORG-level value, fetched from CP's
// tenant_config and exported into the workspace-server process at boot. That
// shape made it impossible to mix billing modes across workspaces in the same
// org: turning the org dial to `byok` so one workspace could keep its OAuth
// stops the strip for EVERY workspace in the org. Turning it to `platform_managed`
// blocks every workspace's own OAuth/vendor keys.
//
// The resolver replaces the env-var read with a per-workspace lookup:
//
// workspaces.llm_billing_mode (per-workspace override, NULLABLE)
// ?? organizations.llm_billing_mode (org default, fetched via tenant_config)
// ?? "platform_managed" (closed default — the existing implicit default)
//
// Default-closed contract — non-negotiable per the RFC Safety axis:
//
// - workspace row missing (sql.ErrNoRows) → fall through to org default
// - DB error on the lookup → "platform_managed" + propagated error
// - workspace override = NULL → fall through to org default
// - workspace override = unknown string → "platform_managed" (default-closed)
// - org default = NULL / empty / unknown string → "platform_managed" (closed default)
// - org default = recognized non-pm string + ws null → org default (byok/disabled honored)
//
// The ONLY way to resolve to "byok" or "disabled" is an explicit, recognized
// string in the workspace override OR the org default. A NULL JOIN, transient
// resolver error, or garbled enum value MUST NOT silently flip a workspace
// off of platform_managed — that would shadow the org's billing policy and
// is the exact failure mode the RFC's Safety hot-spot calls out.
// Default-closed contract: a transient DB error or a garbled override value
// resolves to the deployment default (platform_managed when a proxy is wired) —
// never a silent flip to byok. The ONLY way to byok is an explicit override OR a
// derived vendor provider whose own key is present.
import (
"context"
@@ -44,7 +35,6 @@ import (
"errors"
"fmt"
"log"
"strings"
"sync"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/crypto"
@@ -96,7 +86,6 @@ type BillingModeSource string
const (
BillingModeSourceWorkspaceOverride BillingModeSource = "workspace_override"
BillingModeSourceOrgDefault BillingModeSource = "org_default"
BillingModeSourceConstantFallback BillingModeSource = "constant_fallback"
// BillingModeSourceDerivedProvider means the mode was DERIVED from the
// workspace's (runtime, model) via the provider registry — the SSOT
@@ -120,8 +109,7 @@ const (
type BillingModeResolution struct {
WorkspaceID string `json:"workspace_id"`
ResolvedMode string `json:"resolved_mode"`
WorkspaceOverride *string `json:"workspace_override"` // nil = inherit
OrgDefault string `json:"org_default"` // Org-level default delivered via tenant_config / MOLECULE_LLM_BILLING_MODE. Consulted in the decision when no workspace override exists (core#2608).
WorkspaceOverride *string `json:"workspace_override"` // nil = inherit (derive from model)
Source BillingModeSource `json:"source"`
// ProviderSelection surfaces the DERIVED provider name (internal#718 P2-B)
// when the mode came from the registry derivation — the literal provider the
@@ -170,30 +158,6 @@ func isKnownBillingMode(s string) bool {
}
}
// orgDefaultForDisplay normalizes the raw org-mode string for the wire-format
// OrgDefault field. Returns the recognized lower-case value when known, else
// the closed platform default. This keeps the admin route response honest
// about which org default was actually consulted.
func orgDefaultForDisplay(orgMode string) string {
mode := strings.ToLower(strings.TrimSpace(orgMode))
if isKnownBillingMode(mode) {
return mode
}
return LLMBillingModePlatformManaged
}
// recognizedOrgDefault returns the normalized, recognized org default value
// and true when orgMode is a known billing mode (after trimming whitespace and
// lower-casing). Callers use this single normalization point so the decision
// semantics and the display value cannot drift.
func recognizedOrgDefault(orgMode string) (string, bool) {
mode := strings.ToLower(strings.TrimSpace(orgMode))
if isKnownBillingMode(mode) {
return mode, true
}
return LLMBillingModePlatformManaged, false
}
// readWorkspaceBillingOverride reads the OPTIONAL explicit operator override
// (workspaces.llm_billing_mode). Returns:
//
@@ -223,19 +187,26 @@ func readWorkspaceBillingOverride(ctx context.Context, workspaceID string) (stri
}
// ResolveLLMBillingModeDerived is the SSOT billing-mode resolver (internal#718
// P2-B + core#2608). It consults (in precedence order):
// P2-B + core#2608 + the 2026-06-12 per-workspace-BYOK precedence change). It
// consults (in precedence order):
//
// 1. EXPLICIT workspace operator override (workspaces.llm_billing_mode).
// 2. ORG default (passed via tenant_config / MOLECULE_LLM_BILLING_MODE). A
// recognized org default wins over provider derivation so a SaaS org pinned
// to platform_managed is not flipped to byok for models whose provider is
// not the closed `platform` provider (core#2608 first-run failure).
// 3. DERIVE: providers.DeriveProvider(runtime, model, availableAuthEnv).
// - resolves to the closed `platform` provider → platform_managed
// - resolves to any other (BYOK/third-party) provider → byok
// 4. DEFAULT-CLOSED: derive fails or org default is absent/unrecognized →
// platform_managed when a platform proxy is configured, else byok on
// self-host.
// 2. DERIVE: providers.DeriveProvider(runtime, model, availableAuthEnv) — the
// per-workspace SSOT (CTO 2026-06-12: "no org default; it's all per
// workspace; a workspace defaults to platform but I can switch it to BYOK
// anytime"). The org default is NO LONGER a blanket short-circuit over
// derivation:
// - model → the closed `platform` provider → platform_managed (proxy).
// - model → a specific vendor AND the workspace holds its OWN matching
// key (availableAuthEnv) → byok. The customer's own key IS the explicit
// BYOK choice and WINS over the org default.
// - model → a specific vendor but NO matching key → fall back to the ORG
// DEFAULT (managed billing via the proxy). Preserves core#2608: a fresh
// tenant on a platform_managed org default whose workspace has no key
// keeps working on the proxy instead of failing MISSING_BYOK_CREDENTIAL.
// 3. DEFAULT: derive fails (unregistered/ambiguous/no model) → org default when
// recognized, else default-closed (platform_managed with a proxy, byok on
// self-host).
//
// availableAuthEnv is the set of auth-env-var NAMES present for the workspace
// (never secret values) — the same disambiguation input DeriveProvider uses to
@@ -243,95 +214,73 @@ func readWorkspaceBillingOverride(ctx context.Context, workspaceID string) (stri
//
// A returned error never prevents a decision: ResolvedMode is always a valid
// enum value (default-closed). The error is informational (log + surface).
func ResolveLLMBillingModeDerived(ctx context.Context, workspaceID, runtime, model, orgMode string, availableAuthEnv []string) (BillingModeResolution, error) {
res := BillingModeResolution{
WorkspaceID: workspaceID,
// OrgDefault reflects the passed org default for wire-compat /
// observability. It is consulted in the decision when no workspace
// override exists (core#2608).
OrgDefault: orgDefaultForDisplay(orgMode),
}
// Normalize once and use the same value for both the decision and the
// empty-workspace path so callers cannot accidentally skip an org default
// because of casing or whitespace.
orgDefault, orgDefaultOK := recognizedOrgDefault(orgMode)
func ResolveLLMBillingModeDerived(ctx context.Context, workspaceID, runtime, model string, availableAuthEnv []string) (BillingModeResolution, error) {
res := BillingModeResolution{WorkspaceID: workspaceID}
// Pre-provision context (no workspace row yet): no override to read.
// A recognized org default still applies (no DB needed); otherwise default
// closed. This path is reached from ResolveLLMBillingMode with an empty id.
if workspaceID == "" {
if orgDefaultOK {
res.ResolvedMode = orgDefault
res.Source = BillingModeSourceOrgDefault
// Precedence 1: explicit per-workspace operator override (workspaces
// .llm_billing_mode). This is the ONLY non-derived input — and it is
// per-WORKSPACE, not org-level. Skipped in the pre-provision templating path
// (empty id), which derives straight from the passed model.
if workspaceID != "" {
if mode, ok, err := readWorkspaceBillingOverride(ctx, workspaceID); err != nil {
// DB error — default closed AND propagate (never flip on a transient error).
res.ResolvedMode = defaultClosedBillingMode()
res.Source = BillingModeSourceConstantFallback
return res, err
} else if ok {
m := mode
res.WorkspaceOverride = &m
res.ResolvedMode = mode
res.Source = BillingModeSourceWorkspaceOverride
return res, nil
}
res.ResolvedMode = defaultClosedBillingMode()
res.Source = BillingModeSourceDerivedDefault
return res, nil
}
// Precedence 1: explicit operator override.
if mode, ok, err := readWorkspaceBillingOverride(ctx, workspaceID); err != nil {
// DB error — default closed AND propagate (never flip on a transient error).
res.ResolvedMode = LLMBillingModePlatformManaged
res.Source = BillingModeSourceConstantFallback
return res, err
} else if ok {
m := mode
res.WorkspaceOverride = &m
res.ResolvedMode = mode
res.Source = BillingModeSourceWorkspaceOverride
return res, nil
}
// Precedence 2: org default. A recognized org-level default (delivered via
// tenant_config / MOLECULE_LLM_BILLING_MODE) wins over provider derivation
// so a SaaS org pinned to platform_managed does not get flipped to byok for
// models whose provider is not the closed `platform` provider. This closes
// core#2608: fresh SaaS tenants with platform_managed org default failed
// provision with MISSING_BYOK_CREDENTIAL because derivation ran first.
if orgDefaultOK {
res.ResolvedMode = orgDefault
res.Source = BillingModeSourceOrgDefault
return res, nil
}
// Precedence 3: DERIVE the provider from (runtime, model).
// Precedence 2: DERIVE from the workspace's (runtime, model) — the SSOT.
// There is NO org-level billing mode consulted anywhere (CTO 2026-06-12:
// "no org default — it's all per workspace; a workspace defaults to platform
// but I can switch it to BYOK anytime"). Decision:
// - model → the closed `platform` provider → platform_managed.
// - model → a specific vendor AND the workspace holds its OWN matching key
// → byok (the customer's
// own key IS the explicit BYOK choice; nothing overrides it).
// - model → a specific vendor but NO matching key → the DEPLOYMENT
// default: platform_managed when a proxy is wired (SaaS), else byok
// (self-host). This is `PlatformManagedProxyConfigured()` — a deploy fact,
// NOT an org billing setting — and is what makes a keyless workspace
// "default to platform" without any org-level mode.
manifest, mErr := providerRegistry()
if mErr != nil || manifest == nil {
// Registry unavailable (malformed embedded YAML — a build-time defect the
// gates catch). Default closed (byok on self-host where no proxy exists).
res.ResolvedMode = defaultClosedBillingMode()
res.Source = BillingModeSourceDerivedDefault
return res, mErr
}
provider, dErr := manifest.DeriveProvider(runtime, model, availableAuthEnv)
if dErr != nil {
// No model / unknown runtime / unregistered / ambiguous → default closed.
// NOT an error to the caller: an unregistered model is a legitimate
// "we can't say it's BYOK, so bill the platform default" outcome, and the
// only-registered gate at the create/config API is where an unregistered
// model is rejected loudly. Here we just fail closed for safety. On a
// self-hosted stack (no proxy configured) the safe default is byok, since
// platform_managed is unreachable there.
res.ResolvedMode = defaultClosedBillingMode()
res.Source = BillingModeSourceDerivedDefault
sel := model
if sel != "" {
res.ProviderSelection = &sel
if mErr == nil && manifest != nil {
if provider, dErr := manifest.DeriveProvider(runtime, model, availableAuthEnv); dErr == nil {
derivedName := provider.Name
res.ProviderSelection = &derivedName
res.Source = BillingModeSourceDerivedProvider
if provider.IsPlatform() {
// The workspace SELECTED the Platform provider (a platform-
// namespaced model id) → platform-managed (proxy + platform bill).
res.ResolvedMode = LLMBillingModePlatformManaged
} else {
// The workspace SELECTED a specific vendor provider → BYOK. The
// PROVIDER CHOICE is the signal (CTO 2026-06-12: "if I select a
// provider that is not Platform it means BYOK already") — NOT key
// presence. A byok workspace that has no usable credential fails
// closed loudly at provision (MISSING_BYOK_CREDENTIAL), which is
// correct: you chose to bring your own key, so bring one.
res.ResolvedMode = LLMBillingModeBYOK
}
return res, nil
}
return res, nil
}
derivedName := provider.Name
res.ProviderSelection = &derivedName
res.Source = BillingModeSourceDerivedProvider
if provider.IsPlatform() {
res.ResolvedMode = LLMBillingModePlatformManaged
} else {
// A specific (non-platform) vendor was derived → bring-your-own-key.
res.ResolvedMode = LLMBillingModeBYOK
// No model / unknown runtime / unregistered / ambiguous / registry error →
// deployment default-closed (proxy → platform_managed; self-host → byok).
res.ResolvedMode = defaultClosedBillingMode()
res.Source = BillingModeSourceDerivedDefault
if model != "" {
sel := model
res.ProviderSelection = &sel
}
return res, nil
return res, mErr
}
// ResolveLLMBillingMode is the legacy-signature resolver retained for callers
@@ -349,46 +298,17 @@ func ResolveLLMBillingModeDerived(ctx context.Context, workspaceID, runtime, mod
// platform_managed) so the caller can proceed without a separate fail-closed
// branch. The error is informational: log it, surface it to operators, but
// the strip-gate decision is already safe.
func ResolveLLMBillingMode(ctx context.Context, workspaceID, orgMode string) (BillingModeResolution, error) {
func ResolveLLMBillingMode(ctx context.Context, workspaceID string) (BillingModeResolution, error) {
if workspaceID == "" {
// Pre-provision context (templating, validation): default closed, no DB.
return ResolveLLMBillingModeDerived(ctx, "", "", "", orgMode, nil)
// Pre-provision context (templating, validation): no DB; derive defaults.
return ResolveLLMBillingModeDerived(ctx, "", "", "", nil)
}
// Precedence 1: explicit operator override. Read it FIRST so an overridden
// workspace short-circuits without the extra runtime/secrets reads (and so
// the query order is override → runtime → secrets, matching the derived
// resolver's own override-first precedence).
if mode, ok, err := readWorkspaceBillingOverride(ctx, workspaceID); err != nil {
return BillingModeResolution{
WorkspaceID: workspaceID,
OrgDefault: orgDefaultForDisplay(orgMode),
ResolvedMode: LLMBillingModePlatformManaged,
Source: BillingModeSourceConstantFallback,
}, err
} else if ok {
m := mode
return BillingModeResolution{
WorkspaceID: workspaceID,
OrgDefault: orgDefaultForDisplay(orgMode),
ResolvedMode: mode,
WorkspaceOverride: &m,
Source: BillingModeSourceWorkspaceOverride,
}, nil
}
// Precedence 2: DERIVE. Read the stored (runtime, model, available-auth-env)
// so the derived resolver can DeriveProvider for callers that don't carry
// them (admin route, secrets remote-pull). A read miss/error degrades
// gracefully: pass the empty/partial inputs through — DeriveProvider then
// errors and the derived resolver defaults closed to platform_managed.
//
// ResolveLLMBillingModeDerived re-reads the override (NULL again here) before
// deriving; that one extra cheap read keeps the derived resolver a complete,
// independently-callable SSOT rather than splitting its precedence across two
// functions.
// Load the stored (runtime, model, available-auth-env) for callers that do
// not carry them (admin route, secrets remote-pull) and delegate to the ONE
// SSOT resolver, which applies override → derive. No duplicate override read,
// no org-level input.
runtime, model, authEnv := readWorkspaceDeriveInputs(ctx, workspaceID)
return ResolveLLMBillingModeDerived(ctx, workspaceID, runtime, model, orgMode, authEnv)
return ResolveLLMBillingModeDerived(ctx, workspaceID, runtime, model, authEnv)
}
// readWorkspaceDeriveInputs loads the workspace's stored runtime + selected
@@ -75,10 +75,10 @@ func TestResolveLLMBillingModeDerived_BehaviorDelta(t *testing.T) {
wantSource: BillingModeSourceDerivedProvider,
},
{
// NON-PLATFORM-DERIVED → byok (THE FIX). claude-code + the
// kimi-coding-native model derives to the non-platform kimi-coding
// provider → IsPlatform=false → byok. This is the Reno billing-leak
// class: pre-P2 it resolved platform_managed and ran on platform creds.
// NON-PLATFORM-DERIVED → byok (THE FIX). The workspace SELECTED a vendor
// model (kimi-for-coding → kimi-coding provider, IsPlatform=false) ⇒
// byok. The PROVIDER CHOICE is the signal — NOT key presence. Reno
// billing-leak class: pre-P2 this resolved platform_managed.
name: "non_platform_derived_resolves_byok_THE_FIX",
runtime: "claude-code",
model: "kimi-for-coding",
@@ -87,7 +87,7 @@ func TestResolveLLMBillingModeDerived_BehaviorDelta(t *testing.T) {
wantSource: BillingModeSourceDerivedProvider,
},
{
// NON-PLATFORM vendor on codex: gpt-5.5 derives to `openai` (BYOK).
// NON-PLATFORM vendor on codex: gpt-5.5 openai (vendor) → byok.
name: "non_platform_openai_codex_byok",
runtime: "codex",
model: "gpt-5.5",
@@ -180,7 +180,7 @@ func TestResolveLLMBillingModeDerived_BehaviorDelta(t *testing.T) {
mock := setupTestDB(t)
expectOverrideQuery(mock, wsID, c.override)
res, err := ResolveLLMBillingModeDerived(ctx, wsID, c.runtime, c.model, "", c.authEnv)
res, err := ResolveLLMBillingModeDerived(ctx, wsID, c.runtime, c.model, c.authEnv)
if (err != nil) != c.wantErr {
t.Fatalf("err: got %v wantErr=%v", err, c.wantErr)
}
@@ -215,7 +215,7 @@ func TestResolveLLMBillingModeDerived_OverrideDBError_DefaultClosed(t *testing.T
WithArgs(wsID).
WillReturnError(errors.New("connection refused"))
res, err := ResolveLLMBillingModeDerived(ctx, wsID, "claude-code", "kimi-for-coding", "", nil)
res, err := ResolveLLMBillingModeDerived(ctx, wsID, "claude-code", "kimi-for-coding", nil)
if err == nil {
t.Fatalf("expected propagated DB error, got nil")
}
@@ -228,18 +228,20 @@ func TestResolveLLMBillingModeDerived_OverrideDBError_DefaultClosed(t *testing.T
}
// TestResolveLLMBillingModeDerived_EmptyWorkspaceID_PlatformDefault asserts the
// pre-provision context (no workspace id, no override read) defaults to
// platform_managed without a DB query.
// pre-provision context (no workspace id, no override read) with NO model to
// derive defaults to platform_managed without a DB query. (With a vendor model,
// the pre-provision path derives byok like any other — that is the SELECTED
// provider speaking; this test pins the no-model deployment default.)
func TestResolveLLMBillingModeDerived_EmptyWorkspaceID_PlatformDefault(t *testing.T) {
withProxyConfigured(t) // SaaS context.
ctx := context.Background()
mock := setupTestDB(t) // no query expected
res, err := ResolveLLMBillingModeDerived(ctx, "", "claude-code", "kimi-for-coding", "", nil)
res, err := ResolveLLMBillingModeDerived(ctx, "", "claude-code", "", nil)
if err != nil {
t.Fatalf("unexpected err: %v", err)
}
if res.ResolvedMode != LLMBillingModePlatformManaged {
t.Errorf("empty workspace id must default platform_managed, got %q", res.ResolvedMode)
t.Errorf("empty workspace id, no model must default platform_managed, got %q", res.ResolvedMode)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations: %v", err)
@@ -265,7 +267,7 @@ func TestResolveLLMBillingModeDerived_SelfHost_DefaultsBYOK(t *testing.T) {
t.Run("unset_model_defaults_byok_on_selfhost", func(t *testing.T) {
mock := setupTestDB(t)
expectOverrideQuery(mock, wsID, "") // NULL override
res, err := ResolveLLMBillingModeDerived(ctx, wsID, "claude-code", "", "", nil)
res, err := ResolveLLMBillingModeDerived(ctx, wsID, "claude-code", "", nil)
if err != nil {
t.Fatalf("unexpected err: %v", err)
}
@@ -283,7 +285,7 @@ func TestResolveLLMBillingModeDerived_SelfHost_DefaultsBYOK(t *testing.T) {
t.Run("unregistered_model_defaults_byok_on_selfhost", func(t *testing.T) {
mock := setupTestDB(t)
expectOverrideQuery(mock, wsID, "")
res, err := ResolveLLMBillingModeDerived(ctx, wsID, "claude-code", "totally-made-up-model-xyz", "", nil)
res, err := ResolveLLMBillingModeDerived(ctx, wsID, "claude-code", "totally-made-up-model-xyz", nil)
if err != nil {
t.Fatalf("unexpected err: %v", err)
}
@@ -300,7 +302,7 @@ func TestResolveLLMBillingModeDerived_SelfHost_DefaultsBYOK(t *testing.T) {
t.Run("empty_workspace_id_defaults_byok_on_selfhost", func(t *testing.T) {
mock := setupTestDB(t) // no query expected (pre-provision path)
res, err := ResolveLLMBillingModeDerived(ctx, "", "claude-code", "kimi-for-coding", "", nil)
res, err := ResolveLLMBillingModeDerived(ctx, "", "claude-code", "kimi-for-coding", nil)
if err != nil {
t.Fatalf("unexpected err: %v", err)
}
@@ -317,7 +319,7 @@ func TestResolveLLMBillingModeDerived_SelfHost_DefaultsBYOK(t *testing.T) {
// no-proxy default only governs the derive-failure fallback.
mock := setupTestDB(t)
expectOverrideQuery(mock, wsID, LLMBillingModePlatformManaged)
res, err := ResolveLLMBillingModeDerived(ctx, wsID, "claude-code", "", "", nil)
res, err := ResolveLLMBillingModeDerived(ctx, wsID, "claude-code", "", nil)
if err != nil {
t.Fatalf("unexpected err: %v", err)
}
@@ -333,80 +335,30 @@ func TestResolveLLMBillingModeDerived_SelfHost_DefaultsBYOK(t *testing.T) {
})
}
// TestResolveLLMBillingModeDerived_OrgDefaultWins asserts that a recognized
// org default (delivered via MOLECULE_LLM_BILLING_MODE / tenant_config) takes
// precedence over provider derivation when no workspace override exists. This
// closes core#2608: fresh SaaS tenants with org default platform_managed failed
// concierge provision because a non-platform-derived model flipped the workspace
// to byok before any credential existed.
func TestResolveLLMBillingModeDerived_OrgDefaultWins(t *testing.T) {
withProxyConfigured(t) // SaaS context.
// TestResolveLLMBillingModeDerived_PerWorkspaceNoOrgDefault is the SSOT
// regression after the org-level billing mode was fully removed (CTO
// 2026-06-12). Billing is decided per-workspace ONLY: explicit override →
// derive from model → deployment default-closed. There is NO org default
// anywhere. On SaaS (proxy wired) a keyless vendor-model workspace "defaults to
// platform" via the DEPLOYMENT fact PlatformManagedProxyConfigured(), not via
// an org setting.
func TestResolveLLMBillingModeDerived_PerWorkspaceNoOrgDefault(t *testing.T) {
withProxyConfigured(t) // SaaS context: PlatformManagedProxyConfigured() == true.
ctx := context.Background()
const wsID = "66666666-6666-6666-6666-666666666666"
t.Run("org_platform_managed_wins_over_non_platform_derive", func(t *testing.T) {
t.Run("vendor_model_is_byok_regardless_of_key", func(t *testing.T) {
// kimi-for-coding → kimi-coding vendor (IsPlatform=false) ⇒ byok. The
// SELECTED provider is the signal — NO key-presence check (a byok ws with
// no usable cred fails closed later at provision, which is correct).
mock := setupTestDB(t)
expectOverrideQuery(mock, wsID, "") // NULL override
res, err := ResolveLLMBillingModeDerived(ctx, wsID, "claude-code", "kimi-for-coding", LLMBillingModePlatformManaged, nil)
if err != nil {
t.Fatalf("unexpected err: %v", err)
}
if res.ResolvedMode != LLMBillingModePlatformManaged {
t.Errorf("org default platform_managed: got %q want platform_managed", res.ResolvedMode)
}
if res.Source != BillingModeSourceOrgDefault {
t.Errorf("source: got %q want %q", res.Source, BillingModeSourceOrgDefault)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations: %v", err)
}
})
t.Run("org_byok_wins_over_platform_derive", func(t *testing.T) {
mock := setupTestDB(t)
expectOverrideQuery(mock, wsID, "") // NULL override
res, err := ResolveLLMBillingModeDerived(ctx, wsID, "claude-code", "anthropic/claude-opus-4-7", LLMBillingModeBYOK, nil)
res, err := ResolveLLMBillingModeDerived(ctx, wsID, "claude-code", "kimi-for-coding", nil)
if err != nil {
t.Fatalf("unexpected err: %v", err)
}
if res.ResolvedMode != LLMBillingModeBYOK {
t.Errorf("org default byok: got %q want byok", res.ResolvedMode)
}
if res.Source != BillingModeSourceOrgDefault {
t.Errorf("source: got %q want %q", res.Source, BillingModeSourceOrgDefault)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations: %v", err)
}
})
t.Run("workspace_override_still_wins_over_org_default", func(t *testing.T) {
mock := setupTestDB(t)
expectOverrideQuery(mock, wsID, LLMBillingModeBYOK)
res, err := ResolveLLMBillingModeDerived(ctx, wsID, "claude-code", "anthropic/claude-opus-4-7", LLMBillingModePlatformManaged, nil)
if err != nil {
t.Fatalf("unexpected err: %v", err)
}
if res.ResolvedMode != LLMBillingModeBYOK {
t.Errorf("workspace override must beat org default: got %q want byok", res.ResolvedMode)
}
if res.Source != BillingModeSourceWorkspaceOverride {
t.Errorf("source: got %q want %q", res.Source, BillingModeSourceWorkspaceOverride)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations: %v", err)
}
})
t.Run("unrecognized_org_default_ignored", func(t *testing.T) {
mock := setupTestDB(t)
expectOverrideQuery(mock, wsID, "") // NULL override
res, err := ResolveLLMBillingModeDerived(ctx, wsID, "claude-code", "kimi-for-coding", "not-a-real-mode", nil)
if err != nil {
t.Fatalf("unexpected err: %v", err)
}
if res.ResolvedMode != LLMBillingModeBYOK {
t.Errorf("unrecognized org default ignored: got %q want byok", res.ResolvedMode)
t.Errorf("vendor model: got %q want byok", res.ResolvedMode)
}
if res.Source != BillingModeSourceDerivedProvider {
t.Errorf("source: got %q want %q", res.Source, BillingModeSourceDerivedProvider)
@@ -415,5 +367,41 @@ func TestResolveLLMBillingModeDerived_OrgDefaultWins(t *testing.T) {
t.Errorf("sqlmock expectations: %v", err)
}
})
t.Run("platform_model_is_platform_managed", func(t *testing.T) {
mock := setupTestDB(t)
expectOverrideQuery(mock, wsID, "") // NULL override
res, err := ResolveLLMBillingModeDerived(ctx, wsID, "claude-code", "anthropic/claude-opus-4-7", nil)
if err != nil {
t.Fatalf("unexpected err: %v", err)
}
if res.ResolvedMode != LLMBillingModePlatformManaged {
t.Errorf("platform model: got %q want platform_managed", res.ResolvedMode)
}
if res.Source != BillingModeSourceDerivedProvider {
t.Errorf("source: got %q want %q", res.Source, BillingModeSourceDerivedProvider)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations: %v", err)
}
})
t.Run("explicit_workspace_override_wins", func(t *testing.T) {
mock := setupTestDB(t)
expectOverrideQuery(mock, wsID, LLMBillingModeBYOK)
res, err := ResolveLLMBillingModeDerived(ctx, wsID, "claude-code", "anthropic/claude-opus-4-7", nil)
if err != nil {
t.Fatalf("unexpected err: %v", err)
}
if res.ResolvedMode != LLMBillingModeBYOK {
t.Errorf("workspace override must win: got %q want byok", res.ResolvedMode)
}
if res.Source != BillingModeSourceWorkspaceOverride {
t.Errorf("source: got %q want %q", res.Source, BillingModeSourceWorkspaceOverride)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations: %v", err)
}
})
}
@@ -28,7 +28,6 @@ import (
"errors"
"io"
"net/http"
"os"
"strings"
"github.com/gin-gonic/gin"
@@ -36,20 +35,18 @@ import (
// GetWorkspaceLLMBillingMode handles GET /admin/workspaces/:id/llm-billing-mode.
//
// internal#718 P2-B: the resolution now DERIVES the provider from the
// workspace's stored (runtime, model) via the registry (org rung retired). The
// passed orgMode is ignored by the resolver; it is left here only to avoid
// churning the call signature. The returned resolution matches what the
// provision-time strip gate computes (same derived resolver), so operators see
// the real platform-vs-byok decision + the derived provider in ProviderSelection.
// Resolution DERIVES the provider from the workspace's stored (runtime, model)
// via the registry — per-workspace only, no org-level billing mode (retired
// 2026-06-12). The returned resolution matches what the provision-time strip
// gate computes (same SSOT resolver), so operators see the real platform-vs-byok
// decision + the derived provider in ProviderSelection.
func GetWorkspaceLLMBillingMode(c *gin.Context) {
workspaceID := strings.TrimSpace(c.Param("id"))
if !uuidRegex.MatchString(workspaceID) {
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace id"})
return
}
orgMode := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_LLM_BILLING_MODE")))
res, err := ResolveLLMBillingMode(c.Request.Context(), workspaceID, orgMode)
res, err := ResolveLLMBillingMode(c.Request.Context(), workspaceID)
if err != nil {
// Resolver returns a safe default-closed mode alongside the error;
// surface the error so the operator sees the DB issue, but the
@@ -140,8 +137,7 @@ func PutWorkspaceLLMBillingMode(c *gin.Context) {
}
// Read back the resolution so the response reflects post-write state.
orgMode := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_LLM_BILLING_MODE")))
res, resolveErr := ResolveLLMBillingMode(c.Request.Context(), workspaceID, orgMode)
res, resolveErr := ResolveLLMBillingMode(c.Request.Context(), workspaceID)
if resolveErr != nil {
// Write succeeded but readback failed — still return 200 with the
// best-effort resolution; the safe default is set even on error.
@@ -35,14 +35,9 @@ const testWSID = "44444444-4444-4444-4444-444444444444"
// read, and the workspace_secrets scan (for MODEL + auth-env names). model==""
// means no MODEL secret row.
func expectDeriveShimQueries(m sqlmock.Sqlmock, wsID, runtime, model string) {
nullOverride := func() {
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil))
}
// Order: override(NULL) shim check, runtime, secrets, override(NULL) again
// (the derived resolver re-checks the override as a complete SSOT).
nullOverride()
// Order: runtime, secrets, override(NULL). ResolveLLMBillingMode loads the
// derive inputs first, then ResolveLLMBillingModeDerived reads the override
// ONCE (the prior double-read was removed with the org-level path, 2026-06-12).
m.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow(runtime))
@@ -54,7 +49,9 @@ func expectDeriveShimQueries(m sqlmock.Sqlmock, wsID, runtime, model string) {
m.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1`).
WithArgs(wsID).
WillReturnRows(secretRows)
nullOverride()
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil))
}
// internal#718 P2-B + core#2608: with no workspace override and an unset org
@@ -121,11 +121,9 @@ func TestApplyPlatformManagedLLMEnv_ReadProvisionParity(t *testing.T) {
const wsID = "6b66de8d-9337-4fb4-be8d-6d49dca0d809"
// ---- READ PATH ----
// ResolveLLMBillingMode reads in order: override (NULL) → runtime → secrets
// (MODEL=opus + the oauth key) → then ResolveLLMBillingModeDerived re-reads
// the override (NULL again).
// ResolveLLMBillingMode reads in order: runtime → secrets (MODEL=opus + the
// oauth key) → override (NULL, read once inside ResolveLLMBillingModeDerived).
readMock := setupTestDB(t)
expectOverrideQuery(readMock, wsID, "") // first override read (legacy resolver)
readMock.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("claude-code"))
@@ -134,9 +132,9 @@ func TestApplyPlatformManagedLLMEnv_ReadProvisionParity(t *testing.T) {
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
AddRow("MODEL", []byte("opus"), 0).
AddRow("CLAUDE_CODE_OAUTH_TOKEN", []byte("RENO-OWN-OAUTH"), 0))
expectOverrideQuery(readMock, wsID, "") // second override read (derived resolver)
expectOverrideQuery(readMock, wsID, "") // override read (derived resolver)
readRes, err := ResolveLLMBillingMode(ctx, wsID, "")
readRes, err := ResolveLLMBillingMode(ctx, wsID)
if err != nil {
t.Fatalf("read-path resolve err: %v", err)
}
@@ -25,13 +25,13 @@ import (
// complete, independently-callable SSOT.
//
// model=="" means no MODEL secret row.
func expectLegacyShimQueries(m sqlmock.Sqlmock, wsID, runtime, model string) {
nullOverride := func() {
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil))
}
nullOverride()
// expectShimQueries mocks the query sequence ResolveLLMBillingMode issues: it
// loads the derive inputs (runtime, secrets) then delegates to
// ResolveLLMBillingModeDerived, which reads the override ONCE. Order: runtime →
// secrets → override (single override read — the prior double-read was removed
// when the org-level path was retired 2026-06-12). override="" mocks a NULL
// (absent) override.
func expectShimQueries(m sqlmock.Sqlmock, wsID, runtime, model, override string) {
m.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow(runtime))
@@ -42,7 +42,15 @@ func expectLegacyShimQueries(m sqlmock.Sqlmock, wsID, runtime, model string) {
m.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1`).
WithArgs(wsID).
WillReturnRows(secretRows)
nullOverride()
overrideRow := sqlmock.NewRows([]string{"llm_billing_mode"})
if override == "" {
overrideRow.AddRow(nil)
} else {
overrideRow.AddRow(override)
}
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(overrideRow)
}
func TestResolveLLMBillingMode_LegacyShimDerives(t *testing.T) {
@@ -57,7 +65,6 @@ func TestResolveLLMBillingMode_LegacyShimDerives(t *testing.T) {
}
type tc struct {
name string
orgMode string
setupMock func(m sqlmock.Sqlmock)
want want
wantErr bool
@@ -65,79 +72,59 @@ func TestResolveLLMBillingMode_LegacyShimDerives(t *testing.T) {
cases := []tc{
{
// Explicit override still wins (first precedence).
name: "explicit_override_byok_wins",
orgMode: "",
// Explicit per-workspace override wins (first precedence). Inputs are
// loaded first (runtime, secrets) then the override read decides.
name: "explicit_override_byok_wins",
setupMock: func(m sqlmock.Sqlmock) {
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK))
expectShimQueries(m, wsID, "claude-code", "anthropic/claude-opus-4-7", LLMBillingModeBYOK)
},
want: want{mode: LLMBillingModeBYOK, source: BillingModeSourceWorkspaceOverride, hasOverride: true},
},
{
// No override + org default platform_managed wins over non-platform derive.
name: "org_default_platform_managed_wins_over_derive",
orgMode: LLMBillingModePlatformManaged,
// No override + vendor model → byok via derive (the SELECTED provider
// is the signal; no key-presence check).
name: "vendor_model_derives_byok",
setupMock: func(m sqlmock.Sqlmock) {
expectLegacyShimQueries(m, wsID, "claude-code", "kimi-for-coding")
},
want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceOrgDefault, hasOverride: false},
},
{
// No override + no org default + non-platform-deriving model → byok via derive.
name: "no_override_derives_byok_from_model",
orgMode: "",
setupMock: func(m sqlmock.Sqlmock) {
expectLegacyShimQueries(m, wsID, "claude-code", "kimi-for-coding")
expectShimQueries(m, wsID, "claude-code", "kimi-for-coding", "")
},
want: want{mode: LLMBillingModeBYOK, source: BillingModeSourceDerivedProvider, hasOverride: false},
},
{
// No override + no org default + platform-namespaced model → platform_managed.
name: "no_override_derives_platform_from_model",
orgMode: "",
// No override + platform-namespaced model → platform_managed via derive.
name: "platform_model_derives_platform",
setupMock: func(m sqlmock.Sqlmock) {
expectLegacyShimQueries(m, wsID, "claude-code", "anthropic/claude-opus-4-7")
expectShimQueries(m, wsID, "claude-code", "anthropic/claude-opus-4-7", "")
},
want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceDerivedProvider, hasOverride: false},
},
{
// No override + no org default + no model → derived_default → platform_managed.
name: "no_override_no_model_platform_default",
orgMode: "",
// No override + no model → derived_default → platform_managed (proxy wired).
name: "no_model_deploy_default_platform",
setupMock: func(m sqlmock.Sqlmock) {
expectLegacyShimQueries(m, wsID, "claude-code", "")
expectShimQueries(m, wsID, "claude-code", "", "")
},
want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceDerivedDefault, hasOverride: false},
},
{
// Garbled override is NOT honored — falls through to org default if present.
name: "garbled_override_falls_through_to_org_default",
orgMode: LLMBillingModePlatformManaged,
// Garbled override is NOT honored — falls through to derivation. With no
// model it lands on the deployment default (platform_managed).
name: "garbled_override_falls_through_to_derive",
setupMock: func(m sqlmock.Sqlmock) {
expectShimQueries(m, wsID, "claude-code", "", "byokk")
},
want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceDerivedDefault, hasOverride: false},
},
{
// DB error on the override read → default-closed + propagated error.
// (Inputs load first, then the override read errors.)
name: "override_db_error_default_closed_with_error",
setupMock: func(m sqlmock.Sqlmock) {
// override read 1 (garbled → not honored), runtime, secrets,
// override read 2 (garbled again, derived resolver re-check).
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow("byokk"))
m.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("claude-code"))
m.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}))
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow("byokk"))
},
want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceOrgDefault, hasOverride: false},
},
{
// DB error on the override read → default-closed + propagated error.
name: "override_db_error_default_closed_with_error",
orgMode: "",
setupMock: func(m sqlmock.Sqlmock) {
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnError(errors.New("connection refused"))
@@ -152,7 +139,7 @@ func TestResolveLLMBillingMode_LegacyShimDerives(t *testing.T) {
mock := setupTestDB(t)
c.setupMock(mock)
res, err := ResolveLLMBillingMode(ctx, wsID, c.orgMode)
res, err := ResolveLLMBillingMode(ctx, wsID)
if (err != nil) != c.wantErr {
t.Fatalf("err: got %v wantErr=%v", err, c.wantErr)
}
@@ -179,7 +166,7 @@ func TestResolveLLMBillingMode_EmptyWorkspaceID_PlatformDefault(t *testing.T) {
withProxyConfigured(t) // SaaS context.
ctx := context.Background()
mock := setupTestDB(t) // no DB read expected
res, err := ResolveLLMBillingMode(ctx, "", "")
res, err := ResolveLLMBillingMode(ctx, "")
if err != nil {
t.Fatalf("unexpected err: %v", err)
}
@@ -191,25 +178,6 @@ func TestResolveLLMBillingMode_EmptyWorkspaceID_PlatformDefault(t *testing.T) {
}
}
func TestResolveLLMBillingMode_EmptyWorkspaceID_OrgDefaultHonored(t *testing.T) {
withProxyConfigured(t)
ctx := context.Background()
mock := setupTestDB(t) // no DB read expected
res, err := ResolveLLMBillingMode(ctx, "", LLMBillingModeBYOK)
if err != nil {
t.Fatalf("unexpected err: %v", err)
}
if res.ResolvedMode != LLMBillingModeBYOK {
t.Errorf("empty ws id with org byok: got %q want byok", res.ResolvedMode)
}
if res.Source != BillingModeSourceOrgDefault {
t.Errorf("source: got %q want %q", res.Source, BillingModeSourceOrgDefault)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations: %v", err)
}
}
// TestResolveLLMBillingMode_ResolvedModeIsAlwaysValid asserts the resolver's
// post-condition: the returned mode is ALWAYS one of the three known enum
// values. The strip gate downstream relies on this so it can switch on
@@ -220,23 +188,12 @@ func TestResolveLLMBillingMode_ResolvedModeIsAlwaysValid(t *testing.T) {
const wsID = "22222222-2222-2222-2222-222222222222"
// Garbled override + no derivable model: must still resolve a known enum
// (platform_managed, default-closed). Query order: override(garbled),
// runtime, secrets, override(garbled again — derived resolver re-check).
// (platform_managed, default-closed). Query order: runtime, secrets, override
// (garbled → not honored → derive → no model → deployment default).
mock := setupTestDB(t)
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow("totally-bogus"))
mock.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("claude-code"))
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}))
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow("totally-bogus"))
expectShimQueries(mock, wsID, "claude-code", "", "totally-bogus")
res, err := ResolveLLMBillingMode(ctx, wsID, "")
res, err := ResolveLLMBillingMode(ctx, wsID)
if err != nil {
t.Fatalf("unexpected err: %v", err)
}
@@ -382,6 +382,21 @@ func (h *WorkspaceHandler) applyConciergeProvisionConfig(
// universal MISSING_MODEL gate then fails the provision CLOSED rather than
// letting the runtime pick an opaque default.
func (h *WorkspaceHandler) ensureConciergeModel(ctx context.Context, workspaceID string, envVars map[string]string) {
// SEED-ONLY (CTO 2026-06-12: customer setting > platform default; the
// concierge's model is changeable like any workspace, "anytime"). If a MODEL
// secret already exists — whether the original seed or a model the customer
// later picked in the canvas — RESPECT it: loadWorkspaceSecrets +
// applyRuntimeModelEnv have already put it in envVars, so do nothing. Only
// SEED the declared default when the concierge has no model at all (first
// boot). Pre-fix this function re-asserted conciergeDeclaredModel on EVERY
// provision, silently reverting the customer's pick (e.g. kimi-for-coding →
// moonshot/kimi-k2.6) — exactly the platform-overriding-customer violation
// the SSOT directive forbids.
if existing := readStoredModelSecret(ctx, workspaceID); existing != "" {
return // explicit model already set; never overwrite the customer's choice
}
// First boot — no model yet. Seed the concierge's declared default.
model := conciergeDeclaredModel
if ok, why := validateRegisteredModelForRuntime(conciergeRuntime, model); !ok {
log.Printf("Provisioner: concierge %s declared model %q is NOT registered for runtime %q (%s) — leaving model unset; provision will fail closed", workspaceID, model, conciergeRuntime, why)
@@ -394,29 +409,36 @@ func (h *WorkspaceHandler) ensureConciergeModel(ctx context.Context, workspaceID
// Seed the container env (precedence MOLECULE_MODEL > MODEL in the runtime).
// applyRuntimeModelEnv already ran with an empty payload model for the
// concierge (it has no stored MODEL on first boot), so set both canonical
// names here so this provision actually runs the declared model.
// concierge (no stored MODEL on first boot), so set both canonical names
// here so this provision actually runs the seeded default.
envVars["MOLECULE_MODEL"] = model
envVars["MODEL"] = model
// Persist so GET /workspaces/:id/model returns it (Config tab visibility).
// Idempotent: only write when the stored value differs, to avoid churning
// updated_at on every restart.
var stored []byte
var version int
err := db.DB.QueryRowContext(ctx,
`SELECT encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = $1 AND key = 'MODEL'`,
workspaceID).Scan(&stored, &version)
if err == nil {
if dec, decErr := crypto.DecryptVersioned(stored, version); decErr == nil && string(dec) == model {
return // already persisted with the right value
}
}
if setErr := setModelSecret(ctx, workspaceID, model); setErr != nil {
log.Printf("Provisioner: concierge %s persist MODEL secret failed: %v (env still seeded for this provision)", workspaceID, setErr)
}
}
// readStoredModelSecret returns the decrypted MODEL workspace_secret, or "" when
// none is stored (or on any read/decrypt error — treated as "unset" so a
// transient miss re-seeds rather than wedges). Used by ensureConciergeModel to
// decide seed-vs-respect.
func readStoredModelSecret(ctx context.Context, workspaceID string) string {
var stored []byte
var version int
if err := db.DB.QueryRowContext(ctx,
`SELECT encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = $1 AND key = 'MODEL'`,
workspaceID).Scan(&stored, &version); err != nil {
return ""
}
dec, err := crypto.DecryptVersioned(stored, version)
if err != nil {
return ""
}
return string(dec)
}
// EnsureSelfHostedPlatformAgent installs the org's platform agent (the concierge,
// the org root) on a tenant that has no control plane to do it — i.e. self-hosted
// or local. In SaaS the CP calls InstallPlatformAgent at org-provision time; this
@@ -575,25 +575,33 @@ func TestEnsureConciergeModel_SeedsEnvAndPersistsWhenAbsent(t *testing.T) {
}
}
// TestEnsureConciergeModel_SkipsWriteWhenAlreadyCorrect verifies the idempotent
// path: when the stored MODEL already equals the declared model, no write fires
// (env is still seeded). encryption_version=0 = raw bytes (crypto disabled in test).
func TestEnsureConciergeModel_SkipsWriteWhenAlreadyCorrect(t *testing.T) {
// TestEnsureConciergeModel_RespectsExistingModel is the SEED-ONLY regression
// guard (CTO 2026-06-12): when a MODEL secret already exists — ESPECIALLY a
// DIFFERENT, customer-picked one (e.g. they switched the concierge to
// kimi-for-coding for BYOK) — ensureConciergeModel must NOT touch it: no write,
// and it must NOT force the declared default back into the env. Pre-fix it
// re-asserted conciergeDeclaredModel on every provision, silently reverting the
// customer's choice. encryption_version=0 = raw bytes (crypto disabled in test).
func TestEnsureConciergeModel_RespectsExistingModel(t *testing.T) {
mock := setupTestDB(t)
const wsID = "concierge-ws-2"
const customerModel = "kimi-for-coding" // the customer's explicit pick
mock.ExpectQuery(`SELECT encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1 AND key = 'MODEL'`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"encrypted_value", "encryption_version"}).
AddRow([]byte(conciergeDeclaredModel), 0))
// NO ExpectExec — a write here would be an unmet/unexpected expectation.
AddRow([]byte(customerModel), 0))
// NO ExpectExec — any write would be an unmet/unexpected expectation.
h := &WorkspaceHandler{}
envVars := map[string]string{}
h.ensureConciergeModel(context.Background(), wsID, envVars)
if got := envVars["MODEL"]; got != conciergeDeclaredModel {
t.Errorf("MODEL env = %q, want %q", got, conciergeDeclaredModel)
// Must NOT have overwritten the env with the declared default — the customer's
// stored model wins and is wired by loadWorkspaceSecrets/applyRuntimeModelEnv,
// not by this seed-only helper.
if got := envVars["MODEL"]; got == conciergeDeclaredModel {
t.Errorf("MODEL env was forced to the declared default %q — must respect the customer's stored %q", conciergeDeclaredModel, customerModel)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
+32 -6
View File
@@ -5,7 +5,6 @@ import (
"database/sql"
"log"
"net/http"
"os"
"regexp"
"strings"
@@ -61,12 +60,39 @@ func isPlatformManagedDirectLLMBypassKey(key string) bool {
// during a secret write still rejects the bypass-list keys — fail safer not
// freer. This matches the resolver's documented contract.
func platformManagedLLMModeForWorkspace(c *gin.Context, workspaceID string) bool {
orgMode := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_LLM_BILLING_MODE")))
res, err := ResolveLLMBillingMode(c.Request.Context(), workspaceID, orgMode)
if err != nil {
log.Printf("secrets: resolve billing mode for workspace=%s failed: %v (defaulting to platform_managed for safety)", workspaceID, err)
ctx := c.Request.Context()
// CTO 2026-06-12 (per-workspace BYOK SSOT) + CR (agent-researcher): block a
// vendor-key write whenever it would co-mingle with platform billing. The
// MODEL is checked FIRST: a platform-servable model (derives to the closed
// `platform` provider; the proxy serves it and bills the platform) blocks
// stray vendor-key co-storage REGARDLESS of any (possibly stale/incorrect)
// billing override — a platform model must never host a co-stored vendor key.
// Only a specific VENDOR model is a BYOK setup where the customer may write
// their own key (INCLUDING the first key, before billing has derived byok).
// For a vendor model, an explicit platform_managed override still forces the
// block (the operator chose managed billing → a vendor key would co-mingle).
runtime, model, authEnv := readWorkspaceDeriveInputs(ctx, workspaceID)
manifest, err := providerRegistry()
if err != nil || manifest == nil {
log.Printf("secrets: provider registry unavailable for workspace=%s: %v (blocking vendor-key write for safety)", workspaceID, err)
return true
}
return strings.EqualFold(res.ResolvedMode, LLMBillingModePlatformManaged)
provider, dErr := manifest.DeriveProvider(runtime, model, authEnv)
if dErr != nil {
// Unregistered / ambiguous / no model → cannot prove it's a vendor BYOK
// setup; block (safe default, matches the create-time only-registered gate).
return true
}
if provider.IsPlatform() {
// Platform-servable model → block, even under a byok override.
return true
}
// Vendor model: allow the key write UNLESS an explicit platform_managed
// override forces managed billing (then a vendor key would co-mingle).
if mode, ok, err := readWorkspaceBillingOverride(ctx, workspaceID); err == nil && ok {
return strings.EqualFold(mode, LLMBillingModePlatformManaged)
}
return false
}
// rejectPlatformManagedDirectLLMBypassForWorkspace is the per-workspace
@@ -1845,3 +1845,99 @@ func TestDeleteGlobal_SpoofedHeader_DoesNotSuppressRestart(t *testing.T) {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestPlatformManagedLLMModeForWorkspace_GatesOnModelNotResolvedMode is the Fix C
// guard regression (CTO 2026-06-12): the vendor-key-write guard must key off
// whether the workspace's MODEL is platform-servable (derives to the closed
// `platform` provider), NOT off the resolved billing mode. A workspace on a
// VENDOR model must be allowed to write its own key — including the FIRST key,
// before billing has derived byok (when the resolved mode is still the org
// platform_managed fallback). A workspace on a PLATFORM model is blocked (a
// stray vendor key would co-mingle with proxy billing).
func TestPlatformManagedLLMModeForWorkspace_GatesOnModelNotResolvedMode(t *testing.T) {
overrideQ := `SELECT llm_billing_mode FROM workspaces WHERE id = \$1`
runtimeQ := `SELECT runtime FROM workspaces WHERE id = \$1`
secretsQ := `SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1`
newCtx := func(wsID string) *gin.Context {
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("PUT", "/workspaces/"+wsID+"/secrets", nil)
return c
}
// Query order (new): runtime → secrets → (vendor only) override. A platform
// model short-circuits at IsPlatform and never reads the override.
vendorOK := func(m sqlmock.Sqlmock, wsID, override string) {
m.ExpectQuery(runtimeQ).WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("claude-code"))
m.ExpectQuery(secretsQ).WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
AddRow("MODEL", []byte("kimi-for-coding"), 0)) // vendor model
row := sqlmock.NewRows([]string{"llm_billing_mode"})
if override == "" {
row.AddRow(nil)
} else {
row.AddRow(override)
}
m.ExpectQuery(overrideQ).WithArgs(wsID).WillReturnRows(row)
}
platformModelOnly := func(m sqlmock.Sqlmock, wsID string) {
m.ExpectQuery(runtimeQ).WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("claude-code"))
m.ExpectQuery(secretsQ).WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
AddRow("MODEL", []byte("moonshot/kimi-k2.6"), 0)) // platform model — no override read
}
t.Run("vendor model + no override → NOT blocked (can add first BYOK key)", func(t *testing.T) {
mock := setupTestDB(t)
const wsID = "11111111-1111-1111-1111-111111111111"
vendorOK(mock, wsID, "")
if platformManagedLLMModeForWorkspace(newCtx(wsID), wsID) {
t.Error("vendor-model workspace must NOT be blocked from writing its own key")
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock: %v", err)
}
})
t.Run("platform model → blocked", func(t *testing.T) {
mock := setupTestDB(t)
const wsID = "22222222-2222-2222-2222-222222222222"
platformModelOnly(mock, wsID)
if !platformManagedLLMModeForWorkspace(newCtx(wsID), wsID) {
t.Error("platform-model workspace must block stray vendor-key writes")
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock: %v", err)
}
})
t.Run("platform model + byok override → STILL blocked (CR: co-mingle guard)", func(t *testing.T) {
// agent-researcher REQUEST_CHANGES: a platform-servable MODEL must stay
// protected even when a (stale/incorrect) byok override is set — the
// MODEL is checked first, so the override is never even read.
mock := setupTestDB(t)
const wsID = "33333333-3333-3333-3333-333333333333"
platformModelOnly(mock, wsID) // no override query — platform short-circuits
if !platformManagedLLMModeForWorkspace(newCtx(wsID), wsID) {
t.Error("platform model must block vendor-key writes even under a byok override")
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock: %v", err)
}
})
t.Run("vendor model + platform_managed override → blocked (operator forced managed)", func(t *testing.T) {
mock := setupTestDB(t)
const wsID = "44444444-4444-4444-4444-444444444444"
vendorOK(mock, wsID, LLMBillingModePlatformManaged)
if !platformManagedLLMModeForWorkspace(newCtx(wsID), wsID) {
t.Error("vendor model with an explicit platform_managed override must block (co-mingle)")
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock: %v", err)
}
})
}
@@ -1006,21 +1006,15 @@ func effectiveModelForBilling(model string, envVars map[string]string) string {
// applyPlatformManagedLLMEnv wires the control-plane LLM proxy into a
// workspace only when the RESOLVED billing mode for this workspace is
// platform_managed. "Resolved" means: the workspace-level override (if any)
// wins over the org default (delivered via tenant_config in MOLECULE_LLM_BILLING_MODE).
// platform_managed. "Resolved" is PER-WORKSPACE only: the workspace-level
// override (if any) wins, else the mode is DERIVED from the workspace's
// selected model/provider (platform-namespaced → platform_managed; a specific
// vendor → byok). There is NO org-level billing mode (retired 2026-06-12); the
// SSOT resolver (ResolveLLMBillingModeDerived) never reads any org input.
//
// Pre-internal#691 this gate read the org-level env var directly, which made
// it impossible to mix billing modes across workspaces in the same org. The
// resolver (ResolveLLMBillingMode) is the single source of truth now; the
// architectural test asserts no remaining code path gates on os.Getenv
// ("MOLECULE_LLM_BILLING_MODE") for strip-decision purposes — that env value
// is still read INTO the resolver as the org-default input, but it is never
// the final decision.
//
// Default-closed: any resolver error / NULL JOIN / garbled enum value
// collapses to platform_managed (see llm_billing_mode.go for the contract).
// This preserves the existing implicit default exactly while making the
// per-workspace opt-out path safe.
// Default-closed: any resolver error / garbled enum / underivable model
// collapses to the deployment default (platform_managed when a proxy is wired,
// byok on self-host — see llm_billing_mode.go).
//
// The resolved mode is exported into the workspace container as
// MOLECULE_LLM_BILLING_MODE_RESOLVED so an in-container debug check can
@@ -1090,9 +1084,9 @@ func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string,
// runtime + model + the workspace env, so it calls the DERIVED resolver
// directly (no DB round-trip for runtime/model). availableAuthEnv is the set
// of recognized provider auth-env-var NAMES present in envVars (the same
// disambiguation input the registry uses to split oauth-vs-api). The org-env
// MOLECULE_LLM_BILLING_MODE is NO LONGER read into the decision (retired).
orgMode := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_LLM_BILLING_MODE")))
// disambiguation input the registry uses to split oauth-vs-api). There is NO
// org-level billing mode input: the org-env MOLECULE_LLM_BILLING_MODE was
// fully retired (CTO 2026-06-12 — billing is per-workspace only).
availableAuthEnv := availableAuthEnvNames(envVars)
// molecule-core#1994: derive billing mode from the EFFECTIVE model, not the
// raw payload.Model. On a re-provision (restart/resume/auto-restart) the
@@ -1107,7 +1101,7 @@ func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string,
// read-path's — keeping the two resolvers in parity (the #1994 regression
// guard test asserts this).
effectiveModel := effectiveModelForBilling(model, envVars)
res, resolveErr := ResolveLLMBillingModeDerived(ctx, workspaceID, runtime, effectiveModel, orgMode, availableAuthEnv)
res, resolveErr := ResolveLLMBillingModeDerived(ctx, workspaceID, runtime, effectiveModel, availableAuthEnv)
if resolveErr != nil {
// resolveErr != nil ⇒ resolver hit a DB error AND already defaulted
// res.ResolvedMode to platform_managed. Log + proceed; the safe default
@@ -1056,9 +1056,13 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeUsesAnthropicProxyOverOAuth(t *tes
t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "https://api.example.test/api/v1/internal/llm/anthropic/v1")
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
// A PLATFORM-namespaced model id ⇒ the workspace SELECTED the Platform
// provider ⇒ platform_managed ⇒ the stray oauth is stripped and the proxy
// is injected. (A vendor model id would instead resolve byok and KEEP the
// oauth — covered by the byok tests.)
envVars := map[string]string{
"CLAUDE_CODE_OAUTH_TOKEN": "user-oauth-token",
"MODEL": "sonnet",
"MODEL": "anthropic/claude-sonnet-4-6",
}
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "", nil)
@@ -1102,9 +1106,12 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeStripsVendorBYOK(t *testing.T) {
t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "https://api.example.test/api/v1/internal/llm/anthropic/v1")
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
// A PLATFORM-namespaced model ⇒ platform_managed ⇒ a stray vendor key is
// stripped (the proxy is used). (If the workspace had SELECTED the MiniMax
// vendor model, it would resolve byok and KEEP its MiniMax key instead.)
envVars := map[string]string{
"MINIMAX_API_KEY": "user-minimax-key",
"MODEL": "MiniMax-M2.7",
"MODEL": "anthropic/claude-sonnet-4-6",
}
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "", nil)