From 2cf7d006a97fa57cb1bcf60a0680931d4375dbba Mon Sep 17 00:00:00 2001 From: hongming Date: Fri, 29 May 2026 04:26:28 +0000 Subject: [PATCH] =?UTF-8?q?fix(secrets):=20drop=20retired=20org-level=20gu?= =?UTF-8?q?ard=20from=20SetGlobal=20=E2=80=94=20global=20vendor=20keys=20a?= =?UTF-8?q?re=20tenant-owned?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit internal#718 retired the org-level LLM billing rung (billing is resolved per-workspace now). SetGlobal still called the legacy org-env guard rejectPlatformManagedDirectLLMBypass, which reads MOLECULE_LLM_BILLING_MODE and 400s any vendor/oauth key write when the (legacy) org default is platform_managed. That blocked setting a tenant's own MINIMAX_API_KEY (or any custom-provider key) at global scope on a byok tenant — agents-team hit "direct Hermes custom provider secrets are blocked for platform-managed LLM workspaces". A global secret is the tenant's OWN shared credential. The provision-time provider-matched strip (workspace_provision, core#2000) already removes any global cred a given workspace's resolved provider does not accept, and the platform-managed path strips bypass keys at provision too — so a platform-managed workspace can never USE a non-matching global vendor/oauth key. The SetGlobal org-env gate was redundant belt-and-suspenders keyed off the retired rung. - SetGlobal: remove the org-level guard call. - Delete the now-dead legacy helpers platformManagedLLMMode + rejectPlatformManagedDirectLLMBypass (org-env shims; the per-workspace successors rejectPlatformManagedDirectLLMBypassForWorkspace / platformManagedLLMModeForWorkspace remain and still gate per-workspace writes). - Tests: convert the obsolete platform-managed rejection test into TestSetGlobal_AllowsTenantOwnedVendorKeyDespiteLegacyOrgEnv (asserts the global write SUCCEEDS even with the legacy env still set to platform_managed). Co-Authored-By: Claude Opus 4.8 (1M context) --- workspace-server/internal/handlers/secrets.go | 36 +++------- .../internal/handlers/secrets_test.go | 68 +++++++++++-------- 2 files changed, 49 insertions(+), 55 deletions(-) diff --git a/workspace-server/internal/handlers/secrets.go b/workspace-server/internal/handlers/secrets.go index 236e11b4e..33fced2f1 100644 --- a/workspace-server/internal/handlers/secrets.go +++ b/workspace-server/internal/handlers/secrets.go @@ -67,14 +67,6 @@ func platformManagedLLMModeForWorkspace(c *gin.Context, workspaceID string) bool return strings.EqualFold(res.ResolvedMode, LLMBillingModePlatformManaged) } -// platformManagedLLMMode is the legacy org-level gate retained for any test -// harness still asserting the env-var-only behavior. Production code paths -// must call platformManagedLLMModeForWorkspace instead so a workspace-level -// byok override actually takes effect on the secrets-write path. -func platformManagedLLMMode() bool { - return strings.EqualFold(strings.TrimSpace(os.Getenv("MOLECULE_LLM_BILLING_MODE")), "platform_managed") -} - // rejectPlatformManagedDirectLLMBypassForWorkspace is the per-workspace // successor to rejectPlatformManagedDirectLLMBypass (internal#691). The // strip-list ONLY applies when this specific workspace resolves to @@ -91,22 +83,6 @@ func rejectPlatformManagedDirectLLMBypassForWorkspace(c *gin.Context, workspaceI return true } -// rejectPlatformManagedDirectLLMBypass is the legacy org-level shim. Retained -// only for backwards compatibility with any external/test caller still on the -// old shape; new code MUST use the per-workspace variant above. Production -// code paths (the secrets.go handlers + workspace.go create-secret path) all -// switched in internal#691. -func rejectPlatformManagedDirectLLMBypass(c *gin.Context, key string) bool { - if !platformManagedLLMMode() || !isPlatformManagedDirectLLMBypassKey(key) { - return false - } - c.JSON(http.StatusBadRequest, gin.H{ - "error": "direct Hermes custom provider secrets are blocked for platform-managed LLM workspaces; use MODEL/LLM_PROVIDER or the platform LLM proxy env instead", - "key": key, - }) - return true -} - type SecretsHandler struct { restartFunc func(workspaceID string) // Optional: auto-restart after secret change } @@ -486,9 +462,15 @@ func (h *SecretsHandler) SetGlobal(c *gin.Context) { c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"}) return } - if rejectPlatformManagedDirectLLMBypass(c, body.Key) { - return - } + // internal#718: the org-level LLM billing rung was retired — billing is + // resolved per-workspace, not per-org. A global secret is the tenant's OWN + // shared credential; the provision-time provider-matched strip + // (workspace_provision) removes any global cred a given workspace's resolved + // provider does not accept, so a platform-managed workspace can never USE a + // non-matching global vendor/oauth key. The legacy org-env SetGlobal gate + // (keyed off the retired MOLECULE_LLM_BILLING_MODE) is therefore removed; + // per-workspace writes still enforce the strip-list via + // rejectPlatformManagedDirectLLMBypassForWorkspace. encrypted, err := crypto.Encrypt([]byte(body.Value)) if err != nil { diff --git a/workspace-server/internal/handlers/secrets_test.go b/workspace-server/internal/handlers/secrets_test.go index 3e30c0db6..5a59862bd 100644 --- a/workspace-server/internal/handlers/secrets_test.go +++ b/workspace-server/internal/handlers/secrets_test.go @@ -979,47 +979,59 @@ func TestSetGlobal_AutoRestartsAffectedWorkspaces(t *testing.T) { } } -// TestSetGlobal_RejectsPlatformBypassKeyOnPlatformManagedTenant is the -// molecule-core#1994 co-mingling GUARD regression. Removing the byok strip is -// only safe if the platform's own credential is never written into a tenant's -// global_secrets. SetGlobal is the in-code write boundary: on a tenant whose -// resolved LLM mode is platform_managed (the metered default), a direct -// vendor / oauth bypass-list key MUST be rejected (400) and NOT persisted — -// the tenant is supposed to route through the CP proxy, not carry a direct -// platform-shaped credential at global scope. This is what keeps a -// platform-origin token out of global_secrets going forward. +// TestSetGlobal_AllowsTenantOwnedVendorKeyDespiteLegacyOrgEnv pins the +// internal#718 correction: the org-level LLM billing rung is RETIRED (billing +// is resolved per-workspace, not per-org). A global secret is the tenant's OWN +// shared credential and is always writable at global scope; the provision-time +// provider-matched strip (workspace_provision) keeps any platform-managed +// workspace from USING a non-matching global cred, and per-workspace secret +// writes still enforce the strip-list via the per-workspace guard. So even with +// the legacy MOLECULE_LLM_BILLING_MODE env still set to platform_managed, a +// global vendor/oauth key write MUST SUCCEED (200) and persist — the retired +// org rung no longer gates it. // -// (On a byok/disabled tenant the same write is ALLOWED — that key is the -// tenant's OWN credential, which the corrected model expects at global scope. -// TestSetGlobal_AutoRestartsAffectedWorkspaces covers that allowed path.) -// -// Mutation: drop the rejectPlatformManagedDirectLLMBypass guard from SetGlobal -// → the write reaches the INSERT (no 400) → this test RED. -func TestSetGlobal_RejectsPlatformBypassKeyOnPlatformManagedTenant(t *testing.T) { - setupTestDB(t) - handler := NewSecretsHandler(nil) +// Mutation: re-add the org-level rejectPlatformManagedDirectLLMBypass guard to +// SetGlobal → the write 400s before the INSERT → this test RED. +func TestSetGlobal_AllowsTenantOwnedVendorKeyDespiteLegacyOrgEnv(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) - // Org/tenant default is platform_managed — the metered path. A direct - // vendor key write into global_secrets must be refused here. + restarted := make(chan string, 2) + handler := NewSecretsHandler(func(id string) { restarted <- id }) + + // Legacy org env still platform_managed — it must no longer gate the write. t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged) + mock.ExpectExec("INSERT INTO global_secrets"). + WithArgs("CLAUDE_CODE_OAUTH_TOKEN", sqlmock.AnyArg(), sqlmock.AnyArg()). + WillReturnResult(sqlmock.NewResult(0, 1)) + mock.ExpectQuery("SELECT id FROM workspaces"). + WithArgs("CLAUDE_CODE_OAUTH_TOKEN"). + WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("ws-a")) + w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"key":"CLAUDE_CODE_OAUTH_TOKEN","value":"sk-ant-oat01-platform-shaped"}` + body := `{"key":"CLAUDE_CODE_OAUTH_TOKEN","value":"sk-ant-oat01-tenant-own"}` c.Request = httptest.NewRequest("POST", "/admin/secrets", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") handler.SetGlobal(c) - if w.Code != http.StatusBadRequest { - t.Fatalf("expected 400 (bypass-list key rejected for platform_managed tenant), got %d: %s", w.Code, w.Body.String()) + if w.Code != http.StatusOK { + t.Fatalf("expected 200 (global write allowed; org rung retired), got %d: %s", w.Code, w.Body.String()) } - if !strings.Contains(w.Body.String(), "blocked") { - t.Errorf("response should explain the block; got %s", w.Body.String()) + // Wait on the async restart fan-out so its SELECT drains before db swap. + select { + case id := <-restarted: + if id != "ws-a" { + t.Errorf("expected ws-a restarted, got %s", id) + } + case <-time.After(2 * time.Second): + t.Fatal("auto-restart not fired for affected workspace") + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) } - // No INSERT was expected on the mock — sqlmock would error on an - // unexpected ExecContext, so reaching here with a 400 proves the write - // was refused before the DB. } // TestDeleteGlobal_AutoRestartsAffectedWorkspaces covers the delete branch of #15. -- 2.52.0