diff --git a/workspace-server/internal/handlers/byok_credential_gate.go b/workspace-server/internal/handlers/byok_credential_gate.go index 70b04828..166b6709 100644 --- a/workspace-server/internal/handlers/byok_credential_gate.go +++ b/workspace-server/internal/handlers/byok_credential_gate.go @@ -55,6 +55,14 @@ import ( // // Keeping the rule here — not duplicated in two call sites — is the whole point: // "what counts as a usable BYOK credential" is defined once. +// +// NOTE (RC 12082): the provider-AWARE predicate anyBYOKCredentialKeyMatchesProvider +// is the REAL fail-closed check — a key in platformManagedDirectLLMBypassKeys +// that does NOT match the resolved provider's auth_env is a stray (e.g. an +// OPENAI_API_KEY in a claude-code+anthropic workspace). this predicate remains +// the cheap filter for callers that have not yet derived the provider; callers +// that HAVE should prefer anyBYOKCredentialKeyMatchesProvider to avoid the +// provider-mismatch bypass. func anyBYOKCredentialKeyPresent(presentKeys map[string]struct{}) bool { for key := range presentKeys { if _, ok := platformManagedDirectLLMBypassKeys[strings.ToUpper(strings.TrimSpace(key))]; ok { @@ -64,6 +72,47 @@ func anyBYOKCredentialKeyPresent(presentKeys map[string]struct{}) bool { return false } +// anyBYOKCredentialKeyMatchesProvider is the provider-aware presence predicate +// (RC 12082 — fixes the create-time and provision-time provider-mismatch +// bypass). It reports whether presentKeys contains at least one +// platform-managed-shaped LLM bypass key that ALSO matches the resolved +// provider's auth_env set — i.e. a credential the provider would actually +// authenticate with. +// +// The shared global bypass set (platformManagedDirectLLMBypassKeys) is by +// design over-broad: every known LLM vendor key name lives there so the +// canvas Secrets tab can write ANY vendor's key without a registry check. +// That same breadth is the bypass: a claude-code + anthropic-model workspace +// whose secrets hold only OPENAI_API_KEY satisfies the cheap presence +// predicate (the key IS in the global set), but the resolved provider +// (anthropic-api) would never accept it as a credential, so the create-time +// gate silently passes on a credential the runtime will reject. The fix: +// AFTER deriving the provider, require at least one present key to be in the +// provider's auth_env set. That makes the cheap global filter a pre-filter +// (still useful for fast reject of "no LLM keys at all") and the provider +// match the actual fail-closed gate. +func anyBYOKCredentialKeyMatchesProvider(provider providers.Provider, presentKeys map[string]struct{}) bool { + // Build the accepted-key set from the provider's auth_env (case-insensitive). + accepted := make(map[string]struct{}, len(provider.AuthEnv)) + for _, e := range provider.AuthEnv { + accepted[strings.ToUpper(strings.TrimSpace(e))] = struct{}{} + } + for key := range presentKeys { + upper := strings.ToUpper(strings.TrimSpace(key)) + if _, ok := accepted[upper]; !ok { + continue + } + // Must also be a recognized bypass shape (catches operator-store names + // that the provider accepts but that are NOT LLM keys — e.g. an + // anthropic-protocol non-LLM secret). + if _, ok := platformManagedDirectLLMBypassKeys[upper]; !ok { + continue + } + return true + } + return false +} + // gatherCreateTimeLLMCredKeys collects the set of credential env-var NAMES that // will be available to the workspace at provision time, as known at CREATE: // @@ -194,12 +243,17 @@ func (h *WorkspaceHandler) evaluateCreateBYOKCredentialGate( // A non-platform (BYOK) provider was derived. Enforce the BYOK contract. - // GAP A — PRESENCE. Reuse the EXACT predicate the provision-time check keys - // off (anyBYOKCredentialKeyPresent) so create and provision can never - // disagree on "what is a usable BYOK credential." - if !anyBYOKCredentialKeyPresent(availableCredKeys) { + // GAP A — PRESENCE (provider-aware). RC 12082: the global bypass-set + // predicate is a pre-filter only — the real fail-closed check is + // provider-matched. A present key that is NOT in the resolved + // provider's auth_env (e.g. OPENAI_API_KEY in a claude-code+anthropic + // workspace) is a stray that the runtime cannot authenticate with; the + // gate MUST NOT let such a create through. anyBYOKCredentialKeyMatchesProvider + // shares the underlying bypass-set lookup with the provision-time check + // (hasAnyPlatformManagedLLMKey below) so the two gates can never disagree. + if !anyBYOKCredentialKeyMatchesProvider(provider, availableCredKeys) { msg := formatMissingBYOKCredentialError(LLMBillingModeBYOK) - log.Printf("Create: FAIL-CLOSED (MISSING_BYOK_CREDENTIAL) — runtime=%q model=%q resolved=byok provider=%q but no usable LLM credential present at any scope", + log.Printf("Create: FAIL-CLOSED (MISSING_BYOK_CREDENTIAL) — runtime=%q model=%q resolved=byok provider=%q but no usable LLM credential for THIS provider present at any scope", runtime, model, provider.Name) return createBYOKGateResult{ OK: false, diff --git a/workspace-server/internal/handlers/workspace_test.go b/workspace-server/internal/handlers/workspace_test.go index 32526be2..a748f9fb 100644 --- a/workspace-server/internal/handlers/workspace_test.go +++ b/workspace-server/internal/handlers/workspace_test.go @@ -508,53 +508,44 @@ func TestWorkspaceCreate_WithSecrets_Persists(t *testing.T) { } } -// TestWorkspaceCreate_SecretPersistFails_RollsBack asserts that a DB error -// while persisting a secret causes the entire transaction to roll back and -// the handler to return 500. The workspace row must NOT be committed. -func TestWorkspaceCreate_SecretPersistFails_RollsBack(t *testing.T) { - // internal#718 P2-B: this test asserts the rollback path on DB failure, not - // the strip gate. The create-time secret gate keys off the DERIVED mode now - // (org rung retired). An explicit byok override makes the workspace byok in a - // single resolver read (precedence-1 short-circuit), so the OPENAI_API_KEY - // write is allowed and reaches the INSERT-and-fail path this test exercises. - t.Setenv("MOLECULE_LLM_BILLING_MODE", "platform_managed") // org env ignored now +// TestWorkspaceCreate_BYOK_ProviderMismatch_Rejected422 pins the RC 12082 fix: +// an anthropic-derived model + only OPENAI_API_KEY (not in anthropic-api's +// auth_env) MUST be rejected synchronously with 422 MISSING_BYOK_CREDENTIAL, +// NOT reach the secret-persist step. The pre-fix bug: the cheap global +// bypass-set filter accepted OPENAI_API_KEY as presence even though the +// derived provider (anthropic-api) would never authenticate with it; the +// liveness gate silently skipped because no provider-auth-env key was +// readable; the workspace was 201'd credential-less and died at provision. +// +// This test exercises that exact provider-mismatch path and asserts the +// create-time BYOK gate catches it BEFORE any DB write. +func TestWorkspaceCreate_BYOK_ProviderMismatch_Rejected422(t *testing.T) { + // The create-time BYOK gate runs first. anthropic:claude-opus-4-7 derives to + // anthropic-api (BYOK). Only OPENAI_API_KEY is in the payload — but it is + // NOT in anthropic-api's auth_env. The provider-aware presence predicate + // (anyBYOKCredentialKeyMatchesProvider) rejects this with 422 BEFORE any DB + // write. The global presence query precedes the reject. + t.Setenv("MOLECULE_LLM_BILLING_MODE", "platform_managed") mock := setupTestDB(t) setupTestRedis(t) broadcaster := newTestBroadcaster() handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) - // The create-time BYOK gate runs first: anthropic:claude-opus-4-7 derives to - // anthropic-api (BYOK). Presence is satisfied by the payload OPENAI_API_KEY - // (a recognized LLM bypass key) — the liveness probe is skipped because - // OPENAI_API_KEY is not in anthropic-api's auth_env, so no value is read for - // the derived provider. Only the global presence query precedes BeginTx. - expectGlobalSecretsPresenceQuery(mock /* no global keys */) - - mock.ExpectBegin() - mock.ExpectExec("INSERT INTO workspaces"). - WillReturnResult(sqlmock.NewResult(0, 1)) - // Create() resolves billing mode per-workspace before the secret-strip gate. - // An explicit byok override short-circuits the resolver (precedence 1) so the - // OPENAI_API_KEY write is allowed and reaches the INSERT-and-fail path. - mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). - WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK)) - mock.ExpectExec("INSERT INTO workspace_secrets"). - WillReturnError(sql.ErrConnDone) // DB failure while writing secret - mock.ExpectRollback() // workspace insert must be rolled back + expectGlobalSecretsPresenceQuery(mock) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"name":"Rollback Agent","model":"anthropic:claude-opus-4-7","secrets":{"OPENAI_API_KEY":"sk-fail"}}` + body := `{"name":"Mismatch Agent","model":"anthropic:claude-opus-4-7","secrets":{"OPENAI_API_KEY":"sk-stray"}}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") handler.Create(c) - if w.Code != http.StatusInternalServerError { - t.Errorf("expected status 500, got %d: %s", w.Code, w.Body.String()) + if w.Code != http.StatusUnprocessableEntity { + t.Errorf("expected 422 (MISSING_BYOK_CREDENTIAL — provider mismatch), got %d: %s", w.Code, w.Body.String()) } - + // No DB writes should have happened — the BYOK gate fires BEFORE BeginTx. if err := mock.ExpectationsWereMet(); err != nil { t.Errorf("unmet sqlmock expectations: %v", err) }