fix(core#3162): fail-CLOSED on readStoredProviderSecret decrypt/read error #3165
@@ -24,6 +24,7 @@ package handlers
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
@@ -492,7 +493,18 @@ const conciergePlatformMCPCreateWorkspaceTool = "mcp__molecule-platform__create_
|
||||
func (h *WorkspaceHandler) ensureConciergeProvider(ctx context.Context, workspaceID string, envVars map[string]string) {
|
||||
// Respect an explicit provider already set (customer canvas pick or a prior
|
||||
// seed): loadWorkspaceSecrets already injected it into envVars. Do nothing.
|
||||
if existing := readStoredProviderSecret(ctx, workspaceID); existing != "" {
|
||||
//
|
||||
// Fail-CLOSED on decrypt/read error (core#3162): if the secret store returned
|
||||
// an error rather than a clean "" (the row exists but the value is unreadable
|
||||
// — transient DB blip, key-rotation drift, ciphertext corruption), we MUST NOT
|
||||
// fall through to the platform-provider pin below. Doing so would wedge a
|
||||
// transient decrypt failure into a fail-OPEN platform mis-pin on the next
|
||||
// provision — combined with a momentarily-empty MODEL, that could silently
|
||||
// route a BYOK/self-host concierge through the platform LLM proxy.
|
||||
if existing, readErr := readStoredProviderSecret(ctx, workspaceID); readErr != nil {
|
||||
log.Printf("Provisioner: concierge %s LLM_PROVIDER read failed (failing closed, NOT seeding): %v", workspaceID, readErr)
|
||||
return
|
||||
} else if existing != "" {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -524,22 +536,47 @@ func (h *WorkspaceHandler) ensureConciergeProvider(ctx context.Context, workspac
|
||||
}
|
||||
}
|
||||
|
||||
// readStoredProviderSecret returns the decrypted LLM_PROVIDER workspace_secret,
|
||||
// or "" when none is stored (or on any read/decrypt error — treated as "unset"
|
||||
// so a transient miss re-seeds rather than wedges). Mirrors readStoredModelSecret.
|
||||
func readStoredProviderSecret(ctx context.Context, workspaceID string) string {
|
||||
// readStoredProviderSecret returns the decrypted LLM_PROVIDER workspace_secret.
|
||||
// The second return value distinguishes the three observed states so the caller
|
||||
// can fail closed rather than fail open on a transient error:
|
||||
//
|
||||
// - (value, nil): a secret is stored and decrypted successfully → caller
|
||||
// respects the existing provider pin and skips the seed.
|
||||
// - ("", nil): no row exists for this workspace/key → caller may re-seed
|
||||
// safely (this is the fresh-boot / cleared-secret case).
|
||||
// - ("", error): the row exists (or the read otherwise succeeded) but the
|
||||
// decryption failed → caller MUST NOT treat this as "unset" and MUST NOT
|
||||
// fall back to seeding the platform provider. Returning "" without the
|
||||
// error used to wedge a transient decrypt failure into a fail-OPEN
|
||||
// platform-pin (see core#3162): combined with a momentarily-empty MODEL,
|
||||
// a BYOK/self-host concierge could be silently mis-routed onto the
|
||||
// platform LLM proxy on the next provision.
|
||||
//
|
||||
// `sql.ErrNoRows` is the canonical "no row" case and is mapped to ("", nil).
|
||||
// Any other Scan error or a DecryptVersioned error is treated as the
|
||||
// error-case and returned as ("", err).
|
||||
//
|
||||
// NOTE: this fix is scoped to the BYOK fail-open path (core#3162).
|
||||
// `readStoredModelSecret` has the same shape but is intentionally out of scope
|
||||
// per the issue body and PM's scope discipline (one item, don't bundle).
|
||||
// Tracking separately.
|
||||
func readStoredProviderSecret(ctx context.Context, workspaceID string) (string, error) {
|
||||
var stored []byte
|
||||
var version int
|
||||
if err := db.DB.QueryRowContext(ctx,
|
||||
err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = $1 AND key = 'LLM_PROVIDER'`,
|
||||
workspaceID).Scan(&stored, &version); err != nil {
|
||||
return ""
|
||||
}
|
||||
dec, err := crypto.DecryptVersioned(stored, version)
|
||||
workspaceID).Scan(&stored, &version)
|
||||
if err != nil {
|
||||
return ""
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return "", nil
|
||||
}
|
||||
return "", fmt.Errorf("readStoredProviderSecret scan: %w", err)
|
||||
}
|
||||
return string(dec)
|
||||
dec, derr := crypto.DecryptVersioned(stored, version)
|
||||
if derr != nil {
|
||||
return "", fmt.Errorf("readStoredProviderSecret decrypt: %w", derr)
|
||||
}
|
||||
return string(dec), nil
|
||||
}
|
||||
|
||||
// setProviderSecret persists (or clears, when provider == "") the LLM_PROVIDER
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os/exec"
|
||||
@@ -1039,3 +1040,103 @@ func TestRecordDeclaredPlugin_PrivilegedPluginEntitlement(t *testing.T) {
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestEnsureConciergeProvider_FailsClosedOnReadError is the CI regression gate
|
||||
// for core#3162 (BYOK fail-open): a transient decrypt/read error on an EXISTING
|
||||
// LLM_PROVIDER row used to be collapsed into "" and treated as "unset", which
|
||||
// combined with a momentarily-empty MODEL could silently mis-pin a BYOK/self-host
|
||||
// concierge onto the platform LLM proxy. The fix returns the error to the
|
||||
// caller; ensureConciergeProvider MUST fail closed (return without seeding) so
|
||||
// the next provision re-tries rather than silently mis-routing.
|
||||
func TestEnsureConciergeProvider_FailsClosedOnReadError(t *testing.T) {
|
||||
h := &WorkspaceHandler{}
|
||||
const providerSelQuery = `SELECT encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1 AND key = 'LLM_PROVIDER'`
|
||||
const secretInsert = `INSERT INTO workspace_secrets`
|
||||
|
||||
t.Run("decrypt error on existing row fails closed (does NOT seed platform)", func(t *testing.T) {
|
||||
// Real encrypted_value bytes that cannot be decrypted by the current
|
||||
// key/algorithm: forces crypto.DecryptVersioned to return an error.
|
||||
// This is the realistic "the row exists but the ciphertext is unreadable"
|
||||
// case — exactly the failure mode that previously fell through to a
|
||||
// fail-OPEN platform pin.
|
||||
mock := setupTestDB(t)
|
||||
mock.ExpectQuery(providerSelQuery).WithArgs("ws-decrypt-fail").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"encrypted_value", "encryption_version"}).
|
||||
AddRow([]byte("corrupt-ciphertext-that-cannot-be-decrypted"), 0))
|
||||
// NO ExpectExec: the fail-closed path MUST NOT persist LLM_PROVIDER.
|
||||
|
||||
env := map[string]string{} // empty MODEL — the mis-pin window
|
||||
h.ensureConciergeProvider(context.Background(), "ws-decrypt-fail", env)
|
||||
|
||||
if _, pinned := env["LLM_PROVIDER"]; pinned {
|
||||
t.Errorf("transient decrypt error caused a platform provider pin (env=%v) — would mis-route a BYOK/self-host concierge", env)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations (a pin INSERT means the fail-closed path leaked): %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("db scan error (non-ErrNoRows) fails closed", func(t *testing.T) {
|
||||
// A connection error / context-cancellation on the secret lookup. Not
|
||||
// the clean "row doesn't exist" case (sql.ErrNoRows) — this is a real
|
||||
// failure that must also fail closed.
|
||||
mock := setupTestDB(t)
|
||||
mock.ExpectQuery(providerSelQuery).WithArgs("ws-scan-fail").
|
||||
WillReturnError(fmt.Errorf("connection refused"))
|
||||
// NO ExpectExec: fail closed.
|
||||
|
||||
env := map[string]string{}
|
||||
h.ensureConciergeProvider(context.Background(), "ws-scan-fail", env)
|
||||
|
||||
if _, pinned := env["LLM_PROVIDER"]; pinned {
|
||||
t.Errorf("DB scan error caused a platform provider pin (env=%v)", env)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("sql.ErrNoRows (genuine unset) proceeds to seed (no regression)", func(t *testing.T) {
|
||||
// The clean "no row exists" case MUST still let the caller proceed to
|
||||
// the platform-provider seed. This is the fresh-boot / cleared-secret
|
||||
// case the existing happy-path tests cover; we re-pin it here so a
|
||||
// future refactor of the error-path can break it loudly.
|
||||
mock := setupTestDB(t)
|
||||
mock.ExpectQuery(providerSelQuery).WithArgs("ws-fresh-unset").
|
||||
WillReturnError(sql.ErrNoRows)
|
||||
mock.ExpectExec(secretInsert).
|
||||
WithArgs("ws-fresh-unset", sqlmock.AnyArg(), sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
env := map[string]string{} // empty MODEL → seed path
|
||||
h.ensureConciergeProvider(context.Background(), "ws-fresh-unset", env)
|
||||
|
||||
if env["LLM_PROVIDER"] != conciergeProvider {
|
||||
t.Errorf("genuine unset did not pin LLM_PROVIDER=%q; got %q (env=%v) — regression on the seed path", conciergeProvider, env["LLM_PROVIDER"], env)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations (the seed path did not persist): %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("existing stored provider is respected on successful read (no regression)", func(t *testing.T) {
|
||||
// Existing happy path: a stored provider row reads cleanly → caller
|
||||
// returns early without pinning. Pinned here as the regression
|
||||
// sentinel for the successful-read branch.
|
||||
mock := setupTestDB(t)
|
||||
mock.ExpectQuery(providerSelQuery).WithArgs("ws-existing-prov").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"encrypted_value", "encryption_version"}).
|
||||
AddRow([]byte("customer-picked-byok-provider"), 0))
|
||||
// NO ExpectExec: existing provider wins → no pin.
|
||||
|
||||
env := map[string]string{}
|
||||
h.ensureConciergeProvider(context.Background(), "ws-existing-prov", env)
|
||||
|
||||
if _, pinned := env["LLM_PROVIDER"]; pinned {
|
||||
t.Errorf("existing stored provider wrongly pinned platform (env=%v) — would override the customer pick", env)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user