fix(workspace): fail-closed on MODEL secret DB/decrypt errors in PATCH runtime #2963

Merged
devops-engineer merged 4 commits from fix/patch-runtime-resolved-model-workspace-secrets into main 2026-06-15 20:00:28 +00:00
3 changed files with 120 additions and 39 deletions
@@ -17,6 +17,7 @@ import (
"time"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/approvals"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/crypto"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/events"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
@@ -279,25 +280,44 @@ func (h *WorkspaceHandler) Update(c *gin.Context) {
// create-boundary uses (workspace_crud.go create + llm_billing_mode
// resolver); mirroring it here keeps the PATCH-runtime path consistent
// and catches the drift surface CR2 found on the #21 review.
var currentModel sql.NullString
if err := db.DB.QueryRowContext(ctx,
`SELECT COALESCE(model, '') FROM workspaces WHERE id = $1`, id,
).Scan(&currentModel); err != nil {
log.Printf("Update runtime validation: read current model for %s: %v", id, err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to read current model for runtime compatibility check"})
// The CURRENT model lives in the MODEL workspace_secret (the SSOT that
// GET /model + the boot path use), NOT the workspaces.model column.
// Reading the column wedged this PATCH at 500 for workspaces whose
// model is only in workspace_secrets (e.g. JRS: GET /model →
// source:"workspace_secrets"). Read it the same way GetModel does.
var currentModel string
var mEnc []byte
var mVer int
mErr := db.DB.QueryRowContext(ctx,
`SELECT encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = $1 AND key = 'MODEL'`, id,
).Scan(&mEnc, &mVer)
switch {
case mErr == nil:
if dec, derr := crypto.DecryptVersioned(mEnc, mVer); derr == nil {
currentModel = string(dec)
} else {
log.Printf("Update runtime: decrypt MODEL secret for %s failed: %v", id, derr)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to decrypt model secret"})
return
}
case errors.Is(mErr, sql.ErrNoRows):
// No stored model → unresolved. Skip the strict (runtime, model)
// check; a genuinely missing model fails closed at boot, not here.
default:
log.Printf("Update runtime: read MODEL secret for %s: %v", id, mErr)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to read model secret"})
return
}
if ok, reason := validateRegisteredModelForRuntime(runtimeStr, currentModel.String); !ok {
log.Printf("Update: PATCH runtime=%q on %s REJECTED (model=%q is not registered for that runtime): %s",
runtimeStr, id, currentModel.String, reason)
// 422 (Unprocessable Entity) matches the create-boundary's
// validateRegisteredModelForRuntime path (secrets.go:942, 952
// + workspace_crud.go create) — both reviewers flagged the
// original 400 as inconsistent. 422 = "syntactically valid
// PATCH body, but the (runtime, model) pair is unroutable
// per the registry SSOT", which is the precise semantic.
c.JSON(http.StatusUnprocessableEntity, gin.H{"error": reason})
return
// Only enforce the (runtime, model) pair when we actually HAVE a model.
if currentModel != "" {
if ok, reason := validateRegisteredModelForRuntime(runtimeStr, currentModel); !ok {
log.Printf("Update: PATCH runtime=%q on %s REJECTED (model=%q is not registered for that runtime): %s",
runtimeStr, id, currentModel, reason)
// 422 = syntactically valid PATCH body, but the (runtime, model)
// pair is unroutable per the registry SSOT.
c.JSON(http.StatusUnprocessableEntity, gin.H{"error": reason})
return
}
}
if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET runtime = $2, updated_at = now() WHERE id = $1`, id, runtimeStr); err != nil {
log.Printf("Update runtime error for %s: %v", id, err)
@@ -5,6 +5,7 @@ import (
"context"
"database/sql"
"encoding/json"
"errors"
"net/http"
"net/http/httptest"
"strings"
@@ -600,11 +601,11 @@ func TestUpdate_Runtime_RegisteredModelForRuntime_Passes(t *testing.T) {
mock.ExpectQuery(`SELECT EXISTS\(SELECT 1 FROM workspaces WHERE id = \$1\)`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
// New (newRuntime, currentModel) check — model is read so we can
// pair it with the new runtime.
mock.ExpectQuery(`SELECT COALESCE\(model, ''\) FROM workspaces WHERE id = \$1`).
// New (newRuntime, currentModel) check — the RESOLVED model is read from
// the MODEL workspace_secret (the SSOT), not the workspaces.model column.
mock.ExpectQuery(`SELECT encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1 AND key = 'MODEL'`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"model"}).AddRow("moonshot/kimi-k2.6"))
WillReturnRows(sqlmock.NewRows([]string{"encrypted_value", "encryption_version"}).AddRow([]byte("moonshot/kimi-k2.6"), 0))
// The validation passes (moonshot/kimi-k2.6 is a registered model
// for claude-code in the harness's provider registry), so the
// UPDATE proceeds.
@@ -649,9 +650,9 @@ func TestUpdate_Runtime_UnroutableModel_Fails422(t *testing.T) {
// "unroutable/unknown" has no model-prefix matches in any runtime's
// native provider set, and doesn't appear on any runtime's
// ModelsForRuntime list — both checks fail.
mock.ExpectQuery(`SELECT COALESCE\(model, ''\) FROM workspaces WHERE id = \$1`).
mock.ExpectQuery(`SELECT encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1 AND key = 'MODEL'`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"model"}).AddRow("unroutable/unknown"))
WillReturnRows(sqlmock.NewRows([]string{"encrypted_value", "encryption_version"}).AddRow([]byte("unroutable/unknown"), 0))
body := map[string]interface{}{"runtime": "claude-code"}
b, _ := json.Marshal(body)
@@ -713,3 +714,68 @@ func TestUpdate_Runtime_UnknownPseudoRuntime_Fails422(t *testing.T) {
t.Errorf("unmet expectations: %v", err)
}
}
// TestUpdate_Runtime_ModelUnresolved_SkipsCheckAndProceeds pins the JRS-conversion
// fix: the compat-check reads the RESOLVED model from the MODEL workspace_secret,
// not the workspaces.model column (which wedged the PATCH at 500 for workspaces
// whose model lives only in workspace_secrets). When the MODEL secret is absent
// (sql.ErrNoRows), the strict (runtime, model) check is SKIPPED — the boot path
// fail-closes on a genuinely missing model — so the PATCH must NOT 500 and must
// proceed to update the runtime.
func TestUpdate_Runtime_ModelUnresolved_SkipsCheckAndProceeds(t *testing.T) {
wsID := "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
mock, r := setupWorkspaceCrudTest(t)
h := newWorkspaceCrudHandler(t)
r.PATCH("/workspaces/:id", h.Update)
mock.ExpectQuery(`SELECT EXISTS\(SELECT 1 FROM workspaces WHERE id = \$1\)`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
// No MODEL secret → ErrNoRows → unresolved → strict check skipped (NOT 500).
mock.ExpectQuery(`SELECT encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1 AND key = 'MODEL'`).
WithArgs(wsID).
WillReturnError(sql.ErrNoRows)
mock.ExpectExec(`UPDATE workspaces\s+SET runtime = \$2`).
WithArgs(wsID, "claude-code").
WillReturnResult(sqlmock.NewResult(0, 1))
body := map[string]interface{}{"runtime": "claude-code"}
b, _ := json.Marshal(body)
req, _ := http.NewRequest("PATCH", "/workspaces/"+wsID, bytes.NewReader(b))
req.Header.Set("Content-Type", "application/json")
w := httptest.NewRecorder()
r.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Errorf("expected 200 (unresolved model → skip check, proceed), got %d: %s", w.Code, w.Body.String())
}
}
// TestUpdate_Runtime_ModelSecretDBError_Fails500 pins that a genuine DB error
// reading the MODEL workspace_secret is fail-closed (500). Only sql.ErrNoRows
// (unresolved model) skips the strict compat-check; real DB/decrypt errors
// must not silently let an unvalidated (runtime, model) PATCH through.
func TestUpdate_Runtime_ModelSecretDBError_Fails500(t *testing.T) {
wsID := "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
mock, r := setupWorkspaceCrudTest(t)
h := newWorkspaceCrudHandler(t)
r.PATCH("/workspaces/:id", h.Update)
mock.ExpectQuery(`SELECT EXISTS\(SELECT 1 FROM workspaces WHERE id = \$1\)`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
mock.ExpectQuery(`SELECT encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1 AND key = 'MODEL'`).
WithArgs(wsID).
WillReturnError(errors.New("database unavailable"))
body := map[string]interface{}{"runtime": "claude-code"}
b, _ := json.Marshal(body)
req, _ := http.NewRequest("PATCH", "/workspaces/"+wsID, bytes.NewReader(b))
req.Header.Set("Content-Type", "application/json")
w := httptest.NewRecorder()
r.ServeHTTP(w, req)
if w.Code != http.StatusInternalServerError {
t.Errorf("expected 500 (genuine DB error), got %d: %s", w.Code, w.Body.String())
}
}
@@ -958,14 +958,13 @@ func TestWorkspaceUpdate_RuntimeField(t *testing.T) {
mock.ExpectQuery("SELECT EXISTS.*workspaces WHERE id").
WithArgs("cccccccc-0006-0000-0000-000000000000").
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
// New (newRuntime, currentModel) compatibility check (#2926):
// the PATCH-runtime path now reads the current model + validates
// (runtime, model) against the provider registry. moonshot/kimi-k2.6
// is registered for claude-code in the harness's provider registry
// (providers.yaml:919) so the validation passes + the UPDATE proceeds.
mock.ExpectQuery(`SELECT COALESCE\(model, ''\) FROM workspaces WHERE id = \$1`).
// The PATCH-runtime compat-check reads the RESOLVED model from the
// MODEL workspace_secret (SSOT), not the workspaces.model column.
// moonshot/kimi-k2.6 is registered for claude-code in the harness's
// provider registry, so validation passes and the UPDATE proceeds.
mock.ExpectQuery(`SELECT encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1 AND key = 'MODEL'`).
WithArgs("cccccccc-0006-0000-0000-000000000000").
WillReturnRows(sqlmock.NewRows([]string{"model"}).AddRow("moonshot/kimi-k2.6"))
WillReturnRows(sqlmock.NewRows([]string{"encrypted_value", "encryption_version"}).AddRow([]byte("moonshot/kimi-k2.6"), 0))
mock.ExpectExec("UPDATE workspaces SET runtime").
WithArgs("cccccccc-0006-0000-0000-000000000000", "claude-code").
WillReturnResult(sqlmock.NewResult(0, 1))
@@ -1005,16 +1004,12 @@ func TestWorkspaceUpdate_RuntimeField_DBErrorReturnsServerError(t *testing.T) {
mock.ExpectQuery("SELECT EXISTS.*workspaces WHERE id").
WithArgs("cccccccc-0006-0000-0000-000000000001").
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
// New (newRuntime, currentModel) compatibility check (#2926):
// the SELECT COALESCE must succeed (the UPDATE is the one that
// errors out). moonshot/kimi-k2.6 is registered for hermes
// (providers.yaml:919) so the validation passes + the UPDATE is
// attempted, where the sqlmock is set to return the DB error.
mock.ExpectQuery(`SELECT COALESCE\(model, ''\) FROM workspaces WHERE id = \$1`).
// A genuine DB error reading the MODEL workspace_secret must fail
// closed (500). Only sql.ErrNoRows (unresolved model) is allowed to
// skip the strict compat-check; a real DB error must not silently
// let an unvalidated (runtime, model) PATCH through.
mock.ExpectQuery(`SELECT encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1 AND key = 'MODEL'`).
WithArgs("cccccccc-0006-0000-0000-000000000001").
WillReturnRows(sqlmock.NewRows([]string{"model"}).AddRow("moonshot/kimi-k2.6"))
mock.ExpectExec("UPDATE workspaces SET runtime").
WithArgs("cccccccc-0006-0000-0000-000000000001", "hermes").
WillReturnError(errors.New("database unavailable"))
w := httptest.NewRecorder()