P4 PR-2 internal#718: flip only-registered (runtime, model) gate from WARN to HARD-REJECT 422 (BEHAVIOR-AFFECTING) #1981

Merged
hongming merged 1 commits from feat/internal-718-p4-pr2-hard-reject-unregistered into main 2026-05-28 04:19:18 +00:00
3 changed files with 112 additions and 46 deletions
+38 -17
View File
@@ -428,30 +428,51 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
return
}
// internal#718 P2-B: ONLY-REGISTERED validation at the create boundary.
// internal#718 P4 PR-2: ONLY-REGISTERED validation at the create boundary
// FLIPPED from WARN to HARD-REJECT (was the P2-B WARN-mode signal).
//
// For a runtime the provider registry knows (first-party:
// claude-code/codex/hermes/openclaw) this checks the (runtime, model) pair
// against the registry's native model set. Fails OPEN for runtimes the
// registry doesn't know (langgraph/external/kimi/mock/federated) so
// non-first-party flows are UNCHANGED. Skipped for external workspaces.
// non-first-party / federated flows are UNCHANGED. Skipped for external
// workspaces (the URL is the contract, not the model — see MODEL_REQUIRED
// rationale above).
//
// P2 ENFORCEMENT MODE = WARN, not hard-reject (deliberate, scoped). The
// legacy colon-namespaced BYOK model vocabulary ("anthropic:claude-opus-4-7"
// etc.) is still live across the create/import/template corpus and is NOT
// yet reconciled into the registry's exact-id model sets — that convergence
// is P3 (canvas only-offers-registered) + P4 (template codegen). Hard-
// rejecting an unregistered (runtime, model) now would 422 those legitimate
// existing flows, a large behavior change outside P2's scope (P2's behavior
// delta is the billing/credential flip, below). So P2 surfaces the
// unregistered pair as a queryable warning + an X-Molecule-Model-Unregistered
// response header (operator/canvas signal) and lets create proceed; the gate
// flips to hard-reject (uncomment the 422 below) once P3/P4 land the
// vocabulary convergence. The registry model set is code-generated from the
// canonical providers.yaml (PR-A), so the check stays in sync with the SSOT.
// THE FLIP (was WARN, now 422):
// * P2-B carried the gate in WARN mode (X-Molecule-Model-Unregistered
// response header + log line, create proceeds) because the legacy
// colon-namespaced BYOK vocabulary ('anthropic:claude-opus-4-7' etc.)
// was live across the create corpus but not yet in the registry's
// exact-id model sets — hard-rejecting would have 422'd legitimate
// existing flows.
// * P4 PR-1 reconciled that colon vocab into the registry as
// first-class native-set entries (each runtime native set now lists
// both bare/slash AND colon forms for the BYOK ids the live corpus
// uses; openclaw's pre-existing colon-form precedent extended to
// claude-code). DeriveProvider / Manifest.ModelsForRuntime now
// resolves every legitimate model in the corpus.
// * With the reconcile landed, an unregistered (runtime, model) pair
// is a real misconfiguration — the corpus has no legitimate model
// this validator now rejects. We flip to 422
// UNREGISTERED_MODEL_FOR_RUNTIME so the caller fails LOUDLY at the
// boundary instead of provisioning a workspace that will wedge at
// adapter init (the codex 'anthropic:claude-opus-4-7' wedge class
// the MODEL_REQUIRED gate also targets).
//
// The registry model set is code-generated from the canonical
// providers.yaml (P2-A artifact); the check stays in sync with the SSOT
// via the verify-providers-gen + sync-providers-yaml CI gates.
if !isExternal {
if ok, why := validateRegisteredModelForRuntime(payload.Runtime, payload.Model); !ok {
log.Printf("Create: WARN unregistered model (runtime=%q model=%q): %s [internal#718 P2 warn-mode; hard-reject gated on P3/P4 vocabulary convergence]", payload.Runtime, payload.Model, why)
c.Header("X-Molecule-Model-Unregistered", "true")
log.Printf("Create: 422 UNREGISTERED_MODEL_FOR_RUNTIME (runtime=%q model=%q): %s [internal#718 P4 PR-2 hard-reject]", payload.Runtime, payload.Model, why)
c.JSON(http.StatusUnprocessableEntity, gin.H{
"error": why,
"runtime": payload.Runtime,
"model": payload.Model,
"code": "UNREGISTERED_MODEL_FOR_RUNTIME",
})
return
}
}
@@ -126,7 +126,7 @@ func TestWorkspaceCreate_WithInvalidCompute_ReturnsBadRequest(t *testing.T) {
c, _ := gin.CreateTestContext(w)
body := `{
"name":"Oversized Agent",
"model":"gpt-4",
"model":"claude-opus-4-7",
"compute":{"instance_type":"p4d.24xlarge"}
}`
c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
@@ -1786,7 +1786,7 @@ func TestWorkspaceCreate_TemplateDefaultsMissingRuntimeAndModel(t *testing.T) {
tier: 2
runtime: hermes
runtime_config:
model: nousresearch/hermes-4-70b
model: moonshot/kimi-k2.6
`)
if err := os.WriteFile(filepath.Join(templateDir, "config.yaml"), cfg, 0o644); err != nil {
t.Fatalf("write cfg: %v", err)
@@ -1841,7 +1841,7 @@ func TestWorkspaceCreate_TemplateDefaultsLegacyTopLevelModel(t *testing.T) {
cfg := []byte(`name: Legacy Agent
tier: 1
runtime: hermes
model: anthropic:claude-sonnet-4-5
model: moonshot/kimi-k2.5
`)
if err := os.WriteFile(filepath.Join(templateDir, "config.yaml"), cfg, 0o644); err != nil {
t.Fatalf("write cfg: %v", err)
@@ -1896,7 +1896,7 @@ func TestWorkspaceCreate_CallerModelOverridesTemplateDefault(t *testing.T) {
}
cfg := []byte(`runtime: hermes
runtime_config:
model: nousresearch/hermes-4-70b
model: moonshot/kimi-k2.6
`)
if err := os.WriteFile(filepath.Join(templateDir, "config.yaml"), cfg, 0o644); err != nil {
t.Fatalf("write cfg: %v", err)
@@ -1923,7 +1923,11 @@ runtime_config:
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
body := `{"name":"Custom Hermes","template":"hermes-template","model":"minimax/MiniMax-M2.7"}`
// Caller overrides with a different hermes-valid model — registry permits
// both moonshot/kimi-k2.5 and moonshot/kimi-k2.6 for hermes (P4 PR-1 native
// set). The template default would have been moonshot/kimi-k2.6; caller
// picks kimi-k2.5 explicitly to prove the override actually fires.
body := `{"name":"Custom Hermes","template":"hermes-template","model":"moonshot/kimi-k2.5"}`
c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
c.Request.Header.Set("Content-Type", "application/json")
@@ -2047,28 +2051,19 @@ func TestWorkspaceCreate_188_NoTemplateNoRuntime_NowMODEL_REQUIRED(t *testing.T)
}
}
// internal#718 P2-B: only-registered validation in P2 WARN-mode. A known
// (registry) runtime with a model NOT in its registered set is allowed to
// proceed (201) but flagged with the X-Molecule-Model-Unregistered response
// header + a queryable warning log. (Hard-reject is gated on P3/P4 vocabulary
// convergence — see the create handler comment; flipping to 422 there is a
// one-line change once the legacy colon-form model vocabulary is reconciled.)
func TestWorkspaceCreate_718_UnregisteredModelWarnsButProceeds(t *testing.T) {
// internal#718 P4 PR-2: only-registered validation HARD-REJECT. A known
// (registry) runtime with a model NOT in its registered set is rejected at the
// create boundary with 422 UNREGISTERED_MODEL_FOR_RUNTIME — no DB rows touched,
// no provisioning attempt, no wedged workspace. Replaces P2-B's WARN-mode
// header.
func TestWorkspaceCreate_718_P4_UnregisteredModelHardReject422(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
mock.ExpectBegin()
mock.ExpectExec("INSERT INTO workspaces").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectCommit()
mock.ExpectExec("INSERT INTO workspace_secrets").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectExec("INSERT INTO canvas_layouts").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectExec("INSERT INTO structure_events").
WillReturnResult(sqlmock.NewResult(0, 1))
// No DB expectations: the 422 fires BEFORE BeginTx, so any unexpected
// INSERT will fail the test via ExpectationsWereMet.
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@@ -2077,16 +2072,32 @@ func TestWorkspaceCreate_718_UnregisteredModelWarnsButProceeds(t *testing.T) {
c.Request.Header.Set("Content-Type", "application/json")
handler.Create(c)
if w.Code != http.StatusCreated {
t.Fatalf("unregistered-model create (warn-mode): expected 201, got %d: %s", w.Code, w.Body.String())
if w.Code != http.StatusUnprocessableEntity {
t.Fatalf("unregistered-model create: expected 422, got %d: %s", w.Code, w.Body.String())
}
if w.Header().Get("X-Molecule-Model-Unregistered") != "true" {
t.Errorf("expected X-Molecule-Model-Unregistered=true header on unregistered model, got %q", w.Header().Get("X-Molecule-Model-Unregistered"))
if !bytes.Contains(w.Body.Bytes(), []byte(`"code":"UNREGISTERED_MODEL_FOR_RUNTIME"`)) {
t.Errorf("expected code=UNREGISTERED_MODEL_FOR_RUNTIME in 422 body, got %s", w.Body.String())
}
if !bytes.Contains(w.Body.Bytes(), []byte(`"runtime":"claude-code"`)) {
t.Errorf("expected runtime=claude-code echoed in 422 body, got %s", w.Body.String())
}
if !bytes.Contains(w.Body.Bytes(), []byte(`"model":"totally-made-up-xyz"`)) {
t.Errorf("expected model echoed in 422 body, got %s", w.Body.String())
}
// The legacy WARN header must NOT fire — there is no "proceeded with
// warning" path anymore.
if w.Header().Get("X-Molecule-Model-Unregistered") != "" {
t.Errorf("P4 hard-reject must not emit the legacy WARN header, got %q", w.Header().Get("X-Molecule-Model-Unregistered"))
}
// Strict mock check: no DB ops should have happened.
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unexpected DB activity on hard-reject path: %v", err)
}
}
// A REGISTERED model on a registry runtime sets NO unregistered header.
func TestWorkspaceCreate_718_RegisteredModelNoWarnHeader(t *testing.T) {
// A REGISTERED model on a registry runtime proceeds with 201 and no unregistered header.
func TestWorkspaceCreate_718_P4_RegisteredModelProceeds(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
broadcaster := newTestBroadcaster()
@@ -2115,7 +2126,41 @@ func TestWorkspaceCreate_718_RegisteredModelNoWarnHeader(t *testing.T) {
t.Fatalf("registered-model create: expected 201, got %d: %s", w.Code, w.Body.String())
}
if w.Header().Get("X-Molecule-Model-Unregistered") != "" {
t.Errorf("registered model must NOT set the unregistered header, got %q", w.Header().Get("X-Molecule-Model-Unregistered"))
t.Errorf("registered model must NOT set the legacy unregistered header, got %q", w.Header().Get("X-Molecule-Model-Unregistered"))
}
}
// internal#718 P4 PR-2: the legacy colon-namespaced BYOK vocabulary
// 'anthropic:claude-opus-4-7' is now a FIRST-CLASS registered claude-code model
// (P4 PR-1 reconciled the colon-vocab into the registry). The hard-reject must
// NOT 422 this legitimate live-corpus form — verifying the reconcile + flip work
// together. This is the canonical regression guard for the colon-vocab path.
func TestWorkspaceCreate_718_P4_LegacyColonVocabAccepted(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
mock.ExpectBegin()
mock.ExpectExec("INSERT INTO workspaces").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectCommit()
mock.ExpectExec("INSERT INTO workspace_secrets").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectExec("INSERT INTO canvas_layouts").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectExec("INSERT INTO structure_events").
WillReturnResult(sqlmock.NewResult(0, 1))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
body := `{"name":"Legacy Colon","runtime":"claude-code","model":"anthropic:claude-opus-4-7"}`
c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
c.Request.Header.Set("Content-Type", "application/json")
handler.Create(c)
if w.Code != http.StatusCreated {
t.Fatalf("legacy colon-form create (P4 PR-1 reconciled): expected 201, got %d: %s", w.Code, w.Body.String())
}
}