From eacb8183c3fe66580cc4e2299a06672bbca687ad Mon Sep 17 00:00:00 2001 From: claude-ceo-assistant Date: Thu, 28 May 2026 03:21:39 +0000 Subject: [PATCH] P4 PR-2 internal#718: flip only-registered (runtime, model) gate from WARN to HARD-REJECT 422 (BEHAVIOR-AFFECTING) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WorkspaceHandler.Create now returns 422 UNREGISTERED_MODEL_FOR_RUNTIME when the provider registry knows the runtime but the (runtime, model) pair is not in its native model set. Was the P2-B WARN-mode signal (X-Molecule-Model-Unregistered header + log; create proceeds); now a hard rejection at the boundary with no DB rows touched. Behavior delta (under test): * Workspace with a REGISTERED (runtime, model) → 201, unchanged. * Workspace with an UNREGISTERED (runtime, model) → 422 with body {code:UNREGISTERED_MODEL_FOR_RUNTIME, error, runtime, model}, no DB writes (mock ExpectationsWereMet asserts zero unexpected DB calls). * Workspace with the legacy colon-form anthropic:claude-opus-4-7 for runtime=claude-code → 201 (P4 PR-1 reconciled the colon-vocab into the registry, making this a first-class registered model alongside the slash form). * Workspace with runtime NOT in the registry (langgraph/external/kimi/mock/federated) → unchanged (fails OPEN — federation-ready, the registry can not speak to non-first-party runtimes). * External workspaces (external=true or external-like runtime) → unchanged (URL is the contract, not the model). Why P4 vs P2-B: P2-B kept WARN-mode because the legacy colon-namespaced BYOK vocabulary (anthropic:claude-opus-4-7 etc.) was live across the create/import/template corpus and not yet in the registry. P4 PR-1 reconciled that vocab into the per-runtime native sets (each runtime now lists bare + slash + colon forms for the BYOK ids in the live corpus). With the reconcile landed, an unregistered pair is a real misconfiguration and the gate flips loud — the codex anthropic:claude-opus-4-7 wedge class (the MODEL_REQUIRED gate targets the same failure mode) now fails AT THE BOUNDARY instead of provisioning a workspace that will wedge at adapter init. Test surface (workspace_test.go): * TestWorkspaceCreate_718_P4_UnregisteredModelHardReject422 (NEW) — explicit 422 + body code + no DB writes * TestWorkspaceCreate_718_P4_RegisteredModelProceeds (renamed from _RegisteredModelNoWarnHeader) — 201 + no legacy WARN header * TestWorkspaceCreate_718_P4_LegacyColonVocabAccepted (NEW) — anthropic:claude-opus-4-7 on claude-code proceeds (the central regression guard for the PR-1 reconcile + PR-2 flip combo) * TestWorkspaceCreate_718_NonRegistryRuntimeFailsOpen — unchanged (federation path) Fixture updates for the flip (tests that previously used an unregistered model as a fixture for OTHER gate paths; updated to a valid model so those gates can actually fire): * TestWorkspaceCreate_WithInvalidCompute_ReturnsBadRequest — gpt-4 (no runtime owns it) → claude-opus-4-7 (so the compute-validation 400 path tests what it should) * TestWorkspaceCreate_TemplateDefaultsMissingRuntimeAndModel — hermes/nousresearch/hermes-4-70b → hermes/moonshot/kimi-k2.6 (hermes native set per the CTO matrix) * TestWorkspaceCreate_TemplateDefaultsLegacyTopLevelModel — hermes/anthropic:claude-sonnet-4-5 → hermes/moonshot/kimi-k2.5 * TestWorkspaceCreate_CallerModelOverridesTemplateDefault — hermes override minimax/MiniMax-M2.7 → moonshot/kimi-k2.5 (still tests the caller-overrides-template-default mechanic, just with a hermes-valid pair) Phase-1 falsification + Phase-2 design were established in PR-1. Phase-3 TDD: each new behavior assertion mapped to a discriminating test (422 vs 201 vs unchanged WARN-header absence). Phase-4 Five-Axis to follow in PR review. NOT regressed (verified via -short + -tags=integration -short for handlers + providers): * cp#362 anthropic passthrough (proxy layer; unaffected). * P1 proxy ResolveUpstream (registry resolution by namespace token; unaffected). * P2-B billing-derive (DeriveProvider semantics unchanged by the reconcile). * P3 templates-from-registry (GET /templates still serves ModelsForRuntime; PR-1 enlarges the set, this PR rejects calls outside it). Stacked on feat/internal-718-p4-pr1-reconcile-colon-vocab-sync (PR-1 must merge first; this PR's tests would 422 the legacy colon vocab otherwise). Refs internal#718. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../internal/handlers/workspace.go | 55 +++++++--- .../handlers/workspace_compute_test.go | 2 +- .../internal/handlers/workspace_test.go | 101 +++++++++++++----- 3 files changed, 112 insertions(+), 46 deletions(-) diff --git a/workspace-server/internal/handlers/workspace.go b/workspace-server/internal/handlers/workspace.go index 93554d080..02328e03f 100644 --- a/workspace-server/internal/handlers/workspace.go +++ b/workspace-server/internal/handlers/workspace.go @@ -428,30 +428,51 @@ func (h *WorkspaceHandler) Create(c *gin.Context) { return } - // internal#718 P2-B: ONLY-REGISTERED validation at the create boundary. + // internal#718 P4 PR-2: ONLY-REGISTERED validation at the create boundary — + // FLIPPED from WARN to HARD-REJECT (was the P2-B WARN-mode signal). + // // For a runtime the provider registry knows (first-party: // claude-code/codex/hermes/openclaw) this checks the (runtime, model) pair // against the registry's native model set. Fails OPEN for runtimes the // registry doesn't know (langgraph/external/kimi/mock/federated) so - // non-first-party flows are UNCHANGED. Skipped for external workspaces. + // non-first-party / federated flows are UNCHANGED. Skipped for external + // workspaces (the URL is the contract, not the model — see MODEL_REQUIRED + // rationale above). // - // P2 ENFORCEMENT MODE = WARN, not hard-reject (deliberate, scoped). The - // legacy colon-namespaced BYOK model vocabulary ("anthropic:claude-opus-4-7" - // etc.) is still live across the create/import/template corpus and is NOT - // yet reconciled into the registry's exact-id model sets — that convergence - // is P3 (canvas only-offers-registered) + P4 (template codegen). Hard- - // rejecting an unregistered (runtime, model) now would 422 those legitimate - // existing flows, a large behavior change outside P2's scope (P2's behavior - // delta is the billing/credential flip, below). So P2 surfaces the - // unregistered pair as a queryable warning + an X-Molecule-Model-Unregistered - // response header (operator/canvas signal) and lets create proceed; the gate - // flips to hard-reject (uncomment the 422 below) once P3/P4 land the - // vocabulary convergence. The registry model set is code-generated from the - // canonical providers.yaml (PR-A), so the check stays in sync with the SSOT. + // THE FLIP (was WARN, now 422): + // * P2-B carried the gate in WARN mode (X-Molecule-Model-Unregistered + // response header + log line, create proceeds) because the legacy + // colon-namespaced BYOK vocabulary ('anthropic:claude-opus-4-7' etc.) + // was live across the create corpus but not yet in the registry's + // exact-id model sets — hard-rejecting would have 422'd legitimate + // existing flows. + // * P4 PR-1 reconciled that colon vocab into the registry as + // first-class native-set entries (each runtime native set now lists + // both bare/slash AND colon forms for the BYOK ids the live corpus + // uses; openclaw's pre-existing colon-form precedent extended to + // claude-code). DeriveProvider / Manifest.ModelsForRuntime now + // resolves every legitimate model in the corpus. + // * With the reconcile landed, an unregistered (runtime, model) pair + // is a real misconfiguration — the corpus has no legitimate model + // this validator now rejects. We flip to 422 + // UNREGISTERED_MODEL_FOR_RUNTIME so the caller fails LOUDLY at the + // boundary instead of provisioning a workspace that will wedge at + // adapter init (the codex 'anthropic:claude-opus-4-7' wedge class + // the MODEL_REQUIRED gate also targets). + // + // The registry model set is code-generated from the canonical + // providers.yaml (P2-A artifact); the check stays in sync with the SSOT + // via the verify-providers-gen + sync-providers-yaml CI gates. if !isExternal { if ok, why := validateRegisteredModelForRuntime(payload.Runtime, payload.Model); !ok { - log.Printf("Create: WARN unregistered model (runtime=%q model=%q): %s [internal#718 P2 warn-mode; hard-reject gated on P3/P4 vocabulary convergence]", payload.Runtime, payload.Model, why) - c.Header("X-Molecule-Model-Unregistered", "true") + log.Printf("Create: 422 UNREGISTERED_MODEL_FOR_RUNTIME (runtime=%q model=%q): %s [internal#718 P4 PR-2 hard-reject]", payload.Runtime, payload.Model, why) + c.JSON(http.StatusUnprocessableEntity, gin.H{ + "error": why, + "runtime": payload.Runtime, + "model": payload.Model, + "code": "UNREGISTERED_MODEL_FOR_RUNTIME", + }) + return } } diff --git a/workspace-server/internal/handlers/workspace_compute_test.go b/workspace-server/internal/handlers/workspace_compute_test.go index 8a278a809..38d07113c 100644 --- a/workspace-server/internal/handlers/workspace_compute_test.go +++ b/workspace-server/internal/handlers/workspace_compute_test.go @@ -126,7 +126,7 @@ func TestWorkspaceCreate_WithInvalidCompute_ReturnsBadRequest(t *testing.T) { c, _ := gin.CreateTestContext(w) body := `{ "name":"Oversized Agent", - "model":"gpt-4", + "model":"claude-opus-4-7", "compute":{"instance_type":"p4d.24xlarge"} }` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) diff --git a/workspace-server/internal/handlers/workspace_test.go b/workspace-server/internal/handlers/workspace_test.go index 249b8496e..19f7d2934 100644 --- a/workspace-server/internal/handlers/workspace_test.go +++ b/workspace-server/internal/handlers/workspace_test.go @@ -1786,7 +1786,7 @@ func TestWorkspaceCreate_TemplateDefaultsMissingRuntimeAndModel(t *testing.T) { tier: 2 runtime: hermes runtime_config: - model: nousresearch/hermes-4-70b + model: moonshot/kimi-k2.6 `) if err := os.WriteFile(filepath.Join(templateDir, "config.yaml"), cfg, 0o644); err != nil { t.Fatalf("write cfg: %v", err) @@ -1841,7 +1841,7 @@ func TestWorkspaceCreate_TemplateDefaultsLegacyTopLevelModel(t *testing.T) { cfg := []byte(`name: Legacy Agent tier: 1 runtime: hermes -model: anthropic:claude-sonnet-4-5 +model: moonshot/kimi-k2.5 `) if err := os.WriteFile(filepath.Join(templateDir, "config.yaml"), cfg, 0o644); err != nil { t.Fatalf("write cfg: %v", err) @@ -1896,7 +1896,7 @@ func TestWorkspaceCreate_CallerModelOverridesTemplateDefault(t *testing.T) { } cfg := []byte(`runtime: hermes runtime_config: - model: nousresearch/hermes-4-70b + model: moonshot/kimi-k2.6 `) if err := os.WriteFile(filepath.Join(templateDir, "config.yaml"), cfg, 0o644); err != nil { t.Fatalf("write cfg: %v", err) @@ -1923,7 +1923,11 @@ runtime_config: w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"name":"Custom Hermes","template":"hermes-template","model":"minimax/MiniMax-M2.7"}` + // Caller overrides with a different hermes-valid model — registry permits + // both moonshot/kimi-k2.5 and moonshot/kimi-k2.6 for hermes (P4 PR-1 native + // set). The template default would have been moonshot/kimi-k2.6; caller + // picks kimi-k2.5 explicitly to prove the override actually fires. + body := `{"name":"Custom Hermes","template":"hermes-template","model":"moonshot/kimi-k2.5"}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") @@ -2047,28 +2051,19 @@ func TestWorkspaceCreate_188_NoTemplateNoRuntime_NowMODEL_REQUIRED(t *testing.T) } } -// internal#718 P2-B: only-registered validation in P2 WARN-mode. A known -// (registry) runtime with a model NOT in its registered set is allowed to -// proceed (201) but flagged with the X-Molecule-Model-Unregistered response -// header + a queryable warning log. (Hard-reject is gated on P3/P4 vocabulary -// convergence — see the create handler comment; flipping to 422 there is a -// one-line change once the legacy colon-form model vocabulary is reconciled.) -func TestWorkspaceCreate_718_UnregisteredModelWarnsButProceeds(t *testing.T) { +// internal#718 P4 PR-2: only-registered validation HARD-REJECT. A known +// (registry) runtime with a model NOT in its registered set is rejected at the +// create boundary with 422 UNREGISTERED_MODEL_FOR_RUNTIME — no DB rows touched, +// no provisioning attempt, no wedged workspace. Replaces P2-B's WARN-mode +// header. +func TestWorkspaceCreate_718_P4_UnregisteredModelHardReject422(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) broadcaster := newTestBroadcaster() handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) - mock.ExpectBegin() - mock.ExpectExec("INSERT INTO workspaces"). - WillReturnResult(sqlmock.NewResult(0, 1)) - mock.ExpectCommit() - mock.ExpectExec("INSERT INTO workspace_secrets"). - WillReturnResult(sqlmock.NewResult(0, 1)) - mock.ExpectExec("INSERT INTO canvas_layouts"). - WillReturnResult(sqlmock.NewResult(0, 1)) - mock.ExpectExec("INSERT INTO structure_events"). - WillReturnResult(sqlmock.NewResult(0, 1)) + // No DB expectations: the 422 fires BEFORE BeginTx, so any unexpected + // INSERT will fail the test via ExpectationsWereMet. w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) @@ -2077,16 +2072,32 @@ func TestWorkspaceCreate_718_UnregisteredModelWarnsButProceeds(t *testing.T) { c.Request.Header.Set("Content-Type", "application/json") handler.Create(c) - if w.Code != http.StatusCreated { - t.Fatalf("unregistered-model create (warn-mode): expected 201, got %d: %s", w.Code, w.Body.String()) + if w.Code != http.StatusUnprocessableEntity { + t.Fatalf("unregistered-model create: expected 422, got %d: %s", w.Code, w.Body.String()) } - if w.Header().Get("X-Molecule-Model-Unregistered") != "true" { - t.Errorf("expected X-Molecule-Model-Unregistered=true header on unregistered model, got %q", w.Header().Get("X-Molecule-Model-Unregistered")) + if !bytes.Contains(w.Body.Bytes(), []byte(`"code":"UNREGISTERED_MODEL_FOR_RUNTIME"`)) { + t.Errorf("expected code=UNREGISTERED_MODEL_FOR_RUNTIME in 422 body, got %s", w.Body.String()) + } + if !bytes.Contains(w.Body.Bytes(), []byte(`"runtime":"claude-code"`)) { + t.Errorf("expected runtime=claude-code echoed in 422 body, got %s", w.Body.String()) + } + if !bytes.Contains(w.Body.Bytes(), []byte(`"model":"totally-made-up-xyz"`)) { + t.Errorf("expected model echoed in 422 body, got %s", w.Body.String()) + } + // The legacy WARN header must NOT fire — there is no "proceeded with + // warning" path anymore. + if w.Header().Get("X-Molecule-Model-Unregistered") != "" { + t.Errorf("P4 hard-reject must not emit the legacy WARN header, got %q", w.Header().Get("X-Molecule-Model-Unregistered")) + } + + // Strict mock check: no DB ops should have happened. + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unexpected DB activity on hard-reject path: %v", err) } } -// A REGISTERED model on a registry runtime sets NO unregistered header. -func TestWorkspaceCreate_718_RegisteredModelNoWarnHeader(t *testing.T) { +// A REGISTERED model on a registry runtime proceeds with 201 and no unregistered header. +func TestWorkspaceCreate_718_P4_RegisteredModelProceeds(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) broadcaster := newTestBroadcaster() @@ -2115,7 +2126,41 @@ func TestWorkspaceCreate_718_RegisteredModelNoWarnHeader(t *testing.T) { t.Fatalf("registered-model create: expected 201, got %d: %s", w.Code, w.Body.String()) } if w.Header().Get("X-Molecule-Model-Unregistered") != "" { - t.Errorf("registered model must NOT set the unregistered header, got %q", w.Header().Get("X-Molecule-Model-Unregistered")) + t.Errorf("registered model must NOT set the legacy unregistered header, got %q", w.Header().Get("X-Molecule-Model-Unregistered")) + } +} + +// internal#718 P4 PR-2: the legacy colon-namespaced BYOK vocabulary +// 'anthropic:claude-opus-4-7' is now a FIRST-CLASS registered claude-code model +// (P4 PR-1 reconciled the colon-vocab into the registry). The hard-reject must +// NOT 422 this legitimate live-corpus form — verifying the reconcile + flip work +// together. This is the canonical regression guard for the colon-vocab path. +func TestWorkspaceCreate_718_P4_LegacyColonVocabAccepted(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + broadcaster := newTestBroadcaster() + handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + + mock.ExpectBegin() + mock.ExpectExec("INSERT INTO workspaces"). + WillReturnResult(sqlmock.NewResult(0, 1)) + mock.ExpectCommit() + mock.ExpectExec("INSERT INTO workspace_secrets"). + WillReturnResult(sqlmock.NewResult(0, 1)) + mock.ExpectExec("INSERT INTO canvas_layouts"). + WillReturnResult(sqlmock.NewResult(0, 1)) + mock.ExpectExec("INSERT INTO structure_events"). + WillReturnResult(sqlmock.NewResult(0, 1)) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + body := `{"name":"Legacy Colon","runtime":"claude-code","model":"anthropic:claude-opus-4-7"}` + c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) + c.Request.Header.Set("Content-Type", "application/json") + handler.Create(c) + + if w.Code != http.StatusCreated { + t.Fatalf("legacy colon-form create (P4 PR-1 reconciled): expected 201, got %d: %s", w.Code, w.Body.String()) } } -- 2.52.0