From f8e031a9710cf16454a22a02dc2bf041565631c5 Mon Sep 17 00:00:00 2001 From: cp-be Date: Thu, 21 May 2026 21:05:24 -0700 Subject: [PATCH 1/4] feat(workspace-server): kill DefaultModel + require model at Create (CTO 2026-05-22 SSOT) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Empirical trigger: Code Reviewer 5ba15d7e was created with `{"name":"Code Reviewer","runtime":"codex","tier":4,...}` — no model field. The legacy `models.DefaultModel("codex")` returned `"anthropic:claude-opus-4-7"`. Codex adapter only supports openai-* providers, so the workspace wedged forever in `not_configured` with: ValueError: codex adapter: workspace config picks provider='anthropic' but it is not in the providers registry PATCH /workspaces/:id explicitly disallows updating `model` (workspace_crud.go:138 — `model not patchable`), so the only recovery path was SQL UPDATE or delete+recreate. CTO 2026-05-22T03:42Z SSOT directive: model is REQUIRED user input at create time. The platform must not provide a default. The runtime must not fall back. The decision belongs to the user (or to the agent acting on the user's behalf), never to the platform. Soft fallbacks hide contract bugs — this is the canonical example. Changes: - DELETE `models.DefaultModel` (runtime_defaults.go). The function is the bug magnet — every callsite that previously fell back to it now fails closed. - `handlers.WorkspaceHandler.Create` (workspace.go): add MODEL_REQUIRED gate AFTER template-resolution and AFTER the runtime-default-to-langgraph path. Caller gets 422 with `code=MODEL_REQUIRED` and a hint message pointing at the contract. Same fail-closed shape as the existing controlplane#188 runtime-unresolved gate. - `ensureDefaultConfig` (workspace_provision.go): drop the DefaultModel call. If we ever reach this path with empty model, log loudly and let the workspace boot into not_configured — defence-in-depth assertion; the Create gate should have rejected the request upstream. - `org_import.go createWorkspaceTree`: drop the DefaultModel call. Org templates MUST declare `model:` per-workspace or under `defaults:` — return an error otherwise. Stops malformed templates from silently provisioning runtime-incompatible workspaces. - Tests: add `TestCreate_ModelRequired_Returns422` covering the three trigger shapes (bare name; explicit codex+no-model; explicit hermes+no-model). Update `TestWorkspaceCreate` to pass an explicit model (was a bare-defaults 201 test — now requires explicit model to express the new contract). Reduce `runtime_defaults_test.go` to a doc-comment file (function doesn't exist anymore; compile-time check is the contract). Known blast radius — DELIBERATELY NOT FIXED IN THIS COMMIT, surfaced for triage: The CTO directive inverts the prior contract; ~20 existing tests pin the OLD behaviour and now fail. They fall into two buckets: (a) Mechanical: add `"model":"..."` to request body. These tests exercise unrelated invariants (parent_id, SSRF, secrets, tier, runtime-label preservation, etc.) and would still pass once they supply an explicit model. ~15 tests in handlers_additional_test.go, registry_test.go, handlers_test.go, etc. (b) Semantic: these tests pinned the OLD contract directly and need rewriting or deletion: - `TestEnsureDefaultConfig_Hermes` / `_ClaudeCode` / `_EmptyRuntimeDefaultsToClaudeCode` — assert the YAML output contains the DefaultModel-derived string. Either pass an explicit model in the payload, or drop the YAML model assertion (it now reflects whatever the caller supplied). - `TestWorkspaceCreate_FirstDeploy_NoModel_NoSecretWritten` — literally asserts that empty model is a 201, just with no secrets written. Under the new contract empty model is a 422 with no DB writes at all. Test should be inverted to assert the 422 + no-DB-write outcome. - `TestWorkspaceCreate_188_NoTemplateNoRuntime_StillDefaultsLanggraph` — premise was "bare {name} should default to langgraph 201". Now bare {name} is a 422 because model is missing. Premise changes to "bare {name} now fails MODEL_REQUIRED before runtime resolution". Holding the test surgery for a follow-up so reviewers can sanity-check the contract change against the inventory above before we churn 20+ tests. Marking DRAFT until that follow-up lands. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../handlers/handlers_extended_test.go | 46 +++++++++++++ .../internal/handlers/handlers_test.go | 8 ++- .../internal/handlers/org_import.go | 13 ++-- .../internal/handlers/workspace.go | 31 +++++++++ .../internal/handlers/workspace_provision.go | 19 ++++-- .../internal/models/runtime_defaults.go | 58 +++++++--------- .../internal/models/runtime_defaults_test.go | 66 +++---------------- 7 files changed, 141 insertions(+), 100 deletions(-) diff --git a/workspace-server/internal/handlers/handlers_extended_test.go b/workspace-server/internal/handlers/handlers_extended_test.go index 5e297e24e..59f1f5ce8 100644 --- a/workspace-server/internal/handlers/handlers_extended_test.go +++ b/workspace-server/internal/handlers/handlers_extended_test.go @@ -777,6 +777,52 @@ func TestCreate_FieldValidation_Returns400(t *testing.T) { } } +// TestCreate_ModelRequired_Returns422 pins the CTO 2026-05-22 SSOT +// directive (feedback_workspace_model_required_no_platform_default_dynamic_credential_intake): +// model is required user input; the platform must not supply a default, +// the runtime must not fall back. Empirical trigger: Code Reviewer +// 5ba15d7e was created with `{"name":..., "runtime":"codex", ...}` (no +// model). The legacy DefaultModel fallback returned "anthropic:claude-opus-4-7" +// and codex adapter wedged forever — `picks provider='anthropic' but it +// is not in the providers registry`. The gate at the Create boundary +// turns that silent stuck-workspace failure into an immediate 422 the +// caller can react to. +// +// Three shapes covered: +// 1. bare name (no template, no runtime, no model) — formerly defaulted +// to langgraph + anthropic; now 422 because model is unspecified. +// 2. explicit runtime, no model — the Code Reviewer repro shape. +// 3. explicit runtime+template path, but template (when missing on +// disk or unreadable) would leave model empty — exercised here by +// pointing at a non-existent template under /tmp/configs. +func TestCreate_ModelRequired_Returns422(t *testing.T) { + setupTestDB(t) + setupTestRedis(t) + handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", "/tmp/configs") + + cases := []struct{ label, body string }{ + {"bare_name_no_runtime_no_model", `{"name":"x"}`}, + {"explicit_codex_no_model", `{"name":"Code Reviewer","role":"code reviewer","runtime":"codex","tier":4,"max_concurrent_tasks":1}`}, + {"explicit_hermes_no_model", `{"name":"researcher","runtime":"hermes"}`}, + } + for _, tc := range cases { + t.Run(tc.label, func(t *testing.T) { + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(tc.body)) + c.Request.Header.Set("Content-Type", "application/json") + handler.Create(c) + if w.Code != http.StatusUnprocessableEntity { + t.Errorf("Create(%s): want 422 MODEL_REQUIRED, got %d: %s", tc.label, w.Code, w.Body.String()) + return + } + if !bytes.Contains(w.Body.Bytes(), []byte(`"code":"MODEL_REQUIRED"`)) { + t.Errorf("Create(%s): want body containing code=MODEL_REQUIRED, got %s", tc.label, w.Body.String()) + } + }) + } +} + func TestUpdate_FieldValidation_Returns400(t *testing.T) { setupTestDB(t) setupTestRedis(t) diff --git a/workspace-server/internal/handlers/handlers_test.go b/workspace-server/internal/handlers/handlers_test.go index 7ce01b239..802a6f33d 100644 --- a/workspace-server/internal/handlers/handlers_test.go +++ b/workspace-server/internal/handlers/handlers_test.go @@ -386,7 +386,13 @@ func TestWorkspaceCreate(t *testing.T) { w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"name":"Test Agent","canvas":{"x":100,"y":200}}` + // Note: model is now required at the Create boundary (CTO 2026-05-22 + // SSOT directive — see feedback_workspace_model_required_no_platform_default_dynamic_credential_intake + // and TestCreate_ModelRequired_Returns422). This test happens to take + // the bare-defaults path (no template, no runtime → langgraph), so + // the body must declare an explicit model. Using a langgraph-compatible + // id; the test doesn't exercise model semantics beyond presence. + body := `{"name":"Test Agent","model":"anthropic:claude-opus-4-7","canvas":{"x":100,"y":200}}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") diff --git a/workspace-server/internal/handlers/org_import.go b/workspace-server/internal/handlers/org_import.go index 819192e36..cdce0639c 100644 --- a/workspace-server/internal/handlers/org_import.go +++ b/workspace-server/internal/handlers/org_import.go @@ -69,10 +69,15 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX model = defaults.Model } if model == "" { - // SSOT: per-runtime defaults live in models/runtime_defaults.go - // (see RFC #2873). Consolidated from a duplicate of the same - // branch in workspace_provision.go. - model = models.DefaultModel(runtime) + // SSOT (CTO 2026-05-22, feedback_workspace_model_required_no_platform_default_dynamic_credential_intake): + // model is REQUIRED. The org-import template MUST declare a + // model — either per-workspace (`ws.Model`) or via the org + // defaults block (`defaults.Model`). If neither is present + // the template is malformed and the import must fail-closed + // rather than silently provisioning a workspace with a + // runtime-incompatible default (the prior `anthropic:claude-opus-4-7` + // fallback wedged every codex workspace at adapter init). + return fmt.Errorf("org import: workspace %q has no model and the org defaults block does not provide one (runtime=%s) — model is a required field per the workspace-creation contract; either set `model:` on the workspace or under `defaults:`", ws.Name, runtime) } tier := ws.Tier if tier == 0 { diff --git a/workspace-server/internal/handlers/workspace.go b/workspace-server/internal/handlers/workspace.go index c89622fde..7ae242824 100644 --- a/workspace-server/internal/handlers/workspace.go +++ b/workspace-server/internal/handlers/workspace.go @@ -321,6 +321,37 @@ func (h *WorkspaceHandler) Create(c *gin.Context) { payload.Runtime = "langgraph" } + // SSOT (CTO 2026-05-22, feedback_workspace_model_required_no_platform_default_dynamic_credential_intake): + // model is REQUIRED user input. The platform must not provide a default; + // the runtime must not fall back. The decision belongs to the user (or + // to the agent acting on the user's behalf), never to the platform. + // + // Empirical trigger: Code Reviewer 5ba15d7e was created with + // `{"name":"Code Reviewer","role":"...","runtime":"codex",...}` (no + // model). The legacy `DefaultModel(runtime)` fallback in + // provisionWorkspace returned `"anthropic:claude-opus-4-7"`. Codex + // adapter only supports openai-* providers — it wedged forever with + // `codex adapter: workspace config picks provider='anthropic' but + // it is not in the providers registry`. PATCH /workspaces/:id + // explicitly disallows updating model (the comment literally reads + // `model not patchable`), so the only recovery path was SQL UPDATE + // or delete+recreate. + // + // Fail-closed at the Create boundary so the caller learns the + // contract immediately — same shape as the controlplane#188 + // runtime-unresolved gate above. Caller fixes the request, no + // EC2 launched, no stuck workspace, no operator paging. + if payload.Model == "" { + log.Printf("Create: FAIL-CLOSED — model is required (runtime=%q template=%q); refusing the silent DefaultModel fallback per CTO 2026-05-22 SSOT directive", payload.Runtime, payload.Template) + c.JSON(http.StatusUnprocessableEntity, gin.H{ + "error": "model is required and has no platform-side default — pass an explicit \"model\" in the request body, or use a \"template\" whose config.yaml declares one. See feedback_workspace_model_required_no_platform_default_dynamic_credential_intake for the contract.", + "runtime": payload.Runtime, + "template": payload.Template, + "code": "MODEL_REQUIRED", + }) + return + } + ctx := c.Request.Context() // Convert empty role to NULL diff --git a/workspace-server/internal/handlers/workspace_provision.go b/workspace-server/internal/handlers/workspace_provision.go index 11da5f448..9e05282d2 100644 --- a/workspace-server/internal/handlers/workspace_provision.go +++ b/workspace-server/internal/handlers/workspace_provision.go @@ -548,13 +548,22 @@ func (h *WorkspaceHandler) ensureDefaultConfig(workspaceID string, payload model // via a crafted runtime string (#241). runtime := sanitizeRuntime(payload.Runtime) - // Generate a minimal config.yaml + // Generate a minimal config.yaml. + // + // SSOT (CTO 2026-05-22): model is REQUIRED user input. The platform + // must not provide a default; the runtime must not fall back. The + // Create handler is responsible for rejecting empty model BEFORE + // reaching provisionWorkspace; this is a defence-in-depth assertion. + // If we hit here with an empty model the YAML below would still + // render a `model: ""` line — which renders all downstream provider + // derivation undefined. Log loudly and let the workspace boot into + // not_configured rather than masking the contract violation with a + // silently-broken default (the prior `anthropic:claude-opus-4-7` + // fallback was the canonical example — every codex workspace + // created without an explicit model wedged). model := payload.Model if model == "" { - // SSOT: per-runtime defaults live in models/runtime_defaults.go - // (see RFC #2873). Was previously duplicated here AND in - // org_import.go; consolidating prevents silent drift. - model = models.DefaultModel(runtime) + log.Printf("ensureDefaultConfig: workspace %s reached provisioning with empty model — Create handler should have rejected this; rendering empty model: \"\" in config.yaml (workspace will boot not_configured)", workspaceID) } if runtime == "claude-code" { model = normalizeClaudeCodeModel(model) diff --git a/workspace-server/internal/models/runtime_defaults.go b/workspace-server/internal/models/runtime_defaults.go index 79da0fba4..71dbadc63 100644 --- a/workspace-server/internal/models/runtime_defaults.go +++ b/workspace-server/internal/models/runtime_defaults.go @@ -1,39 +1,31 @@ package models -// runtime_defaults.go — single source of truth for per-runtime defaults -// the platform applies when the operator/agent didn't supply a value. +// runtime_defaults.go — DELETED helper. Intentionally empty. // -// Why this lives in models/ (not handlers/): default selection is a -// pure data fact about the runtime, not handler logic. Multiple -// callers (Create-workspace handler, org-import handler, future -// auto-provision paths) need the same answer; concentrating the -// rule here means one edit when a runtime's default changes. +// Previously held `DefaultModel(runtime string) string` which returned +// "sonnet" for claude-code and "anthropic:claude-opus-4-7" for everything +// else. That function was a SOFT-FALLBACK bug magnet: // -// Related work (RFC #2873): this is the seed for a future -// `RuntimeConfig` interface that will also expose `ProvisioningTimeout()`, -// `CapabilitiesSupported()`, and other per-runtime facts. For now the -// surface is one helper — extracted from the duplicate branch in -// workspace_provision.go:537 and org_import.go:54 that diverged silently -// during refactors before this consolidation. - -// DefaultModel returns the model slug to use when a workspace is -// created without an explicit model and the runtime can't infer one -// from its own config. +// - codex workspaces created without an explicit `model` silently +// received `anthropic:claude-opus-4-7`. Codex adapter only supports +// openai-* providers, so they wedged in `not_configured` with +// `codex adapter: workspace config picks provider='anthropic' but +// it is not in the providers registry`. The fallback never matched +// a runtime that could actually use it (only langgraph + hermes +// could even partially execute anthropic:claude-opus-4-7 without +// extra credential plumbing). It existed as a "must return +// something" placeholder that turned every silent miss into a +// prod incident. // -// - claude-code: "sonnet" — Anthropic's CLI accepts the short -// name and resolves it via the operator's anthropic-oauth or -// ANTHROPIC_API_KEY chain. -// - everything else (hermes, langgraph, autogen, codex, openclaw, -// external, ""): a fully-qualified -// vendor:model slug that the universal MODEL_PROVIDER chain in -// molecule-core PR #247 can route via per-vendor required_env. +// - The fallback hid the contract bug at every callsite: Create +// handler, org_import, anywhere a stale CreateWorkspacePayload +// bubbled through to provisionWorkspace. // -// The function never returns an empty string; an unknown runtime -// gets the universal default rather than failing closed (matches the -// pre-refactor behavior — both call sites used the same fallback). -func DefaultModel(runtime string) string { - if runtime == "claude-code" { - return "sonnet" - } - return "anthropic:claude-opus-4-7" -} +// SSOT principle (CTO 2026-05-22T03:42Z, feedback_workspace_model_required_no_platform_default_dynamic_credential_intake): +// model / provider / provider-credential are REQUIRED user input at +// create time. The platform must not provide a default. The runtime +// must not fall back. Decision belongs to the user (or to the agent +// acting on the user's behalf), never to the platform. +// +// Callers that previously fell back to DefaultModel must now fail-closed +// when model is empty after template-resolution. diff --git a/workspace-server/internal/models/runtime_defaults_test.go b/workspace-server/internal/models/runtime_defaults_test.go index 13873b082..99121bcb9 100644 --- a/workspace-server/internal/models/runtime_defaults_test.go +++ b/workspace-server/internal/models/runtime_defaults_test.go @@ -1,59 +1,11 @@ package models -import "testing" - -// TestDefaultModel pins the contract: known runtimes return their -// expected default; unknowns and the empty string fall through to the -// universal default. Add new runtimes here as `case` entries — pre-fix -// adding a runtime required two source edits + an audit; post-SSOT it -// requires one entry in DefaultModel + one assertion here. -func TestDefaultModel(t *testing.T) { - cases := []struct { - runtime string - want string - }{ - // Known runtimes. - {"claude-code", "sonnet"}, - - // Universal fallback for everything else. Each runtime is named - // explicitly so a future drift (e.g., adding a hermes-specific - // branch) shows up as a failure on the runtime that drifted, not - // as a generic "unknown" failure. - {"hermes", "anthropic:claude-opus-4-7"}, - {"langgraph", "anthropic:claude-opus-4-7"}, - {"autogen", "anthropic:claude-opus-4-7"}, - {"codex", "anthropic:claude-opus-4-7"}, - {"openclaw", "anthropic:claude-opus-4-7"}, - {"external", "anthropic:claude-opus-4-7"}, - - // Unknown / empty — fall through to universal default rather - // than failing closed. Pre-refactor both call sites also fell - // through; pinning the existing behavior, not changing it. - {"", "anthropic:claude-opus-4-7"}, - {"some-future-runtime", "anthropic:claude-opus-4-7"}, - {"CLAUDE-CODE", "anthropic:claude-opus-4-7"}, // case-sensitive — matches prior behavior - } - for _, tc := range cases { - t.Run(tc.runtime, func(t *testing.T) { - got := DefaultModel(tc.runtime) - if got != tc.want { - t.Errorf("DefaultModel(%q) = %q, want %q", tc.runtime, got, tc.want) - } - }) - } -} - -// TestDefaultModel_NeverEmpty — invariant: no input produces an empty -// string. The handlers that consume this would write empty into -// config.yaml, which the runtime then can't dispatch — pinning the -// non-empty contract here protects against a future "return early on -// unknown runtime" change that would silently break workspace creation. -func TestDefaultModel_NeverEmpty(t *testing.T) { - for _, runtime := range []string{ - "", "claude-code", "hermes", "unknown-runtime", - } { - if got := DefaultModel(runtime); got == "" { - t.Errorf("DefaultModel(%q) returned empty string", runtime) - } - } -} +// runtime_defaults_test.go — previously pinned DefaultModel's contract +// (claude-code → "sonnet", everything else → "anthropic:claude-opus-4-7"). +// +// DefaultModel was removed as a soft-fallback bug magnet (CTO 2026-05-22): +// model is REQUIRED user input; the platform must not provide a default. +// See runtime_defaults.go for the deletion rationale, and the new +// fail-closed gate in `handlers.WorkspaceHandler.Create` for the boundary +// enforcement. No test stub here — the contract is "this function does +// not exist", which the type-checker enforces at compile time. -- 2.52.0 From 1ece444ea2d4ff0edd1e3a38fcb54a88a581f451 Mon Sep 17 00:00:00 2001 From: cp-be Date: Thu, 21 May 2026 21:12:24 -0700 Subject: [PATCH 2/4] test(workspace-server): cascade test updates for MODEL_REQUIRED contract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to the prior commit (kill DefaultModel + 422 at Create when model missing). Fixes the two test buckets called out in the DRAFT PR description. (a) Mechanical: add `"model":"..."` to request bodies so tests can still reach the 201 path. These tests exercise unrelated invariants (parent_id wiring, claude-code runtime label, max_concurrent_tasks override, DB insert error rollback, defaults, SaaS tier-4 force, with- and-without secrets persistence/rollback, external SSRF gates, Kimi runtime label preservation, controlplane#188 explicit-runtime path, budget limit). Model strings picked from the same set the prior DefaultModel(runtime) would have returned (`sonnet` for claude-code, `anthropic:claude-opus-4-7` elsewhere) plus `gpt-5.5` for codex, `kimi-coding/kimi-k2-coding-6` for kimi, `external:custom` for the external runtime — so the tests exercise realistic shapes, not just a single placeholder. (b) Semantic — the three tests that pinned the OLD contract directly: - `TestEnsureDefaultConfig_Hermes` / `_ClaudeCode` / `_EmptyRuntimeDefaultsToClaudeCode` — the function no longer sources the default; it just renders what Create supplied. Each test now passes the model that the prior implicit DefaultModel branch would have returned, so the YAML assertion remains meaningful. Comment updated to explain the shift. - `TestWorkspaceCreate_FirstDeploy_NoModel_NoSecretWritten` was renamed `TestWorkspaceCreate_FirstDeploy_NoModel_Returns422` and its premise INVERTED. Pre-fix it asserted that empty model goes through to 201 with no workspace_secrets writes; post-fix it asserts the request 422s at the Create boundary with NO DB writes at all. The sqlmock has zero ExpectExec calls so a late-firing gate surfaces as "unexpected call to ExecQuery 'INSERT INTO workspaces'". Comment block calls out the inversion explicitly. - `TestWorkspaceCreate_188_NoTemplateNoRuntime_StillDefaultsLanggraph` — was "bare {name} routes to langgraph 201". Now split into two halves: bare body MUST 422 with MODEL_REQUIRED, and (with model supplied) the original langgraph default invariant still holds. The 422 body must show `runtime:"langgraph"` to prove the gate fires AFTER the langgraph-default branch — that ordering is load-bearing for the diagnostic clarity of the error. Verified: go test -count=1 -short -timeout 300s ./internal/handlers/... \ ./internal/models/... → OK The Create gate test (`TestCreate_ModelRequired_Returns422`) from the prior commit remains green; it covers the three trigger shapes from the empirical incident (bare name, explicit codex+no-model, explicit hermes+no-model). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../handlers/handlers_additional_test.go | 6 +- .../handlers/workspace_budget_test.go | 2 +- .../workspace_provision_shared_test.go | 53 ++++++++-------- .../handlers/workspace_provision_test.go | 26 ++++++-- .../internal/handlers/workspace_test.go | 63 ++++++++++++++----- 5 files changed, 100 insertions(+), 50 deletions(-) diff --git a/workspace-server/internal/handlers/handlers_additional_test.go b/workspace-server/internal/handlers/handlers_additional_test.go index 0e13600d5..ec4fb027d 100644 --- a/workspace-server/internal/handlers/handlers_additional_test.go +++ b/workspace-server/internal/handlers/handlers_additional_test.go @@ -44,7 +44,7 @@ func TestWorkspaceCreate_WithParentID(t *testing.T) { w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"name":"Child Agent","parent_id":"parent-ws-123"}` + body := `{"name":"Child Agent","model":"anthropic:claude-opus-4-7","parent_id":"parent-ws-123"}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") @@ -80,7 +80,7 @@ func TestWorkspaceCreate_ExplicitClaudeCodeRuntime(t *testing.T) { w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"name":"CC Agent","tier":2,"runtime":"claude-code","canvas":{"x":10,"y":20}}` + body := `{"name":"CC Agent","tier":2,"runtime":"claude-code","model":"sonnet","canvas":{"x":10,"y":20}}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") @@ -301,7 +301,7 @@ func TestWorkspaceCreate_MaxConcurrentTasksOverride(t *testing.T) { w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"name":"Leader Agent","runtime":"claude-code","max_concurrent_tasks":3}` + body := `{"name":"Leader Agent","runtime":"claude-code","model":"sonnet","max_concurrent_tasks":3}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") diff --git a/workspace-server/internal/handlers/workspace_budget_test.go b/workspace-server/internal/handlers/workspace_budget_test.go index 4652e2932..ed5822637 100644 --- a/workspace-server/internal/handlers/workspace_budget_test.go +++ b/workspace-server/internal/handlers/workspace_budget_test.go @@ -168,7 +168,7 @@ func TestWorkspaceBudget_Create_WithLimit(t *testing.T) { w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"name":"Budgeted Agent","budget_limit":1000}` + body := `{"name":"Budgeted Agent","model":"anthropic:claude-opus-4-7","budget_limit":1000}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") handler.Create(c) diff --git a/workspace-server/internal/handlers/workspace_provision_shared_test.go b/workspace-server/internal/handlers/workspace_provision_shared_test.go index fe3e1dea2..e7dc8f9ce 100644 --- a/workspace-server/internal/handlers/workspace_provision_shared_test.go +++ b/workspace-server/internal/handlers/workspace_provision_shared_test.go @@ -756,33 +756,33 @@ func TestWorkspaceCreate_FirstDeploy_PersistsModelAndProvider(t *testing.T) { } } -// TestWorkspaceCreate_FirstDeploy_NoModel_NoSecretWritten asserts that -// when payload.Model is empty, NEITHER MODEL nor LLM_PROVIDER is -// written. Important: the canvas can omit `model` (template inherits -// the runtime default later); we must not poison workspace_secrets with -// empty rows in that case. -func TestWorkspaceCreate_FirstDeploy_NoModel_NoSecretWritten(t *testing.T) { +// TestWorkspaceCreate_FirstDeploy_NoModel_Returns422 inverts the prior +// premise (CTO 2026-05-22 SSOT directive — see +// feedback_workspace_model_required_no_platform_default_dynamic_credential_intake +// and TestCreate_ModelRequired_Returns422 in handlers_extended_test.go). +// +// Pre-2026-05-22 the canvas was allowed to omit `model` and the workspace +// would 201 with no workspace_secrets rows for MODEL/LLM_PROVIDER (the +// thinking being that templates inherit the runtime default later). That +// "soft fallback" was the load-bearing bug magnet — `DefaultModel(runtime)` +// would later return `anthropic:claude-opus-4-7`, and codex workspaces +// wedged forever at adapter init. +// +// New contract: empty model is a 422 MODEL_REQUIRED, with NO DB writes +// at all. The gate fires at the Create boundary before INSERT INTO +// workspaces. The follow-on workspace_secrets gate (which the original +// test pinned) is therefore unreachable on the empty-model path — there +// is no row to mint secrets for. +func TestWorkspaceCreate_FirstDeploy_NoModel_Returns422(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) broadcaster := newTestBroadcaster() handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) - mock.ExpectBegin() - mock.ExpectExec("INSERT INTO workspaces"). - WillReturnResult(sqlmock.NewResult(0, 1)) - mock.ExpectCommit() - // NO INSERT INTO workspace_secrets here — the gate is payload.Model != "". - - mock.ExpectExec("INSERT INTO canvas_layouts"). - WillReturnResult(sqlmock.NewResult(0, 1)) - mock.ExpectExec("INSERT INTO structure_events"). - WillReturnResult(sqlmock.NewResult(0, 1)) - mock.ExpectExec(`UPDATE workspaces SET status =`). - WillReturnResult(sqlmock.NewResult(0, 1)) - mock.ExpectExec("INSERT INTO workspace_auth_tokens"). - WillReturnResult(sqlmock.NewResult(0, 1)) - mock.ExpectExec("INSERT INTO structure_events"). - WillReturnResult(sqlmock.NewResult(0, 1)) + // NO mock.ExpectBegin / INSERT INTO workspaces — the Create gate + // MUST fire before any DB write. If the gate fires late, sqlmock + // will surface "call to ExecQuery 'INSERT INTO workspaces' was not + // expected" — which is exactly the failure mode we want to flag. w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) @@ -792,11 +792,14 @@ func TestWorkspaceCreate_FirstDeploy_NoModel_NoSecretWritten(t *testing.T) { handler.Create(c) - if w.Code != http.StatusCreated { - t.Fatalf("expected status 201, got %d: %s", w.Code, w.Body.String()) + if w.Code != http.StatusUnprocessableEntity { + t.Fatalf("expected 422 MODEL_REQUIRED for empty model, got %d: %s", w.Code, w.Body.String()) + } + if !bytes.Contains(w.Body.Bytes(), []byte(`"code":"MODEL_REQUIRED"`)) { + t.Errorf("expected code=MODEL_REQUIRED in body, got %s", w.Body.String()) } if err := mock.ExpectationsWereMet(); err != nil { - t.Errorf("sqlmock expectations not met — empty payload.Model should NOT trigger workspace_secrets writes: %v", err) + t.Errorf("sqlmock saw an unexpected DB write — the MODEL_REQUIRED gate fired too late: %v", err) } } diff --git a/workspace-server/internal/handlers/workspace_provision_test.go b/workspace-server/internal/handlers/workspace_provision_test.go index a5e46d64a..7609068d6 100644 --- a/workspace-server/internal/handlers/workspace_provision_test.go +++ b/workspace-server/internal/handlers/workspace_provision_test.go @@ -193,10 +193,17 @@ func TestEnsureDefaultConfig_Hermes(t *testing.T) { broadcaster := newTestBroadcaster() handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + // Post-CTO-SSOT-directive (2026-05-22): model is required user input; + // ensureDefaultConfig no longer fills in a runtime default. The Create + // handler gates on empty model and 422s before reaching here, so this + // test now passes the model explicitly to exercise the YAML rendering + // path — same model value the prior implicit DefaultModel("hermes") + // returned. payload := models.CreateWorkspacePayload{ Name: "Test Agent", Tier: 1, Runtime: "hermes", + Model: "anthropic:claude-opus-4-7", } files := handler.ensureDefaultConfig("ws-test-123", payload) @@ -219,7 +226,7 @@ func TestEnsureDefaultConfig_Hermes(t *testing.T) { t.Errorf("config.yaml missing tier, got:\n%s", content) } if !contains(content, `model: "anthropic:claude-opus-4-7"`) { - t.Errorf("config.yaml should use default non-claude model, got:\n%s", content) + t.Errorf("config.yaml should render the supplied model, got:\n%s", content) } } @@ -227,10 +234,14 @@ func TestEnsureDefaultConfig_ClaudeCode(t *testing.T) { broadcaster := newTestBroadcaster() handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + // Post-CTO-SSOT-directive (2026-05-22): model is supplied explicitly + // instead of relying on the deleted DefaultModel("claude-code") = + // "sonnet" fallback. The Create handler 422s on empty model upstream. payload := models.CreateWorkspacePayload{ Name: "Code Agent", Tier: 2, Runtime: "claude-code", + Model: "sonnet", } files := handler.ensureDefaultConfig("ws-code-123", payload) @@ -407,9 +418,16 @@ func TestEnsureDefaultConfig_EmptyRuntimeDefaultsToClaudeCode(t *testing.T) { broadcaster := newTestBroadcaster() handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + // Post-CTO-SSOT-directive (2026-05-22): ensureDefaultConfig is no + // longer the source of the model default — it just renders whatever + // the Create handler decided. The "empty runtime → claude-code" + // fallback inside sanitizeRuntime() is still in effect; this test + // continues to pin that behaviour by supplying the explicit + // claude-code model that the Create handler would have required. payload := models.CreateWorkspacePayload{ - Name: "Default Agent", - Tier: 1, + Name: "Default Agent", + Tier: 1, + Model: "sonnet", } files := handler.ensureDefaultConfig("ws-empty-rt", payload) @@ -418,7 +436,7 @@ func TestEnsureDefaultConfig_EmptyRuntimeDefaultsToClaudeCode(t *testing.T) { t.Errorf("empty runtime should default to claude-code, got:\n%s", configYAML) } if !contains(configYAML, `model: "sonnet"`) { - t.Errorf("claude-code default model should be sonnet (quoted), got:\n%s", configYAML) + t.Errorf("claude-code workspace should render the supplied model (quoted), got:\n%s", configYAML) } } diff --git a/workspace-server/internal/handlers/workspace_test.go b/workspace-server/internal/handlers/workspace_test.go index 7f329da2e..dc55cf311 100644 --- a/workspace-server/internal/handlers/workspace_test.go +++ b/workspace-server/internal/handlers/workspace_test.go @@ -349,7 +349,7 @@ func TestWorkspaceCreate_DBInsertError(t *testing.T) { w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"name":"Failing Agent"}` + body := `{"name":"Failing Agent","model":"anthropic:claude-opus-4-7"}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") @@ -391,7 +391,7 @@ func TestWorkspaceCreate_DefaultsApplied(t *testing.T) { w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"name":"Default Agent"}` + body := `{"name":"Default Agent","model":"anthropic:claude-opus-4-7"}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") @@ -438,7 +438,7 @@ func TestWorkspaceCreate_SaaSHardForcesTier4(t *testing.T) { w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"name":"SaaS External Agent","runtime":"external","external":true,"url":"https://example.com/agent","tier":2}` + body := `{"name":"SaaS External Agent","runtime":"external","model":"external:custom","external":true,"url":"https://example.com/agent","tier":2}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") @@ -479,7 +479,7 @@ func TestWorkspaceCreate_WithSecrets_Persists(t *testing.T) { w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"name":"Hermes Agent","runtime":"hermes","external":true,"secrets":{"HERMES_API_KEY":"sk-test-123"}}` + body := `{"name":"Hermes Agent","runtime":"hermes","model":"anthropic:claude-opus-4-7","external":true,"secrets":{"HERMES_API_KEY":"sk-test-123"}}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") @@ -513,7 +513,7 @@ func TestWorkspaceCreate_SecretPersistFails_RollsBack(t *testing.T) { w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"name":"Rollback Agent","secrets":{"OPENAI_API_KEY":"sk-fail"}}` + body := `{"name":"Rollback Agent","model":"anthropic:claude-opus-4-7","secrets":{"OPENAI_API_KEY":"sk-fail"}}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") @@ -548,7 +548,7 @@ func TestWorkspaceCreate_EmptySecrets_OK(t *testing.T) { w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"name":"No Secrets Agent","external":true,"secrets":{}}` + body := `{"name":"No Secrets Agent","model":"anthropic:claude-opus-4-7","external":true,"secrets":{}}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") @@ -587,7 +587,7 @@ func TestWorkspaceCreate_ExternalURL_SSRFSafe(t *testing.T) { w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"name":"Ext Agent","runtime":"external","external":true,"url":"http://localhost:8000"}` + body := `{"name":"Ext Agent","runtime":"external","model":"external:custom","external":true,"url":"http://localhost:8000"}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") @@ -629,7 +629,7 @@ func TestWorkspaceCreate_KimiRuntime_PreservesLabel(t *testing.T) { w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"name":"Kimi Agent","runtime":"kimi","tier":3,"canvas":{"x":100,"y":100}}` + body := `{"name":"Kimi Agent","runtime":"kimi","model":"kimi-coding/kimi-k2-coding-6","tier":3,"canvas":{"x":100,"y":100}}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") @@ -659,7 +659,7 @@ func TestWorkspaceCreate_ExternalURL_SSRFMetadataBlocked(t *testing.T) { w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"name":"Bad Agent","runtime":"external","external":true,"url":"http://169.254.169.254/latest/meta-data/"}` + body := `{"name":"Bad Agent","runtime":"external","model":"external:custom","external":true,"url":"http://169.254.169.254/latest/meta-data/"}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") @@ -690,7 +690,7 @@ func TestWorkspaceCreate_ExternalURL_SSRFLoopbackBlocked(t *testing.T) { w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"name":"Bad Loopback","runtime":"external","external":true,"url":"http://127.0.0.1:9000/a2a"}` + body := `{"name":"Bad Loopback","runtime":"external","model":"external:custom","external":true,"url":"http://127.0.0.1:9000/a2a"}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") @@ -1844,15 +1844,44 @@ func TestWorkspaceCreate_188_TemplateConfigNoRuntimeKey_FailsClosed(t *testing.T } } -// Regression guard: the legitimate default path (no template, no runtime — -// bare {"name":...}) MUST still default to langgraph and return 201. The -// #188 fix must not break this. +// Regression guard for the legitimate default path (no template, no +// runtime — bare `{"name":...}`). Pre-2026-05-22 this returned 201 with +// runtime defaulted to langgraph + model defaulted to +// "anthropic:claude-opus-4-7" via the (now-deleted) DefaultModel helper. +// Post-CTO-SSOT-directive the bare body must fail-closed at MODEL_REQUIRED +// before reaching the runtime-default langgraph branch — the model +// gate runs AFTER the langgraph fallback so we can observe the +// "runtime=langgraph, model=missing" framing in the error body. With +// model supplied, the langgraph default still kicks in (so the original +// invariant — bare-name still routes to langgraph — is preserved when +// the new contract is satisfied). func TestWorkspaceCreate_188_NoTemplateNoRuntime_StillDefaultsLanggraph(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) broadcaster := newTestBroadcaster() handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + // First half: bare body now MUST 422 with MODEL_REQUIRED. No DB + // writes are expected — the gate fires before INSERT. + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + body := `{"name":"Plain Default"}` + c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) + c.Request.Header.Set("Content-Type", "application/json") + handler.Create(c) + if w.Code != http.StatusUnprocessableEntity { + t.Fatalf("bare-body create: expected 422 MODEL_REQUIRED, got %d: %s", w.Code, w.Body.String()) + } + if !bytes.Contains(w.Body.Bytes(), []byte(`"code":"MODEL_REQUIRED"`)) { + t.Errorf("bare-body create: expected code=MODEL_REQUIRED in body, got %s", w.Body.String()) + } + if !bytes.Contains(w.Body.Bytes(), []byte(`"runtime":"langgraph"`)) { + t.Errorf("bare-body create: expected runtime=langgraph in 422 body (gate runs after the langgraph default), got %s", w.Body.String()) + } + + // Second half: with explicit model, the langgraph default still + // kicks in — original invariant (bare-name routes to langgraph) + // preserved when the new contract is satisfied. mock.ExpectBegin() mock.ExpectExec("INSERT INTO workspaces"). WithArgs(sqlmock.AnyArg(), "Plain Default", nil, 3, "langgraph", sqlmock.AnyArg(), (*string)(nil), nil, "none", (*int64)(nil), models.DefaultMaxConcurrentTasks, "push"). @@ -1864,9 +1893,9 @@ func TestWorkspaceCreate_188_NoTemplateNoRuntime_StillDefaultsLanggraph(t *testi mock.ExpectExec("INSERT INTO structure_events"). WillReturnResult(sqlmock.NewResult(0, 1)) - w := httptest.NewRecorder() - c, _ := gin.CreateTestContext(w) - body := `{"name":"Plain Default"}` + w = httptest.NewRecorder() + c, _ = gin.CreateTestContext(w) + body = `{"name":"Plain Default","model":"anthropic:claude-opus-4-7"}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") @@ -1901,7 +1930,7 @@ func TestWorkspaceCreate_188_ExplicitRuntimeNoTemplate_OK(t *testing.T) { w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"name":"Explicit Codex","runtime":"codex"}` + body := `{"name":"Explicit Codex","runtime":"codex","model":"gpt-5.5"}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") -- 2.52.0 From a3c15bc9bee559a60dfe3cec2e2d7325b5053953 Mon Sep 17 00:00:00 2001 From: cp-be Date: Thu, 21 May 2026 21:42:49 -0700 Subject: [PATCH 3/4] feat(workspace-server): exempt external runtime from MODEL_REQUIRED + E2E updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to the kill-DefaultModel + Create-handler-gate commits. PR#1667 CI surfaced two real failures: - E2E Peer Visibility (local) - (test_claude_code_e2e.sh, depending on which workflow) Root cause: the MODEL_REQUIRED gate as initially written 422s every external workspace too. External workspaces intentionally do NOT spawn a Docker container or run an adapter (workspace_provision.go:497-498: "external is a first-class runtime that intentionally does NOT spawn a Docker container"). They delegate to a registered URL — model has no meaning for them; the URL is the contract. Requiring it would 422 every legitimate "register my agent at https://..." flow. Changes: - `handlers.WorkspaceHandler.Create` (workspace.go): exempt external workspaces from the MODEL_REQUIRED gate. Two spellings count as external — `payload.External == true` and `isExternalLikeRuntime(payload.Runtime)`. The helper is already used throughout the codebase for the same "is this a URL-delegated workspace" decision (discovery.go, registry.go, a2a_proxy_helpers.go, plugins.go, workspace_restart.go). - New test `TestCreate_ExternalRuntime_NoModel_OK`: pins the exemption. Uses the test_api.sh Echo Agent body shape verbatim — empty model, `runtime:"external","external":true` → 201. - Rewrote `TestWorkspaceCreate_188_NoTemplateNoRuntime_StillDefaultsLanggraph` to `_NowMODEL_REQUIRED` — the bare-body langgraph 201 path no longer exists; the new test asserts the 422 shape includes `runtime:"langgraph"` so the diagnostic still surfaces what runtime WOULD have been used (the gate fires AFTER the langgraph-default assignment). Bare-body-with-explicit-model 201 path is covered by the existing `TestWorkspaceCreate` (which I patched in the prior commit) — no duplicate needed here. - `TestWorkspaceCreate_FirstDeploy_NoModel_Returns422` body now uses `runtime:"hermes"` without `external:true` to ensure the gate actually fires (hermes spawns a real adapter; the previous `runtime:"hermes","external":true` shape would now exempt and 201). - `tests/e2e/test_peer_visibility_mcp_local.sh`: added `_model_for_runtime` helper at both create sites (PARENT + sibling) so the parent + per-runtime sibling workspaces declare an explicit model. Mapping mirrors what the deleted DefaultModel returned plus the gpt-5.5 / kimi / minimax slugs used in production. - `tests/e2e/test_claude_code_e2e.sh`: added `"model":"sonnet"` to both POST bodies (ROOT + CHILD). Both use `runtime:"claude-code"` which spawns a real adapter, not exempt. Other E2E scripts inspected: - `test_api.sh` — already uses `runtime:"external","external":true`, exempt by the new gate. No change. - `test_a2a_e2e.sh`:133, `test_activity_e2e.sh`:218, `test_dev_mode.sh`:72, `test_workspace_abilities_e2e.sh`:93,99, `test_comprehensive_e2e.sh`:78,105,134,139,144, `test_notify_attachments_e2e.sh`:95, `test_mcp_stdio_staging.sh`:76 — these create workspaces without an explicit runtime field. They fall through to the langgraph default in the Create handler and are NOT external. They will still 422 under the new contract; held for a separate cleanup commit so the diff stays scoped to the PR#1667 failing CI surface (Peer Visibility + claude-code flows). Tracked for follow-up. Verified: go test -count=1 -short -timeout 300s ./internal/handlers/... \ ./internal/models/... → OK Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/e2e/test_claude_code_e2e.sh | 7 +- tests/e2e/test_peer_visibility_mcp_local.sh | 21 +++++- .../handlers/handlers_extended_test.go | 51 +++++++++++++++ .../internal/handlers/workspace.go | 22 +++++-- .../workspace_provision_shared_test.go | 7 +- .../internal/handlers/workspace_test.go | 65 ++++++------------- 6 files changed, 119 insertions(+), 54 deletions(-) diff --git a/tests/e2e/test_claude_code_e2e.sh b/tests/e2e/test_claude_code_e2e.sh index 3635869db..4b6c0d1b4 100755 --- a/tests/e2e/test_claude_code_e2e.sh +++ b/tests/e2e/test_claude_code_e2e.sh @@ -50,13 +50,16 @@ docker rm $(docker ps -aq --filter "name=ws-") 2>/dev/null || true echo "" echo "--- Create Workspaces ---" +# model is required at the Create boundary (CTO 2026-05-22 SSOT — +# feedback_workspace_model_required_no_platform_default_dynamic_credential_intake). +# Pass the same value the deleted DefaultModel("claude-code") returned. ROOT=$(curl -s -X POST $PLATFORM/workspaces -H "Content-Type: application/json" \ - -d '{"name":"Root Agent","role":"Company coordinator","runtime":"claude-code","tier":3}' \ + -d '{"name":"Root Agent","role":"Company coordinator","runtime":"claude-code","model":"sonnet","tier":3}' \ | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") check_contains "Create root workspace" "-" "$ROOT" CHILD=$(curl -s -X POST $PLATFORM/workspaces -H "Content-Type: application/json" \ - -d "{\"name\":\"Child Agent\",\"role\":\"Sub-team member\",\"runtime\":\"claude-code\",\"tier\":2,\"parent_id\":\"$ROOT\"}" \ + -d "{\"name\":\"Child Agent\",\"role\":\"Sub-team member\",\"runtime\":\"claude-code\",\"model\":\"sonnet\",\"tier\":2,\"parent_id\":\"$ROOT\"}" \ | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") check_contains "Create child workspace" "-" "$CHILD" diff --git a/tests/e2e/test_peer_visibility_mcp_local.sh b/tests/e2e/test_peer_visibility_mcp_local.sh index 4fd378d45..833bae927 100755 --- a/tests/e2e/test_peer_visibility_mcp_local.sh +++ b/tests/e2e/test_peer_visibility_mcp_local.sh @@ -241,8 +241,24 @@ else fi log "1/5 provisioning parent ($PARENT_RUNTIME, mode=$PV_LOCAL_PROVISION_MODE) + one sibling per runtime under test..." +# Map runtime → model per the CTO 2026-05-22 SSOT directive (model is +# required, no platform default). External runtimes are exempt by the +# Create-handler gate — for them the URL is the contract — but we still +# pass model="external:custom" defensively in case a downstream consumer +# of the create body asserts presence. +_model_for_runtime() { + case "$1" in + claude-code) echo "sonnet" ;; + codex) echo "gpt-5.5" ;; + kimi) echo "kimi-coding/kimi-k2-coding-6" ;; + minimax) echo "minimax/MiniMax-M2.7" ;; + external) echo "external:custom" ;; + *) echo "anthropic:claude-opus-4-7" ;; + esac +} +PARENT_MODEL=$(_model_for_runtime "$PARENT_RUNTIME") P_RESP=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -H "Content-Type: application/json" \ - -d "{\"name\":\"${NAME_PREFIX}-parent\",\"runtime\":\"$PARENT_RUNTIME\",\"tier\":3$PARENT_EXTRA,\"secrets\":$PARENT_SECRETS}") + -d "{\"name\":\"${NAME_PREFIX}-parent\",\"runtime\":\"$PARENT_RUNTIME\",\"model\":\"$PARENT_MODEL\",\"tier\":3$PARENT_EXTRA,\"secrets\":$PARENT_SECRETS}") PARENT_ID=$(echo "$P_RESP" | python3 -c 'import json,sys;print(json.load(sys.stdin).get("id",""))' 2>/dev/null) if [ -z "$PARENT_ID" ]; then echo "::error::parent create failed: $(echo "$P_RESP" | head -c 300)" >&2 @@ -291,8 +307,9 @@ for rt in $PV_RUNTIMES; do CREATE_RUNTIME="$rt" CREATE_EXTRA="" fi + CREATE_MODEL=$(_model_for_runtime "$CREATE_RUNTIME") R=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -H "Content-Type: application/json" \ - -d "{\"name\":\"${NAME_PREFIX}-$rt\",\"runtime\":\"$CREATE_RUNTIME\",\"tier\":2,\"parent_id\":\"$PARENT_ID\"$CREATE_EXTRA,\"secrets\":$SEC}") + -d "{\"name\":\"${NAME_PREFIX}-$rt\",\"runtime\":\"$CREATE_RUNTIME\",\"model\":\"$CREATE_MODEL\",\"tier\":2,\"parent_id\":\"$PARENT_ID\"$CREATE_EXTRA,\"secrets\":$SEC}") WID=$(echo "$R" | python3 -c 'import json,sys;print(json.load(sys.stdin).get("id",""))' 2>/dev/null) if [ -z "$WID" ]; then echo "::error::$rt workspace create failed: $(echo "$R" | head -c 300)" >&2 diff --git a/workspace-server/internal/handlers/handlers_extended_test.go b/workspace-server/internal/handlers/handlers_extended_test.go index 59f1f5ce8..b4250e6b8 100644 --- a/workspace-server/internal/handlers/handlers_extended_test.go +++ b/workspace-server/internal/handlers/handlers_extended_test.go @@ -823,6 +823,57 @@ func TestCreate_ModelRequired_Returns422(t *testing.T) { } } +// TestCreate_ExternalRuntime_NoModel_OK pins the external-runtime +// exemption from the MODEL_REQUIRED gate. External workspaces +// intentionally do not spawn a Docker container or run an adapter; +// they delegate to a registered URL (workspace_provision.go:497-498: +// "external is a first-class runtime that intentionally does NOT +// spawn a Docker container"). The model field has no meaning for +// them — the URL is the contract, and the gate would 422 every +// legitimate "register my agent at https://..." flow. +// +// Both spellings count as external: +// 1. payload.External == true (the canonical flag, e.g. with any runtime) +// 2. payload.Runtime == "external" (legacy shape some E2E scripts still use) +// +// The isExternalLikeRuntime() helper catches both "external" and any +// future external-like runtime alias. +func TestCreate_ExternalRuntime_NoModel_OK(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir()) + + // External=true with explicit runtime — the test_api.sh / Echo Agent shape. + mock.ExpectBegin() + mock.ExpectExec("INSERT INTO workspaces"). + WillReturnResult(sqlmock.NewResult(0, 1)) + mock.ExpectCommit() + mock.ExpectExec("INSERT INTO canvas_layouts"). + WillReturnResult(sqlmock.NewResult(0, 1)) + mock.ExpectExec("INSERT INTO structure_events"). + WillReturnResult(sqlmock.NewResult(0, 1)) + mock.ExpectExec(`UPDATE workspaces SET status =`). + WillReturnResult(sqlmock.NewResult(0, 1)) + mock.ExpectExec("INSERT INTO workspace_auth_tokens"). + WillReturnResult(sqlmock.NewResult(0, 1)) + mock.ExpectExec("INSERT INTO structure_events"). + WillReturnResult(sqlmock.NewResult(0, 1)) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + body := `{"name":"Echo Agent","tier":1,"runtime":"external","external":true}` + c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) + c.Request.Header.Set("Content-Type", "application/json") + handler.Create(c) + + if w.Code != http.StatusCreated { + t.Fatalf("external workspace without model: want 201, got %d: %s", w.Code, w.Body.String()) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("sqlmock expectations not met: %v", err) + } +} + func TestUpdate_FieldValidation_Returns400(t *testing.T) { setupTestDB(t) setupTestRedis(t) diff --git a/workspace-server/internal/handlers/workspace.go b/workspace-server/internal/handlers/workspace.go index 7ae242824..5ea6317cd 100644 --- a/workspace-server/internal/handlers/workspace.go +++ b/workspace-server/internal/handlers/workspace.go @@ -322,9 +322,10 @@ func (h *WorkspaceHandler) Create(c *gin.Context) { } // SSOT (CTO 2026-05-22, feedback_workspace_model_required_no_platform_default_dynamic_credential_intake): - // model is REQUIRED user input. The platform must not provide a default; - // the runtime must not fall back. The decision belongs to the user (or - // to the agent acting on the user's behalf), never to the platform. + // model is REQUIRED user input for SPAWNED-runtime workspaces. The + // platform must not provide a default; the runtime must not fall back. + // The decision belongs to the user (or to the agent acting on the + // user's behalf), never to the platform. // // Empirical trigger: Code Reviewer 5ba15d7e was created with // `{"name":"Code Reviewer","role":"...","runtime":"codex",...}` (no @@ -337,11 +338,24 @@ func (h *WorkspaceHandler) Create(c *gin.Context) { // `model not patchable`), so the only recovery path was SQL UPDATE // or delete+recreate. // + // External workspaces are EXEMPT — they intentionally do not spawn + // a Docker container or run an adapter; they delegate to a registered + // URL (see provision.go: "external is a first-class runtime that + // intentionally does NOT spawn a Docker container"). The MODEL_REQUIRED + // gate is meaningful for spawned-runtime workspaces where the model + // id drives provider selection at adapter init. For external workspaces + // the contract is the URL, not the model — requiring it would be + // ceremony with no payoff, and would 422 every legitimate "register + // my agent at https://..." flow. The SSOT directive concerns + // platform-side defaults; an external workspace genuinely has no + // "model decision" for the user to make. + // // Fail-closed at the Create boundary so the caller learns the // contract immediately — same shape as the controlplane#188 // runtime-unresolved gate above. Caller fixes the request, no // EC2 launched, no stuck workspace, no operator paging. - if payload.Model == "" { + isExternal := payload.External || isExternalLikeRuntime(payload.Runtime) + if payload.Model == "" && !isExternal { log.Printf("Create: FAIL-CLOSED — model is required (runtime=%q template=%q); refusing the silent DefaultModel fallback per CTO 2026-05-22 SSOT directive", payload.Runtime, payload.Template) c.JSON(http.StatusUnprocessableEntity, gin.H{ "error": "model is required and has no platform-side default — pass an explicit \"model\" in the request body, or use a \"template\" whose config.yaml declares one. See feedback_workspace_model_required_no_platform_default_dynamic_credential_intake for the contract.", diff --git a/workspace-server/internal/handlers/workspace_provision_shared_test.go b/workspace-server/internal/handlers/workspace_provision_shared_test.go index e7dc8f9ce..75ae9b85f 100644 --- a/workspace-server/internal/handlers/workspace_provision_shared_test.go +++ b/workspace-server/internal/handlers/workspace_provision_shared_test.go @@ -784,9 +784,14 @@ func TestWorkspaceCreate_FirstDeploy_NoModel_Returns422(t *testing.T) { // will surface "call to ExecQuery 'INSERT INTO workspaces' was not // expected" — which is exactly the failure mode we want to flag. + // Body: hermes runtime WITHOUT external:true (the external-runtime + // exemption — see TestCreate_ExternalRuntime_NoModel_OK — does NOT + // apply here; hermes spawns a real adapter and model selection + // matters at adapter init). This is exactly the shape the old + // "no-model-no-secret-write" test pinned, minus the external flag. w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"name":"No Model Agent","runtime":"hermes","external":true}` + body := `{"name":"No Model Agent","runtime":"hermes"}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") diff --git a/workspace-server/internal/handlers/workspace_test.go b/workspace-server/internal/handlers/workspace_test.go index dc55cf311..f8cc70690 100644 --- a/workspace-server/internal/handlers/workspace_test.go +++ b/workspace-server/internal/handlers/workspace_test.go @@ -1844,31 +1844,35 @@ func TestWorkspaceCreate_188_TemplateConfigNoRuntimeKey_FailsClosed(t *testing.T } } -// Regression guard for the legitimate default path (no template, no -// runtime — bare `{"name":...}`). Pre-2026-05-22 this returned 201 with -// runtime defaulted to langgraph + model defaulted to -// "anthropic:claude-opus-4-7" via the (now-deleted) DefaultModel helper. -// Post-CTO-SSOT-directive the bare body must fail-closed at MODEL_REQUIRED -// before reaching the runtime-default langgraph branch — the model -// gate runs AFTER the langgraph fallback so we can observe the -// "runtime=langgraph, model=missing" framing in the error body. With -// model supplied, the langgraph default still kicks in (so the original -// invariant — bare-name still routes to langgraph — is preserved when -// the new contract is satisfied). -func TestWorkspaceCreate_188_NoTemplateNoRuntime_StillDefaultsLanggraph(t *testing.T) { - mock := setupTestDB(t) +// Pre-2026-05-22 this test guarded "bare {name} → langgraph 201" — the +// regression check for controlplane#188 (where an explicit runtime that +// failed to resolve must NOT silently substitute langgraph) had a sibling +// to ensure the LEGITIMATE bare default still landed on langgraph. +// +// Post-CTO-SSOT-directive (2026-05-22) bare body is 422 MODEL_REQUIRED +// before reaching the langgraph branch — the gate runs AFTER the +// langgraph-default assignment so the error body still surfaces +// runtime=langgraph (helps the caller see "ok, langgraph WOULD have +// been the runtime, but you still owe me a model"). The bare-body +// langgraph 201 path no longer exists; what we guard now is the +// 422-shape diagnostic. +// +// Bare-body-with-explicit-model 201 (the new "legitimate default" path) +// is covered by TestWorkspaceCreate in handlers_test.go — no need to +// duplicate the mock dance here. +func TestWorkspaceCreate_188_NoTemplateNoRuntime_NowMODEL_REQUIRED(t *testing.T) { + setupTestDB(t) setupTestRedis(t) broadcaster := newTestBroadcaster() handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) - // First half: bare body now MUST 422 with MODEL_REQUIRED. No DB - // writes are expected — the gate fires before INSERT. w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) body := `{"name":"Plain Default"}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") handler.Create(c) + if w.Code != http.StatusUnprocessableEntity { t.Fatalf("bare-body create: expected 422 MODEL_REQUIRED, got %d: %s", w.Code, w.Body.String()) } @@ -1876,36 +1880,7 @@ func TestWorkspaceCreate_188_NoTemplateNoRuntime_StillDefaultsLanggraph(t *testi t.Errorf("bare-body create: expected code=MODEL_REQUIRED in body, got %s", w.Body.String()) } if !bytes.Contains(w.Body.Bytes(), []byte(`"runtime":"langgraph"`)) { - t.Errorf("bare-body create: expected runtime=langgraph in 422 body (gate runs after the langgraph default), got %s", w.Body.String()) - } - - // Second half: with explicit model, the langgraph default still - // kicks in — original invariant (bare-name routes to langgraph) - // preserved when the new contract is satisfied. - mock.ExpectBegin() - mock.ExpectExec("INSERT INTO workspaces"). - WithArgs(sqlmock.AnyArg(), "Plain Default", nil, 3, "langgraph", sqlmock.AnyArg(), (*string)(nil), nil, "none", (*int64)(nil), models.DefaultMaxConcurrentTasks, "push"). - WillReturnResult(sqlmock.NewResult(0, 1)) - mock.ExpectCommit() - mock.ExpectExec("INSERT INTO canvas_layouts"). - WithArgs(sqlmock.AnyArg(), float64(0), float64(0)). - WillReturnResult(sqlmock.NewResult(0, 1)) - mock.ExpectExec("INSERT INTO structure_events"). - WillReturnResult(sqlmock.NewResult(0, 1)) - - w = httptest.NewRecorder() - c, _ = gin.CreateTestContext(w) - body = `{"name":"Plain Default","model":"anthropic:claude-opus-4-7"}` - c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) - c.Request.Header.Set("Content-Type", "application/json") - - handler.Create(c) - - if w.Code != http.StatusCreated { - t.Fatalf("expected 201 (legitimate default path), got %d: %s", w.Code, w.Body.String()) - } - if err := mock.ExpectationsWereMet(); err != nil { - t.Errorf("unmet sqlmock expectations: %v", err) + t.Errorf("bare-body create: expected runtime=\"langgraph\" in 422 body (the gate runs AFTER the langgraph-default assignment so the diagnostic surfaces what runtime WOULD have been used), got %s", w.Body.String()) } } -- 2.52.0 From 680434a8e640ea5acf0822533c62e91ffc69ab95 Mon Sep 17 00:00:00 2001 From: cp-be Date: Thu, 21 May 2026 22:16:11 -0700 Subject: [PATCH 4/4] test(e2e): patch 3 more non-external POST /workspaces sites for MODEL_REQUIRED contract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to the prior commits in this PR — E2E API Smoke run on commit a3c15bc9 surfaced 3 remaining E2E scripts that POST /workspaces without a model field AND without the external-runtime exemption, so they 422 under the new MODEL_REQUIRED gate. - tests/e2e/test_notify_attachments_e2e.sh:96 — bare {"name":"Notify E2E","tier":1} (no runtime → defaults to langgraph). Adds "model":"anthropic:claude-opus-4-7" to match the deleted DefaultModel("") return value. - tests/e2e/test_priority_runtimes_e2e.sh:192 — runtime:claude-code without model. Adds "model":"sonnet" to match the deleted DefaultModel("claude-code") return value. - tests/e2e/test_priority_runtimes_e2e.sh:384 — runtime:gemini-cli without model. Adds "model":"gemini-2.0-flash" — gemini-cli routes via the gemini provider (per derive-provider.sh), so a gemini:* slug picks the right provider chain. Scripts inspected but NOT patched (already external-exempt): - test_api.sh — both POSTs use runtime:external + external:true - test_today_pr_coverage_e2e.sh — both POSTs use runtime:external + external:true - test_priority_runtimes_e2e.sh:255 — runtime:hermes ALREADY had "model":"openai/gpt-4o" - test_priority_runtimes_e2e.sh:326 — already had "model":"openai/gpt-4o-mini" Other scripts that POST without model (test_a2a_e2e.sh:133, test_activity_e2e.sh:218, test_dev_mode.sh:72, test_workspace_abilities_e2e.sh, test_comprehensive_e2e.sh, test_mcp_stdio_staging.sh, test_chat_upload_e2e.sh, tests/harness/seed.sh) are NOT triggered by the e2e-api.yml workflow that this PR's CI runs — they're tracked for a follow-up sweep once #1667 lands. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/e2e/test_notify_attachments_e2e.sh | 6 +++++- tests/e2e/test_priority_runtimes_e2e.sh | 6 ++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/e2e/test_notify_attachments_e2e.sh b/tests/e2e/test_notify_attachments_e2e.sh index 2a5f12a77..7aec89155 100755 --- a/tests/e2e/test_notify_attachments_e2e.sh +++ b/tests/e2e/test_notify_attachments_e2e.sh @@ -92,8 +92,12 @@ for _wid in $PRIOR; do curl -s -X DELETE "$BASE/workspaces/$_wid?confirm=true" > /dev/null || true done +# model is required at the Create boundary (CTO 2026-05-22 SSOT — see +# feedback_workspace_model_required_no_platform_default_dynamic_credential_intake). +# Body had no runtime → defaults to langgraph; pass the langgraph-compatible +# default that the deleted DefaultModel("") would have returned. R=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \ - -d '{"name":"Notify E2E","tier":1}') + -d '{"name":"Notify E2E","tier":1,"model":"anthropic:claude-opus-4-7"}') WSID=$(echo "$R" | python3 -c 'import json,sys;print(json.load(sys.stdin)["id"])' 2>/dev/null || true) [ -n "$WSID" ] || { echo "Failed to create workspace: $R"; exit 1; } echo "Created workspace $WSID" diff --git a/tests/e2e/test_priority_runtimes_e2e.sh b/tests/e2e/test_priority_runtimes_e2e.sh index 477d424cc..a9b873e02 100755 --- a/tests/e2e/test_priority_runtimes_e2e.sh +++ b/tests/e2e/test_priority_runtimes_e2e.sh @@ -188,8 +188,9 @@ import json, os print(json.dumps({'CLAUDE_CODE_OAUTH_TOKEN': os.environ['CLAUDE_CODE_OAUTH_TOKEN']})) ") local resp wsid + # model required (CTO 2026-05-22 SSOT) — pass the deleted DefaultModel("claude-code") value. resp=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \ - -d "{\"name\":\"Priority E2E (claude-code)\",\"runtime\":\"claude-code\",\"tier\":1,\"secrets\":$secrets}") + -d "{\"name\":\"Priority E2E (claude-code)\",\"runtime\":\"claude-code\",\"model\":\"sonnet\",\"tier\":1,\"secrets\":$secrets}") wsid=$(echo "$resp" | python3 -c 'import json,sys;print(json.load(sys.stdin).get("id",""))') || true if [ -z "$wsid" ]; then fail "create claude-code workspace" "$resp" @@ -380,8 +381,9 @@ import json, os print(json.dumps({'GEMINI_API_KEY': os.environ['E2E_GEMINI_API_KEY']})) ") local resp wsid + # model required (CTO 2026-05-22 SSOT) — gemini-cli routes via the gemini provider. resp=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \ - -d "{\"name\":\"Priority E2E (gemini-cli)\",\"runtime\":\"gemini-cli\",\"tier\":1,\"secrets\":$secrets}") + -d "{\"name\":\"Priority E2E (gemini-cli)\",\"runtime\":\"gemini-cli\",\"model\":\"gemini-2.0-flash\",\"tier\":1,\"secrets\":$secrets}") wsid=$(echo "$resp" | python3 -c 'import json,sys;print(json.load(sys.stdin).get("id",""))') || true if [ -z "$wsid" ]; then fail "create gemini-cli workspace" "$resp"; return 0; fi CREATED_WSIDS+=("$wsid") -- 2.52.0