2026-06-04 04:22:08 +00:00
5 changed files with 420 additions and 3 deletions
@@ -48,8 +48,10 @@ on:
      - 'workspace-server/internal/handlers/a2a_proxy.go'
      - 'workspace-server/internal/middleware/**'
      - 'workspace-server/internal/provisioner/**'
+      - 'workspace-server/internal/providers/providers.yaml'
      - 'tests/e2e/test_staging_full_saas.sh'
      - 'tests/e2e/lib/completion_assert.sh'
+      - 'tests/e2e/lib/model_slug.sh'
      - 'tests/e2e/lib/aws_leak_check.sh'
      - 'tests/e2e/test_aws_leak_check.sh'
      - '.gitea/workflows/e2e-staging-saas.yml'
@@ -61,8 +63,10 @@ on:
      - 'workspace-server/internal/handlers/a2a_proxy.go'
      - 'workspace-server/internal/middleware/**'
      - 'workspace-server/internal/provisioner/**'
+      - 'workspace-server/internal/providers/providers.yaml'
      - 'tests/e2e/test_staging_full_saas.sh'
      - 'tests/e2e/lib/completion_assert.sh'
+      - 'tests/e2e/lib/model_slug.sh'
      - 'tests/e2e/lib/aws_leak_check.sh'
      - 'tests/e2e/test_aws_leak_check.sh'
      - '.gitea/workflows/e2e-staging-saas.yml'
@@ -315,3 +319,148 @@ jobs:
            echo "::warning::saas teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
          fi
          exit 0
+
+  # ── PLATFORM-MANAGED BOOT REGRESSION (moonshot/kimi NOT_CONFIGURED) ──────────
+  #
+  # The REAL-boot complement to the deterministic unit suite
+  # (workspace_provision_platform_boot_test.go). Provisions a REAL staging
+  # claude-code workspace on the PLATFORM-managed path — provider=platform,
+  # model=moonshot/kimi-k2.6, NO tenant LLM key — and asserts it reaches
+  # status=online (NOT not_configured) and a completion returns 200, via the same
+  # online-wait + completion-assert the BYOK job uses.
+  #
+  # Why a SEPARATE job (not a matrix leg of e2e-staging-saas): the platform path
+  # injects NO secret and pins a different model, so its env block diverges from
+  # the BYOK job's. A dedicated job keeps each path's "verify key present" preflight
+  # honest (BYOK requires a key; platform requires its ABSENCE not to matter) and
+  # gives the regression its own named commit-status for branch protection.
+  #
+  # Add `E2E Staging Platform Boot` to branch protection after 3 consecutive
+  # green runs on main (de-flake window; this path shares the cp#245
+  # boot-timeout flake surface the BYOK job has, so it must prove stable before
+  # it can BLOCK — see the gate-making plan in the PR body).
+  # bp-required: pending #2187
+  e2e-staging-platform-boot:
+    name: E2E Staging Platform Boot
+    runs-on: ubuntu-latest
+    # Phase 3 (RFC #219 §1): surface without blocking until the de-flake window
+    # closes. mc#1982: do NOT renew this mask silently — the gate-making plan
+    # tracks the flip to false under #2187.
+    continue-on-error: true
+    timeout-minutes: 45
+    permissions:
+      contents: read
+
+    env:
+      MOLECULE_CP_URL: https://staging-api.moleculesai.app
+      MOLECULE_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
+      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+      AWS_DEFAULT_REGION: us-east-2
+      E2E_AWS_LEAK_CHECK: required
+      E2E_AWS_TERMINATE_LEAKS: '1'
+      # The regression combo: claude-code + platform-managed + moonshot/kimi-k2.6.
+      # NO E2E_*_API_KEY is set — platform-managed billing is owned by Molecule via
+      # the CP LLM proxy. The harness's E2E_LLM_PATH=platform branch sends empty
+      # secrets and pin-selects the platform model.
+      E2E_RUNTIME: claude-code
+      E2E_LLM_PATH: platform
+      # Smoke mode: a single parent workspace is enough to prove online +
+      # completion for the platform path (the A2A/delegation matrix is covered
+      # by the BYOK job). To exercise another platform model id, set
+      # E2E_DEFAULT_PLATFORM_MODEL; this job's env block wires no input to it.
+      E2E_MODE: smoke
+      E2E_RUN_ID: "platform-${{ github.run_id }}-${{ github.run_attempt }}"
+      E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
+
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Verify admin token present
+        run: |
+          # Preflight: each credential below is mandatory. A missing one stops
+          # the job with exit code 2 before the E2E step runs.
+          if [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
+            echo "::error::CP_STAGING_ADMIN_API_TOKEN secret not set (Railway staging CP_ADMIN_API_TOKEN)"
+            exit 2
+          fi
+          for cred in AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY; do
+            # ${!cred:-} is bash indirect expansion: the value of the variable
+            # whose name is stored in $cred (empty when unset).
+            if [ -z "${!cred:-}" ]; then
+              echo "::error::$cred not set — EC2 leak verification cannot run"
+              exit 2
+            fi
+          done
+          echo "Admin token present ✓"
+
+      - name: Assert NO BYOK key leaks into the platform run
+        run: |
+          # The whole point of this job is the platform-managed path. The harness
+          # skips key injection when E2E_LLM_PATH=platform, so a stray
+          # E2E_*_API_KEY cannot turn this into a BYOK run. This step only WARNS
+          # (it never fails the job, despite its name) so that such a key being
+          # wired into this job's env is at least visible in the run log.
+          for var in E2E_MINIMAX_API_KEY E2E_ANTHROPIC_API_KEY E2E_OPENAI_API_KEY; do
+            if [ -n "${!var:-}" ]; then
+              echo "::warning::$var is set in this platform-boot job's env — the harness ignores it on E2E_LLM_PATH=platform, but it should not be wired here."
+            fi
+          done
+          echo "Platform-managed path: no tenant LLM key required ✓"
+
+      - name: CP staging health preflight
+        run: |
+          # The step shell runs with errexit. Without `|| true`, a curl transport
+          # failure (timeout, DNS, refused) would abort the script on the
+          # assignment itself, before the ::error:: annotation below is printed.
+          # On such failures curl still writes "000" for %{http_code}.
+          code=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 10 "$MOLECULE_CP_URL/health" || true)
+          if [ "$code" != "200" ]; then
+            echo "::error::Staging CP unhealthy (got HTTP $code). Skipping — not a workspace bug."
+            exit 1
+          fi
+          echo "Staging CP healthy ✓"
+
+      - name: Run platform-managed boot E2E (online + completion)
+        id: e2e
+        run: bash tests/e2e/test_staging_full_saas.sh
+
+      - name: Teardown safety net (runs on cancel/failure)
+        if: always()
+        env:
+          ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
+        run: |
+          # Best-effort cleanup: errexit is turned off once here and stays off, and
+          # the step always exits 0 — leftover orgs are reported as warnings only.
+          set +e
+          orgs=$(curl -sS "$MOLECULE_CP_URL/cp/admin/orgs" \
+            -H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
+            | python3 -c "
+          import json, sys, os, datetime
+          run_id = os.environ.get('GITHUB_RUN_ID', '')
+          payload = json.load(sys.stdin)
+          today = datetime.date.today()
+          yesterday = today - datetime.timedelta(days=1)
+          dates = (today.strftime('%Y%m%d'), yesterday.strftime('%Y%m%d'))
+          # smoke mode slugs are e2e-smoke-YYYYMMDD-platform-<run_id>-...
+          if run_id:
+              prefixes = tuple(f'e2e-smoke-{day}-platform-{run_id}-' for day in dates)
+          else:
+              prefixes = tuple(f'e2e-smoke-{day}-platform-' for day in dates)
+          candidates = [o['slug'] for o in payload.get('orgs', [])
+                        if any(o.get('slug','').startswith(p) for p in prefixes)
+                        and o.get('instance_status') not in ('purged',)]
+          print('\n'.join(candidates))
+          " 2>/dev/null)
+          leaks=()
+          for slug in $orgs; do
+            echo "Safety-net teardown: $slug"
+            curl -sS -o /tmp/plat-cleanup.out -w "%{http_code}" \
+              -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
+              -H "Authorization: Bearer $ADMIN_TOKEN" \
+              -H "Content-Type: application/json" \
+              -d "{\"confirm\":\"$slug\"}" >/tmp/plat-cleanup.code
+            code=$(cat /tmp/plat-cleanup.code 2>/dev/null || echo "000")
+            if [ "$code" = "200" ] || [ "$code" = "204" ]; then
+              echo "[teardown] deleted $slug (HTTP $code)"
+            else
+              echo "::warning::platform-boot teardown for $slug returned HTTP $code — sweep-stale-e2e-orgs will catch it within ~45 min. Body: $(head -c 300 /tmp/plat-cleanup.out 2>/dev/null)"
+              leaks+=("$slug")
+            fi
+          done
+          if [ ${#leaks[@]} -gt 0 ]; then
+            echo "::warning::platform-boot teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
+          fi
+          exit 0
@@ -23,23 +23,61 @@
 #                  their provider entries, otherwise the workspace boots
 #                  reachable but the first A2A call hits the wrong auth path.
 #
-# When E2E_MODEL_SLUG is set, it overrides this dispatch — useful when an
-# operator dispatches the workflow to test a specific slug.
+# PLATFORM-MANAGED path (E2E_LLM_PATH=platform) — the moonshot/kimi
+# NOT_CONFIGURED regression (RFC#340 Fix A #2187):
+#
+#   The branches above all exercise BYOK: a tenant key (MINIMAX/ANTHROPIC/
+#   OPENAI) is injected as a workspace secret and the model id resolves to that
+#   vendor's *BYOK* provider entry. That path NEVER exercises the platform arm —
+#   the exact arm that booted "moonshot/kimi-k2.6" into NOT_CONFIGURED in prod,
+#   because the generated config.yaml lacked the derived `provider: platform`.
+#
+#   E2E_LLM_PATH=platform selects a platform-managed model id (slash-namespaced,
+#   no tenant key — Molecule owns billing via the CP LLM proxy). The default is
+#   "moonshot/kimi-k2.6", the headline incident combo; override it with
+#   E2E_DEFAULT_PLATFORM_MODEL (or E2E_MODEL_SLUG). The provision branch in
+#   test_staging_full_saas.sh sends NO secrets for this path (platform-managed
+#   needs none), so the workspace must boot online purely on the proxy env the
+#   control plane injects + the manifest-derived `provider: platform` that Fix A
+#   stamps. That is the REAL boot-path assertion the deterministic unit test
+#   (workspace_provision_platform_boot_test.go) cannot make.
+#
+# When E2E_MODEL_SLUG is set, it overrides this dispatch entirely — useful when
+# an operator dispatches the workflow to test a specific slug (or a specific
+# platform model id).
 #
 # Unit tested by tests/e2e/test_model_slug.sh — every branch must stay
 # pinned because regressions silently mask as "Could not resolve
 # authentication method" + the synth-E2E gate goes red without naming
 # the slug-format mismatch.

+# Default platform-managed model for the platform-boot regression path. The
+# exact id that booted NOT_CONFIGURED in prod. Must stay a member of the
+# claude-code `platform` arm in workspace-server/internal/providers/providers.yaml
+# (the deterministic suite TestEnsureDefaultConfig_StampsProviderForEverySSOTPlatformModel
+# enforces every member of that arm derives provider=platform). Resolved INSIDE
+# pick_model_slug via ${E2E_DEFAULT_PLATFORM_MODEL:-...} so callers can override
+# it (or unset it) without tripping `set -u`.
+E2E_DEFAULT_PLATFORM_MODEL_FALLBACK="moonshot/kimi-k2.6"
+
 # Usage: pick_model_slug <runtime>
 #   stdout: the slug string
 #   E2E_MODEL_SLUG (env): if set + non-empty, used as-is (operator override)
+#   E2E_LLM_PATH=platform (env): select the platform-managed model id
+#     (E2E_DEFAULT_PLATFORM_MODEL) instead of a BYOK slug. Takes precedence over
+#     the per-key BYOK branches; E2E_MODEL_SLUG still wins over everything.
 pick_model_slug() {
  local runtime="${1:-}"
  if [ -n "${E2E_MODEL_SLUG:-}" ]; then
    printf '%s' "$E2E_MODEL_SLUG"
    return 0
  fi
+  # Platform-managed path: the slash-namespaced platform model, no tenant key.
+  # Exercises the arm the moonshot/kimi NOT_CONFIGURED bug shipped on.
+  if [ "${E2E_LLM_PATH:-}" = "platform" ]; then
+    printf '%s' "${E2E_DEFAULT_PLATFORM_MODEL:-$E2E_DEFAULT_PLATFORM_MODEL_FALLBACK}"
+    return 0
+  fi
  case "$runtime" in
    hermes)      printf 'openai/gpt-4o' ;;
    claude-code)
@@ -65,6 +65,28 @@ assert_eq "claude-code + both keys → MiniMax priority"            "$got" "Mini
 run_test "unknown runtime → slash-form fallback"                   gemini      "openai/gpt-4o"
 run_test "empty runtime → slash-form fallback"                     ""          "openai/gpt-4o"

+# ── Platform-managed path (E2E_LLM_PATH=platform) ──
+# The moonshot/kimi NOT_CONFIGURED regression path (RFC#340 Fix A #2187).
+# Selects the slash-namespaced platform model (default moonshot/kimi-k2.6),
+# takes precedence over the per-key BYOK branches, and is itself overridden by
+# E2E_MODEL_SLUG. These pins guard the harness's ability to drive the platform
+# arm — the one the prod bug shipped on.
+echo
+echo "Test: pick_model_slug — platform-managed path (E2E_LLM_PATH=platform)"
+echo
+
+got=$(unset E2E_MODEL_SLUG E2E_DEFAULT_PLATFORM_MODEL; E2E_LLM_PATH=platform pick_model_slug claude-code)
+assert_eq "claude-code + platform path → headline kimi model"      "$got" "moonshot/kimi-k2.6"
+
+got=$(unset E2E_MODEL_SLUG E2E_DEFAULT_PLATFORM_MODEL; E2E_LLM_PATH=platform E2E_MINIMAX_API_KEY="mx-stray" pick_model_slug claude-code)
+assert_eq "platform path beats a stray BYOK key (no mask)"         "$got" "moonshot/kimi-k2.6"
+
+got=$(unset E2E_MODEL_SLUG; E2E_LLM_PATH=platform E2E_DEFAULT_PLATFORM_MODEL="minimax/MiniMax-M3" pick_model_slug claude-code)
+assert_eq "platform path honours E2E_DEFAULT_PLATFORM_MODEL"        "$got" "minimax/MiniMax-M3"
+
+got=$(unset E2E_DEFAULT_PLATFORM_MODEL; E2E_MODEL_SLUG="anthropic/claude-opus-4-7" E2E_LLM_PATH=platform pick_model_slug claude-code)
+assert_eq "E2E_MODEL_SLUG still wins over platform path"            "$got" "anthropic/claude-opus-4-7"
+
 # ── Override via E2E_MODEL_SLUG ──
 # When the operator sets E2E_MODEL_SLUG, the per-runtime dispatch is
 # bypassed. Used during workflow_dispatch to A/B specific slugs.
@@ -476,7 +476,19 @@ wait_workspaces_online_routable() {
 # All empty → '{}' (workspace will fail at first turn with an
 # expected, actionable auth error rather than masking the test).
 SECRETS_JSON='{}'
-if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
+# Platform-managed path (E2E_LLM_PATH=platform) — the moonshot/kimi
+# NOT_CONFIGURED regression (RFC#340 Fix A #2187). Molecule owns billing via the
+# CP LLM proxy, so the workspace needs NO tenant key: provision with empty
+# secrets and let the workspace boot purely on (a) the proxy env the control
+# plane injects + (b) the manifest-derived `provider: platform` Fix A stamps into
+# the generated config.yaml. This is the path that booted NOT_CONFIGURED in prod
+# precisely because the BYOK branches below never exercise it. We deliberately
+# skip the key-injection branches so a stray E2E_*_API_KEY in the runner env
+# cannot silently convert this into a BYOK run and mask the regression.
+if [ "${E2E_LLM_PATH:-}" = "platform" ]; then
+  log "    LLM path: PLATFORM-MANAGED (no tenant key; proxy + Fix A provider stamp)"
+  SECRETS_JSON='{}'
+elif [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
  SECRETS_JSON=$(python3 -c "
 import json, os
 k = os.environ['E2E_MINIMAX_API_KEY']
@@ -0,0 +1,196 @@
+package handlers
+
+// workspace_provision_platform_boot_test.go — the deterministic, SSOT-driven
+// regression suite for the class of bug behind the moonshot/kimi
+// "canvas-created claude-code workspace boots NOT_CONFIGURED" production
+// incident (RFC#340 Fix A #2187, canvas Fix C #2188).
+//
+// THE BUG (what shipped to prod):
+//   A claude-code workspace created via the canvas with provider=Platform +
+//   model="moonshot/kimi-k2.6" booted NOT_CONFIGURED. Unit tests passed; the
+//   REAL boot path was broken. ensureDefaultConfig generated a config.yaml that
+//   carried NO derived `provider:` key, so the cp#329 config-bundle the adapter
+//   actually reads left molecule-runtime config.py to slash-split the model id
+//   "moonshot/kimi-k2.6" -> provider="moonshot", which is NOT in the providers
+//   registry -> NOT_CONFIGURED.
+//
+// THE FIX A INVARIANT (this file pins it, and pins it for the WHOLE class):
+//   ensureDefaultConfig MUST stamp the manifest-derived provider into the
+//   generated config.yaml — at BOTH the top level and under runtime_config —
+//   for every (runtime, model) the providers SSOT maps to a platform provider.
+//   The single-combo pin (TestEnsureDefaultConfig_StampsDerivedProvider in
+//   workspace_provision_test.go) proves the headline case. THIS file closes the
+//   gap that single pin leaves: it is PARAMETRIZED OVER THE SSOT, so when a NEW
+//   platform model is added to providers.yaml for claude-code (or any runtime
+//   with a platform arm), the new id is automatically covered — a future
+//   platform model that fails to derive `provider: platform` fails THIS test at
+//   build time, before it can ship a NOT_CONFIGURED boot.
+//
+// WHY SSOT-DRIVEN AND NOT A HAND-MAINTAINED LIST:
+//   The original bug was a divergence between "what the canvas offers"
+//   (providers.yaml platform arm) and "what the config generator stamps". A
+//   hardcoded test model list would itself drift from the SSOT and re-open the
+//   same divergence gap. By enumerating the platform model set directly from the
+//   loaded providers.Manifest (the SAME manifest ensureDefaultConfig's
+//   deriveDefaultConfigProvider resolves against), this test cannot fall behind
+//   the offered set: add a platform model, get a test case for free; the test
+//   only passes if the generator actually stamps it.
+//
+// SCOPE: deterministic, no live infra. The REAL-boot complement (provision a
+// staging workspace and assert status=online + a completion returns 200 for the
+// SAME combo) is the bash staging harness — see
+// tests/e2e/test_staging_full_saas.sh (E2E_LLM_PATH=platform) and the
+// e2e-staging-platform-boot job in .gitea/workflows/e2e-staging-saas.yml. That
+// asserts the REAL artifact (booted status / completion); THIS asserts the
+// deterministic config-generation invariant the real boot depends on.
+
+import (
+	"testing"
+
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
+	"gopkg.in/yaml.v3"
+)
+
+// platformModelsForRuntime returns the model ids listed under the provider
+// named "platform" in the manifest's Runtimes[rt] entry — per the file header,
+// the set the canvas offers as provider=Platform. The manifest comes from
+// providers.LoadManifest; a load error or a missing Runtimes entry for rt fails
+// the test via t.Fatalf. Returns nil when rt has no provider named "platform";
+// the returned slice is the manifest's own (not a copy), so treat it read-only.
+func platformModelsForRuntime(t *testing.T, rt string) []string {
+	t.Helper()
+	m, err := providers.LoadManifest()
+	if err != nil {
+		t.Fatalf("LoadManifest: %v", err)
+	}
+	native, ok := m.Runtimes[rt]
+	if !ok {
+		t.Fatalf("providers SSOT has no runtimes entry for %q", rt)
+	}
+	for _, ref := range native.Providers {
+		if ref.Name == "platform" {
+			return ref.Models
+		}
+	}
+	return nil
+}
+
+// TestEnsureDefaultConfig_StampsProviderForEverySSOTPlatformModel is the
+// class-level regression for the moonshot/kimi NOT_CONFIGURED incident. It runs
+// one subtest per model returned by platformModelsForRuntime for claude-code:
+// each calls ensureDefaultConfig, parses the generated config.yaml, and
+// requires provider == "platform" at the top level and under runtime_config,
+// plus a non-empty top-level model. Because the model list comes from the SSOT,
+// a newly-offered platform model cannot ship without the stamp (the
+// offered-but-not-stamped divergence behind the original incident).
+func TestEnsureDefaultConfig_StampsProviderForEverySSOTPlatformModel(t *testing.T) {
+	const runtime = "claude-code"
+	platformModels := platformModelsForRuntime(t, runtime)
+	if len(platformModels) == 0 {
+		t.Fatalf("providers SSOT lists no platform models for runtime %q — the regression matrix would be empty; the SSOT shape changed (this test is the canary)", runtime)
+	}
+	// Headline sentinel: the exact id that booted NOT_CONFIGURED in prod MUST be
+	// in the enumerated set. If a refactor drops it from the platform arm, this
+	// test must still cover it explicitly — fail loud rather than silently
+	// shrinking the matrix.
+	if !containsString(platformModels, "moonshot/kimi-k2.6") {
+		t.Fatalf("the headline incident model \"moonshot/kimi-k2.6\" is no longer in the claude-code platform SSOT set (%v) — regression coverage for the original bug would be lost", platformModels)
+	}
+
+	for _, model := range platformModels {
+		model := model
+		t.Run(model, func(t *testing.T) {
+			broadcaster := newTestBroadcaster()
+			handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
+
+			files := handler.ensureDefaultConfig("ws-platform-boot", models.CreateWorkspacePayload{
+				Name:    "Platform Boot Agent",
+				Tier:    2,
+				Runtime: runtime,
+				Model:   model,
+			})
+
+			raw, ok := files["config.yaml"]
+			if !ok {
+				t.Fatalf("expected config.yaml in generated files for model %q", model)
+			}
+
+			var parsed struct {
+				Model         string `yaml:"model"`
+				Provider      string `yaml:"provider"`
+				RuntimeConfig struct {
+					Model    string `yaml:"model"`
+					Provider string `yaml:"provider"`
+				} `yaml:"runtime_config"`
+			}
+			if err := yaml.Unmarshal(raw, &parsed); err != nil {
+				t.Fatalf("generated YAML invalid for model %q: %v\n%s", model, err, raw)
+			}
+
+			// The load-bearing invariant: BOTH the top-level and the
+			// runtime_config provider must be exactly "platform". An empty or
+			// vendor-namespace ("moonshot") value here is the prod NOT_CONFIGURED
+			// boot — the adapter would slash-split the model id and look up an
+			// unregistered provider.
+			if parsed.Provider != "platform" {
+				t.Errorf("model %q: top-level provider = %q, want \"platform\" (Fix A invariant — empty/vendor value is the NOT_CONFIGURED boot)\n%s", model, parsed.Provider, raw)
+			}
+			if parsed.RuntimeConfig.Provider != "platform" {
+				t.Errorf("model %q: runtime_config.provider = %q, want \"platform\"\n%s", model, parsed.RuntimeConfig.Provider, raw)
+			}
+			// Sanity: top-level model must be non-empty (a provider with no model is
+			// equally undeployable). NOTE(review): runtime_config.model is not asserted.
+			if parsed.Model == "" {
+				t.Errorf("model %q: generated config has empty top-level model\n%s", model, raw)
+			}
+		})
+	}
+}
+
+// TestPlatformModelDeriveProvider_SSOTConsistency is the upstream half of the
+// same invariant, one layer below ensureDefaultConfig. For every model that
+// platformModelsForRuntime returns for claude-code, it calls the manifest's
+// DeriveProvider with a nil third argument and requires a nil error and a
+// result whose Name is "platform". If DeriveProvider itself regressed (e.g. a
+// model_prefix_match change made "moonshot/kimi-k2.6" resolve to the bare
+// "moonshot" entry again), this fails closer to the root cause than the
+// config-shape test, separating an SSOT/derive regression from a
+// config-emission regression. An empty model set fails the test outright.
+func TestPlatformModelDeriveProvider_SSOTConsistency(t *testing.T) {
+	const runtime = "claude-code"
+	m, err := providers.LoadManifest()
+	if err != nil {
+		t.Fatalf("LoadManifest: %v", err)
+	}
+	platformModels := platformModelsForRuntime(t, runtime)
+	if len(platformModels) == 0 {
+		t.Fatalf("no platform models for %q in SSOT", runtime)
+	}
+	for _, model := range platformModels {
+		model := model
+		t.Run(model, func(t *testing.T) {
+			// nil availableAuthEnv mirrors deriveDefaultConfigProvider's call at
+			// config-generation time (no per-workspace auth context yet).
+			p, err := m.DeriveProvider(runtime, model, nil)
+			if err != nil {
+				t.Fatalf("DeriveProvider(%q, %q): unexpected error %v — an SSOT-offered platform model MUST derive", runtime, model, err)
+			}
+			if p.Name != "platform" {
+				t.Errorf("DeriveProvider(%q, %q).Name = %q, want \"platform\" (this is the exact slash-split-to-vendor regression that booted NOT_CONFIGURED)", runtime, model, p.Name)
+			}
+		})
+	}
+}
+
+// containsString reports whether want is an element of xs (false for a nil or
+// empty slice). Kept local rather than in a shared test util so this regression
+// file reads top to bottom without chasing helpers across the package.
+func containsString(xs []string, want string) bool {
+	for _, x := range xs {
+		if x == want {
+			return true
+		}
+	}
+	return false
+}