From 9c506d5c8cb0da5538f73fb698d7fbaa6e6167be Mon Sep 17 00:00:00 2001 From: "Hongming Wang (CTO)" Date: Wed, 3 Jun 2026 19:51:53 -0700 Subject: [PATCH] test(provision): SSOT-parametrized + real-boot regression for moonshot/kimi NOT_CONFIGURED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The moonshot/kimi incident: a canvas-created claude-code workspace with provider=Platform + model=moonshot/kimi-k2.6 booted NOT_CONFIGURED in prod because the generated config.yaml lacked the manifest-derived `provider:` key, so the adapter slash-split "moonshot/..." -> unregistered provider. Fixed by #2187 (ensureDefaultConfig stamps DeriveProvider->provider:platform) + #2188 (canvas). Unit tests passed; the REAL boot path was the gap. This adds comprehensive regression coverage so the CLASS cannot reship: Deterministic (no live infra, runs in the normal unit suite): workspace-server/internal/handlers/workspace_provision_platform_boot_test.go - TestEnsureDefaultConfig_StampsProviderForEverySSOTPlatformModel: enumerates the claude-code `platform` arm from the providers SSOT (providers.LoadManifest) and asserts ensureDefaultConfig stamps provider:platform (top-level AND runtime_config) for EVERY offered platform model — not just the single moonshot/kimi pin #2187 shipped. A newly-offered platform model gets a case for free and only passes if actually stamped (closes the offered-but-not-stamped divergence the bug rode in on). Mutation-verified: disabling the stamp fails the test. - TestPlatformModelDeriveProvider_SSOTConsistency: the upstream half — DeriveProvider maps every SSOT platform model to provider Name "platform". 
Real-boot (staging; I will run it): Extends the existing staging harness (no new harness) with a platform-managed path: E2E_LLM_PATH=platform pin-selects moonshot/kimi-k2.6, sends NO tenant key, and reuses the harness's online-wait + completion assertions to prove the workspace reaches status=online (not not_configured) and a completion returns 200. The BYOK branches never exercised the platform arm — the exact arm the bug shipped on. - tests/e2e/lib/model_slug.sh: platform path + override semantics - tests/e2e/test_model_slug.sh: 4 new pinned cases (16/16 green) - tests/e2e/test_staging_full_saas.sh: empty-secrets platform branch - .gitea/workflows/e2e-staging-saas.yml: new `E2E Staging Platform Boot` job (continue-on-error during de-flake; bp-required: pending #2187), + providers.yaml/model_slug.sh added to the path triggers. Coverage-audit theme: mc#1982 (continue-on-error masks; de-flake-then-gate). Co-Authored-By: Claude Opus 4.8 (1M context) --- .gitea/workflows/e2e-staging-saas.yml | 149 +++++++++++++ tests/e2e/lib/model_slug.sh | 42 +++- tests/e2e/test_model_slug.sh | 22 ++ tests/e2e/test_staging_full_saas.sh | 14 +- .../workspace_provision_platform_boot_test.go | 196 ++++++++++++++++++ 5 files changed, 420 insertions(+), 3 deletions(-) create mode 100644 workspace-server/internal/handlers/workspace_provision_platform_boot_test.go diff --git a/.gitea/workflows/e2e-staging-saas.yml b/.gitea/workflows/e2e-staging-saas.yml index 06acc3d83..82b3c46d8 100644 --- a/.gitea/workflows/e2e-staging-saas.yml +++ b/.gitea/workflows/e2e-staging-saas.yml @@ -48,8 +48,10 @@ on: - 'workspace-server/internal/handlers/a2a_proxy.go' - 'workspace-server/internal/middleware/**' - 'workspace-server/internal/provisioner/**' + - 'workspace-server/internal/providers/providers.yaml' - 'tests/e2e/test_staging_full_saas.sh' - 'tests/e2e/lib/completion_assert.sh' + - 'tests/e2e/lib/model_slug.sh' - 'tests/e2e/lib/aws_leak_check.sh' - 'tests/e2e/test_aws_leak_check.sh' - 
'.gitea/workflows/e2e-staging-saas.yml' @@ -61,8 +63,10 @@ on: - 'workspace-server/internal/handlers/a2a_proxy.go' - 'workspace-server/internal/middleware/**' - 'workspace-server/internal/provisioner/**' + - 'workspace-server/internal/providers/providers.yaml' - 'tests/e2e/test_staging_full_saas.sh' - 'tests/e2e/lib/completion_assert.sh' + - 'tests/e2e/lib/model_slug.sh' - 'tests/e2e/lib/aws_leak_check.sh' - 'tests/e2e/test_aws_leak_check.sh' - '.gitea/workflows/e2e-staging-saas.yml' @@ -315,3 +319,148 @@ jobs: echo "::warning::saas teardown left ${#leaks[@]} leak(s): ${leaks[*]}" fi exit 0 + + # ── PLATFORM-MANAGED BOOT REGRESSION (moonshot/kimi NOT_CONFIGURED) ────────── + # + # The REAL-boot complement to the deterministic unit suite + # (workspace_provision_platform_boot_test.go). Provisions a REAL staging + # claude-code workspace on the PLATFORM-managed path — provider=platform, + # model=moonshot/kimi-k2.6, NO tenant LLM key — and asserts it reaches + # status=online (NOT not_configured) and a completion returns 200, via the same + # online-wait + completion-assert the BYOK job uses. + # + # Why a SEPARATE job (not a matrix leg of e2e-staging-saas): the platform path + # injects NO secret and pins a different model, so its env block diverges from + # the BYOK job's. A dedicated job keeps each path's "verify key present" preflight + # honest (BYOK requires a key; platform requires its ABSENCE not to matter) and + # gives the regression its own named commit-status for branch protection. + # + # Add `E2E Staging Platform Boot` to branch protection after 3 consecutive + # green runs on main (de-flake window; this path shares the cp#245 + # boot-timeout flake surface the BYOK job has, so it must prove stable before + # it can BLOCK — see the gate-making plan in the PR body). 
+ # bp-required: pending #2187 + e2e-staging-platform-boot: + name: E2E Staging Platform Boot + runs-on: ubuntu-latest + # Phase 3 (RFC #219 §1): surface without blocking until the de-flake window + # closes. mc#1982: do NOT renew this mask silently — the gate-making plan + # tracks the flip to false under #2187. + continue-on-error: true + timeout-minutes: 45 + permissions: + contents: read + + env: + MOLECULE_CP_URL: https://staging-api.moleculesai.app + MOLECULE_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_DEFAULT_REGION: us-east-2 + E2E_AWS_LEAK_CHECK: required + E2E_AWS_TERMINATE_LEAKS: '1' + # The regression combo: claude-code + platform-managed + moonshot/kimi-k2.6. + # NO E2E_*_API_KEY is set — platform-managed billing is owned by Molecule via + # the CP LLM proxy. The harness's E2E_LLM_PATH=platform branch sends empty + # secrets and pin-selects the platform model. + E2E_RUNTIME: claude-code + E2E_LLM_PATH: platform + # Smoke mode: a single parent workspace is enough to prove online + + # completion for the platform path (the A2A/delegation matrix is the BYOK + # job's job). Override E2E_DEFAULT_PLATFORM_MODEL via workflow_dispatch to + # exercise another platform model id. 
+ E2E_MODE: smoke + E2E_RUN_ID: "platform-${{ github.run_id }}-${{ github.run_attempt }}" + E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }} + + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Verify admin token present + run: | + if [ -z "$MOLECULE_ADMIN_TOKEN" ]; then + echo "::error::CP_STAGING_ADMIN_API_TOKEN secret not set (Railway staging CP_ADMIN_API_TOKEN)" + exit 2 + fi + for var in AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY; do + if [ -z "${!var:-}" ]; then + echo "::error::$var not set — EC2 leak verification cannot run" + exit 2 + fi + done + echo "Admin token present ✓" + + - name: Assert NO BYOK key leaks into the platform run + run: | + # The whole point of this job is the platform-managed path. A stray + # E2E_*_API_KEY in the runner env would (via the harness) still be + # skipped by the E2E_LLM_PATH=platform branch — but warn loudly here + # (warning only, non-fatal) so a future env edit can't silently convert this + # into a masked BYOK run that no longer exercises the regression. + for var in E2E_MINIMAX_API_KEY E2E_ANTHROPIC_API_KEY E2E_OPENAI_API_KEY; do + if [ -n "${!var:-}" ]; then + echo "::warning::$var is set in this platform-boot job's env — the harness ignores it on E2E_LLM_PATH=platform, but it should not be wired here." + fi + done + echo "Platform-managed path: no tenant LLM key required ✓" + + - name: CP staging health preflight + run: | + code=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 10 "$MOLECULE_CP_URL/health") + if [ "$code" != "200" ]; then + echo "::error::Staging CP unhealthy (got HTTP $code). Skipping — not a workspace bug." 
+ exit 1 + fi + echo "Staging CP healthy ✓" + + - name: Run platform-managed boot E2E (online + completion) + id: e2e + run: bash tests/e2e/test_staging_full_saas.sh + + - name: Teardown safety net (runs on cancel/failure) + if: always() + env: + ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }} + run: | + set +e + orgs=$(curl -sS "$MOLECULE_CP_URL/cp/admin/orgs" \ + -H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \ + | python3 -c " + import json, sys, os, datetime + run_id = os.environ.get('GITHUB_RUN_ID', '') + d = json.load(sys.stdin) + today = datetime.date.today() + yesterday = today - datetime.timedelta(days=1) + dates = (today.strftime('%Y%m%d'), yesterday.strftime('%Y%m%d')) + # smoke mode slugs are e2e-smoke-YYYYMMDD-platform-<run_id>-... + if run_id: + prefixes = tuple(f'e2e-smoke-{d}-platform-{run_id}-' for d in dates) + else: + prefixes = tuple(f'e2e-smoke-{d}-platform-' for d in dates) + candidates = [o['slug'] for o in d.get('orgs', []) + if any(o.get('slug','').startswith(p) for p in prefixes) + and o.get('instance_status') not in ('purged',)] + print('\n'.join(candidates)) + " 2>/dev/null) + leaks=() + for slug in $orgs; do + echo "Safety-net teardown: $slug" + set +e + curl -sS -o /tmp/plat-cleanup.out -w "%{http_code}" \ + -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \ + -H "Authorization: Bearer $ADMIN_TOKEN" \ + -H "Content-Type: application/json" \ + -d "{\"confirm\":\"$slug\"}" >/tmp/plat-cleanup.code + set -e + code=$(cat /tmp/plat-cleanup.code 2>/dev/null || echo "000") + if [ "$code" = "200" ] || [ "$code" = "204" ]; then + echo "[teardown] deleted $slug (HTTP $code)" + else + echo "::warning::platform-boot teardown for $slug returned HTTP $code — sweep-stale-e2e-orgs will catch it within ~45 min. 
Body: $(head -c 300 /tmp/plat-cleanup.out 2>/dev/null)" + leaks+=("$slug") + fi + done + if [ ${#leaks[@]} -gt 0 ]; then + echo "::warning::platform-boot teardown left ${#leaks[@]} leak(s): ${leaks[*]}" + fi + exit 0 diff --git a/tests/e2e/lib/model_slug.sh b/tests/e2e/lib/model_slug.sh index dc56b0335..93207c96f 100755 --- a/tests/e2e/lib/model_slug.sh +++ b/tests/e2e/lib/model_slug.sh @@ -23,23 +23,61 @@ # their provider entries, otherwise the workspace boots # reachable but the first A2A call hits the wrong auth path. # -# When E2E_MODEL_SLUG is set, it overrides this dispatch — useful when an -# operator dispatches the workflow to test a specific slug. +# PLATFORM-MANAGED path (E2E_LLM_PATH=platform) — the moonshot/kimi +# NOT_CONFIGURED regression (RFC#340 Fix A #2187): +# +# The branches above all exercise BYOK: a tenant key (MINIMAX/ANTHROPIC/ +# OPENAI) is injected as a workspace secret and the model id resolves to that +# vendor's *BYOK* provider entry. That path NEVER exercises the platform arm — +# the exact arm that booted "moonshot/kimi-k2.6" into NOT_CONFIGURED in prod, +# because the generated config.yaml lacked the derived `provider: platform`. +# +# E2E_LLM_PATH=platform selects a platform-managed model id (slash-namespaced, +# no tenant key — Molecule owns billing via the CP LLM proxy). The default is +# "moonshot/kimi-k2.6", the headline incident combo. Override the specific +# platform model with E2E_MODEL_SLUG. The provision branch in +# test_staging_full_saas.sh sends NO secrets for this path (platform-managed +# needs none), so the workspace must boot online purely on the proxy env the +# control plane injects + the manifest-derived `provider: platform` that Fix A +# stamps. That is the REAL boot-path assertion the deterministic unit test +# (workspace_provision_platform_boot_test.go) cannot make. 
+# +# When E2E_MODEL_SLUG is set, it overrides this dispatch entirely — useful when +# an operator dispatches the workflow to test a specific slug (or a specific +# platform model id). # # Unit tested by tests/e2e/test_model_slug.sh — every branch must stay # pinned because regressions silently mask as "Could not resolve # authentication method" + the synth-E2E gate goes red without naming # the slug-format mismatch. +# Default platform-managed model for the platform-boot regression path. The +# exact id that booted NOT_CONFIGURED in prod. Must stay a member of the +# claude-code `platform` arm in workspace-server/internal/providers/providers.yaml +# (the deterministic suite TestEnsureDefaultConfig_StampsProviderForEverySSOTPlatformModel +# enforces every member of that arm derives provider=platform). Resolved INSIDE +# pick_model_slug via ${E2E_DEFAULT_PLATFORM_MODEL:-...} so callers can override +# it (or unset it) without tripping `set -u`. +E2E_DEFAULT_PLATFORM_MODEL_FALLBACK="moonshot/kimi-k2.6" + # Usage: pick_model_slug <runtime> # stdout: the slug string # E2E_MODEL_SLUG (env): if set + non-empty, used as-is (operator override) +# E2E_LLM_PATH=platform (env): select the platform-managed model id +# (E2E_DEFAULT_PLATFORM_MODEL) instead of a BYOK slug. Takes precedence over +# the per-key BYOK branches; E2E_MODEL_SLUG still wins over everything. pick_model_slug() { local runtime="${1:-}" if [ -n "${E2E_MODEL_SLUG:-}" ]; then printf '%s' "$E2E_MODEL_SLUG" return 0 fi + # Platform-managed path: the slash-namespaced platform model, no tenant key. + # Exercises the arm the moonshot/kimi NOT_CONFIGURED bug shipped on. 
+ if [ "${E2E_LLM_PATH:-}" = "platform" ]; then + printf '%s' "${E2E_DEFAULT_PLATFORM_MODEL:-$E2E_DEFAULT_PLATFORM_MODEL_FALLBACK}" + return 0 + fi case "$runtime" in hermes) printf 'openai/gpt-4o' ;; claude-code) diff --git a/tests/e2e/test_model_slug.sh b/tests/e2e/test_model_slug.sh index ecfb2134d..e3282c41b 100755 --- a/tests/e2e/test_model_slug.sh +++ b/tests/e2e/test_model_slug.sh @@ -65,6 +65,28 @@ assert_eq "claude-code + both keys → MiniMax priority" "$got" "Mini run_test "unknown runtime → slash-form fallback" gemini "openai/gpt-4o" run_test "empty runtime → slash-form fallback" "" "openai/gpt-4o" +# ── Platform-managed path (E2E_LLM_PATH=platform) ── +# The moonshot/kimi NOT_CONFIGURED regression path (RFC#340 Fix A #2187). +# Selects the slash-namespaced platform model (default moonshot/kimi-k2.6), +# takes precedence over the per-key BYOK branches, and is itself overridden by +# E2E_MODEL_SLUG. These pins guard the harness's ability to drive the platform +# arm — the one the prod bug shipped on. 
+echo +echo "Test: pick_model_slug — platform-managed path (E2E_LLM_PATH=platform)" +echo + +got=$(unset E2E_MODEL_SLUG E2E_DEFAULT_PLATFORM_MODEL; E2E_LLM_PATH=platform pick_model_slug claude-code) +assert_eq "claude-code + platform path → headline kimi model" "$got" "moonshot/kimi-k2.6" + +got=$(unset E2E_MODEL_SLUG E2E_DEFAULT_PLATFORM_MODEL; E2E_LLM_PATH=platform E2E_MINIMAX_API_KEY="mx-stray" pick_model_slug claude-code) +assert_eq "platform path beats a stray BYOK key (no mask)" "$got" "moonshot/kimi-k2.6" + +got=$(unset E2E_MODEL_SLUG; E2E_LLM_PATH=platform E2E_DEFAULT_PLATFORM_MODEL="minimax/MiniMax-M3" pick_model_slug claude-code) +assert_eq "platform path honours E2E_DEFAULT_PLATFORM_MODEL" "$got" "minimax/MiniMax-M3" + +got=$(unset E2E_DEFAULT_PLATFORM_MODEL; E2E_MODEL_SLUG="anthropic/claude-opus-4-7" E2E_LLM_PATH=platform pick_model_slug claude-code) +assert_eq "E2E_MODEL_SLUG still wins over platform path" "$got" "anthropic/claude-opus-4-7" + # ── Override via E2E_MODEL_SLUG ── # When the operator sets E2E_MODEL_SLUG, the per-runtime dispatch is # bypassed. Used during workflow_dispatch to A/B specific slugs. diff --git a/tests/e2e/test_staging_full_saas.sh b/tests/e2e/test_staging_full_saas.sh index b7d8ea1b7..bbe8f0c2d 100755 --- a/tests/e2e/test_staging_full_saas.sh +++ b/tests/e2e/test_staging_full_saas.sh @@ -476,7 +476,19 @@ wait_workspaces_online_routable() { # All empty → '{}' (workspace will fail at first turn with an # expected, actionable auth error rather than masking the test). SECRETS_JSON='{}' -if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then +# Platform-managed path (E2E_LLM_PATH=platform) — the moonshot/kimi +# NOT_CONFIGURED regression (RFC#340 Fix A #2187). 
Molecule owns billing via the +# CP LLM proxy, so the workspace needs NO tenant key: provision with empty +# secrets and let the workspace boot purely on (a) the proxy env the control +# plane injects + (b) the manifest-derived `provider: platform` Fix A stamps into +# the generated config.yaml. This is the path that booted NOT_CONFIGURED in prod +# precisely because the BYOK branches below never exercise it. We deliberately +# skip the key-injection branches so a stray E2E_*_API_KEY in the runner env +# cannot silently convert this into a BYOK run and mask the regression. +if [ "${E2E_LLM_PATH:-}" = "platform" ]; then + log " LLM path: PLATFORM-MANAGED (no tenant key; proxy + Fix A provider stamp)" + SECRETS_JSON='{}' +elif [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then SECRETS_JSON=$(python3 -c " import json, os k = os.environ['E2E_MINIMAX_API_KEY'] diff --git a/workspace-server/internal/handlers/workspace_provision_platform_boot_test.go b/workspace-server/internal/handlers/workspace_provision_platform_boot_test.go new file mode 100644 index 000000000..ec2b872db --- /dev/null +++ b/workspace-server/internal/handlers/workspace_provision_platform_boot_test.go @@ -0,0 +1,196 @@ +package handlers + +// workspace_provision_platform_boot_test.go — the deterministic, SSOT-driven +// regression suite for the class of bug behind the moonshot/kimi +// "canvas-created claude-code workspace boots NOT_CONFIGURED" production +// incident (RFC#340 Fix A #2187, canvas Fix C #2188). +// +// THE BUG (what shipped to prod): +// A claude-code workspace created via the canvas with provider=Platform + +// model="moonshot/kimi-k2.6" booted NOT_CONFIGURED. Unit tests passed; the +// REAL boot path was broken. 
ensureDefaultConfig generated a config.yaml that +// carried NO derived `provider:` key, so the cp#329 config-bundle the adapter +// actually reads left molecule-runtime config.py to slash-split the model id +// "moonshot/kimi-k2.6" -> provider="moonshot", which is NOT in the providers +// registry -> NOT_CONFIGURED. +// +// THE FIX A INVARIANT (this file pins it, and pins it for the WHOLE class): +// ensureDefaultConfig MUST stamp the manifest-derived provider into the +// generated config.yaml — at BOTH the top level and under runtime_config — +// for every (runtime, model) the providers SSOT maps to a platform provider. +// The single-combo pin (TestEnsureDefaultConfig_StampsDerivedProvider in +// workspace_provision_test.go) proves the headline case. THIS file closes the +// gap that single pin leaves: it is PARAMETRIZED OVER THE SSOT, so when a NEW +// platform model is added to providers.yaml for claude-code (or any runtime +// with a platform arm), the new id is automatically covered — a future +// platform model that fails to derive `provider: platform` fails THIS test at +// build time, before it can ship a NOT_CONFIGURED boot. +// +// WHY SSOT-DRIVEN AND NOT A HAND-MAINTAINED LIST: +// The original bug was a divergence between "what the canvas offers" +// (providers.yaml platform arm) and "what the config generator stamps". A +// hardcoded test model list would itself drift from the SSOT and re-open the +// same divergence gap. By enumerating the platform model set directly from the +// loaded providers.Manifest (the SAME manifest ensureDefaultConfig's +// deriveDefaultConfigProvider resolves against), this test cannot fall behind +// the offered set: add a platform model, get a test case for free; the test +// only passes if the generator actually stamps it. +// +// SCOPE: deterministic, no live infra. 
The REAL-boot complement (provision a +// staging workspace and assert status=online + a completion returns 200 for the +// SAME combo) is the bash staging harness — see +// tests/e2e/test_staging_full_saas.sh (E2E_LLM_PATH=platform) and the +// e2e-staging-platform-boot job in .gitea/workflows/e2e-staging-saas.yml. That +// asserts the REAL artifact (booted status / completion); THIS asserts the +// deterministic config-generation invariant the real boot depends on. + +import ( + "testing" + + "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models" + "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers" + "gopkg.in/yaml.v3" +) + +// platformModelsForRuntime returns the exact model ids the providers SSOT lists +// under runtime rt's `platform` native provider arm — the set the canvas offers +// as provider=Platform and the set ensureDefaultConfig MUST stamp +// `provider: platform` for. Reads the SAME embedded manifest the config +// generator derives against (providers.LoadManifest), so it can never drift from +// the offered set. Returns nil when the runtime has no platform arm. +func platformModelsForRuntime(t *testing.T, rt string) []string { + t.Helper() + m, err := providers.LoadManifest() + if err != nil { + t.Fatalf("LoadManifest: %v", err) + } + native, ok := m.Runtimes[rt] + if !ok { + t.Fatalf("providers SSOT has no runtimes entry for %q", rt) + } + for _, ref := range native.Providers { + if ref.Name == "platform" { + return ref.Models + } + } + return nil +} + +// TestEnsureDefaultConfig_StampsProviderForEverySSOTPlatformModel is the +// class-level regression for the moonshot/kimi NOT_CONFIGURED incident. For +// EVERY model the providers SSOT offers under claude-code's platform arm, it +// asserts the generated config.yaml carries the manifest-derived provider at +// both the top level and under runtime_config. 
This is the Fix A invariant, +// parametrized over the SSOT so a newly-offered platform model cannot ship +// without the stamp (the exact divergence — offered-but-not-stamped — that +// booted "moonshot/kimi-k2.6" into NOT_CONFIGURED). +func TestEnsureDefaultConfig_StampsProviderForEverySSOTPlatformModel(t *testing.T) { + const runtime = "claude-code" + platformModels := platformModelsForRuntime(t, runtime) + if len(platformModels) == 0 { + t.Fatalf("providers SSOT lists no platform models for runtime %q — the regression matrix would be empty; the SSOT shape changed (this test is the canary)", runtime) + } + // Headline sentinel: the exact id that booted NOT_CONFIGURED in prod MUST be + // in the enumerated set. If a refactor drops it from the platform arm, this + // test must still cover it explicitly — fail loud rather than silently + // shrinking the matrix. + if !containsString(platformModels, "moonshot/kimi-k2.6") { + t.Fatalf("the headline incident model \"moonshot/kimi-k2.6\" is no longer in the claude-code platform SSOT set (%v) — regression coverage for the original bug would be lost", platformModels) + } + + for _, model := range platformModels { + model := model + t.Run(model, func(t *testing.T) { + broadcaster := newTestBroadcaster() + handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + + files := handler.ensureDefaultConfig("ws-platform-boot", models.CreateWorkspacePayload{ + Name: "Platform Boot Agent", + Tier: 2, + Runtime: runtime, + Model: model, + }) + + raw, ok := files["config.yaml"] + if !ok { + t.Fatalf("expected config.yaml in generated files for model %q", model) + } + + var parsed struct { + Model string `yaml:"model"` + Provider string `yaml:"provider"` + RuntimeConfig struct { + Model string `yaml:"model"` + Provider string `yaml:"provider"` + } `yaml:"runtime_config"` + } + if err := yaml.Unmarshal(raw, &parsed); err != nil { + t.Fatalf("generated YAML invalid for model %q: %v\n%s", model, err, raw) 
+ } + + // The load-bearing invariant: BOTH the top-level and the + // runtime_config provider must be exactly "platform". An empty or + // vendor-namespace ("moonshot") value here is the prod NOT_CONFIGURED + // boot — the adapter would slash-split the model id and look up an + // unregistered provider. + if parsed.Provider != "platform" { + t.Errorf("model %q: top-level provider = %q, want \"platform\" (Fix A invariant — empty/vendor value is the NOT_CONFIGURED boot)\n%s", model, parsed.Provider, raw) + } + if parsed.RuntimeConfig.Provider != "platform" { + t.Errorf("model %q: runtime_config.provider = %q, want \"platform\"\n%s", model, parsed.RuntimeConfig.Provider, raw) + } + // Sanity: the config must still render a non-empty model (a config + // with provider but no model is equally undeployable). + if parsed.Model == "" { + t.Errorf("model %q: generated config has empty top-level model\n%s", model, raw) + } + }) + } +} + +// TestPlatformModelDeriveProvider_SSOTConsistency is the upstream half of the +// same invariant, one layer below ensureDefaultConfig: it asserts the providers +// manifest's DeriveProvider — the resolver deriveDefaultConfigProvider calls — +// maps every SSOT-offered claude-code platform model to a provider whose Name is +// "platform". If DeriveProvider itself regressed (e.g. a model_prefix_match +// change made "moonshot/kimi-k2.6" resolve to the bare "moonshot" entry again), +// this fails closer to the root cause than the config-shape test above, making +// the diagnosis unambiguous: SSOT/derive regression vs config-emission +// regression. 
+func TestPlatformModelDeriveProvider_SSOTConsistency(t *testing.T) { + const runtime = "claude-code" + m, err := providers.LoadManifest() + if err != nil { + t.Fatalf("LoadManifest: %v", err) + } + platformModels := platformModelsForRuntime(t, runtime) + if len(platformModels) == 0 { + t.Fatalf("no platform models for %q in SSOT", runtime) + } + for _, model := range platformModels { + model := model + t.Run(model, func(t *testing.T) { + // nil availableAuthEnv mirrors deriveDefaultConfigProvider's call at + // config-generation time (no per-workspace auth context yet). + p, err := m.DeriveProvider(runtime, model, nil) + if err != nil { + t.Fatalf("DeriveProvider(%q, %q): unexpected error %v — an SSOT-offered platform model MUST derive", runtime, model, err) + } + if p.Name != "platform" { + t.Errorf("DeriveProvider(%q, %q).Name = %q, want \"platform\" (this is the exact slash-split-to-vendor regression that booted NOT_CONFIGURED)", runtime, model, p.Name) + } + }) + } +} + +// containsString reports whether want is an element of xs. It lives here (not +// in a shared test util) so this regression file is self-contained and can be +// read top to bottom without chasing helpers across the package. +func containsString(xs []string, want string) bool { + for _, x := range xs { + if x == want { + return true + } + } + return false +} -- 2.52.0