test(provision): SSOT-parametrized + real-boot regression for moonshot/kimi NOT_CONFIGURED #2197
@@ -48,8 +48,10 @@ on:
|
||||
- 'workspace-server/internal/handlers/a2a_proxy.go'
|
||||
- 'workspace-server/internal/middleware/**'
|
||||
- 'workspace-server/internal/provisioner/**'
|
||||
- 'workspace-server/internal/providers/providers.yaml'
|
||||
- 'tests/e2e/test_staging_full_saas.sh'
|
||||
- 'tests/e2e/lib/completion_assert.sh'
|
||||
- 'tests/e2e/lib/model_slug.sh'
|
||||
- 'tests/e2e/lib/aws_leak_check.sh'
|
||||
- 'tests/e2e/test_aws_leak_check.sh'
|
||||
- '.gitea/workflows/e2e-staging-saas.yml'
|
||||
@@ -61,8 +63,10 @@ on:
|
||||
- 'workspace-server/internal/handlers/a2a_proxy.go'
|
||||
- 'workspace-server/internal/middleware/**'
|
||||
- 'workspace-server/internal/provisioner/**'
|
||||
- 'workspace-server/internal/providers/providers.yaml'
|
||||
- 'tests/e2e/test_staging_full_saas.sh'
|
||||
- 'tests/e2e/lib/completion_assert.sh'
|
||||
- 'tests/e2e/lib/model_slug.sh'
|
||||
- 'tests/e2e/lib/aws_leak_check.sh'
|
||||
- 'tests/e2e/test_aws_leak_check.sh'
|
||||
- '.gitea/workflows/e2e-staging-saas.yml'
|
||||
@@ -315,3 +319,148 @@ jobs:
|
||||
echo "::warning::saas teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
|
||||
fi
|
||||
exit 0
|
||||
|
||||
# ── PLATFORM-MANAGED BOOT REGRESSION (moonshot/kimi NOT_CONFIGURED) ──────────
|
||||
#
|
||||
# The REAL-boot complement to the deterministic unit suite
|
||||
# (workspace_provision_platform_boot_test.go). Provisions a REAL staging
|
||||
# claude-code workspace on the PLATFORM-managed path — provider=platform,
|
||||
# model=moonshot/kimi-k2.6, NO tenant LLM key — and asserts it reaches
|
||||
# status=online (NOT not_configured) and a completion returns 200, via the same
|
||||
# online-wait + completion-assert the BYOK job uses.
|
||||
#
|
||||
# Why a SEPARATE job (not a matrix leg of e2e-staging-saas): the platform path
|
||||
# injects NO secret and pins a different model, so its env block diverges from
|
||||
# the BYOK job's. A dedicated job keeps each path's "verify key present" preflight
|
||||
# honest (BYOK requires a key; platform requires its ABSENCE not to matter) and
|
||||
# gives the regression its own named commit-status for branch protection.
|
||||
#
|
||||
# Add `E2E Staging Platform Boot` to branch protection after 3 consecutive
|
||||
# green runs on main (de-flake window; this path shares the cp#245
|
||||
# boot-timeout flake surface the BYOK job has, so it must prove stable before
|
||||
# it can BLOCK — see the gate-making plan in the PR body).
|
||||
# bp-required: pending #2187
|
||||
e2e-staging-platform-boot:
  name: E2E Staging Platform Boot
  runs-on: ubuntu-latest
  # Phase 3 (RFC #219 §1): surface without blocking until the de-flake window
  # closes. mc#1982: do NOT renew this mask silently — the gate-making plan
  # tracks the flip to false under #2187.
  continue-on-error: true
  timeout-minutes: 45
  permissions:
    contents: read

  env:
    MOLECULE_CP_URL: https://staging-api.moleculesai.app
    MOLECULE_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
    AWS_DEFAULT_REGION: us-east-2
    E2E_AWS_LEAK_CHECK: required
    E2E_AWS_TERMINATE_LEAKS: '1'
    # The regression combo: claude-code + platform-managed + moonshot/kimi-k2.6.
    # NO E2E_*_API_KEY is set — platform-managed billing is owned by Molecule via
    # the CP LLM proxy. The harness's E2E_LLM_PATH=platform branch sends empty
    # secrets and pin-selects the platform model.
    E2E_RUNTIME: claude-code
    E2E_LLM_PATH: platform
    # Smoke mode: a single parent workspace is enough to prove online +
    # completion for the platform path (the A2A/delegation matrix is the BYOK
    # job's job). Override E2E_DEFAULT_PLATFORM_MODEL via workflow_dispatch to
    # exercise another platform model id.
    E2E_MODE: smoke
    E2E_RUN_ID: "platform-${{ github.run_id }}-${{ github.run_attempt }}"
    # NOTE(review): github.event.inputs values are strings, so a boolean input
    # left at false may arrive as the truthy string 'false' and yield '1' here —
    # confirm against keep_org's declared input type. Also confirm whether the
    # teardown safety net below is meant to honour E2E_KEEP_ORG; as written it
    # deletes matching orgs unconditionally.
    E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}

  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

    - name: Verify admin token present
      run: |
        # Fail fast (exit 2) when the CP admin token or the AWS credentials
        # needed for EC2 leak verification are missing from the job env.
        if [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
          echo "::error::CP_STAGING_ADMIN_API_TOKEN secret not set (Railway staging CP_ADMIN_API_TOKEN)"
          exit 2
        fi
        for var in AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY; do
          if [ -z "${!var:-}" ]; then
            echo "::error::$var not set — EC2 leak verification cannot run"
            exit 2
          fi
        done
        echo "Admin token present ✓"

    - name: Assert NO BYOK key leaks into the platform run
      run: |
        # The whole point of this job is the platform-managed path. A stray
        # E2E_*_API_KEY in the runner env would (via the harness) still be
        # skipped by the E2E_LLM_PATH=platform branch — so this step does not
        # fail the job; it raises a warning annotation per stray key so a
        # future env edit that wires a BYOK key in here is visible in the run.
        for var in E2E_MINIMAX_API_KEY E2E_ANTHROPIC_API_KEY E2E_OPENAI_API_KEY; do
          if [ -n "${!var:-}" ]; then
            echo "::warning::$var is set in this platform-boot job's env — the harness ignores it on E2E_LLM_PATH=platform, but it should not be wired here."
          fi
        done
        echo "Platform-managed path: no tenant LLM key required ✓"

    - name: CP staging health preflight
      run: |
        # `|| true`: run steps execute under `bash -e`, so a curl transport
        # failure (DNS, connect, --max-time) would otherwise abort the step
        # right here, before the ::error:: annotation below is printed. curl
        # reports 000 for %{http_code} when no response was received; default
        # to 000 as well in case nothing was printed at all.
        code=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 10 "$MOLECULE_CP_URL/health" || true)
        code=${code:-000}
        if [ "$code" != "200" ]; then
          echo "::error::Staging CP unhealthy (got HTTP $code). Skipping — not a workspace bug."
          exit 1
        fi
        echo "Staging CP healthy ✓"

    - name: Run platform-managed boot E2E (online + completion)
      id: e2e
      run: bash tests/e2e/test_staging_full_saas.sh

    - name: Teardown safety net (runs on cancel/failure)
      if: always()
      env:
        ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
      run: |
        # Best-effort cleanup: errexit stays off for the whole step and the
        # step always exits 0; failures surface as ::warning:: annotations.
        set +e
        # List orgs from the CP admin API and keep the slugs that belong to
        # this run (today's or yesterday's date stamp), skipping purged ones.
        orgs=$(curl -sS "$MOLECULE_CP_URL/cp/admin/orgs" \
          -H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
          | python3 -c "
        import json, sys, os, datetime
        run_id = os.environ.get('GITHUB_RUN_ID', '')
        doc = json.load(sys.stdin)
        today = datetime.date.today()
        yesterday = today - datetime.timedelta(days=1)
        dates = (today.strftime('%Y%m%d'), yesterday.strftime('%Y%m%d'))
        # smoke mode slugs are e2e-smoke-YYYYMMDD-platform-<run_id>-...
        if run_id:
            prefixes = tuple(f'e2e-smoke-{day}-platform-{run_id}-' for day in dates)
        else:
            prefixes = tuple(f'e2e-smoke-{day}-platform-' for day in dates)
        candidates = [o['slug'] for o in doc.get('orgs', [])
                      if any(o.get('slug','').startswith(p) for p in prefixes)
                      and o.get('instance_status') not in ('purged',)]
        print('\n'.join(candidates))
        " 2>/dev/null)
        leaks=()
        for slug in $orgs; do
          echo "Safety-net teardown: $slug"
          # Response body goes to .out, the HTTP status (from -w) to .code.
          curl -sS -o /tmp/plat-cleanup.out -w "%{http_code}" \
            -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
            -H "Authorization: Bearer $ADMIN_TOKEN" \
            -H "Content-Type: application/json" \
            -d "{\"confirm\":\"$slug\"}" >/tmp/plat-cleanup.code
          code=$(cat /tmp/plat-cleanup.code 2>/dev/null || echo "000")
          if [ "$code" = "200" ] || [ "$code" = "204" ]; then
            echo "[teardown] deleted $slug (HTTP $code)"
          else
            echo "::warning::platform-boot teardown for $slug returned HTTP $code — sweep-stale-e2e-orgs will catch it within ~45 min. Body: $(head -c 300 /tmp/plat-cleanup.out 2>/dev/null)"
            leaks+=("$slug")
          fi
        done
        if [ ${#leaks[@]} -gt 0 ]; then
          echo "::warning::platform-boot teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
        fi
        exit 0
|
||||
|
||||
@@ -23,23 +23,61 @@
|
||||
# their provider entries, otherwise the workspace boots
|
||||
# reachable but the first A2A call hits the wrong auth path.
|
||||
#
|
||||
# When E2E_MODEL_SLUG is set, it overrides this dispatch — useful when an
|
||||
# operator dispatches the workflow to test a specific slug.
|
||||
# PLATFORM-MANAGED path (E2E_LLM_PATH=platform) — the moonshot/kimi
|
||||
# NOT_CONFIGURED regression (RFC#340 Fix A #2187):
|
||||
#
|
||||
# The branches above all exercise BYOK: a tenant key (MINIMAX/ANTHROPIC/
|
||||
# OPENAI) is injected as a workspace secret and the model id resolves to that
|
||||
# vendor's *BYOK* provider entry. That path NEVER exercises the platform arm —
|
||||
# the exact arm that booted "moonshot/kimi-k2.6" into NOT_CONFIGURED in prod,
|
||||
# because the generated config.yaml lacked the derived `provider: platform`.
|
||||
#
|
||||
# E2E_LLM_PATH=platform selects a platform-managed model id (slash-namespaced,
|
||||
# no tenant key — Molecule owns billing via the CP LLM proxy). The default is
|
||||
# "moonshot/kimi-k2.6", the headline incident combo. Override the specific
|
||||
# platform model with E2E_MODEL_SLUG. The provision branch in
|
||||
# test_staging_full_saas.sh sends NO secrets for this path (platform-managed
|
||||
# needs none), so the workspace must boot online purely on the proxy env the
|
||||
# control plane injects + the manifest-derived `provider: platform` that Fix A
|
||||
# stamps. That is the REAL boot-path assertion the deterministic unit test
|
||||
# (workspace_provision_platform_boot_test.go) cannot make.
|
||||
#
|
||||
# When E2E_MODEL_SLUG is set, it overrides this dispatch entirely — useful when
|
||||
# an operator dispatches the workflow to test a specific slug (or a specific
|
||||
# platform model id).
|
||||
#
|
||||
# Unit tested by tests/e2e/test_model_slug.sh — every branch must stay
|
||||
# pinned because regressions silently mask as "Could not resolve
|
||||
# authentication method" + the synth-E2E gate goes red without naming
|
||||
# the slug-format mismatch.
|
||||
|
||||
# Default platform-managed model for the platform-boot regression path. The
|
||||
# exact id that booted NOT_CONFIGURED in prod. Must stay a member of the
|
||||
# claude-code `platform` arm in workspace-server/internal/providers/providers.yaml
|
||||
# (the deterministic suite TestEnsureDefaultConfig_StampsProviderForEverySSOTPlatformModel
|
||||
# enforces every member of that arm derives provider=platform). Resolved INSIDE
|
||||
# pick_model_slug via ${E2E_DEFAULT_PLATFORM_MODEL:-...} so callers can override
|
||||
# it (or unset it) without tripping `set -u`.
|
||||
E2E_DEFAULT_PLATFORM_MODEL_FALLBACK="moonshot/kimi-k2.6"
|
||||
|
||||
# Usage: pick_model_slug <runtime>
|
||||
# stdout: the slug string
|
||||
# E2E_MODEL_SLUG (env): if set + non-empty, used as-is (operator override)
|
||||
# E2E_LLM_PATH=platform (env): select the platform-managed model id
|
||||
# (E2E_DEFAULT_PLATFORM_MODEL) instead of a BYOK slug. Takes precedence over
|
||||
# the per-key BYOK branches; E2E_MODEL_SLUG still wins over everything.
|
||||
pick_model_slug() {
|
||||
local runtime="${1:-}"
|
||||
if [ -n "${E2E_MODEL_SLUG:-}" ]; then
|
||||
printf '%s' "$E2E_MODEL_SLUG"
|
||||
return 0
|
||||
fi
|
||||
# Platform-managed path: the slash-namespaced platform model, no tenant key.
|
||||
# Exercises the arm the moonshot/kimi NOT_CONFIGURED bug shipped on.
|
||||
if [ "${E2E_LLM_PATH:-}" = "platform" ]; then
|
||||
printf '%s' "${E2E_DEFAULT_PLATFORM_MODEL:-$E2E_DEFAULT_PLATFORM_MODEL_FALLBACK}"
|
||||
return 0
|
||||
fi
|
||||
case "$runtime" in
|
||||
hermes) printf 'openai/gpt-4o' ;;
|
||||
claude-code)
|
||||
|
||||
@@ -65,6 +65,28 @@ assert_eq "claude-code + both keys → MiniMax priority" "$got" "Mini
|
||||
run_test "unknown runtime → slash-form fallback" gemini "openai/gpt-4o"
|
||||
run_test "empty runtime → slash-form fallback" "" "openai/gpt-4o"
|
||||
|
||||
# ── Platform-managed path (E2E_LLM_PATH=platform) ──
|
||||
# The moonshot/kimi NOT_CONFIGURED regression path (RFC#340 Fix A #2187).
|
||||
# Selects the slash-namespaced platform model (default moonshot/kimi-k2.6),
|
||||
# takes precedence over the per-key BYOK branches, and is itself overridden by
|
||||
# E2E_MODEL_SLUG. These pins guard the harness's ability to drive the platform
|
||||
# arm — the one the prod bug shipped on.
|
||||
echo
|
||||
echo "Test: pick_model_slug — platform-managed path (E2E_LLM_PATH=platform)"
|
||||
echo
|
||||
|
||||
got=$(unset E2E_MODEL_SLUG E2E_DEFAULT_PLATFORM_MODEL; E2E_LLM_PATH=platform pick_model_slug claude-code)
|
||||
assert_eq "claude-code + platform path → headline kimi model" "$got" "moonshot/kimi-k2.6"
|
||||
|
||||
got=$(unset E2E_MODEL_SLUG E2E_DEFAULT_PLATFORM_MODEL; E2E_LLM_PATH=platform E2E_MINIMAX_API_KEY="mx-stray" pick_model_slug claude-code)
|
||||
assert_eq "platform path beats a stray BYOK key (no mask)" "$got" "moonshot/kimi-k2.6"
|
||||
|
||||
got=$(unset E2E_MODEL_SLUG; E2E_LLM_PATH=platform E2E_DEFAULT_PLATFORM_MODEL="minimax/MiniMax-M3" pick_model_slug claude-code)
|
||||
assert_eq "platform path honours E2E_DEFAULT_PLATFORM_MODEL" "$got" "minimax/MiniMax-M3"
|
||||
|
||||
got=$(unset E2E_DEFAULT_PLATFORM_MODEL; E2E_MODEL_SLUG="anthropic/claude-opus-4-7" E2E_LLM_PATH=platform pick_model_slug claude-code)
|
||||
assert_eq "E2E_MODEL_SLUG still wins over platform path" "$got" "anthropic/claude-opus-4-7"
|
||||
|
||||
# ── Override via E2E_MODEL_SLUG ──
|
||||
# When the operator sets E2E_MODEL_SLUG, the per-runtime dispatch is
|
||||
# bypassed. Used during workflow_dispatch to A/B specific slugs.
|
||||
|
||||
@@ -476,7 +476,19 @@ wait_workspaces_online_routable() {
|
||||
# All empty → '{}' (workspace will fail at first turn with an
|
||||
# expected, actionable auth error rather than masking the test).
|
||||
SECRETS_JSON='{}'
|
||||
if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
|
||||
# Platform-managed path (E2E_LLM_PATH=platform) — the moonshot/kimi
|
||||
# NOT_CONFIGURED regression (RFC#340 Fix A #2187). Molecule owns billing via the
|
||||
# CP LLM proxy, so the workspace needs NO tenant key: provision with empty
|
||||
# secrets and let the workspace boot purely on (a) the proxy env the control
|
||||
# plane injects + (b) the manifest-derived `provider: platform` Fix A stamps into
|
||||
# the generated config.yaml. This is the path that booted NOT_CONFIGURED in prod
|
||||
# precisely because the BYOK branches below never exercise it. We deliberately
|
||||
# skip the key-injection branches so a stray E2E_*_API_KEY in the runner env
|
||||
# cannot silently convert this into a BYOK run and mask the regression.
|
||||
if [ "${E2E_LLM_PATH:-}" = "platform" ]; then
|
||||
log " LLM path: PLATFORM-MANAGED (no tenant key; proxy + Fix A provider stamp)"
|
||||
SECRETS_JSON='{}'
|
||||
elif [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
|
||||
SECRETS_JSON=$(python3 -c "
|
||||
import json, os
|
||||
k = os.environ['E2E_MINIMAX_API_KEY']
|
||||
|
||||
@@ -0,0 +1,196 @@
|
||||
package handlers
|
||||
|
||||
// workspace_provision_platform_boot_test.go — the deterministic, SSOT-driven
|
||||
// regression suite for the class of bug behind the moonshot/kimi
|
||||
// "canvas-created claude-code workspace boots NOT_CONFIGURED" production
|
||||
// incident (RFC#340 Fix A #2187, canvas Fix C #2188).
|
||||
//
|
||||
// THE BUG (what shipped to prod):
|
||||
// A claude-code workspace created via the canvas with provider=Platform +
|
||||
// model="moonshot/kimi-k2.6" booted NOT_CONFIGURED. Unit tests passed; the
|
||||
// REAL boot path was broken. ensureDefaultConfig generated a config.yaml that
|
||||
// carried NO derived `provider:` key, so the cp#329 config-bundle the adapter
|
||||
// actually reads left molecule-runtime config.py to slash-split the model id
|
||||
// "moonshot/kimi-k2.6" -> provider="moonshot", which is NOT in the providers
|
||||
// registry -> NOT_CONFIGURED.
|
||||
//
|
||||
// THE FIX A INVARIANT (this file pins it, and pins it for the WHOLE class):
|
||||
// ensureDefaultConfig MUST stamp the manifest-derived provider into the
|
||||
// generated config.yaml — at BOTH the top level and under runtime_config —
|
||||
// for every (runtime, model) the providers SSOT maps to a platform provider.
|
||||
// The single-combo pin (TestEnsureDefaultConfig_StampsDerivedProvider in
|
||||
// workspace_provision_test.go) proves the headline case. THIS file closes the
|
||||
// gap that single pin leaves: it is PARAMETRIZED OVER THE SSOT, so when a NEW
|
||||
// platform model is added to providers.yaml for claude-code (or any runtime
|
||||
// with a platform arm), the new id is automatically covered — a future
|
||||
// platform model that fails to derive `provider: platform` fails THIS test at
|
||||
// build time, before it can ship a NOT_CONFIGURED boot.
|
||||
//
|
||||
// WHY SSOT-DRIVEN AND NOT A HAND-MAINTAINED LIST:
|
||||
// The original bug was a divergence between "what the canvas offers"
|
||||
// (providers.yaml platform arm) and "what the config generator stamps". A
|
||||
// hardcoded test model list would itself drift from the SSOT and re-open the
|
||||
// same divergence gap. By enumerating the platform model set directly from the
|
||||
// loaded providers.Manifest (the SAME manifest ensureDefaultConfig's
|
||||
// deriveDefaultConfigProvider resolves against), this test cannot fall behind
|
||||
// the offered set: add a platform model, get a test case for free; the test
|
||||
// only passes if the generator actually stamps it.
|
||||
//
|
||||
// SCOPE: deterministic, no live infra. The REAL-boot complement (provision a
|
||||
// staging workspace and assert status=online + a completion returns 200 for the
|
||||
// SAME combo) is the bash staging harness — see
|
||||
// tests/e2e/test_staging_full_saas.sh (E2E_LLM_PATH=platform) and the
|
||||
// e2e-staging-platform-boot job in .gitea/workflows/e2e-staging-saas.yml. That
|
||||
// asserts the REAL artifact (booted status / completion); THIS asserts the
|
||||
// deterministic config-generation invariant the real boot depends on.
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// platformModelsForRuntime returns the exact model ids the providers SSOT lists
|
||||
// under runtime rt's `platform` native provider arm — the set the canvas offers
|
||||
// as provider=Platform and the set ensureDefaultConfig MUST stamp
|
||||
// `provider: platform` for. Reads the SAME embedded manifest the config
|
||||
// generator derives against (providers.LoadManifest), so it can never drift from
|
||||
// the offered set. Returns nil when the runtime has no platform arm.
|
||||
func platformModelsForRuntime(t *testing.T, rt string) []string {
|
||||
t.Helper()
|
||||
m, err := providers.LoadManifest()
|
||||
if err != nil {
|
||||
t.Fatalf("LoadManifest: %v", err)
|
||||
}
|
||||
native, ok := m.Runtimes[rt]
|
||||
if !ok {
|
||||
t.Fatalf("providers SSOT has no runtimes entry for %q", rt)
|
||||
}
|
||||
for _, ref := range native.Providers {
|
||||
if ref.Name == "platform" {
|
||||
return ref.Models
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// TestEnsureDefaultConfig_StampsProviderForEverySSOTPlatformModel is the
|
||||
// class-level regression for the moonshot/kimi NOT_CONFIGURED incident. For
|
||||
// EVERY model the providers SSOT offers under claude-code's platform arm, it
|
||||
// asserts the generated config.yaml carries the manifest-derived provider at
|
||||
// both the top level and under runtime_config. This is the Fix A invariant,
|
||||
// parametrized over the SSOT so a newly-offered platform model cannot ship
|
||||
// without the stamp (the exact divergence — offered-but-not-stamped — that
|
||||
// booted "moonshot/kimi-k2.6" into NOT_CONFIGURED).
|
||||
func TestEnsureDefaultConfig_StampsProviderForEverySSOTPlatformModel(t *testing.T) {
|
||||
const runtime = "claude-code"
|
||||
platformModels := platformModelsForRuntime(t, runtime)
|
||||
if len(platformModels) == 0 {
|
||||
t.Fatalf("providers SSOT lists no platform models for runtime %q — the regression matrix would be empty; the SSOT shape changed (this test is the canary)", runtime)
|
||||
}
|
||||
// Headline sentinel: the exact id that booted NOT_CONFIGURED in prod MUST be
|
||||
// in the enumerated set. If a refactor drops it from the platform arm, this
|
||||
// test must still cover it explicitly — fail loud rather than silently
|
||||
// shrinking the matrix.
|
||||
if !containsString(platformModels, "moonshot/kimi-k2.6") {
|
||||
t.Fatalf("the headline incident model \"moonshot/kimi-k2.6\" is no longer in the claude-code platform SSOT set (%v) — regression coverage for the original bug would be lost", platformModels)
|
||||
}
|
||||
|
||||
for _, model := range platformModels {
|
||||
model := model
|
||||
t.Run(model, func(t *testing.T) {
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
files := handler.ensureDefaultConfig("ws-platform-boot", models.CreateWorkspacePayload{
|
||||
Name: "Platform Boot Agent",
|
||||
Tier: 2,
|
||||
Runtime: runtime,
|
||||
Model: model,
|
||||
})
|
||||
|
||||
raw, ok := files["config.yaml"]
|
||||
if !ok {
|
||||
t.Fatalf("expected config.yaml in generated files for model %q", model)
|
||||
}
|
||||
|
||||
var parsed struct {
|
||||
Model string `yaml:"model"`
|
||||
Provider string `yaml:"provider"`
|
||||
RuntimeConfig struct {
|
||||
Model string `yaml:"model"`
|
||||
Provider string `yaml:"provider"`
|
||||
} `yaml:"runtime_config"`
|
||||
}
|
||||
if err := yaml.Unmarshal(raw, &parsed); err != nil {
|
||||
t.Fatalf("generated YAML invalid for model %q: %v\n%s", model, err, raw)
|
||||
}
|
||||
|
||||
// The load-bearing invariant: BOTH the top-level and the
|
||||
// runtime_config provider must be exactly "platform". An empty or
|
||||
// vendor-namespace ("moonshot") value here is the prod NOT_CONFIGURED
|
||||
// boot — the adapter would slash-split the model id and look up an
|
||||
// unregistered provider.
|
||||
if parsed.Provider != "platform" {
|
||||
t.Errorf("model %q: top-level provider = %q, want \"platform\" (Fix A invariant — empty/vendor value is the NOT_CONFIGURED boot)\n%s", model, parsed.Provider, raw)
|
||||
}
|
||||
if parsed.RuntimeConfig.Provider != "platform" {
|
||||
t.Errorf("model %q: runtime_config.provider = %q, want \"platform\"\n%s", model, parsed.RuntimeConfig.Provider, raw)
|
||||
}
|
||||
// Sanity: the config must still render a non-empty model (a config
|
||||
// with provider but no model is equally undeployable).
|
||||
if parsed.Model == "" {
|
||||
t.Errorf("model %q: generated config has empty top-level model\n%s", model, raw)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestPlatformModelDeriveProvider_SSOTConsistency is the upstream half of the
|
||||
// same invariant, one layer below ensureDefaultConfig: it asserts the providers
|
||||
// manifest's DeriveProvider — the resolver deriveDefaultConfigProvider calls —
|
||||
// maps every SSOT-offered claude-code platform model to a provider whose Name is
|
||||
// "platform". If DeriveProvider itself regressed (e.g. a model_prefix_match
|
||||
// change made "moonshot/kimi-k2.6" resolve to the bare "moonshot" entry again),
|
||||
// this fails closer to the root cause than the config-shape test above, making
|
||||
// the diagnosis unambiguous: SSOT/derive regression vs config-emission
|
||||
// regression.
|
||||
func TestPlatformModelDeriveProvider_SSOTConsistency(t *testing.T) {
|
||||
const runtime = "claude-code"
|
||||
m, err := providers.LoadManifest()
|
||||
if err != nil {
|
||||
t.Fatalf("LoadManifest: %v", err)
|
||||
}
|
||||
platformModels := platformModelsForRuntime(t, runtime)
|
||||
if len(platformModels) == 0 {
|
||||
t.Fatalf("no platform models for %q in SSOT", runtime)
|
||||
}
|
||||
for _, model := range platformModels {
|
||||
model := model
|
||||
t.Run(model, func(t *testing.T) {
|
||||
// nil availableAuthEnv mirrors deriveDefaultConfigProvider's call at
|
||||
// config-generation time (no per-workspace auth context yet).
|
||||
p, err := m.DeriveProvider(runtime, model, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("DeriveProvider(%q, %q): unexpected error %v — an SSOT-offered platform model MUST derive", runtime, model, err)
|
||||
}
|
||||
if p.Name != "platform" {
|
||||
t.Errorf("DeriveProvider(%q, %q).Name = %q, want \"platform\" (this is the exact slash-split-to-vendor regression that booted NOT_CONFIGURED)", runtime, model, p.Name)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// containsString reports whether want is an element of xs. A nil or empty
// slice never contains anything. It lives in this file, rather than in a
// shared test utility, so the regression suite reads top to bottom on its own.
func containsString(xs []string, want string) bool {
	for i := 0; i < len(xs); i++ {
		if xs[i] == want {
			return true
		}
	}
	return false
}
|
||||
Reference in New Issue
Block a user