diff --git a/tests/e2e/lib/model_slug.sh b/tests/e2e/lib/model_slug.sh index aca6148bc..f685c2516 100755 --- a/tests/e2e/lib/model_slug.sh +++ b/tests/e2e/lib/model_slug.sh @@ -11,10 +11,10 @@ # default + 401, see PR #1714.) # # claude-code → auth-aware: -# E2E_MINIMAX_API_KEY → "minimax:MiniMax-M2.7" -# (colon-namespaced BYOK id; bare -# "MiniMax-M2" 400s on a deploy-skewed -# staging registry — #2263) +# E2E_MINIMAX_API_KEY → "MiniMax-M2.7" +# (BARE registered BYOK id — see the +# claude-code dispatch arm below for +# why bare, not the colon form) # E2E_ANTHROPIC_API_KEY → "claude-sonnet-4-6" # otherwise → "sonnet" # @@ -88,24 +88,37 @@ pick_model_slug() { # (it is absent from manifest.json + runtime_registry.go). Its config.yaml # declares `runtime: claude-code` and copies the claude-code `providers:` # block (providers.yaml:21 "The same block is copy-pasted into the seo-agent - # template"), so its model dispatch is IDENTICAL to claude-code's: the - # MiniMax BYOK colon id (the staging-default key path), else direct + # template"), so its model dispatch is IDENTICAL to claude-code's: the BARE + # registered MiniMax BYOK id (the staging-default key path), else direct # Anthropic, else the OAuth `sonnet` alias. Sharing the claude-code branch # keeps the SSOT one place — a seo-agent run is just a claude-code run - # behind a productized template skin. + # behind a productized template skin, and (because the runtime resolves to + # claude-code server-side) its model must be a *claude-code-registered* form. claude-code|seo-agent) if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then - # Namespaced (colon) BYOK id, not bare "MiniMax-M2" (#2263 deploy-skew): - # bare ids can lag the deployed staging ws-server's compiled registry, - # so workspace-create's validateRegisteredModelForRuntime 400s the bare - # form on an older image. The colon-namespaced `minimax:MiniMax-M2.7` - # resolves the same way the proven-working sibling `moonshot/kimi-k2.6` - # does. 
It stays in the BYOK `minimax` arm (providers.yaml:851), so - # DeriveProvider -> provider_selection=minimax (BYOK) and the #1994 - # byok-not-platform guard (test_staging_full_saas.sh:1000) still passes — - # unlike the slash/platform form `minimax/MiniMax-M2.7`, which resolves - # to provider=platform and would trip that guard. - printf 'minimax:MiniMax-M2.7' + # BARE registered BYOK id `MiniMax-M2.7`, NOT the colon form + # `minimax:MiniMax-M2.7`. On the claude-code runtime the three MiniMax + # spellings have three DISTINCT, intentional outcomes (provider-registry + # SSOT, internal#718; pinned by workspace-server/internal/providers/ + # derive_provider_matrix_test.go, the #2263/#2274 "colon-vs-slash-vs-bare + # triple"): + # * bare "MiniMax-M2.7" -> provider=minimax (BYOK, MINIMAX_API_KEY) + # * slash "minimax/MiniMax-M2.7" -> provider=platform (CP proxy bills) + # * colon "minimax:MiniMax-M2.7" -> UNREGISTERED 422 (the claude-code + # adapter CANNOT strip the `minimax:` prefix, so the id is not a + # registered model for runtime claude-code; create-validation, + # internal#718, rejects it) + # The bare form is registered in the claude-code `minimax` arm + # (registry_gen.go:88 Models=[MiniMax-M2,MiniMax-M2.7, + # MiniMax-M2.7-highspeed,MiniMax-M3]) and derives provider=minimax (BYOK + # via MINIMAX_API_KEY), so it satisfies the #1994 byok-not-platform guard + # (test_staging_full_saas.sh) AND passes create-validation — unlike the + # colon form, which 422'd "5/11 Provisioning parent workspace" with + # UNREGISTERED_MODEL_FOR_RUNTIME on real staging (job 295075). + # NOTE: the colon form IS the correct BYOK-minimax id on openclaw/hermes + # (those adapters DO strip `minimax:` — matrix test), but this dispatch + # arm only emits for claude-code/seo-agent, where bare is the right form. 
+ printf 'MiniMax-M2.7' elif [ -n "${E2E_ANTHROPIC_API_KEY:-}" ]; then printf 'claude-sonnet-4-6' else diff --git a/tests/e2e/test_model_slug.sh b/tests/e2e/test_model_slug.sh index 02f81e05d..e04834003 100755 --- a/tests/e2e/test_model_slug.sh +++ b/tests/e2e/test_model_slug.sh @@ -48,14 +48,19 @@ run_test "hermes → slash-form (derive-provider.sh contract)" hermes run_test "codex → slash-form fallback" codex "openai/gpt-4o" run_test "claude-code → OAuth/default alias" claude-code "sonnet" +# BARE registered BYOK id (registry_gen.go:88), NOT colon `minimax:…`. On +# claude-code the colon form is intentionally UNREGISTERED (the adapter can't +# strip `minimax:`) and 422s create-validation (internal#718, job 295075); +# bare resolves to provider=minimax BYOK. Pinned by the matrix test's +# colon-vs-slash-vs-bare triple in derive_provider_matrix_test.go. got=$(unset E2E_MODEL_SLUG E2E_ANTHROPIC_API_KEY; E2E_MINIMAX_API_KEY="mx-test" pick_model_slug claude-code) -assert_eq "claude-code + MiniMax key → MiniMax model" "$got" "minimax:MiniMax-M2.7" +assert_eq "claude-code + MiniMax key → bare registered MiniMax model" "$got" "MiniMax-M2.7" got=$(unset E2E_MODEL_SLUG E2E_MINIMAX_API_KEY; E2E_ANTHROPIC_API_KEY="sk-ant-test" pick_model_slug claude-code) assert_eq "claude-code + Anthropic API key → Anthropic API model" "$got" "claude-sonnet-4-6" got=$(unset E2E_MODEL_SLUG; E2E_MINIMAX_API_KEY="mx-priority" E2E_ANTHROPIC_API_KEY="sk-ant-loser" pick_model_slug claude-code) -assert_eq "claude-code + both keys → MiniMax priority" "$got" "minimax:MiniMax-M2.7" +assert_eq "claude-code + both keys → MiniMax priority (bare)" "$got" "MiniMax-M2.7" # ── seo-agent (claude-code-adapter template variant) ── # seo-agent shares the claude-code dispatch branch (it reuses the claude-code @@ -65,7 +70,7 @@ assert_eq "claude-code + both keys → MiniMax priority" "$got" "mini run_test "seo-agent → claude-code default alias" seo-agent "sonnet" got=$(unset E2E_MODEL_SLUG E2E_ANTHROPIC_API_KEY; 
E2E_MINIMAX_API_KEY="mx-test" pick_model_slug seo-agent) -assert_eq "seo-agent + MiniMax key → MiniMax model (==claude-code)" "$got" "minimax:MiniMax-M2.7" +assert_eq "seo-agent + MiniMax key → bare MiniMax model (==claude-code)" "$got" "MiniMax-M2.7" got=$(unset E2E_MODEL_SLUG E2E_MINIMAX_API_KEY; E2E_ANTHROPIC_API_KEY="sk-ant-test" pick_model_slug seo-agent) assert_eq "seo-agent + Anthropic key → Anthropic model (==claude-code)" "$got" "claude-sonnet-4-6" diff --git a/tests/e2e/test_priority_runtimes_e2e.sh b/tests/e2e/test_priority_runtimes_e2e.sh index 61b852a11..68fe537d5 100755 --- a/tests/e2e/test_priority_runtimes_e2e.sh +++ b/tests/e2e/test_priority_runtimes_e2e.sh @@ -641,16 +641,19 @@ for r in (d.get("workspaces") or d.get("results") or []): #################################################################### # NOTE: this is now a BEST-EFFORT arm, not the REQUIRE-LIVE backbone. # mock (run_mock above) is the guaranteed, no-key validation that keeps -# the gate honest. MiniMax-create is fragile in CI: the namespaced model -# id minimax:MiniMax-M2.7 is NOT in claude-code's native model set and -# does NOT resolve via DeriveProvider (its only prefix-owner, byok-minimax, -# is not wired as a claude-code runtime arm), so the create is rejected -# 422 UNREGISTERED_MODEL_FOR_RUNTIME before any provisioning (RCA core -# registry_gen.go Runtimes["claude-code"]). Rather than red the REQUIRED -# gate on that registry-skew (or on any transient MiniMax provisioning / -# model-registration issue), this arm reports a best-effort MISS via -# bestfail() and lets mock carry the validation. If MiniMax DOES come up -# it validates as a bonus real-LLM check. +# the gate honest. 
This arm uses the BARE registered BYOK id `MiniMax-M2.7` +# (NOT the colon `minimax:MiniMax-M2.7`): on claude-code the colon form is +# INTENTIONALLY unregistered — the claude-code adapter cannot strip the +# `minimax:` prefix, so create-validation rejects it 422 +# UNREGISTERED_MODEL_FOR_RUNTIME before any provisioning (provider-registry +# SSOT, internal#718; pinned by derive_provider_matrix_test.go's +# colon-vs-slash-vs-bare triple, and observed on real staging job 295075). +# The bare id is in claude-code's `minimax` arm (registry_gen.go:88 +# Models=[MiniMax-M2,MiniMax-M2.7,MiniMax-M2.7-highspeed,MiniMax-M3]) and +# derives provider=minimax (BYOK via MINIMAX_API_KEY), so create-validation +# accepts it. This arm stays BEST-EFFORT (bestfail, non-gating) for transient +# MiniMax provisioning / backend issues — mock carries the REQUIRED gate; if +# MiniMax DOES come up it validates as a bonus real-LLM check. # Drives the claude-code runtime against MiniMax (BYOK) using the # already-present Gitea secret MOLECULE_STAGING_MINIMAX_API_KEY, # surfaced into the env as E2E_MINIMAX_API_KEY (same name + secret the @@ -663,12 +666,12 @@ for r in (d.get("workspaces") or d.get("results") or []): # and routes ANTHROPIC_BASE_URL → api.minimax.io/anthropic. So the # ONLY tenant secret needed is {"MINIMAX_API_KEY": } — exactly # the SECRETS_JSON branch test_staging_full_saas.sh uses. -# - Model id is the NAMESPACED colon-form `minimax:MiniMax-M2.7`, the -# registered BYOK arm for claude-code (registry_gen.go Runtimes -# ["claude-code"]["minimax"]). Per core#2263 the BARE `MiniMax-M2` -# id can 400 on a registry-skewed ws-server build; the namespaced -# form resolves the way kimi's `moonshot/…` does, so it's the -# robust choice for the gate. +# - Model id is the BARE `MiniMax-M2.7`, the registered BYOK arm for +# claude-code (registry_gen.go:88 Runtimes["claude-code"]["minimax"] +# Models). DeriveProvider routes bare → provider=minimax (BYOK).
The +# colon-namespaced `minimax:MiniMax-M2.7` is UNREGISTERED on claude-code +# (the adapter can't strip `minimax:`; internal#718) and 422s create — +# it is only the correct BYOK id on openclaw/hermes, which DO strip it. run_minimax() { echo "" echo "=== minimax (claude-code BYOK) happy path ===" @@ -685,16 +688,18 @@ import json, os print(json.dumps({'MINIMAX_API_KEY': os.environ['E2E_MINIMAX_API_KEY']})) ") local resp wsid - # Namespaced BYOK model id (core#2263): bare MiniMax-M2 can 400 on a - # registry-skewed ws-server build; minimax:MiniMax-M2.7 is the - # registered claude-code BYOK arm and resolves like kimi's moonshot/… + # BARE registered BYOK model id `MiniMax-M2.7` (registry_gen.go:88). The + # colon form `minimax:MiniMax-M2.7` is UNREGISTERED on claude-code (adapter + # can't strip `minimax:`; internal#718) and 422s create — bare derives + # provider=minimax (BYOK via MINIMAX_API_KEY) and passes create-validation. resp=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -H "Content-Type: application/json" \ - -d "{\"name\":\"Priority E2E (minimax)\",\"runtime\":\"claude-code\",\"model\":\"minimax:MiniMax-M2.7\",\"tier\":1,\"secrets\":$secrets}") + -d "{\"name\":\"Priority E2E (minimax)\",\"runtime\":\"claude-code\",\"model\":\"MiniMax-M2.7\",\"tier\":1,\"secrets\":$secrets}") wsid=$(echo "$resp" | python3 -c 'import json,sys;print(json.load(sys.stdin).get("id",""))') || true if [ -z "$wsid" ]; then - # BEST-EFFORT: MiniMax-create is fragile (see header — the namespaced - # model id is registry-skewed → 422). Do NOT red the gate; mock is the - # required backbone. Report the create response so the skew is visible. + # BEST-EFFORT: real MiniMax create/provision can still miss on transient + # backend / provisioning issues (the bare model id itself is registered — + # see header). Do NOT red the gate; mock is the required backbone. Report + # the create response so any miss is visible. 
bestfail "create minimax workspace (best-effort; mock carries the gate)" "$resp" return 0 fi diff --git a/tests/e2e/test_staging_full_saas.sh b/tests/e2e/test_staging_full_saas.sh index 862b468a2..a39e99096 100755 --- a/tests/e2e/test_staging_full_saas.sh +++ b/tests/e2e/test_staging_full_saas.sh @@ -1095,7 +1095,7 @@ fi # identical on main's scheduled synthetic E2E and on PRs (so it is an # environmental backend regression, never PR-introduced). if echo "$AGENT_TEXT" | grep -qiF "message contained no text content"; then - fail "A2A — EMPTY COMPLETION (backend regression, NOT a platform/workspace-server bug). The configured model (MODEL_SLUG=${MODEL_SLUG:-?}) returned a 2xx completion with no text part; the runtime surfaced 'message contained no text content.'. Operator action: check the staging LLM backend / proxy for the canary model (the claude-code default is minimax:MiniMax-M2.7 since #2263; was bare MiniMax-M2 #2710) — empty assistant turns, not an auth/quota/boot fault. Raw: $AGENT_TEXT" + fail "A2A — EMPTY COMPLETION (backend regression, NOT a platform/workspace-server bug). The configured model (MODEL_SLUG=${MODEL_SLUG:-?}) returned a 2xx completion with no text part; the runtime surfaced 'message contained no text content.'. Operator action: check the staging LLM backend / proxy for the canary model (the claude-code MiniMax-BYOK default is the BARE registered id MiniMax-M2.7 — the colon minimax:MiniMax-M2.7 is UNREGISTERED on claude-code, internal#718) — empty assistant turns, not an auth/quota/boot fault. Raw: $AGENT_TEXT" fi # Generic catch-all — falls through if none of the known regressions hit. if echo "$AGENT_TEXT" | grep -qiE "error|exception"; then