From d0ab3d7c4b88b1499a656341cf74a59b26c76aa5 Mon Sep 17 00:00:00 2001
From: core-devops <core-devops@moleculesai.app>
Date: Thu, 4 Jun 2026 18:49:56 -0700
Subject: [PATCH] fix(e2e): staging SaaS canary uses namespaced
 minimax:MiniMax-M2.7 (#2263)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The staging SaaS E2E provisioned its claude-code canary with the BARE id
`MiniMax-M2`. The deployed staging tenant ws-server's compiled model
registry lags source, so validateRegisteredModelForRuntime returns HTTP
400 on the bare id at workspace-create. The sibling Platform Boot job, on
the SAME image, succeeds with the NAMESPACED `moonshot/kimi-k2.6` — only
the id form differs (deploy-skew, internal#718; NOT flaky).

Harness-side fix: switch the claude-code MiniMax default from bare
`MiniMax-M2` to the COLON-namespaced `minimax:MiniMax-M2.7`. Crucially
this is the colon (BYOK) form, NOT the slash/platform form
`minimax/MiniMax-M2.7` the issue floated: the canary injects
E2E_MINIMAX_API_KEY (BYOK), so the #1994 byok-not-platform guard asserts
provider_selection=minimax. The colon form stays in the BYOK `minimax`
arm (providers.yaml:851 → provider=minimax, passes the guard); the slash
form resolves to provider=platform and would trip it. Mirrors how the
proven-working kimi BYOK colon-form is registered.

Changed both the operator-override default in e2e-staging-saas.yml (which
sets E2E_MODEL_SLUG and wins over pick_model_slug) and the pick_model_slug
fallback in lib/model_slug.sh, plus the pinned unit-test expectations.

Also: widen the known-answer A2A POST retry grep to include the
Cloudflare-shaped literal `error code: 502` and `error code: 504`
tokens, matching the cold-start PONG probe and delegation loops.
Previously, a single un-retried edge 502 right after a healthy
round-trip (Platform Boot, task 268859) fell through to `break` and
failed the gate on the first attempt. The retry stays bounded by the
existing 6-attempt/sleep-10 loop — no new sleep-as-fix.

NOTE: harness-side only. The durable fix is promoting the staging tenant
ws-server runtime image to a build whose compiled registry includes the
bare id.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .gitea/workflows/e2e-staging-saas.yml | 11 ++++++++++-
 tests/e2e/lib/model_slug.sh           | 17 +++++++++++++++--
 tests/e2e/test_model_slug.sh          |  4 ++--
 tests/e2e/test_staging_full_saas.sh   | 11 +++++++++--
 4 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/.gitea/workflows/e2e-staging-saas.yml b/.gitea/workflows/e2e-staging-saas.yml
index 82b3c46d8..584349142 100644
--- a/.gitea/workflows/e2e-staging-saas.yml
+++ b/.gitea/workflows/e2e-staging-saas.yml
@@ -172,7 +172,16 @@ jobs:
       # and defeats the cost saving. Operators can override via the
       # workflow_dispatch flow (no input wired here yet — runtime
       # override is enough for ad-hoc).
-      E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || github.event.inputs.runtime == 'google-adk' && 'google_genai:gemini-2.5-pro' || 'MiniMax-M2' }}
+      #
+      # #2263 deploy-skew: the claude-code default is the COLON-namespaced BYOK
+      # id `minimax:MiniMax-M2.7`, NOT bare `MiniMax-M2`. The deployed staging
+      # ws-server's compiled registry can lag source; validateRegisteredModelForRuntime
+      # 400s the bare form on an older image (the sibling Platform Boot job, on
+      # the SAME image, succeeds with namespaced `moonshot/kimi-k2.6`). The colon
+      # form stays in the BYOK `minimax` arm (providers.yaml:851) so it resolves
+      # provider=minimax (BYOK) and the #1994 byok-not-platform guard still
+      # passes — the slash/platform form `minimax/MiniMax-M2.7` would not.
+      E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || github.event.inputs.runtime == 'google-adk' && 'google_genai:gemini-2.5-pro' || 'minimax:MiniMax-M2.7' }}
       E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
       E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
 
diff --git a/tests/e2e/lib/model_slug.sh b/tests/e2e/lib/model_slug.sh
index 93207c96f..efb5fd71f 100755
--- a/tests/e2e/lib/model_slug.sh
+++ b/tests/e2e/lib/model_slug.sh
@@ -11,7 +11,10 @@
 #                                    default + 401, see PR #1714.)
 #
 #   claude-code → auth-aware:
-#                  E2E_MINIMAX_API_KEY    → "MiniMax-M2"
+#                  E2E_MINIMAX_API_KEY    → "minimax:MiniMax-M2.7"
+#                                           (colon-namespaced BYOK id; bare
+#                                            "MiniMax-M2" 400s on a deploy-skewed
+#                                            staging registry — #2263)
 #                  E2E_ANTHROPIC_API_KEY  → "claude-sonnet-4-6"
 #                  otherwise              → "sonnet"
 #
@@ -82,7 +85,17 @@ pick_model_slug() {
     hermes)      printf 'openai/gpt-4o' ;;
     claude-code)
       if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
-        printf 'MiniMax-M2'
+        # Namespaced (colon) BYOK id, not bare "MiniMax-M2" (#2263 deploy-skew):
+        # the deployed staging ws-server's compiled registry can lag source,
+        # so workspace-create's validateRegisteredModelForRuntime 400s the bare
+        # form on an older image. The colon-namespaced `minimax:MiniMax-M2.7`
+        # resolves the same way the proven-working sibling `moonshot/kimi-k2.6`
+        # does. It stays in the BYOK `minimax` arm (providers.yaml:851), so
+        # DeriveProvider -> provider_selection=minimax (BYOK) and the #1994
+        # byok-not-platform guard (test_staging_full_saas.sh:1000) still passes —
+        # unlike the slash/platform form `minimax/MiniMax-M2.7`, which resolves
+        # to provider=platform and would trip that guard.
+        printf 'minimax:MiniMax-M2.7'
       elif [ -n "${E2E_ANTHROPIC_API_KEY:-}" ]; then
         printf 'claude-sonnet-4-6'
       else
diff --git a/tests/e2e/test_model_slug.sh b/tests/e2e/test_model_slug.sh
index e3282c41b..32b805fb0 100755
--- a/tests/e2e/test_model_slug.sh
+++ b/tests/e2e/test_model_slug.sh
@@ -49,13 +49,13 @@ run_test "codex → slash-form fallback"                             codex
 run_test "claude-code → OAuth/default alias"                      claude-code "sonnet"
 
 got=$(unset E2E_MODEL_SLUG E2E_ANTHROPIC_API_KEY; E2E_MINIMAX_API_KEY="mx-test" pick_model_slug claude-code)
-assert_eq "claude-code + MiniMax key → MiniMax model"             "$got" "MiniMax-M2"
+assert_eq "claude-code + MiniMax key → MiniMax model"             "$got" "minimax:MiniMax-M2.7"
 
 got=$(unset E2E_MODEL_SLUG E2E_MINIMAX_API_KEY; E2E_ANTHROPIC_API_KEY="sk-ant-test" pick_model_slug claude-code)
 assert_eq "claude-code + Anthropic API key → Anthropic API model" "$got" "claude-sonnet-4-6"
 
 got=$(unset E2E_MODEL_SLUG; E2E_MINIMAX_API_KEY="mx-priority" E2E_ANTHROPIC_API_KEY="sk-ant-loser" pick_model_slug claude-code)
-assert_eq "claude-code + both keys → MiniMax priority"            "$got" "MiniMax-M2"
+assert_eq "claude-code + both keys → MiniMax priority"            "$got" "minimax:MiniMax-M2.7"
 
 # ── Fallback for unknown runtime ──
 # Picks slash-form (hermes-shaped) since hermes is the historical
diff --git a/tests/e2e/test_staging_full_saas.sh b/tests/e2e/test_staging_full_saas.sh
index 3761ace5f..40b6e5030 100755
--- a/tests/e2e/test_staging_full_saas.sh
+++ b/tests/e2e/test_staging_full_saas.sh
@@ -886,7 +886,7 @@ fi
 # identical on main's scheduled synthetic E2E and on PRs (so it is an
 # environmental backend regression, never PR-introduced).
 if echo "$AGENT_TEXT" | grep -qiF "message contained no text content"; then
-  fail "A2A — EMPTY COMPLETION (backend regression, NOT a platform/workspace-server bug). The configured model (MODEL_SLUG=${MODEL_SLUG:-?}) returned a 2xx completion with no text part; the runtime surfaced 'message contained no text content.'. Operator action: check the staging LLM backend / proxy for the canary model (MiniMax-M2 since #2710) — empty assistant turns, not an auth/quota/boot fault. Raw: $AGENT_TEXT"
+  fail "A2A — EMPTY COMPLETION (backend regression, NOT a platform/workspace-server bug). The configured model (MODEL_SLUG=${MODEL_SLUG:-?}) returned a 2xx completion with no text part; the runtime surfaced 'message contained no text content.'. Operator action: check the staging LLM backend / proxy for the canary model (the claude-code default is minimax:MiniMax-M2.7 since #2263; was bare MiniMax-M2 #2710) — empty assistant turns, not an auth/quota/boot fault. Raw: $AGENT_TEXT"
 fi
 # Generic catch-all — falls through if none of the known regressions hit.
 if echo "$AGENT_TEXT" | grep -qiE "error|exception"; then
@@ -952,7 +952,14 @@ for KA_ATTEMPT in $(seq 1 6); do
   KA_SAFE_BODY=$(printf '%s' "$KA_RESP" | sanitize_http_body)
   # Retry ONLY on transient transport errors — never on an agent-level
   # error (those must surface and fail the gate).
-  if echo "$KA_CODE" | grep -Eq '^(502|503|504)$' && echo "$KA_SAFE_BODY" | grep -Eqi 'Service Unavailable|Bad Gateway|Gateway Timeout|workspace agent unreachable|connection refused|no healthy upstream|workspace agent busy|native_session'; then
+  # #2263: include the Cloudflare-shaped literal `error code: 502/504` token so a
+  # bare edge/gateway 502 (no "Bad Gateway" body) is retried here the same way the
+  # cold-start PONG probe (line ~800) and the delegation loop (line ~1234) already
+  # do. Without it, a single un-retried edge 502 right after a healthy round-trip
+  # fell through to break and failed the gate on the first attempt (Platform Boot
+  # job, task 268859). Bounded by the existing 6-attempt / sleep-10 loop — no new
+  # sleep-as-fix; this only widens the transient-match to the sibling pattern.
+  if echo "$KA_CODE" | grep -Eq '^(502|503|504)$' && echo "$KA_SAFE_BODY" | grep -Eqi 'Service Unavailable|Bad Gateway|Gateway Timeout|error code: 502|error code: 504|workspace agent unreachable|connection refused|no healthy upstream|workspace agent busy|native_session'; then
     log "    known-answer A2A transient $KA_CODE attempt $KA_ATTEMPT/6: $KA_SAFE_BODY"
     if [ "$KA_ATTEMPT" -lt 6 ]; then sleep 10; continue; fi
   fi
-- 
2.52.0