7 changed files with 19 additions and 16 deletions
@@ -118,7 +118,7 @@ jobs:
    timeout-minutes: 20
    env:
      # claude-code default: cold-start ~5 min (comparable to langgraph),
-      # but uses MiniMax-M2 via the template's third-party-
+      # but uses MiniMax-M2.7 via the template's third-party-
      # Anthropic-compat path (workspace-configs-templates/claude-code-
      # default/config.yaml:64-69). MiniMax is ~5-10x cheaper than
      # gpt-4.1-mini per token AND avoids the recurring OpenAI quota-
@@ -131,9 +131,9 @@ jobs:
      # on the per-runtime default ("sonnet" → routes to direct
      # Anthropic, defeats the cost saving). Operators can override
      # via workflow_dispatch by setting a different E2E_MODEL_SLUG
-      # input if they need to exercise a specific model. MiniMax-M2 is the
+      # input if they need to exercise a specific model. MiniMax-M2.7 is the
      # stable staging MiniMax path used by the full-SaaS smoke.
-      E2E_MODEL_SLUG: ${{ github.event.inputs.model_slug || 'MiniMax-M2' }}
+      E2E_MODEL_SLUG: ${{ github.event.inputs.model_slug || 'MiniMax-M2.7' }}
      # Bound to 10 min so a stuck provision fails the run instead of
      # holding up the next cron firing. 15-min default in the script
      # is for the on-PR full lifecycle where we have more headroom.
@@ -172,7 +172,7 @@ jobs:
      # and defeats the cost saving. Operators can override via the
      # workflow_dispatch flow (no input wired here yet — runtime
      # override is enough for ad-hoc).
-      E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || github.event.inputs.runtime == 'google-adk' && 'google_genai:gemini-2.5-pro' || 'MiniMax-M2' }}
+      E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || github.event.inputs.runtime == 'google-adk' && 'google_genai:gemini-2.5-pro' || 'MiniMax-M2.7' }}
      E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
      E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}

@@ -112,9 +112,9 @@ jobs:
      E2E_RUNTIME: claude-code
      # Pin the smoke to a specific MiniMax model rather than relying
      # on the per-runtime default (which could resolve to "sonnet" →
-      # direct Anthropic and defeat the cost saving). MiniMax-M2 is the
+      # direct Anthropic and defeat the cost saving). MiniMax-M2.7 is the
      # stable staging MiniMax path used by the full-SaaS smoke.
-      E2E_MODEL_SLUG: MiniMax-M2
+      E2E_MODEL_SLUG: MiniMax-M2.7
      E2E_RUN_ID: "smoke-${{ github.run_id }}"
      # Debug-only: when an operator dispatches with keep_on_failure=true,
      # the smoke script's E2E_KEEP_ORG=1 path skips teardown so the
@@ -11,7 +11,7 @@
 #                                    default + 401, see PR #1714.)
 #
 #   claude-code → auth-aware:
-#                  E2E_MINIMAX_API_KEY    → "MiniMax-M2"
+#                  E2E_MINIMAX_API_KEY    → "MiniMax-M2.7"
 #                  E2E_ANTHROPIC_API_KEY  → "claude-sonnet-4-6"
 #                  otherwise              → "sonnet"
 #
@@ -82,7 +82,7 @@ pick_model_slug() {
    hermes)      printf 'openai/gpt-4o' ;;
    claude-code)
      if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
-        printf 'MiniMax-M2'
+        printf 'MiniMax-M2.7'
      elif [ -n "${E2E_ANTHROPIC_API_KEY:-}" ]; then
        printf 'claude-sonnet-4-6'
      else
@@ -49,13 +49,13 @@ run_test "codex → slash-form fallback"                             codex
 run_test "claude-code → OAuth/default alias"                      claude-code "sonnet"

 got=$(unset E2E_MODEL_SLUG E2E_ANTHROPIC_API_KEY; E2E_MINIMAX_API_KEY="mx-test" pick_model_slug claude-code)
-assert_eq "claude-code + MiniMax key → MiniMax model"             "$got" "MiniMax-M2"
+assert_eq "claude-code + MiniMax key → MiniMax model"             "$got" "MiniMax-M2.7"

 got=$(unset E2E_MODEL_SLUG E2E_MINIMAX_API_KEY; E2E_ANTHROPIC_API_KEY="sk-ant-test" pick_model_slug claude-code)
 assert_eq "claude-code + Anthropic API key → Anthropic API model" "$got" "claude-sonnet-4-6"

 got=$(unset E2E_MODEL_SLUG; E2E_MINIMAX_API_KEY="mx-priority" E2E_ANTHROPIC_API_KEY="sk-ant-loser" pick_model_slug claude-code)
-assert_eq "claude-code + both keys → MiniMax priority"            "$got" "MiniMax-M2"
+assert_eq "claude-code + both keys → MiniMax priority"            "$got" "MiniMax-M2.7"

 # ── Fallback for unknown runtime ──
 # Picks slash-form (hermes-shaped) since hermes is the historical
@@ -881,12 +881,13 @@ fi
 # and NOT NOT_CONFIGURED (that fails earlier, at boot). Name it explicitly
 # so the canary alert points at the model, not the platform: a generic
 # "error-shaped response" misdirects triage to workspace-server. Observed
-# 2026-06-03/04 across every staging canary on MODEL_SLUG=MiniMax-M2 (the
-# canary default since #2710) — 100% on the parent's first cold turn,
+# 2026-06-03/04 across every staging canary on MODEL_SLUG=MiniMax-M2.7 (the
+# canary default since #2710, updated to M2.7 2026-06-04 after M2 was
+# rejected by staging with HTTP 400) — 100% on the parent's first cold turn,
 # identical on main's scheduled synthetic E2E and on PRs (so it is an
 # environmental backend regression, never PR-introduced).
 if echo "$AGENT_TEXT" | grep -qiF "message contained no text content"; then
-  fail "A2A — EMPTY COMPLETION (backend regression, NOT a platform/workspace-server bug). The configured model (MODEL_SLUG=${MODEL_SLUG:-?}) returned a 2xx completion with no text part; the runtime surfaced 'message contained no text content.'. Operator action: check the staging LLM backend / proxy for the canary model (MiniMax-M2 since #2710) — empty assistant turns, not an auth/quota/boot fault. Raw: $AGENT_TEXT"
+  fail "A2A — EMPTY COMPLETION (backend regression, NOT a platform/workspace-server bug). The configured model (MODEL_SLUG=${MODEL_SLUG:-?}) returned a 2xx completion with no text part; the runtime surfaced 'message contained no text content.'. Operator action: check the staging LLM backend / proxy for the canary model (MiniMax-M2.7 since #2710, updated 2026-06-04) — empty assistant turns, not an auth/quota/boot fault. Raw: $AGENT_TEXT"
 fi
 # Generic catch-all — falls through if none of the known regressions hit.
 if echo "$AGENT_TEXT" | grep -qiE "error|exception"; then
@@ -952,7 +953,7 @@ for KA_ATTEMPT in $(seq 1 6); do
  KA_SAFE_BODY=$(printf '%s' "$KA_RESP" | sanitize_http_body)
  # Retry ONLY on transient transport errors — never on an agent-level
  # error (those must surface and fail the gate).
-  if echo "$KA_CODE" | grep -Eq '^(502|503|504)$' && echo "$KA_SAFE_BODY" | grep -Eqi 'Service Unavailable|Bad Gateway|Gateway Timeout|workspace agent unreachable|connection refused|no healthy upstream|workspace agent busy|native_session'; then
+  if echo "$KA_CODE" | grep -Eq '^(502|503|504)$' && echo "$KA_SAFE_BODY" | grep -Eqi 'Service Unavailable|Bad Gateway|Gateway Timeout|error code: 502|error code: 504|workspace agent unreachable|connection refused|no healthy upstream|workspace agent busy|native_session'; then
    log "    known-answer A2A transient $KA_CODE attempt $KA_ATTEMPT/6: $KA_SAFE_BODY"
    if [ "$KA_ATTEMPT" -lt 6 ]; then sleep 10; continue; fi
  fi
@@ -1,3 +1,4 @@
+import re
 from pathlib import Path


@@ -14,5 +15,6 @@ def test_staging_e2e_workflows_use_stable_minimax_default() -> None:

    for rel in workflow_paths:
        text = (ROOT / rel).read_text()
-        assert "MiniMax-M2.7-highspeed" not in text
-        assert "MiniMax-M2" in text
+        # Reject bare MiniMax-M2 (not followed by "."), but allow MiniMax-M2.7
+        assert re.search(r"MiniMax-M2(?!\.)", text) is None
+        assert "MiniMax-M2.7" in text