diff --git a/.github/workflows/e2e-staging-saas.yml b/.github/workflows/e2e-staging-saas.yml
index 2a7efe16..2c252d10 100644
--- a/.github/workflows/e2e-staging-saas.yml
+++ b/.github/workflows/e2e-staging-saas.yml
@@ -48,9 +48,9 @@ on:
   workflow_dispatch:
     inputs:
       runtime:
-        description: "Runtime to test (hermes | claude-code | langgraph)"
+        description: "Runtime to test (claude-code [default, MiniMax] | hermes [OpenAI] | langgraph [OpenAI])"
         required: false
-        default: "hermes"
+        default: "claude-code"
       keep_org:
         description: "Skip teardown for debugging (only use via manual dispatch!)"
         required: false
@@ -83,11 +83,27 @@ jobs:
       # retrieval + teardown. Configure in
       # Settings → Secrets and variables → Actions → Repository secrets.
       MOLECULE_ADMIN_TOKEN: ${{ secrets.MOLECULE_STAGING_ADMIN_TOKEN }}
-      # OpenAI key for workspace LLM calls (section 8 A2A). Without it,
-      # Hermes runtime crashes at boot with "No provider API key found".
-      # Configure at Settings → Secrets → Actions → MOLECULE_STAGING_OPENAI_KEY.
+      # MiniMax is the PRIMARY LLM auth path post-2026-05-04. Switched
+      # from hermes+OpenAI default after #2578 (the staging OpenAI key
+      # account went over quota and stayed dead for 36+ hours, taking
+      # the full-lifecycle E2E red on every provisioning-critical push).
+      # claude-code template's `minimax` provider routes
+      # ANTHROPIC_BASE_URL to api.minimax.io/anthropic and reads
+      # MINIMAX_API_KEY at boot — separate billing account so an
+      # OpenAI quota collapse no longer wedges the gate. Mirrors the
+      # canary-staging.yml + continuous-synth-e2e.yml migrations.
+      E2E_MINIMAX_API_KEY: ${{ secrets.MOLECULE_STAGING_MINIMAX_API_KEY }}
+      # OpenAI fallback — kept wired so an operator-dispatched run
+      # (workflow_dispatch with runtime=hermes or runtime=langgraph)
+      # can still exercise the OpenAI path.
       E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_KEY }}
-      E2E_RUNTIME: ${{ github.event.inputs.runtime || 'hermes' }}
+      E2E_RUNTIME: ${{ github.event.inputs.runtime || 'claude-code' }}
+      # Pin the model per runtime. On the default claude-code path the
+      # per-runtime default ("sonnet") routes to direct Anthropic and
+      # defeats the cost saving, so MiniMax is pinned instead. There is
+      # no workflow_dispatch input for the model yet — the runtime
+      # override is enough for ad-hoc runs.
+      E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'langgraph' && 'openai:gpt-4o' || 'MiniMax-M2.7-highspeed' }}
       E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
       E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
 
@@ -102,13 +118,34 @@ jobs:
           fi
           echo "Admin token present ✓"
 
-      - name: Verify OpenAI key present
+      - name: Verify LLM key present
         run: |
-          if [ -z "$E2E_OPENAI_API_KEY" ]; then
-            echo "::error::MOLECULE_STAGING_OPENAI_KEY secret not set — workspaces will fail at boot with 'No provider API key found'"
+          # Per-runtime key check — claude-code uses MiniMax; hermes /
+          # langgraph (operator-dispatched only) use OpenAI. Hard-fail
+          # rather than soft-skip per #2578's lesson — empty key
+          # silently falls through to the wrong SECRETS_JSON branch and
+          # produces a confusing auth error 5 min later instead of the
+          # clean "secret missing" message at the top.
+          case "${E2E_RUNTIME}" in
+            claude-code)
+              required_secret_name="MOLECULE_STAGING_MINIMAX_API_KEY"
+              required_secret_value="${E2E_MINIMAX_API_KEY:-}"
+              ;;
+            langgraph|hermes)
+              required_secret_name="MOLECULE_STAGING_OPENAI_KEY"
+              required_secret_value="${E2E_OPENAI_API_KEY:-}"
+              ;;
+            *)
+              # Soft-skip on purpose; exit now so the "key present" summary never reports a key that was not checked.
+              echo "::warning::Unknown E2E_RUNTIME='${E2E_RUNTIME}' — skipping LLM-key check"
+              exit 0
+              ;;
+          esac
+          if [ -z "$required_secret_value" ]; then
+            echo "::error::${required_secret_name} secret not set for runtime=${E2E_RUNTIME} — workspaces will fail at boot with 'No provider API key found'"
             exit 2
           fi
-          echo "OpenAI key present ✓ (len=${#E2E_OPENAI_API_KEY})"
+          echo "LLM key present ✓ (runtime=${E2E_RUNTIME}, key=${required_secret_name}, len=${#required_secret_value})"
 
       - name: CP staging health preflight
         run: |