From 9689c6f6d57721b9d231c75430832d0163585fd3 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Sun, 3 May 2026 18:32:26 -0700 Subject: [PATCH] fix(synth-e2e): verify-secrets step must hard-fail (exit 0 only ends step) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous soft-skip-on-dispatch path used `exit 0`, which only ends the STEP — the rest of the workflow continued with empty secrets. Caught 2026-05-04 by dispatched run 25296530706: - E2E_MINIMAX_API_KEY: empty - verify-secrets printed warning + exit 0 - Install required tools: ran - Run synthetic E2E: ran with empty MiniMax key - SECRETS_JSON branched to OpenAI shape (MINIMAX empty → fall through) - But model slug stayed MiniMax-M2.7-highspeed (workflow env) - Workspace booted with OpenAI keys + MiniMax model - 5 min later: "Agent error (Exception)" — claude SDK 401'd against api.minimax.io with the OpenAI key The confusing failure mode silently masked the real problem (missing secret) under a runtime-error label. Fix: drop both soft-skip paths and exit 1 always. Operators who want to verify a YAML change without setting up secrets can read the verify-secrets step's log output (its `::error::` annotations) — the failure IS the verification signal. Pure visibility fix; preserves the cron hard-fail path (now also the dispatch hard-fail path). No mechanism change beyond the exit code. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/continuous-synth-e2e.yml | 40 +++++++++------------- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/.github/workflows/continuous-synth-e2e.yml b/.github/workflows/continuous-synth-e2e.yml index ba9633a9..5964693f 100644 --- a/.github/workflows/continuous-synth-e2e.yml +++ b/.github/workflows/continuous-synth-e2e.yml @@ -128,24 +128,22 @@ jobs: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Verify required secrets present - env: - # Re-bind so the per-runtime LLM key check below sees the right - # secret. The job-level env block already reads both; this just - # makes them visible inside the conditional shell. - IS_DISPATCH: ${{ github.event_name == 'workflow_dispatch' }} run: | - # Schedule-vs-dispatch hardening (mirrors the sweep-cf-* and - # redeploy-tenants-on-* workflows): hard-fail on missing secret - # for cron firing so a misconfigured-repo doesn't silently - # report green while doing nothing. Soft-skip on operator - # dispatch — operators can dispatch ad-hoc to verify a fix - # without setting up the secret first. + # Hard-fail on missing secret REGARDLESS of trigger. Previously + # this step soft-skipped on workflow_dispatch via `exit 0`, but + # `exit 0` only ends the STEP — subsequent steps still ran with + # the empty secret, the synth script fell through to the wrong + # SECRETS_JSON branch, and the canary failed 5 min later with a + # confusing "Agent error (Exception)" instead of the clean + # "secret missing" message at the top. Caught 2026-05-04 by + # dispatched run 25296530706: claude-code + missing MINIMAX + # silently used OpenAI keys but kept model=MiniMax-M2.7, then + # the workspace 401'd against MiniMax once it tried to call. + # Fix: exit 1 in both cron and dispatch paths. 
Operators who + # want to verify a YAML change without setting up the secret + # can read the verify-secrets step's stderr — the failure is + # itself the verification signal. if [ -z "${MOLECULE_ADMIN_TOKEN:-}" ]; then - if [ "$IS_DISPATCH" = "true" ]; then - echo "::warning::CP_STAGING_ADMIN_API_TOKEN not set — synth E2E cannot run" - echo "::warning::Set it at Settings → Secrets and Variables → Actions" - exit 0 - fi echo "::error::CP_STAGING_ADMIN_API_TOKEN secret missing — synth E2E cannot run" echo "::error::Set it at Settings → Secrets and Variables → Actions; pull from staging-CP's CP_ADMIN_API_TOKEN env in Railway." exit 1 @@ -153,8 +151,7 @@ jobs: # LLM-key requirement is per-runtime: claude-code uses MiniMax # (MOLECULE_STAGING_MINIMAX_API_KEY), langgraph + hermes use - # OpenAI (MOLECULE_STAGING_OPENAI_KEY). Cron firing must have - # the right key for the active runtime; dispatch can soft-skip. + # OpenAI (MOLECULE_STAGING_OPENAI_KEY). case "${E2E_RUNTIME}" in claude-code) required_secret_name="MOLECULE_STAGING_MINIMAX_API_KEY" @@ -171,13 +168,8 @@ jobs: ;; esac if [ -n "$required_secret_name" ] && [ -z "$required_secret_value" ]; then - if [ "$IS_DISPATCH" = "true" ]; then - echo "::warning::${required_secret_name} not set — synth E2E with runtime=${E2E_RUNTIME} cannot reach an LLM" - echo "::warning::Set it at Settings → Secrets and Variables → Actions, OR dispatch with a different runtime" - exit 0 - fi echo "::error::${required_secret_name} secret missing — runtime=${E2E_RUNTIME} cannot authenticate against its LLM provider" - echo "::error::Set it at Settings → Secrets and Variables → Actions" + echo "::error::Set it at Settings → Secrets and Variables → Actions, OR dispatch with a different runtime" exit 1 fi