Merge pull request #2645 from Molecule-AI/staging

staging → main: auto-promote f689c81
This commit is contained in:
molecule-ai[bot] 2026-05-03 22:54:00 +00:00 committed by GitHub
commit 1141a42910
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 283 additions and 60 deletions

View File

@ -39,9 +39,14 @@ on:
workflow_dispatch:
inputs:
runtime:
description: "Runtime to provision (langgraph = fastest, default; hermes = slower but covers SDK-native path; claude-code = needs OAUTH token in tenant env)"
description: "Runtime to provision (claude-code = default + cheapest via MiniMax; langgraph = OpenAI-only; hermes = SDK-native path, slower)"
required: false
default: "langgraph"
default: "claude-code"
type: string
model_slug:
description: "Model id to provision the workspace with (default MiniMax-M2.7-highspeed; e.g. 'sonnet' to test direct Anthropic, 'openai/gpt-4o' for hermes)"
required: false
default: "MiniMax-M2.7-highspeed"
type: string
keep_org:
description: "Skip teardown for post-mortem debugging (only manual dispatch — never set this for cron runs)"
@ -70,13 +75,23 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 12
env:
# langgraph default keeps cold-start under 5 min on staging EC2.
# hermes is slower (~7-10 min) and isn't needed for the
# regression class this gate exists to catch (deployment-pipeline
# + schema-drift + integration). Operators can pick hermes via
# workflow_dispatch when they need to exercise the SDK-native
# session path.
E2E_RUNTIME: ${{ github.event.inputs.runtime || 'langgraph' }}
# claude-code default: cold-start ~5 min (comparable to langgraph),
# but uses MiniMax-M2.7-highspeed via the template's third-party-
# Anthropic-compat path (workspace-configs-templates/claude-code-
# default/config.yaml:64-69). MiniMax is ~5-10x cheaper than
# gpt-4.1-mini per token AND avoids the recurring OpenAI quota-
# exhaustion class that took the canary down 2026-05-03 (#265).
# Operators can pick langgraph / hermes via workflow_dispatch
# when they specifically need to exercise the OpenAI or SDK-
# native paths.
E2E_RUNTIME: ${{ github.event.inputs.runtime || 'claude-code' }}
# Pin the canary to a specific MiniMax model rather than relying
# on the per-runtime default ("sonnet" → routes to direct
# Anthropic, defeats the cost saving). Operators can override
# via workflow_dispatch by setting a different E2E_MODEL_SLUG
# input if they need to exercise a specific model. M2.7-highspeed
# is "Token Plan only" but cheap-per-token and fast.
E2E_MODEL_SLUG: ${{ github.event.inputs.model_slug || 'MiniMax-M2.7-highspeed' }}
# Bound to 10 min so a stuck provision fails the run instead of
# holding up the next cron firing. 15-min default in the script
# is for the on-PR full lifecycle where we have more headroom.
@ -88,19 +103,26 @@ jobs:
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org == 'true' && '1' || '' }}
MOLECULE_CP_URL: ${{ vars.STAGING_CP_URL || 'https://staging-api.moleculesai.app' }}
MOLECULE_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
# Provisioned tenant's default model (langgraph: openai:gpt-4.1-mini)
# needs OPENAI_API_KEY at first call. Sibling workflows
# e2e-staging-saas.yml + canary-staging.yml use the same secret;
# without this wire-up the tenant boots, accepts a2a messages,
# then returns "Could not resolve authentication method" — masked
# earlier by the a2a-sdk task-mode contract bugs PR #2558+#2563
# fixed. tests/e2e/test_staging_full_saas.sh:325 reads this and
# persists it as a workspace_secret on tenant create.
# MiniMax key is the canary's PRIMARY auth path. claude-code
# template's `minimax` provider routes ANTHROPIC_BASE_URL to
# api.minimax.io/anthropic and reads MINIMAX_API_KEY at boot.
# tests/e2e/test_staging_full_saas.sh branches SECRETS_JSON on
# which key is present — MiniMax wins when set.
E2E_MINIMAX_API_KEY: ${{ secrets.MOLECULE_STAGING_MINIMAX_API_KEY }}
# OpenAI fallback — kept wired so operators can dispatch with
# E2E_RUNTIME=langgraph or =hermes and still have a working
# canary path. The script picks the right blob shape based on
# which key is non-empty.
E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_KEY }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Verify required secret present
- name: Verify required secrets present
env:
# Re-bind so the per-runtime LLM key check below sees the right
# secret. The job-level env block already reads both; this just
# makes them visible inside the conditional shell.
IS_DISPATCH: ${{ github.event_name == 'workflow_dispatch' }}
run: |
# Schedule-vs-dispatch hardening (mirrors the sweep-cf-* and
# redeploy-tenants-on-* workflows): hard-fail on missing secret
@ -109,7 +131,7 @@ jobs:
# dispatch — operators can dispatch ad-hoc to verify a fix
# without setting up the secret first.
if [ -z "${MOLECULE_ADMIN_TOKEN:-}" ]; then
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
if [ "$IS_DISPATCH" = "true" ]; then
echo "::warning::CP_STAGING_ADMIN_API_TOKEN not set — synth E2E cannot run"
echo "::warning::Set it at Settings → Secrets and Variables → Actions"
exit 0
@ -119,6 +141,36 @@ jobs:
exit 1
fi
# LLM-key requirement is per-runtime: claude-code uses MiniMax
# (MOLECULE_STAGING_MINIMAX_API_KEY), langgraph + hermes use
# OpenAI (MOLECULE_STAGING_OPENAI_KEY). Cron firing must have
# the right key for the active runtime; dispatch can soft-skip.
case "${E2E_RUNTIME}" in
claude-code)
required_secret_name="MOLECULE_STAGING_MINIMAX_API_KEY"
required_secret_value="${E2E_MINIMAX_API_KEY:-}"
;;
langgraph|hermes)
required_secret_name="MOLECULE_STAGING_OPENAI_KEY"
required_secret_value="${E2E_OPENAI_API_KEY:-}"
;;
*)
echo "::warning::Unknown E2E_RUNTIME='${E2E_RUNTIME}' — skipping LLM-key check"
required_secret_name=""
required_secret_value="present"
;;
esac
if [ -n "$required_secret_name" ] && [ -z "$required_secret_value" ]; then
if [ "$IS_DISPATCH" = "true" ]; then
echo "::warning::${required_secret_name} not set — synth E2E with runtime=${E2E_RUNTIME} cannot reach an LLM"
echo "::warning::Set it at Settings → Secrets and Variables → Actions, OR dispatch with a different runtime"
exit 0
fi
echo "::error::${required_secret_name} secret missing — runtime=${E2E_RUNTIME} cannot authenticate against its LLM provider"
echo "::error::Set it at Settings → Secrets and Variables → Actions"
exit 1
fi
- name: Install required tools
run: |
# The script depends on jq + curl (already on ubuntu-latest)

View File

@ -98,9 +98,17 @@ export function CookieConsent() {
};
return (
<div
role="dialog"
aria-modal="true"
// role="region" + aria-label, NOT role="dialog" + aria-modal. The
// banner is informational — it never blocks the page, never traps
// focus, and the user can keep using the canvas while it's up.
// Claiming aria-modal="true" without a focus trap is genuinely
// harmful for screen-reader users: they get told the rest of the
// page is inert, jump into the banner, and then can't escape.
// Region semantics let assistive tech navigate around it normally.
// (Also: forcing a modal cookie banner would be a dark pattern —
// GDPR explicitly discourages it.)
<section
role="region"
aria-labelledby="cookie-consent-title"
aria-describedby="cookie-consent-body"
className="fixed bottom-0 left-0 right-0 z-[9999] border-t border-line bg-surface/95 backdrop-blur-sm p-4 shadow-[0_-4px_12px_rgba(0,0,0,0.4)]"
@ -117,7 +125,7 @@ export function CookieConsent() {
workspaces). See our{" "}
<a
href="https://moleculesai.app/legal/privacy"
className="text-accent underline hover:text-accent"
className="text-accent underline underline-offset-2 hover:text-accent-strong focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 rounded-sm"
target="_blank"
rel="noreferrer"
>
@ -130,20 +138,20 @@ export function CookieConsent() {
<button
type="button"
onClick={() => decide("rejected")}
className="rounded border border-line bg-surface-sunken px-4 py-2 text-sm text-ink hover:bg-surface-card"
className="rounded border border-line bg-surface-sunken px-4 py-2 text-sm text-ink hover:bg-surface-card focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 focus-visible:ring-offset-2 focus-visible:ring-offset-surface"
>
Necessary only
</button>
<button
type="button"
onClick={() => decide("accepted")}
className="rounded border border-accent bg-accent-strong px-4 py-2 text-sm font-medium text-white hover:bg-accent"
className="rounded border border-accent bg-accent-strong px-4 py-2 text-sm font-medium text-white hover:bg-accent focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 focus-visible:ring-offset-2 focus-visible:ring-offset-surface"
>
Accept all
</button>
</div>
</div>
</div>
</section>
);
}

View File

@ -40,7 +40,7 @@ afterEach(() => {
describe("CookieConsent", () => {
it("renders the banner when no decision is stored", () => {
render(<CookieConsent />);
expect(screen.getByRole("dialog")).toBeTruthy();
expect(screen.getByRole("region")).toBeTruthy();
expect(screen.getByRole("button", { name: "Accept all" })).toBeTruthy();
expect(screen.getByRole("button", { name: "Necessary only" })).toBeTruthy();
});
@ -48,7 +48,7 @@ describe("CookieConsent", () => {
it("stores 'accepted' and hides the banner when user clicks Accept all", () => {
render(<CookieConsent />);
fireEvent.click(screen.getByRole("button", { name: "Accept all" }));
expect(screen.queryByRole("dialog")).toBeNull();
expect(screen.queryByRole("region")).toBeNull();
const raw = window.localStorage.getItem(STORAGE_KEY);
expect(raw).not.toBeNull();
@ -61,7 +61,7 @@ describe("CookieConsent", () => {
it("stores 'rejected' and hides the banner when user clicks Necessary only", () => {
render(<CookieConsent />);
fireEvent.click(screen.getByRole("button", { name: "Necessary only" }));
expect(screen.queryByRole("dialog")).toBeNull();
expect(screen.queryByRole("region")).toBeNull();
const parsed = JSON.parse(window.localStorage.getItem(STORAGE_KEY)!);
expect(parsed.decision).toBe("rejected");
@ -73,7 +73,7 @@ describe("CookieConsent", () => {
JSON.stringify({ decision: "accepted", decidedAt: new Date().toISOString(), version: 1 }),
);
render(<CookieConsent />);
expect(screen.queryByRole("dialog")).toBeNull();
expect(screen.queryByRole("region")).toBeNull();
});
it("re-prompts when the stored decision is on an older policy version", () => {
@ -82,13 +82,13 @@ describe("CookieConsent", () => {
JSON.stringify({ decision: "accepted", decidedAt: new Date().toISOString(), version: 0 }),
);
render(<CookieConsent />);
expect(screen.getByRole("dialog")).toBeTruthy();
expect(screen.getByRole("region")).toBeTruthy();
});
it("re-prompts when localStorage contains invalid JSON", () => {
window.localStorage.setItem(STORAGE_KEY, "{not json");
render(<CookieConsent />);
expect(screen.getByRole("dialog")).toBeTruthy();
expect(screen.getByRole("region")).toBeTruthy();
});
it("exposes a privacy-policy link with target='_blank'", () => {
@ -99,11 +99,19 @@ describe("CookieConsent", () => {
expect(link.getAttribute("rel")).toContain("noreferrer");
});
it("uses role=dialog with aria-labelledby and aria-describedby for screen readers", () => {
it("uses role=region (NOT dialog) with aria-labelledby/describedby — banner is informational, not modal", () => {
// Regression guard: an earlier version claimed role="dialog"
// aria-modal="true" without a focus trap. That falsely told screen
// readers the rest of the page was inert, trapping AT users in a
// banner they couldn't escape. role="region" lets assistive tech
// navigate around it normally; the banner stays informational.
render(<CookieConsent />);
const dialog = screen.getByRole("dialog");
expect(dialog.getAttribute("aria-labelledby")).toBe("cookie-consent-title");
expect(dialog.getAttribute("aria-describedby")).toBe("cookie-consent-body");
const banner = screen.getByRole("region");
expect(banner.getAttribute("aria-labelledby")).toBe("cookie-consent-title");
expect(banner.getAttribute("aria-describedby")).toBe("cookie-consent-body");
// No aria-modal claim — explicit guard against regression.
expect(banner.getAttribute("aria-modal")).toBeNull();
expect(screen.queryByRole("dialog")).toBeNull();
});
it("does NOT render on local dev (non-SaaS hostname)", () => {
@ -116,7 +124,7 @@ describe("CookieConsent", () => {
value: { ...window.location, hostname: "localhost" },
});
render(<CookieConsent />);
expect(screen.queryByRole("dialog")).toBeNull();
expect(screen.queryByRole("region")).toBeNull();
});
it("does NOT render on a LAN hostname (192.168.*, *.local)", () => {
@ -125,7 +133,7 @@ describe("CookieConsent", () => {
value: { ...window.location, hostname: "192.168.1.74" },
});
render(<CookieConsent />);
expect(screen.queryByRole("dialog")).toBeNull();
expect(screen.queryByRole("region")).toBeNull();
});
});

View File

@ -0,0 +1,145 @@
#!/usr/bin/env bash
# Regression test for the SECRETS_JSON branching in
# tests/e2e/test_staging_full_saas.sh (lines ~322-368).
#
# The synth-E2E canary picks one of two LLM auth paths based on which
# E2E_*_API_KEY is set. The branch order is load-bearing:
#
# E2E_MINIMAX_API_KEY first → claude-code MiniMax path (cheap canary
# default since 2026-05-03; routes via
# workspace-configs-templates/claude-
# code-default/config.yaml's `minimax`
# provider entry).
#
# E2E_OPENAI_API_KEY second → langgraph + hermes legacy path (kept
# as fallback for operator dispatches
# that need the OpenAI-shaped
# HERMES_CUSTOM_* env block).
#
# Without this gate, a future "tidy up the if/elif" refactor could
# silently flip the precedence (OpenAI wins when both are set →
# claude-code workspace boots without MINIMAX_API_KEY → 401 at first
# turn → canary red without any signal that the wrong key shape was
# selected). The 2026-05-03 OpenAI-quota incident took ~16h to
# diagnose for exactly this class of "looks like an LLM problem,
# was actually a wiring problem" failure.
set -uo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SAAS_SCRIPT="$SCRIPT_DIR/test_staging_full_saas.sh"
if [ ! -f "$SAAS_SCRIPT" ]; then
echo "FATAL: cannot locate test_staging_full_saas.sh at $SAAS_SCRIPT" >&2
exit 2
fi
PASS=0
FAIL=0
assert_eq() {
local label="$1" got="$2" want="$3"
if [ "$got" = "$want" ]; then
echo "$label"
PASS=$((PASS+1))
else
echo "$label" >&2
echo " got: $got" >&2
echo " want: $want" >&2
FAIL=$((FAIL+1))
fi
}
# Extract just the SECRETS_JSON block from the saas script and source
# it into a sub-shell so we can run the branching logic in isolation.
# Anchor on the comment header so a structural refactor that moves the
# block fails this test loudly rather than silently sourcing nothing.
extract_block() {
awk '
/^# ─── 5\. Provision parent workspace/ {capture=1; next}
capture && /^MODEL_SLUG=/ {exit}
capture {print}
' "$SAAS_SCRIPT"
}
BLOCK=$(extract_block)
if [ -z "$BLOCK" ]; then
echo "FATAL: SECRETS_JSON block not found in $SAAS_SCRIPT — refactor anchor changed?" >&2
exit 2
fi
# Run the extracted block in a clean env, capturing SECRETS_JSON.
run_block() {
# Caller passes vars on the command line, e.g.
# run_block E2E_MINIMAX_API_KEY=mx-test
env -i PATH="$PATH" "$@" bash -c "
set -uo pipefail
$BLOCK
echo \"\$SECRETS_JSON\"
" 2>/dev/null | tail -1
}
# Resolve a JSON key from the captured payload using python3 (already
# a hard dep of the saas script). Returns empty string on missing key.
get_json_key() {
local payload="$1" key="$2"
python3 -c "
import json, sys
p = json.loads(sys.argv[1])
print(p.get(sys.argv[2], ''))
" "$payload" "$key"
}
list_json_keys() {
python3 -c "
import json, sys
p = json.loads(sys.argv[1])
print(','.join(sorted(p.keys())))
" "$1"
}
echo "Test: SECRETS_JSON branching in test_staging_full_saas.sh"
echo
# ── Branch 1: MiniMax wins when set ──
SECRETS_JSON=$(run_block E2E_MINIMAX_API_KEY=mx-test)
assert_eq "MiniMax key set → MINIMAX_API_KEY in payload" \
"$(get_json_key "$SECRETS_JSON" MINIMAX_API_KEY)" "mx-test"
assert_eq "MiniMax-only payload contains exactly MINIMAX_API_KEY" \
"$(list_json_keys "$SECRETS_JSON")" "MINIMAX_API_KEY"
# ── Branch 1 precedence: MiniMax beats OpenAI when both set ──
# Critical: the 2026-05-03 incident shape was "two paths exist, wrong
# one wins". The bash if/elif must keep MiniMax above OpenAI so the
# claude-code default canary doesn't accidentally use the (more
# expensive, quota-burnt) OpenAI key.
SECRETS_JSON=$(run_block E2E_MINIMAX_API_KEY=mx-priority E2E_OPENAI_API_KEY=oai-loser)
assert_eq "Both keys set → MiniMax wins" \
"$(get_json_key "$SECRETS_JSON" MINIMAX_API_KEY)" "mx-priority"
assert_eq "Both keys set → OpenAI block NOT emitted" \
"$(get_json_key "$SECRETS_JSON" OPENAI_API_KEY)" ""
assert_eq "Both keys set → no HERMES_* leakage from OpenAI branch" \
"$(get_json_key "$SECRETS_JSON" HERMES_INFERENCE_PROVIDER)" ""
# ── Branch 2: OpenAI used when MiniMax absent ──
SECRETS_JSON=$(run_block E2E_OPENAI_API_KEY=oai-test)
assert_eq "Only OpenAI set → OPENAI_API_KEY in payload" \
"$(get_json_key "$SECRETS_JSON" OPENAI_API_KEY)" "oai-test"
assert_eq "Only OpenAI set → HERMES_CUSTOM_API_KEY mirrors OpenAI key" \
"$(get_json_key "$SECRETS_JSON" HERMES_CUSTOM_API_KEY)" "oai-test"
assert_eq "Only OpenAI set → MODEL_PROVIDER pinned to colon-form" \
"$(get_json_key "$SECRETS_JSON" MODEL_PROVIDER)" "openai:gpt-4o"
assert_eq "Only OpenAI set → MINIMAX_API_KEY NOT emitted" \
"$(get_json_key "$SECRETS_JSON" MINIMAX_API_KEY)" ""
# ── No keys: empty payload ──
SECRETS_JSON=$(run_block)
assert_eq "No keys set → SECRETS_JSON is empty object" \
"$SECRETS_JSON" "{}"
echo
echo "─────────────────────────────────────────────────"
echo "PASSED: $PASS"
echo "FAILED: $FAIL"
echo "─────────────────────────────────────────────────"
[ "$FAIL" -eq 0 ]

View File

@ -320,29 +320,39 @@ tenant_call() {
}
# ─── 5. Provision parent workspace ─────────────────────────────────────
# Runtimes like hermes crash at boot with "No provider API key found"
# if nothing in the standard env-var list is set. Inject the API key
# from E2E_OPENAI_API_KEY so the runtime can actually start — it's
# per-workspace secret, so it's persisted as a workspace_secret and
# materialized into the container env. Missing key falls through to
# an empty secrets map; workspace will still fail but the error is
# expected and actionable.
# Inject the LLM provider key so the runtime can authenticate at boot.
# Branch by which secret is set so the script supports both paths
# without forcing every dispatch to ship both keys:
#
# E2E_MINIMAX_API_KEY → claude-code MiniMax path. Cheapest, default
# for the cron canary post-2026-05-03. Routes via the claude-code
# template's `minimax` provider (workspace-configs-templates/
# claude-code-default/config.yaml:64-69) which sets
# ANTHROPIC_BASE_URL=https://api.minimax.io/anthropic at boot.
# MINIMAX_API_KEY is the vendor-specific env name the adapter
# reads (PR #244 — per-vendor envs prevent ANTHROPIC_AUTH_TOKEN
# collisions when a user runs MiniMax + Z.ai workspaces side-by-
# side).
#
# E2E_OPENAI_API_KEY → langgraph + hermes paths. Kept as fallback
# for operator dispatches that explicitly want to exercise the
# OpenAI path. The HERMES_* fields pin hermes-agent's bridge to
# api.openai.com (template-hermes' derive-provider.sh otherwise
# resolves openai/* → openrouter.ai and 401s). MODEL_PROVIDER
# follows workspace/config.py:258's 'provider:model' format.
#
# Both empty → '{}' (workspace will fail at first turn with an
# expected, actionable auth error rather than masking the test).
SECRETS_JSON='{}'
if [ -n "${E2E_OPENAI_API_KEY:-}" ]; then
# MODEL_PROVIDER is a full model slug in 'provider:model' format per
# workspace/config.py:258. Using just "openai" gets parsed as the
# model name → 404 model_not_found. Also set OPENAI_BASE_URL to
# OpenAI's own endpoint — default is openrouter.ai which would need
# a different key format.
#
# The HERMES_* fields below bypass template-hermes/scripts/derive-provider.sh
# — verified 2026-04-24 that even with template-hermes#19's fix in main,
# staging tenants sometimes resolve openai/* to PROVIDER=openrouter and
# emit {'message':'Missing Authentication header','code':401} (OpenRouter's
# shape) in the A2A reply. Setting HERMES_INFERENCE_PROVIDER=custom +
# HERMES_CUSTOM_{BASE_URL,API_KEY,API_MODE} pins the bridge deterministically
# so the test doesn't depend on every tenant EC2 having a freshly-cloned
# template-hermes.
if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
SECRETS_JSON=$(python3 -c "
import json, os
k = os.environ['E2E_MINIMAX_API_KEY']
print(json.dumps({
'MINIMAX_API_KEY': k,
}))
")
elif [ -n "${E2E_OPENAI_API_KEY:-}" ]; then
SECRETS_JSON=$(python3 -c "
import json, os
k = os.environ['E2E_OPENAI_API_KEY']