From 06240ab67b32a2564edbabed07f7e0a1283fb172 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Sun, 3 May 2026 03:44:05 -0700 Subject: [PATCH 1/6] fix(preflight): skip required_env check in MOLECULE_SMOKE_MODE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Boot smoke (#2275) exercises executor.execute() against stub deps and never hits the real provider, so missing auth env is not a real blocker. Without this bypass, every adapter that introduces a new auth env var must be mirrored into molecule-ci's fake-env list — a maintenance treadmill that just bit hermes-template: - 2026-05-03 09:47 UTC: hermes publish-image smoke fails on HERMES_API_KEY preflight (workflow injects CLAUDE_CODE_OAUTH_TOKEN, ANTHROPIC_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY but not HERMES_API_KEY or OPENROUTER_API_KEY). Failed for two cycles before being noticed. The bypass demotes Required-env failures to warnings when MOLECULE_SMOKE_MODE is truthy, so the unset env stays visible in the boot log without blocking. Production paths are unchanged (env unset → fail). Co-Authored-By: Claude Opus 4.7 (1M context) --- workspace/preflight.py | 32 ++++++++++++++++---- workspace/tests/test_preflight.py | 49 +++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 5 deletions(-) diff --git a/workspace/preflight.py b/workspace/preflight.py index d6123f25..e1929f3e 100644 --- a/workspace/preflight.py +++ b/workspace/preflight.py @@ -180,16 +180,38 @@ def run_preflight(config: WorkspaceConfig, config_path: str) -> PreflightReport: required_env = list(entry.get("required_env") or []) break + # Smoke mode skips the auth-env block: the boot smoke (CI publish-image, + # issue #2275) exercises executor.execute() against stub deps, never + # hits the real provider, and CI cannot enumerate every adapter's auth + # env without forming a maintenance treadmill. 
Hermes 2026-05-03 outage: + # template smoke crashed for two cycles because molecule-ci injected + # CLAUDE_CODE_OAUTH_TOKEN/ANTHROPIC_API_KEY/etc. but not HERMES_API_KEY. + # Bypass here means new templates can ship without the workflow + # learning their env names. + smoke_mode = os.environ.get("MOLECULE_SMOKE_MODE", "").strip().lower() in ( + "1", "true", "yes", "on", + ) for env_var in required_env: - if not os.environ.get(env_var): - report.failures.append( + if os.environ.get(env_var): + continue + if smoke_mode: + report.warnings.append( PreflightIssue( - severity="fail", + severity="warn", title="Required env", - detail=f"Missing required environment variable: {env_var}", - fix=f"Set {env_var} via the secrets API (global or workspace-level).", + detail=f"Missing {env_var} (skipped — MOLECULE_SMOKE_MODE)", + fix="", ) ) + continue + report.failures.append( + PreflightIssue( + severity="fail", + title="Required env", + detail=f"Missing required environment variable: {env_var}", + fix=f"Set {env_var} via the secrets API (global or workspace-level).", + ) + ) # Backward compat: if legacy auth_token_file is set, warn but don't block # if the token is available via required_env or auth_token_env. diff --git a/workspace/tests/test_preflight.py b/workspace/tests/test_preflight.py index febf536a..063dcb8f 100644 --- a/workspace/tests/test_preflight.py +++ b/workspace/tests/test_preflight.py @@ -286,6 +286,55 @@ def test_required_env_empty_list_passes(tmp_path): assert report.ok is True +def test_required_env_skipped_in_smoke_mode(tmp_path, monkeypatch): + """MOLECULE_SMOKE_MODE=1 demotes Required-env failures to warnings. + + Boot smoke (issue #2275) exercises executor.execute() against stub + deps and never hits the real provider, so missing auth env is not + a real blocker. Without this bypass, every adapter that introduces + a new auth env var (HERMES_API_KEY, OPENROUTER_API_KEY, etc.) 
+ would silently break the publish-image gate until molecule-ci's + fake-env list catches up — the 2026-05-03 hermes outage. The + warning still surfaces in the report so unset env doesn't go + completely silent. + """ + monkeypatch.delenv("HERMES_API_KEY", raising=False) + monkeypatch.setenv("MOLECULE_SMOKE_MODE", "1") + + config = make_config( + runtime_config=RuntimeConfig(required_env=["HERMES_API_KEY"]), + ) + + report = run_preflight(config, str(tmp_path)) + + assert report.ok is True + assert any( + issue.title == "Required env" and "HERMES_API_KEY" in issue.detail + for issue in report.warnings + ), "smoke-mode bypass should still warn so unset env stays visible" + assert not any( + issue.title == "Required env" for issue in report.failures + ) + + +def test_required_env_smoke_mode_off_still_fails(tmp_path, monkeypatch): + """Sanity: smoke bypass is OFF when MOLECULE_SMOKE_MODE is unset.""" + monkeypatch.delenv("HERMES_API_KEY", raising=False) + monkeypatch.delenv("MOLECULE_SMOKE_MODE", raising=False) + + config = make_config( + runtime_config=RuntimeConfig(required_env=["HERMES_API_KEY"]), + ) + + report = run_preflight(config, str(tmp_path)) + + assert report.ok is False + assert any( + issue.title == "Required env" and "HERMES_API_KEY" in issue.detail + for issue in report.failures + ) + + # ---------- Per-model required_env (models[] override) ---------- From 09010212a0a887071d1c1f855b586641d8cca939 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Sun, 3 May 2026 03:52:39 -0700 Subject: [PATCH 2/6] feat(ci): structural drift gate for cascade list vs manifest (RFC #388 PR-3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the recurrence path of PR #2556. The data fix realigned 8→4 templates in publish-runtime.yml's TEMPLATES variable, but the underlying drift hazard was unguarded — the next manifest change could silently leave cascade out of sync again. 
This gate fails any PR that changes manifest.json or publish-runtime.yml in a way that makes the cascade list diverge from manifest workspace_templates (suffix-stripped). Either direction is caught: missing-from-cascade templates that won't auto-rebuild on a new wheel publish (the codex-stuck-on-stale-runtime bug class — PR #2512 added codex to manifest, cascade wasn't updated, codex stayed pinned to its last-built runtime version for weeks). extra-in-cascade cascade dispatches to deprecated templates (the wasted-API-calls + dead-CI-noise class — PR #2536 pruned 5 templates from manifest; cascade kept dispatching to all 8 until PR #2556). Triggers narrowly: only on PRs that touch manifest.json, publish-runtime.yml, or the script itself. Fast (single grep+sed+comm pipeline, no Go build). Surfaced during the RFC #388 prior-art audit; folded in as the structural follow-up to the data fix #2556 promised. Self-tested both failure modes locally before commit: - Drop codex from cascade → script fails with "MISSING: codex" - Add langgraph to cascade → script fails with "EXTRA: langgraph" Refs: https://github.com/Molecule-AI/molecule-controlplane/issues/388 Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/cascade-list-drift-gate.yml | 39 ++++++++ scripts/check-cascade-list-vs-manifest.sh | 95 +++++++++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 .github/workflows/cascade-list-drift-gate.yml create mode 100755 scripts/check-cascade-list-vs-manifest.sh diff --git a/.github/workflows/cascade-list-drift-gate.yml b/.github/workflows/cascade-list-drift-gate.yml new file mode 100644 index 00000000..284a68d8 --- /dev/null +++ b/.github/workflows/cascade-list-drift-gate.yml @@ -0,0 +1,39 @@ +name: cascade-list-drift-gate + +# Structural gate: TEMPLATES list in publish-runtime.yml must match +# manifest.json's workspace_templates exactly. 
Closes the recurrence +# path of PR #2556 (the data fix) and is the first concrete deliverable +# of RFC #388 PR-3. +# +# Why a gate, not just discipline: PR #2536 pruned the manifest, but the +# cascade list wasn't updated for ~weeks before someone (PR #2556) +# noticed during an unrelated audit. During that window, codex never +# rebuilt on a runtime publish. A structural gate catches the drift +# the same day either file changes. +# +# Triggers narrowly to keep CI quiet: only on PRs that actually change +# one of the two files. The path-filtered split + always-emit-result +# pattern (memory: "Required check names need a job that always runs") +# is unnecessary here because the workflow IS the check name and PR +# branch protection should require it directly. Future-proof: if this +# becomes a required check, add a no-op aggregator with always() so the +# name still emits when paths don't match. + +on: + pull_request: + branches: [staging, main] + paths: + - manifest.json + - .github/workflows/publish-runtime.yml + - scripts/check-cascade-list-vs-manifest.sh + +permissions: + contents: read + +jobs: + check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + - name: Check cascade list matches manifest + run: bash scripts/check-cascade-list-vs-manifest.sh diff --git a/scripts/check-cascade-list-vs-manifest.sh b/scripts/check-cascade-list-vs-manifest.sh new file mode 100755 index 00000000..434069a5 --- /dev/null +++ b/scripts/check-cascade-list-vs-manifest.sh @@ -0,0 +1,95 @@ +#!/usr/bin/env bash +# check-cascade-list-vs-manifest.sh — structural drift gate for the +# publish-runtime cascade list vs manifest.json workspace_templates. +# +# WHY: PR #2536 pruned the manifest to 4 supported runtimes; PR #2556 +# realigned the cascade list to match. The underlying drift hazard +# (cascade-list ≠ manifest) was unguarded — the data fix didn't prevent +# recurrence. This script is the structural gate that does. 
+# +# Behavior-based per project pattern: derives the expected set from +# manifest.json and the actual set from the workflow YAML, fails on +# any divergence in either direction. +# +# missing-from-cascade → templates in manifest that publish-runtime.yml +# won't auto-rebuild on a new wheel publish +# (the codex-stuck-on-stale-runtime bug class) +# extra-in-cascade → cascade dispatches to deprecated templates +# (the wasted-API-calls + dead-CI-noise class) +# +# Suffix mapping: manifest names map to GHCR repos via +# {name without -default suffix} → molecule-ai-workspace-template-<suffix> +# That's the same map publish-runtime.yml's TEMPLATES variable iterates. +# +# Exit: +# 0 cascade matches manifest exactly +# 1 drift detected (script prints the diff) +# 2 bad usage / missing inputs + +set -eu + +MANIFEST="${1:-manifest.json}" +WORKFLOW="${2:-.github/workflows/publish-runtime.yml}" + +if [ ! -f "$MANIFEST" ]; then + echo "::error::manifest not found: $MANIFEST" >&2 + exit 2 +fi +if [ ! -f "$WORKFLOW" ]; then + echo "::error::workflow not found: $WORKFLOW" >&2 + exit 2 +fi + +# Expected cascade entries: manifest workspace_templates → suffix-only +# (strip -default tail, e.g. claude-code-default → claude-code, since +# publish-runtime.yml's TEMPLATES uses suffixes that match the +# molecule-ai-workspace-template-<suffix> repo naming). +EXPECTED=$(jq -r '.workspace_templates[].name' "$MANIFEST" \ + | sed 's/-default$//' \ + | sort -u) + +# Actual cascade entries: extract from the TEMPLATES="…" line. We look +# for the line, pull the contents between the quotes, and split into +# one-per-line. Single source of truth in the workflow itself, no +# parallel registry needed. +# +# Why not \s in the regex: BSD sed (macOS) doesn't recognize \s as +# whitespace — treats it as literal `s`. POSIX [[:space:]] works on +# both BSD and GNU sed. 
Same hazard nuked the original draft of this +# script: \s* matched empty-prefix-of-literal-s, then the leading +# whitespace stayed in the captured group. +ACTUAL=$(grep -E '[[:space:]]*TEMPLATES="' "$WORKFLOW" \ + | head -1 \ + | sed -E 's/^[[:space:]]*TEMPLATES="([^"]*)".*$/\1/' \ + | tr ' ' '\n' \ + | grep -v '^$' \ + | sort -u) + +if [ -z "$ACTUAL" ]; then + echo "::error::could not extract TEMPLATES=\"…\" from $WORKFLOW — has the variable name or quoting changed?" >&2 + exit 2 +fi + +MISSING=$(comm -23 <(printf '%s\n' "$EXPECTED") <(printf '%s\n' "$ACTUAL")) +EXTRA=$(comm -13 <(printf '%s\n' "$EXPECTED") <(printf '%s\n' "$ACTUAL")) + +if [ -z "$MISSING" ] && [ -z "$EXTRA" ]; then + echo "✓ cascade list matches manifest workspace_templates ($(echo "$EXPECTED" | wc -l | tr -d ' ') entries)" + exit 0 +fi + +echo "::error::cascade list drift detected between $MANIFEST and $WORKFLOW" >&2 +echo "" >&2 +if [ -n "$MISSING" ]; then + echo " Templates in manifest but MISSING from cascade (won't auto-rebuild on wheel publish):" >&2 + echo "$MISSING" | sed 's/^/ - /' >&2 + echo "" >&2 +fi +if [ -n "$EXTRA" ]; then + echo " Templates in cascade but NOT in manifest (deprecated, wasting dispatch calls):" >&2 + echo "$EXTRA" | sed 's/^/ - /' >&2 + echo "" >&2 +fi +echo " Fix: edit the TEMPLATES=\"…\" line in $WORKFLOW so the set matches" >&2 +echo " manifest.json's workspace_templates (suffix-stripped). See PR #2556 for context." >&2 +exit 1 From e1628c4d56d753ee38632a5d7dbdf10954fe4490 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Sun, 3 May 2026 04:06:45 -0700 Subject: [PATCH 3/6] fix(a2a): route terminal Message via TaskUpdater.complete/failed in task mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #2558 enqueued a Task at the start of new requests so the v1 SDK would accept TaskUpdater.start_work() — fix #1 of the v0→v1 migration gap (PR #2170). 
But after Task is enqueued, the executor enters "task mode" and the SDK rejects raw Message enqueues at the terminal step: {"code":-32603,"message":"Received Message object in task mode. Use TaskStatusUpdateEvent or TaskArtifactUpdateEvent instead."} Synth-E2E 2026-05-03T11:00:34Z surfaced this on the very first run after the prior fix cascaded. Validation site is the same a2a/server/agent_execution/active_task.py — the framework's job is to enforce the v1 invariant; we're catching up to it. The fix routes both terminal events through TaskUpdater helpers: - success: updater.complete(message=msg) wraps in TaskStatusUpdateEvent(state=COMPLETED, final=True) - error: updater.failed(message=...) wraps in TaskStatusUpdateEvent(state=FAILED, final=True) Both helpers exist in a2a-sdk ≥ 1.0; verified via TaskUpdater.complete signature. Tests: - conftest TaskUpdater stub now records complete/failed calls AND routes the message back through event_queue.enqueue_event so the ~20 legacy tests asserting on enqueue_event keep working - 2 new regression tests pin the contract: * test_terminal_success_routes_via_updater_complete * test_terminal_error_routes_via_updater_failed - Both NEW tests verified to FAIL on staging-baseline (without this fix) and PASS with it — they'd catch the regression before staging if the wheel-smoke gate covered task-mode terminal events too (separate yak-shave for #131 follow-up) Co-Authored-By: Claude Opus 4.7 (1M context) --- workspace/a2a_executor.py | 21 ++++++-- workspace/tests/conftest.py | 26 +++++++--- workspace/tests/test_a2a_executor.py | 78 ++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 11 deletions(-) diff --git a/workspace/a2a_executor.py b/workspace/a2a_executor.py index 38860c03..9b4d9464 100644 --- a/workspace/a2a_executor.py +++ b/workspace/a2a_executor.py @@ -509,7 +509,15 @@ class LangGraphA2AExecutor(AgentExecutor): # accept the assignment. See #1787 + commit dcbcf19 # for the original test-mock motivation. 
logger.debug("metadata attach skipped (non-Message return from new_text_message)") - await event_queue.enqueue_event(msg) + # A2A v1 (a2a-sdk ≥ 1.0): once Task is enqueued (above, PR #2558), + # the executor is in task mode and raw Message enqueues are + # rejected with InvalidAgentResponseError("Received Message + # object in task mode. Use TaskStatusUpdateEvent or + # TaskArtifactUpdateEvent instead."). updater.complete() + # wraps the Message in a terminal TaskStatusUpdateEvent + # (state=COMPLETED, final=True) which both streaming and + # non-streaming clients accept. + await updater.complete(message=msg) _result = final_text except Exception as e: @@ -520,10 +528,13 @@ class LangGraphA2AExecutor(AgentExecutor): task_span.set_status(StatusCode.ERROR, str(e)) except Exception: pass - # Emit a Message so both streaming and non-streaming clients - # receive an error response rather than hanging. - await event_queue.enqueue_event( - new_text_message( + # A2A v1: in task mode, terminal errors must publish a + # FAILED TaskStatusUpdateEvent (carrying the error Message) + # rather than a raw Message enqueue. updater.failed() does + # exactly this — both streaming and non-streaming clients + # receive the error and stop polling. + await updater.failed( + message=new_text_message( f"Agent error: {e}", task_id=task_id, context_id=context_id ) ) diff --git a/workspace/tests/conftest.py b/workspace/tests/conftest.py index 0d130a6f..cb1b75b4 100644 --- a/workspace/tests/conftest.py +++ b/workspace/tests/conftest.py @@ -35,27 +35,41 @@ def _make_a2a_mocks(): events_mod.EventQueue = EventQueue - # a2a.server.tasks needs a TaskUpdater stub whose async methods are no-ops. - # In tests, TaskUpdater calls go to this stub rather than the real SDK so - # event_queue.enqueue_event is only called via explicit executor code paths. 
+ # a2a.server.tasks needs a TaskUpdater stub whose async methods are no-ops + # for status transitions but ROUTE the terminal message back through + # event_queue.enqueue_event so legacy assertions on enqueue_event keep + # working. The wrapper preserves identity (the same Message object the + # executor passed in) so tests inspecting str(event_arg) still see the + # response text. complete()/failed() also record their last call on the + # event_queue itself (`_complete_calls`, `_failed_calls`) so the v1 + # contract regression test (#262 follow-on to #2558) can pin the proper + # path was taken — raw enqueue from executor would NOT touch these. tasks_mod = ModuleType("a2a.server.tasks") class TaskUpdater: - """Stub TaskUpdater — no-op async methods for unit tests.""" + """Stub TaskUpdater — terminal helpers route through event_queue.""" def __init__(self, event_queue, task_id, context_id, *args, **kwargs): self.event_queue = event_queue self.task_id = task_id self.context_id = context_id + if not hasattr(event_queue, "_complete_calls"): + event_queue._complete_calls = [] + if not hasattr(event_queue, "_failed_calls"): + event_queue._failed_calls = [] async def start_work(self, message=None): pass async def complete(self, message=None): - pass + self.event_queue._complete_calls.append(message) + if message is not None: + await self.event_queue.enqueue_event(message) async def failed(self, message=None): - pass + self.event_queue._failed_calls.append(message) + if message is not None: + await self.event_queue.enqueue_event(message) async def add_artifact( self, parts, artifact_id=None, name=None, metadata=None, diff --git a/workspace/tests/test_a2a_executor.py b/workspace/tests/test_a2a_executor.py index 134c56ba..1835092c 100644 --- a/workspace/tests/test_a2a_executor.py +++ b/workspace/tests/test_a2a_executor.py @@ -1123,3 +1123,81 @@ async def test_no_task_enqueue_on_continuation(): assert not isinstance(event, Task), ( f"continuation must not re-enqueue Task, 
but got Task at {call}" ) + + +# --------------------------------------------------------------------------- +# A2A v1 task-mode terminal-event contract (PR #2558 follow-up, task #262) +# --------------------------------------------------------------------------- +# After PR #2558 enqueues a Task at the start of new requests, the executor +# is in v1 "task mode". The SDK then rejects any subsequent raw Message +# enqueue with InvalidAgentResponseError("Received Message object in task +# mode. Use TaskStatusUpdateEvent or TaskArtifactUpdateEvent instead.") — +# see a2a/server/agent_execution/active_task.py validation site. Synth-E2E +# 2026-05-03T11:00:34Z surfaced this. The fix routes the terminal Message +# through TaskUpdater.complete()/failed() which wrap it in a +# TaskStatusUpdateEvent. Both tests below pin that path so the regression +# can't recur (raw enqueue at the terminal step would NOT touch +# event_queue._complete_calls / _failed_calls). + +@pytest.mark.asyncio +async def test_terminal_success_routes_via_updater_complete(): + """A successful run must terminate via updater.complete(message=...) — + raw event_queue.enqueue_event(Message) crashes the v1 SDK in task mode.""" + agent = MagicMock() + agent.astream_events = MagicMock(return_value=_stream(_text_chunk("Hello"))) + executor = LangGraphA2AExecutor(agent) + + part = MagicMock() + part.text = "Hi" + + context = _make_context([part], "ctx-term-ok", task_id="task-term-ok") + context.current_task = None # forces task-mode (Task gets enqueued) + eq = _make_event_queue() + # Pre-init real lists so the AsyncMock event_queue doesn't auto-spec + # _complete_calls/_failed_calls into child MagicMocks. The conftest + # TaskUpdater stub appends to these lists when complete/failed fire. 
+ eq._complete_calls = [] + eq._failed_calls = [] + + await executor.execute(context, eq) + + assert eq._complete_calls, ( + "terminal Message must route via updater.complete() in task mode — " + "raw event_queue.enqueue_event(Message) is rejected by a2a-sdk v1" + ) + final_msg = eq._complete_calls[-1] + assert "Hello" in str(final_msg) + + +@pytest.mark.asyncio +async def test_terminal_error_routes_via_updater_failed(): + """An agent crash must terminate via updater.failed(message=...) — raw + enqueue in task mode hits the same v1 contract violation.""" + async def _error_stream(*args, **kwargs): + raise RuntimeError("model crashed") + yield # pragma: no cover — makes this an async generator + + agent = MagicMock() + agent.astream_events = MagicMock(return_value=_error_stream()) + executor = LangGraphA2AExecutor(agent) + + part = MagicMock() + part.text = "Break things" + + context = _make_context([part], "ctx-term-err", task_id="task-term-err") + context.current_task = None # forces task-mode + eq = _make_event_queue() + eq._complete_calls = [] + eq._failed_calls = [] + + await executor.execute(context, eq) + + assert eq._failed_calls, ( + "terminal error Message must route via updater.failed() in task mode" + ) + err_msg = eq._failed_calls[-1] + assert "model crashed" in str(err_msg) + # And complete() must NOT have been called on the failure path. 
+ assert not eq._complete_calls, ( + "complete() should not fire when execute() raises" + ) From df7edfcd3fcfa3a542c7a6953d81a5b5ab448527 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Sun, 3 May 2026 04:11:35 -0700 Subject: [PATCH 4/6] fix(canvas): wire ReactFlow colorMode to resolvedTheme MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #2555 (Tailwind v4 + warm-paper) migrated all canvas chrome (toolbar, side panel, modal layer) to semantic tokens, but missed the React Flow viewport's `colorMode="dark"` literal — and two paired hardcoded dark literals on the Background dot color and MiniMap mask. Net result on prod: the user picked light mode, the toolbar flipped warm-paper, but the canvas backplate, edges, dots, controls, and minimap stayed black — visibly half-themed. Three coordinated fixes inside the canvas viewport: - ReactFlow `colorMode={resolvedTheme}` so the library's own dark/light styles flip with the user's choice. - Background dot color picks the line-soft tone in light mode (zinc-800 was invisible-on-cream). - MiniMap maskColor warm-tints the off-viewport dim so the unselected region doesn't render as a hard black bar over warm-paper. 
Verification: - `npx tsc --noEmit` clean - `npx vitest run` 188/188 pass - (will browser-verify post-redeploy on hongming.moleculesai.app) Co-Authored-By: Claude Opus 4.7 (1M context) --- canvas/src/components/Canvas.tsx | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/canvas/src/components/Canvas.tsx b/canvas/src/components/Canvas.tsx index f677862a..ebd8a1d3 100644 --- a/canvas/src/components/Canvas.tsx +++ b/canvas/src/components/Canvas.tsx @@ -13,6 +13,7 @@ import { import "@xyflow/react/dist/style.css"; import { useCanvasStore } from "@/store/canvas"; +import { useTheme } from "@/lib/theme-provider"; import { A2ATopologyOverlay } from "./A2ATopologyOverlay"; import { WorkspaceNode } from "./WorkspaceNode"; import { SidePanel } from "./SidePanel"; @@ -69,6 +70,14 @@ export function Canvas() { } function CanvasInner() { + // ReactFlow's `colorMode` prop drives the styling of every viewport + // primitive it renders directly (background dots, edge defaults, + // selection rings, controls, minimap mask). Pre-fix this was hard-pinned + // to "dark" — so on light theme the chrome (toolbar, side panel) flipped + // to warm-paper but the canvas backplate + edges stayed black, leaving a + // half-themed page. Pull resolvedTheme so the canvas matches the user's + // selected mode (and the system preference when they pick "system"). + const { resolvedTheme } = useTheme(); const rawNodes = useCanvasStore((s) => s.nodes); const edges = useCanvasStore((s) => s.edges); const a2aEdges = useCanvasStore((s) => s.a2aEdges); @@ -250,7 +259,7 @@ function CanvasInner() {
{ // Parents show as a filled region — hierarchy visible at // a glance in the minimap without needing to zoom. From 596e797dca895789e0dc7f5cab305f4782fc23c0 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Sun, 3 May 2026 04:28:29 -0700 Subject: [PATCH 5/6] ci(deploy): broaden ephemeral-prefix matchers to cover rt-e2e-* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The redeploy-tenants-on-staging soft-warn filter and the sweep-stale-e2e-orgs janitor both hardcoded `^e2e-` to identify ephemeral test tenants. Runtime-test harness fixtures (RFC #2251) mint slugs prefixed with `rt-e2e-`, which neither matcher recognized. Concrete impact observed today: - Two `rt-e2e-v{5,6}-*` tenants left orphaned 8h on staging (sweep-stale-e2e-orgs ignored them). - On the next staging redeploy their phantom EC2s returned `InvalidInstanceId: Instances not in a valid state for account` from SSM SendCommand → CP returned HTTP 500 + ok=false. - The redeploy soft-warn missed them too, so the workflow went red, which broke the auto-promote-staging chain feeding the canvas warm-paper rollout to prod. Fix: switch both matchers to recognize the alternation `^(e2e-|rt-e2e-)`. Long-lived prefixes (demo-prep, dryrun-*, dryrun2-*) remain non-ephemeral and continue to hard-fail. Comment documents the source-of-truth list and the cross-file invariant. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../workflows/redeploy-tenants-on-staging.yml | 40 +++++++++++-------- .github/workflows/sweep-stale-e2e-orgs.yml | 14 +++++-- 2 files changed, 34 insertions(+), 20 deletions(-) diff --git a/.github/workflows/redeploy-tenants-on-staging.yml b/.github/workflows/redeploy-tenants-on-staging.yml index caaeb56e..97392172 100644 --- a/.github/workflows/redeploy-tenants-on-staging.yml +++ b/.github/workflows/redeploy-tenants-on-staging.yml @@ -176,35 +176,41 @@ jobs: # # CP returns HTTP 500 + ok=false whenever ANY tenant in the # fleet failed SSM or healthz. 
In practice the recurring source - # of these is ephemeral e2e-* tenants (saas/canvas/ext) being - # torn down by their parent E2E run mid-redeploy: the EC2 dies → - # SSM exit=2 or healthz timeout → CP marks the fleet failed → - # this workflow goes red even though every operator-facing - # tenant rolled fine. + # of these is ephemeral test tenants being torn down by their + # parent E2E run mid-redeploy: the EC2 dies → SSM exit=2 or + # healthz timeout → CP marks the fleet failed → this workflow + # goes red even though every operator-facing tenant rolled fine. # - # Filter: if HTTP=500/ok=false AND every failed slug matches - # ^e2e-, treat as soft-warn and let the verify step downstream - # handle the unreachable-vs-stale distinction (it already knows - # the difference per #2402). Any non-e2e-* failure or a non-500 - # HTTP response remains a hard failure. + # Ephemeral slug prefixes (kept in sync with sweep-stale-e2e-orgs.yml + # — see that file for the source-of-truth list and rationale): + # - e2e-* — canvas/saas/ext E2E suites + # - rt-e2e-* — runtime-test harness fixtures (RFC #2251) + # Long-lived prefixes that are NOT ephemeral and MUST hard-fail: + # demo-prep, dryrun-*, dryrun2-*, plus all human tenant slugs. + # + # Filter: if HTTP=500/ok=false AND every failed slug matches an + # ephemeral prefix, treat as soft-warn and let the verify step + # downstream handle unreachable-vs-stale (#2402). Any non-ephemeral + # failure or a non-500 HTTP response remains a hard failure. OK=$(jq -r '.ok // "false"' "$HTTP_RESPONSE") FAILED_SLUGS=$(jq -r ' .results[]? 
| select((.healthz_ok != true) or (.ssm_status != "Success")) | .slug' "$HTTP_RESPONSE" 2>/dev/null || true) - NON_E2E_FAILED=$(printf '%s\n' "$FAILED_SLUGS" | grep -v '^$' | grep -v '^e2e-' || true) + EPHEMERAL_PREFIX_RE='^(e2e-|rt-e2e-)' + NON_EPHEMERAL_FAILED=$(printf '%s\n' "$FAILED_SLUGS" | grep -v '^$' | grep -Ev "$EPHEMERAL_PREFIX_RE" || true) if [ "$HTTP_CODE" = "200" ] && [ "$OK" = "true" ]; then : # happy path — fall through to verification - elif [ "$HTTP_CODE" = "500" ] && [ -z "$NON_E2E_FAILED" ] && [ -n "$FAILED_SLUGS" ]; then - COUNT=$(printf '%s\n' "$FAILED_SLUGS" | grep -c '^e2e-' || true) - echo "::warning::redeploy-fleet returned HTTP 500 but every failed tenant ($COUNT) is e2e-* ephemeral — treating as teardown race, soft-warning." + elif [ "$HTTP_CODE" = "500" ] && [ -z "$NON_EPHEMERAL_FAILED" ] && [ -n "$FAILED_SLUGS" ]; then + COUNT=$(printf '%s\n' "$FAILED_SLUGS" | grep -Ec "$EPHEMERAL_PREFIX_RE" || true) + echo "::warning::redeploy-fleet returned HTTP 500 but every failed tenant ($COUNT) is ephemeral (e2e-*/rt-e2e-*) — treating as teardown race, soft-warning." printf '%s\n' "$FAILED_SLUGS" | sed 's/^/::warning:: failed: /' elif [ "$HTTP_CODE" != "200" ]; then echo "::error::redeploy-fleet returned HTTP $HTTP_CODE" - if [ -n "$NON_E2E_FAILED" ]; then - echo "::error::non-e2e tenant(s) failed:" - printf '%s\n' "$NON_E2E_FAILED" | sed 's/^/::error:: /' + if [ -n "$NON_EPHEMERAL_FAILED" ]; then + echo "::error::non-ephemeral tenant(s) failed:" + printf '%s\n' "$NON_EPHEMERAL_FAILED" | sed 's/^/::error:: /' fi exit 1 else diff --git a/.github/workflows/sweep-stale-e2e-orgs.yml b/.github/workflows/sweep-stale-e2e-orgs.yml index 6913cba2..5a0dce30 100644 --- a/.github/workflows/sweep-stale-e2e-orgs.yml +++ b/.github/workflows/sweep-stale-e2e-orgs.yml @@ -87,20 +87,28 @@ jobs: > orgs.json # Filter: - # 1. slug starts with 'e2e-' (covers e2e-, e2e-canary-, - # e2e-canvas-* — all variants the test scripts mint) + # 1. 
slug starts with one of the ephemeral test prefixes: + # - 'e2e-' — covers e2e-canary-, e2e-canvas-*, etc. + # - 'rt-e2e-' — runtime-test harness fixtures (RFC #2251); + # missing this prefix left two such tenants + # orphaned 8h on staging (2026-05-03), then + # hard-failed redeploy-tenants-on-staging + # and broke the staging→main auto-promote + # chain. Kept in sync with the EPHEMERAL_PREFIX_RE + # regex in redeploy-tenants-on-staging.yml. # 2. created_at is older than MAX_AGE_MINUTES ago # Output one slug per line to a file the next step reads. python3 > stale_slugs.txt <<'PY' import json, os from datetime import datetime, timezone, timedelta + EPHEMERAL_PREFIXES = ("e2e-", "rt-e2e-") with open("orgs.json") as f: data = json.load(f) max_age = int(os.environ["MAX_AGE_MINUTES"]) cutoff = datetime.now(timezone.utc) - timedelta(minutes=max_age) for o in data.get("orgs", []): slug = o.get("slug", "") - if not slug.startswith("e2e-"): + if not slug.startswith(EPHEMERAL_PREFIXES): continue created = o.get("created_at") if not created: From 5e46ea70d639b194e2e29d30e9b340dcf0493396 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Sun, 3 May 2026 04:43:07 -0700 Subject: [PATCH 6/6] ci(synth-e2e): wire MOLECULE_STAGING_OPENAI_KEY into provisioned tenant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The synth-E2E (#2342) provisions a langgraph tenant whose default model `openai:gpt-4.1-mini` requires OPENAI_API_KEY for the first LLM call. Sibling workflows already wire this: - e2e-staging-saas.yml:89 - canary-staging.yml:63 continuous-synth-e2e.yml just forgot. Result: tenant boots, accepts a2a messages, then returns: Agent error: "Could not resolve authentication method. Expected either api_key or auth_token to be set." 
This was masked since 2026-04-29 (workflow creation) by a2a-sdk v0→v1 contract violations — PR #2558 (Task-enqueue) and #2563 (TaskUpdater.complete/failed terminal events) cleared those, exposing the underlying auth gap on the synth-E2E firing at 11:39 UTC today. The script tests/e2e/test_staging_full_saas.sh:325 already reads E2E_OPENAI_API_KEY and persists it as a workspace_secret on tenant create — only the workflow wiring was missing. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/continuous-synth-e2e.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/continuous-synth-e2e.yml b/.github/workflows/continuous-synth-e2e.yml index ba5f80ce..c6c482b8 100644 --- a/.github/workflows/continuous-synth-e2e.yml +++ b/.github/workflows/continuous-synth-e2e.yml @@ -88,6 +88,15 @@ jobs: E2E_KEEP_ORG: ${{ github.event.inputs.keep_org == 'true' && '1' || '' }} MOLECULE_CP_URL: ${{ vars.STAGING_CP_URL || 'https://staging-api.moleculesai.app' }} MOLECULE_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }} + # Provisioned tenant's default model (langgraph: openai:gpt-4.1-mini) + # needs OPENAI_API_KEY at first call. Sibling workflows + # e2e-staging-saas.yml + canary-staging.yml use the same secret; + # without this wire-up the tenant boots, accepts a2a messages, + # then returns "Could not resolve authentication method" — masked + # earlier by the a2a-sdk task-mode contract bugs PR #2558+#2563 + # fixed. tests/e2e/test_staging_full_saas.sh:325 reads this and + # persists it as a workspace_secret on tenant create. + E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_KEY }} steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2