feat(wheel-smoke): exercise executor.execute() to catch lazy imports (#2275)

The existing wheel-publish smoke (`wheel_smoke.py`) only IMPORTS
`molecule_runtime.main` at module scope. Lazy imports buried inside
`async def execute(...)` bodies (e.g. `from a2a.types import FilePart`)
NEVER evaluate at static-import time — they crash at first message
delivery in production.

The 2026-04-2x v0→v1 a2a-sdk migration shipped 5 such regressions in
templates that all looked fine at module-load smoke. This change adds
`smoke_mode.py` plus a `MOLECULE_SMOKE_MODE=1` short-circuit in
`main.py`: after `adapter.create_executor(...)`, the boot path invokes
`executor.execute(stub_ctx, stub_queue)` once with a 5s timeout
(`MOLECULE_SMOKE_TIMEOUT_SECS`). Healthy import tree → execution
proceeds far enough to hit a network boundary and times out (exit 0).
Broken lazy import → `ImportError` / `ModuleNotFoundError` from inside
the executor body (exit 1). Other downstream errors (auth, validation)
pass — those are caught by adapter-level tests, not this gate.

Stub `(RequestContext, EventQueue)` is built from the real a2a-sdk so
SendMessageRequest/RequestContext constructor changes also surface as
import-tree failures (the regression class also includes "SDK
refactored mid-publish"). The stub-build itself is wrapped — if it
raises, that's a smoke fail too.

Phase 2 (separate PR, molecule-ci) wires this into
publish-template-image.yml so the publish gate runs the boot smoke
against every template image before pushing the tag.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hongming Wang 2026-04-30 21:21:18 -07:00
parent f46c471f9b
commit aacaba024c
4 changed files with 345 additions and 0 deletions

View File

@ -78,6 +78,7 @@ TOP_LEVEL_MODULES = {
"prompt",
"runtime_wedge",
"shared_runtime",
"smoke_mode",
"transcript_auth",
"watcher",
}

View File

@ -136,6 +136,20 @@ async def main(): # pragma: no cover
await adapter.setup(adapter_config)
executor = await adapter.create_executor(adapter_config)
# 5a. Boot-smoke short-circuit (issue #2275): if MOLECULE_SMOKE_MODE
# is set, exercise the executor's full import tree by calling
# execute() once with stub deps + a short timeout. Skips platform
# registration + uvicorn entirely. Returns process exit code.
from smoke_mode import is_smoke_mode, run_executor_smoke
if is_smoke_mode():
exit_code = await run_executor_smoke(executor)
if hasattr(heartbeat, "stop"):
try:
await heartbeat.stop()
except Exception: # noqa: BLE001
pass
raise SystemExit(exit_code)
# 5b. Restore from pre-stop snapshot if one exists (GH#1391).
# The snapshot is scrubbed before being written, so secrets are
# already redacted — restore_state must not re-expose them.

140
workspace/smoke_mode.py Normal file
View File

@ -0,0 +1,140 @@
"""Boot smoke mode — exercises the executor's full import tree without touching real platforms.
Why this exists (issue #2275): the existing `wheel_smoke.py` only IMPORTS
`molecule_runtime.main` at module scope. Lazy imports buried inside
`async def execute(...)` bodies (e.g. `from a2a.types import FilePart`)
NEVER evaluate at static-import time — they crash at first message
delivery in production.
The 2026-04-2x v0→v1 a2a-sdk migration shipped 5 such regressions in
templates that all looked fine at module-load smoke. This module fills
the gap by actually invoking `executor.execute(stub_ctx, stub_queue)`
once with a short timeout. If the import-tree is healthy the call
proceeds far enough to hit a network boundary (LLM call, etc.) and
times out — that's a *pass*. If a lazy import is broken, the call
raises `ImportError` / `ModuleNotFoundError` from inside the executor
body — that's a *fail*.
Activated by setting `MOLECULE_SMOKE_MODE=1` in the env. Wired into
`main.py` after `executor = await adapter.create_executor(...)` so the
full adapter setup path runs first; the smoke just adds one more
exercise step before exit.
CI usage (intended for `molecule-ci/.github/workflows/publish-template-image.yml`):
docker run --rm \
-e WORKSPACE_ID=fake -e MOLECULE_SMOKE_MODE=1 \
"$IMAGE" molecule-runtime
"""
from __future__ import annotations
import asyncio
import logging
import os
import sys
from typing import Any
logger = logging.getLogger(__name__)

# Fallback used when MOLECULE_SMOKE_TIMEOUT_SECS is unset or unusable.
_DEFAULT_SMOKE_TIMEOUT_SECS = 5.0


def _read_smoke_timeout_secs() -> float:
    """Return the smoke timeout (seconds) from `MOLECULE_SMOKE_TIMEOUT_SECS`.

    Falls back to the 5.0s default when the variable is unset, empty,
    not a number, NaN, infinite, or not strictly positive.

    Why the fallback instead of letting `float()` raise: this module is
    evaluated at import time, so a malformed value would otherwise raise
    `ValueError` for every importer, whether or not smoke mode is
    active. A zero or negative timeout would make `asyncio.wait_for`
    time out immediately, which the smoke reports as a PASS without
    having exercised anything.
    """
    raw = os.environ.get("MOLECULE_SMOKE_TIMEOUT_SECS", "").strip()
    if not raw:
        return _DEFAULT_SMOKE_TIMEOUT_SECS
    try:
        parsed = float(raw)
    except ValueError:
        parsed = float("nan")
    # NaN fails both comparisons, so it is rejected here as well.
    if 0.0 < parsed < float("inf"):
        return parsed
    logger.warning(
        "Ignoring invalid MOLECULE_SMOKE_TIMEOUT_SECS=%r; using default %.1fs",
        raw,
        _DEFAULT_SMOKE_TIMEOUT_SECS,
    )
    return _DEFAULT_SMOKE_TIMEOUT_SECS


_SMOKE_TIMEOUT_SECS = _read_smoke_timeout_secs()
def is_smoke_mode() -> bool:
    """Report whether the `MOLECULE_SMOKE_MODE` env var requests smoke mode.

    The value is stripped of surrounding whitespace and compared
    case-insensitively against `1`, `true`, `yes` and `on`. Anything
    else — including an unset or empty variable and `0` — reads as
    False, so the normal boot branch is taken.
    """
    flag = os.environ.get("MOLECULE_SMOKE_MODE", "")
    normalized = flag.strip().lower()
    return normalized in {"1", "true", "yes", "on"}
def _build_stub_context() -> tuple[Any, Any]:
    """Return a `(RequestContext, EventQueue)` pair carrying a minimal
    text message ("smoke test").

    The message is intended to make `extract_message_text(context)`
    return non-empty input, so the executor takes the "real" branch
    (not the empty-input early-exit) and exercises any lazy imports
    along that path.

    The a2a-sdk imports are deliberately function-scoped so that merely
    importing smoke_mode.py does not pull a2a-sdk into every consumer
    of the runtime — the wheel still boots without smoke mode active.
    """
    from a2a.helpers import new_text_message
    from a2a.server.agent_execution import RequestContext
    from a2a.server.context import ServerCallContext
    from a2a.server.events import EventQueue
    from a2a.types import SendMessageRequest

    smoke_message = new_text_message("smoke test")
    server_call = ServerCallContext()
    stub_request = SendMessageRequest(message=smoke_message)
    stub_context = RequestContext(server_call, request=stub_request)
    return stub_context, EventQueue()
def _find_import_error(exc: BaseException) -> ImportError | None:
    """Return the first `ImportError` in *exc*'s exception chain, or None.

    The chain is followed the way traceback display follows it: the
    explicit `__cause__` (`raise ... from err`) first, otherwise the
    implicit `__context__` unless it was suppressed with `from None`.
    `ModuleNotFoundError` is a subclass of `ImportError`, so it matches.
    """
    visited: set[int] = set()
    current: BaseException | None = exc
    # `visited` guards against a cyclic chain looping forever.
    while current is not None and id(current) not in visited:
        if isinstance(current, ImportError):
            return current
        visited.add(id(current))
        if current.__cause__ is not None:
            current = current.__cause__
        elif current.__suppress_context__:
            current = None
        else:
            current = current.__context__
    return None


async def run_executor_smoke(executor: Any) -> int:
    """Invoke executor.execute() once with stub deps. Return an exit code.

    Returns:
        0 — import tree healthy. Either execution timed out (the
            expected outcome — we hit a network boundary like an LLM
            call), completed cleanly, or raised an error that has no
            import failure anywhere in its exception chain.
        1 — broken lazy import detected, or the stub context could not
            be built. Reported as a clear stderr line so the publish
            gate captures the offending symbol.

    An `ImportError` counts as a failure even when the executor wrapped
    it in another exception (`raise X from err`, or raising while
    handling it): a wrapper around a broken import is still a broken
    import, and treating it as a generic downstream error would let the
    regression through.

    The timeout comes from the `MOLECULE_SMOKE_TIMEOUT_SECS` env
    (default 5.0). Bump it via env if a slow adapter setup overlaps the
    first execute call. Don't make it too long — the publish workflow
    multiplies this across N templates.
    """
    print(
        f"[smoke-mode] invoking executor.execute(stub_ctx, stub_queue) "
        f"with {_SMOKE_TIMEOUT_SECS:.1f}s timeout to exercise lazy imports"
    )

    try:
        context, queue = _build_stub_context()
    except Exception as build_err:  # noqa: BLE001
        # If we can't even build the stub, the a2a-sdk import path is
        # broken — that's exactly the regression class this gate exists
        # for. Treat as a smoke failure.
        print(
            f"[smoke-mode] FAIL: stub-context build raised "
            f"{type(build_err).__name__}: {build_err}",
            file=sys.stderr,
        )
        return 1

    try:
        await asyncio.wait_for(
            executor.execute(context, queue),
            timeout=_SMOKE_TIMEOUT_SECS,
        )
    except (asyncio.TimeoutError, asyncio.CancelledError):
        # Timeout = imports healthy, execution was proceeding and hit
        # a network boundary or long await. Pass.
        print("[smoke-mode] PASS: timed out past import-tree (imports healthy)")
        return 0
    except Exception as err:  # noqa: BLE001
        import_err = _find_import_error(err)
        if import_err is not None:
            # The exact regression class issue #2275 exists to catch,
            # whether raised directly or wrapped by the executor.
            wrapper_note = (
                "" if import_err is err
                else f" (wrapped by {type(err).__name__})"
            )
            print(
                f"[smoke-mode] FAIL: lazy import broken in execute(): "
                f"{type(import_err).__name__}: {import_err}{wrapper_note}",
                file=sys.stderr,
            )
            return 1
        # Anything else (auth errors, validation errors, runtime bugs)
        # is downstream of the import gate. Pass — these are caught by
        # the relevant adapter-level tests, not by this smoke.
        print(
            f"[smoke-mode] PASS: execute() raised "
            f"{type(err).__name__} past import-tree (not an import error)"
        )
        return 0

    print("[smoke-mode] PASS: execute() completed within timeout (imports + body OK)")
    return 0

View File

@ -0,0 +1,190 @@
"""Tests for smoke_mode — the executor-stub boot smoke (issue #2275).
These tests exercise the helper module directly. The end-to-end path
(main.py invoking run_executor_smoke + sys.exit) is not unit-tested
here because main() is `# pragma: no cover` and integration-shaped;
that path is covered by the publish-template-image.yml smoke step
(which is the production gate this helper exists for).
Note on a2a-sdk: conftest.py stubs out a2a.* modules with minimal
shims that don't include `a2a.server.context.ServerCallContext` or
`a2a.types.SendMessageRequest` (the real-SDK-only symbols
_build_stub_context needs). Tests that want to verify the
`run_executor_smoke` control flow patch _build_stub_context to
sidestep the real construction; tests that NEED the real SDK
construction skip when those symbols aren't reachable.
"""
from __future__ import annotations
import asyncio
from unittest.mock import patch
import pytest
import smoke_mode
def _real_a2a_sdk_available() -> bool:
    """Report whether the a2a-sdk symbols needed by _build_stub_context
    (`ServerCallContext`, `SendMessageRequest`) can be imported.

    Per this module's header note, the conftest's a2a stubs
    intentionally don't include these — they're only present in the
    published wheel's runtime env or when a2a-sdk is installed
    alongside the test.
    """
    try:
        from a2a.server.context import ServerCallContext  # noqa: F401
        from a2a.types import SendMessageRequest  # noqa: F401
    except (ImportError, AttributeError):
        return False
    else:
        return True
# ─── is_smoke_mode ─────────────────────────────────────────────────────
@pytest.mark.parametrize("env_value", ["1", "true", "yes", "on", "TRUE", "Yes", "ON"])
def test_is_smoke_mode_truthy_values(env_value: str, monkeypatch: pytest.MonkeyPatch):
    """Every recognised truthy spelling, in any letter case, enables smoke mode."""
    monkeypatch.setenv("MOLECULE_SMOKE_MODE", env_value)
    enabled = smoke_mode.is_smoke_mode()
    assert enabled is True
@pytest.mark.parametrize("env_value", ["0", "false", "no", "off", "", " "])
def test_is_smoke_mode_falsy_values(env_value: str, monkeypatch: pytest.MonkeyPatch):
    """Falsy spellings, the empty string and blank whitespace leave smoke mode off."""
    monkeypatch.setenv("MOLECULE_SMOKE_MODE", env_value)
    enabled = smoke_mode.is_smoke_mode()
    assert enabled is False
def test_is_smoke_mode_unset(monkeypatch: pytest.MonkeyPatch):
    """With MOLECULE_SMOKE_MODE absent from the env, smoke mode is off."""
    # raising=False: the variable may already be absent in this env.
    monkeypatch.delenv("MOLECULE_SMOKE_MODE", raising=False)
    enabled = smoke_mode.is_smoke_mode()
    assert enabled is False
# ─── _build_stub_context (real-SDK-only) ───────────────────────────────
@pytest.mark.skipif(
    not _real_a2a_sdk_available(),
    reason="conftest stubs a2a.* without ServerCallContext / SendMessageRequest; real SDK only",
)
def test_build_stub_context_returns_request_context_with_message():
    """The stub RequestContext must carry a non-empty message payload —
    otherwise extract_message_text returns empty and the executor takes
    the early-exit branch instead of exercising the full import tree."""
    request_context, _ = smoke_mode._build_stub_context()
    message = request_context.message
    assert message is not None
    message_parts = message.parts
    assert len(message_parts) == 1
    assert message_parts[0].text == "smoke test"
@pytest.mark.skipif(
    not _real_a2a_sdk_available(),
    reason="conftest stubs a2a.* without ServerCallContext / SendMessageRequest; real SDK only",
)
def test_build_stub_context_returns_event_queue():
    """The second element of the stub pair is a real a2a-sdk EventQueue."""
    from a2a.server.events import EventQueue

    stub_pair = smoke_mode._build_stub_context()
    event_queue = stub_pair[1]
    assert isinstance(event_queue, EventQueue)
# ─── run_executor_smoke — control flow with stubbed context ────────────
#
# These tests patch _build_stub_context to return sentinel objects, so
# they don't depend on the real a2a-sdk being present. The executor
# stubs ignore ctx + queue.
class _RaisingExecutor:
    """Executor double whose execute() always raises the exception it
    was constructed with."""

    def __init__(self, exc: Exception):
        # Stored and re-raised on every execute() call.
        self._error = exc

    async def execute(self, context, event_queue) -> None:  # noqa: ARG002
        """Ignore both arguments and raise the stored exception."""
        raise self._error
class _BlockingExecutor:
    """Executor double that never returns on its own — stands in for an
    LLM network call that the smoke timeout cuts short."""

    async def execute(self, context, event_queue) -> None:  # noqa: ARG002
        """Wait on an event nobody sets, so only cancellation ends the call."""
        never_set = asyncio.Event()
        await never_set.wait()
class _CleanExecutor:
    """Executor double whose execute() finishes immediately without error."""

    async def execute(self, context, event_queue) -> None:  # noqa: ARG002
        """Ignore both arguments and return None right away."""
        return
@pytest.fixture
def stub_build():
    """Swap smoke_mode._build_stub_context for a stand-in that returns a
    pair of opaque sentinels, so execute() receives placeholder
    ctx/queue objects and the real a2a-sdk is never touched. The
    original function is restored when the fixture exits. Tests that
    need a different shape can override via monkeypatch."""
    placeholder_ctx = object()
    placeholder_queue = object()

    def _sentinel_pair():
        return (placeholder_ctx, placeholder_queue)

    with patch.object(smoke_mode, "_build_stub_context", _sentinel_pair):
        yield
@pytest.mark.asyncio
async def test_smoke_passes_on_timeout(stub_build, monkeypatch: pytest.MonkeyPatch):
    """An execute() that is still blocked when the timeout fires counts
    as a pass (exit code 0)."""
    # Shrink the timeout so the test does not wait out the default.
    monkeypatch.setattr(smoke_mode, "_SMOKE_TIMEOUT_SECS", 0.1)
    executor = _BlockingExecutor()
    exit_code = await smoke_mode.run_executor_smoke(executor)
    assert exit_code == 0
@pytest.mark.asyncio
async def test_smoke_passes_on_clean_return(stub_build):
    """An execute() that returns normally within the timeout yields exit code 0."""
    executor = _CleanExecutor()
    exit_code = await smoke_mode.run_executor_smoke(executor)
    assert exit_code == 0
@pytest.mark.asyncio
async def test_smoke_fails_on_import_error(stub_build):
    """An ImportError raised from inside execute() — the exact
    regression class issue #2275 exists to catch, which the static
    smoke missed — yields exit code 1."""
    broken_import = ImportError("cannot import name 'FilePart' from 'a2a.types'")
    executor = _RaisingExecutor(broken_import)
    exit_code = await smoke_mode.run_executor_smoke(executor)
    assert exit_code == 1
@pytest.mark.asyncio
async def test_smoke_fails_on_module_not_found_error(stub_build):
    """A ModuleNotFoundError raised from inside execute() yields exit code 1."""
    missing_module = ModuleNotFoundError("No module named 'temporalio'")
    executor = _RaisingExecutor(missing_module)
    exit_code = await smoke_mode.run_executor_smoke(executor)
    assert exit_code == 1
@pytest.mark.asyncio
async def test_smoke_passes_on_non_import_runtime_error(stub_build):
    """Auth errors, validation errors, anything-not-an-import-error
    pass — those are caught by adapter-level tests, not by this gate."""
    downstream_error = RuntimeError("ANTHROPIC_API_KEY missing")
    executor = _RaisingExecutor(downstream_error)
    exit_code = await smoke_mode.run_executor_smoke(executor)
    assert exit_code == 0
@pytest.mark.asyncio
async def test_smoke_passes_on_value_error(stub_build):
    """A ValueError from execute() is not an import failure, so the exit code is 0."""
    executor = _RaisingExecutor(ValueError("bad config"))
    exit_code = await smoke_mode.run_executor_smoke(executor)
    assert exit_code == 0
@pytest.mark.asyncio
async def test_smoke_fails_when_stub_context_build_breaks(monkeypatch: pytest.MonkeyPatch):
    """If a2a-sdk's own SendMessageRequest / RequestContext can't be
    constructed (e.g. SDK migration broke the constructor), that's
    exactly the regression class this gate exists for — fail loud."""

    def _broken_build():
        raise ImportError("simulated: a2a.types refactored mid-publish")

    monkeypatch.setattr(smoke_mode, "_build_stub_context", _broken_build)
    # The executor is healthy, so a non-zero code can only come from the
    # failed stub build.
    exit_code = await smoke_mode.run_executor_smoke(_CleanExecutor())
    assert exit_code == 1