feat(wheel-smoke): exercise executor.execute() to catch lazy imports (#2275)
The existing wheel-publish smoke (`wheel_smoke.py`) only IMPORTS `molecule_runtime.main` at module scope. Lazy imports buried inside `async def execute(...)` bodies (e.g. `from a2a.types import FilePart`) NEVER evaluate at static-import time — they crash at first message delivery in production. The 2026-04-2x v0→v1 a2a-sdk migration shipped 5 such regressions in templates that all looked fine at module-load smoke. This change adds `smoke_mode.py` plus a `MOLECULE_SMOKE_MODE=1` short-circuit in `main.py`: after `adapter.create_executor(...)`, the boot path invokes `executor.execute(stub_ctx, stub_queue)` once with a 5s timeout (`MOLECULE_SMOKE_TIMEOUT_SECS`). Healthy import tree → execution proceeds far enough to hit a network boundary and times out (exit 0). Broken lazy import → `ImportError` / `ModuleNotFoundError` from inside the executor body (exit 1). Other downstream errors (auth, validation) pass — those are caught by adapter-level tests, not this gate. Stub `(RequestContext, EventQueue)` is built from the real a2a-sdk so SendMessageRequest/RequestContext constructor changes also surface as import-tree failures (the regression class also includes "SDK refactored mid-publish"). The stub-build itself is wrapped — if it raises, that's a smoke fail too. Phase 2 (separate PR, molecule-ci) wires this into publish-template-image.yml so the publish gate runs the boot smoke against every template image before pushing the tag. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
f46c471f9b
commit
aacaba024c
@ -78,6 +78,7 @@ TOP_LEVEL_MODULES = {
|
||||
"prompt",
|
||||
"runtime_wedge",
|
||||
"shared_runtime",
|
||||
"smoke_mode",
|
||||
"transcript_auth",
|
||||
"watcher",
|
||||
}
|
||||
|
||||
@ -136,6 +136,20 @@ async def main(): # pragma: no cover
|
||||
await adapter.setup(adapter_config)
|
||||
executor = await adapter.create_executor(adapter_config)
|
||||
|
||||
# 5a. Boot-smoke short-circuit (issue #2275): if MOLECULE_SMOKE_MODE
|
||||
# is set, exercise the executor's full import tree by calling
|
||||
# execute() once with stub deps + a short timeout. Skips platform
|
||||
# registration + uvicorn entirely. Returns process exit code.
|
||||
from smoke_mode import is_smoke_mode, run_executor_smoke
|
||||
if is_smoke_mode():
|
||||
exit_code = await run_executor_smoke(executor)
|
||||
if hasattr(heartbeat, "stop"):
|
||||
try:
|
||||
await heartbeat.stop()
|
||||
except Exception: # noqa: BLE001
|
||||
pass
|
||||
raise SystemExit(exit_code)
|
||||
|
||||
# 5b. Restore from pre-stop snapshot if one exists (GH#1391).
|
||||
# The snapshot is scrubbed before being written, so secrets are
|
||||
# already redacted — restore_state must not re-expose them.
|
||||
|
||||
140
workspace/smoke_mode.py
Normal file
140
workspace/smoke_mode.py
Normal file
@ -0,0 +1,140 @@
|
||||
"""Boot smoke mode — exercises the executor's full import tree without touching real platforms.
|
||||
|
||||
Why this exists (issue #2275): the existing `wheel_smoke.py` only IMPORTS
|
||||
`molecule_runtime.main` at module scope. Lazy imports buried inside
|
||||
`async def execute(...)` bodies (e.g. `from a2a.types import FilePart`)
|
||||
NEVER evaluate at static-import time — they crash at first message
|
||||
delivery in production.
|
||||
|
||||
The 2026-04-2x v0→v1 a2a-sdk migration shipped 5 such regressions in
|
||||
templates that all looked fine at module-load smoke. This module fills
|
||||
the gap by actually invoking `executor.execute(stub_ctx, stub_queue)`
|
||||
once with a short timeout. If the import-tree is healthy the call
|
||||
proceeds far enough to hit a network boundary (LLM call, etc.) and
|
||||
times out — that's a *pass*. If a lazy import is broken, the call
|
||||
raises `ImportError` / `ModuleNotFoundError` from inside the executor
|
||||
body — that's a *fail*.
|
||||
|
||||
Activated by setting `MOLECULE_SMOKE_MODE=1` in the env. Wired into
|
||||
`main.py` after `executor = await adapter.create_executor(...)` so the
|
||||
full adapter setup path runs first; the smoke just adds one more
|
||||
exercise step before exit.
|
||||
|
||||
CI usage (intended for `molecule-ci/.github/workflows/publish-template-image.yml`):
|
||||
docker run --rm \
|
||||
-e WORKSPACE_ID=fake -e MOLECULE_SMOKE_MODE=1 \
|
||||
"$IMAGE" molecule-runtime
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_DEFAULT_SMOKE_TIMEOUT_SECS = 5.0


def _parse_smoke_timeout_secs(raw: "str | None") -> float:
    """Convert the raw `MOLECULE_SMOKE_TIMEOUT_SECS` value into a timeout.

    Args:
        raw: The environment value, or None when the variable is unset.

    Returns:
        The parsed number of seconds, or the 5.0s default when `raw` is
        unset, blank, not a number, or not a positive finite number.

    This module is imported by `main.py` on every boot (the import sits
    before the `is_smoke_mode()` check), so a malformed value must never
    raise here — it would take down normal startup, not just the smoke.
    Zero, negative and NaN values are rejected as well: they would make
    `asyncio.wait_for` give up immediately and the gate would report a
    pass without having exercised the executor at all.
    """
    if raw is None or not raw.strip():
        return _DEFAULT_SMOKE_TIMEOUT_SECS
    try:
        seconds = float(raw)
    except ValueError:
        seconds = float("nan")
    # The chained comparison is False for NaN, so NaN falls through to the
    # default together with <= 0 and infinity.
    if not 0 < seconds < float("inf"):
        print(
            f"[smoke-mode] ignoring invalid MOLECULE_SMOKE_TIMEOUT_SECS={raw!r}; "
            f"using {_DEFAULT_SMOKE_TIMEOUT_SECS:.1f}s",
            file=sys.stderr,
        )
        return _DEFAULT_SMOKE_TIMEOUT_SECS
    return seconds


_SMOKE_TIMEOUT_SECS = _parse_smoke_timeout_secs(
    os.environ.get("MOLECULE_SMOKE_TIMEOUT_SECS")
)
|
||||
|
||||
|
||||
def is_smoke_mode() -> bool:
    """Report whether boot smoke mode is switched on via the environment.

    Reads `MOLECULE_SMOKE_MODE`, trims surrounding whitespace and compares
    it case-insensitively against `1`, `true`, `yes` and `on`. Any other
    value — including unset, empty and `0` — yields False, so the boot
    path takes the normal branch.
    """
    flag = os.environ.get("MOLECULE_SMOKE_MODE", "")
    normalized = flag.strip().lower()
    return normalized in {"1", "true", "yes", "on"}
|
||||
|
||||
|
||||
def _build_stub_context() -> tuple[Any, Any]:
    """Create the stub `(RequestContext, EventQueue)` pair fed to execute().

    The request carries a single text message, "smoke test". The intent is
    that the executor sees non-empty input and follows its regular path
    rather than an empty-input early exit, so lazy imports on that path
    are actually evaluated.

    All a2a-sdk imports are kept inside this function so that merely
    importing this module does not pull in a2a-sdk; it is only needed
    once the smoke actually runs.

    Returns:
        A `(context, queue)` tuple built from the real a2a-sdk classes.
    """
    from a2a.helpers import new_text_message
    from a2a.server.agent_execution import RequestContext
    from a2a.server.context import ServerCallContext
    from a2a.server.events import EventQueue
    from a2a.types import SendMessageRequest

    stub_request = SendMessageRequest(message=new_text_message("smoke test"))
    stub_context = RequestContext(ServerCallContext(), request=stub_request)
    return stub_context, EventQueue()
|
||||
|
||||
|
||||
def _find_import_error(exc: BaseException) -> "ImportError | None":
    """Return the first ImportError in `exc`'s explicit cause chain, if any.

    Only `__cause__` (set by `raise ... from err`) is followed. The implicit
    `__context__` is deliberately ignored: an exception raised while an
    optional-dependency `except ImportError:` fallback is running would
    otherwise be misreported as a broken import.
    """
    visited: set[int] = set()
    current: "BaseException | None" = exc
    # `visited` guards against a (pathological) cyclic cause chain.
    while current is not None and id(current) not in visited:
        if isinstance(current, ImportError):
            return current
        visited.add(id(current))
        current = current.__cause__
    return None


async def run_executor_smoke(executor: Any) -> int:
    """Invoke executor.execute() once with stub deps and return an exit code.

    Args:
        executor: Object exposing `async execute(context, event_queue)`.

    Returns:
        0 — import tree healthy. execute() either timed out (the expected
            outcome: it reached a long await such as a network call),
            completed cleanly, or raised something unrelated to imports.
        1 — the stub context could not be built, or execute() raised an
            ImportError / ModuleNotFoundError — directly or as the
            explicit cause (`raise ... from err`) of another exception.
            The offending error is printed to stderr so the publish
            gate's log captures the symbol.

    The timeout comes from the module-level `_SMOKE_TIMEOUT_SECS`. Keep it
    short — the publish workflow multiplies it across N templates.
    """
    print(
        f"[smoke-mode] invoking executor.execute(stub_ctx, stub_queue) "
        f"with {_SMOKE_TIMEOUT_SECS:.1f}s timeout to exercise lazy imports"
    )

    try:
        context, queue = _build_stub_context()
    except Exception as build_err:  # noqa: BLE001
        # If the stub cannot even be built, the a2a-sdk import path is
        # broken — the regression class this gate exists for.
        print(
            f"[smoke-mode] FAIL: stub-context build raised "
            f"{type(build_err).__name__}: {build_err}",
            file=sys.stderr,
        )
        return 1

    try:
        await asyncio.wait_for(
            executor.execute(context, queue),
            timeout=_SMOKE_TIMEOUT_SECS,
        )
    except (asyncio.TimeoutError, asyncio.CancelledError):
        # Timeout = imports healthy, execution was proceeding and hit a
        # network boundary or long await. Pass.
        # NOTE(review): CancelledError is also swallowed here, so an
        # externally cancelled smoke run reports PASS — confirm intended.
        print("[smoke-mode] PASS: timed out past import-tree (imports healthy)")
        return 0
    except Exception as exec_err:  # noqa: BLE001
        import_err = _find_import_error(exec_err)
        if import_err is not None:
            # The exact regression class issue #2275 exists to catch,
            # including executors that wrap the ImportError before
            # re-raising.
            wrapper = (
                ""
                if import_err is exec_err
                else f" (wrapped in {type(exec_err).__name__})"
            )
            print(
                f"[smoke-mode] FAIL: lazy import broken in execute(): "
                f"{type(import_err).__name__}: {import_err}{wrapper}",
                file=sys.stderr,
            )
            return 1
        # Anything else (auth errors, validation errors, runtime bugs) is
        # downstream of the import gate. Pass — adapter-level tests own
        # those failures, not this smoke.
        print(
            f"[smoke-mode] PASS: execute() raised "
            f"{type(exec_err).__name__} past import-tree (not an import error)"
        )
        return 0

    print("[smoke-mode] PASS: execute() completed within timeout (imports + body OK)")
    return 0
|
||||
190
workspace/tests/test_smoke_mode.py
Normal file
190
workspace/tests/test_smoke_mode.py
Normal file
@ -0,0 +1,190 @@
|
||||
"""Tests for smoke_mode — the executor-stub boot smoke (issue #2275).
|
||||
|
||||
These tests exercise the helper module directly. The end-to-end path
|
||||
(main.py invoking run_executor_smoke + sys.exit) is not unit-tested
|
||||
here because main() is `# pragma: no cover` and integration-shaped;
|
||||
that path is covered by the publish-template-image.yml smoke step
|
||||
(which is the production gate this helper exists for).
|
||||
|
||||
Note on a2a-sdk: conftest.py stubs out a2a.* modules with minimal
|
||||
shims that don't include `a2a.server.context.ServerCallContext` or
|
||||
`a2a.types.SendMessageRequest` (the real-SDK-only symbols
|
||||
_build_stub_context needs). Tests that want to verify the
|
||||
`run_executor_smoke` control flow patch _build_stub_context to
|
||||
sidestep the real construction; tests that NEED the real SDK
|
||||
construction skip when those symbols aren't reachable.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
import smoke_mode
|
||||
|
||||
|
||||
def _real_a2a_sdk_available() -> bool:
    """Tell whether every a2a-sdk symbol `_build_stub_context` imports exists.

    Used as the `skipif` guard for the tests that call the real
    `_build_stub_context`. All five symbols that function imports are
    probed, so an environment with only some of them (conftest shims, a
    partially matching SDK) leads to a skip instead of an ImportError
    from inside the guarded test.

    Returns:
        True when every required module imports and exposes its symbol,
        False otherwise.
    """
    import importlib

    required = (
        ("a2a.helpers", "new_text_message"),
        ("a2a.server.agent_execution", "RequestContext"),
        ("a2a.server.context", "ServerCallContext"),
        ("a2a.server.events", "EventQueue"),
        ("a2a.types", "SendMessageRequest"),
    )
    for module_name, symbol in required:
        try:
            module = importlib.import_module(module_name)
        except (ImportError, AttributeError):
            return False
        if not hasattr(module, symbol):
            return False
    return True
|
||||
|
||||
|
||||
# ─── is_smoke_mode ─────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.parametrize("env_value", ["1", "true", "yes", "on", "TRUE", "Yes", "ON"])
def test_is_smoke_mode_truthy_values(env_value: str, monkeypatch: pytest.MonkeyPatch):
    """Each recognised truthy spelling, in any casing, enables smoke mode."""
    monkeypatch.setenv("MOLECULE_SMOKE_MODE", env_value)
    enabled = smoke_mode.is_smoke_mode()
    assert enabled is True
|
||||
|
||||
|
||||
@pytest.mark.parametrize("env_value", ["0", "false", "no", "off", "", "  "])
def test_is_smoke_mode_falsy_values(env_value: str, monkeypatch: pytest.MonkeyPatch):
    """Explicit negatives, the empty string and whitespace leave smoke mode off."""
    monkeypatch.setenv("MOLECULE_SMOKE_MODE", env_value)
    enabled = smoke_mode.is_smoke_mode()
    assert enabled is False
|
||||
|
||||
|
||||
def test_is_smoke_mode_unset(monkeypatch: pytest.MonkeyPatch):
    """With the variable absent from the environment, smoke mode is off."""
    monkeypatch.delenv("MOLECULE_SMOKE_MODE", raising=False)
    enabled = smoke_mode.is_smoke_mode()
    assert enabled is False
|
||||
|
||||
|
||||
# ─── _build_stub_context (real-SDK-only) ───────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not _real_a2a_sdk_available(),
    reason="conftest stubs a2a.* without ServerCallContext / SendMessageRequest; real SDK only",
)
def test_build_stub_context_returns_request_context_with_message():
    """The stub context must carry exactly one text part reading "smoke test".

    A context without message content would presumably send the executor
    down an empty-input early exit, leaving the import tree unexercised.
    """
    stub_context, _ = smoke_mode._build_stub_context()
    assert stub_context.message is not None
    message_parts = stub_context.message.parts
    assert len(message_parts) == 1
    first_part = message_parts[0]
    assert first_part.text == "smoke test"
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not _real_a2a_sdk_available(),
    reason="conftest stubs a2a.* without ServerCallContext / SendMessageRequest; real SDK only",
)
def test_build_stub_context_returns_event_queue():
    """The second element of the stub pair is a real a2a-sdk EventQueue."""
    from a2a.server.events import EventQueue

    stub_pair = smoke_mode._build_stub_context()
    event_queue = stub_pair[1]
    assert isinstance(event_queue, EventQueue)
|
||||
|
||||
|
||||
# ─── run_executor_smoke — control flow with stubbed context ────────────
|
||||
#
|
||||
# These tests patch _build_stub_context to return sentinel objects, so
|
||||
# they don't depend on the real a2a-sdk being present. The executor
|
||||
# stubs ignore ctx + queue.
|
||||
|
||||
|
||||
class _RaisingExecutor:
    """Executor double whose execute() always raises the exception it was given."""

    def __init__(self, exc: Exception):
        # Stored as-is so the very same exception instance is raised later.
        self._exc = exc

    async def execute(self, context, event_queue) -> None:  # noqa: ARG002
        """Ignore both arguments and raise the configured exception."""
        failure = self._exc
        raise failure
|
||||
|
||||
|
||||
class _BlockingExecutor:
    """Simulates an LLM network call that the smoke timeout cuts short."""

    async def execute(self, context, event_queue) -> None:  # noqa: ARG002
        """Wait on an event nobody ever sets, i.e. block until cancelled."""
        never_set = asyncio.Event()
        await never_set.wait()
|
||||
|
||||
|
||||
class _CleanExecutor:
    """Executor double whose execute() finishes immediately without error."""

    async def execute(self, context, event_queue) -> None:  # noqa: ARG002
        """Ignore both arguments and return straight away."""
        return
|
||||
|
||||
|
||||
@pytest.fixture
def stub_build():
    """Swap `_build_stub_context` for a fake returning placeholder objects.

    While the fixture is active, `run_executor_smoke` hands two plain
    `object()` sentinels to execute(), so the control-flow tests need no
    a2a-sdk. The original function is restored when the test finishes.
    """
    placeholder_pair = (object(), object())

    def _fake_build():
        return placeholder_pair

    with patch.object(smoke_mode, "_build_stub_context", _fake_build):
        yield
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_smoke_passes_on_timeout(stub_build, monkeypatch: pytest.MonkeyPatch):
    """An executor that never returns is cut off by the timeout and counts as a pass."""
    # Shrink the timeout so the test does not wait the default duration.
    monkeypatch.setattr(smoke_mode, "_SMOKE_TIMEOUT_SECS", 0.1)
    exit_code = await smoke_mode.run_executor_smoke(_BlockingExecutor())
    assert exit_code == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_smoke_passes_on_clean_return(stub_build):
    """An executor that returns normally within the timeout counts as a pass."""
    exit_code = await smoke_mode.run_executor_smoke(_CleanExecutor())
    assert exit_code == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_smoke_fails_on_import_error(stub_build):
    """An ImportError raised from inside execute() must fail the smoke.

    This is the regression class issue #2275 targets: a lazy import in
    the executor body that a module-load smoke never evaluates.
    """
    broken_import = ImportError("cannot import name 'FilePart' from 'a2a.types'")
    exit_code = await smoke_mode.run_executor_smoke(_RaisingExecutor(broken_import))
    assert exit_code == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_smoke_fails_on_module_not_found_error(stub_build):
    """A ModuleNotFoundError raised from inside execute() must fail the smoke."""
    missing_module = ModuleNotFoundError("No module named 'temporalio'")
    exit_code = await smoke_mode.run_executor_smoke(_RaisingExecutor(missing_module))
    assert exit_code == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_smoke_passes_on_non_import_runtime_error(stub_build):
    """Errors that are not import failures do not fail the smoke.

    Auth and validation problems are left to adapter-level tests; this
    gate only cares about the import tree.
    """
    unrelated_error = RuntimeError("ANTHROPIC_API_KEY missing")
    exit_code = await smoke_mode.run_executor_smoke(_RaisingExecutor(unrelated_error))
    assert exit_code == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_smoke_passes_on_value_error(stub_build):
    """A ValueError from execute() is not an import failure, so the smoke passes."""
    unrelated_error = ValueError("bad config")
    exit_code = await smoke_mode.run_executor_smoke(_RaisingExecutor(unrelated_error))
    assert exit_code == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_smoke_fails_when_stub_context_build_breaks(monkeypatch: pytest.MonkeyPatch):
    """A failure while building the stub context must fail the smoke.

    If a2a-sdk's own request/context types cannot be imported or
    constructed (e.g. an SDK migration changed them), that is the same
    regression class the gate exists for — even when the executor itself
    would have run cleanly.
    """

    def _broken_builder():
        raise ImportError("simulated: a2a.types refactored mid-publish")

    monkeypatch.setattr(smoke_mode, "_build_stub_context", _broken_builder)
    exit_code = await smoke_mode.run_executor_smoke(_CleanExecutor())
    assert exit_code == 1
|
||||
Loading…
Reference in New Issue
Block a user