diff --git a/scripts/build_runtime_package.py b/scripts/build_runtime_package.py index 910ea691..e6977e52 100755 --- a/scripts/build_runtime_package.py +++ b/scripts/build_runtime_package.py @@ -78,6 +78,7 @@ TOP_LEVEL_MODULES = { "prompt", "runtime_wedge", "shared_runtime", + "smoke_mode", "transcript_auth", "watcher", } diff --git a/workspace/main.py b/workspace/main.py index 093860c2..356080f3 100644 --- a/workspace/main.py +++ b/workspace/main.py @@ -136,6 +136,20 @@ async def main(): # pragma: no cover await adapter.setup(adapter_config) executor = await adapter.create_executor(adapter_config) + # 5a. Boot-smoke short-circuit (issue #2275): if MOLECULE_SMOKE_MODE + # is set, exercise the executor's full import tree by calling + # execute() once with stub deps + a short timeout. Skips platform + # registration + uvicorn entirely. Returns process exit code. + from smoke_mode import is_smoke_mode, run_executor_smoke + if is_smoke_mode(): + exit_code = await run_executor_smoke(executor) + if hasattr(heartbeat, "stop"): + try: + await heartbeat.stop() + except Exception: # noqa: BLE001 + pass + raise SystemExit(exit_code) + # 5b. Restore from pre-stop snapshot if one exists (GH#1391). # The snapshot is scrubbed before being written, so secrets are # already redacted — restore_state must not re-expose them. diff --git a/workspace/smoke_mode.py b/workspace/smoke_mode.py new file mode 100644 index 00000000..773e0cbe --- /dev/null +++ b/workspace/smoke_mode.py @@ -0,0 +1,140 @@ +"""Boot smoke mode — exercises the executor's full import tree without touching real platforms. + +Why this exists (issue #2275): the existing `wheel_smoke.py` only IMPORTS +`molecule_runtime.main` at module scope. Lazy imports buried inside +`async def execute(...)` bodies (e.g. `from a2a.types import FilePart`) +NEVER evaluate at static-import time — they crash at first message +delivery in production. 
import asyncio
import logging
import math
import os
import sys
from typing import Any

logger = logging.getLogger(__name__)

# Fallback used when MOLECULE_SMOKE_TIMEOUT_SECS is unset or unusable.
_DEFAULT_SMOKE_TIMEOUT_SECS = 5.0

# Accepted spellings for "smoke mode is on" (compared after strip + lower).
_TRUTHY_VALUES = frozenset({"1", "true", "yes", "on"})


def _read_smoke_timeout_secs() -> float:
    """Return the smoke timeout from `MOLECULE_SMOKE_TIMEOUT_SECS`.

    This runs at import time, and `main.py` imports this module on every
    boot (smoke mode or not), so it must never raise: an unparseable
    value falls back to the default instead of crashing the process.

    Non-positive and non-finite values are rejected as well. A zero or
    negative timeout would make `asyncio.wait_for` time out immediately
    (reported as PASS, which silently disables the gate), and `inf`
    would let a healthy executor hang the publish workflow.
    """
    raw = os.environ.get("MOLECULE_SMOKE_TIMEOUT_SECS", "").strip()
    if not raw:
        return _DEFAULT_SMOKE_TIMEOUT_SECS
    try:
        parsed = float(raw)
    except ValueError:
        parsed = math.nan  # funnel into the single rejection path below
    if math.isfinite(parsed) and parsed > 0:
        return parsed
    logger.warning(
        "Ignoring invalid MOLECULE_SMOKE_TIMEOUT_SECS=%r; using %.1fs",
        raw,
        _DEFAULT_SMOKE_TIMEOUT_SECS,
    )
    return _DEFAULT_SMOKE_TIMEOUT_SECS


# Module-level so tests (and callers) can override it; run_executor_smoke
# reads it at call time.
_SMOKE_TIMEOUT_SECS = _read_smoke_timeout_secs()


def is_smoke_mode() -> bool:
    """True iff MOLECULE_SMOKE_MODE is set to a truthy value.

    Recognises `1`, `true`, `yes` and `on` (case-insensitive, surrounding
    whitespace ignored). Anything else — unset, empty, `0`, `false`,
    `off`, ... — reads as False so the boot path takes the normal branch
    in production.
    """
    raw = os.environ.get("MOLECULE_SMOKE_MODE", "").strip().lower()
    return raw in _TRUTHY_VALUES


def _build_stub_context() -> tuple[Any, Any]:
    """Build a (RequestContext, EventQueue) pair carrying a minimal text
    message ("smoke test").

    The message is non-empty on purpose: the intent is that the executor
    takes its "real" branch rather than an empty-input early exit, so the
    lazy imports along that path are exercised.

    Imports happen at function scope so importing smoke_mode itself does
    not pull a2a-sdk in; any failure here propagates to the caller.
    """
    from a2a.helpers import new_text_message
    from a2a.server.agent_execution import RequestContext
    from a2a.server.context import ServerCallContext
    from a2a.server.events import EventQueue
    from a2a.types import SendMessageRequest

    message = new_text_message("smoke test")
    call_ctx = ServerCallContext()
    request = SendMessageRequest(message=message)
    context = RequestContext(call_ctx, request=request)
    queue = EventQueue()
    return context, queue


def _find_import_error(exc: BaseException) -> "ImportError | None":
    """Return the first ImportError reachable from *exc*, or None.

    Besides *exc* itself this follows explicit chaining (`__cause__`),
    implicit chaining (`__context__`, unless suppressed with
    `raise ... from None`) and the members of exception groups (any
    exception exposing a tuple/list `exceptions` attribute). That way a
    broken lazy import still fails the gate when the executor re-raises
    it as another type or raises it from a task group.
    """
    pending: list = [exc]
    seen: set = set()
    while pending:
        current = pending.pop()
        if current is None or id(current) in seen:
            continue
        seen.add(id(current))  # chains can be cyclic; visit each node once
        if isinstance(current, ImportError):  # covers ModuleNotFoundError
            return current
        pending.append(current.__cause__)
        if not current.__suppress_context__:
            pending.append(current.__context__)
        nested = getattr(current, "exceptions", None)
        if isinstance(nested, (tuple, list)):
            pending.extend(e for e in nested if isinstance(e, BaseException))
    return None


async def run_executor_smoke(executor: Any) -> int:
    """Invoke executor.execute() once with stub deps. Return an exit code.

    Args:
        executor: object exposing `async execute(context, event_queue)`.

    Returns:
        0 — no broken import observed: execute() timed out (expected —
            it reached a long await such as a network call), was
            cancelled, completed, or raised something unrelated to
            imports.
        1 — the stub context could not be built, or execute() raised an
            ImportError / ModuleNotFoundError, either directly or
            wrapped in another exception / exception group. The
            offending error is printed to stderr.

    The timeout is the module-level `_SMOKE_TIMEOUT_SECS`, seeded from
    the `MOLECULE_SMOKE_TIMEOUT_SECS` env var (default 5.0) and read at
    call time. Keep it short — the publish workflow multiplies it across
    N templates.
    """
    print(
        f"[smoke-mode] invoking executor.execute(stub_ctx, stub_queue) "
        f"with {_SMOKE_TIMEOUT_SECS:.1f}s timeout to exercise lazy imports"
    )

    try:
        context, queue = _build_stub_context()
    except Exception as build_err:  # noqa: BLE001
        # If we can't even build the stub, the a2a-sdk import path is
        # broken — that's exactly the regression class this gate exists
        # for. Treat as a smoke failure.
        print(
            f"[smoke-mode] FAIL: stub-context build raised "
            f"{type(build_err).__name__}: {build_err}",
            file=sys.stderr,
        )
        return 1

    try:
        await asyncio.wait_for(
            executor.execute(context, queue),
            timeout=_SMOKE_TIMEOUT_SECS,
        )
    except (asyncio.TimeoutError, asyncio.CancelledError):
        # Timeout = execution got past its imports and was parked on a
        # long await. Cancellation is deliberately treated the same way
        # (smoke mode exits the process right after this call).
        print("[smoke-mode] PASS: timed out past import-tree (imports healthy)")
        return 0
    except Exception as err:  # noqa: BLE001
        import_err = _find_import_error(err)
        if import_err is not None:
            # The exact regression class issue #2275 exists to catch.
            wrapped = (
                "" if import_err is err
                else f" (wrapped in {type(err).__name__})"
            )
            print(
                f"[smoke-mode] FAIL: lazy import broken in execute(): "
                f"{type(import_err).__name__}: {import_err}{wrapped}",
                file=sys.stderr,
            )
            return 1
        # Anything else (auth errors, validation errors, runtime bugs)
        # is downstream of the import gate. Pass — these are caught by
        # the relevant adapter-level tests, not by this smoke.
        print(
            f"[smoke-mode] PASS: execute() raised "
            f"{type(err).__name__} past import-tree (not an import error)"
        )
        return 0

    print("[smoke-mode] PASS: execute() completed within timeout (imports + body OK)")
    return 0
import asyncio
from unittest.mock import patch

import pytest

import smoke_mode

_TRUTHY_ENV_VALUES = ["1", "true", "yes", "on", "TRUE", "Yes", "ON"]
_FALSY_ENV_VALUES = ["0", "false", "no", "off", "", "   "]


def _real_a2a_sdk_available() -> bool:
    """Report whether the a2a types used by _build_stub_context import.

    Returns False when either import raises ImportError or
    AttributeError, True when both succeed.
    """
    try:
        from a2a.server.context import ServerCallContext  # noqa: F401
        from a2a.types import SendMessageRequest  # noqa: F401
    except (ImportError, AttributeError):
        return False
    else:
        return True


# Shared marker for the tests that construct real a2a-sdk objects.
_requires_real_sdk = pytest.mark.skipif(
    not _real_a2a_sdk_available(),
    reason="conftest stubs a2a.* without ServerCallContext / SendMessageRequest; real SDK only",
)


# --- is_smoke_mode: env-var parsing -------------------------------------


@pytest.mark.parametrize("env_value", _TRUTHY_ENV_VALUES)
def test_is_smoke_mode_truthy_values(env_value: str, monkeypatch: pytest.MonkeyPatch):
    monkeypatch.setenv("MOLECULE_SMOKE_MODE", env_value)
    enabled = smoke_mode.is_smoke_mode()
    assert enabled is True


@pytest.mark.parametrize("env_value", _FALSY_ENV_VALUES)
def test_is_smoke_mode_falsy_values(env_value: str, monkeypatch: pytest.MonkeyPatch):
    monkeypatch.setenv("MOLECULE_SMOKE_MODE", env_value)
    enabled = smoke_mode.is_smoke_mode()
    assert enabled is False


def test_is_smoke_mode_unset(monkeypatch: pytest.MonkeyPatch):
    monkeypatch.delenv("MOLECULE_SMOKE_MODE", raising=False)
    enabled = smoke_mode.is_smoke_mode()
    assert enabled is False


# --- _build_stub_context: needs the real a2a-sdk ------------------------


@_requires_real_sdk
def test_build_stub_context_returns_request_context_with_message():
    """The stub context must carry exactly one text part reading
    "smoke test", i.e. a non-empty message payload."""
    context, _queue = smoke_mode._build_stub_context()
    message = context.message
    assert message is not None
    message_parts = message.parts
    assert len(message_parts) == 1
    assert message_parts[0].text == "smoke test"


@_requires_real_sdk
def test_build_stub_context_returns_event_queue():
    from a2a.server.events import EventQueue

    built = smoke_mode._build_stub_context()
    assert isinstance(built[1], EventQueue)


# --- run_executor_smoke: control flow -----------------------------------
#
# _build_stub_context is swapped out in these tests, so none of them
# needs the real a2a-sdk. The fake executors below never look at the
# context or queue they are handed.


class _RaisingExecutor:
    """Executor whose execute() raises the exception given at construction."""

    def __init__(self, exc: Exception):
        self._error = exc

    async def execute(self, context, event_queue) -> None:  # noqa: ARG002
        raise self._error


class _BlockingExecutor:
    """Executor whose execute() waits on an event that is never set, so
    only the smoke timeout can end the call."""

    async def execute(self, context, event_queue) -> None:  # noqa: ARG002
        never_set = asyncio.Event()
        await never_set.wait()


class _CleanExecutor:
    """Executor whose execute() returns immediately."""

    async def execute(self, context, event_queue) -> None:  # noqa: ARG002
        return


@pytest.fixture
def stub_build():
    """Swap _build_stub_context for a factory returning two plain
    sentinel objects for the duration of the test."""
    fake_ctx, fake_queue = object(), object()

    def _fake_build():
        return fake_ctx, fake_queue

    patcher = patch.object(smoke_mode, "_build_stub_context", _fake_build)
    patcher.start()
    try:
        yield
    finally:
        patcher.stop()


@pytest.mark.asyncio
async def test_smoke_passes_on_timeout(stub_build, monkeypatch: pytest.MonkeyPatch):
    monkeypatch.setattr(smoke_mode, "_SMOKE_TIMEOUT_SECS", 0.1)
    executor = _BlockingExecutor()
    assert await smoke_mode.run_executor_smoke(executor) == 0


@pytest.mark.asyncio
async def test_smoke_passes_on_clean_return(stub_build):
    executor = _CleanExecutor()
    assert await smoke_mode.run_executor_smoke(executor) == 0


@pytest.mark.asyncio
async def test_smoke_fails_on_import_error(stub_build):
    """An ImportError raised from inside execute() must fail the smoke
    (the issue #2275 regression class)."""
    failure = ImportError("cannot import name 'FilePart' from 'a2a.types'")
    executor = _RaisingExecutor(failure)
    assert await smoke_mode.run_executor_smoke(executor) == 1


@pytest.mark.asyncio
async def test_smoke_fails_on_module_not_found_error(stub_build):
    failure = ModuleNotFoundError("No module named 'temporalio'")
    executor = _RaisingExecutor(failure)
    assert await smoke_mode.run_executor_smoke(executor) == 1


@pytest.mark.asyncio
async def test_smoke_passes_on_non_import_runtime_error(stub_build):
    """A RuntimeError from execute() is not an import failure, so the
    smoke still passes."""
    failure = RuntimeError("ANTHROPIC_API_KEY missing")
    executor = _RaisingExecutor(failure)
    assert await smoke_mode.run_executor_smoke(executor) == 0


@pytest.mark.asyncio
async def test_smoke_passes_on_value_error(stub_build):
    failure = ValueError("bad config")
    executor = _RaisingExecutor(failure)
    assert await smoke_mode.run_executor_smoke(executor) == 0


@pytest.mark.asyncio
async def test_smoke_fails_when_stub_context_build_breaks(monkeypatch: pytest.MonkeyPatch):
    """When building the stub context itself raises, the smoke must
    report failure even though the executor would have run cleanly."""

    def _broken_build():
        raise ImportError("simulated: a2a.types refactored mid-publish")

    monkeypatch.setattr(smoke_mode, "_build_stub_context", _broken_build)
    executor = _CleanExecutor()
    assert await smoke_mode.run_executor_smoke(executor) == 1