Three review nits from PR #2473: 1. Narrow `_check_runtime_wedge` import catch to (ImportError, ModuleNotFoundError). The bare `except Exception:` would have masked an `AttributeError`/`TypeError` from a runtime_wedge API rename — silently degrading the smoke gate to "no wedge info" with no log line. The `runtime_wedge_signature.json` snapshot test (task #169) carries the API-drift load instead. 2. Drop the unreachable `or "<unspecified>"` fallback. `wedge_reason()` only returns "" when not wedged, but the call is guarded by `is_wedged()` being True and `mark_wedged` requires a non-None reason. The defensive arm couldn't fire. 3. Promote `reset_runtime_wedge` from a per-file fixture in test_smoke_mode.py to an autouse fixture in workspace/tests/conftest.py. Heartbeat tests or future adapter tests that call `mark_wedged` without cleanup would otherwise leak a sticky wedge into smoke tests later in the same pytest process — smoke tests would fail-via-leak instead of asserting their actual contract. Two-sided reset survives early test failures. Also: `test_check_runtime_wedge_returns_none_when_module_missing` now `monkeypatch.delitem(sys.modules, "runtime_wedge")` before patching `__import__`, so the test re-exercises the import path instead of resolving from the module cache (the test was passing today by luck — it would still pass even if the catch arm were deleted, because the cached module's `is_wedged` returned False). Tests: 28 still pass in test_smoke_mode.py, 57 across smoke + wedge + heartbeat. Regression-injection-checked: catch tightening doesn't regress the existing wedge tests.
351 lines
13 KiB
Python
351 lines
13 KiB
Python
"""Tests for smoke_mode — the executor-stub boot smoke (issue #2275).
|
|
|
|
These tests exercise the helper module directly. The end-to-end path
(main.py invoking run_executor_smoke + sys.exit) is not unit-tested
here because main() is `# pragma: no cover` and integration-shaped;
that path is covered by the publish-template-image.yml smoke step
(which is the production gate this helper exists for).

Note on a2a-sdk: conftest.py stubs out a2a.* modules with minimal
shims that don't include `a2a.server.context.ServerCallContext` or
`a2a.types.SendMessageRequest` (the real-SDK-only symbols
_build_stub_context needs). Tests that want to verify the
`run_executor_smoke` control flow patch _build_stub_context to
sidestep the real construction; tests that NEED the real SDK
construction skip when those symbols aren't reachable.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import sys
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
|
|
import smoke_mode
|
|
|
|
|
|
def _real_a2a_sdk_available() -> bool:
|
|
"""True when the real a2a-sdk types needed by _build_stub_context
|
|
are importable. The conftest's a2a stubs intentionally don't
|
|
include these — they're only present in the published wheel's
|
|
runtime env or when a2a-sdk is installed alongside the test."""
|
|
try:
|
|
from a2a.server.context import ServerCallContext # noqa: F401
|
|
from a2a.types import SendMessageRequest # noqa: F401
|
|
return True
|
|
except ImportError:
|
|
return False
|
|
|
|
|
|
# ─── is_smoke_mode ─────────────────────────────────────────────────────
|
|
|
|
|
|
@pytest.mark.parametrize(
    "env_value",
    ["1", "true", "yes", "on", "TRUE", "Yes", "ON"],
)
def test_is_smoke_mode_truthy_values(env_value: str, monkeypatch: pytest.MonkeyPatch):
    """Each listed truthy spelling of MOLECULE_SMOKE_MODE turns smoke mode on."""
    monkeypatch.setenv("MOLECULE_SMOKE_MODE", env_value)
    enabled = smoke_mode.is_smoke_mode()
    assert enabled is True
|
|
|
|
|
|
@pytest.mark.parametrize(
    "env_value",
    ["0", "false", "no", "off", "", " "],
)
def test_is_smoke_mode_falsy_values(env_value: str, monkeypatch: pytest.MonkeyPatch):
    """Each listed falsy, empty or blank MOLECULE_SMOKE_MODE value leaves smoke mode off."""
    monkeypatch.setenv("MOLECULE_SMOKE_MODE", env_value)
    enabled = smoke_mode.is_smoke_mode()
    assert enabled is False
|
|
|
|
|
|
def test_is_smoke_mode_unset(monkeypatch: pytest.MonkeyPatch):
    """With MOLECULE_SMOKE_MODE removed from the environment, smoke mode is off."""
    monkeypatch.delenv("MOLECULE_SMOKE_MODE", raising=False)
    enabled = smoke_mode.is_smoke_mode()
    assert enabled is False
|
|
|
|
|
|
# ─── _SMOKE_TIMEOUT_SECS bad-env-var resilience ────────────────────────
|
|
|
|
|
|
def test_smoke_timeout_falls_back_when_env_value_is_malformed(
    monkeypatch: pytest.MonkeyPatch,
):
    """A malformed MOLECULE_SMOKE_TIMEOUT_SECS must fall back to 5.0.

    main.py imports smoke_mode unconditionally, ahead of the
    is_smoke_mode() check, so a float() failure at module load would
    SystemExit every workspace whenever the env value is mistyped.
    """
    import importlib

    monkeypatch.setenv("MOLECULE_SMOKE_TIMEOUT_SECS", "not-a-float")
    module_after_reload = importlib.reload(smoke_mode)
    try:
        assert module_after_reload._SMOKE_TIMEOUT_SECS == 5.0
    finally:
        # Reload once more without the bad value so later tests see the
        # module in its default state.
        monkeypatch.delenv("MOLECULE_SMOKE_TIMEOUT_SECS", raising=False)
        importlib.reload(smoke_mode)
|
|
|
|
|
|
# ─── _build_stub_context (real-SDK-only) ───────────────────────────────
|
|
|
|
|
|
@pytest.mark.skipif(
    not _real_a2a_sdk_available(),
    reason="conftest stubs a2a.* without ServerCallContext / SendMessageRequest; real SDK only",
)
def test_build_stub_context_returns_request_context_with_message():
    """The stub context must carry exactly one text part reading "smoke test".

    An empty message payload would make extract_message_text return
    empty, and the executor would take its early-exit branch rather than
    exercising the full import tree.
    """
    context, _unused_queue = smoke_mode._build_stub_context()
    assert context.message is not None
    message_parts = context.message.parts
    assert len(message_parts) == 1
    only_part = message_parts[0]
    assert only_part.text == "smoke test"
|
|
|
|
|
|
@pytest.mark.skipif(
    not _real_a2a_sdk_available(),
    reason="conftest stubs a2a.* without ServerCallContext / SendMessageRequest; real SDK only",
)
def test_build_stub_context_returns_event_queue():
    """The second element returned by _build_stub_context is a real EventQueue."""
    from a2a.server.events import EventQueue

    _unused_context, event_queue = smoke_mode._build_stub_context()
    assert isinstance(event_queue, EventQueue)
|
|
|
|
|
|
# ─── run_executor_smoke — control flow with stubbed context ────────────
#
# These tests patch _build_stub_context to return sentinel objects, so
# they don't depend on the real a2a-sdk being present. The executor
# stubs ignore ctx + queue.
|
|
|
|
|
|
class _RaisingExecutor:
|
|
def __init__(self, exc: Exception):
|
|
self._exc = exc
|
|
|
|
async def execute(self, context, event_queue) -> None: # noqa: ARG002
|
|
raise self._exc
|
|
|
|
|
|
class _BlockingExecutor:
|
|
"""Simulates an LLM network call that the smoke timeout cuts short."""
|
|
|
|
async def execute(self, context, event_queue) -> None: # noqa: ARG002
|
|
await asyncio.Event().wait()
|
|
|
|
|
|
class _CleanExecutor:
|
|
async def execute(self, context, event_queue) -> None: # noqa: ARG002
|
|
return None
|
|
|
|
|
|
@pytest.fixture
def stub_build():
    """Swap _build_stub_context for a stand-in returning sentinel objects.

    execute() then receives a placeholder ctx/queue pair. A test that
    needs a different shape can override this through monkeypatch.
    """
    ctx_marker = object()
    queue_marker = object()

    def _fake_build():
        # Same two sentinel objects on every call.
        return (ctx_marker, queue_marker)

    with patch.object(smoke_mode, "_build_stub_context", _fake_build):
        yield
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_smoke_passes_on_timeout(stub_build, monkeypatch: pytest.MonkeyPatch):
    """With the timeout shortened to 0.1s, a never-returning execute() yields exit code 0."""
    monkeypatch.setattr(smoke_mode, "_SMOKE_TIMEOUT_SECS", 0.1)
    blocked_executor = _BlockingExecutor()
    exit_code = await smoke_mode.run_executor_smoke(blocked_executor)
    assert exit_code == 0
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_smoke_passes_on_clean_return(stub_build):
    """An execute() that returns normally yields exit code 0."""
    clean_executor = _CleanExecutor()
    exit_code = await smoke_mode.run_executor_smoke(clean_executor)
    assert exit_code == 0
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_smoke_fails_on_import_error(stub_build):
    """An ImportError raised from execute() must yield exit code 1.

    This is the regression class issue #2275 targets: a lazy import
    inside execute() that the static smoke did not catch.
    """
    lazy_import_failure = ImportError("cannot import name 'FilePart' from 'a2a.types'")
    exit_code = await smoke_mode.run_executor_smoke(
        _RaisingExecutor(lazy_import_failure)
    )
    assert exit_code == 1
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_smoke_fails_on_module_not_found_error(stub_build):
    """A ModuleNotFoundError raised from execute() must yield exit code 1."""
    missing_module = ModuleNotFoundError("No module named 'temporalio'")
    exit_code = await smoke_mode.run_executor_smoke(
        _RaisingExecutor(missing_module)
    )
    assert exit_code == 1
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_smoke_passes_on_non_import_runtime_error(stub_build):
    """A RuntimeError that is not import-related still yields exit code 0.

    Auth errors, validation errors and other non-import failures are
    left to adapter-level tests rather than this gate.
    """
    auth_failure = RuntimeError("ANTHROPIC_API_KEY missing")
    exit_code = await smoke_mode.run_executor_smoke(
        _RaisingExecutor(auth_failure)
    )
    assert exit_code == 0
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_smoke_passes_on_value_error(stub_build):
    """A ValueError raised from execute() yields exit code 0."""
    config_failure = ValueError("bad config")
    exit_code = await smoke_mode.run_executor_smoke(
        _RaisingExecutor(config_failure)
    )
    assert exit_code == 0
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_smoke_fails_when_stub_context_build_breaks(monkeypatch: pytest.MonkeyPatch):
    """An ImportError while building the stub context must yield exit code 1.

    If a2a-sdk's own SendMessageRequest / RequestContext cannot be
    constructed (for example after an SDK migration changed the
    constructor), that is the regression class this gate exists for, so
    it has to fail loudly.
    """

    def _broken_build():
        raise ImportError("simulated: a2a.types refactored mid-publish")

    monkeypatch.setattr(smoke_mode, "_build_stub_context", _broken_build)
    exit_code = await smoke_mode.run_executor_smoke(_CleanExecutor())
    assert exit_code == 1
|
|
|
|
|
|
# ─── runtime_wedge integration (universal turn-smoke, task #131) ───────
#
# These tests pin the post-execute wedge-check that upgrades a
# provisional PASS to FAIL when an adapter has marked the runtime
# wedged via `runtime_wedge.mark_wedged()`. Without this gate, the
# PR-25-class regression (claude_agent_sdk init wedge from a malformed
# CLI argv) shipped to GHCR because the smoke saw the outer wait_for
# timeout as "imports healthy, hit a network boundary."
|
|
|
|
|
|
class _MarkWedgedThenRaiseExecutor:
|
|
"""Mimics the claude_sdk_executor wedge path: catches the SDK's
|
|
`Control request timeout: initialize`, calls
|
|
`runtime_wedge.mark_wedged()` from the catch arm, then re-raises
|
|
a sanitized error. The smoke must surface this as FAIL even
|
|
though the outer exception class (`RuntimeError` here) would
|
|
otherwise be a PASS-on-non-import-error.
|
|
"""
|
|
|
|
def __init__(self, reason: str):
|
|
self._reason = reason
|
|
|
|
async def execute(self, context, event_queue) -> None: # noqa: ARG002
|
|
import runtime_wedge
|
|
runtime_wedge.mark_wedged(self._reason)
|
|
raise RuntimeError("sanitized adapter error after wedge")
|
|
|
|
|
|
class _MarkWedgedThenBlockExecutor:
|
|
"""Mimics a wedge that fires inside a still-running execute() —
|
|
the adapter marks wedged, then continues to await something
|
|
network-shaped that the outer wait_for cuts short. The pre-fix
|
|
smoke returned 0 here ('timed out past import-tree') even though
|
|
the runtime had already self-reported wedged.
|
|
"""
|
|
|
|
def __init__(self, reason: str):
|
|
self._reason = reason
|
|
|
|
async def execute(self, context, event_queue) -> None: # noqa: ARG002
|
|
import runtime_wedge
|
|
runtime_wedge.mark_wedged(self._reason)
|
|
await asyncio.Event().wait()
|
|
|
|
|
|
# Note: runtime_wedge state is reset before/after every test by the
# autouse `_reset_runtime_wedge_between_tests` fixture in conftest.py
# so individual wedge tests don't need an explicit fixture argument.
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_smoke_fails_when_adapter_marked_wedged_via_exception(
    stub_build,
):
    """A wedge marked before a sanitized RuntimeError must yield exit code 1.

    PR-25 regression class: the adapter catches the SDK init wedge, marks
    runtime_wedge, and raises a sanitized error. The outer exception
    class (`RuntimeError`) is non-import, so it would have been a PASS
    before the fix; the post-run wedge check now overrides PASS with FAIL.
    """
    wedging_executor = _MarkWedgedThenRaiseExecutor(
        "claude SDK init timeout — restart workspace"
    )
    exit_code = await smoke_mode.run_executor_smoke(wedging_executor)
    assert exit_code == 1
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_smoke_fails_when_adapter_marked_wedged_then_blocks(
    stub_build, monkeypatch: pytest.MonkeyPatch,
):
    """A wedge marked before execute() blocks forever must yield exit code 1.

    Same wedge class as the exception variant, except the adapter does
    not raise: it keeps awaiting (for example a control-message reply
    that never arrives). The outer wait_for cuts it short, which would
    have been PASS-on-timeout before the fix; the wedge check now turns
    that into FAIL.
    """
    monkeypatch.setattr(smoke_mode, "_SMOKE_TIMEOUT_SECS", 0.1)
    wedging_executor = _MarkWedgedThenBlockExecutor("hermes init handshake timed out")
    exit_code = await smoke_mode.run_executor_smoke(wedging_executor)
    assert exit_code == 1
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_smoke_passes_when_runtime_wedge_is_clean_after_clean_execute(
    stub_build,
):
    """No wedge plus a clean execute() must still yield exit code 0.

    Belt-and-braces check that the wedge gate is purely additive: it must
    not fail healthy executions, e.g. by treating "no runtime_wedge
    import" as a wedge.
    """
    clean_executor = _CleanExecutor()
    exit_code = await smoke_mode.run_executor_smoke(clean_executor)
    assert exit_code == 0
|
|
|
|
|
|
def test_check_runtime_wedge_returns_none_when_module_missing(
    monkeypatch: pytest.MonkeyPatch,
):
    """_check_runtime_wedge returns None when importing runtime_wedge fails.

    Pins the import-resilience contract: the helper has to swallow
    ImportError so a corrupt install does not crash the smoke gate. The
    catch is narrowed to (ImportError, ModuleNotFoundError) so that
    signature drift still surfaces; only the missing-module case is
    covered here.

    runtime_wedge is evicted from the sys.modules cache before
    __import__ is patched. Otherwise an earlier test in this file that
    already imported runtime_wedge would let `from runtime_wedge import
    ...` resolve from the cache and bypass __import__ altogether. The
    test would then pass for the wrong reason, and a real regression
    (catch arm removed) would go unnoticed.
    """
    import builtins

    monkeypatch.delitem(sys.modules, "runtime_wedge", raising=False)
    original_import = builtins.__import__

    def _import_without_runtime_wedge(name, *args, **kwargs):
        # Fail only for runtime_wedge; everything else imports normally.
        if name != "runtime_wedge":
            return original_import(name, *args, **kwargs)
        raise ImportError("simulated: runtime_wedge unavailable")

    monkeypatch.setattr(builtins, "__import__", _import_without_runtime_wedge)
    outcome = smoke_mode._check_runtime_wedge()
    assert outcome is None
|
|
|
|
|
|
def test_check_runtime_wedge_returns_reason_when_marked():
    """_check_runtime_wedge returns the marked reason unchanged.

    Once an adapter has called runtime_wedge.mark_wedged(reason), the
    helper hands back that reason verbatim so the smoke can put it in
    the FAIL log line.
    """
    import runtime_wedge

    expected_reason = "explicit test reason"
    runtime_wedge.mark_wedged(expected_reason)
    reported_reason = smoke_mode._check_runtime_wedge()
    assert reported_reason == expected_reason
|
|
|
|
|
|
def test_check_runtime_wedge_returns_none_when_clean():
    """_check_runtime_wedge returns None when nothing has marked a wedge.

    Pre-condition for the additive contract: the result has to be None,
    not the empty string that `wedge_reason()` gives, so the caller's
    `is not None` check works.
    """
    outcome = smoke_mode._check_runtime_wedge()
    assert outcome is None
|