From ef206b5be6cce3a14ff3f7b6580f0c950be0d82f Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Thu, 30 Apr 2026 11:51:01 -0700 Subject: [PATCH] refactor(ci): extract wheel smoke into shared script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit publish-runtime.yml had a broad smoke (AgentCard call-shape, well-known mount alignment, new_text_message) inline as a heredoc. runtime-prbuild-compat.yml had a narrow inline smoke (just `from main import main_sync`). Result: a PR could introduce SDK shape regressions that pass at PR time and only fail at publish time, post-merge. Extract the broad smoke into scripts/wheel_smoke.py and invoke it from both workflows. PR-time gate now matches publish-time gate — same script, same assertions. Eliminates the drift hazard of two heredocs that have to be kept in lockstep manually. Verified locally: * Built wheel from workspace/ source, installed in venv, ran smoke → pass * Simulated AgentCard kwarg-rename regression → smoke catches it as `ValueError: Protocol message AgentCard has no "supported_interfaces" field` (the exact failure mode of #2179 / supported_protocols incident) Path filter for runtime-prbuild-compat extended to include scripts/wheel_smoke.py so smoke-only edits get PR-validated. publish-runtime path filter intentionally NOT extended — smoke-only edits should not auto-trigger a PyPI version bump. Subset of #131 (the broader "invoke main() against stub config" goal remains pending — main() needs a config dir + stub platform server). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/publish-runtime.yml | 134 +---------------- .github/workflows/runtime-prbuild-compat.yml | 10 +- scripts/wheel_smoke.py | 145 +++++++++++++++++++ 3 files changed, 157 insertions(+), 132 deletions(-) create mode 100644 scripts/wheel_smoke.py diff --git a/.github/workflows/publish-runtime.yml b/.github/workflows/publish-runtime.yml index be59fe6c..5cd20a7a 100644 --- a/.github/workflows/publish-runtime.yml +++ b/.github/workflows/publish-runtime.yml @@ -154,139 +154,15 @@ jobs: - name: Verify package contents (sanity) working-directory: ${{ runner.temp }}/runtime-build + # Smoke logic lives in scripts/wheel_smoke.py so the same gate runs + # at both PR-time (runtime-prbuild-compat.yml) and publish-time + # (here). Splitting the smoke across two heredocs let them drift + # apart historically — one script keeps them locked. run: | python -m twine check dist/* - # Smoke-import the built wheel to catch import-rewrite mistakes - # before they hit PyPI. Asserts on STABLE INVARIANTS only — - # symbols + classes that are part of the package's public - # contract (BaseAdapter interface, the canonical a2a sentinel, - # core submodules). Don't add feature-flag-style assertions - # here — they fire false-positive every time staging is mid- - # release of that feature. python -m venv /tmp/smoke /tmp/smoke/bin/pip install --quiet dist/*.whl - WORKSPACE_ID=00000000-0000-0000-0000-000000000000 \ - PLATFORM_URL=http://localhost:8080 \ - /tmp/smoke/bin/python -c " - # Importing main is the strongest smoke test we can do here: - # main.py is the entry point and pulls every other module - # transitively. If the build script missed an import rewrite - # (e.g. left a bare \`from transcript_auth import ...\` instead - # of \`from molecule_runtime.transcript_auth import ...\` — the - # 0.1.16 incident), this fails with ModuleNotFoundError instead - # of shipping to PyPI and breaking every workspace startup. 
- # Import the entry-point target by NAME — not just the module. - # The wheel's pyproject.toml declares - # `molecule-runtime = molecule_runtime.main:main_sync` so if - # main_sync goes missing (it did in 0.1.16-0.1.18), every - # workspace startup fails with `ImportError: cannot import name - # 'main_sync'`. Plain `import molecule_runtime.main` doesn't - # catch that because the module loads fine. - from molecule_runtime.main import main_sync # noqa: F401 - from molecule_runtime import a2a_client, a2a_tools - from molecule_runtime.builtin_tools import memory - from molecule_runtime.adapters import get_adapter, BaseAdapter, AdapterConfig - # Stable invariants: package exports + BaseAdapter shape. - assert a2a_client._A2A_ERROR_PREFIX, 'a2a_client missing error sentinel' - assert callable(get_adapter), 'adapters.get_adapter must be callable' - assert hasattr(BaseAdapter, 'name'), 'BaseAdapter interface broken' - assert hasattr(AdapterConfig, '__init__'), 'AdapterConfig dataclass missing' - - # Call-shape smoke for AgentCard. Pure imports don't catch - # field-shape regressions in upstream SDKs that only surface - # at construction time. Two bugs of this exact class shipped - # since the a2a-sdk 1.0 migration: - # - state_transition_history=True (fixed in #2179) - # - supported_protocols=[...] (the protobuf field is - # supported_interfaces — caused every workspace boot - # to crash with `ValueError: Protocol message AgentCard - # has no "supported_protocols" field`; fixed alongside - # this smoke) - # - # This block instantiates the EXACT classes main.py uses, - # with the EXACT keyword arguments. If a future a2a-sdk - # upgrade renames any of supported_interfaces / streaming / - # push_notifications / etc., the publish fails here instead - # of breaking every workspace startup. main.py and this - # smoke MUST stay in lockstep — adding a kwarg to one - # without mirroring it here is the regression vector. 
- from a2a.types import AgentCard, AgentCapabilities, AgentSkill, AgentInterface - AgentCard( - name='smoke-agent', - description='publish-runtime smoke test', - version='0.0.0-smoke', - supported_interfaces=[ - AgentInterface(protocol_binding='https://a2a.g/v1', url='http://localhost:8080'), - ], - capabilities=AgentCapabilities( - streaming=True, - push_notifications=False, - ), - skills=[ - AgentSkill( - id='smoke-skill', - name='Smoke', - description='no-op', - tags=['smoke'], - examples=['noop'], - ), - ], - default_input_modes=['text/plain', 'application/json'], - default_output_modes=['text/plain', 'application/json'], - ) - print('✓ AgentCard call-shape smoke passed') - - # Well-known agent-card path probe alignment. main.py's - # _send_initial_prompt() polls AGENT_CARD_WELL_KNOWN_PATH - # to know when the local A2A server is ready. If the SDK - # ever splits the constant value from the path that - # create_agent_card_routes() actually mounts at, every - # workspace silently drops its initial_prompt: - # - Probe gets 404 every attempt. - # - Falls through to 'server not ready after 30s, - # skipping' even though the server is fine. - # - The user hits a fresh chat with no kickoff context. - # This was the #2193 incident class — the v0.x → v1.x - # rename of /.well-known/agent.json → /.well-known/agent-card.json - # plus the constant itself moving to a2a.utils.constants. - # source-tree pytest (test_agent_card_well_known_path.py) - # catches main.py-side regressions; this catches the - # SDK-side ones BEFORE PyPI upload. 
- from a2a.utils.constants import AGENT_CARD_WELL_KNOWN_PATH - from a2a.server.routes import create_agent_card_routes - mounted_paths = [ - getattr(r, 'path', None) - for r in create_agent_card_routes( - AgentCard( - name='wk-smoke', - description='well-known mount alignment', - version='0.0.0-smoke', - ) - ) - ] - assert AGENT_CARD_WELL_KNOWN_PATH in mounted_paths, ( - f'AGENT_CARD_WELL_KNOWN_PATH ({AGENT_CARD_WELL_KNOWN_PATH!r}) ' - f'is NOT among paths mounted by create_agent_card_routes ' - f'({mounted_paths!r}). The SDK constant and its own route ' - f'factory have drifted — workspace probes will 404 forever, ' - f'silently dropping every workspace initial_prompt.' - ) - print(f'✓ well-known mount alignment OK ({AGENT_CARD_WELL_KNOWN_PATH})') - - # Message helper smoke. a2a-sdk renamed - # new_agent_text_message → new_text_message in the v1.x - # protobuf-flat migration (per the v0→v1 cheat sheet). main.py - # and a2a_executor.py call new_text_message in hot paths; if - # the import breaks, every reply errors with ImportError before - # the message even leaves the workspace. Importing here - # catches a future v2.x rename at publish time. - from a2a.helpers import new_text_message - msg = new_text_message('smoke') - assert msg is not None, 'new_text_message returned None' - print('✓ message helper import + call OK') - - print('✓ smoke import passed') - " + /tmp/smoke/bin/python "$GITHUB_WORKSPACE/scripts/wheel_smoke.py" - name: Publish to PyPI (Trusted Publisher / OIDC) # PyPI side is configured: project molecule-ai-workspace-runtime → diff --git a/.github/workflows/runtime-prbuild-compat.yml b/.github/workflows/runtime-prbuild-compat.yml index aad6e929..96f1a289 100644 --- a/.github/workflows/runtime-prbuild-compat.yml +++ b/.github/workflows/runtime-prbuild-compat.yml @@ -34,12 +34,14 @@ on: # changes (it controls the wheel layout). 
- 'workspace/**' - 'scripts/build_runtime_package.py' + - 'scripts/wheel_smoke.py' - '.github/workflows/runtime-prbuild-compat.yml' pull_request: branches: [main, staging] paths: - 'workspace/**' - 'scripts/build_runtime_package.py' + - 'scripts/wheel_smoke.py' - '.github/workflows/runtime-prbuild-compat.yml' workflow_dispatch: # Required-check support: when this becomes a branch-protection gate, @@ -94,7 +96,9 @@ jobs: /tmp/venv-built/bin/pip show molecule-ai-workspace-runtime a2a-sdk \ | grep -E '^(Name|Version):' - name: Smoke import the PR-built wheel - env: - WORKSPACE_ID: 00000000-0000-0000-0000-000000000001 + # Same script publish-runtime.yml runs against the to-be-PyPI wheel. + # Closes the PR-time vs publish-time gap: a PR adding a new SDK + # call-shape no longer passes here (narrow `import main_sync`) only + # to fail post-merge in publish-runtime's broader smoke. run: | - /tmp/venv-built/bin/python -c "from molecule_runtime.main import main_sync; print('PR-built runtime imports OK')" + /tmp/venv-built/bin/python "$GITHUB_WORKSPACE/scripts/wheel_smoke.py" diff --git a/scripts/wheel_smoke.py b/scripts/wheel_smoke.py new file mode 100644 index 00000000..32db3350 --- /dev/null +++ b/scripts/wheel_smoke.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 +"""Smoke-test an installed molecule-ai-workspace-runtime wheel. + +Runs the same invariant assertions in two workflows: + * publish-runtime.yml — after building dist/*.whl, before PyPI upload + * runtime-prbuild-compat.yml — after building the PR's wheel, before merge + +Splitting the smoke across two inline heredocs let PR-time and publish-time +drift apart. After 2026-04 we kept hitting publish-time failures for +regressions a PR-time check could have caught. One script, both gates. + +Failure here intentionally exits non-zero so the workflow's `run:` step fails. +Each block prints a single ✓ line on success so the GH summary log stays +readable; assertion errors propagate with their own message. 
+ +Run directly: `python scripts/wheel_smoke.py` after `pip install <path-to-wheel>`. +""" + +import os +import sys + + +def smoke_imports_and_invariants() -> None: + """Module imports + stable contract assertions. + + Importing main_sync by name is the strongest pre-PyPI gate we have for + import-rewrite mistakes (the 0.1.16 incident, where main.py loaded but + main_sync was missing because the build script dropped a re-export). + """ + from molecule_runtime.main import main_sync # noqa: F401 + from molecule_runtime import a2a_client, a2a_tools # noqa: F401 + from molecule_runtime.builtin_tools import memory # noqa: F401 + from molecule_runtime.adapters import get_adapter, BaseAdapter, AdapterConfig + + assert a2a_client._A2A_ERROR_PREFIX, "a2a_client missing error sentinel" + assert callable(get_adapter), "adapters.get_adapter must be callable" + assert hasattr(BaseAdapter, "name"), "BaseAdapter interface broken" + assert hasattr(AdapterConfig, "__init__"), "AdapterConfig dataclass missing" + print("✓ module imports + invariants OK") + + +def smoke_agent_card_call_shape() -> None: + """Construct AgentCard with the EXACT kwargs main.py uses. + + Pure imports don't catch field-shape regressions in upstream SDKs that + only surface at construction time. Two bugs of this exact class shipped + since the a2a-sdk 1.0 migration: + - state_transition_history=True (#2179) + - supported_protocols=[...] (the protobuf field is supported_interfaces; + every workspace boot crashed with `ValueError: Protocol message + AgentCard has no "supported_protocols" field`) + + main.py and this block MUST stay in lockstep — adding a kwarg there + without mirroring it here is the regression vector. 
+ """ + from a2a.types import AgentCard, AgentCapabilities, AgentSkill, AgentInterface + + AgentCard( + name="smoke-agent", + description="wheel-smoke: AgentCard call-shape", + version="0.0.0-smoke", + supported_interfaces=[ + AgentInterface(protocol_binding="https://a2a.g/v1", url="http://localhost:8080"), + ], + capabilities=AgentCapabilities( + streaming=True, + push_notifications=False, + ), + skills=[ + AgentSkill( + id="smoke-skill", + name="Smoke", + description="no-op", + tags=["smoke"], + examples=["noop"], + ), + ], + default_input_modes=["text/plain", "application/json"], + default_output_modes=["text/plain", "application/json"], + ) + print("✓ AgentCard call-shape smoke passed") + + +def smoke_well_known_path_alignment() -> None: + """The SDK's published constant must match the path it actually mounts. + + main.py polls AGENT_CARD_WELL_KNOWN_PATH to detect server readiness. If + the constant and create_agent_card_routes() drift, every workspace's + initial_prompt silently drops (probe 404s, falls through to "skipping"). + This was the #2193 incident class. + """ + from a2a.types import AgentCard + from a2a.utils.constants import AGENT_CARD_WELL_KNOWN_PATH + from a2a.server.routes import create_agent_card_routes + + mounted_paths = [ + getattr(r, "path", None) + for r in create_agent_card_routes( + AgentCard( + name="wk-smoke", + description="well-known mount alignment", + version="0.0.0-smoke", + ) + ) + ] + assert AGENT_CARD_WELL_KNOWN_PATH in mounted_paths, ( + f"AGENT_CARD_WELL_KNOWN_PATH ({AGENT_CARD_WELL_KNOWN_PATH!r}) is NOT among " + f"paths mounted by create_agent_card_routes ({mounted_paths!r}). The SDK " + "constant and its own route factory have drifted — workspace probes will " + "404 forever, silently dropping every workspace initial_prompt." + ) + print(f"✓ well-known mount alignment OK ({AGENT_CARD_WELL_KNOWN_PATH})") + + +def smoke_message_helper() -> None: + """new_text_message is the v1.x rename of new_agent_text_message. 
+ + main.py and a2a_executor.py call new_text_message in hot paths; if the + import breaks, every reply errors with ImportError before the message + even leaves the workspace. Importing here catches a future v2.x rename + at publish time. + """ + from a2a.helpers import new_text_message + + msg = new_text_message("smoke") + assert msg is not None, "new_text_message returned None" + print("✓ message helper import + call OK") + + +def main() -> int: + # main.py validates WORKSPACE_ID at module-import time via platform_auth. + # Set placeholders so the smoke doesn't trip on the env-var guard. + os.environ.setdefault("WORKSPACE_ID", "00000000-0000-0000-0000-000000000000") + os.environ.setdefault("PLATFORM_URL", "http://localhost:8080") + + smoke_imports_and_invariants() + smoke_agent_card_call_shape() + smoke_well_known_path_alignment() + smoke_message_helper() + print("✓ wheel smoke passed") + return 0 + + +if __name__ == "__main__": + sys.exit(main())