molecule-core/.github/workflows/publish-runtime.yml

name: publish-runtime

# Publishes molecule-ai-workspace-runtime to PyPI from monorepo workspace/.
# Monorepo workspace/ is the only source-of-truth for runtime code; this
# workflow is the bridge from monorepo edits to the PyPI artifact that
# the 8 workspace-template-* repos depend on.
#
# Triggered by:
#   - Pushing a tag matching `runtime-vX.Y.Z` (the version is derived from
#     the tag — `runtime-v0.1.6` publishes `0.1.6`).
#   - Manual workflow_dispatch with an explicit `version` input (useful for
#     dev/test releases without tagging the repo).
#   - Auto: any push to `staging` that touches `workspace/**`. The version
#     is derived by querying PyPI for the current latest and bumping the
#     patch component. This closes the human-in-loop gap that caused the
#     2026-04-27 RuntimeCapabilities ImportError outage — adapter symbol
#     additions in workspace/adapters/base.py used to require an operator
#     to remember to publish; now the merge itself triggers the publish.
#
# The workflow:
#   1. Runs scripts/build_runtime_package.py to copy workspace/ →
#      build/molecule_runtime/ with imports rewritten (`a2a_client` →
#      `molecule_runtime.a2a_client`).
#   2. Builds wheel + sdist with `python -m build`.
#   3. Publishes to PyPI via the PyPA Trusted Publisher action (OIDC).
#      No static API token is stored — PyPI verifies the workflow's
#      OIDC claim against the trusted-publisher config registered for
#      molecule-ai-workspace-runtime (Molecule-AI/molecule-core,
#      publish-runtime.yml, environment pypi-publish).
#
# After publish: the 8 template repos pick up the new version on their
# next image rebuild (their requirements.txt pin
# `molecule-ai-workspace-runtime>=0.1.0`, so any new release is eligible).
# To force-pull immediately, bump the pin in each template repo's
# requirements.txt and merge — that triggers their own publish-image.yml.

on:
  # Manual release: the operator supplies the exact version to publish.
  workflow_dispatch:
    inputs:
      version:
        type: string
        required: true
        description: 'Version to publish (e.g. 0.1.6). Required for manual dispatch.'
  push:
    # Auto-publish path: pushes to staging, narrowed by `paths` below.
    branches:
      - staging
    # Tagged release path: `runtime-vX.Y.Z`.
    tags:
      - 'runtime-v*'
    paths:
      # The path filter narrows BRANCH pushes only; tag pushes fire
      # regardless of which files changed.
      #
      # Two inputs decide what ends up in the wheel:
      #   - workspace/** — the source-of-truth for runtime code.
      #   - scripts/build_runtime_package.py — the build script. A change
      #     here (e.g. a fix to the import rewriter or a manifest emit)
      #     alters the shipped wheel even when no workspace/ file moves.
      # The 2026-04-27 lib/ subpackage incident missed an auto-publish
      # for exactly that reason: PR #2174 touched only scripts/, and the
      # operator had to remember a manual dispatch.
      - 'workspace/**'
      - 'scripts/build_runtime_package.py'

# Workflow-wide default for the job token: read-only repository contents.
# The `publish` job declares its own `permissions:` block, which adds
# `id-token: write` for the OIDC exchange with PyPI.
permissions:
  contents: read

# Serialize publishes so two staging merges landing seconds apart don't
# both compute "latest+1" and race on PyPI upload. The second one waits.
# `cancel-in-progress: false` means a run that is already publishing is
# never interrupted by a newer one.
# NOTE(review): the group is declared at workflow level, so it spans both
# jobs — a queued run presumably also waits for the previous run's
# `cascade` job (propagation polling included) to finish. Also confirm
# against the GitHub concurrency docs how more than one *pending* run in
# the same group is handled.
concurrency:
  group: publish-runtime
  cancel-in-progress: false

jobs:
  # Builds the runtime package from workspace/, smoke-tests the wheel, and
  # uploads it to PyPI. Exposes the published version and the wheel hash
  # to the downstream `cascade` job.
  publish:
    runs-on: ubuntu-latest
    # Must match the environment name registered in the PyPI trusted-
    # publisher configuration for this project.
    environment: pypi-publish
    permissions:
      contents: read
      id-token: write   # PyPI Trusted Publisher (OIDC) — no PYPI_TOKEN needed
    outputs:
      # Set by the "Derive version" step (id: version).
      version: ${{ steps.version.outputs.version }}
      # Set by the "Capture wheel SHA256" step (id: wheel_hash); the
      # cascade job compares it against the wheel PyPI actually serves.
      wheel_sha256: ${{ steps.wheel_hash.outputs.wheel_sha256 }}
    steps:
      # Check out the monorepo: the build reads workspace/ and
      # scripts/build_runtime_package.py from the working tree.
      - uses: actions/checkout@v4

      # Interpreter used for the build script, `python -m build`, and twine.
      - uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps it a string rather than parsing a float.
          python-version: "3.11"
          # Let setup-python cache pip downloads between runs.
          cache: pip

      - name: Derive version (tag, manual input, or PyPI auto-bump)
        # Resolves the version to publish and writes it to the step output
        # `version`. Sources, in priority order:
        #   1. workflow_dispatch → the operator-supplied `version` input.
        #   2. tag push `runtime-vX.Y.Z` → the tag with the prefix removed.
        #   3. staging push → PyPI's current latest with the patch bumped.
        # Fails the job if the result is not in the accepted X.Y.Z[suffix]
        # form.
        id: version
        env:
          # Event data reaches the script as environment variables instead
          # of being pasted into the script text by `${{ }}` expansion, so
          # a crafted `version` input is only ever data, never shell syntax.
          EVENT_NAME: ${{ github.event_name }}
          INPUT_VERSION: ${{ inputs.version }}
        run: |
          set -euo pipefail
          if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
            VERSION="$INPUT_VERSION"
          elif [ "${GITHUB_REF_NAME#runtime-v}" != "$GITHUB_REF_NAME" ]; then
            # Ref name starts with `runtime-v` (stripping the prefix changed
            # it): tag is `runtime-vX.Y.Z` — keep only X.Y.Z.
            VERSION="${GITHUB_REF_NAME#runtime-v}"
          else
            # Auto-publish from staging push. Query PyPI for the current
            # latest and bump the patch component. concurrency: group above
            # serializes parallel staging merges so we don't race on the
            # bump. If PyPI is unreachable, fail loud — better to skip a
            # publish than to overwrite an existing version.
            LATEST=$(curl -fsS --retry 3 https://pypi.org/pypi/molecule-ai-workspace-runtime/json \
              | python -c "import sys,json; print(json.load(sys.stdin)['info']['version'])")
            MAJOR=$(echo "$LATEST" | cut -d. -f1)
            MINOR=$(echo "$LATEST" | cut -d. -f2)
            PATCH=$(echo "$LATEST" | cut -d. -f3)
            # A latest such as `0.1.6rc1` yields PATCH=`6rc1`, which would
            # otherwise die inside $(( )) with an opaque arithmetic error.
            case "$PATCH" in
              *[!0-9]*)
                echo "::error::cannot auto-bump PyPI latest $LATEST: patch component '$PATCH' is not numeric"
                exit 1
                ;;
            esac
            VERSION="${MAJOR}.${MINOR}.$((PATCH+1))"
            echo "Auto-bumped from PyPI latest $LATEST -> $VERSION"
          fi
          # Whole-string match (not line-by-line like grep), so a value
          # containing a newline cannot pass the check and then smuggle an
          # extra key=value line into $GITHUB_OUTPUT.
          PEP440_RE='^[0-9]+\.[0-9]+\.[0-9]+(\.dev[0-9]+|rc[0-9]+|a[0-9]+|b[0-9]+|\.post[0-9]+)?$'
          if [[ ! "$VERSION" =~ $PEP440_RE ]]; then
            echo "::error::version $VERSION does not match PEP 440"
            exit 1
          fi
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
          echo "Publishing molecule-ai-workspace-runtime $VERSION"

      - name: Install build tooling
        # `build` drives the wheel + sdist build below; `twine` is used by
        # the later sanity step (`twine check`).
        run: pip install build twine

      - name: Build package from workspace/
        # One logical command; `>-` folds the lines below into a single
        # shell line, so no backslash continuations are needed.
        run: >-
          python scripts/build_runtime_package.py
          --version "${{ steps.version.outputs.version }}"
          --out "${{ runner.temp }}/runtime-build"

      - name: Build wheel + sdist
        # Runs inside the directory the previous step wrote to.
        run: python -m build
        working-directory: ${{ runner.temp }}/runtime-build

      - name: Capture wheel SHA256 for cascade content-verification
        # Recorded BEFORE upload so the cascade probe can verify the
        # bytes Fastly serves under the new version's URL match what
        # we built. Closes a hole left by #2197: that probe verified
        # pip can resolve the version (catches propagation lag) but
        # not that the wheel content matches (would silently pass a
        # Fastly stale-content scenario where the new version's URL
        # serves an old wheel binary).
        #
        # Output: `wheel_sha256` — hex SHA-256 of the first dist/*.whl.
        # Fails the job when dist/ contains no wheel.
        id: wheel_hash
        working-directory: ${{ runner.temp }}/runtime-build
        run: |
          set -eu
          WHEEL=$(ls dist/*.whl 2>/dev/null | head -1)
          if [ -z "$WHEEL" ]; then
            # Single-quoted on purpose: inside double quotes the backticks
            # are command substitution, which would run the build again
            # right here instead of printing the literal text.
            echo '::error::No .whl in dist/ — `python -m build` must have failed silently'
            exit 1
          fi
          HASH=$(sha256sum "$WHEEL" | awk '{print $1}')
          echo "wheel_sha256=${HASH}" >> "$GITHUB_OUTPUT"
          echo "Local wheel SHA256 (pre-upload): ${HASH}"
          echo "Wheel filename: $(basename "$WHEEL")"

      - name: Verify package contents (sanity)
        # Runs `twine check` on the built artifacts, then installs the
        # wheel into a throwaway venv and executes a smoke script against
        # it. Any failure here aborts the job before the PyPI upload step.
        working-directory: ${{ runner.temp }}/runtime-build
        run: |
          python -m twine check dist/*
          # Smoke-import the built wheel to catch import-rewrite mistakes
          # before they hit PyPI. Asserts on STABLE INVARIANTS only —
          # symbols + classes that are part of the package's public
          # contract (BaseAdapter interface, the canonical a2a sentinel,
          # core submodules). Don't add feature-flag-style assertions
          # here — they fire false-positive every time staging is mid-
          # release of that feature.
          python -m venv /tmp/smoke
          /tmp/smoke/bin/pip install --quiet dist/*.whl
          # The script is fed through a QUOTED heredoc (<<'PY'), so bash
          # performs no expansion on it. Passing it as `python -c "…"` made
          # every unescaped backtick in the comments below a command
          # substitution, i.e. bash tried to run comment text as commands.
          WORKSPACE_ID=00000000-0000-0000-0000-000000000000 \
          PLATFORM_URL=http://localhost:8080 \
            /tmp/smoke/bin/python - <<'PY'
          # Importing main is the strongest smoke test we can do here:
          # main.py is the entry point and pulls every other module
          # transitively. If the build script missed an import rewrite
          # (e.g. left a bare `from transcript_auth import ...` instead
          # of `from molecule_runtime.transcript_auth import ...` — the
          # 0.1.16 incident), this fails with ModuleNotFoundError instead
          # of shipping to PyPI and breaking every workspace startup.
          # Import the entry-point target by NAME — not just the module.
          # The wheel's pyproject.toml declares
          # `molecule-runtime = molecule_runtime.main:main_sync` so if
          # main_sync goes missing (it did in 0.1.16-0.1.18), every
          # workspace startup fails with `ImportError: cannot import name
          # 'main_sync'`. Plain `import molecule_runtime.main` doesn't
          # catch that because the module loads fine.
          from molecule_runtime.main import main_sync  # noqa: F401
          from molecule_runtime import a2a_client, a2a_tools
          from molecule_runtime.builtin_tools import memory
          from molecule_runtime.adapters import get_adapter, BaseAdapter, AdapterConfig
          # Stable invariants: package exports + BaseAdapter shape.
          assert a2a_client._A2A_ERROR_PREFIX, 'a2a_client missing error sentinel'
          assert callable(get_adapter), 'adapters.get_adapter must be callable'
          assert hasattr(BaseAdapter, 'name'), 'BaseAdapter interface broken'
          assert hasattr(AdapterConfig, '__init__'), 'AdapterConfig dataclass missing'

          # Call-shape smoke for AgentCard. Pure imports don't catch
          # field-shape regressions in upstream SDKs that only surface
          # at construction time. Two bugs of this exact class shipped
          # since the a2a-sdk 1.0 migration:
          #   - state_transition_history=True (fixed in #2179)
          #   - supported_protocols=[...] (the protobuf field is
          #     supported_interfaces — caused every workspace boot
          #     to crash with `ValueError: Protocol message AgentCard
          #     has no "supported_protocols" field`; fixed alongside
          #     this smoke)
          #
          # This block instantiates the EXACT classes main.py uses,
          # with the EXACT keyword arguments. If a future a2a-sdk
          # upgrade renames any of supported_interfaces / streaming /
          # push_notifications / etc., the publish fails here instead
          # of breaking every workspace startup. main.py and this
          # smoke MUST stay in lockstep — adding a kwarg to one
          # without mirroring it here is the regression vector.
          from a2a.types import AgentCard, AgentCapabilities, AgentSkill, AgentInterface
          AgentCard(
              name='smoke-agent',
              description='publish-runtime smoke test',
              version='0.0.0-smoke',
              supported_interfaces=[
                  AgentInterface(protocol_binding='https://a2a.g/v1', url='http://localhost:8080'),
              ],
              capabilities=AgentCapabilities(
                  streaming=True,
                  push_notifications=False,
              ),
              skills=[
                  AgentSkill(
                      id='smoke-skill',
                      name='Smoke',
                      description='no-op',
                      tags=['smoke'],
                      examples=['noop'],
                  ),
              ],
              default_input_modes=['text/plain', 'application/json'],
              default_output_modes=['text/plain', 'application/json'],
          )
          print('✓ AgentCard call-shape smoke passed')

          # Well-known agent-card path probe alignment. main.py's
          # _send_initial_prompt() polls AGENT_CARD_WELL_KNOWN_PATH
          # to know when the local A2A server is ready. If the SDK
          # ever splits the constant value from the path that
          # create_agent_card_routes() actually mounts at, every
          # workspace silently drops its initial_prompt:
          #   - Probe gets 404 every attempt.
          #   - Falls through to 'server not ready after 30s,
          #     skipping' even though the server is fine.
          #   - The user hits a fresh chat with no kickoff context.
          # This was the #2193 incident class — the v0.x → v1.x
          # rename of /.well-known/agent.json → /.well-known/agent-card.json
          # plus the constant itself moving to a2a.utils.constants.
          # source-tree pytest (test_agent_card_well_known_path.py)
          # catches main.py-side regressions; this catches the
          # SDK-side ones BEFORE PyPI upload.
          from a2a.utils.constants import AGENT_CARD_WELL_KNOWN_PATH
          from a2a.server.routes import create_agent_card_routes
          mounted_paths = [
              getattr(r, 'path', None)
              for r in create_agent_card_routes(
                  AgentCard(
                      name='wk-smoke',
                      description='well-known mount alignment',
                      version='0.0.0-smoke',
                  )
              )
          ]
          assert AGENT_CARD_WELL_KNOWN_PATH in mounted_paths, (
              f'AGENT_CARD_WELL_KNOWN_PATH ({AGENT_CARD_WELL_KNOWN_PATH!r}) '
              f'is NOT among paths mounted by create_agent_card_routes '
              f'({mounted_paths!r}). The SDK constant and its own route '
              f'factory have drifted — workspace probes will 404 forever, '
              f'silently dropping every workspace initial_prompt.'
          )
          print(f'✓ well-known mount alignment OK ({AGENT_CARD_WELL_KNOWN_PATH})')

          # Message helper smoke. a2a-sdk renamed
          # new_agent_text_message → new_text_message in the v1.x
          # protobuf-flat migration (per the v0→v1 cheat sheet). main.py
          # and a2a_executor.py call new_text_message in hot paths; if
          # the import breaks, every reply errors with ImportError before
          # the message even leaves the workspace. Importing here
          # catches a future v2.x rename at publish time.
          from a2a.helpers import new_text_message
          msg = new_text_message('smoke')
          assert msg is not None, 'new_text_message returned None'
          print('✓ message helper import + call OK')

          print('✓ smoke import passed')
          PY

      - name: Publish to PyPI (Trusted Publisher / OIDC)
        # PyPI side is configured: project molecule-ai-workspace-runtime →
        # publisher Molecule-AI/molecule-core, workflow publish-runtime.yml,
        # environment pypi-publish. The action mints a short-lived OIDC
        # token and exchanges it for a PyPI upload credential — no static
        # API token in this repo's secrets. This relies on the job-level
        # `id-token: write` permission and `environment: pypi-publish`.
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          # The dist/ directory that `python -m build` wrote in the
          # "Build wheel + sdist" step.
          packages-dir: ${{ runner.temp }}/runtime-build/dist/

  cascade:
    # After PyPI accepts the upload, fan out a repository_dispatch to each
    # template repo so they rebuild their image against the new runtime.
    # Each template's `runtime-published.yml` receiver picks up the event,
    # pulls the new PyPI version (their requirements.txt pin is `>=`), and
    # republishes ghcr.io/molecule-ai/workspace-template-<runtime>:latest.
    #
    # Soft-fail per repo: if one template's dispatch fails (perms missing,
    # repo archived, etc.) we still try the others and surface the failures
    # in the workflow summary instead of aborting the whole cascade.
    #
    # This job declares no `permissions:` of its own, so it runs with the
    # workflow-level `contents: read`; cross-repo calls authenticate with
    # the TEMPLATE_DISPATCH_TOKEN secret instead of the job token.
    # `needs: publish` both orders the jobs and exposes the publish job's
    # `version` / `wheel_sha256` outputs to the steps below.
    needs: publish
    runs-on: ubuntu-latest
    steps:
      - name: Wait for PyPI to propagate the new version
        # PyPI accepts the upload, then takes a few seconds to make the
        # new version visible across all THREE surfaces pip touches:
        #   1. /pypi/<pkg>/<ver>/json — metadata endpoint
        #   2. /simple/<pkg>/         — pip's primary download index
        #   3. files.pythonhosted.org — CDN-fronted wheel binary
        # Each has its own cache. The previous check polled only (1)
        # and would let the cascade fire while (2) or (3) still served
        # the previous version, so downstream `pip install` resolved
        # to the old wheel. Docker layer cache then locked that stale
        # resolution in for subsequent rebuilds (the cache trap that
        # bit us five times in one night).
        #
        # Two-stage probe per poll:
        #   (a) `pip install --no-cache-dir PACKAGE==VERSION` — succeeds
        #       only when the version is resolvable. Catches surface (1)
        #       and (2) propagation lag.
        #   (b) `pip download` of the same wheel + SHA256 compare against
        #       the just-built dist's hash. Catches surface (3) lag AND
        #       Fastly serving stale content under the new version's URL
        #       (a separate Fastly-corruption mode that pip-install alone
        #       can't see, since pip install resolves+unpacks against
        #       whatever bytes Fastly returns and never inspects them).
        # Both must pass before the cascade fans out.
        #
        # The venv is reused across polls; only `pip install`/`pip
        # download` run in the loop, with --force-reinstall +
        # --no-cache-dir so the previous poll's cached state doesn't
        # mask propagation lag.
        #
        # Every failed poll logs WHICH stage failed, so a timeout can be
        # triaged from the log (resolution lag vs. missing wheel vs. hash
        # mismatch) instead of showing only the final generic error.
        env:
          RUNTIME_VERSION: ${{ needs.publish.outputs.version }}
          EXPECTED_SHA256: ${{ needs.publish.outputs.wheel_sha256 }}
        run: |
          set -eu
          if [ -z "$EXPECTED_SHA256" ]; then
            echo "::error::publish job did not expose wheel_sha256 — cannot verify wheel content. Refusing to fan out cascade."
            exit 1
          fi
          python -m venv /tmp/propagation-probe
          PROBE=/tmp/propagation-probe/bin
          $PROBE/pip install --upgrade --quiet pip
          # Poll budget: 30 attempts × (~3-5s pip install + ~3s pip
          # download + 4s sleep) ≈ 5-6 min wall on a slow GH runner.
          # Generous vs PyPI's typical few-seconds propagation;
          # failures past this are signal of a real PyPI / Fastly
          # issue, not just lag.
          for i in $(seq 1 30); do
            # Stage (a): can pip resolve and install the version?
            if $PROBE/pip install \
                  --quiet \
                  --no-cache-dir \
                  --force-reinstall \
                  --no-deps \
                  "molecule-ai-workspace-runtime==${RUNTIME_VERSION}" \
                  >/dev/null 2>&1; then
              INSTALLED=$($PROBE/pip show molecule-ai-workspace-runtime 2>/dev/null \
                          | awk -F': ' '/^Version:/{print $2}')
              if [ "$INSTALLED" = "$RUNTIME_VERSION" ]; then
                # Stage (b): does Fastly serve the bytes we uploaded?
                # `pip download` writes the actual .whl file to disk so
                # we can sha256sum it (vs `pip install` which unpacks
                # and discards).
                rm -rf /tmp/probe-dl
                mkdir -p /tmp/probe-dl
                if $PROBE/pip download \
                      --quiet \
                      --no-cache-dir \
                      --no-deps \
                      --dest /tmp/probe-dl \
                      "molecule-ai-workspace-runtime==${RUNTIME_VERSION}" \
                      >/dev/null 2>&1; then
                  WHEEL=$(ls /tmp/probe-dl/*.whl 2>/dev/null | head -1)
                  if [ -n "$WHEEL" ]; then
                    ACTUAL=$(sha256sum "$WHEEL" | awk '{print $1}')
                    if [ "$ACTUAL" = "$EXPECTED_SHA256" ]; then
                      echo "::notice::✓ pip resolves AND wheel content matches after ${i} poll(s) (sha256=${EXPECTED_SHA256})"
                      exit 0
                    fi
                    # Hash mismatch: PyPI accepted our upload but Fastly
                    # is serving different bytes under the version's URL.
                    # Most often this is propagation lag of the BINARY
                    # surface — the version is resolvable but the wheel
                    # cache hasn't caught up. Retry.
                    echo "::warning::poll ${i}: wheel content mismatch (got ${ACTUAL:0:12}…, want ${EXPECTED_SHA256:0:12}…) — Fastly likely still serving stale binary, retrying"
                  else
                    echo "poll ${i}: pip download succeeded but left no .whl in /tmp/probe-dl — retrying"
                  fi
                else
                  echo "poll ${i}: pip download of ${RUNTIME_VERSION} failed — retrying"
                fi
              else
                echo "poll ${i}: pip reports installed version '${INSTALLED}', expected ${RUNTIME_VERSION} — retrying"
              fi
            else
              echo "poll ${i}: pip cannot install molecule-ai-workspace-runtime==${RUNTIME_VERSION} yet — retrying"
            fi
            sleep 4
          done
          echo "::error::pip never resolved molecule-ai-workspace-runtime==${RUNTIME_VERSION} with matching wheel content within ~5 min."
          echo "::error::Expected wheel SHA256: ${EXPECTED_SHA256}"
          echo "::error::Refusing to fan out cascade against stale or corrupt PyPI surfaces."
          exit 1

      - name: Fan out repository_dispatch
        # POSTs a `runtime-published` repository_dispatch, carrying the
        # published version in `client_payload.runtime_version`, to each
        # template repo. Best-effort: a failed dispatch is reported as a
        # warning and never fails the job. The only hard failure is a
        # missing version output from the publish job.
        env:
          # Fine-grained PAT with `actions:write` on the 8 template repos.
          # GITHUB_TOKEN can't fire dispatches across repos — needs an explicit
          # token. Stored as a repo secret; rotate per the standard schedule.
          # NOTE(review): GitHub's REST reference for "Create a repository
          # dispatch event" lists Contents: write as the fine-grained
          # permission — confirm the scope stated above is the right one.
          DISPATCH_TOKEN: ${{ secrets.TEMPLATE_DISPATCH_TOKEN }}
          # Single source of truth: the publish job's output, which handles
          # tag/manual-input/auto-bump uniformly. The previous fallback
          # (`steps.version.outputs.version` from inside the cascade job)
          # was a dead reference — different job, no shared step scope.
          RUNTIME_VERSION: ${{ needs.publish.outputs.version }}
        run: |
          set +e   # don't abort on a single repo failure — collect them all
          if [ -z "$DISPATCH_TOKEN" ]; then
            echo "::warning::TEMPLATE_DISPATCH_TOKEN secret not set — skipping cascade. PyPI was published; templates will pick up the new version on their own next rebuild."
            exit 0
          fi
          VERSION="$RUNTIME_VERSION"
          if [ -z "$VERSION" ]; then
            echo "::error::publish job did not expose a version output — cascade cannot fan out"
            exit 1
          fi
          TEMPLATES="claude-code langgraph crewai autogen deepagents hermes gemini-cli openclaw"
          FAILED=""
          RESPONSE=/tmp/dispatch.out
          for tpl in $TEMPLATES; do
            REPO="Molecule-AI/molecule-ai-workspace-template-$tpl"
            # Start every repo with an empty response file: if curl fails
            # before writing a body, the warning below must not print the
            # previous repo's response.
            : > "$RESPONSE"
            # Bounded connect/total time so one hung connection cannot
            # stall the remaining dispatches.
            STATUS=$(curl -sS -o "$RESPONSE" -w "%{http_code}" \
              --connect-timeout 10 --max-time 30 \
              -X POST "https://api.github.com/repos/$REPO/dispatches" \
              -H "Authorization: Bearer $DISPATCH_TOKEN" \
              -H "Accept: application/vnd.github+json" \
              -H "X-GitHub-Api-Version: 2022-11-28" \
              -d "{\"event_type\":\"runtime-published\",\"client_payload\":{\"runtime_version\":\"$VERSION\"}}")
            if [ "$STATUS" = "204" ]; then
              echo "✓ dispatched $tpl ($VERSION)"
            else
              echo "::warning::✗ failed to dispatch $tpl: HTTP $STATUS — $(cat "$RESPONSE")"
              FAILED="$FAILED $tpl"
            fi
          done
          if [ -n "$FAILED" ]; then
            echo "::warning::Cascade incomplete. Failed templates:$FAILED"
            # Don't fail the whole job — PyPI publish already succeeded;
            # operators can retry the failed templates manually.
          fi