---
# Publishes molecule-ai-workspace-runtime to PyPI from monorepo workspace/.
# Monorepo workspace/ is the only source-of-truth for runtime code; this
# workflow is the bridge from monorepo edits to the PyPI artifact that
# the 9 workspace-template-* repos depend on.
#
# Triggered by:
#   - Pushing a tag matching `runtime-vX.Y.Z` (the version is derived from
#     the tag — `runtime-v0.1.6` publishes `0.1.6`).
#   - Manual workflow_dispatch with an explicit `version` input (useful for
#     dev/test releases without tagging the repo).
#   - Auto: any push to `staging` that touches `workspace/**`. The version
#     is derived by querying PyPI for the current latest and bumping the
#     patch component. This closes the human-in-loop gap that caused the
#     2026-04-27 RuntimeCapabilities ImportError outage — adapter symbol
#     additions in workspace/adapters/base.py used to require an operator
#     to remember to publish; now the merge itself triggers the publish.
#
# The workflow:
#   1. Runs scripts/build_runtime_package.py to copy workspace/ →
#      build/molecule_runtime/ with imports rewritten (`a2a_client` →
#      `molecule_runtime.a2a_client`).
#   2. Builds wheel + sdist with `python -m build`.
#   3. Publishes to PyPI via the PyPA Trusted Publisher action (OIDC).
#      No static API token is stored — PyPI verifies the workflow's
#      OIDC claim against the trusted-publisher config registered for
#      molecule-ai-workspace-runtime (molecule-ai/molecule-core,
#      publish-runtime.yml, environment pypi-publish).
#
# After publish: the 9 template repos pick up the new version on their
# next image rebuild (their requirements.txt pin
# `molecule-ai-workspace-runtime>=0.1.0`, so any new release is eligible).
# To force-pull immediately, bump the pin in each template repo's
# requirements.txt and merge — that triggers their own publish-image.yml.
name: publish-runtime

on:
  push:
    tags:
      - "runtime-v*"
    branches:
      - staging
    # Auto-publish when staging gets changes that affect what gets
    # published. Path filter ONLY applies to branch pushes — tag pushes
    # still fire regardless.
    #
    # workspace/** is the source-of-truth for runtime code.
    # scripts/build_runtime_package.py is the build script — changes to
    # it (e.g. a fix to the import rewriter or a manifest emit) directly
    # affect what ships in the wheel even if no workspace/ file changes.
    # The 2026-04-27 lib/ subpackage incident missed an auto-publish for
    # exactly this reason — PR #2174 only changed scripts/ and the
    # operator had to remember a manual dispatch.
    paths:
      - "workspace/**"
      - "scripts/build_runtime_package.py"
  workflow_dispatch:
    inputs:
      version:
        description: "Version to publish (e.g. 0.1.6). Required for manual dispatch."
        required: true
        type: string

permissions:
  contents: read

# Serialize publishes so two staging merges landing seconds apart don't
# both compute "latest+1" and race on PyPI upload. The second one waits.
concurrency:
  group: publish-runtime
  cancel-in-progress: false

jobs:
  publish:
    runs-on: ubuntu-latest
    environment: pypi-publish
    permissions:
      contents: read
      id-token: write # PyPI Trusted Publisher (OIDC) — no PYPI_TOKEN needed
    outputs:
      version: ${{ steps.version.outputs.version }}
      wheel_sha256: ${{ steps.wheel_hash.outputs.wheel_sha256 }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: "3.11"
          cache: pip

      - name: Derive version (tag, manual input, or PyPI auto-bump)
        id: version
        env:
          # Pass the operator-supplied input through the environment rather
          # than interpolating `${{ inputs.version }}` into the script body —
          # direct interpolation of workflow inputs into `run:` is a shell
          # injection vector.
          INPUT_VERSION: ${{ inputs.version }}
        run: |
          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
            VERSION="$INPUT_VERSION"
          elif echo "$GITHUB_REF_NAME" | grep -q "^runtime-v"; then
            # Tag is `runtime-vX.Y.Z` — strip the prefix.
            VERSION="${GITHUB_REF_NAME#runtime-v}"
          else
            # Auto-publish from staging push. Query PyPI for the current
            # latest and bump the patch component. concurrency: group above
            # serializes parallel staging merges so we don't race on the
            # bump. If PyPI is unreachable, fail loud — better to skip a
            # publish than to overwrite an existing version.
            LATEST=$(curl -fsS --retry 3 https://pypi.org/pypi/molecule-ai-workspace-runtime/json \
              | python -c "import sys,json; print(json.load(sys.stdin)['info']['version'])")
            MAJOR=$(echo "$LATEST" | cut -d. -f1)
            MINOR=$(echo "$LATEST" | cut -d. -f2)
            PATCH=$(echo "$LATEST" | cut -d. -f3)
            VERSION="${MAJOR}.${MINOR}.$((PATCH+1))"
            echo "Auto-bumped from PyPI latest $LATEST -> $VERSION"
          fi
          if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+(\.dev[0-9]+|rc[0-9]+|a[0-9]+|b[0-9]+|\.post[0-9]+)?$'; then
            echo "::error::version $VERSION does not match PEP 440"
            exit 1
          fi
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
          echo "Publishing molecule-ai-workspace-runtime $VERSION"

      - name: Install build tooling
        run: pip install build twine

      - name: Build package from workspace/
        run: |
          python scripts/build_runtime_package.py \
            --version "${{ steps.version.outputs.version }}" \
            --out "${{ runner.temp }}/runtime-build"

      - name: Build wheel + sdist
        working-directory: ${{ runner.temp }}/runtime-build
        run: python -m build

      - name: Capture wheel SHA256 for cascade content-verification
        # Recorded BEFORE upload so the cascade probe can verify the
        # bytes Fastly serves under the new version's URL match what
        # we built. Closes a hole left by #2197: that probe verified
        # pip can resolve the version (catches propagation lag) but
        # not that the wheel content matches (would silently pass a
        # Fastly stale-content scenario where the new version's URL
        # serves an old wheel binary).
        id: wheel_hash
        working-directory: ${{ runner.temp }}/runtime-build
        run: |
          set -eu
          WHEEL=$(ls dist/*.whl 2>/dev/null | head -1)
          if [ -z "$WHEEL" ]; then
            # Single-quoted on purpose: inside double quotes the backticks
            # around `python -m build` would be command-substituted and
            # re-run the build instead of printing the message.
            echo '::error::No .whl in dist/ — `python -m build` must have failed silently'
            exit 1
          fi
          HASH=$(sha256sum "$WHEEL" | awk '{print $1}')
          echo "wheel_sha256=${HASH}" >> "$GITHUB_OUTPUT"
          echo "Local wheel SHA256 (pre-upload): ${HASH}"
          echo "Wheel filename: $(basename "$WHEEL")"

      - name: Verify package contents (sanity)
        working-directory: ${{ runner.temp }}/runtime-build
        # Smoke logic lives in scripts/wheel_smoke.py so the same gate runs
        # at both PR-time (runtime-prbuild-compat.yml) and publish-time
        # (here). Splitting the smoke across two heredocs let them drift
        # apart historically — one script keeps them locked.
        run: |
          python -m twine check dist/*
          python -m venv /tmp/smoke
          /tmp/smoke/bin/pip install --quiet dist/*.whl
          /tmp/smoke/bin/python "$GITHUB_WORKSPACE/scripts/wheel_smoke.py"

      - name: Publish to PyPI (Trusted Publisher / OIDC)
        # PyPI side is configured: project molecule-ai-workspace-runtime →
        # publisher molecule-ai/molecule-core, workflow publish-runtime.yml,
        # environment pypi-publish. The action mints a short-lived OIDC
        # token and exchanges it for a PyPI upload credential — no static
        # API token in this repo's secrets.
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          packages-dir: ${{ runner.temp }}/runtime-build/dist/

  cascade:
    # After PyPI accepts the upload, fan out a repository_dispatch to each
    # template repo so they rebuild their image against the new runtime.
    # Each template's `runtime-published.yml` receiver picks up the event,
    # pulls the new PyPI version (their requirements.txt pin is `>=`), and
    # republishes ghcr.io/molecule-ai/workspace-template-<name>:latest.
    #
    # Soft-fail per repo: if one template's dispatch fails (perms missing,
    # repo archived, etc.) we still try the others and surface the failures
    # in the workflow summary instead of aborting the whole cascade.
    needs: publish
    runs-on: ubuntu-latest
    steps:
      - name: Wait for PyPI to propagate the new version
        # PyPI accepts the upload, then takes a few seconds to make the
        # new version visible across all THREE surfaces pip touches:
        #   1. /pypi/<name>/json — metadata endpoint
        #   2. /simple/<name>/ — pip's primary download index
        #   3. files.pythonhosted.org — CDN-fronted wheel binary
        # Each has its own cache. The previous check polled only (1)
        # and would let the cascade fire while (2) or (3) still served
        # the previous version, so downstream `pip install` resolved
        # to the old wheel. Docker layer cache then locked that stale
        # resolution in for subsequent rebuilds (the cache trap that
        # bit us five times in one night).
        #
        # Two-stage probe per poll:
        #   (a) `pip install --no-cache-dir PACKAGE==VERSION` — succeeds
        #       only when the version is resolvable. Catches surface (1)
        #       and (2) propagation lag.
        #   (b) `pip download` of the same wheel + SHA256 compare against
        #       the just-built dist's hash. Catches surface (3) lag AND
        #       Fastly serving stale content under the new version's URL
        #       (a separate Fastly-corruption mode that pip-install alone
        #       can't see, since pip install resolves+unpacks against
        #       whatever bytes Fastly returns and never inspects them).
        # Both must pass before the cascade fans out.
        #
        # The venv is reused across polls; only `pip install`/`pip
        # download` run in the loop, with --force-reinstall +
        # --no-cache-dir so the previous poll's cached state doesn't
        # mask propagation lag.
        env:
          RUNTIME_VERSION: ${{ needs.publish.outputs.version }}
          EXPECTED_SHA256: ${{ needs.publish.outputs.wheel_sha256 }}
        run: |
          set -eu
          if [ -z "$EXPECTED_SHA256" ]; then
            echo "::error::publish job did not expose wheel_sha256 — cannot verify wheel content. Refusing to fan out cascade."
            exit 1
          fi
          python -m venv /tmp/propagation-probe
          PROBE=/tmp/propagation-probe/bin
          $PROBE/pip install --upgrade --quiet pip
          # Poll budget: 30 attempts × (~3-5s pip install + ~3s pip
          # download + 4s sleep) ≈ 5-6 min wall on a slow GH runner.
          # Generous vs PyPI's typical few-seconds propagation;
          # failures past this are signal of a real PyPI / Fastly
          # issue, not just lag.
          for i in $(seq 1 30); do
            # Stage (a): can pip resolve and install the version?
            if $PROBE/pip install \
                --quiet \
                --no-cache-dir \
                --force-reinstall \
                --no-deps \
                "molecule-ai-workspace-runtime==${RUNTIME_VERSION}" \
                >/dev/null 2>&1; then
              INSTALLED=$($PROBE/pip show molecule-ai-workspace-runtime 2>/dev/null \
                | awk -F': ' '/^Version:/{print $2}')
              if [ "$INSTALLED" = "$RUNTIME_VERSION" ]; then
                # Stage (b): does Fastly serve the bytes we uploaded?
                # `pip download` writes the actual .whl file to disk so
                # we can sha256sum it (vs `pip install` which unpacks
                # and discards).
                rm -rf /tmp/probe-dl
                mkdir -p /tmp/probe-dl
                if $PROBE/pip download \
                    --quiet \
                    --no-cache-dir \
                    --no-deps \
                    --dest /tmp/probe-dl \
                    "molecule-ai-workspace-runtime==${RUNTIME_VERSION}" \
                    >/dev/null 2>&1; then
                  WHEEL=$(ls /tmp/probe-dl/*.whl 2>/dev/null | head -1)
                  if [ -n "$WHEEL" ]; then
                    ACTUAL=$(sha256sum "$WHEEL" | awk '{print $1}')
                    if [ "$ACTUAL" = "$EXPECTED_SHA256" ]; then
                      echo "::notice::✓ pip resolves AND wheel content matches after ${i} poll(s) (sha256=${EXPECTED_SHA256})"
                      exit 0
                    fi
                    # Hash mismatch: PyPI accepted our upload but Fastly
                    # is serving different bytes under the version's URL.
                    # Most often this is propagation lag of the BINARY
                    # surface — the version is resolvable but the wheel
                    # cache hasn't caught up. Retry.
                    echo "::warning::poll ${i}: wheel content mismatch (got ${ACTUAL:0:12}…, want ${EXPECTED_SHA256:0:12}…) — Fastly likely still serving stale binary, retrying"
                  fi
                fi
              fi
            fi
            sleep 4
          done
          echo "::error::pip never resolved molecule-ai-workspace-runtime==${RUNTIME_VERSION} with matching wheel content within ~5 min."
          echo "::error::Expected wheel SHA256: ${EXPECTED_SHA256}"
          echo "::error::Refusing to fan out cascade against stale or corrupt PyPI surfaces."
          exit 1

      - name: Fan out repository_dispatch
        env:
          # Fine-grained PAT with `actions:write` on the 9 template repos.
          # GITHUB_TOKEN can't fire dispatches across repos — needs an explicit
          # token. Stored as a repo secret; rotate per the standard schedule.
          DISPATCH_TOKEN: ${{ secrets.TEMPLATE_DISPATCH_TOKEN }}
          # Single source of truth: the publish job's output, which handles
          # tag/manual-input/auto-bump uniformly. The previous fallback
          # (`steps.version.outputs.version` from inside the cascade job)
          # was a dead reference — different job, no shared step scope.
          RUNTIME_VERSION: ${{ needs.publish.outputs.version }}
        run: |
          set +e # don't abort on a single repo failure — collect them all
          # Schedule-vs-dispatch behaviour split (hardened 2026-04-28
          # after the sweep-cf-orphans soft-skip incident — same class
          # of bug):
          #
          # The earlier "skipping cascade. templates will pick up the
          # new version on their own next rebuild" message was wrong —
          # templates only build on this dispatch trigger; without it
          # they stay pinned to whatever runtime version they last saw.
          # A silent skip here means "PyPI is current, templates are
          # not" and the gap is invisible until someone notices a
          # template still on the old version weeks later.
          #
          #   - push              → exit 1 (red CI surfaces the gap)
          #   - workflow_dispatch → exit 0 with a warning (operator
          #                         ran this ad-hoc; let them rerun
          #                         after fixing the secret)
          if [ -z "$DISPATCH_TOKEN" ]; then
            if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
              echo "::warning::TEMPLATE_DISPATCH_TOKEN secret not set — skipping cascade."
              echo "::warning::set it at Settings → Secrets and Variables → Actions, then rerun. Templates will stay on the prior runtime version until either this token is set or each template is rebuilt manually."
              exit 0
            fi
            echo "::error::TEMPLATE_DISPATCH_TOKEN secret missing — cascade cannot fan out."
            echo "::error::PyPI was published, but the 9 template repos will NOT pick up the new version until this token is restored and a republish dispatches the cascade."
            echo "::error::set it at Settings → Secrets and Variables → Actions; then re-trigger publish-runtime via workflow_dispatch."
            exit 1
          fi
          VERSION="$RUNTIME_VERSION"
          if [ -z "$VERSION" ]; then
            echo "::error::publish job did not expose a version output — cascade cannot fan out"
            exit 1
          fi
          # All 9 active workspace template repos. The PR #2536 pruning
          # ("deprecated, no shipping images") was empirically wrong:
          # continuous-synth-e2e.yml defaults to langgraph as its primary
          # canary (line 44), and every excluded template had successful
          # publish-image runs as of 2026-05-03 — none were dormant.
          # Symptom of the prune: today's a2a-sdk strict-mode fix
          # (#2566 / commit e1628c4) cascaded to 4 templates but never
          # reached langgraph, so the synth-E2E correctly canary'd a fix
          # that had landed but not deployed. Re-added the 5 templates.
          # Long-term: derive this list from manifest.json so cascade
          # scope can't drift from E2E scope — tracked in RFC #388 as a
          # Phase-1 invariant.
          TEMPLATES="claude-code hermes openclaw codex langgraph crewai autogen deepagents gemini-cli"
          FAILED=""
          for tpl in $TEMPLATES; do
            REPO="molecule-ai/molecule-ai-workspace-template-$tpl"
            STATUS=$(curl -sS -o /tmp/dispatch.out -w "%{http_code}" \
              -X POST "https://api.github.com/repos/$REPO/dispatches" \
              -H "Authorization: Bearer $DISPATCH_TOKEN" \
              -H "Accept: application/vnd.github+json" \
              -H "X-GitHub-Api-Version: 2022-11-28" \
              -d "{\"event_type\":\"runtime-published\",\"client_payload\":{\"runtime_version\":\"$VERSION\"}}")
            if [ "$STATUS" = "204" ]; then
              echo "✓ dispatched $tpl ($VERSION)"
            else
              echo "::warning::✗ failed to dispatch $tpl: HTTP $STATUS — $(cat /tmp/dispatch.out)"
              FAILED="$FAILED $tpl"
            fi
          done
          if [ -n "$FAILED" ]; then
            echo "::warning::Cascade incomplete. Failed templates:$FAILED"
            # Don't fail the whole job — PyPI publish already succeeded;
            # operators can retry the failed templates manually.
          fi