name: publish-runtime

# Publishes molecule-ai-workspace-runtime to PyPI from monorepo workspace/.
# Monorepo workspace/ is the only source-of-truth for runtime code; this
# workflow is the bridge from monorepo edits to the PyPI artifact that
# the 8 workspace-template-* repos depend on.
#
# Triggered by:
#   - Pushing a tag matching `runtime-vX.Y.Z` (the version is derived from
#     the tag — `runtime-v0.1.6` publishes `0.1.6`).
#   - Manual workflow_dispatch with an explicit `version` input (useful for
#     dev/test releases without tagging the repo).
#   - Auto: any push to `staging` that touches `workspace/**` or
#     `scripts/build_runtime_package.py`. The version is derived by
#     querying PyPI for the current latest and bumping the patch
#     component. This closes the human-in-loop gap that caused the
#     2026-04-27 RuntimeCapabilities ImportError outage — adapter symbol
#     additions in workspace/adapters/base.py used to require an operator
#     to remember to publish; now the merge itself triggers the publish.
#
# The workflow:
#   1. Runs scripts/build_runtime_package.py to copy workspace/ →
#      build/molecule_runtime/ with imports rewritten (`a2a_client` →
#      `molecule_runtime.a2a_client`).
#   2. Builds wheel + sdist with `python -m build`.
#   3. Publishes to PyPI via the PyPA Trusted Publisher action (OIDC).
#      No static API token is stored — PyPI verifies the workflow's
#      OIDC claim against the trusted-publisher config registered for
#      molecule-ai-workspace-runtime (molecule-ai/molecule-core,
#      publish-runtime.yml, environment pypi-publish).
#   4. Waits for the new version to propagate on PyPI, then pushes a
#      `.runtime-version` bump to each template repo (the `cascade` job).
#
# After publish: the 8 template repos pick up the new version on their
# next image rebuild (their requirements.txt pin
# `molecule-ai-workspace-runtime>=0.1.0`, so any new release is eligible).
# To force-pull immediately, bump the pin in each template repo's
# requirements.txt and merge — that triggers their own publish-image.yml.

on:
  push:
    tags:
      - "runtime-v*"
    branches:
      - staging
    paths:
      # Auto-publish when staging gets changes that affect what gets
      # published. Path filter ONLY applies to branch pushes — tag pushes
      # still fire regardless.
      #
      # workspace/** is the source-of-truth for runtime code.
      # scripts/build_runtime_package.py is the build script — changes to
      # it (e.g. a fix to the import rewriter or a manifest emit) directly
      # affect what ships in the wheel even if no workspace/ file changes.
      # The 2026-04-27 lib/ subpackage incident missed an auto-publish for
      # exactly this reason — PR #2174 only changed scripts/ and the
      # operator had to remember a manual dispatch.
      - "workspace/**"
      - "scripts/build_runtime_package.py"
  workflow_dispatch:
    inputs:
      version:
        description: "Version to publish (e.g. 0.1.6). Required for manual dispatch."
        required: true
        type: string

permissions:
  contents: read

# Serialize publishes so two staging merges landing seconds apart don't
# both compute "latest+1" and race on PyPI upload. The second one waits.
# NOTE(review): on GitHub Actions a concurrency group holds at most ONE
# pending run — a third trigger replaces (cancels) the pending second one.
# That is harmless for staging auto-bumps (the newest run builds the newest
# tree) but could drop a pending tag/manual publish — confirm acceptable.
concurrency:
  group: publish-runtime
  cancel-in-progress: false

jobs:
  publish:
    runs-on: ubuntu-latest
    environment: pypi-publish
    permissions:
      contents: read
      id-token: write  # PyPI Trusted Publisher (OIDC) — no PYPI_TOKEN needed
    outputs:
      version: ${{ steps.version.outputs.version }}
      wheel_sha256: ${{ steps.wheel_hash.outputs.wheel_sha256 }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
        with:
          python-version: "3.11"
          cache: pip

      - name: Derive version (tag, manual input, or PyPI auto-bump)
        id: version
        env:
          # Passed through the environment rather than interpolated into
          # the script text: `${{ inputs.version }}` is expanded BEFORE the
          # shell parses the script, so a value containing quotes or `$(…)`
          # would otherwise be executed as shell code.
          DISPATCH_VERSION: ${{ inputs.version }}
        run: |
          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
            VERSION="$DISPATCH_VERSION"
          elif echo "$GITHUB_REF_NAME" | grep -q "^runtime-v"; then
            # Tag is `runtime-vX.Y.Z` — strip the prefix.
            VERSION="${GITHUB_REF_NAME#runtime-v}"
          else
            # Auto-publish from staging push. Query PyPI for the current
            # latest and bump the patch component. concurrency: group above
            # serializes parallel staging merges so we don't race on the
            # bump. If PyPI is unreachable, fail loud — better to skip a
            # publish than to overwrite an existing version.
            LATEST=$(curl -fsS --retry 3 https://pypi.org/pypi/molecule-ai-workspace-runtime/json \
              | python -c "import sys,json; print(json.load(sys.stdin)['info']['version'])")
            MAJOR=$(echo "$LATEST" | cut -d. -f1)
            MINOR=$(echo "$LATEST" | cut -d. -f2)
            PATCH=$(echo "$LATEST" | cut -d. -f3)
            VERSION="${MAJOR}.${MINOR}.$((PATCH+1))"
            echo "Auto-bumped from PyPI latest $LATEST -> $VERSION"
          fi
          # Gate every path (including the manual input) on an X.Y.Z shape
          # with an optional dev/rc/a/b/post suffix before it is exported.
          if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+(\.dev[0-9]+|rc[0-9]+|a[0-9]+|b[0-9]+|\.post[0-9]+)?$'; then
            echo "::error::version $VERSION does not match PEP 440"
            exit 1
          fi
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
          echo "Publishing molecule-ai-workspace-runtime $VERSION"

      - name: Install build tooling
        run: pip install build twine

      - name: Build package from workspace/
        run: |
          python scripts/build_runtime_package.py \
            --version "${{ steps.version.outputs.version }}" \
            --out "${{ runner.temp }}/runtime-build"

      - name: Build wheel + sdist
        working-directory: ${{ runner.temp }}/runtime-build
        run: python -m build

      - name: Capture wheel SHA256 for cascade content-verification
        # Recorded BEFORE upload so the cascade probe can verify the
        # bytes Fastly serves under the new version's URL match what
        # we built. Closes a hole left by #2197: that probe verified
        # pip can resolve the version (catches propagation lag) but
        # not that the wheel content matches (would silently pass a
        # Fastly stale-content scenario where the new version's URL
        # serves an old wheel binary).
        id: wheel_hash
        working-directory: ${{ runner.temp }}/runtime-build
        run: |
          set -eu
          WHEEL=$(ls dist/*.whl 2>/dev/null | head -1)
          if [ -z "$WHEEL" ]; then
            # Backticks are escaped: unescaped inside double quotes they are
            # command substitution and would re-run the build right here.
            echo "::error::No .whl in dist/ — \`python -m build\` must have failed silently"
            exit 1
          fi
          HASH=$(sha256sum "$WHEEL" | awk '{print $1}')
          echo "wheel_sha256=${HASH}" >> "$GITHUB_OUTPUT"
          echo "Local wheel SHA256 (pre-upload): ${HASH}"
          echo "Wheel filename: $(basename "$WHEEL")"

      - name: Verify package contents (sanity)
        working-directory: ${{ runner.temp }}/runtime-build
        # Smoke logic lives in scripts/wheel_smoke.py so the same gate runs
        # at both PR-time (runtime-prbuild-compat.yml) and publish-time
        # (here). Splitting the smoke across two heredocs let them drift
        # apart historically — one script keeps them locked.
        run: |
          python -m twine check dist/*
          python -m venv /tmp/smoke
          /tmp/smoke/bin/pip install --quiet dist/*.whl
          /tmp/smoke/bin/python "$GITHUB_WORKSPACE/scripts/wheel_smoke.py"

      - name: Publish to PyPI (Trusted Publisher / OIDC)
        # PyPI side is configured: project molecule-ai-workspace-runtime →
        # publisher molecule-ai/molecule-core, workflow publish-runtime.yml,
        # environment pypi-publish. The action mints a short-lived OIDC
        # token and exchanges it for a PyPI upload credential — no static
        # API token in this repo's secrets.
        # NOTE(review): unlike checkout/setup-python above, this action is
        # referenced by a mutable branch ref rather than a commit SHA —
        # consider pinning it the same way.
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          packages-dir: ${{ runner.temp }}/runtime-build/dist/

  cascade:
    # After PyPI accepts the upload, fan out to each template repo so they
    # rebuild their image against the new runtime. The fan-out is a commit
    # that writes the new version to `.runtime-version` on the template's
    # main branch; that push trips the template's publish-image.yml, which
    # pulls the new PyPI version (their requirements.txt pin is `>=`) and
    # republishes ghcr.io/molecule-ai/workspace-template-<name>:latest.
    #
    # Soft-fail per repo: if one template's push fails (perms missing,
    # repo archived, etc.) we still try the others and surface the failures
    # in the workflow summary instead of aborting the whole cascade.
    needs: publish
    runs-on: ubuntu-latest
    steps:
      - name: Wait for PyPI to propagate the new version
        # PyPI accepts the upload, then takes a few seconds to make the
        # new version visible across all THREE surfaces pip touches:
        #   1. /pypi/<package>/<version>/json — metadata endpoint
        #   2. /simple/<package>/ — pip's primary download index
        #   3. files.pythonhosted.org — CDN-fronted wheel binary
        # Each has its own cache. The previous check polled only (1)
        # and would let the cascade fire while (2) or (3) still served
        # the previous version, so downstream `pip install` resolved
        # to the old wheel. Docker layer cache then locked that stale
        # resolution in for subsequent rebuilds (the cache trap that
        # bit us five times in one night).
        #
        # Two-stage probe per poll:
        #   (a) `pip install --no-cache-dir PACKAGE==VERSION` — succeeds
        #       only when the version is resolvable. Catches surface (1)
        #       and (2) propagation lag.
        #   (b) `pip download` of the same wheel + SHA256 compare against
        #       the just-built dist's hash. Catches surface (3) lag AND
        #       Fastly serving stale content under the new version's URL
        #       (a separate Fastly-corruption mode that pip-install alone
        #       can't see, since pip install resolves+unpacks against
        #       whatever bytes Fastly returns and never inspects them).
        # Both must pass before the cascade fans out.
        #
        # The venv is reused across polls; only `pip install`/`pip
        # download` run in the loop, with --force-reinstall +
        # --no-cache-dir so the previous poll's cached state doesn't
        # mask propagation lag.
        env:
          RUNTIME_VERSION: ${{ needs.publish.outputs.version }}
          EXPECTED_SHA256: ${{ needs.publish.outputs.wheel_sha256 }}
        run: |
          set -eu
          if [ -z "$EXPECTED_SHA256" ]; then
            echo "::error::publish job did not expose wheel_sha256 — cannot verify wheel content. Refusing to fan out cascade."
            exit 1
          fi
          python -m venv /tmp/propagation-probe
          PROBE=/tmp/propagation-probe/bin
          $PROBE/pip install --upgrade --quiet pip
          # Poll budget: 30 attempts × (~3-5s pip install + ~3s pip
          # download + 4s sleep) ≈ 5-6 min wall on a slow GH runner.
          # Generous vs PyPI's typical few-seconds propagation;
          # failures past this are signal of a real PyPI / Fastly
          # issue, not just lag.
          for i in $(seq 1 30); do
            # Stage (a): can pip resolve and install the version?
            if $PROBE/pip install \
                --quiet \
                --no-cache-dir \
                --force-reinstall \
                --no-deps \
                "molecule-ai-workspace-runtime==${RUNTIME_VERSION}" \
                >/dev/null 2>&1; then
              INSTALLED=$($PROBE/pip show molecule-ai-workspace-runtime 2>/dev/null \
                | awk -F': ' '/^Version:/{print $2}')
              if [ "$INSTALLED" = "$RUNTIME_VERSION" ]; then
                # Stage (b): does Fastly serve the bytes we uploaded?
                # `pip download` writes the actual .whl file to disk so
                # we can sha256sum it (vs `pip install` which unpacks
                # and discards).
                rm -rf /tmp/probe-dl
                mkdir -p /tmp/probe-dl
                if $PROBE/pip download \
                    --quiet \
                    --no-cache-dir \
                    --no-deps \
                    --dest /tmp/probe-dl \
                    "molecule-ai-workspace-runtime==${RUNTIME_VERSION}" \
                    >/dev/null 2>&1; then
                  WHEEL=$(ls /tmp/probe-dl/*.whl 2>/dev/null | head -1)
                  if [ -n "$WHEEL" ]; then
                    ACTUAL=$(sha256sum "$WHEEL" | awk '{print $1}')
                    if [ "$ACTUAL" = "$EXPECTED_SHA256" ]; then
                      echo "::notice::✓ pip resolves AND wheel content matches after ${i} poll(s) (sha256=${EXPECTED_SHA256})"
                      exit 0
                    fi
                    # Hash mismatch: PyPI accepted our upload but Fastly
                    # is serving different bytes under the version's URL.
                    # Most often this is propagation lag of the BINARY
                    # surface — the version is resolvable but the wheel
                    # cache hasn't caught up. Retry.
                    echo "::warning::poll ${i}: wheel content mismatch (got ${ACTUAL:0:12}…, want ${EXPECTED_SHA256:0:12}…) — Fastly likely still serving stale binary, retrying"
                  fi
                fi
              fi
            fi
            sleep 4
          done
          echo "::error::pip never resolved molecule-ai-workspace-runtime==${RUNTIME_VERSION} with matching wheel content within ~5 min."
          echo "::error::Expected wheel SHA256: ${EXPECTED_SHA256}"
          echo "::error::Refusing to fan out cascade against stale or corrupt PyPI surfaces."
          exit 1

      - name: Fan out via push to .runtime-version
        env:
          # Gitea PAT with write:repository scope on the 8 cascade-active
          # template repos. Used here for `git push` (NOT for an API
          # dispatch — Gitea 1.22.6 has no repository_dispatch endpoint;
          # empirically verified across 6 candidate paths in molecule-
          # core#20 issuecomment-913). The push trips each template's
          # existing `on: push: branches: [main]` trigger on
          # publish-image.yml, which then reads the updated
          # .runtime-version via its resolve-version job.
          DISPATCH_TOKEN: ${{ secrets.DISPATCH_TOKEN }}
          RUNTIME_VERSION: ${{ needs.publish.outputs.version }}
        run: |
          set +e  # don't abort on a single repo failure — collect them all

          # Soft-skip on workflow_dispatch when the token is missing
          # (operator ad-hoc test); hard-fail on push so unattended
          # publishes can't silently skip the cascade. Same shape as
          # the original v1, intentional split per the schedule-vs-
          # dispatch hardening 2026-04-28.
          #
          # NOTE(review): the messages below (and the final failure message)
          # tell the operator to re-dispatch with the same version. The
          # publish job would then try to upload an already-published
          # version, and `cascade` only runs if `publish` succeeds — confirm
          # whether "re-run failed jobs" on this run is the intended path.
          if [ -z "$DISPATCH_TOKEN" ]; then
            if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
              echo "::warning::DISPATCH_TOKEN secret not set — skipping cascade."
              echo "::warning::set it at Settings → Secrets and Variables → Actions, then rerun. Templates will stay on the prior runtime version until either this token is set or each template is rebuilt manually."
              exit 0
            fi
            echo "::error::DISPATCH_TOKEN secret missing — cascade cannot fan out."
            echo "::error::PyPI was published, but the 8 template repos will NOT pick up the new version until this token is restored and a republish dispatches the cascade."
            echo "::error::set it at Settings → Secrets and Variables → Actions; then re-trigger publish-runtime via workflow_dispatch."
            exit 1
          fi

          VERSION="$RUNTIME_VERSION"
          if [ -z "$VERSION" ]; then
            echo "::error::publish job did not expose a version output — cascade cannot fan out"
            exit 1
          fi

          # All 9 workspace templates declared in manifest.json. The list
          # MUST stay aligned with manifest.json's workspace_templates —
          # cascade-list-drift-gate.yml enforces this in CI per the
          # codex-stuck-on-stale-runtime invariant from PR #2556.
          # Long-term goal: derive this list from manifest.json so it
          # can't drift even on a manifest edit (RFC #388 Phase-1).
          #
          # Per-template publish-image.yml presence is checked at
          # cascade-time below: codex doesn't ship one today, so the
          # cascade soft-skips it with an informational message rather
          # than dropping it from this list (which would re-introduce
          # the drift the gate exists to catch).
          GITEA_URL="${GITEA_URL:-https://git.moleculesai.app}"
          TEMPLATES="claude-code hermes openclaw codex langgraph crewai autogen deepagents gemini-cli"
          FAILED=""
          SKIPPED=""

          # Configure git identity once. The persona owning DISPATCH_TOKEN
          # is the same identity that authored this commit on each
          # template; using a generic "publish-runtime cascade" co-author
          # trailer in the message keeps the audit trail honest about the
          # workflow-driven origin.
          git config --global user.name "publish-runtime cascade"
          git config --global user.email "publish-runtime@moleculesai.app"

          WORKDIR="$(mktemp -d)"
          for tpl in $TEMPLATES; do
            REPO="molecule-ai/molecule-ai-workspace-template-$tpl"
            CLONE="$WORKDIR/$tpl"

            # Pre-check: skip templates without a publish-image.yml.
            # The cascade's job is to trip the template's on-push
            # rebuild — if there's no rebuild workflow, pushing a
            # .runtime-version commit is just noise on the target
            # repo. Use the Gitea contents API (no clone required for
            # the probe). 200 = present; 404 = absent.
            HTTP=$(curl -sS -o /dev/null -w "%{http_code}" \
              -H "Authorization: token $DISPATCH_TOKEN" \
              "$GITEA_URL/api/v1/repos/$REPO/contents/.github/workflows/publish-image.yml")
            if [ "$HTTP" = "404" ]; then
              echo "↷ $tpl has no publish-image.yml — soft-skip (informational; manifest still tracks it)"
              SKIPPED="$SKIPPED $tpl"
              continue
            fi
            if [ "$HTTP" != "200" ]; then
              echo "::warning::$tpl publish-image.yml probe returned HTTP $HTTP — proceeding anyway, push will surface the real failure if any"
            fi

            # Use a per-template attempt loop so a transient race (e.g.
            # human pushing to the same template at the same instant)
            # doesn't lose the cascade. Bounded retries (3) — beyond
            # that we surface the failure and let the operator retry.
            # Every attempt starts from a fresh shallow clone, so a retry
            # always commits on top of the template's current main.
            attempt=0
            success=false
            while [ $attempt -lt 3 ]; do
              attempt=$((attempt + 1))
              rm -rf "$CLONE"
              if ! git clone --depth=1 \
                  "https://x-access-token:${DISPATCH_TOKEN}@${GITEA_URL#https://}/$REPO.git" \
                  "$CLONE" >/tmp/clone.log 2>&1; then
                echo "::warning::clone $tpl attempt $attempt failed: $(tail -n3 /tmp/clone.log)"
                sleep 2
                continue
              fi

              cd "$CLONE"
              echo "$VERSION" > .runtime-version

              # Idempotency guard: if the file already matches, this
              # publish is a re-run for a version already cascaded.
              # Don't push a no-op commit (would spuriously re-trip the
              # template's on-push and rebuild for nothing).
              #
              # Stage first and compare the index against HEAD: a plain
              # `git diff` ignores untracked files, so on a template that
              # has no .runtime-version yet an unstaged check would report
              # "no change" and the template would never get pinned.
              git add .runtime-version
              if git diff --cached --quiet -- .runtime-version; then
                echo "✓ $tpl already at $VERSION — no commit needed (idempotent)"
                success=true
                cd - >/dev/null
                break
              fi

              # Check the commit explicitly: under `set +e` a failed commit
              # would otherwise fall through to a push of the unchanged
              # HEAD, which succeeds as a no-op and reports a false "pushed".
              if ! git commit -m "chore: pin runtime to $VERSION (publish-runtime cascade)" \
                  -m "Co-Authored-By: publish-runtime cascade <publish-runtime@moleculesai.app>" \
                  >/tmp/commit.log 2>&1; then
                echo "::warning::commit $tpl attempt $attempt failed: $(tail -n3 /tmp/commit.log)"
                cd - >/dev/null
                continue
              fi

              if git push origin HEAD:main >/tmp/push.log 2>&1; then
                echo "✓ $tpl pushed $VERSION on attempt $attempt"
                success=true
                cd - >/dev/null
                break
              fi

              # Likely a non-fast-forward. Don't force-push: that would
              # silently overwrite a racing human/cascade commit. The next
              # attempt re-clones, which picks up the racing commit.
              echo "::warning::push $tpl attempt $attempt failed, retrying from a fresh clone: $(tail -n3 /tmp/push.log)"
              cd - >/dev/null
            done

            if [ "$success" != "true" ]; then
              FAILED="$FAILED $tpl"
            fi
          done
          rm -rf "$WORKDIR"

          if [ -n "$FAILED" ]; then
            echo "::error::Cascade incomplete after 3 retries each. Failed templates:$FAILED"
            echo "::error::PyPI publish succeeded; failed templates lag the new version. Re-run this workflow_dispatch with the same version to retry only the laggers (idempotent — already-cascaded templates skip)."
            exit 1
          fi
          if [ -n "$SKIPPED" ]; then
            echo "Cascade complete: pinned $VERSION on cascade-active templates. Soft-skipped (no publish-image.yml):$SKIPPED"
          else
            echo "Cascade complete: $VERSION pinned across all manifest workspace_templates."
          fi