Gitea is case-sensitive on owner slugs; canonical is lowercase `molecule-ai/...`. Mixed-case `Molecule-AI/...` refs fail-at-0s when the runner tries to resolve the cross-repo workflow / checkout. Same fix as molecule-controlplane#12. Mechanical case-correction; no behavior change beyond making CI resolve again. Refs: internal#46 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
398 lines
20 KiB
YAML
398 lines
20 KiB
YAML
name: Publish Workspace Template Image

# Reusable workflow shared by every
# molecule-ai/molecule-ai-workspace-template-* repo. It builds the
# template's Dockerfile on main and pushes the result to GHCR as
# `ghcr.io/molecule-ai/workspace-template-<runtime>:latest`, plus a
# per-commit `sha-<7>` tag. <runtime> is derived automatically from the
# caller repo name, which keeps the per-repo wrapper down to one line.
#
# Each template repo calls it like this:
#
#   name: publish-image
#   on:
#     push: { branches: [main] }
#     workflow_dispatch:
#   permissions:
#     contents: read
#     packages: write
#   jobs:
#     publish:
#       uses: molecule-ai/molecule-ci/.github/workflows/publish-template-image.yml@v1
#       secrets: inherit
#
# Runner choice (2026-04-22): ubuntu-latest
#   - All caller repos are PUBLIC → GHA-hosted minutes are free.
#   - Targets are linux/amd64 natively; Ubuntu runners skip the QEMU
#     emulation our arm64 Mac mini had to go through, so builds run
#     ~2-3x faster, with no queue wait when the Mac mini is busy.
#   - No macOS Keychain gymnastics — standard docker/login-action works.
# The self-hosted Mac mini remains in service for private repo
# workflows (see memory: feedback_selfhosted_runner).
|
|
|
|
# Trigger: this file is only ever invoked as a reusable workflow
# (`uses: …/publish-template-image.yml@…`) from a template repo.
on:
  workflow_call:
    inputs:
      runtime_name:
        description: >-
          Optional explicit runtime name. When unset, derived from
          the caller repo name (strips `molecule-ai-workspace-template-`
          prefix). Override only if the image should diverge.
        required: false
        type: string
        default: ""
      runtime_version:
        description: >-
          molecule-ai-workspace-runtime version to install. Forwarded
          as RUNTIME_VERSION docker build-arg. When unset, the
          Dockerfile's requirements.txt pin is used. Cascade-triggered
          builds forward client_payload.runtime_version here so each
          rebuild has a unique build-arg → unique cache key →
          guaranteed fresh `pip install`. Solves the
          "cascade rebuilt but image still has old runtime" cache
          trap that bit us repeatedly on 2026-04-27.
        required: false
        type: string
        default: ""
    outputs:
      # NOTE: the value is the bare repository reference computed by the
      # `tags` step — it carries NO tag. The previous description
      # ("with :latest tag") suggested otherwise.
      image:
        description: "Image reference that was pushed, without a tag (published as :latest and :sha-<sha>)"
        value: ${{ jobs.publish.outputs.image }}
      # NOTE: the value is the 7-character commit SHA only; the pushed
      # tag itself is `sha-<sha>`.
      sha:
        description: "Short (7-character) commit SHA; the image is tagged sha-<sha> alongside :latest"
        value: ${{ jobs.publish.outputs.sha }}
|
|
|
|
jobs:
  publish:
    name: Build & push template image
    runs-on: ubuntu-latest
    # Surfaced to callers through the workflow_call `outputs` mapping;
    # both values are produced by the step with `id: tags`.
    outputs:
      sha: ${{ steps.tags.outputs.sha }}
      image: ${{ steps.tags.outputs.image }}
    steps:
|
|
# Later steps read the Dockerfile, the top-level *.py files and
# config.yaml straight from the checked-out working tree.
- name: Checkout
  uses: actions/checkout@v4
|
|
|
|
- name: Derive runtime name + image reference
  # Computes the three step outputs consumed by the rest of the job:
  #   runtime – explicit `runtime_name` input, else the caller repo name
  #             with the `molecule-ai-workspace-template-` prefix removed
  #   image   – untagged GHCR reference for that runtime (lowercased)
  #   sha     – first 7 characters of GITHUB_SHA
  # Fails the job when no runtime name can be determined.
  id: tags
  shell: bash
  env:
    EXPLICIT_RUNTIME: ${{ inputs.runtime_name }}
    REPO_NAME: ${{ github.event.repository.name }}
  run: |
    set -eu
    if [ -n "${EXPLICIT_RUNTIME}" ]; then
      RUNTIME="${EXPLICIT_RUNTIME}"
    else
      # Repo naming convention:
      #   molecule-ai-workspace-template-<runtime>
      # Strip the prefix to get <runtime>.
      case "${REPO_NAME}" in
        molecule-ai-workspace-template-*)
          RUNTIME="${REPO_NAME#molecule-ai-workspace-template-}"
          ;;
        *)
          echo "::error::Repo name '${REPO_NAME}' does not match 'molecule-ai-workspace-template-<runtime>' — pass runtime_name explicitly." >&2
          exit 1
          ;;
      esac
    fi
    # A repo named exactly `molecule-ai-workspace-template-` matches the
    # pattern above but leaves nothing after the prefix; stop here with
    # a clear message instead of building `…/workspace-template-`.
    if [ -z "${RUNTIME}" ]; then
      echo "::error::Derived runtime name is empty (repo name '${REPO_NAME}') — pass runtime_name explicitly." >&2
      exit 1
    fi
    # Image repository names must be lowercase; a mixed-case repo or
    # runtime_name would otherwise only fail later, at the build step,
    # with an "invalid reference format" error. The `runtime` output
    # keeps the caller's original spelling.
    IMAGE="ghcr.io/molecule-ai/workspace-template-${RUNTIME,,}"
    SHA="${GITHUB_SHA::7}"
    echo "runtime=${RUNTIME}" >> "$GITHUB_OUTPUT"
    echo "image=${IMAGE}" >> "$GITHUB_OUTPUT"
    echo "sha=${SHA}" >> "$GITHUB_OUTPUT"
    echo "::notice::Publishing runtime='${RUNTIME}' → ${IMAGE}:latest + :sha-${SHA}"
|
|
|
|
- name: Lint — no bare imports of runtime modules
  # A template that bare-imports a workspace/ runtime module (e.g.
  # `from plugins import load_plugins` rather than
  # `from molecule_runtime.plugins import load_plugins`) works in the
  # monorepo's bundled-runtime layout, but dies at startup with
  # `ModuleNotFoundError` once the runtime is installed as a package.
  # On 2026-04-27 this hit claude-code (5 imports), langgraph,
  # deepagents, and gemini-cli — each one a separate
  # workspace-stuck-in-provisioning incident.
  #
  # Source of truth: molecule_runtime/_runtime_modules.json inside the
  # published wheel (emitted by scripts/build_runtime_package.py).
  # Reading the manifest from PyPI's latest wheel keeps the lint in
  # step with the rewriter's actual closed list. When the manifest
  # cannot be fetched (older wheel, PyPI down, etc.) the inline list
  # below — known to be correct as of 2026-04-27 — is used instead, so
  # a fetch failure never turns into a silent pass.
  #
  # Fail-fast: runs before docker login + buildx setup so a bad PR
  # goes red in seconds, not minutes.
  shell: bash
  run: |
    set -eu

    # Inline fallback, consulted only when the manifest fetch fails.
    # Mirrors scripts/build_runtime_package.py:TOP_LEVEL_MODULES at
    # the time this comment was written.
    fallback_list='plugins|adapter_base|config|main|preflight|prompt|coordinator|consolidation|events|heartbeat|transcript_auth|runtime_wedge|watcher|skill_loader|policies|adapters|builtin_tools|executor_helpers|a2a_executor|a2a_client|a2a_tools|a2a_cli|a2a_mcp_server|agent|agents_md|initial_prompt|molecule_ai_status|platform_auth|shared_runtime'

    wheel_dir=/tmp/runtime-wheel
    module_list=""
    mkdir -p "${wheel_dir}"
    if pip download --quiet molecule-ai-workspace-runtime --no-deps -d "${wheel_dir}" 2>/dev/null; then
      wheel_path=$(ls "${wheel_dir}"/*.whl 2>/dev/null | head -1)
      if [ -n "${wheel_path}" ]; then
        # Union of top-level module and subpackage names — either kind
        # can be bare-imported. Any failure in the pipeline collapses
        # to an empty string, which selects the fallback below.
        module_list=$(unzip -p "${wheel_path}" molecule_runtime/_runtime_modules.json 2>/dev/null \
          | python3 -c "import sys,json; m=json.load(sys.stdin); print('|'.join(sorted(set(m['top_level_modules']) | set(m['subpackages']))))" 2>/dev/null || echo "")
      fi
    fi

    if [ -z "${module_list}" ]; then
      module_list="${fallback_list}"
      echo "::warning::could not read _runtime_modules.json from PyPI wheel — using inline fallback list"
    else
      echo "::notice::lint module list pulled from molecule-ai-workspace-runtime wheel manifest"
    fi

    # `from <module> import` at column 0 or after leading whitespace,
    # so imports nested in functions / if / try blocks are caught too.
    if matches=$(grep -nE "^\s*from (${module_list}) import" *.py 2>/dev/null); then
      echo "::error::Bare imports of runtime modules found — must use \`from molecule_runtime.<module> import\`"
      sed 's/^/ /' <<< "${matches}"
      echo "::error::Fix: prefix each match with 'molecule_runtime.' (e.g. 'from plugins' → 'from molecule_runtime.plugins')."
      exit 1
    fi
    echo "::notice::✓ no bare imports of runtime modules in template *.py files"
|
|
|
|
# Authenticates docker against ghcr.io with the job's GITHUB_TOKEN so
# the push step at the end of the job can publish the image.
- name: Log in to GHCR
  uses: docker/login-action@v3
  with:
    registry: ghcr.io
    password: ${{ secrets.GITHUB_TOKEN }}
    username: ${{ github.actor }}
|
|
|
|
# Buildx is set up ahead of the two docker/build-push-action steps
# that follow.
- name: Set up Docker Buildx
  uses: docker/setup-buildx-action@v3
|
|
|
|
- name: Build template image (load for smoke test, do not push yet)
  # The image is first built into the runner's local docker so the
  # smoke tests can actually boot it. :latest + :sha-* are pushed only
  # AFTER the smoke tests pass — that gate is what keeps a broken
  # image from poisoning :latest. Background: the 2026-04-27 outage,
  # where the template's adapter.py imported a symbol
  # (RuntimeCapabilities) that the published runtime didn't yet
  # export. The old smoke test only inspected the entrypoint string,
  # so the broken image shipped to GHCR and every workspace provision
  # hung.
  uses: docker/build-push-action@v6
  with:
    context: .
    file: ./Dockerfile
    platforms: linux/amd64
    tags: ${{ steps.tags.outputs.image }}:sha-${{ steps.tags.outputs.sha }}
    # Local load only — nothing leaves the runner in this step.
    load: true
    push: false
    # RUNTIME_VERSION is empty by default. When the cascade fires (or
    # workflow_dispatch is invoked with a version), it is the exact
    # runtime version about to be installed. It is forwarded as a
    # build-arg so Dockerfiles that declare `ARG RUNTIME_VERSION` get
    # cache-key invalidation per version. Templates that don't
    # declare the ARG silently ignore it (no breakage).
    build-args: |
      RUNTIME_VERSION=${{ inputs.runtime_version }}
    labels: |
      org.opencontainers.image.source=https://github.com/${{ github.repository }}
      org.opencontainers.image.revision=${{ github.sha }}
      org.opencontainers.image.description=Molecule AI workspace template — ${{ steps.tags.outputs.runtime }} runtime
    cache-from: type=gha
    cache-to: type=gha,mode=max
|
|
|
|
- name: Smoke test — boot image and import every /app/*.py
  # The real boot test: every Python module under /app/ is imported
  # inside the image. That exercises:
  #   - adapter.py exists, has no syntax errors, and all of its
  #     module-level imports resolve against the pip-installed runtime
  #     version (catches version skew — symbol added to runtime but
  #     PyPI not yet republished, etc.)
  #   - executor.py / cli_executor.py / claude_sdk_executor.py / etc. —
  #     sibling modules that adapter.py imports lazily inside
  #     create_executor(). A plain `import adapter` misses bugs there
  #     because they sit behind `def create_executor`. This bit hermes
  #     (a2a-sdk migration) and langgraph (LangGraphA2AExecutor bare
  #     import) on 2026-04-27.
  #   - cross-cutting: any bare `from <runtime_module>` (the lint step
  #     catches these statically; this catches them at resolution
  #     time too, plus imports of third-party packages that the lint
  #     can't reason about).
  # The gosu/agent entrypoint is bypassed with --entrypoint sh because
  # an import smoke doesn't need workspace permissions.
  shell: bash
  env:
    IMAGE: ${{ steps.tags.outputs.image }}:sha-${{ steps.tags.outputs.sha }}
  run: |
    set -eu
    # One python3 process per module, so a failure names the exact
    # file that could not be imported.
    docker run --rm --entrypoint sh "${IMAGE}" -c '
      set -e
      cd /app
      for f in *.py; do
        if [ "$f" = "__init__.py" ]; then
          continue
        fi
        mod="${f%.py}"
        if ! python3 -c "import $mod"; then
          echo "::error::failed to import $mod"
          exit 1
        fi
        echo " ✓ $mod"
      done
    '
    echo "::notice::✓ ${IMAGE} all /app/*.py modules import cleanly against installed runtime"
|
|
|
|
# The name is quoted on purpose: in a plain (unquoted) YAML scalar a
# `#` preceded by whitespace starts a comment, so ` #131)` would be
# dropped and the step would display as "… (#2275, task".
- name: 'Boot smoke — execute() against stub deps (#2275, task #131)'
  # The static import smoke only IMPORTs /app/*.py — lazy imports
  # buried inside `async def execute(...)` bodies (e.g.
  # `from a2a.types import FilePart`) NEVER evaluate at static-import
  # time. The 2026-04-2x v0→v1 a2a-sdk migration shipped 5 such
  # regressions in templates that all looked fine at module-load smoke
  # (claude-code, langgraph, deepagents, gemini-cli, hermes — every
  # one a separate provisioning incident).
  #
  # This step boots the image with MOLECULE_SMOKE_MODE=1, which routes
  # molecule-runtime through smoke_mode.run_executor_smoke() — it
  # invokes executor.execute(stub_ctx, stub_queue) once with a short
  # timeout. Healthy import tree → execution proceeds far enough to
  # hit a network boundary and times out (exit 0). Broken lazy import
  # → ImportError/ModuleNotFoundError from inside the executor body
  # (exit 1).
  #
  # Universal turn-smoke (task #131): run_executor_smoke also consults
  # runtime_wedge.is_wedged() at the end of every result path and
  # upgrades a provisional PASS to FAIL when an adapter marked the
  # runtime wedged. Catches PR-25-class regressions (claude-agent-sdk
  # init wedge from a malformed CLI argv) where the SDK takes 60s to
  # time out on `initialize()` — the outer wait_for must outlast that
  # handshake so the adapter's wedge catch arm runs before the smoke
  # gives up. That's why the smoke timeout is 90s (NOT the original
  # 10s) and the outer `timeout` wrapper is 120s (NOT 60s). Lowering
  # either back makes this gate blind to init-wedge bugs again —
  # confirm with an injected wedge in test_smoke_mode.py before
  # changing.
  #
  # Requires runtime >= 0.1.60 (the version that introduced
  # smoke_mode). Older runtimes silently no-op and would hang on
  # uvicorn, so the module is detected first and the step is skipped
  # if it is absent — this lets templates pinned to older runtimes
  # continue to publish without this gate flipping red, while every
  # fresh cascade-triggered build (which forwards the just-published
  # version as RUNTIME_VERSION) gets the gate automatically.
  #
  # Wrapped in `timeout` as a belt-and-suspenders safety net in case
  # smoke_mode itself wedges — the runner shouldn't hang indefinitely
  # on a single template.
  shell: bash
  env:
    IMAGE: ${{ steps.tags.outputs.image }}:sha-${{ steps.tags.outputs.sha }}
  run: |
    set -eu

    # Probe the image for molecule_runtime.smoke_mode; the container
    # prints "yes" or "no" and the step is skipped (exit 0) on "no".
    HAS_SMOKE_MODE=$(docker run --rm --entrypoint sh "${IMAGE}" -c \
      'python3 -c "import molecule_runtime.smoke_mode" >/dev/null 2>&1 && echo yes || echo no')
    if [ "${HAS_SMOKE_MODE}" = "no" ]; then
      echo "::warning::installed runtime predates molecule-core#2275 (no molecule_runtime.smoke_mode); skipping boot smoke. Bump requirements.txt to molecule-ai-workspace-runtime>=0.1.60 to enable."
      exit 0
    fi

    if [ ! -f config.yaml ]; then
      echo "::error::config.yaml not found at repo root — boot smoke needs it to populate /configs. Templates without a config.yaml at root cannot be boot-smoked; either add one or skip this gate by setting an old runtime pin."
      exit 1
    fi

    # Mount the repo's own config.yaml at /configs so the runtime can
    # reach create_executor() — that's where the lazy imports we want
    # to test actually live. The image's entrypoint drops priv from
    # root to agent (uid 1000) before exec'ing molecule-runtime, so
    # /configs needs to be readable AND traversable from uid 1000.
    #
    # Use `a+rX` (capital X — only adds x where it's already
    # executable, i.e. directories): mktemp -d creates the dir with
    # mode 700, so a bare `go+r` would leave the dir un-traversable
    # for agent and config.py would PermissionError on
    # `Path('/configs/config.yaml').exists()`. Mount RW (not :ro) so
    # the entrypoint's `chown -R agent /configs` succeeds — its silent
    # chown failure on a :ro mount was the original symptom.
    SMOKE_CONFIG_DIR=$(mktemp -d)
    cp config.yaml "${SMOKE_CONFIG_DIR}/"
    chmod -R a+rX "${SMOKE_CONFIG_DIR}"

    # Stub credentials — adapters validate shape at create_executor
    # time but the smoke times out before any real call goes out. Set
    # the common ones so any adapter that early-validates a specific
    # key sees a non-empty value.
    # PYTHONPATH=/app mirrors what the platform's provisioner injects
    # at workspace startup (workspace-server/internal/provisioner/
    # provisioner.go:563). Without it,
    # `importlib.import_module('adapter')` in the runtime's preflight
    # check fails with ModuleNotFoundError because molecule-runtime is
    # a console_scripts entry point — sys.path[0] is /usr/local/bin,
    # NOT /app. The static import smoke step doesn't hit this because
    # `python3 -c "import $mod"` adds cwd to sys.path; only the
    # entry-point invocation needs PYTHONPATH.
    #
    # errexit is suspended around the run so the container's exit code
    # can be captured and classified below instead of aborting here.
    set +e
    # MOLECULE_SMOKE_TIMEOUT_SECS=90 is calibrated to outlast
    # claude-agent-sdk's 60s initialize() handshake (see the step
    # comment + workspace/smoke_mode.py top docstring) so adapter
    # wedge catch arms run before run_executor_smoke gives up. Outer
    # `timeout 120` is the runner-level safety net — slightly longer
    # than the inner timeout so a hung smoke_mode itself surfaces as
    # exit 124 and gets a clear error message instead of just
    # `exit 1`.
    timeout 120 docker run --rm \
      -v "${SMOKE_CONFIG_DIR}:/configs" \
      -e WORKSPACE_ID=fake-smoke \
      -e PYTHONPATH=/app \
      -e MOLECULE_SMOKE_MODE=1 \
      -e MOLECULE_SMOKE_TIMEOUT_SECS=90 \
      -e CLAUDE_CODE_OAUTH_TOKEN=sk-fake-smoke-token \
      -e ANTHROPIC_API_KEY=sk-fake-smoke-key \
      -e GEMINI_API_KEY=fake-smoke-key \
      -e OPENAI_API_KEY=sk-fake-smoke-key \
      "${IMAGE}"
    rc=$?
    set -e
    # Cleanup is best-effort: the entrypoint chowns /configs to uid
    # 1000 (agent) inside the container, which propagates to the host
    # bind-mount, leaving the runner user unable to remove the files.
    # Fall back to `sudo rm` and ignore any remaining failure — the
    # runner is ephemeral, /tmp is cleaned automatically post-job.
    rm -rf "${SMOKE_CONFIG_DIR}" 2>/dev/null \
      || sudo rm -rf "${SMOKE_CONFIG_DIR}" 2>/dev/null \
      || true

    # 124 is the exit code `timeout` uses when it had to kill the
    # command, i.e. the outer 120s safety net fired.
    if [ "${rc}" -eq 124 ]; then
      echo "::error::boot smoke wedged past 120s — smoke_mode itself failed to terminate (look for blocking calls before MOLECULE_SMOKE_TIMEOUT_SECS fires)"
      exit 1
    fi
    if [ "${rc}" -ne 0 ]; then
      echo "::error::boot smoke failed (exit ${rc}) — executor.execute() raised an import error OR an adapter marked runtime_wedge.is_wedged() (PR-25-class init wedge). Check the container log above for the offending lazy import or wedge reason."
      exit "${rc}"
    fi
    echo "::notice::✓ ${IMAGE} executor.execute() smoke passed (imports healthy, no runtime wedge)"
|
|
|
|
- name: Push image to GHCR (post-smoke)
  # The smoke tests have passed at this point, so both tags are
  # pushed. build-push-action reuses the cached build from the load
  # step, which makes this effectively a layer push rather than a
  # rebuild. The build-args are repeated unchanged for cache key
  # consistency.
  uses: docker/build-push-action@v6
  with:
    context: .
    file: ./Dockerfile
    platforms: linux/amd64
    tags: |
      ${{ steps.tags.outputs.image }}:latest
      ${{ steps.tags.outputs.image }}:sha-${{ steps.tags.outputs.sha }}
    push: true
    build-args: |
      RUNTIME_VERSION=${{ inputs.runtime_version }}
    labels: |
      org.opencontainers.image.source=https://github.com/${{ github.repository }}
      org.opencontainers.image.revision=${{ github.sha }}
      org.opencontainers.image.description=Molecule AI workspace template — ${{ steps.tags.outputs.runtime }} runtime
    cache-from: type=gha
    cache-to: type=gha,mode=max
|