molecule-core/.github/workflows/publish-platform-image.yml
Workflow config file is invalid. Please check your config file: yaml: line 70: could not find expected ':'
Hongming Wang 0064e61881 feat(ci): add Fly deploy step to publish-platform-image workflow
After pushing the tenant image to registry.fly.io, the workflow now
lists all running/stopped molecule-tenant machines and updates each
to the newly pushed image tag. Gracefully skips if no machines exist
(control plane provisions on demand).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-16 07:29:42 -07:00

189 lines
8.1 KiB
YAML

name: publish-platform-image
# Builds the tenant-platform Docker image and pushes it to GHCR and to the
# Fly registry mirror whenever a platform-relevant commit lands on main (or
# on a manual workflow_dispatch run), then updates any existing
# molecule-tenant Fly Machines to the newly pushed sha-<short-sha> tag.
# The private molecule-controlplane provisioner sets
# TENANT_IMAGE=ghcr.io/molecule-ai/platform:<tag> to spawn tenant Fly
# Machines from this image. See molecule-controlplane README for the pairing.
# NOTE(review): the comments on FLY_IMAGE_NAME in `env` say tenant machines
# pull from registry.fly.io/molecule-tenant rather than GHCR — confirm which
# registry TENANT_IMAGE actually names today.
# Triggers: pushes to main that touch platform-relevant paths, plus a manual
# dispatch. `push` and `workflow_dispatch` must be nested under `on`, and
# `branches` / `paths` under `push`, for GitHub to read them as filters.
on:
  push:
    branches: [main]
    paths:
      # Only rebuild when something platform-relevant changes — saves GHA
      # minutes on docs-only / canvas-only / MCP-only PRs.
      - 'platform/**'
      - '.github/workflows/publish-platform-image.yml'
      # Templates now live in standalone repos — template changes no longer
      # trigger a platform rebuild. Use workflow_dispatch to manually rebuild
      # if a template repo update needs to be baked into the image.
  # Manual trigger for re-publishing a tag after a non-platform merge.
  workflow_dispatch:
# Scopes granted to the workflow's GITHUB_TOKEN. Both entries must be nested
# under `permissions`; as top-level keys they would not be applied.
permissions:
  contents: read
  packages: write  # required to push to ghcr.io/${{ github.repository_owner }}/*
# Workflow-level variables, read by the build and deploy steps through the
# `env` context. Both names must be nested under `env` to be defined.
env:
  # GHCR accepts mixed-case, but most tooling lowercases — keep us consistent.
  IMAGE_NAME: ghcr.io/molecule-ai/platform
  # Fly registry mirror — tenant machines provisioned by the private
  # `molecule-controlplane` pull from here (private GHCR image can't be
  # pulled by Fly machines without auth plumbing we don't want to add).
  # Fly auto-authenticates same-org machines against registry.fly.io, so
  # mirroring keeps GHCR private while tenants still boot.
  FLY_IMAGE_NAME: registry.fly.io/molecule-tenant
jobs:
  # Single job: builds the platform image for linux/amd64 on the self-hosted
  # Apple-silicon runner, pushes it to GHCR and to the Fly registry, then
  # points existing molecule-tenant machines at the new sha-tagged image.
  build-and-push:
    runs-on: [self-hosted, macos, arm64]
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Isolate Docker config (skip keychain)
        # The Mac mini self-hosted runner runs as a non-interactive
        # launchd service; docker/login-action's default credential store
        # is the macOS Keychain, which raises
        #   error storing credentials - err: exit status 1, out:
        #   `User interaction is not allowed. (-25308)`
        # without an unlocked desktop session.
        #
        # Point DOCKER_CONFIG at a per-run temp dir. IMPORTANT: writing
        # `{"auths": {}}` alone is NOT enough — Docker on macOS picks up
        # `osxkeychain` as the default credential store even when
        # config.json doesn't declare one, inheriting from Docker
        # Desktop's bundled credsStore binding. We must explicitly set
        # `credsStore` to an empty string AND clear `credHelpers` so the
        # login step writes credentials into the auths map of this
        # disposable config.json rather than reaching for the keychain.
        # (First tried in #273 without the empty-credsStore line; #319
        # + #322 merges showed it still regressed.)
        #
        # Plus diagnostics: print the docker path so a future EACCES on
        # /usr/local/bin/docker surfaces in the log instead of via a
        # cryptic docker-login failure mid-step.
        shell: bash
        run: |
          set -euo pipefail
          mkdir -p "${RUNNER_TEMP}/docker-config"
          # Keep the heredoc body and the closing JSON marker indented at
          # least as far as the rest of this script. YAML strips the block's
          # common indent, so the marker reaches bash at column 0; a line
          # dedented further ends the YAML block scalar early and the
          # workflow file no longer parses.
          cat > "${RUNNER_TEMP}/docker-config/config.json" <<'JSON'
          {
            "auths": {},
            "credsStore": "",
            "credHelpers": {}
          }
          JSON
          echo "DOCKER_CONFIG=${RUNNER_TEMP}/docker-config" >> "${GITHUB_ENV}"
          echo "=== Runner docker diagnostics ==="
          echo "PATH=$PATH"
          command -v docker || echo "(docker not in PATH — the runner is missing the Docker CLI or it's not symlinked to a visible location)"
          docker --version 2>&1 || true
          ls -la /usr/local/bin/docker /opt/homebrew/bin/docker 2>&1 || true
          echo "=== config.json after setup ==="
          cat "${RUNNER_TEMP}/docker-config/config.json"

      - name: Set up QEMU
        # Required on the Apple-silicon self-hosted runner — Fly tenant machines
        # pull linux/amd64, and buildx needs binfmt handlers in Docker Desktop's
        # VM to emulate amd64 during the build.
        uses: docker/setup-qemu-action@v3
        with:
          platforms: linux/amd64

      - name: Set up Docker Buildx
        # Buildx enables cache-from/cache-to via GHA cache and multi-arch
        # builds without local docker daemon wrangling.
        uses: docker/setup-buildx-action@v3

      - name: Log in to GHCR
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Log in to Fly registry
        # username MUST be literal "x". Fly's registry returns 401 for any
        # other value (verified locally 2026-04-15 — "molecule-ai" fails,
        # "x" succeeds with the same token). The password is the FLY_API_TOKEN.
        # Rotation: see docs/runbooks/saas-secrets.md — FLY_API_TOKEN lives in
        # two places (GitHub Actions secret here + `fly secrets` on molecule-cp)
        # and MUST be updated in both on rotation.
        uses: docker/login-action@v3
        with:
          registry: registry.fly.io
          username: x
          password: ${{ secrets.FLY_API_TOKEN }}

      - name: Compute tags
        id: tags
        # Emit two tags per build: `latest` (floating, always the main tip)
        # and the short commit SHA (immutable, pin-friendly). Control plane
        # can deploy `latest` today and pin to :sha in Phase H hardening.
        # This step only outputs the 7-character short SHA; the build steps
        # below assemble the full `latest` and `sha-<short>` tag names.
        run: |
          echo "sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"

      - name: Build & push to GHCR
        # Split from the Fly mirror so a registry.fly.io outage doesn't block
        # GHCR (or vice versa) — each registry's failure mode is isolated.
        # GHA cache is shared because both steps re-use the same Dockerfile
        # context + build args.
        # Explicit linux/amd64 target: the runner is Apple-silicon (arm64),
        # but Fly tenant machines are amd64. QEMU handles the emulation.
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./platform/Dockerfile
          platforms: linux/amd64
          push: true
          tags: |
            ${{ env.IMAGE_NAME }}:latest
            ${{ env.IMAGE_NAME }}:sha-${{ steps.tags.outputs.sha }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          labels: |
            org.opencontainers.image.source=https://github.com/${{ github.repository }}
            org.opencontainers.image.revision=${{ github.sha }}
            org.opencontainers.image.description=Molecule AI tenant platform (one instance per org)

      - name: Build & push to Fly registry
        # Continues even if GHCR push failed — `if: always()` ensures the
        # private control plane's tenant-image mirror lands regardless of
        # any GHCR-side flakiness.
        # NOTE(review): the flyctl install/deploy steps below carry no `if:`,
        # so they are skipped whenever any earlier step (including the GHCR
        # push) failed, even if this mirror push succeeded — confirm that is
        # the intended behaviour.
        if: always()
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./platform/Dockerfile
          platforms: linux/amd64
          push: true
          tags: |
            ${{ env.FLY_IMAGE_NAME }}:latest
            ${{ env.FLY_IMAGE_NAME }}:sha-${{ steps.tags.outputs.sha }}
          cache-from: type=gha
          labels: |
            org.opencontainers.image.source=https://github.com/${{ github.repository }}
            org.opencontainers.image.revision=${{ github.sha }}
            org.opencontainers.image.description=Molecule AI tenant platform (one instance per org)

      - name: Install flyctl
        uses: superfly/flyctl-actions/setup-flyctl@master

      - name: Deploy to Fly tenant machines
        # Lists molecule-tenant machines in the `started` or `stopped` state
        # and updates each one to the sha-tagged image pushed above. Exits
        # successfully without doing anything when the list is empty.
        #
        # `shell: bash` + `set -euo pipefail` are deliberate: the default
        # shell for an unqualified `run:` is `bash -e` WITHOUT pipefail, so a
        # failing `flyctl machines list` (bad token, API outage) piped into
        # jq would produce an empty list, hit the "no machines" branch and
        # report success. With pipefail the step fails instead.
        env:
          FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
        shell: bash
        run: |
          set -euo pipefail
          MACHINES=$(flyctl machines list -a molecule-tenant --json | jq -r '.[] | select(.state == "started" or .state == "stopped") | .id')
          if [ -z "$MACHINES" ]; then
            echo "No tenant machines found — skipping deploy (control plane provisions on demand)"
            exit 0
          fi
          # Intentionally unquoted: one machine id per whitespace-separated word.
          for id in $MACHINES; do
            echo "Updating machine $id to sha-${{ steps.tags.outputs.sha }}..."
            flyctl machines update "$id" \
              --image "${{ env.FLY_IMAGE_NAME }}:sha-${{ steps.tags.outputs.sha }}" \
              -a molecule-tenant \
              --yes
          done
          echo "All tenant machines updated to sha-${{ steps.tags.outputs.sha }}"