feat(platform-agent): dedicated molecule-platform-agent image build (Phase 3b) #30

Closed
devops-engineer wants to merge 1 commit from feat/platform-agent-image into main
2 changed files with 285 additions and 0 deletions
@@ -0,0 +1,211 @@
name: publish-platform-agent-image

# Purpose
#   Build the dedicated molecule-platform-agent image from
#   images/platform-agent/Dockerfile and push it to ECR under two tags:
#     <REGISTRY>/molecule-ai/molecule-platform-agent:latest
#     <REGISTRY>/molecule-ai/molecule-platform-agent:sha-<7>
#   RFC: molecule-core docs/design/rfc-platform-agent.md §5.7.
#
# Build context
#   The image is FROM workspace-template-claude-code and bakes
#   molecule-mcp-server at /opt/molecule-mcp-server/dist/index.js (the path the
#   platform agent's config.yaml mcp_servers entry references). It is a
#   multi-stage build of a SECOND repo (molecule-mcp-server), so this workflow
#   checks that repo out into ./images/platform-agent/mcp-server before
#   building.
#
# Runner placement
#   Mirrors molecule-ai-workspace-template-claude-code
#   .gitea/workflows/publish-image.yml (the proven ECR pattern), including the
#   hard-won runner placement:
#     - runs-on: [publish, release] pins to op-host
#       molecule-runner-publish-{1,2}; `ubuntu-latest`/`publish`-only would
#       non-deterministically land on a Windows self-hosted runner where
#       `docker login --password-stdin` fails.
#     - GITHUB_SERVER_URL is pinned at workflow level (act_runner regen safety).
#
# REQUIRED repo secrets (provision on molecule-ai/molecule-ci before this is
# green):
#   - AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY
#       ECR push, acct 153263036946.
#   - MCP_CHECKOUT_TOKEN
#       Read token for the cross-repo checkout of molecule-mcp-server.
#   - CP_ADMIN_API_TOKEN / CP_ADMIN_API_TOKEN_STAGING
#       promote-pin; requires the CP to accept template_name "platform-agent"
#       in runtime_image_pins — Phase 3c.

on:
  push:
    branches:
      - main
  workflow_dispatch:

env:
  GITHUB_SERVER_URL: https://git.moleculesai.app
  ECR_REGISTRY: 153263036946.dkr.ecr.us-east-2.amazonaws.com
  IMAGE_NAME: 153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/molecule-platform-agent
  AWS_DEFAULT_REGION: us-east-2
  # Base workspace image handed to the Dockerfile as a build arg. :latest
  # follows the current claude-code template; switch to a :sha-<7> tag when a
  # fully reproducible build is required.
  BASE_IMAGE: 153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/workspace-template-claude-code:latest

permissions:
  contents: read
jobs:
resolve-version:
  # Publishes the 7-character short commit sha as a job output so the image
  # tag and the promote call downstream share one value.
  name: Resolve sha
  runs-on: ubuntu-latest
  timeout-minutes: 2
  outputs:
    sha: ${{ steps.read.outputs.sha }}
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

    - id: read
      shell: bash
      # ${GITHUB_SHA::7} is a bash substring expansion: the first 7 characters.
      run: |
        echo "sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
publish:
  # Builds the platform-agent image, smoke-tests the locally loaded result,
  # and only then pushes :latest and :sha-<7> to ECR. The digest reported by
  # the push step is exported as a job output for the promote-pin job.
  name: Build & push molecule-platform-agent image
  runs-on: [publish, release]
  timeout-minutes: 30
  needs: resolve-version
  outputs:
    digest: ${{ steps.push.outputs.digest }}
  steps:
    - name: Checkout molecule-ci
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

    # The Dockerfile copies from ./mcp-server inside its build context, so the
    # second repository is checked out directly into that directory.
    - name: Checkout molecule-mcp-server into build context
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        repository: molecule-ai/molecule-mcp-server
        ref: main
        token: ${{ secrets.MCP_CHECKOUT_TOKEN }}
        path: images/platform-agent/mcp-server

    - name: Log in to ECR
      env:
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      run: |
        set -euo pipefail
        aws ecr get-login-password --region us-east-2 | \
          docker login --username AWS --password-stdin "${ECR_REGISTRY}"

    - name: Verify Docker daemon access
      run: |
        set -euo pipefail
        docker info >/dev/null 2>&1 || { echo "::error::Docker daemon not accessible"; exit 1; }
        echo "Docker daemon OK"

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0

    - name: Ensure ECR repository exists
      env:
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      run: |
        set -euo pipefail
        # IMAGE_NAME is "<registry-host>/<namespace>/<repo>". The ECR
        # repository name is everything after the registry host, namespace
        # included, so strip through the FIRST "/" only. Stripping twice would
        # drop the namespace and check/create "molecule-platform-agent"
        # instead of the repository the push step targets.
        repo_path="${IMAGE_NAME#*/}"   # → molecule-ai/molecule-platform-agent
        if ! aws ecr describe-repositories --repository-names "${repo_path}" --region us-east-2 >/dev/null 2>&1; then
          aws ecr create-repository --repository-name "${repo_path}" \
            --image-scanning-configuration scanOnPush=true --region us-east-2 >/dev/null
          echo "::notice::created ECR repository ${repo_path}"
        else
          echo "ECR repository ${repo_path} already exists"
        fi

    # First build: load the image into the local Docker daemon so the smoke
    # test can run it. Nothing is pushed by this step.
    - name: Build image (load for smoke test, do not push yet)
      uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
      with:
        context: ./images/platform-agent
        file: ./images/platform-agent/Dockerfile
        platforms: linux/amd64
        load: true
        push: false
        tags: ${{ env.IMAGE_NAME }}:sha-${{ needs.resolve-version.outputs.sha }}
        build-args: |
          BASE_IMAGE=${{ env.BASE_IMAGE }}
        labels: |
          org.opencontainers.image.source=https://git.moleculesai.app/${{ github.repository }}
          org.opencontainers.image.revision=${{ github.sha }}
          org.opencontainers.image.description=Molecule AI org-level platform agent (claude-code + org-management MCP)

    # Runs inside the freshly built image: Node must report v20, the baked MCP
    # entry point must exist and pass a syntax check, and the MCP SDK
    # dependency directory must be present.
    - name: Smoke test — Node 20 + baked MCP entry present and parses
      shell: bash
      env:
        IMAGE: ${{ env.IMAGE_NAME }}:sha-${{ needs.resolve-version.outputs.sha }}
      run: |
        set -eu
        docker run --rm --entrypoint sh "${IMAGE}" -c '
          set -e
          node --version | grep -E "^v20\." || { echo "::error::Node is not v20"; exit 1; }
          test -f /opt/molecule-mcp-server/dist/index.js
          node --check /opt/molecule-mcp-server/dist/index.js
          test -d /opt/molecule-mcp-server/node_modules/@modelcontextprotocol
          echo "platform-agent image smoke OK: node20 + baked MCP entry parses"
        '

    # Second build with the same inputs, this time pushing both tags. The
    # step id is what the job-level `digest` output reads from.
    - name: Push image to ECR (post-smoke)
      id: push
      uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
      with:
        context: ./images/platform-agent
        file: ./images/platform-agent/Dockerfile
        platforms: linux/amd64
        push: true
        tags: |
          ${{ env.IMAGE_NAME }}:latest
          ${{ env.IMAGE_NAME }}:sha-${{ needs.resolve-version.outputs.sha }}
        build-args: |
          BASE_IMAGE=${{ env.BASE_IMAGE }}
        labels: |
          org.opencontainers.image.source=https://git.moleculesai.app/${{ github.repository }}
          org.opencontainers.image.revision=${{ github.sha }}
          org.opencontainers.image.description=Molecule AI org-level platform agent (claude-code + org-management MCP)
promote-pin:
  # After a successful publish on main, POSTs the pushed image digest to the
  # control-plane admin endpoint once per matrix entry (prod and staging).
  # fail-fast is off so one environment failing does not cancel the other.
  name: Promote runtime_image_pins (CP admin)
  runs-on: ubuntu-latest
  timeout-minutes: 2
  needs:
    - resolve-version
    - publish
  if: ${{ success() && github.ref == 'refs/heads/main' }}
  permissions:
    contents: read
  strategy:
    fail-fast: false
    matrix:
      include:
        - env_name: prod
          cp_host: api.moleculesai.app
        - env_name: staging
          cp_host: staging-api.moleculesai.app
  steps:
    - name: POST /cp/admin/runtime-image/promote (${{ matrix.env_name }})
      env:
        PROD_TOKEN: ${{ secrets.CP_ADMIN_API_TOKEN }}
        STAGING_TOKEN: ${{ secrets.CP_ADMIN_API_TOKEN_STAGING }}
        ENV_NAME: ${{ matrix.env_name }}
        CP_HOST: ${{ matrix.cp_host }}
        TEMPLATE_NAME: platform-agent
        IMAGE_DIGEST: ${{ needs.publish.outputs.digest }}
        GIT_SHA: ${{ needs.resolve-version.outputs.sha }}
      run: |
        set -euo pipefail

        # Select the token for this matrix entry; anything that is not
        # "prod" uses the staging token.
        case "${ENV_NAME}" in
          prod)
            admin_token="${PROD_TOKEN}"
            secret_label="CP_ADMIN_API_TOKEN"
            ;;
          *)
            admin_token="${STAGING_TOKEN}"
            secret_label="CP_ADMIN_API_TOKEN_STAGING"
            ;;
        esac

        # Guard clauses: refuse to call the API without a token or a digest.
        [ -n "${admin_token}" ] || {
          echo "::error::${secret_label} not configured — cannot promote ${ENV_NAME} pin"
          exit 1
        }
        [ -n "${IMAGE_DIGEST}" ] || {
          echo "::error::needs.publish.outputs.digest empty — Push step did not expose digest"
          exit 1
        }

        payload=$(printf '{"template_name":"%s","image_digest":"%s","git_sha":"%s","notes":"auto-promote via publish-platform-agent-image.yml -> %s"}' \
          "${TEMPLATE_NAME}" "${IMAGE_DIGEST}" "${GIT_SHA}" "${ENV_NAME}")

        # The response body goes to a temp file; stdout carries only the
        # HTTP status code via curl's -w format.
        resp_file=$(mktemp)
        status=$(curl -sS -o "${resp_file}" -w '%{http_code}' -X POST \
          -H "Authorization: Bearer ${admin_token}" \
          -H "Content-Type: application/json" -d "${payload}" \
          "https://${CP_HOST}/cp/admin/runtime-image/promote")
        echo "HTTP ${status}"
        cat "${resp_file}"
        echo

        # Only 200 and 201 count as a successful promote.
        case "${status}" in
          200|201)
            ;;
          *)
            echo "::error::${ENV_NAME} promote failed (HTTP ${status})"
            exit 1
            ;;
        esac
        echo "::notice::${ENV_NAME} runtime_image_pins.${TEMPLATE_NAME} bumped to ${IMAGE_DIGEST}"
+74
View File
@@ -0,0 +1,74 @@
# molecule-platform-agent image
# RFC: molecule-core docs/design/rfc-platform-agent.md §5.7
#
# A DEDICATED image, FROM the claude-code workspace template, that bakes in the
# org-management MCP server (molecule-mcp-server) so the org-level platform
# agent can drive the org alongside the always-on a2a MCP. It:
#
#   - bakes the MCP at the exact path the platform agent's config.yaml
#     references — `mcp_servers: [{name: platform, command: node,
#     args: [/opt/molecule-mcp-server/dist/index.js]}]` (Phase 2,
#     molecule-ai-workspace-template-claude-code claude_sdk_executor.py
#     _apply_extra_mcp_servers);
#   - pins Node 20 (the debian-slim base ships Node 18 via apt; the MCP's
#     @modelcontextprotocol/sdk expects >=20);
#   - keeps the org-admin MCP OUT of ordinary workspace images (security
#     hygiene) — ordinary workspaces declare no extra mcp_servers, so the
#     org-management surface only exists on this image.
#
# Build context expects the molecule-mcp-server source at ./mcp-server
# (the publish workflow checks it out there).

# ---- Global build args (must precede the first FROM) ----
# An ARG referenced by a FROM line has to be declared before the FIRST FROM.
# An ARG declared after a FROM is scoped to that build stage only and is not
# visible to later FROM instructions, so both image-selection args live here.
ARG MCP_BUILDER=node:20-bookworm-slim
# Pin to a specific workspace-template-claude-code digest/tag at build time via
# --build-arg BASE_IMAGE=...:sha-<7>; defaults to :latest for local builds.
ARG BASE_IMAGE=153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/workspace-template-claude-code:latest

# ---- Stage 1: build the MCP server (dist + production node_modules) ----
FROM ${MCP_BUILDER} AS mcpbuild
WORKDIR /build
# Install deps against the lockfile first for layer caching.
COPY mcp-server/package.json mcp-server/package-lock.json ./
RUN npm ci
# Build (tsc -> dist/), then drop devDependencies so only the runtime closure
# (@modelcontextprotocol/sdk, pino, zod, …) is carried into the final image.
COPY mcp-server/ ./
RUN npm run build \
    && npm prune --omit=dev \
    && test -f dist/index.js

# ---- Stage 2: the platform-agent runtime ----
FROM ${BASE_IMAGE}
# Pin Node 20. The base apt-installs `nodejs` (Node 18 on bookworm-slim); the
# org MCP needs >=20. Install Node 20 from NodeSource, replacing the apt nodejs
# so `node` resolves to 20 for both claude-code and the baked MCP.
USER root
RUN set -eux; \
    apt-get update; \
    apt-get install -y --no-install-recommends ca-certificates curl gnupg; \
    mkdir -p /etc/apt/keyrings; \
    curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \
        | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg; \
    echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" \
        > /etc/apt/sources.list.d/nodesource.list; \
    apt-get update; \
    apt-get install -y --no-install-recommends nodejs; \
    node --version | grep -E '^v20\.'; \
    rm -rf /var/lib/apt/lists/*
# Bake the org-management MCP at the config-referenced path.
COPY --from=mcpbuild /build/dist /opt/molecule-mcp-server/dist
COPY --from=mcpbuild /build/node_modules /opt/molecule-mcp-server/node_modules
COPY --from=mcpbuild /build/package.json /opt/molecule-mcp-server/package.json
# Smoke: the baked entry parses under Node 20 (syntax-only; does not start the
# stdio server). Catches an ESM/Node-version mismatch at build time.
RUN node --check /opt/molecule-mcp-server/dist/index.js \
    && test -d /opt/molecule-mcp-server/node_modules/@modelcontextprotocol
# Marker so the runtime/provisioner can assert it is on the platform-agent
# image (and not an ordinary workspace image) before declaring the platform MCP.
ENV MOLECULE_PLATFORM_AGENT_IMAGE_BAKED=1
# Return to the agent uid the base image runs as.
USER agent