From ab3accdf66e045815ea13f0b75782f48578d36f9 Mon Sep 17 00:00:00 2001 From: devops-engineer Date: Mon, 15 Jun 2026 17:33:09 -0700 Subject: [PATCH] fix(image): build+push the molecule-platform-agent concierge image (fixes concierge identity never deploying) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The concierge (kind=platform) is meant to run a dedicated molecule-platform-agent image that bakes its identity (config.yaml/prompts/mcp_servers/identity-fallback.sh) from the platform-agent template via Dockerfile.platform-agent (#2919/#2955). But: - manifest.json had NO platform-agent entry → clone-manifest.sh never staged the template at .tenant-bundle-deps/workspace-configs-templates/platform-agent → Dockerfile.platform-agent's COPY source was missing. - publish-workspace-server-image.yml never built Dockerfile.platform-agent → the molecule-ai/molecule-platform-agent ECR repo is EMPTY (0 images); molecule-ai/platform is a month stale (2026-05-15). So #2955's identity bake was built by nothing and pushed nowhere — the concierge falls back to a non-identity image and boots as generic Claude Code (user-reported on test2; verified: 218B config, no prompts, no identity after restart). Changes: - manifest.json: pin the platform-agent template (ref e5c8302, config.yaml present) so clone-manifest.sh stages it into the build context. Updates the _pinning_contract note (supersedes the closed #2959). - publish-workspace-server-image.yml: add a "Build & push platform-agent image" step — builds ./workspace-server/Dockerfile.platform-agent with BASE_IMAGE=${IMAGE_NAME}:staging-${sha} (the base platform image built in the preceding step), pushes molecule-ai/molecule-platform-agent:staging-${sha} + :staging-latest. Runs after the base platform build (FROM it). Follow-ups after merge: (1) promote runtime_image_pins('platform-agent') → molecule-platform-agent:staging-latest so the CP selects it (core#2495); (2) re-provision concierges → identity-fallback fills /configs. 
The template-delivery-e2e gate (#2971) asserts the concierge boots WITH identity, so it confirms the fix. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../publish-workspace-server-image.yml | 45 +++++++++++++++++++ manifest.json | 5 ++- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/.gitea/workflows/publish-workspace-server-image.yml b/.gitea/workflows/publish-workspace-server-image.yml index 02a41f140..2d3ae3af7 100644 --- a/.gitea/workflows/publish-workspace-server-image.yml +++ b/.gitea/workflows/publish-workspace-server-image.yml @@ -78,6 +78,13 @@ env: # touching every workflow). Pattern mirrors `vars.CP_URL || 'literal'` already in # use below in this repo's staging-verify.yml. IMAGE_NAME: ${{ vars.ECR_REGISTRY || '153263036946.dkr.ecr.us-east-2.amazonaws.com' }}/molecule-ai/platform + # Concierge (kind=platform) image: the base platform image + the platform-agent + # template's identity (config.yaml/prompts/mcp_servers/identity-fallback.sh) + # baked in via Dockerfile.platform-agent. Built from .tenant-bundle-deps/ + # workspace-configs-templates/platform-agent (staged by clone-manifest.sh from + # the manifest platform-agent entry). The CP selects this image for kind=platform + # (core#2495). Without this build the concierge boots with no identity (#2919/#2955). + PLATFORM_AGENT_IMAGE_NAME: ${{ vars.ECR_REGISTRY || '153263036946.dkr.ecr.us-east-2.amazonaws.com' }}/molecule-ai/molecule-platform-agent TENANT_IMAGE_NAME: ${{ vars.ECR_REGISTRY || '153263036946.dkr.ecr.us-east-2.amazonaws.com' }}/molecule-ai/platform-tenant STAGING_TENANT_IMAGE_NAME: ${{ vars.STAGING_ECR_REGISTRY || '004947743811.dkr.ecr.us-east-2.amazonaws.com' }}/molecule-ai/platform-tenant @@ -241,6 +248,44 @@ jobs: --tag "${IMAGE_NAME}:${TAG_LATEST}" \ --push . + # Build + push the CONCIERGE platform-agent image. 
Extends the base + # platform image (just built above, passed via BASE_IMAGE) with the + # concierge identity baked from the platform-agent template (staged at + # .tenant-bundle-deps/workspace-configs-templates/platform-agent by the + # Pre-clone step from the manifest platform-agent entry). MUST run AFTER + # the base platform build (FROM ${IMAGE_NAME}:${TAG_SHA}). The CP selects + # this image for kind=platform (core#2495); without it the concierge boots + # with no identity (#2919 image-bake / #2955 identity-fallback.sh). + - name: Build & push platform-agent image to ECR (staging- + staging-latest) + env: + IMAGE_NAME: ${{ env.IMAGE_NAME }} + PLATFORM_AGENT_IMAGE_NAME: ${{ env.PLATFORM_AGENT_IMAGE_NAME }} + TAG_SHA: staging-${{ steps.tags.outputs.sha }} + TAG_LATEST: staging-latest + GIT_SHA: ${{ steps.tags.outputs.sha }} + REPO: ${{ github.event.repository.name }} + GITHUB_RUN_ID: ${{ github.run_id }} + run: | + set -euo pipefail + ECR_REGISTRY="${PLATFORM_AGENT_IMAGE_NAME%%/*}" + aws ecr get-login-password --region us-east-2 | \ + docker login --username AWS --password-stdin "${ECR_REGISTRY}" + docker buildx build \ + --file ./workspace-server/Dockerfile.platform-agent \ + --build-arg BASE_IMAGE="${IMAGE_NAME}:${TAG_SHA}" \ + --provenance=false \ + --sbom=false \ + --build-arg GIT_SHA="${GIT_SHA}" \ + --label "org.opencontainers.image.source=https://git.moleculesai.app/molecule-ai/${REPO}" \ + --label "org.opencontainers.image.revision=${GIT_SHA}" \ + --label "org.opencontainers.image.created=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \ + --label "molecule.workflow.run_id=${GITHUB_RUN_ID}" \ + --cache-from "type=registry,ref=${PLATFORM_AGENT_IMAGE_NAME}:buildcache" \ + --cache-to "type=registry,ref=${PLATFORM_AGENT_IMAGE_NAME}:buildcache,mode=max,image-manifest=true,oci-mediatypes=true,ignore-error=true" \ + --tag "${PLATFORM_AGENT_IMAGE_NAME}:${TAG_SHA}" \ + --tag "${PLATFORM_AGENT_IMAGE_NAME}:${TAG_LATEST}" \ + --push . 
+ # Build + push tenant image (Go platform + Next.js canvas in one image). # Push the same build to the staging account too so fresh staging/E2E # tenants can pull without cross-account ECR reads. The staging ECR repo diff --git a/manifest.json b/manifest.json index b115dc13b..64a7dc417 100644 --- a/manifest.json +++ b/manifest.json @@ -1,6 +1,6 @@ { "_comment": "Platform template registry. Repos may be public or platform-private; CI and runtime template-cache refresh clone them with the SSOT-managed template read token, then strip .git metadata before use. Customer/private tenant templates remain outside this platform manifest.", - "_pinning_contract": "RFC #2927 — every entry's `ref` is pinned to an immutable commit SHA (not a branch like `main` and not a mutable tag). The previous `ref:main` exposure made provisioning non-reproducible — a merge to ANY template's `main` instantly reached every subsequent provision. Pinning restores: (a) reproducible identity (same SHA → same config.yaml + prompts + skills on every boot); (b) auditable provenance (the SHA is the artifact's content-address); (c) explicit upgrades (bumping a pin is a reviewed PR, not silent). CI test TestManifest_RefPinningCompleteness (workspace-server/internal/handlers/manifest_pinning_test.go) asserts the pinning contract: (1) every ref is a 40-char commit SHA, (2) every pinned SHA is reachable in the named repo, (3) workspace_template entries include config.yaml in the pinned ref's tree. To bump a pin: PR with the new SHA, tests run, driver reviews the diff. PLATFORM-AGENT IS NOT PINNED HERE: per #2919, the platform-agent template's `config.yaml` is being added in template PR #1; once merged AND config.yaml exists at the pinned SHA, add the entry here in a follow-up PR.", + "_pinning_contract": "RFC #2927 — every entry's `ref` is pinned to an immutable commit SHA (not a branch like `main` and not a mutable tag). 
The previous `ref:main` exposure made provisioning non-reproducible — a merge to ANY template's `main` instantly reached every subsequent provision. Pinning restores: (a) reproducible identity (same SHA → same config.yaml + prompts + skills on every boot); (b) auditable provenance (the SHA is the artifact's content-address); (c) explicit upgrades (bumping a pin is a reviewed PR, not silent). CI test TestManifest_RefPinningCompleteness (workspace-server/internal/handlers/manifest_pinning_test.go) asserts the pinning contract: (1) every ref is a 40-char commit SHA, (2) every pinned SHA is reachable in the named repo, (3) workspace_template entries include config.yaml in the pinned ref's tree. To bump a pin: PR with the new SHA, tests run, driver reviews the diff. PLATFORM-AGENT is now pinned (its config.yaml exists at the pinned SHA): clone-manifest.sh stages it at .tenant-bundle-deps/workspace-configs-templates/platform-agent, which Dockerfile.platform-agent COPYs to bake the concierge identity into the molecule-platform-agent image (publish-workspace-server-image.yml). 
The concierge (kind=platform) provisions on that image (core#2495).", "version": 1, "plugins": [ {"name": "browser-automation", "repo": "molecule-ai/molecule-ai-plugin-browser-automation", "ref": "7a3cea71e684fe87fc2847e2b105301b552a9098"}, @@ -31,7 +31,8 @@ {"name": "openclaw", "repo": "molecule-ai/molecule-ai-workspace-template-openclaw", "ref": "143e69b56f2530433141f5a87373e8a76578c52e"}, {"name": "codex", "repo": "molecule-ai/molecule-ai-workspace-template-codex", "ref": "070447a0afdf66ae6f2bb166ac3e2b2884456951"}, {"name": "google-adk", "repo": "molecule-ai/molecule-ai-workspace-template-google-adk", "ref": "3f9fd7ef6ea4dd912bb65446607f3c3c991ea76e"}, - {"name": "seo-agent", "repo": "molecule-ai/molecule-ai-workspace-template-seo-agent", "ref": "51bee3c0de03c7d38ddc153e7b9dc70e19ededd6", "runtime": "claude-code"} + {"name": "seo-agent", "repo": "molecule-ai/molecule-ai-workspace-template-seo-agent", "ref": "51bee3c0de03c7d38ddc153e7b9dc70e19ededd6", "runtime": "claude-code"}, + {"name": "platform-agent", "repo": "molecule-ai/molecule-ai-workspace-template-platform-agent", "ref": "e5c830293b9c7e68ba4abed3e4c1614e6801029c", "runtime": "claude-code"} ], "org_templates": [ {"name": "molecule-dev", "repo": "molecule-ai/molecule-ai-org-template-molecule-dev", "ref": "990d7b23f65dadd7afe05958a77eeb74082b4feb"}, -- 2.52.0