2026-06-16 02:22:30 +00:00
1 changed files with 56 additions and 38 deletions
@@ -218,7 +218,14 @@ jobs:
      #   - cache-from on a missing tag (first run) is a warning, not an error.
      #   - Concurrent publishes overwrite :buildcache last-writer-wins —
      #     same best-effort semantics as :staging-latest.
+      # NOTE: molecule-ai/platform is now ORPHANED — the concierge image was
+      # repointed to FROM platform-tenant (the live workspace-server image), and
+      # nothing else consumes molecule-ai/platform (it has not built since
+      # 2026-05-15). continue-on-error is set so that a failure of this dead
+      # base build can never block the tenant + platform-agent builds below.
+      # Remove this step in a follow-up once it is confirmed no consumer remains.
      - name: Build & push platform image to ECR (staging-<sha> + staging-latest)
+        continue-on-error: true
        env:
          IMAGE_NAME: ${{ env.IMAGE_NAME }}
          TAG_SHA: staging-${{ steps.tags.outputs.sha }}
@@ -248,44 +255,6 @@ jobs:
            --tag "${IMAGE_NAME}:${TAG_LATEST}" \
            --push .

-      # Build + push the CONCIERGE platform-agent image. Extends the base
-      # platform image (just built above, passed via BASE_IMAGE) with the
-      # concierge identity baked from the platform-agent template (staged at
-      # .tenant-bundle-deps/workspace-configs-templates/platform-agent by the
-      # Pre-clone step from the manifest platform-agent entry). MUST run AFTER
-      # the base platform build (FROM ${IMAGE_NAME}:${TAG_SHA}). The CP selects
-      # this image for kind=platform (core#2495); without it the concierge boots
-      # with no identity (#2919 image-bake / #2955 identity-fallback.sh).
-      - name: Build & push platform-agent image to ECR (staging-<sha> + staging-latest)
-        env:
-          IMAGE_NAME: ${{ env.IMAGE_NAME }}
-          PLATFORM_AGENT_IMAGE_NAME: ${{ env.PLATFORM_AGENT_IMAGE_NAME }}
-          TAG_SHA: staging-${{ steps.tags.outputs.sha }}
-          TAG_LATEST: staging-latest
-          GIT_SHA: ${{ steps.tags.outputs.sha }}
-          REPO: ${{ github.event.repository.name }}
-          GITHUB_RUN_ID: ${{ github.run_id }}
-        run: |
-          set -euo pipefail
-          ECR_REGISTRY="${PLATFORM_AGENT_IMAGE_NAME%%/*}"
-          aws ecr get-login-password --region us-east-2 | \
-            docker login --username AWS --password-stdin "${ECR_REGISTRY}"
-          docker buildx build \
-            --file ./workspace-server/Dockerfile.platform-agent \
-            --build-arg BASE_IMAGE="${IMAGE_NAME}:${TAG_SHA}" \
-            --provenance=false \
-            --sbom=false \
-            --build-arg GIT_SHA="${GIT_SHA}" \
-            --label "org.opencontainers.image.source=https://git.moleculesai.app/molecule-ai/${REPO}" \
-            --label "org.opencontainers.image.revision=${GIT_SHA}" \
-            --label "org.opencontainers.image.created=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
-            --label "molecule.workflow.run_id=${GITHUB_RUN_ID}" \
-            --cache-from "type=registry,ref=${PLATFORM_AGENT_IMAGE_NAME}:buildcache" \
-            --cache-to "type=registry,ref=${PLATFORM_AGENT_IMAGE_NAME}:buildcache,mode=max,image-manifest=true,oci-mediatypes=true,ignore-error=true" \
-            --tag "${PLATFORM_AGENT_IMAGE_NAME}:${TAG_SHA}" \
-            --tag "${PLATFORM_AGENT_IMAGE_NAME}:${TAG_LATEST}" \
-            --push .
-
      # Build + push tenant image (Go platform + Next.js canvas in one image).
      # Push the same build to the staging account too so fresh staging/E2E
      # tenants can pull without cross-account ECR reads. The staging ECR repo
@@ -358,6 +327,55 @@ jobs:
            fi
          done

+      # Build + push the CONCIERGE platform-agent image. Extends the LIVE
+      # platform-tenant image (built just above) with the concierge identity
+      # baked from the platform-agent template (staged at .tenant-bundle-deps/
+      # workspace-configs-templates/platform-agent by the Pre-clone step from
+      # the manifest platform-agent entry). MUST run AFTER the tenant build: the
+      # BASE_IMAGE is ${TENANT_IMAGE_NAME}:${TAG_SHA}, the live workspace-server
+      # image that concierges already run (its /entrypoint.sh is entrypoint-tenant.sh,
+      # which the platform-agent wrapper chains to). The dead molecule-ai/platform
+      # base (unbuilt since 2026-05-15) is deliberately NOT used. CP selects this
+      # image for kind=platform (core#2495); without it the concierge boots with
+      # no identity (#2919 image-bake / #2955 identity-fallback.sh).
+      - name: Build & push platform-agent image to ECR (staging-<sha> + staging-latest)
+        env:
+          TENANT_IMAGE_NAME: ${{ env.TENANT_IMAGE_NAME }}
+          PLATFORM_AGENT_IMAGE_NAME: ${{ env.PLATFORM_AGENT_IMAGE_NAME }}
+          TAG_SHA: staging-${{ steps.tags.outputs.sha }}
+          TAG_LATEST: staging-latest
+          GIT_SHA: ${{ steps.tags.outputs.sha }}
+          REPO: ${{ github.event.repository.name }}
+          GITHUB_RUN_ID: ${{ github.run_id }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          AWS_DEFAULT_REGION: us-east-2
+        run: |
+          set -euo pipefail
+          ECR_REGISTRY="${PLATFORM_AGENT_IMAGE_NAME%%/*}"
+          aws ecr get-login-password --region us-east-2 | \
+            docker login --username AWS --password-stdin "${ECR_REGISTRY}"
+          # Per-run builder: reuse one left by an earlier attempt; remove it on any exit.
+          builder="pa-builder-${GITHUB_RUN_ID}"
+          trap 'docker buildx rm "${builder}" >/dev/null 2>&1 || true' EXIT
+          docker buildx create --name "${builder}" --use >/dev/null 2>&1 \
+            || docker buildx inspect "${builder}" >/dev/null
+          docker buildx build \
+            --builder "${builder}" --file ./workspace-server/Dockerfile.platform-agent \
+            --build-arg BASE_IMAGE="${TENANT_IMAGE_NAME}:${TAG_SHA}" \
+            --build-arg GIT_SHA="${GIT_SHA}" \
+            --provenance=false --sbom=false \
+            --label "org.opencontainers.image.source=https://git.moleculesai.app/molecule-ai/${REPO}" \
+            --label "org.opencontainers.image.revision=${GIT_SHA}" \
+            --label "org.opencontainers.image.created=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
+            --label "molecule.workflow.run_id=${GITHUB_RUN_ID}" \
+            --cache-from "type=registry,ref=${PLATFORM_AGENT_IMAGE_NAME}:buildcache" \
+            --cache-to "type=registry,ref=${PLATFORM_AGENT_IMAGE_NAME}:buildcache,mode=max,image-manifest=true,oci-mediatypes=true,ignore-error=true" \
+            --tag "${PLATFORM_AGENT_IMAGE_NAME}:${TAG_SHA}" \
+            --tag "${PLATFORM_AGENT_IMAGE_NAME}:${TAG_LATEST}" \
+            --push . || { echo "::error::platform-agent image build failed"; exit 1; }
+          echo "::notice::platform-agent image pushed: ${PLATFORM_AGENT_IMAGE_NAME}:${TAG_SHA}"
+
  # Staging auto-deploy: every workspace-server image publish on main should
  # roll out to the staging fleet so code fixes reach staging without a
  # manual workflow_dispatch. Gitea 1.22.6 does not support workflow_run, so