From cfe759c6488c0eab266b18bbb75bfb06af8cb9f2 Mon Sep 17 00:00:00 2001 From: devops-engineer Date: Mon, 15 Jun 2026 18:31:15 -0700 Subject: [PATCH] fix(image): auto-bump the platform-agent concierge image pin on every publish (true auto-bump) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #2976 added the molecule-platform-agent build but stopped there — the pin was never promoted, so the concierge never picked up the new identity image (manual job). Per the standing auto-bump-to-prod directive AND the CP's own design (runtime_image_pin.go: "the publish-platform-agent-image workflow promotes to platformAgentPinTemplate"), it must auto-promote. Adds a "Promote platform-agent image pin" step to BOTH the staging and prod deploy jobs of publish-workspace-server-image: resolves the freshly-built molecule-platform-agent:staging-<sha> digest and POSTs /cp/admin/runtime-image/promote {template_name: platform-agent, image_digest, git_sha}. The promote endpoint ALSO triggers a WorkspaceRedeployer for kind=platform (pin_runtime_image.go), so concierges AUTO-ROLL onto the identity-baked image. Net: on every main publish the concierge image now builds (#2976) → pin auto-promotes → concierges auto-roll — no manual dispatch or hand pin-bump. Confirmed end-to-end by template-delivery-e2e (#2971).
Co-Authored-By: Claude Opus 4.8 (1M context)
---
Note: the hunks below were hand-edited after `git format-patch`, so the
post-image blob id on the `index` line is stale; regenerate the patch before
relying on a 3-way merge.

 .../publish-workspace-server-image.yml        | 77 +++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/.gitea/workflows/publish-workspace-server-image.yml b/.gitea/workflows/publish-workspace-server-image.yml
index 2d3ae3af7..c5df4230e 100644
--- a/.gitea/workflows/publish-workspace-server-image.yml
+++ b/.gitea/workflows/publish-workspace-server-image.yml
@@ -449,6 +449,44 @@ jobs:
           fi
           cp "$HTTP_RESPONSE" "$RUNNER_TEMP/redeploy-response.json"
 
+      # Auto-bump the CONCIERGE platform-agent image pin (the redeploy-fleet above
+      # only rolls the tenant image; kind=platform concierges resolve their image
+      # from runtime_image_pins('platform-agent') — core#2495). Promoting the pin
+      # ALSO triggers a WorkspaceRedeployer for kind=platform (pin_runtime_image.go),
+      # so concierges auto-roll onto the new identity-baked image. Without this the
+      # molecule-platform-agent image builds but never reaches concierges (#2919/#2955).
+      - name: Promote platform-agent image pin (staging) + auto-roll concierges
+        env:
+          PLATFORM_AGENT_IMAGE_NAME: ${{ env.PLATFORM_AGENT_IMAGE_NAME }}
+          TAG_SHA: staging-${{ github.sha }}
+          GIT_SHA: ${{ github.sha }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          AWS_DEFAULT_REGION: us-east-2
+        run: |
+          set -euo pipefail
+          if [ -z "${CP_STAGING_ADMIN_API_TOKEN:-}" ]; then
+            echo "::error::cannot promote platform-agent pin — CP_STAGING_ADMIN_API_TOKEN missing"; exit 1
+          fi
+          # `|| DIGEST=""` stops `set -e` from killing the step on a failed lookup
+          # before the ::error:: below can run; aws stderr is left visible for triage.
+          DIGEST=$(aws ecr describe-images --region us-east-2 \
+            --repository-name molecule-ai/molecule-platform-agent \
+            --image-ids imageTag="${TAG_SHA}" \
+            --query 'imageDetails[0].imageDigest' --output text) || DIGEST=""
+          if [ -z "$DIGEST" ] || [ "$DIGEST" = "None" ]; then
+            echo "::error::could not resolve molecule-platform-agent:${TAG_SHA} digest (build did not push?)"; exit 1
+          fi
+          IMAGE_DIGEST="${PLATFORM_AGENT_IMAGE_NAME}@${DIGEST}"
+          BODY=$(jq -nc --arg t "platform-agent" --arg img "$IMAGE_DIGEST" --arg sha "$GIT_SHA" \
+            '{template_name:$t, region:"global", image_digest:$img, git_sha:$sha, notes:"auto-bump from publish-workspace-server-image"}')
+          echo "POST $CP_URL/cp/admin/runtime-image/promote (platform-agent → ${IMAGE_DIGEST})"
+          CODE=$(curl -sS -o /tmp/promote.json -w '%{http_code}' -m 300 \
+            -H "Authorization: Bearer $CP_STAGING_ADMIN_API_TOKEN" -H "Content-Type: application/json" \
+            -X POST "$CP_URL/cp/admin/runtime-image/promote" -d "$BODY" || echo 000)
+          echo "HTTP $CODE: $(cat /tmp/promote.json 2>/dev/null | head -c 300)"
+          [ "$CODE" = "200" ] || [ "$CODE" = "201" ] || { echo "::error::platform-agent pin promote failed (HTTP $CODE)"; exit 1; }
+
       - name: Verify each staging tenant /buildinfo matches published SHA
         env:
           EXPECTED_SHA: ${{ github.sha }}
@@ -696,6 +734,45 @@ jobs:
             exit "$ROLLOUT_EXIT"
           fi
 
+      # Auto-bump the CONCIERGE platform-agent image pin to prod (the redeploy-fleet
+      # above only rolls the tenant image; kind=platform concierges resolve their
+      # image from runtime_image_pins('platform-agent') — core#2495). Promoting the
+      # pin ALSO triggers a WorkspaceRedeployer for kind=platform (pin_runtime_image.go),
+      # so concierges auto-roll onto the identity-baked image. This is the prod half
+      # of true auto-bump for the concierge image (#2919/#2955).
+      - name: Promote platform-agent image pin (prod) + auto-roll concierges
+        if: ${{ steps.plan.outputs.enabled == 'true' && steps.supersede.outputs.superseded != 'true' }}
+        env:
+          PLATFORM_AGENT_IMAGE_NAME: ${{ env.PLATFORM_AGENT_IMAGE_NAME }}
+          TAG_SHA: staging-${{ github.sha }}
+          GIT_SHA: ${{ github.sha }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          AWS_DEFAULT_REGION: us-east-2
+        run: |
+          set -euo pipefail
+          if [ -z "${CP_ADMIN_API_TOKEN:-}" ]; then
+            echo "::error::CP_ADMIN_API_TOKEN missing for platform-agent promote"; exit 1
+          fi
+          # `|| DIGEST=""` stops `set -e` from killing the step on a failed lookup
+          # before the ::error:: below can run; aws stderr is left visible for triage.
+          DIGEST=$(aws ecr describe-images --region us-east-2 \
+            --repository-name molecule-ai/molecule-platform-agent \
+            --image-ids imageTag="${TAG_SHA}" \
+            --query 'imageDetails[0].imageDigest' --output text) || DIGEST=""
+          if [ -z "$DIGEST" ] || [ "$DIGEST" = "None" ]; then
+            echo "::error::could not resolve molecule-platform-agent:${TAG_SHA} digest (build did not push?)"; exit 1
+          fi
+          IMAGE_DIGEST="${PLATFORM_AGENT_IMAGE_NAME}@${DIGEST}"
+          BODY=$(jq -nc --arg t "platform-agent" --arg img "$IMAGE_DIGEST" --arg sha "$GIT_SHA" \
+            '{template_name:$t, region:"global", image_digest:$img, git_sha:$sha, notes:"auto-bump from publish-workspace-server-image (prod)"}')
+          echo "POST $CP_URL/cp/admin/runtime-image/promote (platform-agent → ${IMAGE_DIGEST})"
+          CODE=$(curl -sS -o /tmp/promote-prod.json -w '%{http_code}' -m 300 \
+            -H "Authorization: Bearer $CP_ADMIN_API_TOKEN" -H "Content-Type: application/json" \
+            -X POST "$CP_URL/cp/admin/runtime-image/promote" -d "$BODY" || echo 000)
+          echo "HTTP $CODE: $(cat /tmp/promote-prod.json 2>/dev/null | head -c 300)"
+          [ "$CODE" = "200" ] || [ "$CODE" = "201" ] || { echo "::error::prod platform-agent pin promote failed (HTTP $CODE)"; exit 1; }
+
       - name: Verify reachable tenants report this SHA
         # Skip when superseded BEFORE rollout: the redeploy step did not run, so
         # there is no redeploy-fleet response to verify against and the newer job
-- 
2.52.0