name: canary-verify

# Runs the canary smoke suite against the staging canary tenant fleet
# after a new :staging-<sha> image lands in GHCR. On green, promotes
# :staging-<sha> → :latest so the prod tenant fleet's 5-minute
# auto-updater picks up the verified digest. On red, :latest stays
# on the prior known-good digest and prod is untouched.
#
# Dependencies:
#   - publish-workspace-server-image.yml publishes :staging-<sha>
#     (NOT :latest) on main merge
#   - canary tenants are configured to pull :staging-<sha> as their
#     tenant image (set TENANT_IMAGE=ghcr.io/…:staging-<sha> on the
#     canary provisioner code path OR rotate via an admin endpoint)
#   - Repo secrets CANARY_TENANT_URLS / CANARY_ADMIN_TOKENS /
#     CANARY_CP_SHARED_SECRET are populated

on:
  workflow_run:
    workflows: ["publish-workspace-server-image"]
    types: [completed]
  workflow_dispatch:

permissions:
  contents: read
  packages: write
  actions: read

env:
  IMAGE_NAME: ghcr.io/molecule-ai/platform
  TENANT_IMAGE_NAME: ghcr.io/molecule-ai/platform-tenant

jobs:
  canary-smoke:
    # Skip when the upstream workflow failed — no image to test against.
    # Manual dispatches have no upstream run, so they are always allowed.
    if: ${{ github.event.workflow_run.conclusion == 'success' || github.event_name == 'workflow_dispatch' }}
    # Self-hosted mac mini — GitHub-hosted minutes are quota-blocked on
    # this org (same reason publish/promote-latest moved earlier).
    runs-on: [self-hosted, macos, arm64]
    outputs:
      # Short sha identifying the :staging-<sha> tag under test; consumed
      # by promote-to-latest.
      sha: ${{ steps.compute.outputs.sha }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Compute sha
        id: compute
        env:
          # On workflow_run events GITHUB_SHA is the tip of the default
          # branch, which may already be newer than the commit the
          # upstream publish run built. Use the triggering run's head
          # sha so we verify/promote the image that was just published;
          # fall back to github.sha for manual workflow_dispatch runs.
          HEAD_SHA: ${{ github.event.workflow_run.head_sha || github.sha }}
        run: echo "sha=${HEAD_SHA:0:7}" >> "$GITHUB_OUTPUT"

      - name: Wait for canary tenants to pick up :staging-<sha>
        # Tenant auto-updater runs every 5 min. Sleep 6 min to give every
        # canary time to pull + restart. Cheaper than polling.
        run: sleep 360

      - name: Run canary smoke suite
        env:
          CANARY_TENANT_URLS: ${{ secrets.CANARY_TENANT_URLS }}
          CANARY_ADMIN_TOKENS: ${{ secrets.CANARY_ADMIN_TOKENS }}
          CANARY_CP_BASE_URL: https://staging-api.moleculesai.app
          CANARY_CP_SHARED_SECRET: ${{ secrets.CANARY_CP_SHARED_SECRET }}
        run: bash scripts/canary-smoke.sh

      - name: Summary on failure
        if: ${{ failure() }}
        env:
          # Passed via env rather than expanded into the script text.
          STAGING_SHA: ${{ steps.compute.outputs.sha }}
        run: |
          {
            echo "## Canary smoke FAILED"
            echo
            echo "Canary tenants rejected image \`staging-${STAGING_SHA}\`."
            echo ":latest stays pinned to the prior good digest — prod is untouched."
            echo
            echo "Fix forward and merge again, or investigate the specific failed"
            echo "assertions in the canary-smoke step log above."
          } >> "$GITHUB_STEP_SUMMARY"

  promote-to-latest:
    # On green, retag :staging-<sha> → :latest for BOTH images.
    # crane is a lightweight registry client (no Docker daemon needed on
    # the runner) that can retag remotely with a single API call each.
    needs: canary-smoke
    if: ${{ needs.canary-smoke.result == 'success' }}
    runs-on: [self-hosted, macos, arm64]
    env:
      # The verified tag, resolved once for every step in this job.
      STAGING_TAG: staging-${{ needs.canary-smoke.outputs.sha }}
    steps:
      - name: Ensure crane installed
        # Matches the install pattern in promote-latest.yml — brew
        # cleanup exits non-zero on the shared runner's /opt/homebrew
        # symlinks, so skip it.
        env:
          HOMEBREW_NO_INSTALL_CLEANUP: "1"
          HOMEBREW_NO_AUTO_UPDATE: "1"
          HOMEBREW_NO_ENV_HINTS: "1"
        run: |
          if ! command -v crane >/dev/null 2>&1; then
            brew install crane
          fi
          crane version

      - name: GHCR login
        env:
          # Credentials go through the environment so they are never
          # spliced into the generated shell script.
          GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GHCR_USER: ${{ github.actor }}
        run: |
          echo "${GHCR_TOKEN}" | \
            crane auth login ghcr.io -u "${GHCR_USER}" --password-stdin

      - name: Retag platform :staging-<sha> → :latest
        run: |
          crane tag \
            "${IMAGE_NAME}:${STAGING_TAG}" \
            latest

      - name: Retag tenant :staging-<sha> → :latest
        run: |
          crane tag \
            "${TENANT_IMAGE_NAME}:${STAGING_TAG}" \
            latest

      - name: Summary
        run: |
          {
            echo "## Canary verified — :latest promoted"
            echo
            echo "- \`${IMAGE_NAME}:${STAGING_TAG}\` → \`${IMAGE_NAME}:latest\`"
            echo "- \`${TENANT_IMAGE_NAME}:${STAGING_TAG}\` → \`${TENANT_IMAGE_NAME}:latest\`"
            echo
            echo "Prod tenant fleet will pick up the new digest on its next 5-min auto-update cycle."
          } >> "$GITHUB_STEP_SUMMARY"