name: publish-workspace-server-image

# Builds and pushes Docker images to GHCR on staging or main pushes.
# EC2 tenant instances pull the tenant image from GHCR.
#
# Branch / tag policy (see Compute tags step for the per-branch logic):
#
# staging push → builds image, tags :staging-<sha> + :staging-latest.
#                staging-CP pins TENANT_IMAGE=:staging-latest, so it
#                picks up staging-branch code automatically. This is
#                what makes staging-CP actually test staging-branch
#                code instead of "yesterday's main" — pre-fix, this
#                workflow only ran on main, so staging tenants
#                silently served stale code (#2308 fix RFC #2312
#                landed on staging but never reached tenants because
#                staging→main was wedged on path-filter parity bugs).
#
# main push    → builds image, tags :staging-<sha> + :staging-latest
#                (same as before). canary-verify.yml retags
#                :staging-<sha> → :latest after canary tenants
#                green-light the digest. The :staging-latest retag
#                on main push is intentional: when main lands AFTER a
#                staging push, staging-CP gets the post-promote code
#                (which equals what it had + any merge resolution),
#                so the canary-on-staging-CP step still runs against
#                the prod-bound digest.
#
# In the steady state both branches refresh :staging-latest; the
# semantic is "most recent staging-or-main build of tenant code."
# Drift between the two is bounded by the staging→main auto-promote
# cadence and is corrected on the next staging push.

on:
  push:
    branches: [staging, main]
    paths:
      - 'workspace-server/**'
      - 'canvas/**'
      - 'manifest.json'
      - '.github/workflows/publish-workspace-server-image.yml'
  workflow_dispatch:

# Serialize per-branch so two rapid staging pushes don't race the same
# :staging-latest tag retag. Allow staging and main to run in parallel
# (different github.ref → different concurrency group) since they
# produce different :staging-<sha> tags and last-write-wins on
# :staging-latest is acceptable across branches (the post-promote
# main code equals current staging code in a healthy flow).
#
# cancel-in-progress: false → in-flight builds finish; the next push's
# build queues. This avoids a partially-pushed image and keeps the
# canary fleet pin (:staging-<sha>) consistent with what was actually
# tested at canary-verify time.
concurrency:
  group: publish-workspace-server-image-${{ github.ref }}
  cancel-in-progress: false

permissions:
  contents: read
  packages: write

env:
  IMAGE_NAME: ghcr.io/molecule-ai/platform
  TENANT_IMAGE_NAME: ghcr.io/molecule-ai/platform-tenant

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      # workspace-server/Dockerfile expects
      # ./molecule-ai-plugin-github-app-auth at build-context root because
      # the Go module has a `replace` directive pointing at /plugin inside
      # the image. Pre-repo-split the plugin lived in the monorepo; the
      # 2026-04-18 restructure moved it out but didn't add this clone step
      # — which is why publish was failing after that restructure.
      #
      # Uses a fine-grained PAT (PLUGIN_REPO_PAT) because the plugin repo
      # is private and the default GITHUB_TOKEN is scoped to THIS repo.
      # The PAT needs Contents:Read on molecule-ai/molecule-ai-plugin-
      # github-app-auth. Falls back to the default token for the (rare)
      # case where an operator made the plugin repo public.
      - name: Checkout sibling plugin repo
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          repository: molecule-ai/molecule-ai-plugin-github-app-auth
          path: molecule-ai-plugin-github-app-auth
          token: ${{ secrets.PLUGIN_REPO_PAT || secrets.GITHUB_TOKEN }}

      - name: Log in to GHCR
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0

      - name: Compute tags
        id: tags
        run: |
          echo "sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
          # Canary-gated release flow:
          # - This step always publishes :staging-<sha> + :staging-latest.
          # - On staging push, staging-CP picks up :staging-latest immediately
          #   (its TENANT_IMAGE pin is :staging-latest) — so staging-branch
          #   code reaches staging tenants without waiting for main.
          # - On main push, canary-verify.yml runs smoke tests against
          #   canary tenants (which pin :staging-<sha>), and on green retags
          #   :staging-<sha> → :latest. Prod tenants pull :latest.
          # - On red, :latest stays on the prior good digest — prod is safe.
          #
          # Why :staging-latest is retagged on main push too: when main lands
          # after a staging promote, staging-CP gets the post-promote code so
          # the canary-on-staging-CP step still runs against the prod-bound
          # digest. In a healthy flow the post-promote main code == the
          # current staging code, so this is effectively a no-op except for
          # the canary fleet pin handoff.
          #
          # Pre-fix history: this workflow used to only trigger on main. That
          # meant staging-CP served "yesterday's main" indefinitely whenever
          # staging→main was wedged. The 2026-04-30 dogfooding session
          # surfaced this when RFC #2312 (chat upload HTTP-forward) landed on
          # staging but staging tenants kept failing chat upload because they
          # were running pre-RFC code. Adding the staging trigger above closes
          # that gap.
          #
          # Earlier 2026-04-24 incident: a static :staging-<sha> pin
          # drifted 10 days behind staging — same class of bug, different
          # mechanism.

      - name: Build & push platform image to GHCR (staging-<sha> + staging-latest)
        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
        with:
          context: .
          file: ./workspace-server/Dockerfile
          platforms: linux/amd64
          push: true
          tags: |
            ${{ env.IMAGE_NAME }}:staging-${{ steps.tags.outputs.sha }}
            ${{ env.IMAGE_NAME }}:staging-latest
          cache-from: type=gha
          cache-to: type=gha,mode=max
          # GIT_SHA bakes into the Go binary via -ldflags so /buildinfo
          # returns it at runtime — see Dockerfile + buildinfo/buildinfo.go.
          # This is the same value as the OCI revision label below; passing
          # it twice is intentional, the OCI label is for registry tooling
          # while /buildinfo is for the redeploy verification step.
          build-args: |
            GIT_SHA=${{ github.sha }}
          labels: |
            org.opencontainers.image.source=https://github.com/${{ github.repository }}
            org.opencontainers.image.revision=${{ github.sha }}
            org.opencontainers.image.description=Molecule AI platform (Go API server) — pending canary verify

      - name: Build & push tenant image to GHCR (staging-<sha> + staging-latest)
        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
        with:
          context: .
          file: ./workspace-server/Dockerfile.tenant
          platforms: linux/amd64
          push: true
          tags: |
            ${{ env.TENANT_IMAGE_NAME }}:staging-${{ steps.tags.outputs.sha }}
            ${{ env.TENANT_IMAGE_NAME }}:staging-latest
          cache-from: type=gha
          cache-to: type=gha,mode=max
          # Canvas uses same-origin fetches. The tenant Go platform
          # reverse-proxies /cp/* to the SaaS CP via its CP_UPSTREAM_URL
          # env; the tenant's /canvas/viewport, /approvals/pending,
          # /org/templates etc. live on the tenant platform itself.
          # Both legs share one origin (the tenant subdomain) so
          # PLATFORM_URL="" forces canvas to fetch paths as relative,
          # which land same-origin.
          #
          # Self-hosted / private-label deployments override this at
          # build time with a specific backend (e.g. local dev:
          # NEXT_PUBLIC_PLATFORM_URL=http://localhost:8080).
          build-args: |
            NEXT_PUBLIC_PLATFORM_URL=
            GIT_SHA=${{ github.sha }}
          labels: |
            org.opencontainers.image.source=https://github.com/${{ github.repository }}
            org.opencontainers.image.revision=${{ github.sha }}
            org.opencontainers.image.description=Molecule AI tenant platform + canvas — pending canary verify