From a6d67b4c68072fc25e8d1ec829a16f0e7b440286 Mon Sep 17 00:00:00 2001 From: devops-engineer Date: Thu, 7 May 2026 12:59:46 -0700 Subject: [PATCH] fix(ci): pre-clone manifest deps in workflow, drop in-image clone (closes #173) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit publish-workspace-server-image.yml could not run on Gitea Actions because Dockerfile.tenant's stage 3 ran `git clone` against private Gitea repos from inside the Docker build context, where no auth path exists. Every workspace-server rebuild required a manual operator-host push. Move cloning to the trusted CI context (where AUTO_SYNC_TOKEN — the devops-engineer persona PAT — is naturally available). Dockerfile.tenant now COPYs from .tenant-bundle-deps/, populated by the workflow's new "Pre-clone manifest deps" step. The Gitea token never enters the image. - scripts/clone-manifest.sh: optional MOLECULE_GITEA_TOKEN env embeds basic-auth in the clone URL; redacted in log output. Anonymous fallback preserved for future public-repo path. - .github/workflows/publish-workspace-server-image.yml: new pre-clone step before docker build; injects AUTO_SYNC_TOKEN. Fail-fast if the secret is empty. - workspace-server/Dockerfile.tenant: drop stage 3 (templates), COPY from .tenant-bundle-deps/ instead. Header documents the prereq. - .gitignore: ignore /.tenant-bundle-deps/ so a local build can't accidentally commit cloned repos. Verified locally: clone-manifest.sh with the devops-engineer persona token cloned all 37 repos (9 ws + 7 org + 21 plugins, 4.9MB after .git strip). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../publish-workspace-server-image.yml | 49 +++++++++++++++++++ .gitignore | 7 +++ scripts/clone-manifest.sh | 43 ++++++++++++++-- workspace-server/Dockerfile.tenant | 48 ++++++++++++------ 4 files changed, 127 insertions(+), 20 deletions(-) diff --git a/.github/workflows/publish-workspace-server-image.yml b/.github/workflows/publish-workspace-server-image.yml index c2bb8178..06e94849 100644 --- a/.github/workflows/publish-workspace-server-image.yml +++ b/.github/workflows/publish-workspace-server-image.yml @@ -102,6 +102,55 @@ jobs: run: | echo "sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT" + # Pre-clone manifest deps before docker build (Task #173 fix). + # + # Why pre-clone: post-2026-05-06, five workspace-template-* repos on + # Gitea (codex, crewai, deepagents, gemini-cli, langgraph) plus all + # 7 org-template-* repos are private. The pre-fix Dockerfile.tenant + # ran `git clone` inside an in-image stage, which had no auth path + # — every CI build failed with "fatal: could not read Username for + # https://git.moleculesai.app". For weeks, every workspace-server + # rebuild required a manual operator-host push. Now we clone in the + # trusted CI context (where AUTO_SYNC_TOKEN is naturally available) + # and Dockerfile.tenant just COPYs from .tenant-bundle-deps/. + # + # Token shape: AUTO_SYNC_TOKEN is the devops-engineer persona PAT + # (see /etc/molecule-bootstrap/agent-secrets.env). Per saved memory + # `feedback_per_agent_gitea_identity_default`, every CI surface uses + # a per-persona token, never the founder PAT. clone-manifest.sh + # embeds it as basic-auth (oauth2:<token>) for the duration of the + # clones, then strips .git directories — the token never enters + # the resulting image. + # + # Idempotent: if a re-run finds populated dirs, clone-manifest.sh + # skips them; safe to retrigger via path-filter or workflow_dispatch.
+ - name: Pre-clone manifest deps + env: + MOLECULE_GITEA_TOKEN: ${{ secrets.AUTO_SYNC_TOKEN }} + run: | + set -euo pipefail + if [ -z "${MOLECULE_GITEA_TOKEN}" ]; then + echo "::error::AUTO_SYNC_TOKEN secret is empty — register the devops-engineer persona PAT in repo Actions secrets" + exit 1 + fi + mkdir -p .tenant-bundle-deps + bash scripts/clone-manifest.sh \ + manifest.json \ + .tenant-bundle-deps/workspace-configs-templates \ + .tenant-bundle-deps/org-templates \ + .tenant-bundle-deps/plugins + # Log per-category directory counts so a partial clone is visible + # in the job log; they are not compared against expected values. + ws_count=$(find .tenant-bundle-deps/workspace-configs-templates -mindepth 1 -maxdepth 1 -type d | wc -l) + org_count=$(find .tenant-bundle-deps/org-templates -mindepth 1 -maxdepth 1 -type d | wc -l) + plugins_count=$(find .tenant-bundle-deps/plugins -mindepth 1 -maxdepth 1 -type d | wc -l) + echo "Cloned: ws=$ws_count org=$org_count plugins=$plugins_count" + # Expected counts come from manifest.json (9 ws / 7 org / 21 + # plugins as of 2026-05-07). The find calls above only report + # what is on disk and do not enforce those numbers; + # clone-manifest.sh's own EXPECTED vs CLONED check (line ~95) + # is the authoritative fail-fast. + # Canary-gated release flow: # - This step always publishes :staging- + :staging-latest. # - On staging push, staging-CP picks up :staging-latest immediately diff --git a/.gitignore b/.gitignore index 3b6e7451..4984b684 100644 --- a/.gitignore +++ b/.gitignore @@ -131,6 +131,13 @@ backups/ # Cloned by publish-workspace-server-image.yml so the Dockerfile's # replace-directive path resolves. Lives in its own repo. /molecule-ai-plugin-github-app-auth/ +# Tenant-image build context — populated by the workflow's +# "Pre-clone manifest deps" step.
Mirrors the public manifest, holds the +# same content as the three /<>/ dirs above but namespaced under one +# parent so the Docker build context is a single COPY-friendly tree. +# Each entry is a transient working-dir, never source-of-truth, never +# committed. +/.tenant-bundle-deps/ # Internal-flavored content lives in Molecule-AI/internal — NEVER in this # public monorepo. Migrated 2026-04-23 (CEO directive). The CI workflow diff --git a/scripts/clone-manifest.sh b/scripts/clone-manifest.sh index 9d873bed..55068d6a 100755 --- a/scripts/clone-manifest.sh +++ b/scripts/clone-manifest.sh @@ -6,6 +6,29 @@ # ./scripts/clone-manifest.sh # # Requires: git, jq (lighter than python3 — ~2MB vs ~50MB in Alpine) +# +# Auth (optional): +# When MOLECULE_GITEA_TOKEN is set, embed it as the basic-auth password so +# private Gitea repos clone successfully. When unset, clone anonymously +# (works only for repos that are public on git.moleculesai.app). +# +# This is the path the publish-workspace-server-image.yml workflow uses: +# it injects AUTO_SYNC_TOKEN (devops-engineer persona PAT, repo:read on +# the molecule-ai org) so the in-CI pre-clone step succeeds for ALL +# manifest entries — including the 5 private workspace-template-* repos +# (codex, crewai, deepagents, gemini-cli, langgraph) and all 7 +# org-template-* repos. +# +# The token never enters the Docker image: this script runs in the +# trusted CI context BEFORE `docker buildx build`, populates +# .tenant-bundle-deps/, then `Dockerfile.tenant` COPYs from there with +# the .git directories already stripped (see line ~67 below). +# +# For backward compatibility — and so a fresh clone works without +# secrets when (eventually) the workspace-template-* repos flip public — +# the unset path remains a plain anonymous HTTPS clone. That path will +# FAIL with "could not read Username" on private repos today; CI MUST +# set MOLECULE_GITEA_TOKEN. set -euo pipefail @@ -52,11 +75,23 @@ clone_category() { # every manifest entry. 
repo_gitea="$(echo "$repo" | awk -F/ '{ printf "%s", tolower($1); for (i=2; i<=NF; i++) printf "/%s", $i; print "" }')" - echo " cloning $repo_gitea -> $target_dir/$name (ref=$ref)" - if [ "$ref" = "main" ]; then - git clone --depth=1 -q "https://git.moleculesai.app/${repo_gitea}.git" "$target_dir/$name" + # Build the clone URL. When MOLECULE_GITEA_TOKEN is set (CI path) + # embed it as basic-auth so private repos succeed; display_url is the same URL with the token masked, used for the log line below. The username + # part ("oauth2") is conventional and ignored by Gitea — only the token-as-password is verified. + # NOTE(review): git writes the remote URL, token included, into each clone's .git/config — confirm .git is stripped on every exit path. + if [ -n "${MOLECULE_GITEA_TOKEN:-}" ]; then + clone_url="https://oauth2:${MOLECULE_GITEA_TOKEN}@git.moleculesai.app/${repo_gitea}.git" + display_url="https://oauth2:***@git.moleculesai.app/${repo_gitea}.git" else - git clone --depth=1 -q --branch "$ref" "https://git.moleculesai.app/${repo_gitea}.git" "$target_dir/$name" + clone_url="https://git.moleculesai.app/${repo_gitea}.git" + display_url="$clone_url" + fi + + echo " cloning $display_url -> $target_dir/$name (ref=$ref)" + if [ "$ref" = "main" ]; then + git clone --depth=1 -q "$clone_url" "$target_dir/$name" + else + git clone --depth=1 -q --branch "$ref" "$clone_url" "$target_dir/$name" fi CLONED=$((CLONED + 1)) i=$((i + 1)) diff --git a/workspace-server/Dockerfile.tenant b/workspace-server/Dockerfile.tenant index 6915365d..5c9fda55 100644 --- a/workspace-server/Dockerfile.tenant +++ b/workspace-server/Dockerfile.tenant @@ -3,14 +3,34 @@ # Serves both the API (Go on :8080) and the UI (Node.js on :3000) in a # single container. Go reverse-proxies unknown routes to canvas. # -# Templates are cloned from standalone GitHub repos at build time so the -# monorepo doesn't need to carry them. The repos are public; no auth. +# Templates + plugins are NOT cloned at build time. They are pre-cloned +# in the trusted CI context (or operator host) by +# `scripts/clone-manifest.sh` into `.tenant-bundle-deps/` and COPYed in.
+# The reason: post-2026-05-06, five workspace-template-* repos on Gitea +# (codex, crewai, deepagents, gemini-cli, langgraph) plus all 7 +# org-template-* repos are private, so the Docker build can't `git clone` +# from inside the build context — short of a BuildKit secret mount, there's no auth path that doesn't leak +# the Gitea token into an image layer. Pre-cloning keeps the token in +# the CI environment only; the resulting image carries the cloned trees +# with `.git` already stripped (see clone-manifest.sh). # -# Build context: repo root. +# Build context: repo root, with `.tenant-bundle-deps/` populated by: +# +# MOLECULE_GITEA_TOKEN=<token> scripts/clone-manifest.sh \ +# manifest.json \ +# .tenant-bundle-deps/workspace-configs-templates \ +# .tenant-bundle-deps/org-templates \ +# .tenant-bundle-deps/plugins +# +# In CI this happens in publish-workspace-server-image.yml's "Pre-clone +# manifest deps" step (uses AUTO_SYNC_TOKEN = devops-engineer persona). +# For a manual operator-host build, source the same token from +# /etc/molecule-bootstrap/agent-secrets.env first. # # docker buildx build --platform linux/amd64 \ # -f workspace-server/Dockerfile.tenant \ -# -t registry.fly.io/molecule-tenant:latest \ +# -t <registry>/molecule-ai/platform-tenant:latest \ +# --build-arg GIT_SHA=<sha> --build-arg NEXT_PUBLIC_PLATFORM_URL=<url> \ # --push .
# ── Stage 1: Go platform binary ────────────────────────────────────── @@ -55,14 +75,7 @@ ENV NEXT_PUBLIC_PLATFORM_URL=$NEXT_PUBLIC_PLATFORM_URL ENV NEXT_PUBLIC_WS_URL=$NEXT_PUBLIC_WS_URL RUN npm run build -# ── Stage 3: Clone templates + plugins from manifest.json ───────────── -FROM alpine:3.20 AS templates -RUN apk add --no-cache git jq -COPY manifest.json /manifest.json -COPY scripts/clone-manifest.sh /scripts/clone-manifest.sh -RUN chmod +x /scripts/clone-manifest.sh && /scripts/clone-manifest.sh /manifest.json /workspace-configs-templates /org-templates /plugins - -# ── Stage 4: Runtime ────────────────────────────────────────────────── +# ── Stage 3: Runtime ────────────────────────────────────────────────── FROM node:20-alpine RUN apk add --no-cache ca-certificates git tzdata openssh-client aws-cli @@ -87,10 +100,13 @@ COPY --from=go-builder /platform /platform COPY --from=go-builder /memory-plugin /memory-plugin COPY workspace-server/migrations /migrations -# Templates + plugins (cloned from GitHub in stage 3) -COPY --from=templates /workspace-configs-templates /workspace-configs-templates -COPY --from=templates /org-templates /org-templates -COPY --from=templates /plugins /plugins +# Templates + plugins (pre-cloned by scripts/clone-manifest.sh in the +# trusted CI / operator-host context, .git already stripped — see +# .tenant-bundle-deps/ in the build context). The Gitea token used to clone them never enters this image. +# These COPYs fail the build if .tenant-bundle-deps/ is missing from the context (NOTE(review): confirm .dockerignore does not exclude it). +COPY .tenant-bundle-deps/workspace-configs-templates /workspace-configs-templates +COPY .tenant-bundle-deps/org-templates /org-templates +COPY .tenant-bundle-deps/plugins /plugins # Canvas standalone WORKDIR /canvas