From 82004f151536008e4d6f5871e0b478534d17b20e Mon Sep 17 00:00:00 2001 From: core-devops Date: Fri, 22 May 2026 02:26:44 -0700 Subject: [PATCH] chore(ci): make reusable workflows Gitea/ECR native --- .gitea/workflows/auto-promote-branch.yml | 4 +- .gitea/workflows/auto-promote-staging-pr.yml | 6 +- .../workflows/auto-promote-staging.yml | 2 +- .../workflows/disable-auto-merge-on-push.yml | 4 +- .gitea/workflows/publish-template-image.yml | 74 ++-- .gitea/workflows/validate-org-template.yml | 2 +- .gitea/workflows/validate-plugin.yml | 2 +- .../workflows/validate-workspace-template.yml | 2 +- .github/workflows/auto-promote-branch.yml | 219 ---------- .github/workflows/auto-promote-staging-pr.yml | 262 ------------ .../workflows/disable-auto-merge-on-push.yml | 53 --- .github/workflows/publish-template-image.yml | 397 ------------------ .github/workflows/validate-org-template.yml | 77 ---- .github/workflows/validate-plugin.yml | 86 ---- .../workflows/validate-workspace-template.yml | 224 ---------- README.md | 8 +- docs/template-contract.md | 2 +- 17 files changed, 60 insertions(+), 1364 deletions(-) rename {.github => .gitea}/workflows/auto-promote-staging.yml (91%) delete mode 100644 .github/workflows/auto-promote-branch.yml delete mode 100644 .github/workflows/auto-promote-staging-pr.yml delete mode 100644 .github/workflows/disable-auto-merge-on-push.yml delete mode 100644 .github/workflows/publish-template-image.yml delete mode 100644 .github/workflows/validate-org-template.yml delete mode 100644 .github/workflows/validate-plugin.yml delete mode 100644 .github/workflows/validate-workspace-template.yml diff --git a/.gitea/workflows/auto-promote-branch.yml b/.gitea/workflows/auto-promote-branch.yml index 26045e0..7e45c01 100644 --- a/.gitea/workflows/auto-promote-branch.yml +++ b/.gitea/workflows/auto-promote-branch.yml @@ -1,6 +1,6 @@ # Gitea Actions port of .github/workflows/auto-promote-branch.yml # Ported 2026-05-10 per RFC #229 P1-1. -# Caller migration: update `uses: molecule-ai/molecule-ci/.github/workflows/...@ref` +# Caller migration: update `uses: molecule-ai/molecule-ci/.gitea/workflows/...@ref` # → `uses: molecule-ai/molecule-ci/.gitea/workflows/...@ref` in your consuming repo. # Both .github/ and .gitea/ versions exist transitionally for safe consumer migration. # @@ -37,7 +37,7 @@ name: Auto-promote branch (reusable) # administration: read # read branch protection (REQUIRED — see below) # jobs: # promote: -# uses: molecule-ai/molecule-ci/.github/workflows/auto-promote-branch.yml@v1 +# uses: molecule-ai/molecule-ci/.gitea/workflows/auto-promote-branch.yml@v1 # with: # from-branch: staging # to-branch: main diff --git a/.gitea/workflows/auto-promote-staging-pr.yml b/.gitea/workflows/auto-promote-staging-pr.yml index 1896fb1..030cf05 100644 --- a/.gitea/workflows/auto-promote-staging-pr.yml +++ b/.gitea/workflows/auto-promote-staging-pr.yml @@ -1,6 +1,6 @@ # Gitea Actions port of .github/workflows/auto-promote-staging-pr.yml # Ported 2026-05-10 per RFC #229 P1-1. -# Caller migration: update `uses: molecule-ai/molecule-ci/.github/workflows/...@ref` +# Caller migration: update `uses: molecule-ai/molecule-ci/.gitea/workflows/...@ref` # → `uses: molecule-ai/molecule-ci/.gitea/workflows/...@ref` in your consuming repo. # Both .github/ and .gitea/ versions exist transitionally for safe consumer migration. # @@ -44,7 +44,7 @@ name: Auto-promote staging → main (PR-based, reusable) # pull-requests: write # jobs: # promote: -# uses: molecule-ai/molecule-ci/.github/workflows/auto-promote-staging-pr.yml@v1 +# uses: molecule-ai/molecule-ci/.gitea/workflows/auto-promote-staging-pr.yml@v1 # with: # gates: "ci.yml,e2e-staging-canvas.yml,e2e-api.yml,codeql.yml" # force: ${{ github.event.inputs.force == 'true' }} @@ -246,7 +246,7 @@ jobs: cat > "$BODY_FILE" <:latest` (plus a +# repo. Builds the template's Dockerfile on main and pushes to ECR as +# `/molecule-ai/workspace-template-:latest` (plus a # per-commit `sha-<7>` tag). Auto-derives from the caller repo # name so the per-repo wrapper stays one line. # @@ -30,20 +30,14 @@ name: Publish Workspace Template Image # workflow_dispatch: # permissions: # contents: read -# packages: write # jobs: # publish: -# uses: molecule-ai/molecule-ci/.github/workflows/publish-template-image.yml@v1 +# uses: molecule-ai/molecule-ci/.gitea/workflows/publish-template-image.yml@v1 # secrets: inherit # -# Runner choice (2026-04-22): ubuntu-latest -# - All caller repos are PUBLIC → GHA-hosted minutes are free. -# - Targets are linux/amd64 natively; Ubuntu runners skip QEMU that -# our arm64 Mac mini had to emulate through, so builds go ~2-3x -# faster on top of having no queue wait when the Mac mini is busy. -# - No macOS Keychain gymnastics — standard docker/login-action works. -# The self-hosted Mac mini remains in service for private repo -# workflows (see memory: feedback_selfhosted_runner). +# Runner choice: use the dedicated Linux publish runners. They have +# Docker + AWS CLI wired for ECR publishing and avoid the mixed +# ubuntu-latest pool. on: workflow_call: @@ -77,10 +71,15 @@ on: description: "Short SHA tag pushed alongside :latest" value: ${{ jobs.publish.outputs.sha }} +env: + GITHUB_SERVER_URL: https://git.moleculesai.app + ECR_REGISTRY: 153263036946.dkr.ecr.us-east-2.amazonaws.com + AWS_DEFAULT_REGION: us-east-2 + jobs: publish: name: Build & push template image - runs-on: ubuntu-latest + runs-on: [publish, release] outputs: image: ${{ steps.tags.outputs.image }} sha: ${{ steps.tags.outputs.sha }} @@ -112,7 +111,7 @@ jobs: ;; esac fi - IMAGE="ghcr.io/molecule-ai/workspace-template-${RUNTIME}" + IMAGE="${ECR_REGISTRY}/molecule-ai/workspace-template-${RUNTIME}" SHA="${GITHUB_SHA::7}" echo "runtime=${RUNTIME}" >> "$GITHUB_OUTPUT" echo "image=${IMAGE}" >> "$GITHUB_OUTPUT" @@ -177,16 +176,35 @@ jobs: fi echo "::notice::✓ no bare imports of runtime modules in template *.py files" - - name: Log in to GHCR - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} + - name: Log in to ECR + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + run: | + set -euo pipefail + aws ecr get-login-password --region "${AWS_DEFAULT_REGION}" | \ + docker login --username AWS --password-stdin "${ECR_REGISTRY}" - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 + - name: Ensure ECR repository exists + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + run: | + set -euo pipefail + repo_path="molecule-ai/workspace-template-${{ steps.tags.outputs.runtime }}" + if ! aws ecr describe-repositories --repository-names "${repo_path}" --region "${AWS_DEFAULT_REGION}" >/dev/null 2>&1; then + aws ecr create-repository \ + --repository-name "${repo_path}" \ + --image-scanning-configuration scanOnPush=true \ + --region "${AWS_DEFAULT_REGION}" >/dev/null + echo "::notice::created ECR repository ${repo_path}" + else + echo "ECR repository ${repo_path} already exists" + fi + - name: Build template image (load for smoke test, do not push yet) # Build into the runner's local docker first so the smoke test can # actually boot the image. We push :latest + :sha-* only AFTER the @@ -195,7 +213,7 @@ jobs: # template's adapter.py imported a symbol (RuntimeCapabilities) # that the published runtime didn't yet export. The old smoke # test only inspected the entrypoint string, so the broken image - # shipped to GHCR and every workspace provision hung. + # shipped to the registry and every workspace provision hung. uses: docker/build-push-action@v6 with: context: . @@ -204,8 +222,6 @@ jobs: load: true push: false tags: ${{ steps.tags.outputs.image }}:sha-${{ steps.tags.outputs.sha }} - cache-from: type=gha - cache-to: type=gha,mode=max # RUNTIME_VERSION is empty by default. When the cascade fires # (or workflow_dispatch is invoked with a version), it's the # exact runtime version about to be installed. Forwarded as a @@ -215,7 +231,7 @@ jobs: build-args: | RUNTIME_VERSION=${{ inputs.runtime_version }} labels: | - org.opencontainers.image.source=https://github.com/${{ github.repository }} + org.opencontainers.image.source=https://git.moleculesai.app/${{ github.repository }} org.opencontainers.image.revision=${{ github.sha }} org.opencontainers.image.description=Molecule AI workspace template — ${{ steps.tags.outputs.runtime }} runtime @@ -389,7 +405,7 @@ jobs: fi echo "::notice::✓ ${IMAGE} executor.execute() smoke passed (imports healthy, no runtime wedge)" - - name: Push image to GHCR (post-smoke) + - name: Push image to ECR (post-smoke) # Now that the smoke test passed, push both tags. build-push-action # reuses the cached build from the load step above, so this is fast # — it's effectively a layer push, not a rebuild. Same build-args @@ -403,11 +419,9 @@ jobs: tags: | ${{ steps.tags.outputs.image }}:latest ${{ steps.tags.outputs.image }}:sha-${{ steps.tags.outputs.sha }} - cache-from: type=gha - cache-to: type=gha,mode=max build-args: | RUNTIME_VERSION=${{ inputs.runtime_version }} labels: | - org.opencontainers.image.source=https://github.com/${{ github.repository }} + org.opencontainers.image.source=https://git.moleculesai.app/${{ github.repository }} org.opencontainers.image.revision=${{ github.sha }} org.opencontainers.image.description=Molecule AI workspace template — ${{ steps.tags.outputs.runtime }} runtime diff --git a/.gitea/workflows/validate-org-template.yml b/.gitea/workflows/validate-org-template.yml index 4b0b464..665a661 100644 --- a/.gitea/workflows/validate-org-template.yml +++ b/.gitea/workflows/validate-org-template.yml @@ -1,6 +1,6 @@ # Gitea Actions port of .github/workflows/validate-org-template.yml # Ported 2026-05-10 per RFC #229 P1-1. -# Caller migration: update `uses: molecule-ai/molecule-ci/.github/workflows/...@ref` +# Caller migration: update `uses: molecule-ai/molecule-ci/.gitea/workflows/...@ref` # → `uses: molecule-ai/molecule-ci/.gitea/workflows/...@ref` in your consuming repo. # Both .github/ and .gitea/ versions exist transitionally for safe consumer migration. # diff --git a/.gitea/workflows/validate-plugin.yml b/.gitea/workflows/validate-plugin.yml index 723de31..ce645e3 100644 --- a/.gitea/workflows/validate-plugin.yml +++ b/.gitea/workflows/validate-plugin.yml @@ -1,6 +1,6 @@ # Gitea Actions port of .github/workflows/validate-plugin.yml # Ported 2026-05-10 per RFC #229 P1-1. -# Caller migration: update `uses: molecule-ai/molecule-ci/.github/workflows/...@ref` +# Caller migration: update `uses: molecule-ai/molecule-ci/.gitea/workflows/...@ref` # → `uses: molecule-ai/molecule-ci/.gitea/workflows/...@ref` in your consuming repo. # Both .github/ and .gitea/ versions exist transitionally for safe consumer migration. # diff --git a/.gitea/workflows/validate-workspace-template.yml b/.gitea/workflows/validate-workspace-template.yml index 075147f..d57f0dd 100644 --- a/.gitea/workflows/validate-workspace-template.yml +++ b/.gitea/workflows/validate-workspace-template.yml @@ -1,6 +1,6 @@ # Gitea Actions port of .github/workflows/validate-workspace-template.yml # Ported 2026-05-10 per RFC #229 P1-1. -# Caller migration: update `uses: molecule-ai/molecule-ci/.github/workflows/...@ref` +# Caller migration: update `uses: molecule-ai/molecule-ci/.gitea/workflows/...@ref` # → `uses: molecule-ai/molecule-ci/.gitea/workflows/...@ref` in your consuming repo. # Both .github/ and .gitea/ versions exist transitionally for safe consumer migration. # diff --git a/.github/workflows/auto-promote-branch.yml b/.github/workflows/auto-promote-branch.yml deleted file mode 100644 index 04ae372..0000000 --- a/.github/workflows/auto-promote-branch.yml +++ /dev/null @@ -1,219 +0,0 @@ -name: Auto-promote branch (reusable) - -# Reusable version of the auto-promote-staging workflow that lived -# directly in molecule-ci. Any repo with a `from-branch` (typically -# `staging`) → `to-branch` (typically `main`) flow can call this -# workflow to fast-forward `to-branch` whenever `from-branch` is -# strictly ahead AND all configured required-status-checks on the -# `from-branch` HEAD are green. -# -# Adoption pattern in a consumer repo: -# -# # .github/workflows/auto-promote.yml -# name: Auto-promote staging → main -# on: -# push: -# branches: [staging] -# workflow_dispatch: -# permissions: -# contents: write # push the fast-forward to to-branch -# statuses: read # read commit status checks -# administration: read # read branch protection (REQUIRED — see below) -# jobs: -# promote: -# uses: molecule-ai/molecule-ci/.github/workflows/auto-promote-branch.yml@v1 -# with: -# from-branch: staging -# to-branch: main -# -# Repo-agnostic by design — gates are read from the consuming repo's -# branch protection at run time, not hardcoded here. -# -# `@v1` is a moving tag pointing at the latest 1.x release of -# molecule-ci's reusable workflows (GitHub Actions convention, same -# as `actions/checkout@v4`). Breaking changes get a new `@v2` tag -# and the old `@v1` keeps working for existing consumers. Pinning to -# `@main` is also accepted for forward-compat preview but is -# unstable — any change merged here rolls out instantly to consumers -# without a release boundary. -# -# `administration: read` is REQUIRED. Without it, the branch-protection -# API returns 403 and the workflow refuses to fast-forward (fail-loud), -# rather than silently degrading to --ff-only-only enforcement (which -# is ancestry-only, not test-status — a green-but-flaky branch would -# ff-promote red commits). If you intentionally want no-gate -# enforcement, leave from-branch unprotected — a 404 from the API is -# treated as "no gates configured" and falls back to --ff-only safety. -# -# Excluded-by-policy repos (molecule-core + molecule-controlplane per -# CEO directive 2026-04-24) simply do not adopt this workflow; the -# reusable shape adds no surface area to repos that don't call it. - -on: - workflow_call: - inputs: - from-branch: - description: "Source branch with green CI" - required: false - default: staging - type: string - to-branch: - description: "Target branch to fast-forward" - required: false - default: main - type: string - -permissions: - contents: write - statuses: read - -jobs: - promote: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - token: ${{ secrets.GITHUB_TOKEN }} - - - name: Check required gates (if configured) on source HEAD - id: gates - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - REPO: ${{ github.repository }} - HEAD_SHA: ${{ github.sha }} - FROM_BRANCH: ${{ inputs.from-branch }} - shell: bash - run: | - set -euo pipefail - - # Read required gates from branch protection. Three response - # classes, distinguished by HTTP status: - # - # 200 — branch protection is configured. Honor the gates. - # 404 — branch is not protected. Legitimate "no gates"; - # fall back to --ff-only as the sole safety net. - # 403 — caller's GITHUB_TOKEN can't read branch protection. - # FAIL LOUD. The previous behavior conflated this - # with 404 ("api inaccessible") and silently degraded - # to --ff-only-only — which is ancestry-only, not - # test-status. A green-but-flaky branch would - # ff-promote red commits to the target. The fix: - # require the caller to add `administration: read` - # to its permissions block, or explicitly accept the - # no-gates posture by removing branch protection on - # the source branch. - # - # `gh api` exit code is 0 only on 2xx; non-zero on anything - # else. We use --include to capture HTTP status to discriminate. - - if PROTECTION_RESP=$(gh api -i "repos/${REPO}/branches/${FROM_BRANCH}/protection/required_status_checks" 2>&1); then - HTTP_STATUS=200 - else - HTTP_STATUS=$(echo "$PROTECTION_RESP" | grep -oE '^HTTP/[12](\.[01])? [0-9]{3}' | awk '{print $2}' | head -1) - HTTP_STATUS=${HTTP_STATUS:-unknown} - fi - - case "$HTTP_STATUS" in - 200) - # Strip headers from gh -i output to get just the body. - GATES_JSON=$(echo "$PROTECTION_RESP" | awk 'p{print} /^[[:space:]]*$/ && !p {p=1}') - ;; - 404) - echo "::notice::No branch protection on '${FROM_BRANCH}' — relying on --ff-only safety." - echo "ok=true" >> "$GITHUB_OUTPUT" - exit 0 - ;; - 403|401) - echo "::error::Cannot read branch protection on '${FROM_BRANCH}' (HTTP ${HTTP_STATUS})." - echo "::error::Caller's GITHUB_TOKEN lacks 'administration: read' permission." - echo "::error::Refusing to fast-forward without explicit gate enforcement —" - echo "::error::a silent fallback to --ff-only here would let green-but-flaky" - echo "::error::branches promote red commits." - echo "::error::" - echo "::error::Fix: add to the caller's workflow's permissions block:" - echo "::error:: permissions:" - echo "::error:: contents: write" - echo "::error:: statuses: read" - echo "::error:: administration: read" - echo "::error::" - echo "::error::Or, if you intentionally want no-gate enforcement, remove" - echo "::error::branch protection on '${FROM_BRANCH}' so the API returns 404." - exit 1 - ;; - *) - echo "::error::Unexpected HTTP status '${HTTP_STATUS}' from branch-protection API." - echo "::error::Response (first 5 lines):" - echo "$PROTECTION_RESP" | head -5 | sed 's/^/::error:: /' - exit 1 - ;; - esac - - GATES=$(echo "${GATES_JSON}" | jq -r '.contexts[]?' 2>/dev/null || true) - - if [ -z "$GATES" ]; then - echo "::notice::Branch protection on '${FROM_BRANCH}' has zero required-status-checks contexts — relying on --ff-only safety." - echo "ok=true" >> "$GITHUB_OUTPUT" - exit 0 - fi - - echo "Required gates on '${FROM_BRANCH}':" - echo "${GATES}" | sed 's/^/ - /' - - ALL_GREEN=true - while IFS= read -r gate; do - [ -z "$gate" ] && continue - - conclusion=$(gh api "repos/${REPO}/commits/${HEAD_SHA}/check-runs" \ - --jq "[.check_runs[] | select(.name == \"${gate}\")] | sort_by(.completed_at) | last.conclusion" \ - 2>/dev/null || echo "") - - if [ -z "$conclusion" ] || [ "$conclusion" = "null" ]; then - conclusion=$(gh api "repos/${REPO}/commits/${HEAD_SHA}/status" \ - --jq "[.statuses[] | select(.context == \"${gate}\")] | sort_by(.updated_at) | last.state" \ - 2>/dev/null || echo "") - fi - - if [ "$conclusion" != "success" ] && [ "$conclusion" != "SUCCESS" ]; then - echo "::warning::Gate '${gate}' is '${conclusion:-missing}' on ${HEAD_SHA} — skipping promote." - ALL_GREEN=false - else - echo " ✓ ${gate}: success" - fi - done <<< "$GATES" - - echo "ok=${ALL_GREEN}" >> "$GITHUB_OUTPUT" - - - name: Fast-forward target branch to source HEAD - if: steps.gates.outputs.ok == 'true' - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - FROM_BRANCH: ${{ inputs.from-branch }} - TO_BRANCH: ${{ inputs.to-branch }} - shell: bash - run: | - set -euo pipefail - - git config user.email "actions@github.com" - git config user.name "github-actions[bot]" - - # Source branch is what's checked out (workflow fires on push to - # source). Can't fetch into it. Fetch target into a local target. - git fetch origin "${TO_BRANCH}" - git checkout -B "${TO_BRANCH}" "origin/${TO_BRANCH}" - - # Check if target is already at or ahead of source. - if git merge-base --is-ancestor "origin/${FROM_BRANCH}" "${TO_BRANCH}" 2>/dev/null; then - echo "${TO_BRANCH} already contains ${FROM_BRANCH}; nothing to promote." - exit 0 - fi - - # --ff-only refuses if target has independent commits not on - # source (divergence — hotfix direct to target). Human resolves. - if ! git merge --ff-only "origin/${FROM_BRANCH}" 2>&1; then - echo "::warning::${TO_BRANCH} has diverged from ${FROM_BRANCH} — refusing fast-forward. Resolve manually (likely a direct-to-${TO_BRANCH} commit exists that ${FROM_BRANCH} doesn't have)." - exit 0 - fi - - git push origin "${TO_BRANCH}" - echo "::notice::Promoted: ${TO_BRANCH} is now at $(git rev-parse --short HEAD)" diff --git a/.github/workflows/auto-promote-staging-pr.yml b/.github/workflows/auto-promote-staging-pr.yml deleted file mode 100644 index d3308d2..0000000 --- a/.github/workflows/auto-promote-staging-pr.yml +++ /dev/null @@ -1,262 +0,0 @@ -name: Auto-promote staging → main (PR-based, reusable) - -# Reusable PR-based auto-promote for repos whose `main` branch has -# protection rules that require status checks "set by the expected -# GitHub apps" — direct `git push` from a workflow can't satisfy -# that, only PR merges through the merge queue can. -# -# Distinct from the simpler ff-only auto-promote in this same repo -# (auto-promote-staging.yml): that one does `git merge --ff-only` + -# direct push and only works on repos WITHOUT required-status-checks. -# This reusable workflow is for the protected-branch case. -# -# Call from each repo's .github/workflows/ via a thin wrapper: -# -# name: Auto-promote staging → main -# on: -# workflow_run: -# workflows: [CI, E2E Staging Canvas, ...] -# types: [completed] -# workflow_dispatch: -# inputs: -# force: -# description: "Force promote (manual override)" -# required: false -# default: "false" -# permissions: -# contents: write -# pull-requests: write -# jobs: -# promote: -# uses: molecule-ai/molecule-ci/.github/workflows/auto-promote-staging-pr.yml@v1 -# with: -# gates: "ci.yml,e2e-staging-canvas.yml,e2e-api.yml,codeql.yml" -# force: ${{ github.event.inputs.force == 'true' }} -# secrets: inherit -# -# IMPORTANT: the caller MUST keep the `on.workflow_run.workflows` -# display-name list in sync with the `gates` input (which uses -# workflow filenames). The reusable can't validate this — display -# names and filenames are decoupled in GitHub Actions. -# -# Required repo settings (one-time, in the CALLER repo): -# -# Settings → Actions → General → Workflow permissions -# → ✅ Allow GitHub Actions to create and approve pull requests -# -# Without it, every workflow run fails with: -# -# pull request create failed: GraphQL: GitHub Actions is not -# permitted to create or approve pull requests (createPullRequest) -# -# Toggle: caller repo variable AUTO_PROMOTE_ENABLED=true. Override -# via the `enabled-var` input if a different name is needed. -# When the variable is unset, the workflow logs what it would have -# done but doesn't open the PR — useful for dry-running the gate -# logic without surfacing a noisy PR while staging CI is still flaky. - -on: - workflow_call: - inputs: - gates: - description: >- - Comma-separated list of workflow FILENAMES (not display - names) that must be conclusion=success on the staging head - SHA before promote fires. Example: - "ci.yml,e2e-staging-canvas.yml,codeql.yml". File paths are - used (not display names) because gh run list with display - names is ambiguous when two workflows share a name (observed - 2026-04-28 with codeql.yml + GitHub UI's Code-quality default - setup both surfacing as "CodeQL"). - required: true - type: string - target-branch: - description: "Target branch to promote TO (default: main)" - required: false - type: string - default: main - source-branch: - description: "Source branch to promote FROM (default: staging)" - required: false - type: string - default: staging - enabled-var: - description: >- - Repo variable name that gates this workflow. Set this - variable to "true" in the caller repo's Settings → - Variables → Actions to enable. Defaults to - AUTO_PROMOTE_ENABLED. - required: false - type: string - default: AUTO_PROMOTE_ENABLED - merge-method: - description: >- - Merge method for `gh pr merge --auto`. One of merge|squash| - rebase. Defaults to "merge" (matches user preference for - merge commits over squash). - required: false - type: string - default: merge - force: - description: >- - Skip the AUTO_PROMOTE_ENABLED variable check. Pass true - when the caller's workflow_dispatch input is force=true. - Default false. - required: false - type: boolean - default: false - -jobs: - check-all-gates-green: - # Only consider promotions for the source branch's push events. - # PR runs into the source branch don't promote. workflow_dispatch - # passes through unconditionally. - if: > - (github.event_name == 'workflow_run' && - github.event.workflow_run.head_branch == inputs.source-branch && - github.event.workflow_run.event == 'push') - || github.event_name == 'workflow_dispatch' - runs-on: ubuntu-latest - outputs: - all_green: ${{ steps.gates.outputs.all_green }} - head_sha: ${{ steps.gates.outputs.head_sha }} - steps: - - name: Check all required gates on this SHA - id: gates - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - HEAD_SHA: ${{ github.event.workflow_run.head_sha || github.sha }} - REPO: ${{ github.repository }} - GATES_CSV: ${{ inputs.gates }} - SOURCE_BRANCH: ${{ inputs.source-branch }} - run: | - set -euo pipefail - - # Split the comma-separated gates input. Trim whitespace per - # entry so callers can format readably (e.g. "ci.yml, e2e.yml"). - IFS=',' read -ra GATES <<< "$GATES_CSV" - - echo "head_sha=${HEAD_SHA}" >> "$GITHUB_OUTPUT" - echo "Checking gates on SHA ${HEAD_SHA}" - - ALL_GREEN=true - for gate_raw in "${GATES[@]}"; do - gate="${gate_raw## }" - gate="${gate%% }" - if [ -z "$gate" ]; then - continue - fi - - # Query the most recent run of this workflow on this SHA. - # event=push to avoid picking up PR runs. branch filter - # guards against someone dispatching the gate on a non- - # source branch at the same SHA. - RESULT=$(gh run list \ - --repo "$REPO" \ - --workflow "$gate" \ - --branch "$SOURCE_BRANCH" \ - --event push \ - --commit "$HEAD_SHA" \ - --limit 1 \ - --json status,conclusion \ - --jq '.[0] | "\(.status)/\(.conclusion // "none")"' \ - 2>/dev/null || echo "missing/none") - - echo " $gate → $RESULT" - - # Only completed/success counts. Anything else aborts. - if [ "$RESULT" != "completed/success" ]; then - ALL_GREEN=false - fi - done - - echo "all_green=${ALL_GREEN}" >> "$GITHUB_OUTPUT" - if [ "$ALL_GREEN" != "true" ]; then - echo "::notice::auto-promote: not all gates are green on ${HEAD_SHA} — staying on current ${{ inputs.target-branch }}" - fi - - promote: - needs: check-all-gates-green - if: needs.check-all-gates-green.outputs.all_green == 'true' - runs-on: ubuntu-latest - steps: - - name: Check rollout gate - env: - ENABLED_VAR_NAME: ${{ inputs.enabled-var }} - ENABLED_VAR_VALUE: ${{ vars[inputs.enabled-var] }} - FORCE: ${{ inputs.force }} - run: | - set -eu - # Caller repo controls rollout via the named variable. - # Default name is AUTO_PROMOTE_ENABLED; callers can override. - if [ "${ENABLED_VAR_VALUE:-}" != "true" ] && [ "${FORCE:-false}" != "true" ]; then - { - echo "## ⏸ Auto-promote disabled" - echo - echo "Repo variable \`${ENABLED_VAR_NAME}\` is not set to \`true\`." - echo "All gates are green on ${{ inputs.source-branch }}; would have opened a promote PR to \`${{ inputs.target-branch }}\`." - echo - echo "To enable: Settings → Secrets and variables → Actions → Variables → \`${ENABLED_VAR_NAME}=true\`." - echo "To test once manually: workflow_dispatch with \`force=true\`." - } >> "$GITHUB_STEP_SUMMARY" - echo "::notice::auto-promote disabled — dry run only" - exit 0 - fi - - - name: Open (or reuse) ${{ inputs.source-branch }} → ${{ inputs.target-branch }} promote PR + enable auto-merge - if: ${{ vars[inputs.enabled-var] == 'true' || inputs.force == true }} - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - REPO: ${{ github.repository }} - TARGET_SHA: ${{ needs.check-all-gates-green.outputs.head_sha }} - SOURCE_BRANCH: ${{ inputs.source-branch }} - TARGET_BRANCH: ${{ inputs.target-branch }} - MERGE_METHOD: ${{ inputs.merge-method }} - GATES_CSV: ${{ inputs.gates }} - run: | - set -euo pipefail - - # Look for an existing open promote PR (idempotent on re-run). - # The PR's head IS the source branch — the whole point is - # "advance target to source's tip", so we don't need a per-SHA - # branch like auto-sync-main-to-staging.yml uses. - PR_NUM=$(gh pr list --repo "$REPO" \ - --base "$TARGET_BRANCH" --head "$SOURCE_BRANCH" --state open \ - --json number --jq '.[0].number // ""') - - if [ -z "$PR_NUM" ]; then - TITLE="${SOURCE_BRANCH} → ${TARGET_BRANCH}: auto-promote ${TARGET_SHA:0:7}" - BODY_FILE=$(mktemp) - cat > "$BODY_FILE" <&1; then - echo "::warning::Failed to enable auto-merge on PR #${PR_NUM} — operator may need to merge manually." - fi - - { - echo "## ✅ Auto-promote PR opened" - echo - echo "- Source: \`${SOURCE_BRANCH}\` at \`${TARGET_SHA:0:8}\`" - echo "- Target: \`${TARGET_BRANCH}\`" - echo "- PR: #${PR_NUM}" - echo - echo "Merge queue lands the PR once required gates are green; no human action needed unless gates fail." - } >> "$GITHUB_STEP_SUMMARY" diff --git a/.github/workflows/disable-auto-merge-on-push.yml b/.github/workflows/disable-auto-merge-on-push.yml deleted file mode 100644 index cda64cb..0000000 --- a/.github/workflows/disable-auto-merge-on-push.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: Disable auto-merge on push - -# Reusable guard against the "I enabled auto-merge then pushed more -# commits" race. Background: on 2026-04-27, PR #2174 in molecule-core -# auto-merged with only the first commit because the second commit -# was pushed AFTER the merge queue had already locked the PR's SHA. -# The second commit ended up orphaned on a merged-and-deleted branch. -# -# Mechanism: on every `pull_request: synchronize` event (= new commit -# pushed to an open PR), check if auto-merge is enabled. If yes, -# disable it and post a comment. This forces the operator to -# re-engage `gh pr merge --auto` after the new push, with the -# re-engagement acting as the verification step. -# -# Call from each repo's .github/workflows/ via a thin wrapper: -# -# name: pr-guards -# on: -# pull_request: -# types: [synchronize] -# permissions: -# pull-requests: write -# jobs: -# disable-auto-merge-on-push: -# uses: molecule-ai/molecule-ci/.github/workflows/disable-auto-merge-on-push.yml@v1 -# -# False-positive behavior: if a CI bot pushes (e.g. dependency-update -# rebase, secret rotation), this also disables auto-merge for that -# PR. That's acceptable — the operator who originally enabled -# auto-merge gets notified and re-engages, which is exactly the -# verify-after-machine-edits behavior we want. - -on: - workflow_call: - -jobs: - guard: - name: Disable auto-merge on push - runs-on: ubuntu-latest - if: github.event.pull_request.auto_merge != null - permissions: - pull-requests: write - steps: - - name: Disable auto-merge - env: - GH_TOKEN: ${{ github.token }} - PR: ${{ github.event.pull_request.number }} - REPO: ${{ github.repository }} - NEW_SHA: ${{ github.event.pull_request.head.sha }} - run: | - set -eu - gh pr merge "$PR" --disable-auto -R "$REPO" || true - gh pr comment "$PR" -R "$REPO" --body "🔒 Auto-merge disabled — new commit (\`${NEW_SHA:0:7}\`) pushed after auto-merge was enabled. The merge queue locks SHAs at entry, so subsequent pushes can race. Verify the new commit and re-enable with \`gh pr merge --auto\`." diff --git a/.github/workflows/publish-template-image.yml b/.github/workflows/publish-template-image.yml deleted file mode 100644 index 067f891..0000000 --- a/.github/workflows/publish-template-image.yml +++ /dev/null @@ -1,397 +0,0 @@ -name: Publish Workspace Template Image - -# Reusable workflow for every molecule-ai/molecule-ai-workspace-template-* -# repo. Builds the template's Dockerfile on main and pushes to GHCR as -# `ghcr.io/molecule-ai/workspace-template-:latest` (plus a -# per-commit `sha-<7>` tag). Auto-derives from the caller repo -# name so the per-repo wrapper stays one line. -# -# Call from each template repo like: -# -# name: publish-image -# on: -# push: { branches: [main] } -# workflow_dispatch: -# permissions: -# contents: read -# packages: write -# jobs: -# publish: -# uses: molecule-ai/molecule-ci/.github/workflows/publish-template-image.yml@v1 -# secrets: inherit -# -# Runner choice (2026-04-22): ubuntu-latest -# - All caller repos are PUBLIC → GHA-hosted minutes are free. -# - Targets are linux/amd64 natively; Ubuntu runners skip QEMU that -# our arm64 Mac mini had to emulate through, so builds go ~2-3x -# faster on top of having no queue wait when the Mac mini is busy. -# - No macOS Keychain gymnastics — standard docker/login-action works. -# The self-hosted Mac mini remains in service for private repo -# workflows (see memory: feedback_selfhosted_runner). - -on: - workflow_call: - inputs: - runtime_name: - description: >- - Optional explicit runtime name. When unset, derived from - the caller repo name (strips `molecule-ai-workspace-template-` - prefix). Override only if the image should diverge. - required: false - type: string - default: "" - runtime_version: - description: >- - molecule-ai-workspace-runtime version to install. Forwarded - as RUNTIME_VERSION docker build-arg. When unset, the - Dockerfile's requirements.txt pin is used. Cascade-triggered - builds forward client_payload.runtime_version here so each - rebuild has a unique build-arg → unique cache key → - guaranteed fresh `pip install`. Solves the - "cascade rebuilt but image still has old runtime" cache - trap that bit us repeatedly on 2026-04-27. - required: false - type: string - default: "" - outputs: - image: - description: "Full image reference that was pushed (with :latest tag)" - value: ${{ jobs.publish.outputs.image }} - sha: - description: "Short SHA tag pushed alongside :latest" - value: ${{ jobs.publish.outputs.sha }} - -jobs: - publish: - name: Build & push template image - runs-on: ubuntu-latest - outputs: - image: ${{ steps.tags.outputs.image }} - sha: ${{ steps.tags.outputs.sha }} - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Derive runtime name + image reference - id: tags - shell: bash - env: - EXPLICIT_RUNTIME: ${{ inputs.runtime_name }} - REPO_NAME: ${{ github.event.repository.name }} - run: | - set -eu - if [ -n "${EXPLICIT_RUNTIME}" ]; then - RUNTIME="${EXPLICIT_RUNTIME}" - else - # Repo naming convention: - # molecule-ai-workspace-template- - # Strip the prefix to get . - case "${REPO_NAME}" in - molecule-ai-workspace-template-*) - RUNTIME="${REPO_NAME#molecule-ai-workspace-template-}" - ;; - *) - echo "::error::Repo name '${REPO_NAME}' does not match 'molecule-ai-workspace-template-' — pass runtime_name explicitly." >&2 - exit 1 - ;; - esac - fi - IMAGE="ghcr.io/molecule-ai/workspace-template-${RUNTIME}" - SHA="${GITHUB_SHA::7}" - echo "runtime=${RUNTIME}" >> "$GITHUB_OUTPUT" - echo "image=${IMAGE}" >> "$GITHUB_OUTPUT" - echo "sha=${SHA}" >> "$GITHUB_OUTPUT" - echo "::notice::Publishing runtime='${RUNTIME}' → ${IMAGE}:latest + :sha-${SHA}" - - - name: Lint — no bare imports of runtime modules - # Templates that bare-import a workspace/ runtime module - # (e.g. `from plugins import load_plugins` instead of - # `from molecule_runtime.plugins import load_plugins`) work in - # the monorepo's bundled-runtime layout but explode at startup - # with `ModuleNotFoundError` once the runtime is installed as a - # package. This bit claude-code (5 imports), langgraph, - # deepagents, and gemini-cli on 2026-04-27 — each one a - # separate workspace-stuck-in-provisioning incident. - # - # Source of truth: molecule_runtime/_runtime_modules.json - # inside the published wheel (emitted by - # scripts/build_runtime_package.py). Pulling the manifest - # from PyPI's latest wheel ensures the lint never drifts from - # the rewriter's actual closed list. If the manifest can't be - # fetched (older wheel, PyPI down, etc.), falls back to the - # inline list — known to be correct as of 2026-04-27 — so - # the lint never silently passes on a fetch failure. - # - # Fail-fast: this runs before docker login + buildx setup so - # a bad PR returns red in seconds, not minutes. - shell: bash - run: | - set -eu - - # Fallback list — used only when the manifest fetch fails. - # Mirrors scripts/build_runtime_package.py:TOP_LEVEL_MODULES - # at the time this comment was written. - FALLBACK_MODULES='plugins|adapter_base|config|main|preflight|prompt|coordinator|consolidation|events|heartbeat|transcript_auth|runtime_wedge|watcher|skill_loader|policies|adapters|builtin_tools|executor_helpers|a2a_executor|a2a_client|a2a_tools|a2a_cli|a2a_mcp_server|agent|agents_md|initial_prompt|molecule_ai_status|platform_auth|shared_runtime' - - RUNTIME_MODULES="" - mkdir -p /tmp/runtime-wheel - if pip download --quiet molecule-ai-workspace-runtime --no-deps -d /tmp/runtime-wheel 2>/dev/null; then - WHEEL=$(ls /tmp/runtime-wheel/*.whl 2>/dev/null | head -1) - if [ -n "$WHEEL" ]; then - # Pull both top_level + subpackage names; both can be bare-imported. - RUNTIME_MODULES=$(unzip -p "$WHEEL" molecule_runtime/_runtime_modules.json 2>/dev/null \ - | python3 -c "import sys,json; m=json.load(sys.stdin); print('|'.join(sorted(set(m['top_level_modules']) | set(m['subpackages']))))" 2>/dev/null || echo "") - fi - fi - - if [ -n "$RUNTIME_MODULES" ]; then - echo "::notice::lint module list pulled from molecule-ai-workspace-runtime wheel manifest" - else - RUNTIME_MODULES="$FALLBACK_MODULES" - echo "::warning::could not read _runtime_modules.json from PyPI wheel — using inline fallback list" - fi - - # Match `from import` at start of line OR after any whitespace - # (function-scope imports inside if/try blocks count too). - if HITS=$(grep -nE "^\s*from (${RUNTIME_MODULES}) import" *.py 2>/dev/null); then - echo "::error::Bare imports of runtime modules found — must use \`from molecule_runtime. import\`" - echo "$HITS" | sed 's/^/ /' - echo "::error::Fix: prefix each match with 'molecule_runtime.' (e.g. 'from plugins' → 'from molecule_runtime.plugins')." - exit 1 - fi - echo "::notice::✓ no bare imports of runtime modules in template *.py files" - - - name: Log in to GHCR - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Build template image (load for smoke test, do not push yet) - # Build into the runner's local docker first so the smoke test can - # actually boot the image. We push :latest + :sha-* only AFTER the - # smoke test passes — this is the gate that prevents broken images - # from poisoning :latest. Background: 2026-04-27 outage where the - # template's adapter.py imported a symbol (RuntimeCapabilities) - # that the published runtime didn't yet export. The old smoke - # test only inspected the entrypoint string, so the broken image - # shipped to GHCR and every workspace provision hung. - uses: docker/build-push-action@v6 - with: - context: . - file: ./Dockerfile - platforms: linux/amd64 - load: true - push: false - tags: ${{ steps.tags.outputs.image }}:sha-${{ steps.tags.outputs.sha }} - cache-from: type=gha - cache-to: type=gha,mode=max - # RUNTIME_VERSION is empty by default. When the cascade fires - # (or workflow_dispatch is invoked with a version), it's the - # exact runtime version about to be installed. Forwarded as a - # build-arg so Dockerfiles that declare `ARG RUNTIME_VERSION` - # get cache-key invalidation per-version. Templates that - # don't declare the ARG silently ignore it (no breakage). - build-args: | - RUNTIME_VERSION=${{ inputs.runtime_version }} - labels: | - org.opencontainers.image.source=https://github.com/${{ github.repository }} - org.opencontainers.image.revision=${{ github.sha }} - org.opencontainers.image.description=Molecule AI workspace template — ${{ steps.tags.outputs.runtime }} runtime - - - name: Smoke test — boot image and import every /app/*.py - # The real boot test. Imports every Python module at /app/ inside - # the image, which exercises: - # - adapter.py exists, no syntax errors, all module-level - # imports resolve against the pip-installed runtime version - # (catches version skew — symbol added to runtime but PyPI - # not yet republished, etc.) - # - executor.py / cli_executor.py / claude_sdk_executor.py / - # etc. — sibling modules adapter.py imports lazily inside - # create_executor(). Plain `import adapter` doesn't catch - # bugs there because they're behind `def create_executor`. - # This bit hermes (a2a-sdk migration) and langgraph - # (LangGraphA2AExecutor bare import) on 2026-04-27. - # - cross-cutting: any bare `from ` (the lint - # above catches these statically; this catches them at - # resolution time too, plus any imports of third-party - # packages that the lint can't reason about). - # We bypass the gosu/agent entrypoint with --entrypoint sh - # because import smoke doesn't need workspace permissions. - shell: bash - env: - IMAGE: ${{ steps.tags.outputs.image }}:sha-${{ steps.tags.outputs.sha }} - run: | - set -eu - docker run --rm --entrypoint sh "${IMAGE}" -c ' - set -e - cd /app - for f in *.py; do - [ "$f" = "__init__.py" ] && continue - mod="${f%.py}" - python3 -c "import $mod" || { echo "::error::failed to import $mod"; exit 1; } - echo " ✓ $mod" - done - ' - echo "::notice::✓ ${IMAGE} all /app/*.py modules import cleanly against installed runtime" - - - name: Boot smoke — execute() against stub deps (#2275, task #131) - # The static import smoke above only IMPORTs /app/*.py — lazy - # imports buried inside `async def execute(...)` bodies (e.g. - # `from a2a.types import FilePart`) NEVER evaluate at static- - # import time. The 2026-04-2x v0→v1 a2a-sdk migration shipped 5 - # such regressions in templates that all looked fine at module- - # load smoke (claude-code, langgraph, deepagents, gemini-cli, - # hermes — every one a separate provisioning incident). - # - # This step boots the image with MOLECULE_SMOKE_MODE=1, which - # routes molecule-runtime through smoke_mode.run_executor_smoke() - # — invokes executor.execute(stub_ctx, stub_queue) once with a - # short timeout. Healthy import tree → execution proceeds far - # enough to hit a network boundary and times out (exit 0). - # Broken lazy import → ImportError/ModuleNotFoundError from - # inside the executor body (exit 1). - # - # Universal turn-smoke (task #131): run_executor_smoke also - # consults runtime_wedge.is_wedged() at the end of every result - # path and upgrades a provisional PASS to FAIL when an adapter - # marked the runtime wedged. Catches PR-25-class regressions - # (claude-agent-sdk init wedge from a malformed CLI argv) where - # the SDK takes 60s to time out on `initialize()` — the outer - # wait_for must outlast that handshake so the adapter's wedge - # catch arm runs before the smoke gives up. That's why the - # smoke timeout is 90s (NOT the original 10s) and the outer - # `timeout` wrapper is 120s (NOT 60s). Lowering either back - # makes this gate blind to init-wedge bugs again — confirm with - # an injected wedge in test_smoke_mode.py before changing. - # - # Requires runtime >= 0.1.60 (the version that introduced - # smoke_mode). Older runtimes silently no-op and would hang on - # uvicorn, so we detect the module first and skip if absent — - # this lets templates pinned to older runtimes continue to - # publish without this gate flipping red, while every fresh - # cascade-triggered build (which forwards the just-published - # version as RUNTIME_VERSION) gets the gate automatically. - # - # Wrapped in `timeout` as a belt-and-suspenders safety net in - # case smoke_mode itself wedges — runner shouldn't hang - # indefinitely on a single template. - shell: bash - env: - IMAGE: ${{ steps.tags.outputs.image }}:sha-${{ steps.tags.outputs.sha }} - run: | - set -eu - - HAS_SMOKE_MODE=$(docker run --rm --entrypoint sh "${IMAGE}" -c \ - 'python3 -c "import molecule_runtime.smoke_mode" >/dev/null 2>&1 && echo yes || echo no') - if [ "${HAS_SMOKE_MODE}" = "no" ]; then - echo "::warning::installed runtime predates molecule-core#2275 (no molecule_runtime.smoke_mode); skipping boot smoke. Bump requirements.txt to molecule-ai-workspace-runtime>=0.1.60 to enable." - exit 0 - fi - - if [ ! -f config.yaml ]; then - echo "::error::config.yaml not found at repo root — boot smoke needs it to populate /configs. Templates without a config.yaml at root cannot be boot-smoked; either add one or skip this gate by setting an old runtime pin." - exit 1 - fi - - # Mount the repo's own config.yaml at /configs so the runtime - # can reach create_executor() — that's where the lazy imports - # we want to test actually live. The image's entrypoint drops - # priv from root to agent (uid 1000) before exec'ing - # molecule-runtime, so /configs needs to be readable AND - # traversable from uid 1000. - # - # Use `a+rX` (capital X — only adds x where it's already - # executable, i.e. directories): mktemp -d creates the dir - # with mode 700, so a bare `go+r` would leave the dir - # un-traversable for agent and config.py would - # PermissionError on `Path('/configs/config.yaml').exists()`. - # Mount RW (not :ro) so the entrypoint's `chown -R agent - # /configs` succeeds — its silent chown failure on a :ro - # mount was the original symptom. - SMOKE_CONFIG_DIR=$(mktemp -d) - cp config.yaml "${SMOKE_CONFIG_DIR}/" - chmod -R a+rX "${SMOKE_CONFIG_DIR}" - - # Stub credentials — adapters validate shape at create_executor - # time but the smoke times out before any real call goes out. - # Set the common ones so any adapter that early-validates a - # specific key sees a non-empty value. - # PYTHONPATH=/app mirrors what the platform's provisioner - # injects at workspace startup (workspace-server/internal/ - # provisioner/provisioner.go:563). Without it, - # `importlib.import_module('adapter')` in the runtime's - # preflight check fails with ModuleNotFoundError because - # molecule-runtime is a console_scripts entry point — - # sys.path[0] is /usr/local/bin, NOT /app. The existing - # static import smoke step above doesn't hit this because - # `python3 -c "import $mod"` adds cwd to sys.path; only the - # entry-point invocation needs PYTHONPATH. - set +e - # MOLECULE_SMOKE_TIMEOUT_SECS=90 is calibrated to outlast - # claude-agent-sdk's 60s initialize() handshake (see step - # comment above + workspace/smoke_mode.py top docstring) so - # adapter wedge catch arms run before run_executor_smoke - # gives up. Outer `timeout 120` is the runner-level safety - # net — slightly longer than the inner timeout so a hung - # smoke_mode itself surfaces as exit 124 and gets a clear - # error message instead of just `exit 1`. - timeout 120 docker run --rm \ - -v "${SMOKE_CONFIG_DIR}:/configs" \ - -e WORKSPACE_ID=fake-smoke \ - -e PYTHONPATH=/app \ - -e MOLECULE_SMOKE_MODE=1 \ - -e MOLECULE_SMOKE_TIMEOUT_SECS=90 \ - -e CLAUDE_CODE_OAUTH_TOKEN=sk-fake-smoke-token \ - -e ANTHROPIC_API_KEY=sk-fake-smoke-key \ - -e GEMINI_API_KEY=fake-smoke-key \ - -e OPENAI_API_KEY=sk-fake-smoke-key \ - "${IMAGE}" - rc=$? - set -e - # Cleanup is best-effort: the entrypoint chowns /configs to - # uid 1000 (agent) inside the container, which propagates to - # the host bind-mount, leaving the runner user unable to - # remove the files. Fall back to `sudo rm` and ignore any - # remaining failure — the runner is ephemeral, /tmp is - # cleaned automatically post-job. - rm -rf "${SMOKE_CONFIG_DIR}" 2>/dev/null \ - || sudo rm -rf "${SMOKE_CONFIG_DIR}" 2>/dev/null \ - || true - - if [ "${rc}" -eq 124 ]; then - echo "::error::boot smoke wedged past 120s — smoke_mode itself failed to terminate (look for blocking calls before MOLECULE_SMOKE_TIMEOUT_SECS fires)" - exit 1 - fi - if [ "${rc}" -ne 0 ]; then - echo "::error::boot smoke failed (exit ${rc}) — executor.execute() raised an import error OR an adapter marked runtime_wedge.is_wedged() (PR-25-class init wedge). Check the container log above for the offending lazy import or wedge reason." - exit "${rc}" - fi - echo "::notice::✓ ${IMAGE} executor.execute() smoke passed (imports healthy, no runtime wedge)" - - - name: Push image to GHCR (post-smoke) - # Now that the smoke test passed, push both tags. build-push-action - # reuses the cached build from the load step above, so this is fast - # — it's effectively a layer push, not a rebuild. Same build-args - # passed for cache key consistency. - uses: docker/build-push-action@v6 - with: - context: . - file: ./Dockerfile - platforms: linux/amd64 - push: true - tags: | - ${{ steps.tags.outputs.image }}:latest - ${{ steps.tags.outputs.image }}:sha-${{ steps.tags.outputs.sha }} - cache-from: type=gha - cache-to: type=gha,mode=max - build-args: | - RUNTIME_VERSION=${{ inputs.runtime_version }} - labels: | - org.opencontainers.image.source=https://github.com/${{ github.repository }} - org.opencontainers.image.revision=${{ github.sha }} - org.opencontainers.image.description=Molecule AI workspace template — ${{ steps.tags.outputs.runtime }} runtime diff --git a/.github/workflows/validate-org-template.yml b/.github/workflows/validate-org-template.yml deleted file mode 100644 index 415fe6d..0000000 --- a/.github/workflows/validate-org-template.yml +++ /dev/null @@ -1,77 +0,0 @@ -name: Validate Org Template -on: - workflow_call: - -jobs: - validate: - name: Org template validation - runs-on: ubuntu-latest - timeout-minutes: 10 - steps: - - uses: actions/checkout@v4 - # Canonical validator script lives in molecule-ci, fetched fresh on - # every run. The previous setup expected `.molecule-ci/scripts/` to - # be vendored INTO each org-template repo, which drifted across the - # 5 org-template repos as the validator evolved. Single source of - # truth eliminates that drift class entirely. Mirrors the same - # pattern already used by validate-workspace-template.yml. - # Direct git-clone — see validate-plugin.yml for the rationale. - # Anonymous fetch of public molecule-ci, no actions/checkout idiosyncrasies. - - name: Fetch molecule-ci canonical scripts - run: git clone --depth 1 https://git.moleculesai.app/molecule-ai/molecule-ci.git .molecule-ci-canonical - - uses: actions/setup-python@v5 - with: - python-version: "3.11" - cache: "pip" - cache-dependency-path: .molecule-ci-canonical/.molecule-ci/scripts/requirements.txt - - run: pip install pyyaml -q - - run: python3 .molecule-ci-canonical/.molecule-ci/scripts/validate-org-template.py - - name: Check for secrets - run: | - python3 - << 'PYEOF' - import os, re, sys - from pathlib import Path - - PATTERNS = [ - re.compile(r'''["']sk-ant-[a-zA-Z0-9]{50,}["']'''), - re.compile(r'''["']ghp_[a-zA-Z0-9]{36,}["']'''), - re.compile(r'''["']AKIA[A-Z0-9]{16}["']'''), - re.compile(r'''["'][a-zA-Z0-9/+=]{40}["']'''), - re.compile(r'''["']sk_test_[a-zA-Z0-9]{24,}["']'''), - re.compile(r'''["']Bearer\s+[a-zA-Z0-9_.-]{20,}["']'''), - re.compile(r'''ghp_[a-zA-Z0-9]{36,}'''), - re.compile(r'''sk-ant-[a-zA-Z0-9]{50,}'''), - ] - SKIP_DIRS = {'.molecule-ci', '.molecule-ci-canonical', '.git', 'node_modules', '__pycache__'} - EXTENSIONS = {'.yaml', '.yml', '.md', '.py', '.sh'} - - def is_false_positive(line): - ctx = line.lower() - return '...' in ctx or ' by default, - # and Gitea 404s the cross-repo authenticated request (different from - # GitHub which falls back to anon-public-read). - # (b) Passing token: '' triggers actions/checkout's runtime "Input required - # and not supplied: token" error — the input is documented as - # required:false but the action's runtime calls getInput with - # required:true on its auth-helper path. - # Anonymous git clone of public molecule-ci has neither problem. - # See molecule-ci#1 (lowercase fix) + #2 (token:'' attempt) + - # the post-merge CI run on plugin-molecule-careful-bash@663bf72. - - name: Fetch molecule-ci canonical scripts - run: git clone --depth 1 https://git.moleculesai.app/molecule-ai/molecule-ci.git .molecule-ci-canonical - - uses: actions/setup-python@v5 - with: - python-version: "3.11" - cache: "pip" - cache-dependency-path: .molecule-ci-canonical/.molecule-ci/scripts/requirements.txt - - run: pip install pyyaml -q - - run: python3 .molecule-ci-canonical/.molecule-ci/scripts/validate-plugin.py - - name: Check for secrets - run: | - python3 - << 'PYEOF' - import os, re, sys - from pathlib import Path - - PATTERNS = [ - re.compile(r'''["']sk-ant-[a-zA-Z0-9]{50,}["']'''), - re.compile(r'''["']ghp_[a-zA-Z0-9]{36,}["']'''), - re.compile(r'''["']AKIA[A-Z0-9]{16}["']'''), - re.compile(r'''["'][a-zA-Z0-9/+=]{40}["']'''), - re.compile(r'''["']sk_test_[a-zA-Z0-9]{24,}["']'''), - re.compile(r'''["']Bearer\s+[a-zA-Z0-9_.-]{20,}["']'''), - re.compile(r'''ghp_[a-zA-Z0-9]{36,}'''), - re.compile(r'''sk-ant-[a-zA-Z0-9]{50,}'''), - ] - SKIP_DIRS = {'.molecule-ci', '.molecule-ci-canonical', '.git', 'node_modules', '__pycache__'} - EXTENSIONS = {'.yaml', '.yml', '.md', '.py', '.sh'} - - def is_false_positive(line): - ctx = line.lower() - return '...' in ctx or '/dev/null 2>&1; then - echo "::warning::docker daemon unreachable from runner job container — skipping Docker build smoke (runner-config gap, not a template issue). Fix: see molecule-ai/internal runner-docker-access issue." - exit 0 - fi - docker build -t template-test . --no-cache 2>&1 | tail -5 && echo "✓ Docker build succeeded" - - # Aggregator that emits a single `Template validation` check name — - # the caller's job (`validate:` in each template's ci.yml) plus this - # job's name produces `validate / Template validation`, which is what - # template-repo branch protection has historically required. - # - # Why it's needed: the workflow was refactored from one job into - # validate-static + validate-runtime (with matrix-suffixed display - # names) for fork-PR security. The matrix names never match the - # original required-check name, so PR auto-merge silently hung in - # BLOCKED forever on every template repo (caught while shipping - # fixes for the boot-smoke gate, openclaw#11 + hermes#29). - # - # `if: always()` so it reports out even when validate-static fails — - # without that, GitHub marks the aggregator as SKIPPED and branch - # protection still blocks because the required check never reports - # a final state. - # - # Fork-PR semantics: validate-runtime is intentionally skipped on - # fork PRs (security gate). Treat `skipped` as a pass for the - # aggregator on forks so static-only coverage doesn't make every - # external PR un-mergeable. - template-validation: - name: Template validation - runs-on: ubuntu-latest - needs: [validate-static, validate-runtime] - if: always() - timeout-minutes: 1 - steps: - - name: Aggregate - run: | - static="${{ needs.validate-static.result }}" - runtime="${{ needs.validate-runtime.result }}" - echo "validate-static: $static" - echo "validate-runtime: $runtime" - if [ "$static" != "success" ]; then - echo "::error::validate-static did not succeed: $static" - exit 1 - fi - if [ "$runtime" != "success" ] && [ "$runtime" != "skipped" ]; then - echo "::error::validate-runtime did not succeed: $runtime" - exit 1 - fi - echo "::notice::Template validation aggregate passed (static=$static, runtime=$runtime)" diff --git a/README.md b/README.md index 83fb730..a98a9d8 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ name: CI on: [push, pull_request] jobs: validate: - uses: Molecule-AI/molecule-ci/.github/workflows/validate-plugin.yml@v1 + uses: molecule-ai/molecule-ci/.gitea/workflows/validate-plugin.yml@v1 ``` ### Workspace template repos (`molecule-ai-workspace-template-*`) @@ -23,7 +23,7 @@ name: CI on: [push, pull_request] jobs: validate: - uses: Molecule-AI/molecule-ci/.github/workflows/validate-workspace-template.yml@v1 + uses: molecule-ai/molecule-ci/.gitea/workflows/validate-workspace-template.yml@v1 ``` ### Org template repos (`molecule-ai-org-template-*`) @@ -34,7 +34,7 @@ name: CI on: [push, pull_request] jobs: validate: - uses: Molecule-AI/molecule-ci/.github/workflows/validate-org-template.yml@v1 + uses: molecule-ai/molecule-ci/.gitea/workflows/validate-org-template.yml@v1 ``` ### Any repo with auto-merge enabled @@ -51,7 +51,7 @@ permissions: pull-requests: write jobs: disable-auto-merge-on-push: - uses: Molecule-AI/molecule-ci/.github/workflows/disable-auto-merge-on-push.yml@v1 + uses: molecule-ai/molecule-ci/.gitea/workflows/disable-auto-merge-on-push.yml@v1 ``` When the team lands more PR-time guards in this repo, add them as additional jobs in the same caller — keeps each consuming repo's footprint to one file. diff --git a/docs/template-contract.md b/docs/template-contract.md index da4107f..c622e81 100644 --- a/docs/template-contract.md +++ b/docs/template-contract.md @@ -54,7 +54,7 @@ name: CI on: [push, pull_request] jobs: validate: - uses: Molecule-AI/molecule-ci/.github/workflows/validate-workspace-template.yml@v1 + uses: molecule-ai/molecule-ci/.gitea/workflows/validate-workspace-template.yml@v1 ``` The reusable workflow checks out `molecule-ci` itself (into `.molecule-ci-canonical`) and runs the canonical `validate-workspace-template.py` from there — so no per-repo vendoring of the script is needed. The legacy `.molecule-ci/scripts/` directory in each template repo is being phased out. -- 2.52.0