perf(ci): move all public-repo workflows to ubuntu-latest
molecule-core is a public repo — GHA-hosted minutes are free. The self-hosted Mac mini was only in play to dodge GHA rate limits (memory feedback_selfhosted_runner), but for these specific workflows it came with real costs:

- Docker-push workflows emulated linux/amd64 from arm64 via QEMU — every canvas + platform image build ran ~2-3x slower than native.
- Six PRs' worth of keychain-avoidance hacks in publish-* because `docker login` on macOS writes to osxkeychain unconditionally, and the Mac mini's launchd user-agent keychain is locked.
- Homebrew pin-down environment variables (HOMEBREW_NO_*) sprinkled everywhere to work around the shared /opt/homebrew symlink mess on the runner.
- setup-python@v5 couldn't write to /Users/runner, so ci.yml python-lint resorted to a hand-rolled Homebrew python3.11 dance.
- Single runner → fan-out contention; CodeQL's 45-min analysis fought the canvas publish for the one slot.

Changes across the 7 workflows:

- runs-on: [self-hosted, macos, arm64] → ubuntu-latest (every job).
- publish-canvas-image + publish-workspace-server-image: drop the hand-rolled auths-map step + QEMU setup + buildx v4 → docker/login-action@v3 + setup-buildx@v3. Linux + amd64 target = native build.
- canary-verify + promote-latest: replace `brew install crane` + HOMEBREW_NO_* incantations with imjasonh/setup-crane@v0.4.
- codeql.yml: drop `brew install jq` — jq is preinstalled on ubuntu-latest.
- ci.yml shellcheck: drop the self-hosted existence check — shellcheck is preinstalled via apt.
- ci.yml python-lint: replace the Homebrew python3.11 path dance with actions/setup-python@v5 (which works fine on GHA-hosted), add requirements.txt caching while we're there.
- Remove stale comments referencing "the self-hosted runner", "Mac mini", keychain, osxkeychain, etc.

The self-hosted Mac mini remains in service for private-repo workflows only. Memory feedback_selfhosted_runner updated to reflect the public-repo scope carve-out.

Net -96 lines across the 7 files.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
a8e4afe863
commit
e298393df5
29
.github/workflows/canary-verify.yml
vendored
29
.github/workflows/canary-verify.yml
vendored
@ -34,9 +34,7 @@ jobs:
|
||||
canary-smoke:
|
||||
# Skip when the upstream workflow failed — no image to test against.
|
||||
if: ${{ github.event.workflow_run.conclusion == 'success' || github.event_name == 'workflow_dispatch' }}
|
||||
# Self-hosted mac mini — GitHub-hosted minutes are quota-blocked on
|
||||
# this org (same reason publish/promote-latest moved earlier).
|
||||
runs-on: [self-hosted, macos, arm64]
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
sha: ${{ steps.compute.outputs.sha }}
|
||||
steps:
|
||||
@ -49,11 +47,10 @@ jobs:
|
||||
|
||||
- name: Wait for canary tenants to pick up :staging-<sha>
|
||||
# Poll canary health endpoints every 30s for up to 7 min instead
|
||||
# of a fixed 6-min sleep. Exits as soon as ALL canaries report the
|
||||
# new SHA, freeing the self-hosted runner slot sooner (~2-3 min
|
||||
# typical vs 6 min fixed). Falls back to proceeding after 7 min
|
||||
# even if not all canaries responded — the smoke suite will catch
|
||||
# any that didn't update.
|
||||
# of a fixed 6-min sleep. Exits as soon as ALL canaries report
|
||||
# the new SHA (~2-3 min typical vs 6 min fixed). Falls back to
|
||||
# proceeding after 7 min even if not all canaries responded —
|
||||
# the smoke suite will catch any that didn't update.
|
||||
env:
|
||||
CANARY_TENANT_URLS: ${{ secrets.CANARY_TENANT_URLS }}
|
||||
EXPECTED_SHA: ${{ steps.compute.outputs.sha }}
|
||||
@ -114,21 +111,9 @@ jobs:
|
||||
# the runner) that can retag remotely with a single API call each.
|
||||
needs: canary-smoke
|
||||
if: ${{ needs.canary-smoke.result == 'success' }}
|
||||
runs-on: [self-hosted, macos, arm64]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Ensure crane installed
|
||||
# Matches the install pattern in promote-latest.yml — brew
|
||||
# cleanup exits non-zero on the shared runner's /opt/homebrew
|
||||
# symlinks, so skip it.
|
||||
env:
|
||||
HOMEBREW_NO_INSTALL_CLEANUP: "1"
|
||||
HOMEBREW_NO_AUTO_UPDATE: "1"
|
||||
HOMEBREW_NO_ENV_HINTS: "1"
|
||||
run: |
|
||||
if ! command -v crane >/dev/null 2>&1; then
|
||||
brew install crane
|
||||
fi
|
||||
crane version
|
||||
- uses: imjasonh/setup-crane@v0.4
|
||||
|
||||
- name: GHCR login
|
||||
run: |
|
||||
|
||||
43
.github/workflows/ci.yml
vendored
43
.github/workflows/ci.yml
vendored
@ -7,17 +7,14 @@ on:
|
||||
branches: [main, staging]
|
||||
|
||||
# Cancel in-progress CI runs when a new commit arrives on the same ref.
|
||||
# This prevents multiple stale runs from queuing behind each other and
|
||||
# monopolising the self-hosted macOS arm64 runner.
|
||||
# This prevents stale runs from queuing behind each other.
|
||||
concurrency:
|
||||
group: ci-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
# Detect which paths changed so downstream jobs can skip when only
|
||||
# docs/markdown files were modified. Uses plain `git diff` — no macOS
|
||||
# dependency, so this runs on ubuntu-latest to free the self-hosted
|
||||
# macOS arm64 runner for jobs that genuinely need it.
|
||||
# docs/markdown files were modified.
|
||||
changes:
|
||||
name: Detect changes
|
||||
runs-on: ubuntu-latest
|
||||
@ -62,7 +59,7 @@ jobs:
|
||||
name: Platform (Go)
|
||||
needs: changes
|
||||
if: needs.changes.outputs.platform == 'true'
|
||||
runs-on: [self-hosted, macos, arm64]
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: workspace-server
|
||||
@ -98,7 +95,7 @@ jobs:
|
||||
name: Canvas (Next.js)
|
||||
needs: changes
|
||||
if: needs.changes.outputs.canvas == 'true'
|
||||
runs-on: [self-hosted, macos, arm64]
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: canvas
|
||||
@ -124,23 +121,18 @@ jobs:
|
||||
name: Shellcheck (E2E scripts)
|
||||
needs: changes
|
||||
if: needs.changes.outputs.scripts == 'true'
|
||||
runs-on: [self-hosted, macos, arm64]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Run shellcheck on tests/e2e/*.sh
|
||||
# `ludeeus/action-shellcheck` is a Docker action (Linux-only). We rely
|
||||
# on shellcheck being pre-installed on the self-hosted runner instead.
|
||||
# shellcheck is pre-installed on ubuntu-latest runners (via apt).
|
||||
run: |
|
||||
if ! command -v shellcheck >/dev/null 2>&1; then
|
||||
echo "::error::shellcheck is not installed on the runner"
|
||||
exit 1
|
||||
fi
|
||||
find tests/e2e -type f -name '*.sh' -print0 \
|
||||
| xargs -0 shellcheck --severity=warning
|
||||
|
||||
canvas-deploy-reminder:
|
||||
name: Canvas Deploy Reminder
|
||||
runs-on: [self-hosted, macos, arm64]
|
||||
runs-on: ubuntu-latest
|
||||
needs: [changes, canvas-build]
|
||||
# Only fires on direct pushes to main (i.e. after staging→main promotion).
|
||||
if: needs.changes.outputs.canvas == 'true' && github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
@ -186,7 +178,7 @@ jobs:
|
||||
name: Python Lint & Test
|
||||
needs: changes
|
||||
if: needs.changes.outputs.python == 'true'
|
||||
runs-on: [self-hosted, macos, arm64]
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
WORKSPACE_ID: test
|
||||
defaults:
|
||||
@ -194,18 +186,13 @@ jobs:
|
||||
working-directory: workspace
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
# setup-python@v5 cannot write to /Users/runner (GitHub-hosted path) on
|
||||
# the self-hosted macOS arm64 runner (user: <runner-user>) and also hits
|
||||
# EACCES on /usr/local/bin due to macOS SIP. Skip it — Homebrew installs
|
||||
# Python 3.11 at /opt/homebrew/opt/python@3.11 which is already on PATH.
|
||||
- name: Verify Python 3.11 (Homebrew)
|
||||
run: |
|
||||
export PATH="/opt/homebrew/opt/python@3.11/bin:/opt/homebrew/bin:$PATH"
|
||||
python3.11 --version
|
||||
echo "/opt/homebrew/opt/python@3.11/bin" >> "$GITHUB_PATH"
|
||||
echo "/opt/homebrew/bin" >> "$GITHUB_PATH"
|
||||
- run: pip3.11 install -r requirements.txt pytest pytest-asyncio pytest-cov
|
||||
- run: python3.11 -m pytest --tb=short -q --cov=. --cov-report=term-missing
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: pip
|
||||
cache-dependency-path: workspace/requirements.txt
|
||||
- run: pip install -r requirements.txt pytest pytest-asyncio pytest-cov
|
||||
- run: python -m pytest --tb=short -q --cov=. --cov-report=term-missing
|
||||
|
||||
# SDK + plugin validation moved to standalone repo:
|
||||
# github.com/Molecule-AI/molecule-sdk-python
|
||||
|
||||
25
.github/workflows/codeql.yml
vendored
25
.github/workflows/codeql.yml
vendored
@ -8,11 +8,10 @@ name: CodeQL
|
||||
# scanned. This workflow fills that gap by explicitly scanning both
|
||||
# branches on push and PR.
|
||||
#
|
||||
# Runs on the self-hosted mac mini (matches the org-wide Code Quality
|
||||
# runner-label config). GHAS is NOT enabled on this repo, so results
|
||||
# are not uploaded to the Security tab — the scan fails the PR check
|
||||
# on findings, and the SARIF is kept as a workflow artifact for
|
||||
# triage.
|
||||
# Runs on ubuntu-latest (GHA-hosted — public repo, free). GHAS is NOT
|
||||
# enabled on this repo, so results are not uploaded to the Security
|
||||
# tab — the scan fails the PR check on findings, and the SARIF is
|
||||
# kept as a workflow artifact for triage.
|
||||
|
||||
on:
|
||||
push:
|
||||
@ -24,8 +23,8 @@ on:
|
||||
- cron: '30 1 * * 0'
|
||||
|
||||
# Workflow-level concurrency: only one CodeQL run per branch/PR at a time.
|
||||
# `cancel-in-progress: false` queues new runs — the 45-min analysis is the
|
||||
# longest CI occupant and fights the single mac mini runner the hardest.
|
||||
# `cancel-in-progress: false` queues new runs so a quick follow-up push
|
||||
# doesn't nuke a 45-min analysis mid-flight.
|
||||
concurrency:
|
||||
group: codeql-${{ github.ref }}
|
||||
cancel-in-progress: false
|
||||
@ -38,7 +37,7 @@ permissions:
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze (${{ matrix.language }})
|
||||
runs-on: [self-hosted, macos, arm64]
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 45
|
||||
|
||||
strategy:
|
||||
@ -61,15 +60,7 @@ jobs:
|
||||
path: molecule-ai-plugin-github-app-auth
|
||||
token: ${{ secrets.PLUGIN_REPO_PAT || secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Ensure jq installed
|
||||
# Follows the crane-install pattern in promote-latest.yml.
|
||||
# HOMEBREW_NO_* flags skip the cleanup that fails on the shared
|
||||
# runner's /opt/homebrew symlinks.
|
||||
env:
|
||||
HOMEBREW_NO_INSTALL_CLEANUP: "1"
|
||||
HOMEBREW_NO_AUTO_UPDATE: "1"
|
||||
HOMEBREW_NO_ENV_HINTS: "1"
|
||||
run: command -v jq >/dev/null || brew install jq
|
||||
# jq is pre-installed on ubuntu-latest — no setup step needed.
|
||||
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v3
|
||||
|
||||
11
.github/workflows/e2e-api.yml
vendored
11
.github/workflows/e2e-api.yml
vendored
@ -37,11 +37,14 @@ concurrency:
|
||||
jobs:
|
||||
e2e-api:
|
||||
name: E2E API Smoke Test
|
||||
runs-on: [self-hosted, macos, arm64]
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 15
|
||||
# `services:` is Linux-only on self-hosted runners — we start postgres
|
||||
# and redis via `docker run` instead. Ports 15432/16379 avoid collision
|
||||
# with anything the host may already have on the standard ports.
|
||||
# Postgres + Redis run as sibling containers via `docker run`. Could
|
||||
# switch to a `services:` block now that we're on Linux, but the
|
||||
# explicit start-and-wait gives us pg_isready / PING readiness checks
|
||||
# that match the 30-tick timeouts the rest of the job expects. Ports
|
||||
# 15432/16379 avoid collision with anything the host may already have
|
||||
# on the standard ports.
|
||||
env:
|
||||
DATABASE_URL: postgres://dev:dev@localhost:15432/molecule?sslmode=disable
|
||||
REDIS_URL: redis://localhost:16379
|
||||
|
||||
19
.github/workflows/promote-latest.yml
vendored
19
.github/workflows/promote-latest.yml
vendored
@ -32,24 +32,9 @@ env:
|
||||
|
||||
jobs:
|
||||
promote:
|
||||
# Self-hosted mac mini — GitHub-hosted minutes are currently quota-
|
||||
# blocked. mac mini already has crane available via homebrew.
|
||||
runs-on: [self-hosted, macos, arm64]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Ensure crane installed
|
||||
# HOMEBREW_NO_INSTALL_CLEANUP + HOMEBREW_NO_AUTO_UPDATE stop
|
||||
# brew from touching unrelated symlinks in /opt/homebrew owned
|
||||
# by other users on this shared runner — cleanup was exiting
|
||||
# non-zero even though crane itself installed successfully.
|
||||
env:
|
||||
HOMEBREW_NO_INSTALL_CLEANUP: "1"
|
||||
HOMEBREW_NO_AUTO_UPDATE: "1"
|
||||
HOMEBREW_NO_ENV_HINTS: "1"
|
||||
run: |
|
||||
if ! command -v crane >/dev/null 2>&1; then
|
||||
brew install crane
|
||||
fi
|
||||
crane version
|
||||
- uses: imjasonh/setup-crane@v0.4
|
||||
|
||||
- name: GHCR login
|
||||
run: |
|
||||
|
||||
50
.github/workflows/publish-canvas-image.yml
vendored
50
.github/workflows/publish-canvas-image.yml
vendored
@ -39,56 +39,20 @@ env:
|
||||
jobs:
|
||||
build-and-push:
|
||||
name: Build & push canvas image
|
||||
runs-on: [self-hosted, macos, arm64]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Configure GHCR auth (write auths map; do NOT call docker login)
|
||||
# `docker login` on macOS unconditionally writes credentials to the
|
||||
# osxkeychain credential helper, even when DOCKER_CONFIG/config.json
|
||||
# declares `credsStore: ""` and even when invoked with `--config`.
|
||||
# Verified locally 2026-04-16 — after a successful login, Docker
|
||||
# rewrites the same config file to:
|
||||
# { "auths": { "ghcr.io": {} }, "credsStore": "osxkeychain" }
|
||||
# i.e. the auth lives in the Keychain, not the config file. The
|
||||
# Mac mini runner is a launchd user agent with a locked Keychain,
|
||||
# so storage fails with `User interaction is not allowed (-25308)`.
|
||||
#
|
||||
# Six prior PRs (#273, #319, #322, #341, #484, #486) all kept calling
|
||||
# `docker login` and tried to coerce credsStore — none worked.
|
||||
# The only reliable fix is to skip `docker login` entirely and write
|
||||
# the auth string directly. `docker/build-push-action@v6` and the
|
||||
# daemon honor the `auths` map for push without needing login.
|
||||
shell: bash
|
||||
env:
|
||||
GHCR_USER: ${{ github.actor }}
|
||||
GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
set -eu
|
||||
mkdir -p "${RUNNER_TEMP}/docker-config"
|
||||
AUTH=$(printf '%s:%s' "${GHCR_USER}" "${GHCR_TOKEN}" | base64)
|
||||
umask 077
|
||||
cat > "${RUNNER_TEMP}/docker-config/config.json" <<JSON
|
||||
{ "auths": { "ghcr.io": { "auth": "${AUTH}" } } }
|
||||
JSON
|
||||
echo "DOCKER_CONFIG=${RUNNER_TEMP}/docker-config" >> "${GITHUB_ENV}"
|
||||
# Diagnostics that don't leak the token.
|
||||
echo "=== docker ==="
|
||||
command -v docker || echo "(docker not in PATH)"
|
||||
docker --version 2>&1 || true
|
||||
ls -la /usr/local/bin/docker /opt/homebrew/bin/docker 2>&1 || true
|
||||
echo "=== auths registries (no values) ==="
|
||||
grep -o '"[a-zA-Z0-9.-]*\.io"' "${RUNNER_TEMP}/docker-config/config.json" || true
|
||||
|
||||
- name: Set up QEMU
|
||||
# Apple-silicon runner building linux/amd64 images for x86 hosts.
|
||||
uses: docker/setup-qemu-action@v4
|
||||
- name: Log in to GHCR
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
platforms: linux/amd64
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v4
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Compute tags
|
||||
id: tags
|
||||
|
||||
@ -24,7 +24,7 @@ env:
|
||||
|
||||
jobs:
|
||||
build-and-push:
|
||||
runs-on: [self-hosted, macos, arm64]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
@ -35,7 +35,7 @@ jobs:
|
||||
# the Go module has a `replace` directive pointing at /plugin inside
|
||||
# the image. Pre-repo-split the plugin lived in the monorepo; the
|
||||
# 2026-04-18 restructure moved it out but didn't add this clone step
|
||||
# — which is why publish has been failing since then.
|
||||
# — which is why publish was failing after that restructure.
|
||||
#
|
||||
# Uses a fine-grained PAT (PLUGIN_REPO_PAT) because the plugin repo
|
||||
# is private and the default GITHUB_TOKEN is scoped to THIS repo.
|
||||
@ -48,26 +48,15 @@ jobs:
|
||||
path: molecule-ai-plugin-github-app-auth
|
||||
token: ${{ secrets.PLUGIN_REPO_PAT || secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Configure GHCR auth
|
||||
shell: bash
|
||||
env:
|
||||
GHCR_USER: ${{ github.actor }}
|
||||
GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
set -eu
|
||||
mkdir -p "${RUNNER_TEMP}/docker-config"
|
||||
GHCR_AUTH=$(printf '%s:%s' "${GHCR_USER}" "${GHCR_TOKEN}" | base64)
|
||||
umask 077
|
||||
printf '{"auths":{"ghcr.io":{"auth":"%s"}}}' "${GHCR_AUTH}" > "${RUNNER_TEMP}/docker-config/config.json"
|
||||
echo "DOCKER_CONFIG=${RUNNER_TEMP}/docker-config" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v4
|
||||
- name: Log in to GHCR
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
platforms: linux/amd64
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v4
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Compute tags
|
||||
id: tags
|
||||
|
||||
Loading…
Reference in New Issue
Block a user