molecule-core/.github/workflows/ci.yml
Hongming Wang 3d0e093b11 chore(ci): serialize e2e-api across runs to prevent docker collision
Now that the Molecule-AI org has two self-hosted Apple-silicon runners
(`hongming-m1-mini` + `hongming-m1-mini-2`) servicing the same label set,
two CI runs could execute the e2e-api job concurrently. Each run starts
fixed-name docker containers (`molecule-ci-postgres`, `molecule-ci-redis`)
bound to host ports 15432/16379 — a collision means the second run fails
with "container name already in use" or "port already in use".

Adds a workflow-level `concurrency: e2e-api` group to the job so GitHub
Actions serializes e2e-api executions globally regardless of which runner
picks them up. `cancel-in-progress: false` ensures later runs queue
rather than cancelling the in-flight one (we want every PR's e2e check
to actually execute, not get skipped by a newer push).

Tradeoff: e2e-api is now effectively single-threaded across the whole
org. Measured duration is ~1-2 min per run, so the added serialization
latency is small relative to total CI wall time. All other jobs still
parallelize across both runners.
2026-04-15 17:06:41 -07:00

272 lines
10 KiB
YAML

name: CI
on:
push:
branches: [main]
pull_request:
branches: [main]
jobs:
platform-build:
name: Platform (Go)
runs-on: [self-hosted, macos, arm64]
defaults:
run:
working-directory: platform
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version: 'stable'
- run: go mod download
- run: go build ./cmd/server
- run: go build -o molecli ./cmd/cli
- run: go vet ./...
- name: Run golangci-lint
uses: golangci/golangci-lint-action@v4
with:
version: latest
working-directory: platform
args: --timeout 3m
continue-on-error: true # Warn but don't block until codebase is clean
- name: Run tests with race detection and coverage
run: go test -race -coverprofile=coverage.out ./...
- name: Check coverage baseline
run: |
COVERAGE=$(go tool cover -func=coverage.out | grep total | awk '{print $3}' | sed 's/%//')
echo "Total coverage: ${COVERAGE}%"
THRESHOLD=25
awk "BEGIN{if ($COVERAGE < $THRESHOLD) exit 1}" || {
echo "::error::Coverage ${COVERAGE}% is below the ${THRESHOLD}% threshold"
exit 1
}
canvas-build:
name: Canvas (Next.js)
runs-on: [self-hosted, macos, arm64]
defaults:
run:
working-directory: canvas
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: '22'
- run: rm -f package-lock.json && npm install
- run: npm run build
- name: Run tests
run: npx vitest run
mcp-server-build:
name: MCP Server (Node.js)
runs-on: [self-hosted, macos, arm64]
defaults:
run:
working-directory: mcp-server
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: '22'
cache: npm
cache-dependency-path: mcp-server/package-lock.json
- run: npm ci
- run: npm run build
e2e-api:
name: E2E API Smoke Test
runs-on: [self-hosted, macos, arm64]
timeout-minutes: 15
# Serialize across ALL CI runs globally. With multiple self-hosted
# runners, two e2e-api jobs could otherwise execute concurrently and
# collide on the fixed docker container names ($PG_CONTAINER /
# $REDIS_CONTAINER) and host ports 15432/16379. `cancel-in-progress:
# false` means later runs queue rather than cancel the current one.
concurrency:
group: e2e-api
cancel-in-progress: false
# `services:` is Linux-only on self-hosted runners — we start postgres
# and redis via `docker run` instead. Ports 15432/16379 avoid collision
# with anything the host may already have on the standard ports.
env:
DATABASE_URL: postgres://dev:dev@localhost:15432/molecule?sslmode=disable
REDIS_URL: redis://localhost:16379
PORT: "8080"
PG_CONTAINER: molecule-ci-postgres
REDIS_CONTAINER: molecule-ci-redis
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version: 'stable'
cache: true
cache-dependency-path: platform/go.sum
- name: Start Postgres (docker)
run: |
docker rm -f "$PG_CONTAINER" 2>/dev/null || true
docker run -d --name "$PG_CONTAINER" \
-e POSTGRES_USER=dev \
-e POSTGRES_PASSWORD=dev \
-e POSTGRES_DB=molecule \
-p 15432:5432 \
postgres:16
for i in $(seq 1 30); do
if docker exec "$PG_CONTAINER" pg_isready -U dev >/dev/null 2>&1; then
echo "Postgres ready after ${i}s"
exit 0
fi
sleep 1
done
echo "::error::Postgres did not become ready in 30s"
docker logs "$PG_CONTAINER" || true
exit 1
- name: Start Redis (docker)
run: |
docker rm -f "$REDIS_CONTAINER" 2>/dev/null || true
docker run -d --name "$REDIS_CONTAINER" -p 16379:6379 redis:7
for i in $(seq 1 15); do
if docker exec "$REDIS_CONTAINER" redis-cli ping 2>/dev/null | grep -q PONG; then
echo "Redis ready after ${i}s"
exit 0
fi
sleep 1
done
echo "::error::Redis did not become ready in 15s"
exit 1
- name: Build platform
working-directory: platform
run: go build -o platform-server ./cmd/server
- name: Start platform (background)
working-directory: platform
run: |
./platform-server > platform.log 2>&1 &
echo $! > platform.pid
- name: Wait for /health
run: |
for i in $(seq 1 30); do
if curl -sf http://localhost:8080/health > /dev/null; then
echo "Platform up after ${i}s"
exit 0
fi
sleep 1
done
echo "::error::Platform did not become healthy in 30s"
cat platform/platform.log || true
exit 1
- name: Assert migrations applied
# Migrations auto-run at platform boot. Fail fast if they silently
# didn't — catches future migration-author mistakes before the E2E run.
run: |
tables=$(docker exec "$PG_CONTAINER" psql -U dev -d molecule -tAc "SELECT count(*) FROM information_schema.tables WHERE table_schema='public' AND table_name='workspaces'")
if [ "$tables" != "1" ]; then
echo "::error::Migrations did not apply — 'workspaces' table missing"
cat platform/platform.log || true
exit 1
fi
echo "Migrations OK (workspaces table present)"
- name: Run E2E API tests
run: bash tests/e2e/test_api.sh
- name: Dump platform log on failure
if: failure()
run: cat platform/platform.log || true
- name: Stop platform
if: always()
run: |
if [ -f platform/platform.pid ]; then
kill "$(cat platform/platform.pid)" 2>/dev/null || true
fi
- name: Stop service containers
if: always()
run: |
docker rm -f "$PG_CONTAINER" 2>/dev/null || true
docker rm -f "$REDIS_CONTAINER" 2>/dev/null || true
shellcheck:
name: Shellcheck (E2E scripts)
runs-on: [self-hosted, macos, arm64]
steps:
- uses: actions/checkout@v4
- name: Run shellcheck on tests/e2e/*.sh
# `ludeeus/action-shellcheck` is a Docker action (Linux-only). We rely
# on shellcheck being pre-installed on the self-hosted runner instead.
run: |
if ! command -v shellcheck >/dev/null 2>&1; then
echo "::error::shellcheck is not installed on the runner"
exit 1
fi
find tests/e2e -type f -name '*.sh' -print0 \
| xargs -0 shellcheck --severity=warning
canvas-deploy-reminder:
name: Canvas Deploy Reminder
runs-on: [self-hosted, macos, arm64]
needs: canvas-build
# Only fires on direct pushes to main (i.e. after a PR merges).
# PRs get canvas-build CI but no reminder — no deployment happens on PRs.
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
permissions:
# Required to post commit comments via the GitHub API.
contents: write
steps:
- name: Post deploy reminder as commit comment
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
COMMIT_SHA: ${{ github.sha }}
RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
run: |
# Write body to a temp file — avoids backtick escaping in shell.
cat > /tmp/deploy-reminder.md << 'BODY'
## Canvas build passed ✅ — deploy required
The canvas container is **not auto-deployed**. Merged canvas changes are invisible until the host container is rebuilt.
Run this on the host machine to apply:
```bash
cd /g/personal_programs/molecule-monorepo
git pull origin main
docker compose build canvas && docker compose up -d canvas
```
BODY
printf '\n> Posted automatically by CI · commit `%s` · [build log](%s)\n' \
"$COMMIT_SHA" "$RUN_URL" >> /tmp/deploy-reminder.md
gh api \
--method POST \
"repos/${{ github.repository }}/commits/${{ github.sha }}/comments" \
--field "body=@/tmp/deploy-reminder.md"
python-lint:
name: Python Lint & Test
runs-on: [self-hosted, macos, arm64]
defaults:
run:
working-directory: workspace-template
steps:
- uses: actions/checkout@v4
# setup-python@v5 cannot write to /Users/runner (GitHub-hosted path) on
# the self-hosted macOS arm64 runner (user: hongming-claw) and also hits
# EACCES on /usr/local/bin due to macOS SIP. Skip it — Homebrew installs
# Python 3.11 at /opt/homebrew/opt/python@3.11 which is already on PATH.
- name: Verify Python 3.11 (Homebrew)
run: |
export PATH="/opt/homebrew/opt/python@3.11/bin:/opt/homebrew/bin:$PATH"
python3.11 --version
echo "/opt/homebrew/opt/python@3.11/bin" >> "$GITHUB_PATH"
echo "/opt/homebrew/bin" >> "$GITHUB_PATH"
- run: pip3.11 install -r requirements.txt pytest pytest-asyncio pytest-cov
- run: python3.11 -m pytest --tb=short -q --cov=. --cov-report=term-missing
# Lint first-party plugins. The validator checks each plugin
# against the format it declares — currently agentskills.io for all
# of ours, but the same command covers any future shape that lands
# under a sibling adapter (MCP, DeepAgents sub-agent, etc.).
- name: Install molecule-plugin SDK
working-directory: sdk/python
run: pip3.11 install -e .
- name: Lint first-party plugins
working-directory: ${{ github.workspace }}
run: python3.11 -m molecule_plugin validate plugins/molecule-dev plugins/superpowers plugins/ecc
- name: Run SDK tests
working-directory: sdk/python
run: python3.11 -m pytest --tb=short -q