diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 422af4d7..415abf48 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,7 +9,7 @@ on: jobs: platform-build: name: Platform (Go) - runs-on: ubuntu-latest + runs-on: [self-hosted, macos, arm64] defaults: run: working-directory: platform @@ -43,7 +43,7 @@ jobs: canvas-build: name: Canvas (Next.js) - runs-on: ubuntu-latest + runs-on: [self-hosted, macos, arm64] defaults: run: working-directory: canvas @@ -59,7 +59,7 @@ jobs: mcp-server-build: name: MCP Server (Node.js) - runs-on: ubuntu-latest + runs-on: [self-hosted, macos, arm64] defaults: run: working-directory: mcp-server @@ -75,37 +75,17 @@ jobs: e2e-api: name: E2E API Smoke Test - runs-on: ubuntu-latest - timeout-minutes: 10 - services: - postgres: - # Credentials match .env.example (dev:dev) so local reproduction is - # identical to CI. POSTGRES_DB matches the default there too. - image: postgres:16 - env: - POSTGRES_USER: dev - POSTGRES_PASSWORD: dev - POSTGRES_DB: molecule - ports: - - 5432:5432 - options: >- - --health-cmd "pg_isready -U dev" - --health-interval 10s - --health-timeout 5s - --health-retries 5 - redis: - image: redis:7 - ports: - - 6379:6379 - options: >- - --health-cmd "redis-cli ping" - --health-interval 10s - --health-timeout 5s - --health-retries 5 + runs-on: [self-hosted, macos, arm64] + timeout-minutes: 15 + # `services:` is Linux-only on self-hosted runners — we start postgres + # and redis via `docker run` instead. Ports 15432/16379 avoid collision + # with anything the host may already have on the standard ports. 
env: - DATABASE_URL: postgres://dev:dev@localhost:5432/molecule?sslmode=disable - REDIS_URL: redis://localhost:6379 + DATABASE_URL: postgres://dev:dev@localhost:15432/molecule?sslmode=disable + REDIS_URL: redis://localhost:16379 PORT: "8080" + PG_CONTAINER: molecule-ci-postgres + REDIS_CONTAINER: molecule-ci-redis steps: - uses: actions/checkout@v4 - uses: actions/setup-go@v5 @@ -113,6 +93,38 @@ jobs: go-version: 'stable' cache: true cache-dependency-path: platform/go.sum + - name: Start Postgres (docker) + run: | + docker rm -f "$PG_CONTAINER" 2>/dev/null || true + docker run -d --name "$PG_CONTAINER" \ + -e POSTGRES_USER=dev \ + -e POSTGRES_PASSWORD=dev \ + -e POSTGRES_DB=molecule \ + -p 127.0.0.1:15432:5432 \ + postgres:16 + for i in $(seq 1 30); do + if docker exec "$PG_CONTAINER" pg_isready -h 127.0.0.1 -U dev >/dev/null 2>&1; then # probe over TCP: the image's init-time temp server is socket-only, so a socket probe can pass before the final server is up + echo "Postgres ready after ${i}s" + exit 0 + fi + sleep 1 + done + echo "::error::Postgres did not become ready in 30s" + docker logs "$PG_CONTAINER" || true + exit 1 + - name: Start Redis (docker) + run: | + docker rm -f "$REDIS_CONTAINER" 2>/dev/null || true + docker run -d --name "$REDIS_CONTAINER" -p 127.0.0.1:16379:6379 redis:7 + for i in $(seq 1 15); do + if docker exec "$REDIS_CONTAINER" redis-cli ping 2>/dev/null | grep -q PONG; then + echo "Redis ready after ${i}s" + exit 0 + fi + sleep 1 + done + echo "::error::Redis did not become ready in 15s" + exit 1 - name: Build platform working-directory: platform run: go build -o platform-server ./cmd/server @@ -135,17 +147,9 @@ jobs: exit 1 - name: Assert migrations applied # Migrations auto-run at platform boot. Fail fast if they silently - # didn't — catches future migration-author mistakes (e.g. a new - # privileged op Postgres "dev" can't execute) before the E2E run. - # Uses docker exec into the service container's own psql — avoids - # a 10-20s apt-install step in the runner. + # didn't — catches future migration-author mistakes before the E2E run. 
run: | - pg_container=$(docker ps --filter "ancestor=postgres:16" --format "{{.ID}}" | head -1) - if [ -z "$pg_container" ]; then - echo "::error::Could not find postgres service container" - exit 1 - fi - tables=$(docker exec "$pg_container" psql -U dev -d molecule -tAc "SELECT count(*) FROM information_schema.tables WHERE table_schema='public' AND table_name='workspaces'") + tables=$(docker exec "$PG_CONTAINER" psql -U dev -d molecule -tAc "SELECT count(*) FROM information_schema.tables WHERE table_schema='public' AND table_name='workspaces'") if [ "$tables" != "1" ]; then echo "::error::Migrations did not apply — 'workspaces' table missing" cat platform/platform.log || true @@ -163,22 +167,31 @@ jobs: if [ -f platform/platform.pid ]; then kill "$(cat platform/platform.pid)" 2>/dev/null || true fi + - name: Stop service containers + if: always() + run: | + docker rm -f "$PG_CONTAINER" 2>/dev/null || true + docker rm -f "$REDIS_CONTAINER" 2>/dev/null || true shellcheck: name: Shellcheck (E2E scripts) - runs-on: ubuntu-latest + runs-on: [self-hosted, macos, arm64] steps: - uses: actions/checkout@v4 - name: Run shellcheck on tests/e2e/*.sh - uses: ludeeus/action-shellcheck@master - env: - SHELLCHECK_OPTS: --severity=warning - with: - scandir: tests/e2e + # `ludeeus/action-shellcheck` is a Docker action (Linux-only). We rely + # on shellcheck being pre-installed on the self-hosted runner instead. + run: | + if ! command -v shellcheck >/dev/null 2>&1; then + echo "::error::shellcheck is not installed on the runner" + exit 1 + fi + find tests/e2e -type f -name '*.sh' -print0 \ + | xargs -0 shellcheck --severity=warning canvas-deploy-reminder: name: Canvas Deploy Reminder - runs-on: ubuntu-latest + runs-on: [self-hosted, macos, arm64] needs: canvas-build # Only fires on direct pushes to main (i.e. after a PR merges). # PRs get canvas-build CI but no reminder — no deployment happens on PRs. 
@@ -216,7 +229,12 @@ jobs: python-lint: name: Python Lint & Test - runs-on: ubuntu-latest + runs-on: [self-hosted, macos, arm64] + env: + # setup-python@v5 defaults to /Users/runner/hostedtoolcache which does + # not exist on the self-hosted runner (user is hongming-claw). Point it + # to the runner user's writable directory so Python 3.11 can be cached. + AGENT_TOOLSDIRECTORY: /Users/hongming-claw/hostedtoolcache defaults: run: working-directory: workspace-template diff --git a/.github/workflows/publish-platform-image.yml b/.github/workflows/publish-platform-image.yml index 03479723..eed94c3e 100644 --- a/.github/workflows/publish-platform-image.yml +++ b/.github/workflows/publish-platform-image.yml @@ -32,11 +32,19 @@ env: jobs: build-and-push: - runs-on: ubuntu-latest + runs-on: [self-hosted, macos, arm64] steps: - name: Checkout uses: actions/checkout@v4 + - name: Set up QEMU + # Required on the Apple-silicon self-hosted runner — Fly tenant machines + # pull linux/amd64, and buildx needs binfmt handlers in Docker Desktop's + # VM to emulate amd64 during the build. + uses: docker/setup-qemu-action@v3 + with: + platforms: linux/amd64 + - name: Set up Docker Buildx # Buildx enables cache-from/cache-to via GHA cache and multi-arch # builds without local docker daemon wrangling. @@ -75,10 +83,13 @@ jobs: # GHCR (or vice versa) — each registry's failure mode is isolated. # GHA cache is shared because both steps re-use the same Dockerfile # context + build args. + # Explicit linux/amd64 target: the runner is Apple-silicon (arm64), + # but Fly tenant machines are amd64. QEMU handles the emulation. 
uses: docker/build-push-action@v5 with: context: ./platform file: ./platform/Dockerfile + platforms: linux/amd64 push: true tags: | ${{ env.IMAGE_NAME }}:latest @@ -99,6 +110,7 @@ jobs: with: context: ./platform file: ./platform/Dockerfile + platforms: linux/amd64 push: true tags: | ${{ env.FLY_IMAGE_NAME }}:latest diff --git a/org-templates/molecule-dev/documentation-specialist/system-prompt.md b/org-templates/molecule-dev/documentation-specialist/system-prompt.md new file mode 100644 index 00000000..50b4b38e --- /dev/null +++ b/org-templates/molecule-dev/documentation-specialist/system-prompt.md @@ -0,0 +1,56 @@ +# Documentation Specialist + +**LANGUAGE RULE: Always respond in the same language the user uses.** + +You are the Documentation Specialist for Molecule AI. You own end-to-end documentation across three repos and are the single source of truth for terminology consistency across all public surfaces. + +## Your Three Repos + +| Repo | Visibility | Your Role | +|---|---|---| +| `Molecule-AI/molecule-monorepo` | **Public** | Internal architecture docs, READMEs, API references, `docs/` directory | +| `Molecule-AI/docs` | **Public** | Customer-facing docs site (Fumadocs + Next.js 15, deployed to doc.moleculesai.app) | +| `Molecule-AI/molecule-controlplane` | **⚠️ PRIVATE** | Internal README, PLAN.md, and `docs/saas/` section in the monorepo only | + +## ⚠️ Privacy Rule — Never Violate + +`molecule-controlplane` is a **private** repo. Its source code, file paths, internal endpoints, schema details, infra config, billing/auth implementation details — **none of that** goes into the public docs site or public monorepo README. Public docs describe the SaaS **product** (signup, billing, tenant lifecycle, multi-tenant isolation guarantees) but never the provisioner's internals. When in doubt: don't publish. + +## How You Work + +1. 
**Watch PRs landing on all three repos.** Any PR that touches a public API, template, plugin, channel, or user-facing concept needs a paired docs PR within one cron tick. +2. **Backfill stubs.** The docs site has stub pages marked "Coming soon" — work through them systematically. +3. **Hold the line on terminology.** Every concept has exactly one canonical name across all three repos. Flag and fix inconsistencies. +4. **Keep controlplane docs internal.** Controlplane changes get documented in `controlplane/README.md`, `controlplane/PLAN.md`, and the gated `docs/saas/` section — never in public surfaces. + +## Definition of Done + +- Every public surface has accurate, current, example-rich documentation +- Every merged PR that touches a public surface has a paired docs PR open within one cron tick +- Every stub page eventually gets backfilled +- Controlplane internal docs stay current with recent changes +- Nothing private leaks to public surfaces + +## Workflow + +1. **Receive task from PM** — docs gap, new feature to document, PR to pair, stub to backfill +2. **Pull latest** from all three repos before starting +3. **Write or update** the relevant docs files +4. **Open a PR** on the appropriate repo (monorepo, docs site, or controlplane) +5. **Reference issues** — if your PR closes a docs gap issue, include `Closes #N` in the PR body +6. 
**Never commit to `main`** — always a feature branch + PR + +## Memory + +Use `commit_memory` to track: +- Stub pages on the docs site that need backfilling (with priority) +- Recent platform PRs that have no docs PR yet +- Recent controlplane PRs whose internal README needs updating +- Terminology decisions (canonical names for concepts) + +## Hard Rules + +- **Never leak controlplane internals to public docs** — this is the top constraint +- **Always branch + PR** — never commit directly to main on any repo +- **Pair PRs within one cron tick** — don't let merged platform PRs go undocumented +- **One canonical name per concept** — enforce consistency, file PRs to fix deviations diff --git a/tests/e2e/test_api.sh b/tests/e2e/test_api.sh index cdefa74f..12bccbd4 100644 --- a/tests/e2e/test_api.sh +++ b/tests/e2e/test_api.sh @@ -123,11 +123,11 @@ check "PATCH /workspaces/:id (name)" '"status":"updated"' "$R" R=$(curl -s "$BASE/workspaces/$ECHO_ID") check "Name updated" '"name":"Echo Agent v2"' "$R" -# Test 17: Events -R=$(curl -s "$BASE/events") +# Test 17: Events (#165 / PR #167 — now admin-gated, bearer required) +R=$(curl -s "$BASE/events" -H "Authorization: Bearer $ECHO_TOKEN") check "GET /events (has events)" 'WORKSPACE_ONLINE' "$R" -R=$(curl -s "$BASE/events/$ECHO_ID") +R=$(curl -s "$BASE/events/$ECHO_ID" -H "Authorization: Bearer $ECHO_TOKEN") check "GET /events/:id (has events for echo)" 'WORKSPACE_ONLINE' "$R" # Test 18: Update card @@ -253,8 +253,8 @@ check "List after delete (count=1)" "1" "$COUNT" echo "" echo "--- Bundle Round-Trip Test ---" -# Export the summarizer workspace -BUNDLE=$(curl -s "$BASE/bundles/export/$SUM_ID") +# Export the summarizer workspace (#165 / PR #167 — admin-gated) +BUNDLE=$(curl -s "$BASE/bundles/export/$SUM_ID" -H "Authorization: Bearer $SUM_TOKEN") check "GET /bundles/export/:id" '"name":"Summarizer Agent"' "$BUNDLE" # Capture original config for comparison @@ -321,8 +321,8 @@ check "Register re-imported workspace" 
'"status":"registered"' "$R" # revoked when SUM_ID was deleted above — use this one for cleanup instead. NEW_TOKEN=$(echo "$R" | e2e_extract_token) -# Re-export and verify agent_card survives the round-trip -REBUNDLE=$(curl -s "$BASE/bundles/export/$NEW_ID") +# Re-export and verify agent_card survives the round-trip (#165 / PR #167 — admin-gated) +REBUNDLE=$(curl -s "$BASE/bundles/export/$NEW_ID" -H "Authorization: Bearer $NEW_TOKEN") check "Re-exported bundle has agent_card" '"agent_card"' "$REBUNDLE" # Clean up — use the token just issued to the re-imported workspace