name: CI

on:
  push:
    branches: [main, staging]
  pull_request:
    branches: [main, staging]
  # GitHub merge queue fires `merge_group` for the queue's pre-merge CI run.
  # Required so the queue gets a real check result instead of a false-green
  # from the absence of a triggered workflow. Safe to add unconditionally —
  # the event simply doesn't fire until the queue is enabled on the branch.
  merge_group:
    types: [checks_requested]

# Cancel in-progress CI runs when a new commit arrives on the same ref.
# This prevents stale runs from queuing behind each other. The merge_group
# refs (refs/heads/gh-readonly-queue/...) get their own concurrency group
# automatically because github.ref differs from the PR ref.
concurrency:
  group: ci-${{ github.ref }}
  cancel-in-progress: true

jobs:
  # Detect which paths changed so downstream jobs can skip when only
  # docs/markdown files were modified.
  changes:
    name: Detect changes
    runs-on: ubuntu-latest
    outputs:
      platform: ${{ steps.check.outputs.platform }}
      canvas: ${{ steps.check.outputs.canvas }}
      python: ${{ steps.check.outputs.python }}
      scripts: ${{ steps.check.outputs.scripts }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          # Full history so `git diff` can reach the base commit.
          fetch-depth: 0
      - id: check
        # Event context is handed to the script through env vars rather than
        # being interpolated into the shell text, so the values are always
        # treated as data by bash.
        env:
          EVENT_NAME: ${{ github.event_name }}
          EVENT_BEFORE: ${{ github.event.before }}
          PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
        run: |
          # For PR events: diff against the base branch (not HEAD~1 of the branch,
          # which may be unrelated after force-pushes). When a push updates a PR,
          # both pull_request and push events fire — prefer the PR base so that
          # the diff is always computed against the actual merge base, not the
          # previous SHA on the branch which may be on a different history line.
          BASE="${GITHUB_BASE_REF:-$EVENT_BEFORE}"
          # GITHUB_BASE_REF is set by GitHub for PR events (the base branch name).
          # For pull_request events we use the stored base.sha; for push events
          # (or when base.sha is unavailable) fall back to github.event.before.
          if [ "$EVENT_NAME" = "pull_request" ] && [ -n "$PR_BASE_SHA" ]; then
            BASE="$PR_BASE_SHA"
          fi
          # Fallback: if BASE is empty or all zeros (new branch), run everything
          if [ -z "$BASE" ] || echo "$BASE" | grep -qE '^0+$'; then
            echo "platform=true" >> "$GITHUB_OUTPUT"
            echo "canvas=true" >> "$GITHUB_OUTPUT"
            echo "python=true" >> "$GITHUB_OUTPUT"
            echo "scripts=true" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          # If the diff itself fails, pretend the workflow file changed: every
          # filter below matches ci.yml, so all jobs run their real steps.
          DIFF=$(git diff --name-only "$BASE" HEAD 2>/dev/null || echo ".github/workflows/ci.yml")
          echo "platform=$(echo "$DIFF" | grep -qE '^workspace-server/|^\.github/workflows/ci\.yml$' && echo true || echo false)" >> "$GITHUB_OUTPUT"
          echo "canvas=$(echo "$DIFF" | grep -qE '^canvas/|^\.github/workflows/ci\.yml$' && echo true || echo false)" >> "$GITHUB_OUTPUT"
          echo "python=$(echo "$DIFF" | grep -qE '^workspace/|^\.github/workflows/ci\.yml$' && echo true || echo false)" >> "$GITHUB_OUTPUT"
          echo "scripts=$(echo "$DIFF" | grep -qE '^tests/e2e/|^scripts/|^infra/scripts/|^\.github/workflows/ci\.yml$' && echo true || echo false)" >> "$GITHUB_OUTPUT"

  # Platform (Go) is a required check on staging. Always-run + per-step
  # gating (see Canvas (Next.js) for the rationale and the failure mode
  # this avoids).
  platform-build:
    name: Platform (Go)
    needs: changes
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: workspace-server
    steps:
      # Runs from the workspace root: on this path no checkout happens, so
      # the default working directory (workspace-server) does not exist.
      - if: needs.changes.outputs.platform != 'true'
        working-directory: .
        run: echo "No platform/** changes — skipping real build steps; this job always runs to satisfy the required-check name on branch protection."
      - if: needs.changes.outputs.platform == 'true'
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      - if: needs.changes.outputs.platform == 'true'
        uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff  # v5
        with:
          go-version: 'stable'
      - if: needs.changes.outputs.platform == 'true'
        run: go mod download
      - if: needs.changes.outputs.platform == 'true'
        run: go build ./cmd/server
      # CLI (molecli) moved to standalone repo: github.com/Molecule-AI/molecule-cli
      # go vet and golangci-lint are advisory: continue-on-error keeps the job
      # green while still showing the step as failed, instead of `|| true`
      # hiding every failure (including a missing binary).
      - if: needs.changes.outputs.platform == 'true'
        continue-on-error: true
        run: go vet ./...
      - if: needs.changes.outputs.platform == 'true'
        name: Run golangci-lint
        # NOTE(review): no step in this job installs golangci-lint — confirm
        # it is actually available on the runner image.
        continue-on-error: true
        run: golangci-lint run --timeout 3m ./...
      - if: needs.changes.outputs.platform == 'true'
        name: Run tests with race detection and coverage
        run: go test -race -coverprofile=coverage.out ./...
      - if: needs.changes.outputs.platform == 'true'
        name: Per-file coverage report
        # Advisory — lists every source file with its coverage so reviewers
        # can see at-a-glance where gaps are. Sorted ascending so the worst
        # offenders float to the top. Does NOT fail the build; the hard
        # gate is the threshold check below. (#1823)
        run: |
          echo "=== Per-file coverage (worst first) ==="
          go tool cover -func=coverage.out \
            | grep -v '^total:' \
            | awk '{file=$1; sub(/:[0-9][0-9.]*:.*/, "", file); pct=$NF; gsub(/%/,"",pct); s[file]+=pct; c[file]++} END {for (f in s) printf "%6.1f%% %s\n", s[f]/c[f], f}' \
            | sort -n
      - if: needs.changes.outputs.platform == 'true'
        name: Check coverage thresholds
        # Enforces two gates from #1823 Layer 1:
        #   1. Total floor (25% — ratchet plan in COVERAGE_FLOOR.md).
        #   2. Per-file floor — non-test .go files in security-critical
        #      paths with coverage <10% fail the build, UNLESS the file
        #      path is listed in .coverage-allowlist.txt (acknowledged
        #      historical debt with a tracking issue + expiry).
        run: |
          set -e
          TOTAL_FLOOR=25
          # Security-critical paths where a 0%-coverage file is a real risk.
          CRITICAL_PATHS=(
            "internal/handlers/tokens"
            "internal/handlers/workspace_provision"
            "internal/handlers/a2a_proxy"
            "internal/handlers/registry"
            "internal/handlers/secrets"
            "internal/middleware/wsauth"
            "internal/crypto"
          )

          TOTAL=$(go tool cover -func=coverage.out | grep '^total:' | awk '{print $3}' | sed 's/%//')
          echo "Total coverage: ${TOTAL}%"
          if awk "BEGIN{exit !($TOTAL < $TOTAL_FLOOR)}"; then
            echo "::error::Total coverage ${TOTAL}% is below the ${TOTAL_FLOOR}% floor. See COVERAGE_FLOOR.md for ratchet plan."
            exit 1
          fi

          # Aggregate per-file coverage → /tmp/perfile.txt: "<file> <pct>"
          # (per-function percentages are averaged per file).
          go tool cover -func=coverage.out \
            | grep -v '^total:' \
            | awk '{file=$1; sub(/:[0-9][0-9.]*:.*/, "", file); pct=$NF; gsub(/%/,"",pct); s[file]+=pct; c[file]++} END {for (f in s) printf "%s %.1f\n", f, s[f]/c[f]}' \
            > /tmp/perfile.txt

          # Build allowlist — paths relative to workspace-server, one per line.
          # Lines starting with # are comments.
          ALLOWLIST=""
          if [ -f ../.coverage-allowlist.txt ]; then
            ALLOWLIST=$(grep -vE '^(#|[[:space:]]*$)' ../.coverage-allowlist.txt || true)
          fi

          FAILED=0
          WARNED=0
          for path in "${CRITICAL_PATHS[@]}"; do
            while read -r file pct; do
              [[ "$file" == *_test.go ]] && continue
              [[ "$file" == *"$path"* ]] || continue
              # Only files below the 10% per-file floor are reported.
              awk "BEGIN{exit !($pct < 10)}" || continue
              # Strip the package-import prefix so we can match .coverage-allowlist.txt
              # entries written as paths relative to workspace-server/.
              # Handle both module paths: platform/workspace-server/... and platform/...
              rel=$(echo "$file" | sed 's|^github.com/Molecule-AI/molecule-monorepo/platform/workspace-server/||; s|^github.com/Molecule-AI/molecule-monorepo/platform/||')
              if echo "$ALLOWLIST" | grep -qxF "$rel"; then
                echo "::warning file=workspace-server/$rel::Critical file at ${pct}% coverage (allowlisted, #1823) — fix before expiry."
                WARNED=$((WARNED+1))
              else
                echo "::error file=workspace-server/$rel::Critical file at ${pct}% coverage — must be >=10% (target 80%). See #1823. To acknowledge as known debt, add this path to .coverage-allowlist.txt."
                FAILED=$((FAILED+1))
              fi
            done < /tmp/perfile.txt
          done

          echo ""
          echo "Critical-path check: $FAILED new failures, $WARNED allowlisted warnings."
          if [ "$FAILED" -gt 0 ]; then
            echo ""
            echo "$FAILED security-critical file(s) have <10% test coverage and are"
            echo "NOT in the allowlist. These paths handle auth, tokens, secrets, or"
            echo "workspace provisioning — a 0% file here is the exact gap that let"
            echo "CWE-22, CWE-78, KI-005 slip through in past incidents. Either:"
            echo " (a) add tests to raise coverage above 10%, or"
            echo " (b) add the path to .coverage-allowlist.txt with an expiry date"
            echo " and a tracking issue reference."
            exit 1
          fi

  # Canvas (Next.js) — required check, always runs. See platform-build
  # comment above for the rationale.
  #
  # Supersedes the canvas-build-noop pattern attempted in PR #2321: two
  # jobs sharing `name:` doesn't actually satisfy branch protection
  # because the SKIPPED check run sibling is treated as not-passed
  # regardless of how many SUCCESS siblings it has. Verified empirically
  # on PR #2314 — mergeStateStatus stayed BLOCKED until I collapsed to
  # a single-job-with-conditional-steps shape.
  canvas-build:
    name: Canvas (Next.js)
    needs: changes
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: canvas
    steps:
      # Runs from the workspace root: on this path no checkout happens, so
      # the default working directory (canvas) does not exist.
      - if: needs.changes.outputs.canvas != 'true'
        working-directory: .
        run: echo "No canvas/** changes — skipping real build steps; this job always runs to satisfy the required-check name on branch protection."
      - if: needs.changes.outputs.canvas == 'true'
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      - if: needs.changes.outputs.canvas == 'true'
        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e  # v6.4.0
        with:
          node-version: '22'
      - if: needs.changes.outputs.canvas == 'true'
        run: rm -f package-lock.json && npm install
      - if: needs.changes.outputs.canvas == 'true'
        run: npm run build
      - if: needs.changes.outputs.canvas == 'true'
        name: Run tests with coverage
        # Coverage instrumentation is configured in canvas/vitest.config.ts
        # (provider: v8, reporters: text + html + json-summary). Step 2 of
        # #1815 — wires coverage into CI so we get a baseline visible on
        # every PR. No threshold gate yet; thresholds dial in (Step 3, also
        # tracked in #1815) after the team sees what current coverage is.
        # Per the inline comment in vitest.config.ts: "first land
        # observability so we can see the baseline, then dial in
        # thresholds + a hard gate" — this PR ships the observability half.
        run: npx vitest run --coverage
      - name: Upload coverage summary as artifact
        # always() so the artifact is uploaded even when the test step fails.
        if: needs.changes.outputs.canvas == 'true' && always()
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4.6.2
        with:
          name: canvas-coverage-${{ github.run_id }}
          path: canvas/coverage/
          retention-days: 7
          if-no-files-found: warn

  # MCP Server + SDK removed from CI — now in standalone repos:
  #   - github.com/Molecule-AI/molecule-mcp-server (npm CI)
  #   - github.com/Molecule-AI/molecule-sdk-python (PyPI CI)

  # e2e-api job moved to .github/workflows/e2e-api.yml (issue #458).
  # It now has workflow-level concurrency (cancel-in-progress: false) so
  # new pushes queue the E2E run rather than cancelling it at the run level.

  # Shellcheck (E2E scripts) — required check, always runs. See
  # platform-build for the rationale.
  shellcheck:
    name: Shellcheck (E2E scripts)
    needs: changes
    runs-on: ubuntu-latest
    steps:
      - if: needs.changes.outputs.scripts != 'true'
        run: echo "No tests/e2e/ or infra/scripts/ changes — skipping real shellcheck; this job always runs to satisfy the required-check name on branch protection."
      - if: needs.changes.outputs.scripts == 'true'
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      - if: needs.changes.outputs.scripts == 'true'
        name: Run shellcheck on tests/e2e/*.sh and infra/scripts/*.sh
        # shellcheck is pre-installed on ubuntu-latest runners (via apt).
        # infra/scripts/ is included because setup.sh + nuke.sh gate the
        # README quickstart — a shellcheck regression there silently breaks
        # new-user onboarding. scripts/ is intentionally excluded until its
        # pre-existing SC3040/SC3043 warnings are cleaned up.
        #
        # `xargs -r` skips the shellcheck invocation when find matches no
        # files; without it shellcheck would run with no arguments and fail.
        run: |
          find tests/e2e infra/scripts -type f -name '*.sh' -print0 \
            | xargs -0 -r shellcheck --severity=warning
      - if: needs.changes.outputs.scripts == 'true'
        name: Lint cleanup-trap hygiene (RFC #2873)
        # Asserts every shell E2E test that calls `mktemp` also installs
        # an EXIT trap. Catches the /tmp-leak class — a missing trap
        # silently leaks scratch into CI runners (~10-100KB per run).
        # See tests/e2e/lint_cleanup_traps.sh for the rule + fix pattern.
        run: bash tests/e2e/lint_cleanup_traps.sh
      - if: needs.changes.outputs.scripts == 'true'
        name: Run E2E bash unit tests (no live infra)
        # Pure-bash unit tests for E2E helper libs (lib/*.sh). These pin
        # behavior of dispatch logic that — when broken — silently masks as
        # "Could not resolve authentication method" only after a successful
        # tenant + workspace provision (PR #2571 incident, 2026-05-03). Add
        # new self-contained unit tests here as the lib/ directory grows;
        # tests requiring live CP/tenant credentials belong in the dedicated
        # e2e-staging-* workflows, not this job.
        run: |
          bash tests/e2e/test_model_slug.sh

  canvas-deploy-reminder:
    name: Canvas Deploy Reminder
    runs-on: ubuntu-latest
    needs: [changes, canvas-build]
    # Only fires on direct pushes to main (i.e. after staging→main promotion).
    if: >-
      needs.changes.outputs.canvas == 'true'
      && github.event_name == 'push'
      && github.ref == 'refs/heads/main'
    permissions:
      # Required to post commit comments via the GitHub API.
      contents: write
    steps:
      - name: Post deploy reminder as commit comment
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          COMMIT_SHA: ${{ github.sha }}
          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
        run: |
          # Write body to a temp file — avoids backtick escaping in shell.
          # The quoted 'BODY' delimiter keeps the heredoc literal (no expansion).
          # NOTE(review): the `cd` target below is a placeholder — the reviewed
          # source had a bare `cd`; replace it with the real checkout path.
          cat > /tmp/deploy-reminder.md << 'BODY'
          ## Canvas build passed ✅ — deploy required

          The `publish-canvas-image` workflow is now building a fresh Docker image
          (`ghcr.io/molecule-ai/canvas:latest`) in the background. Once it completes
          (~3–5 min), apply on the host machine with:

          ```bash
          cd <path-to-repo-checkout>
          git pull origin main
          docker compose pull canvas && docker compose up -d canvas
          ```

          If you need to rebuild from local source instead (e.g. testing unreleased
          changes or a new `NEXT_PUBLIC_*` URL), use:

          ```bash
          docker compose build canvas && docker compose up -d canvas
          ```
          BODY
          printf '\n> Posted automatically by CI · commit `%s` · [build log](%s)\n' \
            "$COMMIT_SHA" "$RUN_URL" >> /tmp/deploy-reminder.md

          # GITHUB_REPOSITORY is provided by the runner; COMMIT_SHA comes from
          # the env block above. `body=@file` makes gh read the value from the file.
          gh api \
            --method POST \
            "repos/${GITHUB_REPOSITORY}/commits/${COMMIT_SHA}/comments" \
            --field "body=@/tmp/deploy-reminder.md"

  # Python Lint & Test — required check, always runs. See platform-build
  # for the rationale.
  python-lint:
    name: Python Lint & Test
    needs: changes
    runs-on: ubuntu-latest
    env:
      WORKSPACE_ID: test
    defaults:
      run:
        working-directory: workspace
    steps:
      # Runs from the workspace root: on this path no checkout happens, so
      # the default working directory (workspace) does not exist.
      - if: needs.changes.outputs.python != 'true'
        working-directory: .
        run: echo "No workspace/** changes — skipping real lint+test; this job always runs to satisfy the required-check name on branch protection."
      - if: needs.changes.outputs.python == 'true'
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      - if: needs.changes.outputs.python == 'true'
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
        with:
          python-version: '3.11'
          cache: pip
          cache-dependency-path: workspace/requirements.txt
      - if: needs.changes.outputs.python == 'true'
        run: pip install -r requirements.txt pytest pytest-asyncio pytest-cov
      # Coverage flags + fail-under floor moved into workspace/pytest.ini
      # (issue #1817) so local `pytest` and CI use identical config.
      - if: needs.changes.outputs.python == 'true'
        run: python -m pytest --tb=short
      - if: needs.changes.outputs.python == 'true'
        name: Per-file critical-path coverage (MCP / inbox / auth)
        # MCP-critical Python files have a per-file floor on top of the
        # 86% total floor in pytest.ini. Rationale (issue #2790, after
        # the PR #2766 → PR #2771 cycle): the total floor averages ~6000
        # lines, so a single MCP file could regress to ~50% with no
        # complaint as long as other modules compensate. These six
        # files handle multi-tenant routing + auth + inbox dispatch —
        # a coverage drop here is the same risk shape as a Go-side
        # workspace-server token/secrets file dropping below 10%.
        #
        # Floor 75% sits below current actuals (80-96%) so this gate is
        # strictly additive — no existing PR fails. Ratchet plan in
        # COVERAGE_FLOOR.md.
        run: |
          set -e
          PER_FILE_FLOOR=75
          CRITICAL_FILES=(
            "a2a_mcp_server.py"
            "mcp_cli.py"
            "a2a_tools.py"
            "a2a_tools_inbox.py"
            "inbox.py"
            "platform_auth.py"
          )

          # pytest already wrote .coverage; emit a JSON view scoped to
          # the critical files so jq/python can read the per-file pct
          # without parsing tabular text. --include uses fnmatch, and
          # the leading "*" allows the file to live anywhere under the
          # workspace root (today they sit at workspace/<name>.py).
          INCLUDES=$(printf '*%s,' "${CRITICAL_FILES[@]}")
          # Drop the trailing comma left by the printf above.
          INCLUDES="${INCLUDES%,}"
          python -m coverage json -o /tmp/critical-cov.json --include="$INCLUDES"

          FAILED=0
          for f in "${CRITICAL_FILES[@]}"; do
            # Match by top-level path key (e.g. "a2a_tools.py", not
            # "builtin_tools/a2a_tools.py" — different file at 100%).
            # The keys in coverage.json are paths relative to the run
            # cwd (workspace/), so the critical-path entry sits at the
            # bare basename.
            pct=$(jq -r --arg f "$f" '.files | to_entries | map(select(.key == $f)) | .[0].value.summary.percent_covered // "MISSING"' /tmp/critical-cov.json)
            if [ "$pct" = "MISSING" ]; then
              echo "::error file=workspace/$f::No coverage data — file may have moved or test exclusion mis-set."
              FAILED=$((FAILED+1))
              continue
            fi
            echo "$f: ${pct}%"
            if awk "BEGIN{exit !($pct < $PER_FILE_FLOOR)}"; then
              echo "::error file=workspace/$f::${pct}% < ${PER_FILE_FLOOR}% per-file floor (MCP critical path). See COVERAGE_FLOOR.md."
              FAILED=$((FAILED+1))
            fi
          done

          if [ "$FAILED" -gt 0 ]; then
            echo ""
            echo "$FAILED MCP critical-path file(s) below the ${PER_FILE_FLOOR}% per-file floor."
            echo "These paths handle multi-tenant routing, auth tokens, and inbox dispatch."
            echo "A coverage drop here is the same risk shape as Go-side tokens/secrets files"
            echo "dropping below 10% (see COVERAGE_FLOOR.md). Either:"
            echo " (a) add tests to raise coverage back above ${PER_FILE_FLOOR}%, or"
            echo " (b) if this is unavoidable historical debt, file an issue and propose"
            echo " adjusting the floor with rationale in COVERAGE_FLOOR.md."
            exit 1
          fi

  # SDK + plugin validation moved to standalone repo:
  # github.com/Molecule-AI/molecule-sdk-python