# molecule-core/.github/workflows/ci.yml

name: CI

on:
  # Direct pushes to, and pull requests targeting, main or staging.
  push:
    branches:
      - main
      - staging
  pull_request:
    branches:
      - main
      - staging
  # The merge queue requests its pre-merge CI run through `merge_group`.
  # Listening for it gives the queue a real check result rather than a
  # false-green caused by no workflow having been triggered. Declaring it
  # unconditionally is safe: the event does not fire until the queue is
  # enabled on the branch.
  merge_group:
    types:
      - checks_requested

# One concurrency group per ref: when a new commit arrives on a ref, the
# run already in progress for that ref is cancelled, so stale runs do not
# queue up behind each other. Merge-queue runs live on
# refs/heads/gh-readonly-queue/... refs, so their github.ref -- and with
# it their group -- differs from the PR ref's.
concurrency:
  cancel-in-progress: true
  group: "ci-${{ github.ref }}"

jobs:
  # Detect which paths changed so downstream jobs can skip their real work
  # when nothing relevant to them was modified (e.g. docs-only changes).
  # Emits four 'true'/'false' outputs: platform, canvas, python, scripts.
  # A change to this workflow file itself turns all four on.
  changes:
    name: Detect changes
    runs-on: ubuntu-latest
    outputs:
      platform: ${{ steps.check.outputs.platform }}
      canvas: ${{ steps.check.outputs.canvas }}
      python: ${{ steps.check.outputs.python }}
      scripts: ${{ steps.check.outputs.scripts }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          # Full history, so the base commit is present for `git diff`.
          fetch-depth: 0
      - id: check
        # Event fields reach the script as environment variables instead of
        # being expanded into the script text, so the shell only ever
        # treats them as data.
        env:
          EVENT_NAME: ${{ github.event_name }}
          PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
          PUSH_BEFORE_SHA: ${{ github.event.before }}
        run: |
          # Choose the commit to diff HEAD against.
          #   pull_request: the base SHA stored in the event payload. This is
          #     preferred over the previous SHA on the branch, which may be
          #     on a different history line after a force-push.
          #   push: the SHA the ref pointed at before the push.
          # GITHUB_BASE_REF (the base branch name, set for PR events) is
          # only the starting value; it is replaced whenever the payload
          # carries a base SHA.
          BASE="${GITHUB_BASE_REF:-$PUSH_BEFORE_SHA}"
          if [ "$EVENT_NAME" = "pull_request" ] && [ -n "$PR_BASE_SHA" ]; then
            BASE="$PR_BASE_SHA"
          fi

          # Append "<name>=<value>" to the step outputs.
          set_output() {
            echo "$1=$2" >> "$GITHUB_OUTPUT"
          }

          # Fallback: if BASE is empty or all zeros (new branch), run
          # everything.
          if [ -z "$BASE" ] || echo "$BASE" | grep -qE '^0+$'; then
            for name in platform canvas python scripts; do
              set_output "$name" true
            done
            exit 0
          fi

          # If the diff itself fails, report this workflow file as changed,
          # which switches every output on.
          DIFF=$(git diff --name-only "$BASE" HEAD 2>/dev/null || echo ".github/workflows/ci.yml")

          # A change to this workflow file counts as relevant to every job.
          CI_FILE_RE='^\.github/workflows/ci\.yml$'

          # $1 = output name, $2 = ERE matching the paths that job cares about.
          detect() {
            if echo "$DIFF" | grep -qE "$2|$CI_FILE_RE"; then
              set_output "$1" true
            else
              set_output "$1" false
            fi
          }

          detect platform '^workspace-server/'
          detect canvas '^canvas/'
          detect python '^workspace/'
          detect scripts '^tests/e2e/|^scripts/|^infra/scripts/'

  # Platform (Go) is a required check on staging, so this job always runs
  # and gates each real step on the `changes` output instead of being
  # skipped as a whole (see Canvas (Next.js) for the rationale and the
  # failure mode this avoids).
  platform-build:
    name: Platform (Go)
    needs: changes
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: workspace-server
    steps:
      - if: needs.changes.outputs.platform != 'true'
        # No checkout happens on this path, so run from the workspace root
        # rather than the job's default working directory.
        working-directory: .
        run: echo "No platform/** changes — skipping real build steps; this job always runs to satisfy the required-check name on branch protection."
      - if: needs.changes.outputs.platform == 'true'
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - if: needs.changes.outputs.platform == 'true'
        uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
        with:
          go-version: 'stable'
      - if: needs.changes.outputs.platform == 'true'
        run: go mod download
      - if: needs.changes.outputs.platform == 'true'
        run: go build ./cmd/server
      # CLI (molecli) moved to standalone repo: github.com/Molecule-AI/molecule-cli
      # Advisory: `|| true` keeps a non-zero exit from `go vet` from
      # failing the job.
      - if: needs.changes.outputs.platform == 'true'
        run: go vet ./... || true
      # Advisory, same as `go vet` above.
      # NOTE(review): no step in this job installs golangci-lint; if the
      # runner image does not ship it, `|| true` also hides the resulting
      # "command not found" and this step checks nothing — confirm.
      - if: needs.changes.outputs.platform == 'true'
        name: Run golangci-lint
        run: golangci-lint run --timeout 3m ./... || true
      # Writes coverage.out, which the two coverage steps below read.
      - if: needs.changes.outputs.platform == 'true'
        name: Run tests with race detection and coverage
        run: go test -race -coverprofile=coverage.out ./...

      - if: needs.changes.outputs.platform == 'true'
        name: Per-file coverage report
        # Advisory — lists every source file with its coverage so reviewers
        # can see at-a-glance where gaps are. Sorted ascending so the worst
        # offenders float to the top. Does NOT fail the build; the hard
        # gate is the threshold check below. (#1823)
        # The per-file figure is the plain average of the per-function
        # percentages that `go tool cover -func` prints for that file.
        run: |
          echo "=== Per-file coverage (worst first) ==="
          go tool cover -func=coverage.out \
            | grep -v '^total:' \
            | awk '{file=$1; sub(/:[0-9][0-9.]*:.*/, "", file); pct=$NF; gsub(/%/,"",pct); s[file]+=pct; c[file]++}
                   END {for (f in s) printf "%6.1f%%  %s\n", s[f]/c[f], f}' \
            | sort -n

      - if: needs.changes.outputs.platform == 'true'
        name: Check coverage thresholds
        # Enforces two gates from #1823 Layer 1:
        #   1. Total floor (25% — ratchet plan in COVERAGE_FLOOR.md).
        #   2. Per-file floor — non-test .go files in security-critical
        #      paths with coverage <10% fail the build, UNLESS the file
        #      path is listed in .coverage-allowlist.txt (acknowledged
        #      historical debt with a tracking issue + expiry).
        run: |
          set -e
          TOTAL_FLOOR=25
          # Security-critical paths where a 0%-coverage file is a real risk.
          CRITICAL_PATHS=(
            "internal/handlers/tokens"
            "internal/handlers/workspace_provision"
            "internal/handlers/a2a_proxy"
            "internal/handlers/registry"
            "internal/handlers/secrets"
            "internal/middleware/wsauth"
            "internal/crypto"
          )

          TOTAL=$(go tool cover -func=coverage.out | grep '^total:' | awk '{print $3}' | sed 's/%//')
          # Fail closed. The pipeline above reports only sed's exit status,
          # so a failed `go tool cover` or a missing total line leaves TOTAL
          # empty. The awk comparison below would then be a syntax error,
          # exit non-zero, and be read by `if` as "not below the floor".
          NUMBER_RE='^[0-9]+(\.[0-9]+)?$'
          if ! [[ "$TOTAL" =~ $NUMBER_RE ]]; then
            echo "::error::Could not read total coverage from coverage.out (got '${TOTAL}')."
            exit 1
          fi
          echo "Total coverage: ${TOTAL}%"
          if awk "BEGIN{exit !($TOTAL < $TOTAL_FLOOR)}"; then
            echo "::error::Total coverage ${TOTAL}% is below the ${TOTAL_FLOOR}% floor. See COVERAGE_FLOOR.md for ratchet plan."
            exit 1
          fi

          # Aggregate per-file coverage → /tmp/perfile.txt: "<fullpath> <pct>"
          go tool cover -func=coverage.out \
            | grep -v '^total:' \
            | awk '{file=$1; sub(/:[0-9][0-9.]*:.*/, "", file); pct=$NF; gsub(/%/,"",pct); s[file]+=pct; c[file]++}
                   END {for (f in s) printf "%s %.1f\n", f, s[f]/c[f]}' \
            > /tmp/perfile.txt

          # Build allowlist — paths relative to workspace-server, one per line.
          # Lines starting with # are comments; blank lines are dropped too.
          # The file is looked up one level above the working directory.
          ALLOWLIST=""
          if [ -f ../.coverage-allowlist.txt ]; then
            ALLOWLIST=$(grep -vE '^(#|[[:space:]]*$)' ../.coverage-allowlist.txt || true)
          fi

          FAILED=0
          WARNED=0
          for path in "${CRITICAL_PATHS[@]}"; do
            while read -r file pct; do
              # Only non-test files under this critical path that sit below
              # the 10% per-file floor go any further.
              [[ "$file" == *_test.go ]] && continue
              [[ "$file" == *"$path"* ]] || continue
              awk "BEGIN{exit !($pct < 10)}" || continue

              # Strip the package-import prefix so we can match .coverage-allowlist.txt
              # entries written as paths relative to workspace-server/.
              # Handle both module paths: platform/workspace-server/... and platform/...
              rel=$(echo "$file" | sed 's|^github.com/Molecule-AI/molecule-monorepo/platform/workspace-server/||; s|^github.com/Molecule-AI/molecule-monorepo/platform/||')

              # Allowlisted files only warn; anything else counts as a failure.
              if echo "$ALLOWLIST" | grep -qxF "$rel"; then
                echo "::warning file=workspace-server/$rel::Critical file at ${pct}% coverage (allowlisted, #1823) — fix before expiry."
                WARNED=$((WARNED+1))
              else
                echo "::error file=workspace-server/$rel::Critical file at ${pct}% coverage — must be >=10% (target 80%). See #1823. To acknowledge as known debt, add this path to .coverage-allowlist.txt."
                FAILED=$((FAILED+1))
              fi
            done < /tmp/perfile.txt
          done

          echo ""
          echo "Critical-path check: $FAILED new failures, $WARNED allowlisted warnings."

          if [ "$FAILED" -gt 0 ]; then
            echo ""
            echo "$FAILED security-critical file(s) have <10% test coverage and are"
            echo "NOT in the allowlist. These paths handle auth, tokens, secrets, or"
            echo "workspace provisioning — a 0% file here is the exact gap that let"
            echo "CWE-22, CWE-78, KI-005 slip through in past incidents. Either:"
            echo "  (a) add tests to raise coverage above 10%, or"
            echo "  (b) add the path to .coverage-allowlist.txt with an expiry date"
            echo "      and a tracking issue reference."
            exit 1
          fi

  # Canvas (Next.js) — required check, so the job itself always runs and
  # every real step is gated on the `changes` output instead.
  #
  # This shape supersedes the canvas-build-noop pattern attempted in
  # PR #2321. Two jobs sharing one `name:` do not satisfy branch
  # protection: the SKIPPED check-run sibling is treated as not-passed no
  # matter how many SUCCESS siblings it has. Verified empirically on
  # PR #2314 — mergeStateStatus stayed BLOCKED until the jobs were
  # collapsed into a single job with conditional steps.
  canvas-build:
    name: Canvas (Next.js)
    runs-on: ubuntu-latest
    needs: changes
    defaults:
      run:
        working-directory: canvas
    steps:
      # Nothing is checked out on this path, so run from the workspace
      # root rather than the job's default working directory.
      - if: ${{ needs.changes.outputs.canvas != 'true' }}
        working-directory: .
        run: |
          echo "No canvas/** changes — skipping real build steps; this job always runs to satisfy the required-check name on branch protection."

      - if: ${{ needs.changes.outputs.canvas == 'true' }}
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - if: ${{ needs.changes.outputs.canvas == 'true' }}
        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
        with:
          node-version: "22"

      # The lockfile is removed before installing, so dependencies are
      # resolved afresh on every run.
      - if: ${{ needs.changes.outputs.canvas == 'true' }}
        run: |
          rm -f package-lock.json && npm install

      - if: ${{ needs.changes.outputs.canvas == 'true' }}
        run: |
          npm run build

      # Coverage instrumentation is configured in canvas/vitest.config.ts
      # (provider: v8, reporters: text + html + json-summary). This is
      # Step 2 of #1815: coverage is wired into CI so a baseline is visible
      # on every PR. There is no threshold gate yet; thresholds come in
      # Step 3 (also tracked in #1815) once the team has seen the current
      # numbers. The inline comment in vitest.config.ts puts it as "first
      # land observability so we can see the baseline, then dial in
      # thresholds + a hard gate" — this step is the observability half.
      - name: Run tests with coverage
        if: ${{ needs.changes.outputs.canvas == 'true' }}
        run: |
          npx vitest run --coverage

      # always() lets the upload run even when an earlier step failed.
      # upload-artifact is pinned to v3 for Gitea act_runner v0.6
      # compatibility (internal#46).
      - name: Upload coverage summary as artifact
        if: ${{ always() && needs.changes.outputs.canvas == 'true' }}
        uses: actions/upload-artifact@v3
        with:
          name: canvas-coverage-${{ github.run_id }}
          path: canvas/coverage/
          if-no-files-found: warn
          retention-days: 7

  # MCP Server + SDK removed from CI — now in standalone repos:
  # - github.com/Molecule-AI/molecule-mcp-server (npm CI)
  # - github.com/Molecule-AI/molecule-sdk-python (PyPI CI)

  # e2e-api job moved to .github/workflows/e2e-api.yml (issue #458).
  # It now has workflow-level concurrency (cancel-in-progress: false) so
  # new pushes queue the E2E run rather than cancelling it at the run level.

  # Shellcheck (E2E scripts) — required check, so the job always runs and
  # gates its real steps on the `changes` output (rationale documented on
  # the Canvas (Next.js) job).
  shellcheck:
    name: Shellcheck (E2E scripts)
    runs-on: ubuntu-latest
    needs: changes
    steps:
      - if: ${{ needs.changes.outputs.scripts != 'true' }}
        run: |
          echo "No tests/e2e/ or infra/scripts/ changes — skipping real shellcheck; this job always runs to satisfy the required-check name on branch protection."

      - if: ${{ needs.changes.outputs.scripts == 'true' }}
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      # shellcheck comes pre-installed on ubuntu-latest runners (via apt).
      # infra/scripts/ is covered because setup.sh + nuke.sh gate the
      # README quickstart — a shellcheck regression there silently breaks
      # new-user onboarding. scripts/ is deliberately left out until its
      # pre-existing SC3040/SC3043 warnings are cleaned up.
      - name: Run shellcheck on tests/e2e/*.sh and infra/scripts/*.sh
        if: ${{ needs.changes.outputs.scripts == 'true' }}
        run: |
          find tests/e2e infra/scripts -type f -name '*.sh' -print0 | xargs -0 shellcheck --severity=warning

      # Asserts that every shell E2E test calling `mktemp` also installs an
      # EXIT trap. This catches the /tmp-leak class: a missing trap
      # silently leaks scratch into CI runners (~10-100KB per run). The
      # rule and its fix pattern live in tests/e2e/lint_cleanup_traps.sh.
      - name: Lint cleanup-trap hygiene (RFC #2873)
        if: ${{ needs.changes.outputs.scripts == 'true' }}
        run: |
          bash tests/e2e/lint_cleanup_traps.sh

      # Pure-bash unit tests for the E2E helper libs (lib/*.sh). They pin
      # dispatch logic whose breakage otherwise only surfaces as "Could not
      # resolve authentication method" after a successful tenant +
      # workspace provision (PR #2571 incident, 2026-05-03). Add further
      # self-contained unit tests here as lib/ grows; tests that need live
      # CP/tenant credentials belong in the dedicated e2e-staging-*
      # workflows, not in this job.
      - name: Run E2E bash unit tests (no live infra)
        if: ${{ needs.changes.outputs.scripts == 'true' }}
        run: bash tests/e2e/test_model_slug.sh

  # Posts a commit comment with deploy instructions. It depends on
  # canvas-build and is further restricted by the `if:` below.
  canvas-deploy-reminder:
    name: Canvas Deploy Reminder
    runs-on: ubuntu-latest
    needs: [changes, canvas-build]
    # Only fires on direct pushes to main (i.e. after staging→main promotion).
    if: needs.changes.outputs.canvas == 'true' && github.event_name == 'push' && github.ref == 'refs/heads/main'
    permissions:
      # Required to post commit comments via the GitHub API.
      contents: write
    steps:
      - name: Post deploy reminder as commit comment
        # Every context value the script needs is handed over through the
        # environment; the script text contains no `${{ }}` expansions.
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO: ${{ github.repository }}
          COMMIT_SHA: ${{ github.sha }}
          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
        run: |
          # Write body to a temp file — avoids backtick escaping in shell.
          # The quoted delimiter ('BODY') keeps the heredoc literal.
          cat > /tmp/deploy-reminder.md << 'BODY'
          ## Canvas build passed ✅ — deploy required

          The `publish-canvas-image` workflow is now building a fresh Docker image
          (`ghcr.io/molecule-ai/canvas:latest`) in the background.

          Once it completes (~3–5 min), apply on the host machine with:
          ```bash
          cd <runner-workspace>
          git pull origin main
          docker compose pull canvas && docker compose up -d canvas
          ```

          If you need to rebuild from local source instead (e.g. testing unreleased
          changes or a new `NEXT_PUBLIC_*` URL), use:
          ```bash
          docker compose build canvas && docker compose up -d canvas
          ```
          BODY
          # Footer with the commit and a link back to this run.
          printf '\n> Posted automatically by CI · commit `%s` · [build log](%s)\n' \
            "$COMMIT_SHA" "$RUN_URL" >> /tmp/deploy-reminder.md

          # `body=@<file>` makes gh read the field value from the file.
          gh api \
            --method POST \
            "repos/${REPO}/commits/${COMMIT_SHA}/comments" \
            --field "body=@/tmp/deploy-reminder.md"

  # Python Lint & Test — required check, so the job always runs and gates
  # its real steps on the `changes` output (rationale documented on the
  # Canvas (Next.js) job).
  python-lint:
    name: Python Lint & Test
    runs-on: ubuntu-latest
    needs: changes
    env:
      WORKSPACE_ID: "test"
    defaults:
      run:
        working-directory: workspace
    steps:
      # Nothing is checked out on this path, so run from the workspace
      # root rather than the job's default working directory.
      - if: ${{ needs.changes.outputs.python != 'true' }}
        working-directory: .
        run: |
          echo "No workspace/** changes — skipping real lint+test; this job always runs to satisfy the required-check name on branch protection."

      - if: ${{ needs.changes.outputs.python == 'true' }}
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - if: ${{ needs.changes.outputs.python == 'true' }}
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: "3.11"
          cache: pip
          cache-dependency-path: workspace/requirements.txt

      - if: ${{ needs.changes.outputs.python == 'true' }}
        run: |
          pip install -r requirements.txt pytest pytest-asyncio pytest-cov

      # Coverage flags and the fail-under floor live in workspace/pytest.ini
      # (issue #1817), so a local `pytest` and CI share the same config.
      - if: ${{ needs.changes.outputs.python == 'true' }}
        run: |
          python -m pytest --tb=short

      # MCP-critical Python files carry a per-file floor on top of the 86%
      # total floor in pytest.ini. Rationale (issue #2790, after the
      # PR #2766 → PR #2771 cycle): the total floor averages ~6000 lines,
      # so a single MCP file could regress to ~50% unnoticed as long as
      # other modules compensate. The six files listed below handle
      # multi-tenant routing + auth + inbox dispatch — a coverage drop
      # there is the same risk shape as a Go-side workspace-server
      # token/secrets file dropping below 10%.
      #
      # The 75% floor was chosen below the actuals at the time (80-96%),
      # making the gate strictly additive — no existing PR fails. Ratchet
      # plan in COVERAGE_FLOOR.md.
      - name: Per-file critical-path coverage (MCP / inbox / auth)
        if: ${{ needs.changes.outputs.python == 'true' }}
        run: |
          set -e
          floor=75
          report=/tmp/critical-cov.json
          critical=(
            "a2a_mcp_server.py"
            "mcp_cli.py"
            "a2a_tools.py"
            "a2a_tools_inbox.py"
            "inbox.py"
            "platform_auth.py"
          )

          # pytest has already written .coverage. Export a JSON view limited
          # to the critical files so per-file percentages can be read with
          # jq instead of parsed out of tabular text. --include takes
          # fnmatch patterns; the leading "*" lets a file sit anywhere
          # under the workspace root (today: workspace/<name>.py).
          patterns=$(printf '*%s,' "${critical[@]}")
          python -m coverage json -o "$report" --include="${patterns%,}"

          violations=0
          for name in "${critical[@]}"; do
            # Look the file up by its exact key. Keys in the JSON report are
            # paths relative to the run cwd (workspace/), so the critical
            # file is the bare basename ("a2a_tools.py") and not, say,
            # "builtin_tools/a2a_tools.py" — a different file at 100%.
            covered=$(jq -r --arg name "$name" '.files[$name].summary.percent_covered // "MISSING"' "$report")
            if [ "$covered" = "MISSING" ]; then
              echo "::error file=workspace/$name::No coverage data — file may have moved or test exclusion mis-set."
              violations=$((violations+1))
              continue
            fi
            echo "$name: ${covered}%"
            if awk "BEGIN{exit !($covered < $floor)}"; then
              echo "::error file=workspace/$name::${covered}% < ${floor}% per-file floor (MCP critical path). See COVERAGE_FLOOR.md."
              violations=$((violations+1))
            fi
          done

          # Nothing below the floor: the step succeeds here.
          if [ "$violations" -eq 0 ]; then
            exit 0
          fi

          cat <<EOF

          $violations MCP critical-path file(s) below the ${floor}% per-file floor.
          These paths handle multi-tenant routing, auth tokens, and inbox dispatch.
          A coverage drop here is the same risk shape as Go-side tokens/secrets files
          dropping below 10% (see COVERAGE_FLOOR.md). Either:
            (a) add tests to raise coverage back above ${floor}%, or
            (b) if this is unavoidable historical debt, file an issue and propose
                adjusting the floor with rationale in COVERAGE_FLOOR.md.
          EOF
          exit 1

      # SDK + plugin validation moved to standalone repo:
      # github.com/Molecule-AI/molecule-sdk-python