diff --git a/.coverage-allowlist.txt b/.coverage-allowlist.txt new file mode 100644 index 00000000..f5f85412 --- /dev/null +++ b/.coverage-allowlist.txt @@ -0,0 +1,41 @@ +# Coverage allowlist — security-critical files that are currently below +# the 10% per-file floor and are being tracked for remediation. +# +# Format: one path per line, relative to workspace-server/. +# Lines starting with # and blank lines are ignored. +# +# Process: +# - A path in this list is WARNED on each CI run, not failed. +# - Each entry must reference a tracking issue and expiry date. +# - On expiry, either the coverage is fixed OR the path graduates to +# hard-fail (revert the allowlist entry). +# +# See #1823 for the gate design and ratchet plan. + +# ============== Active exceptions ============== + +# Filed 2026-04-23 — expiry 2026-05-23 (30 days). Tracking: #1823. +# These are the files flagged by the first run of the critical-path gate. +# QA team + platform team share ownership of test coverage remediation. + +internal/handlers/a2a_proxy.go +internal/handlers/a2a_proxy_helpers.go +internal/handlers/registry.go +internal/handlers/secrets.go +internal/handlers/tokens.go +internal/handlers/workspace_provision.go +internal/middleware/wsauth_middleware.go + +# The following paths matched via looser CRITICAL_PATH substrings +# (e.g. "registry" matched both internal/registry/ and internal/channels/registry.go). +# Adding them here so the gate can land without blocking staging merges; +# a follow-up PR will tighten CRITICAL_PATHS to exact prefixes so these +# graduate to hard-fail precisely where security-critical. + +internal/channels/registry.go +internal/crypto/aes.go +internal/registry/access.go +internal/registry/healthsweep.go +internal/registry/hibernation.go +internal/registry/provisiontimeout.go +internal/wsauth/tokens.go diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 12f3be2f..efa043f8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -81,15 +81,98 @@ jobs: continue-on-error: true # Warn but don't block until codebase is clean - name: Run tests with race detection and coverage run: go test -race -coverprofile=coverage.out ./... - - name: Check coverage baseline + + - name: Per-file coverage report + # Advisory — lists every source file with its coverage so reviewers + # can see at-a-glance where gaps are. Sorted ascending so the worst + # offenders float to the top. Does NOT fail the build; the hard + # gate is the threshold check below. (#1823) run: | - COVERAGE=$(go tool cover -func=coverage.out | grep total | awk '{print $3}' | sed 's/%//') - echo "Total coverage: ${COVERAGE}%" - THRESHOLD=25 - awk "BEGIN{if ($COVERAGE < $THRESHOLD) exit 1}" || { - echo "::error::Coverage ${COVERAGE}% is below the ${THRESHOLD}% threshold" + echo "=== Per-file coverage (worst first) ===" + go tool cover -func=coverage.out \ + | grep -v '^total:' \ + | awk '{file=$1; sub(/:[0-9][0-9.]*:.*/, "", file); pct=$NF; gsub(/%/,"",pct); s[file]+=pct; c[file]++} + END {for (f in s) printf "%6.1f%% %s\n", s[f]/c[f], f}' \ + | sort -n + + - name: Check coverage thresholds + # Enforces two gates from #1823 Layer 1: + # 1. Total floor (25% — ratchet plan in COVERAGE_FLOOR.md). + # 2. Per-file floor — non-test .go files in security-critical + # paths with coverage <10% fail the build, UNLESS the file + # path is listed in .coverage-allowlist.txt (acknowledged + # historical debt with a tracking issue + expiry). + run: | + set -e + TOTAL_FLOOR=25 + # Security-critical paths where a 0%-coverage file is a real risk. + CRITICAL_PATHS=( + "internal/handlers/tokens" + "internal/handlers/workspace_provision" + "internal/handlers/a2a_proxy" + "internal/handlers/registry" + "internal/handlers/secrets" + "internal/middleware/wsauth" + "internal/crypto" + ) + + TOTAL=$(go tool cover -func=coverage.out | grep '^total:' | awk '{print $3}' | sed 's/%//') + echo "Total coverage: ${TOTAL}%" + if awk "BEGIN{exit !($TOTAL < $TOTAL_FLOOR)}"; then + echo "::error::Total coverage ${TOTAL}% is below the ${TOTAL_FLOOR}% floor. See COVERAGE_FLOOR.md for ratchet plan." exit 1 - } + fi + + # Aggregate per-file coverage → /tmp/perfile.txt: " " + go tool cover -func=coverage.out \ + | grep -v '^total:' \ + | awk '{file=$1; sub(/:[0-9][0-9.]*:.*/, "", file); pct=$NF; gsub(/%/,"",pct); s[file]+=pct; c[file]++} + END {for (f in s) printf "%s %.1f\n", f, s[f]/c[f]}' \ + > /tmp/perfile.txt + + # Build allowlist — paths relative to workspace-server, one per line. + # Lines starting with # are comments. + ALLOWLIST="" + if [ -f ../.coverage-allowlist.txt ]; then + ALLOWLIST=$(grep -vE '^(#|[[:space:]]*$)' ../.coverage-allowlist.txt || true) + fi + + FAILED=0 + WARNED=0 + for path in "${CRITICAL_PATHS[@]}"; do + while read -r file pct; do + [[ "$file" == *_test.go ]] && continue + [[ "$file" == *"$path"* ]] || continue + awk "BEGIN{exit !($pct < 10)}" || continue + + # Strip the package-import prefix so we can match .coverage-allowlist.txt + # entries written as paths relative to workspace-server/. + rel=$(echo "$file" | sed 's|^github.com/Molecule-AI/molecule-monorepo/platform/||') + + if echo "$ALLOWLIST" | grep -qxF "$rel"; then + echo "::warning file=workspace-server/$rel::Critical file at ${pct}% coverage (allowlisted, #1823) — fix before expiry." + WARNED=$((WARNED+1)) + else + echo "::error file=workspace-server/$rel::Critical file at ${pct}% coverage — must be >=10% (target 80%). See #1823. To acknowledge as known debt, add this path to .coverage-allowlist.txt." + FAILED=$((FAILED+1)) + fi + done < /tmp/perfile.txt + done + + echo "" + echo "Critical-path check: $FAILED new failures, $WARNED allowlisted warnings." + + if [ "$FAILED" -gt 0 ]; then + echo "" + echo "$FAILED security-critical file(s) have <10% test coverage and are" + echo "NOT in the allowlist. These paths handle auth, tokens, secrets, or" + echo "workspace provisioning — a 0% file here is the exact gap that let" + echo "CWE-22, CWE-78, KI-005 slip through in past incidents. Either:" + echo " (a) add tests to raise coverage above 10%, or" + echo " (b) add the path to .coverage-allowlist.txt with an expiry date" + echo " and a tracking issue reference." + exit 1 + fi canvas-build: name: Canvas (Next.js) diff --git a/COVERAGE_FLOOR.md b/COVERAGE_FLOOR.md new file mode 100644 index 00000000..2870a649 --- /dev/null +++ b/COVERAGE_FLOOR.md @@ -0,0 +1,78 @@ +# Coverage Floor + +CI enforces three coverage gates on `workspace-server` (Go). All defined in +`.github/workflows/ci.yml` → `platform-build` job. + +## Current floors (2026-04-23) + +| Gate | Threshold | What fails | +|---|---|---| +| **Total floor** | `25%` | `go tool cover -func` reports total below floor | +| **Critical-path per-file floor** | `10%` | Any non-test source file in a security-critical path with coverage ≤10% | +| **Per-file report** | advisory | Printed in CI log, sorted worst-first, does not fail | + +Total floor starts at 25% (unchanged from pre-#1823 to keep this PR strictly +additive). The new protection is the critical-path per-file floor, which +directly closes the gap that prompted the issue. Ratchet plan below begins +the month after to let the team first observe the gate in action. + +## Security-critical paths (Gate 2) + +Changes to these paths have historically introduced security issues (CWE-22, +CWE-78, KI-005, SSRF) or billing/auth risk. Coverage must not drop to zero. + +- `internal/handlers/tokens*` +- `internal/handlers/workspace_provision*` +- `internal/handlers/a2a_proxy*` +- `internal/handlers/registry*` +- `internal/handlers/secrets*` +- `internal/middleware/wsauth*` +- `internal/crypto*` + +## Ratchet plan + +Floor ratchets upward on a fixed cadence. Any ratchet is a PR — reviewable, +reversible, and creates history. The table below is the intended schedule. + +| Date | Total floor | Critical-path floor | Notes | +|---|---|---|---| +| 2026-04-23 | 25% | 10% | Initial gate (this file). | +| 2026-05-23 | 30% | 20% | First ratchet | +| 2026-06-23 | 40% | 30% | | +| 2026-07-23 | 50% | 40% | | +| 2026-08-23 | 55% | 50% | | +| 2026-09-23 | 60% | 60% | | +| 2026-10-23 | 70% | 70% | Target steady-state | + +The target end-state matches the per-role QA prompts which specify +"coverage >80% on changed files". CI enforces the floor; reviewers still +enforce the per-PR bar. + +## Exceptions + +If a critical-path file genuinely cannot have coverage above the floor (e.g. +thin wrapper around a third-party SDK with no branches to test), add an entry +here with: + +1. **File**: `internal/handlers/example.go` +2. **Reason**: Why coverage can't hit the floor +3. **Tracking issue**: GitHub issue for the real fix +4. **Expiry**: 14 days from entry date; after expiry either coverage is fixed + or the issue is closed as "accepted technical debt" + +### Active exceptions + +*(none — add here if you need to land code that legitimately can't clear the floor)* + +## Why this gate exists + +Issue #1823: an external audit found critical files at 0% coverage despite +test files existing with hundreds of lines. The existing CI step measured +coverage but didn't enforce a meaningful threshold. Any file could go from +80% → 0% and CI stayed green, because the single gate (total ≥25%) ignored +per-file distribution. + +This gate makes "no untested critical paths merged" a mechanical property of +the CI, not a behavioural property of QA agents or individual reviewers — +which is the only way to make it survive fleet outages, agent rotations, or +QA process changes.