diff --git a/tools/gate-check-v3/gate_check.py b/tools/gate-check-v3/gate_check.py index 417b2c25..eb88b82b 100644 --- a/tools/gate-check-v3/gate_check.py +++ b/tools/gate-check-v3/gate_check.py @@ -444,6 +444,196 @@ GOVERNANCE_REQUIRED_CONTEXTS = [ ] +# ── Signal 7: destructive-diff guard (core#2875) ──────────────────────────── + +# Detects a stale-or-destructive PR: a branch that has either +# (a) inherited massive destructive changes from its base (the +# pre-stale state of the base has since been heavily edited on the +# target branch, so the PR is now reverting large swaths of current +# main), OR +# (b) grown the destructive diff itself (files_changed >= 200 OR +# net_deleted_lines >= 5000). +# +# Real example (the RCA that motivated this guard): PR #1100 was an +# all-required-green stale branch whose current head reverted 481 +# files / -55k lines of current main. The all-required status +# check was green but the diff was destructive. This signal blocks +# that case at the gate level so it can never silently merge. +# +# Two-tier heuristic per Researcher's #2875 proposal: +# - BLOCK: high-confidence destructive condition (the destructive +# diff + the branch-diverged / stale state) AND not explicitly +# opted out via a refactor / migration / generated label. +# - WARN: moderate drift (commits_behind > 10, or any of the +# high-confidence thresholds halved) — operator should review. +# - CLEAR: no destructive pattern detected. +# +# All PR-file count + line-count comes from the Gitea PR-files API +# (paginated; sum of `additions` + `deletions`). Refactor/migration +# exemptions are read from PR labels (no PR-body parsing). +REFACTOR_EXEMPT_LABELS = {"refactor", "migration", "generated", "vendor"} + + +def _pr_diff_stats(pr_number: int, repo: str) -> dict: + """Fetch PR file changes + sum to (files_changed, added_lines, deleted_lines). + + Returns an empty dict on API error — the caller treats missing data as + 'cannot compute' (which falls into the WARN tier, not BLOCK, to avoid + false-positive blocks on transient API failures). + """ + try: + files = api_list(f"/repos/{repo}/pulls/{pr_number}/files", per_page=200) + except GiteaError as e: + return {"error": str(e)} + + files_changed = 0 + added = 0 + deleted = 0 + for f in files: + # The Gitea PR-files API uses `additions` and `deletions` per file. + # Some older versions use `additions`/`deletions`; normalize both. + files_changed += 1 + added += int(f.get("additions", 0) or 0) + deleted += int(f.get("deletions", 0) or 0) + return { + "files_changed": files_changed, + "added_lines": added, + "deleted_lines": deleted, + "net_deleted_lines": max(0, deleted - added), + } + + +def _pr_has_refactor_exemption(pr_data: dict) -> bool: + """True iff the PR has a label in REFACTOR_EXEMPT_LABELS (e.g. 'refactor', + 'migration', 'generated', 'vendor') that opts it out of the destructive + BLOCK. The exemption is LABEL-only (not PR-body-marker) because labels + are the canonical signal already understood by the rest of the gate + stack. Refactor-exempt PRs still get the WARN tier (not CLEAR) so + operators can see the destructive diff size — they just don't get + a BLOCK. + """ + for label in pr_data.get("labels", []) or []: + name = (label.get("name") or "").lower() + if name in REFACTOR_EXEMPT_LABELS: + return True + return False + + +def signal_7_destructive_diff_guard( + pr_number: int, repo: str, pr_data: dict | None = None +) -> dict: + """core#2875 — BLOCK a PR when its destructive diff + branch-diverged + state match a high-confidence destructive pattern (a stale branch + that would silently revert large swaths of current main). + + Computes: + - diff stats (files_changed, deleted_lines, net_deleted_lines) + from the PR-files API. + - branch divergence (base.sha vs current target-branch HEAD) and + commits_behind via signal_4's helper. + - refactor exemption via PR labels. + + Verdict: + - BLOCK when (files>=200 OR net_deleted>=5000 OR deleted>=10000) + AND (diverged OR commits_behind>20) + AND no refactor exemption. + - WARN when (files>=50 OR net_deleted>=1000 OR deleted>=2000) + OR commits_behind>10. + (WARN also surfaces on API errors so transient fetches don't + silently pass.) + - CLEAR otherwise. + """ + if pr_data is None: + owner, name = repo.split("/", 1) + pr_data = api_get(f"/repos/{owner}/{name}/pulls/{pr_number}") + + # Branch divergence: reuse the signal_4 helper's data + # (no need to re-derive diverged + commits_behind here). + sig4 = signal_4_branch_divergence(pr_number, repo, pr_data=pr_data) + diverged = bool(sig4.get("diverged", False)) + commits_behind = int(sig4.get("commits_behind", 0) or 0) + + # Diff stats from the PR-files API. + stats = _pr_diff_stats(pr_number, repo) + if "error" in stats: + # Cannot compute — surface as WARN (transient API failure + # shouldn't silently pass a destructive PR, but also shouldn't + # BLOCK on a missing-files transient). + return { + "signal": "destructive_diff_guard", + "verdict": "WARNING", + "reason": "could not fetch PR files API: " + stats["error"], + "diverged": diverged, + "commits_behind": commits_behind, + } + + files_changed = stats["files_changed"] + deleted_lines = stats["deleted_lines"] + net_deleted = stats["net_deleted_lines"] + has_refactor_exemption = _pr_has_refactor_exemption(pr_data) + + # High-confidence destructive condition: + # - any of the destructive diff thresholds + # - AND (branch is diverged OR has been sitting behind >20 commits) + # - AND the PR is not explicitly opted out via a refactor/migration label. + destructive_diff = ( + files_changed >= 200 + or net_deleted >= 5000 + or deleted_lines >= 10000 + ) + is_stale = diverged or commits_behind > 20 + if destructive_diff and is_stale and not has_refactor_exemption: + return { + "signal": "destructive_diff_guard", + "verdict": "BLOCKED", + "diverged": diverged, + "commits_behind": commits_behind, + "files_changed": files_changed, + "added_lines": stats["added_lines"], + "deleted_lines": deleted_lines, + "net_deleted_lines": net_deleted, + "refactor_exemption": False, + "reason": ( + f"destructive diff (files={files_changed}, net_deleted={net_deleted}, " + f"deleted={deleted_lines}) + stale branch " + f"(diverged={diverged}, commits_behind={commits_behind})" + ), + } + + # Moderate-drift WARN: any of the moderate thresholds + # (operator should review but the PR isn't blocked). + moderate = ( + files_changed >= 50 + or net_deleted >= 1000 + or deleted_lines >= 2000 + or commits_behind > 10 + ) + if moderate: + return { + "signal": "destructive_diff_guard", + "verdict": "WARNING", + "diverged": diverged, + "commits_behind": commits_behind, + "files_changed": files_changed, + "added_lines": stats["added_lines"], + "deleted_lines": deleted_lines, + "net_deleted_lines": net_deleted, + "refactor_exemption": has_refactor_exemption, + } + + return { + "signal": "destructive_diff_guard", + "verdict": "CLEAR", + "diverged": diverged, + "commits_behind": commits_behind, + "files_changed": files_changed, + "added_lines": stats["added_lines"], + "deleted_lines": deleted_lines, + "net_deleted_lines": net_deleted, + "refactor_exemption": has_refactor_exemption, + } + + def signal_6_ci(pr_number: int, repo: str, branch: str | None = None, pr_data: dict | None = None) -> dict: """ Query combined CI status for PR head commit. @@ -667,6 +857,7 @@ def run(repo: str, pr_number: int, post_comment: bool = False) -> dict: signal_3_staleness(pr_number, repo), signal_4_branch_divergence(pr_number, repo, pr_data=pr), signal_6_ci(pr_number, repo, branch=base_ref, pr_data=pr), + signal_7_destructive_diff_guard(pr_number, repo, pr_data=pr), ] verdict, blockers = compute_verdict(gates) diff --git a/tools/gate-check-v3/test_gate_check.py b/tools/gate-check-v3/test_gate_check.py index ef2516bd..9b2f1654 100644 --- a/tools/gate-check-v3/test_gate_check.py +++ b/tools/gate-check-v3/test_gate_check.py @@ -566,3 +566,264 @@ def test_signal_6_status_collapse_uses_max_id(monkeypatch): assert result["verdict"] == "CI_FAIL" assert TRUSTED_QA in result["failing_required"] assert result["all_check_statuses"][TRUSTED_QA] == "failure" + + + +# ── core#2875 — signal_7 destructive-diff guard tests ─────────────────────── + + +def test_signal_7_clear_when_diff_and_branch_both_small(monkeypatch): + mod = load_gate_check() + monkeypatch.setattr(mod, "signal_4_branch_divergence", lambda *a, **kw: { + "signal": "branch_divergence", "verdict": "CLEAR", + "diverged": False, "commits_behind": 0, "pr_files_count": 0, + "inherited_files": [], "new_work_files": [], "inherited_fraction": 0.0, + }) + monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { + "files_changed": 5, "added_lines": 10, "deleted_lines": 5, "net_deleted_lines": 0, + }) + result = mod.signal_7_destructive_diff_guard(200, "molecule-ai/molecule-core", pr_data={"labels": []}) + assert result["verdict"] == "CLEAR" + assert result["files_changed"] == 5 + assert result["diverged"] is False + + +def test_signal_7_warn_on_moderate_files_changed(monkeypatch): + mod = load_gate_check() + monkeypatch.setattr(mod, "signal_4_branch_divergence", lambda *a, **kw: { + "signal": "branch_divergence", "verdict": "CLEAR", + "diverged": False, "commits_behind": 0, "pr_files_count": 0, + "inherited_files": [], "new_work_files": [], "inherited_fraction": 0.0, + }) + monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { + "files_changed": 60, "added_lines": 100, "deleted_lines": 50, "net_deleted_lines": 0, + }) + result = mod.signal_7_destructive_diff_guard(200, "molecule-ai/molecule-core", pr_data={"labels": []}) + assert result["verdict"] == "WARNING" + assert result["files_changed"] == 60 + + +def test_signal_7_warn_on_moderate_net_deleted(monkeypatch): + mod = load_gate_check() + monkeypatch.setattr(mod, "signal_4_branch_divergence", lambda *a, **kw: { + "signal": "branch_divergence", "verdict": "CLEAR", + "diverged": False, "commits_behind": 0, "pr_files_count": 0, + "inherited_files": [], "new_work_files": [], "inherited_fraction": 0.0, + }) + monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { + "files_changed": 10, "added_lines": 100, "deleted_lines": 1500, "net_deleted_lines": 1400, + }) + result = mod.signal_7_destructive_diff_guard(200, "molecule-ai/molecule-core", pr_data={"labels": []}) + assert result["verdict"] == "WARNING" + assert result["net_deleted_lines"] == 1400 + + +def test_signal_7_warn_on_commits_behind_above_moderate(monkeypatch): + mod = load_gate_check() + monkeypatch.setattr(mod, "signal_4_branch_divergence", lambda *a, **kw: { + "signal": "branch_divergence", "verdict": "WARNING", + "diverged": True, "commits_behind": 15, "pr_files_count": 5, + "inherited_files": [], "new_work_files": [], "inherited_fraction": 0.0, + }) + monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { + "files_changed": 5, "added_lines": 10, "deleted_lines": 5, "net_deleted_lines": 0, + }) + result = mod.signal_7_destructive_diff_guard(200, "molecule-ai/molecule-core", pr_data={"labels": []}) + assert result["verdict"] == "WARNING" + assert result["commits_behind"] == 15 + + +def test_signal_7_block_on_high_confidence_destructive_diff(monkeypatch): + """The core#1100 / #2875 case: 481 files / -55k diff + stale branch.""" + mod = load_gate_check() + monkeypatch.setattr(mod, "signal_4_branch_divergence", lambda *a, **kw: { + "signal": "branch_divergence", "verdict": "WARNING", + "diverged": True, "commits_behind": 25, "pr_files_count": 250, + "inherited_files": ["a.py", "b.py"], "new_work_files": ["c.py"], "inherited_fraction": 0.67, + }) + monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { + "files_changed": 481, "added_lines": 1000, "deleted_lines": 55800, "net_deleted_lines": 54800, + }) + result = mod.signal_7_destructive_diff_guard(200, "molecule-ai/molecule-core", pr_data={"labels": []}) + assert result["verdict"] == "BLOCKED" + assert result["files_changed"] == 481 + assert result["net_deleted_lines"] == 54800 + assert result["diverged"] is True + assert "destructive diff" in result["reason"] + + +def test_signal_7_block_when_only_one_threshold_trips_but_branch_diverged(monkeypatch): + mod = load_gate_check() + monkeypatch.setattr(mod, "signal_4_branch_divergence", lambda *a, **kw: { + "signal": "branch_divergence", "verdict": "WARNING", + "diverged": True, "commits_behind": 25, "pr_files_count": 250, + "inherited_files": ["a.py"], "new_work_files": ["c.py"], "inherited_fraction": 0.5, + }) + monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { + "files_changed": 250, "added_lines": 200, "deleted_lines": 50, "net_deleted_lines": 0, + }) + result = mod.signal_7_destructive_diff_guard(200, "molecule-ai/molecule-core", pr_data={"labels": []}) + assert result["verdict"] == "BLOCKED" + + +def test_signal_7_no_block_if_diff_destructive_but_branch_not_diverged(monkeypatch): + """Destructive diff alone (no stale branch) does NOT BLOCK — + prevents false-positive on large but intentional PRs.""" + mod = load_gate_check() + monkeypatch.setattr(mod, "signal_4_branch_divergence", lambda *a, **kw: { + "signal": "branch_divergence", "verdict": "CLEAR", + "diverged": False, "commits_behind": 0, "pr_files_count": 0, + "inherited_files": [], "new_work_files": [], "inherited_fraction": 0.0, + }) + monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { + "files_changed": 481, "added_lines": 1000, "deleted_lines": 55800, "net_deleted_lines": 54800, + }) + result = mod.signal_7_destructive_diff_guard(200, "molecule-ai/molecule-core", pr_data={"labels": []}) + assert result["verdict"] == "WARNING" + + +def test_signal_7_refactor_label_exempts_block(monkeypatch): + mod = load_gate_check() + monkeypatch.setattr(mod, "signal_4_branch_divergence", lambda *a, **kw: { + "signal": "branch_divergence", "verdict": "WARNING", + "diverged": True, "commits_behind": 25, "pr_files_count": 250, + "inherited_files": [], "new_work_files": [], "inherited_fraction": 0.5, + }) + monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { + "files_changed": 481, "added_lines": 1000, "deleted_lines": 55800, "net_deleted_lines": 54800, + }) + result = mod.signal_7_destructive_diff_guard( + 200, "molecule-ai/molecule-core", + pr_data={"labels": [{"name": "refactor"}, {"name": "needs_review"}]}, + ) + assert result["verdict"] == "WARNING" + assert result["refactor_exemption"] is True + + +def test_signal_7_migration_label_exempts_block(monkeypatch): + mod = load_gate_check() + monkeypatch.setattr(mod, "signal_4_branch_divergence", lambda *a, **kw: { + "signal": "branch_divergence", "verdict": "WARNING", + "diverged": True, "commits_behind": 25, "pr_files_count": 250, + "inherited_files": [], "new_work_files": [], "inherited_fraction": 0.5, + }) + monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { + "files_changed": 300, "added_lines": 100, "deleted_lines": 8000, "net_deleted_lines": 7900, + }) + result = mod.signal_7_destructive_diff_guard( + 200, "molecule-ai/molecule-core", + pr_data={"labels": [{"name": "migration"}]}, + ) + assert result["verdict"] == "WARNING" + assert result["refactor_exemption"] is True + + +def test_signal_7_generated_label_exempts_block(monkeypatch): + mod = load_gate_check() + monkeypatch.setattr(mod, "signal_4_branch_divergence", lambda *a, **kw: { + "signal": "branch_divergence", "verdict": "WARNING", + "diverged": True, "commits_behind": 25, "pr_files_count": 250, + "inherited_files": [], "new_work_files": [], "inherited_fraction": 0.5, + }) + monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { + "files_changed": 250, "added_lines": 50, "deleted_lines": 100, "net_deleted_lines": 50, + }) + result = mod.signal_7_destructive_diff_guard( + 200, "molecule-ai/molecule-core", + pr_data={"labels": [{"name": "generated"}]}, + ) + assert result["verdict"] == "WARNING" + + +def test_signal_7_vendor_label_exempts_block(monkeypatch): + mod = load_gate_check() + monkeypatch.setattr(mod, "signal_4_branch_divergence", lambda *a, **kw: { + "signal": "branch_divergence", "verdict": "WARNING", + "diverged": True, "commits_behind": 25, "pr_files_count": 250, + "inherited_files": [], "new_work_files": [], "inherited_fraction": 0.5, + }) + monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { + "files_changed": 300, "added_lines": 10, "deleted_lines": 20000, "net_deleted_lines": 19990, + }) + result = mod.signal_7_destructive_diff_guard( + 200, "molecule-ai/molecule-core", + pr_data={"labels": [{"name": "vendor"}]}, + ) + assert result["verdict"] == "WARNING" + + +def test_signal_7_case_insensitive_label_match(monkeypatch): + mod = load_gate_check() + monkeypatch.setattr(mod, "signal_4_branch_divergence", lambda *a, **kw: { + "signal": "branch_divergence", "verdict": "WARNING", + "diverged": True, "commits_behind": 25, "pr_files_count": 250, + "inherited_files": [], "new_work_files": [], "inherited_fraction": 0.5, + }) + monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { + "files_changed": 481, "added_lines": 1000, "deleted_lines": 55800, "net_deleted_lines": 54800, + }) + result = mod.signal_7_destructive_diff_guard( + 200, "molecule-ai/molecule-core", + pr_data={"labels": [{"name": "Refactor"}]}, + ) + assert result["verdict"] == "WARNING" + assert result["refactor_exemption"] is True + + +def test_signal_7_files_api_error_returns_warning(monkeypatch): + """A transient PR-files API error must surface as WARN, not BLOCK + (transient failure shouldn't gate-block a real PR).""" + mod = load_gate_check() + monkeypatch.setattr(mod, "signal_4_branch_divergence", lambda *a, **kw: { + "signal": "branch_divergence", "verdict": "CLEAR", + "diverged": False, "commits_behind": 0, "pr_files_count": 0, + "inherited_files": [], "new_work_files": [], "inherited_fraction": 0.0, + }) + monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: {"error": "404 page not found"}) + result = mod.signal_7_destructive_diff_guard(200, "molecule-ai/molecule-core", pr_data={"labels": []}) + assert result["verdict"] == "WARNING" + assert "could not fetch PR files API" in result["reason"] + + +def test_signal_7_net_deleted_uses_max_zero_underflow(monkeypatch): + """A PR that ADDS more than it deletes (a large refactor rewrite) should + have net_deleted_lines=0, NOT a negative number. The block must + use net_deleted_lines (>= 5000), not raw deleted_lines minus added_lines.""" + mod = load_gate_check() + monkeypatch.setattr(mod, "signal_4_branch_divergence", lambda *a, **kw: { + "signal": "branch_divergence", "verdict": "WARNING", + "diverged": True, "commits_behind": 25, "pr_files_count": 250, + "inherited_files": [], "new_work_files": [], "inherited_fraction": 0.5, + }) + # deleted=100, added=200 → net should clamp to 0 (not -100) + monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { + "files_changed": 5, "added_lines": 200, "deleted_lines": 100, "net_deleted_lines": 0, + }) + result = mod.signal_7_destructive_diff_guard(200, "molecule-ai/molecule-core", pr_data={"labels": []}) + # files=5 (small) + net=0 (small) + diverged → moderate? files<50, net<1000, deleted<2000, commits_behind>10 → WARN + assert result["net_deleted_lines"] == 0 + + +def test_signal_7_refactor_exempt_with_still_high_diff_surfaces_numbers(monkeypatch): + """When refactor-exemption applies, the WARN result should still + surface the destructive numbers so an operator can review the size.""" + mod = load_gate_check() + monkeypatch.setattr(mod, "signal_4_branch_divergence", lambda *a, **kw: { + "signal": "branch_divergence", "verdict": "WARNING", + "diverged": True, "commits_behind": 25, "pr_files_count": 250, + "inherited_files": [], "new_work_files": [], "inherited_fraction": 0.5, + }) + monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { + "files_changed": 481, "added_lines": 1000, "deleted_lines": 55800, "net_deleted_lines": 54800, + }) + result = mod.signal_7_destructive_diff_guard( + 200, "molecule-ai/molecule-core", + pr_data={"labels": [{"name": "refactor"}]}, + ) + assert result["verdict"] == "WARNING" + # WARN still surfaces the destructive numbers for human review + assert result["files_changed"] == 481 + assert result["net_deleted_lines"] == 54800 + assert result["deleted_lines"] == 55800 + assert result["diverged"] is True + assert result["refactor_exemption"] is True