From 57b74ab31ead0226fd7e262793d52ca018df0859 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Sun, 24 May 2026 02:49:09 +0000 Subject: [PATCH] =?UTF-8?q?gate-check-v3:=20add=20Signal=204=20=E2=80=94?= =?UTF-8?q?=20branch=20divergence=20/=20scope-creep=20guard=20(mc#365)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a heuristic that detects stale PR branches where the base SHA has drifted behind target HEAD. Distinguishes files that are "inherited" from base divergence (already on target via prior commits) from genuinely new PR work, preventing misattribution of scope creep when branches are stale. Implementation: - New signal_4_branch_divergence() compares PR.base.sha to current target-branch HEAD via the Gitea API. - If diverged, paginates /commits to count commits behind and collect filenames changed on target since base. - Cross-references with /pulls/{n}/files to compute inherited vs new-work fractions. - Emits WARNING when >50% inherited or >5 commits behind with overlap. - Advisory only — never blocks merge (WARNING is not in blockers list). Updates: - VERDICT_ORDER expanded with WARNING between N/A and CLEAR. - format_comment renders divergence stats + inherited file list. - Workflow YAML comment block updated to list signal 4. - 4 new unit tests cover: no-divergence, inherited-files WARNING, no-overlap CLEAR, and API-error N/A fallback. Co-Authored-By: Claude Opus 4.7 --- .gitea/workflows/gate-check-v3.yml | 3 +- tools/gate-check-v3/gate_check.py | 152 ++++++++++++++++++++++++- tools/gate-check-v3/test_gate_check.py | 144 +++++++++++++++++++++++ 3 files changed, 296 insertions(+), 3 deletions(-) diff --git a/.gitea/workflows/gate-check-v3.yml b/.gitea/workflows/gate-check-v3.yml index 0ee216a43..e8d603ecd 100644 --- a/.gitea/workflows/gate-check-v3.yml +++ b/.gitea/workflows/gate-check-v3.yml @@ -7,10 +7,11 @@ # PR_NUMBER — set via ${{ github.event.pull_request.number }} from the trigger # POST_COMMENT — "true" to post/update comment on PR # -# Gating logic (MVP signals 1,2,3,6): +# Gating logic (MVP signals 1,2,3,4,6): # 1. Author-aware agent-tag comment scan # 2. REQUEST_CHANGES reviews state machine # 3. Staleness detection (SOP-12: review.commit_id != PR.head_sha + >1 working day) +# 4. Branch divergence / scope-creep guard (base-sha vs target HEAD; mc#365) # 6. CI required-checks awareness # # Exit code: 0=CLEAR, 1=BLOCKED, 2=ERROR diff --git a/tools/gate-check-v3/gate_check.py b/tools/gate-check-v3/gate_check.py index 729cf277c..d5f79f8cd 100644 --- a/tools/gate-check-v3/gate_check.py +++ b/tools/gate-check-v3/gate_check.py @@ -6,10 +6,11 @@ Emits structured verdict + human-readable summary. Designed to run as: 1. CLI: python gate_check.py --repo org/repo --pr N 2. Gitea Actions step: runs this script, captures stdout JSON -Signals (MVP — signals 1,2,3,6): +Signals (MVP — signals 1,2,3,4,6): 1. Author-aware agent-tag comment scan 2. REQUEST_CHANGES reviews state machine 3. Staleness detection (review.commit_id != PR.head_sha) + 4. Branch divergence / scope-creep guard (base-sha vs target HEAD) 6. CI required-checks awareness Exit codes: @@ -330,6 +331,132 @@ def signal_3_staleness(pr_number: int, repo: str) -> dict: } +# ── Signal 4: Branch divergence / scope-creep guard ───────────────────────── +# Detects stale PR branches where the base SHA has drifted behind target HEAD. +# Distinguishes files that are "inherited" from base divergence (already on +# target via prior commits) from genuinely new PR work. Prevents misattribution +# of scope creep when branches are stale (molecule-core#365). + + +def _commits_and_files_behind( + owner: str, name: str, base_sha: str, target_branch: str +) -> tuple[int | None, set[str]]: + """Paginate target-branch commits from HEAD back to base_sha. + Return (commits_behind_count, set of filenames changed in those commits). + Safety-capped at 20 pages (~1000 commits) to avoid runaway pagination. + """ + commits_behind = 0 + target_files: set[str] = set() + page = 1 + max_pages = 20 + per_page = 50 + + while page <= max_pages: + try: + commits = api_get( + f"/repos/{owner}/{name}/commits?sha={target_branch}&page={page}&limit={per_page}" + ) + except GiteaError: + return (None, target_files) + + if not isinstance(commits, list): + return (None, target_files) + + for c in commits: + if c.get("sha") == base_sha: + return (commits_behind, target_files) + commits_behind += 1 + for f in c.get("files", []): + fname = f.get("filename") or f.get("name", "") + if fname: + target_files.add(fname) + + if len(commits) < per_page: + break + page += 1 + + return (commits_behind if commits_behind > 0 else None, target_files) + + +def signal_4_branch_divergence( + pr_number: int, repo: str, pr_data: dict | None = None +) -> dict: + """ + Compare PR.base.sha to current target-branch HEAD. + If diverged, show "inherited from base divergence" vs "actual new work" + file fractions using the commits API. + Returns: {signal, verdict, diverged, commits_behind, inherited_fraction, ...} + """ + owner, name = repo.split("/", 1) + + if pr_data is None: + pr_data = api_get(f"/repos/{owner}/{name}/pulls/{pr_number}") + + base_sha = pr_data["base"]["sha"] + target_branch = pr_data["base"]["ref"] + + try: + branch_info = api_get(f"/repos/{owner}/{name}/branches/{target_branch}") + target_head = branch_info["commit"]["id"] + except GiteaError as e: + return {"signal": "branch_divergence", "verdict": "N/A", "error": str(e)} + + if base_sha == target_head: + return { + "signal": "branch_divergence", + "verdict": "CLEAR", + "diverged": False, + "commits_behind": 0, + "pr_files_count": 0, + "inherited_files": [], + "new_work_files": [], + "inherited_fraction": 0.0, + } + + # Branch is diverged — count commits behind and collect files changed on + # target since the PR's base snapshot. + commits_behind, target_files = _commits_and_files_behind( + owner, name, base_sha, target_branch + ) + + # Get PR files + try: + pr_files_data = api_list(f"/repos/{owner}/{name}/pulls/{pr_number}/files") + pr_files = { + f.get("filename") or f.get("name", "") for f in pr_files_data + } + pr_files.discard("") + except GiteaError: + pr_files = set() + + inherited_files = sorted(pr_files & target_files) + new_work_files = sorted(pr_files - target_files) + total = len(pr_files) + inherited_fraction = len(inherited_files) / total if total else 0.0 + + # Verdict: WARNING if significant divergence. + # Thresholds: >50 % inherited files, or >5 commits behind with any inherited files. + if inherited_fraction > 0.5 or ( + commits_behind and commits_behind > 5 and inherited_files + ): + verdict = "WARNING" + else: + verdict = "CLEAR" + + return { + "signal": "branch_divergence", + "verdict": verdict, + "diverged": True, + "base_sha": base_sha, + "target_head": target_head, + "commits_behind": commits_behind, + "pr_files_count": total, + "inherited_files": inherited_files, + "new_work_files": new_work_files, + "inherited_fraction": round(inherited_fraction, 2), + } + + # ── Signal 6: CI required-checks awareness ─────────────────────────────────── def signal_6_ci(pr_number: int, repo: str, branch: str | None = None, pr_data: dict | None = None) -> dict: @@ -410,7 +537,7 @@ def signal_6_ci(pr_number: int, repo: str, branch: str | None = None, pr_data: d # ── Gate evaluation ─────────────────────────────────────────────────────────── -VERDICT_ORDER = {"ERROR": 0, "CI_FAIL": 1, "BLOCKED": 2, "STALE-RC": 3, "CI_PENDING": 4, "N/A": 5, "CLEAR": 6} +VERDICT_ORDER = {"ERROR": 0, "CI_FAIL": 1, "BLOCKED": 2, "STALE-RC": 3, "CI_PENDING": 4, "N/A": 5, "WARNING": 6, "CLEAR": 7} def compute_verdict(gates: list[dict]) -> tuple[str, list[dict]]: @@ -441,6 +568,7 @@ def format_comment(repo: str, pr_number: int, verdict: str, gates: list[dict], b "agent_tag_comments": "Agent-tag gates", "request_changes_reviews": "REQUEST_CHANGES reviews", "stale_reviews": "Staleness check", + "branch_divergence": "Branch divergence / scope-creep guard", "ci_checks": "CI required checks", } @@ -476,6 +604,25 @@ def format_comment(repo: str, pr_number: int, verdict: str, gates: list[dict], b lines.append( f" - @{r['user']} stale (commit={r.get('review_commit','?')[:7]}, age={r.get('age_hours','?')}h)" ) + elif sig == "branch_divergence": + if b.get("diverged"): + lines.append( + f" - Branch is {b.get('commits_behind', '?')} commits behind target " + f"({b.get('target_head', '?')[:7]})" + ) + frac = b.get("inherited_fraction", 0) + lines.append( + f" - {frac * 100:.0f}% of PR files inherited from base divergence " + f"({len(b.get('inherited_files', []))}/{b.get('pr_files_count', 0)} files)" + ) + for f in b.get("inherited_files", [])[:5]: + lines.append(f" - inherited: `{f}`") + if len(b.get("inherited_files", [])) > 5: + lines.append( + f" - ... and {len(b.get('inherited_files', [])) - 5} more" + ) + else: + lines.append(" - Branch is up to date with target") elif sig == "agent_tag_comments": for agent, res in b.get("results", {}).items(): v = res.get("verdict", "MISSING") @@ -518,6 +665,7 @@ def run(repo: str, pr_number: int, post_comment: bool = False) -> dict: signal_1_comment_scan(pr_number, repo), signal_2_reviews(pr_number, repo), signal_3_staleness(pr_number, repo), + signal_4_branch_divergence(pr_number, repo, pr_data=pr), signal_6_ci(pr_number, repo, branch=base_ref, pr_data=pr), ] verdict, blockers = compute_verdict(gates) diff --git a/tools/gate-check-v3/test_gate_check.py b/tools/gate-check-v3/test_gate_check.py index 190cfa92b..74e9e3be7 100644 --- a/tools/gate-check-v3/test_gate_check.py +++ b/tools/gate-check-v3/test_gate_check.py @@ -74,3 +74,147 @@ def test_signal_1_infra_sre_login_alias_resolved_to_core_devops(monkeypatch): engineers = result["results"]["core-devops"] assert engineers["verdict"] == "APPROVED" assert engineers["group"] == "engineers" + + +# ── Signal 4: Branch divergence / scope-creep guard ───────────────────────── + + +def test_signal_4_no_divergence_returns_clear(monkeypatch): + """When PR.base.sha equals target branch HEAD, divergence is zero.""" + mod = load_gate_check() + + shared_sha = "abc123" + + def fake_api_get(path): + if path == "/repos/molecule-ai/molecule-core/pulls/100": + return { + "base": {"sha": shared_sha, "ref": "main"}, + "head": {"sha": "def456"}, + } + if path == "/repos/molecule-ai/molecule-core/branches/main": + return {"commit": {"id": shared_sha}} + raise AssertionError(f"unexpected api_get: {path}") + + monkeypatch.setattr(mod, "api_get", fake_api_get) + + result = mod.signal_4_branch_divergence(100, "molecule-ai/molecule-core") + + assert result["verdict"] == "CLEAR" + assert result["diverged"] is False + assert result["commits_behind"] == 0 + assert result["inherited_fraction"] == 0.0 + + +def test_signal_4_divergence_with_inherited_files_warning(monkeypatch): + """Stale branch with overlapping files triggers WARNING and correct fractions.""" + mod = load_gate_check() + + base_sha = "base000" + target_head = "head111" + + def fake_api_get(path): + if path == "/repos/molecule-ai/molecule-core/pulls/101": + return { + "base": {"sha": base_sha, "ref": "main"}, + "head": {"sha": "pr222"}, + } + if path == "/repos/molecule-ai/molecule-core/branches/main": + return {"commit": {"id": target_head}} + if path == "/repos/molecule-ai/molecule-core/commits?sha=main&page=1&limit=50": + return [ + { + "sha": target_head, + "files": [ + {"filename": "ci.yml"}, + {"filename": "README.md"}, + ], + }, + {"sha": base_sha, "files": []}, + ] + raise AssertionError(f"unexpected api_get: {path}") + + def fake_api_list(path): + if path == "/repos/molecule-ai/molecule-core/pulls/101/files": + return [ + {"filename": "ci.yml"}, + {"filename": "README.md"}, + {"filename": "new_feature.go"}, + ] + raise AssertionError(f"unexpected api_list: {path}") + + monkeypatch.setattr(mod, "api_get", fake_api_get) + monkeypatch.setattr(mod, "api_list", fake_api_list) + + result = mod.signal_4_branch_divergence(101, "molecule-ai/molecule-core") + + assert result["verdict"] == "WARNING" + assert result["diverged"] is True + assert result["commits_behind"] == 1 + assert result["pr_files_count"] == 3 + assert result["inherited_files"] == ["README.md", "ci.yml"] + assert result["new_work_files"] == ["new_feature.go"] + assert result["inherited_fraction"] == round(2 / 3, 2) + + +def test_signal_4_divergence_no_inherited_files_clear(monkeypatch): + """Stale branch but zero file overlap → still CLEAR (no scope-creep risk).""" + mod = load_gate_check() + + base_sha = "base000" + target_head = "head111" + + def fake_api_get(path): + if path == "/repos/molecule-ai/molecule-core/pulls/102": + return { + "base": {"sha": base_sha, "ref": "main"}, + "head": {"sha": "pr222"}, + } + if path == "/repos/molecule-ai/molecule-core/branches/main": + return {"commit": {"id": target_head}} + if path == "/repos/molecule-ai/molecule-core/commits?sha=main&page=1&limit=50": + return [ + { + "sha": target_head, + "files": [{"filename": "other.go"}], + }, + {"sha": base_sha, "files": []}, + ] + raise AssertionError(f"unexpected api_get: {path}") + + def fake_api_list(path): + if path == "/repos/molecule-ai/molecule-core/pulls/102/files": + return [{"filename": "new_feature.go"}] + raise AssertionError(f"unexpected api_list: {path}") + + monkeypatch.setattr(mod, "api_get", fake_api_get) + monkeypatch.setattr(mod, "api_list", fake_api_list) + + result = mod.signal_4_branch_divergence(102, "molecule-ai/molecule-core") + + assert result["verdict"] == "CLEAR" + assert result["diverged"] is True + assert result["inherited_files"] == [] + assert result["new_work_files"] == ["new_feature.go"] + assert result["inherited_fraction"] == 0.0 + + +def test_signal_4_branch_api_error_returns_na(monkeypatch): + """If the branch endpoint 404s, signal degrades to N/A rather than crashing.""" + mod = load_gate_check() + + def fake_api_get(path): + if path == "/repos/molecule-ai/molecule-core/pulls/103": + return { + "base": {"sha": "base000", "ref": "main"}, + "head": {"sha": "pr222"}, + } + if path == "/repos/molecule-ai/molecule-core/branches/main": + raise mod.GiteaError("GET .../branches/main → 404: not found") + raise AssertionError(f"unexpected api_get: {path}") + + monkeypatch.setattr(mod, "api_get", fake_api_get) + + result = mod.signal_4_branch_divergence(103, "molecule-ai/molecule-core") + + assert result["verdict"] == "N/A" + assert "error" in result -- 2.52.0