gate-check-v3: add Signal 4 — branch divergence / scope-creep guard (mc#365) #1764

Merged
agent-dev-a merged 3 commits from fix-365-scope-divergence-gate-check into main 2026-05-26 09:29:11 +00:00
3 changed files with 299 additions and 3 deletions
+2 -1
View File
@@ -7,10 +7,11 @@
# PR_NUMBER — set via ${{ github.event.pull_request.number }} from the trigger
# POST_COMMENT — "true" to post/update comment on PR
#
# Gating logic (MVP signals 1,2,3,6):
# Gating logic (MVP signals 1,2,3,4,6):
# 1. Author-aware agent-tag comment scan
# 2. REQUEST_CHANGES reviews state machine
# 3. Staleness detection (SOP-12: review.commit_id != PR.head_sha + >1 working day)
# 4. Branch divergence / scope-creep guard (base-sha vs target HEAD; mc#365)
# 6. CI required-checks awareness
#
# Exit code: 0=CLEAR, 1=BLOCKED, 2=ERROR
+150 -2
View File
@@ -6,10 +6,11 @@ Emits structured verdict + human-readable summary. Designed to run as:
1. CLI: python gate_check.py --repo org/repo --pr N
2. Gitea Actions step: runs this script, captures stdout JSON
Signals (MVP — signals 1,2,3,6):
Signals (MVP — signals 1,2,3,4,6):
1. Author-aware agent-tag comment scan
2. REQUEST_CHANGES reviews state machine
3. Staleness detection (review.commit_id != PR.head_sha)
4. Branch divergence / scope-creep guard (base-sha vs target HEAD)
6. CI required-checks awareness
Exit codes:
@@ -335,6 +336,132 @@ def signal_3_staleness(pr_number: int, repo: str) -> dict:
}
# ── Signal 4: Branch divergence / scope-creep guard ─────────────────────────
# Detects stale PR branches where the base SHA has drifted behind target HEAD.
# Distinguishes files that are "inherited" from base divergence (already on
# target via prior commits) from genuinely new PR work. Prevents misattribution
# of scope creep when branches are stale (molecule-core#365).
def _commits_and_files_behind(
owner: str, name: str, base_sha: str, target_branch: str
) -> tuple[int | None, set[str]]:
"""Paginate target-branch commits from HEAD back to base_sha.
Return (commits_behind_count, set of filenames changed in those commits).
Safety-capped at 20 pages (~1000 commits) to avoid runaway pagination.
"""
commits_behind = 0
target_files: set[str] = set()
page = 1
max_pages = 20
per_page = 50
while page <= max_pages:
try:
commits = api_get(
f"/repos/{owner}/{name}/commits?sha={target_branch}&page={page}&limit={per_page}"
)
except GiteaError:
return (None, target_files)
if not isinstance(commits, list):
return (None, target_files)
for c in commits:
if c.get("sha") == base_sha:
return (commits_behind, target_files)
commits_behind += 1
for f in c.get("files", []):
fname = f.get("filename") or f.get("name", "")
if fname:
target_files.add(fname)
if len(commits) < per_page:
break
page += 1
return (commits_behind if commits_behind > 0 else None, target_files)
def signal_4_branch_divergence(
pr_number: int, repo: str, pr_data: dict | None = None
) -> dict:
"""
Compare PR.base.sha to current target-branch HEAD.
If diverged, show "inherited from base divergence" vs "actual new work"
file fractions using the commits API.
Returns: {signal, verdict, diverged, commits_behind, inherited_fraction, ...}
"""
owner, name = repo.split("/", 1)
if pr_data is None:
pr_data = api_get(f"/repos/{owner}/{name}/pulls/{pr_number}")
base_sha = pr_data["base"]["sha"]
target_branch = pr_data["base"]["ref"]
try:
branch_info = api_get(f"/repos/{owner}/{name}/branches/{target_branch}")
target_head = branch_info["commit"]["id"]
except GiteaError as e:
return {"signal": "branch_divergence", "verdict": "N/A", "error": str(e)}
if base_sha == target_head:
return {
"signal": "branch_divergence",
"verdict": "CLEAR",
"diverged": False,
"commits_behind": 0,
"pr_files_count": 0,
"inherited_files": [],
"new_work_files": [],
"inherited_fraction": 0.0,
}
# Branch is diverged — count commits behind and collect files changed on
# target since the PR's base snapshot.
commits_behind, target_files = _commits_and_files_behind(
owner, name, base_sha, target_branch
)
# Get PR files
try:
pr_files_data = api_list(f"/repos/{owner}/{name}/pulls/{pr_number}/files")
pr_files = {
f.get("filename") or f.get("name", "") for f in pr_files_data
}
pr_files.discard("")
except GiteaError:
pr_files = set()
inherited_files = sorted(pr_files & target_files)
new_work_files = sorted(pr_files - target_files)
total = len(pr_files)
inherited_fraction = len(inherited_files) / total if total else 0.0
# Verdict: WARNING if significant divergence.
# Thresholds: >50 % inherited files, or >5 commits behind with any inherited files.
if inherited_fraction > 0.5 or (
commits_behind and commits_behind > 5 and inherited_files
):
verdict = "WARNING"
else:
verdict = "CLEAR"
return {
"signal": "branch_divergence",
"verdict": verdict,
"diverged": True,
"base_sha": base_sha,
"target_head": target_head,
"commits_behind": commits_behind,
"pr_files_count": total,
"inherited_files": inherited_files,
"new_work_files": new_work_files,
"inherited_fraction": round(inherited_fraction, 2),
}
# ── Signal 6: CI required-checks awareness ───────────────────────────────────
def signal_6_ci(pr_number: int, repo: str, branch: str | None = None, pr_data: dict | None = None) -> dict:
@@ -415,7 +542,7 @@ def signal_6_ci(pr_number: int, repo: str, branch: str | None = None, pr_data: d
# ── Gate evaluation ───────────────────────────────────────────────────────────
VERDICT_ORDER = {"ERROR": 0, "CI_FAIL": 1, "BLOCKED": 2, "STALE-RC": 3, "CI_PENDING": 4, "N/A": 5, "CLEAR": 6}
VERDICT_ORDER = {"ERROR": 0, "CI_FAIL": 1, "BLOCKED": 2, "STALE-RC": 3, "CI_PENDING": 4, "N/A": 5, "WARNING": 6, "CLEAR": 7}
def compute_verdict(gates: list[dict]) -> tuple[str, list[dict]]:
@@ -446,6 +573,7 @@ def format_comment(repo: str, pr_number: int, verdict: str, gates: list[dict], b
"agent_tag_comments": "Agent-tag gates",
"request_changes_reviews": "REQUEST_CHANGES reviews",
"stale_reviews": "Staleness check",
"branch_divergence": "Branch divergence / scope-creep guard",
"ci_checks": "CI required checks",
}
@@ -481,6 +609,25 @@ def format_comment(repo: str, pr_number: int, verdict: str, gates: list[dict], b
lines.append(
f" - @{r['user']} stale (commit={r.get('review_commit','?')[:7]}, age={r.get('age_hours','?')}h)"
)
elif sig == "branch_divergence":
if b.get("diverged"):
lines.append(
f" - Branch is {b.get('commits_behind', '?')} commits behind target "
f"({b.get('target_head', '?')[:7]})"
)
frac = b.get("inherited_fraction", 0)
lines.append(
f" - {frac * 100:.0f}% of PR files inherited from base divergence "
f"({len(b.get('inherited_files', []))}/{b.get('pr_files_count', 0)} files)"
)
for f in b.get("inherited_files", [])[:5]:
lines.append(f" - inherited: `{f}`")
if len(b.get("inherited_files", [])) > 5:
lines.append(
f" - ... and {len(b.get('inherited_files', [])) - 5} more"
)
else:
lines.append(" - Branch is up to date with target")
elif sig == "agent_tag_comments":
for agent, res in b.get("results", {}).items():
v = res.get("verdict", "MISSING")
@@ -523,6 +670,7 @@ def run(repo: str, pr_number: int, post_comment: bool = False) -> dict:
signal_1_comment_scan(pr_number, repo),
signal_2_reviews(pr_number, repo),
signal_3_staleness(pr_number, repo),
signal_4_branch_divergence(pr_number, repo, pr_data=pr),
signal_6_ci(pr_number, repo, branch=base_ref, pr_data=pr),
]
verdict, blockers = compute_verdict(gates)
+147
View File
@@ -121,6 +121,9 @@ def test_signal_1_null_user_in_review_does_not_crash(monkeypatch):
assert result["results"]["core-devops"]["verdict"] == "APPROVED"
# ── Signal 2: Draft REQUEST_CHANGES guard ───────────────────────────────────
def test_signal_2_draft_request_changes_does_not_block(monkeypatch):
"""official=False REQUEST_CHANGES is a draft/pending review and must NOT
block the gate (matching review-check.sh post-#1818 official-filter)."""
@@ -171,3 +174,147 @@ def test_signal_2_null_user_in_request_changes_does_not_crash(monkeypatch):
result = mod.signal_2_reviews(903, "molecule-ai/molecule-core")
assert result["verdict"] == "CLEAR"
assert result["blocking_reviews"] == []
# ── Signal 4: Branch divergence / scope-creep guard ─────────────────────────
def test_signal_4_no_divergence_returns_clear(monkeypatch):
"""When PR.base.sha equals target branch HEAD, divergence is zero."""
mod = load_gate_check()
shared_sha = "abc123"
def fake_api_get(path):
if path == "/repos/molecule-ai/molecule-core/pulls/100":
return {
"base": {"sha": shared_sha, "ref": "main"},
"head": {"sha": "def456"},
}
if path == "/repos/molecule-ai/molecule-core/branches/main":
return {"commit": {"id": shared_sha}}
raise AssertionError(f"unexpected api_get: {path}")
monkeypatch.setattr(mod, "api_get", fake_api_get)
result = mod.signal_4_branch_divergence(100, "molecule-ai/molecule-core")
assert result["verdict"] == "CLEAR"
assert result["diverged"] is False
assert result["commits_behind"] == 0
assert result["inherited_fraction"] == 0.0
def test_signal_4_divergence_with_inherited_files_warning(monkeypatch):
"""Stale branch with overlapping files triggers WARNING and correct fractions."""
mod = load_gate_check()
base_sha = "base000"
target_head = "head111"
def fake_api_get(path):
if path == "/repos/molecule-ai/molecule-core/pulls/101":
return {
"base": {"sha": base_sha, "ref": "main"},
"head": {"sha": "pr222"},
}
if path == "/repos/molecule-ai/molecule-core/branches/main":
return {"commit": {"id": target_head}}
if path == "/repos/molecule-ai/molecule-core/commits?sha=main&page=1&limit=50":
return [
{
"sha": target_head,
"files": [
{"filename": "ci.yml"},
{"filename": "README.md"},
],
},
{"sha": base_sha, "files": []},
]
raise AssertionError(f"unexpected api_get: {path}")
def fake_api_list(path):
if path == "/repos/molecule-ai/molecule-core/pulls/101/files":
return [
{"filename": "ci.yml"},
{"filename": "README.md"},
{"filename": "new_feature.go"},
]
raise AssertionError(f"unexpected api_list: {path}")
monkeypatch.setattr(mod, "api_get", fake_api_get)
monkeypatch.setattr(mod, "api_list", fake_api_list)
result = mod.signal_4_branch_divergence(101, "molecule-ai/molecule-core")
assert result["verdict"] == "WARNING"
assert result["diverged"] is True
assert result["commits_behind"] == 1
assert result["pr_files_count"] == 3
assert result["inherited_files"] == ["README.md", "ci.yml"]
assert result["new_work_files"] == ["new_feature.go"]
assert result["inherited_fraction"] == round(2 / 3, 2)
def test_signal_4_divergence_no_inherited_files_clear(monkeypatch):
"""Stale branch but zero file overlap → still CLEAR (no scope-creep risk)."""
mod = load_gate_check()
base_sha = "base000"
target_head = "head111"
def fake_api_get(path):
if path == "/repos/molecule-ai/molecule-core/pulls/102":
return {
"base": {"sha": base_sha, "ref": "main"},
"head": {"sha": "pr222"},
}
if path == "/repos/molecule-ai/molecule-core/branches/main":
return {"commit": {"id": target_head}}
if path == "/repos/molecule-ai/molecule-core/commits?sha=main&page=1&limit=50":
return [
{
"sha": target_head,
"files": [{"filename": "other.go"}],
},
{"sha": base_sha, "files": []},
]
raise AssertionError(f"unexpected api_get: {path}")
def fake_api_list(path):
if path == "/repos/molecule-ai/molecule-core/pulls/102/files":
return [{"filename": "new_feature.go"}]
raise AssertionError(f"unexpected api_list: {path}")
monkeypatch.setattr(mod, "api_get", fake_api_get)
monkeypatch.setattr(mod, "api_list", fake_api_list)
result = mod.signal_4_branch_divergence(102, "molecule-ai/molecule-core")
assert result["verdict"] == "CLEAR"
assert result["diverged"] is True
assert result["inherited_files"] == []
assert result["new_work_files"] == ["new_feature.go"]
assert result["inherited_fraction"] == 0.0
def test_signal_4_branch_api_error_returns_na(monkeypatch):
"""If the branch endpoint 404s, signal degrades to N/A rather than crashing."""
mod = load_gate_check()
def fake_api_get(path):
if path == "/repos/molecule-ai/molecule-core/pulls/103":
return {
"base": {"sha": "base000", "ref": "main"},
"head": {"sha": "pr222"},
}
if path == "/repos/molecule-ai/molecule-core/branches/main":
raise mod.GiteaError("GET .../branches/main → 404: not found")
raise AssertionError(f"unexpected api_get: {path}")
monkeypatch.setattr(mod, "api_get", fake_api_get)
result = mod.signal_4_branch_divergence(103, "molecule-ai/molecule-core")
assert result["verdict"] == "N/A"
assert "error" in result