gate-check-v3: add Signal 4 — branch divergence / scope-creep guard (mc#365) #1764
@@ -7,10 +7,11 @@
|
||||
# PR_NUMBER — set via ${{ github.event.pull_request.number }} from the trigger
|
||||
# POST_COMMENT — "true" to post/update comment on PR
|
||||
#
|
||||
# Gating logic (MVP signals 1,2,3,6):
|
||||
# Gating logic (MVP signals 1,2,3,4,6):
|
||||
# 1. Author-aware agent-tag comment scan
|
||||
# 2. REQUEST_CHANGES reviews state machine
|
||||
# 3. Staleness detection (SOP-12: review.commit_id != PR.head_sha + >1 working day)
|
||||
# 4. Branch divergence / scope-creep guard (base-sha vs target HEAD; mc#365)
|
||||
# 6. CI required-checks awareness
|
||||
#
|
||||
# Exit code: 0=CLEAR, 1=BLOCKED, 2=ERROR
|
||||
|
||||
@@ -6,10 +6,11 @@ Emits structured verdict + human-readable summary. Designed to run as:
|
||||
1. CLI: python gate_check.py --repo org/repo --pr N
|
||||
2. Gitea Actions step: runs this script, captures stdout JSON
|
||||
|
||||
Signals (MVP — signals 1,2,3,6):
|
||||
Signals (MVP — signals 1,2,3,4,6):
|
||||
1. Author-aware agent-tag comment scan
|
||||
2. REQUEST_CHANGES reviews state machine
|
||||
3. Staleness detection (review.commit_id != PR.head_sha)
|
||||
4. Branch divergence / scope-creep guard (base-sha vs target HEAD)
|
||||
6. CI required-checks awareness
|
||||
|
||||
Exit codes:
|
||||
@@ -335,6 +336,132 @@ def signal_3_staleness(pr_number: int, repo: str) -> dict:
|
||||
}
|
||||
|
||||
|
||||
# ── Signal 4: Branch divergence / scope-creep guard ─────────────────────────
|
||||
# Detects stale PR branches where the base SHA has drifted behind target HEAD.
|
||||
# Distinguishes files that are "inherited" from base divergence (already on
|
||||
# target via prior commits) from genuinely new PR work. Prevents misattribution
|
||||
# of scope creep when branches are stale (molecule-core#365).
|
||||
|
||||
|
||||
def _commits_and_files_behind(
|
||||
owner: str, name: str, base_sha: str, target_branch: str
|
||||
) -> tuple[int | None, set[str]]:
|
||||
"""Paginate target-branch commits from HEAD back to base_sha.
|
||||
Return (commits_behind_count, set of filenames changed in those commits).
|
||||
Safety-capped at 20 pages (~1000 commits) to avoid runaway pagination.
|
||||
"""
|
||||
commits_behind = 0
|
||||
target_files: set[str] = set()
|
||||
page = 1
|
||||
max_pages = 20
|
||||
per_page = 50
|
||||
|
||||
while page <= max_pages:
|
||||
try:
|
||||
commits = api_get(
|
||||
f"/repos/{owner}/{name}/commits?sha={target_branch}&page={page}&limit={per_page}"
|
||||
)
|
||||
except GiteaError:
|
||||
return (None, target_files)
|
||||
|
||||
if not isinstance(commits, list):
|
||||
return (None, target_files)
|
||||
|
||||
for c in commits:
|
||||
if c.get("sha") == base_sha:
|
||||
return (commits_behind, target_files)
|
||||
commits_behind += 1
|
||||
for f in c.get("files", []):
|
||||
fname = f.get("filename") or f.get("name", "")
|
||||
if fname:
|
||||
target_files.add(fname)
|
||||
|
||||
if len(commits) < per_page:
|
||||
break
|
||||
page += 1
|
||||
|
||||
return (commits_behind if commits_behind > 0 else None, target_files)
|
||||
|
||||
|
||||
def signal_4_branch_divergence(
|
||||
pr_number: int, repo: str, pr_data: dict | None = None
|
||||
) -> dict:
|
||||
"""
|
||||
Compare PR.base.sha to current target-branch HEAD.
|
||||
If diverged, show "inherited from base divergence" vs "actual new work"
|
||||
file fractions using the commits API.
|
||||
Returns: {signal, verdict, diverged, commits_behind, inherited_fraction, ...}
|
||||
"""
|
||||
owner, name = repo.split("/", 1)
|
||||
|
||||
if pr_data is None:
|
||||
pr_data = api_get(f"/repos/{owner}/{name}/pulls/{pr_number}")
|
||||
|
||||
base_sha = pr_data["base"]["sha"]
|
||||
target_branch = pr_data["base"]["ref"]
|
||||
|
||||
try:
|
||||
branch_info = api_get(f"/repos/{owner}/{name}/branches/{target_branch}")
|
||||
target_head = branch_info["commit"]["id"]
|
||||
except GiteaError as e:
|
||||
return {"signal": "branch_divergence", "verdict": "N/A", "error": str(e)}
|
||||
|
||||
if base_sha == target_head:
|
||||
return {
|
||||
"signal": "branch_divergence",
|
||||
"verdict": "CLEAR",
|
||||
"diverged": False,
|
||||
"commits_behind": 0,
|
||||
"pr_files_count": 0,
|
||||
"inherited_files": [],
|
||||
"new_work_files": [],
|
||||
"inherited_fraction": 0.0,
|
||||
}
|
||||
|
||||
# Branch is diverged — count commits behind and collect files changed on
|
||||
# target since the PR's base snapshot.
|
||||
commits_behind, target_files = _commits_and_files_behind(
|
||||
owner, name, base_sha, target_branch
|
||||
)
|
||||
|
||||
# Get PR files
|
||||
try:
|
||||
pr_files_data = api_list(f"/repos/{owner}/{name}/pulls/{pr_number}/files")
|
||||
pr_files = {
|
||||
f.get("filename") or f.get("name", "") for f in pr_files_data
|
||||
}
|
||||
pr_files.discard("")
|
||||
except GiteaError:
|
||||
pr_files = set()
|
||||
|
||||
inherited_files = sorted(pr_files & target_files)
|
||||
new_work_files = sorted(pr_files - target_files)
|
||||
total = len(pr_files)
|
||||
inherited_fraction = len(inherited_files) / total if total else 0.0
|
||||
|
||||
# Verdict: WARNING if significant divergence.
|
||||
# Thresholds: >50 % inherited files, or >5 commits behind with any inherited files.
|
||||
if inherited_fraction > 0.5 or (
|
||||
commits_behind and commits_behind > 5 and inherited_files
|
||||
):
|
||||
verdict = "WARNING"
|
||||
else:
|
||||
verdict = "CLEAR"
|
||||
|
||||
return {
|
||||
"signal": "branch_divergence",
|
||||
"verdict": verdict,
|
||||
"diverged": True,
|
||||
"base_sha": base_sha,
|
||||
"target_head": target_head,
|
||||
"commits_behind": commits_behind,
|
||||
"pr_files_count": total,
|
||||
"inherited_files": inherited_files,
|
||||
"new_work_files": new_work_files,
|
||||
"inherited_fraction": round(inherited_fraction, 2),
|
||||
}
|
||||
|
||||
|
||||
# ── Signal 6: CI required-checks awareness ───────────────────────────────────
|
||||
|
||||
def signal_6_ci(pr_number: int, repo: str, branch: str | None = None, pr_data: dict | None = None) -> dict:
|
||||
@@ -415,7 +542,7 @@ def signal_6_ci(pr_number: int, repo: str, branch: str | None = None, pr_data: d
|
||||
|
||||
# ── Gate evaluation ───────────────────────────────────────────────────────────
|
||||
|
||||
VERDICT_ORDER = {"ERROR": 0, "CI_FAIL": 1, "BLOCKED": 2, "STALE-RC": 3, "CI_PENDING": 4, "N/A": 5, "CLEAR": 6}
|
||||
VERDICT_ORDER = {"ERROR": 0, "CI_FAIL": 1, "BLOCKED": 2, "STALE-RC": 3, "CI_PENDING": 4, "N/A": 5, "WARNING": 6, "CLEAR": 7}
|
||||
|
||||
|
||||
def compute_verdict(gates: list[dict]) -> tuple[str, list[dict]]:
|
||||
@@ -446,6 +573,7 @@ def format_comment(repo: str, pr_number: int, verdict: str, gates: list[dict], b
|
||||
"agent_tag_comments": "Agent-tag gates",
|
||||
"request_changes_reviews": "REQUEST_CHANGES reviews",
|
||||
"stale_reviews": "Staleness check",
|
||||
"branch_divergence": "Branch divergence / scope-creep guard",
|
||||
"ci_checks": "CI required checks",
|
||||
}
|
||||
|
||||
@@ -481,6 +609,25 @@ def format_comment(repo: str, pr_number: int, verdict: str, gates: list[dict], b
|
||||
lines.append(
|
||||
f" - @{r['user']} stale (commit={r.get('review_commit','?')[:7]}, age={r.get('age_hours','?')}h)"
|
||||
)
|
||||
elif sig == "branch_divergence":
|
||||
if b.get("diverged"):
|
||||
lines.append(
|
||||
f" - Branch is {b.get('commits_behind', '?')} commits behind target "
|
||||
f"({b.get('target_head', '?')[:7]})"
|
||||
)
|
||||
frac = b.get("inherited_fraction", 0)
|
||||
lines.append(
|
||||
f" - {frac * 100:.0f}% of PR files inherited from base divergence "
|
||||
f"({len(b.get('inherited_files', []))}/{b.get('pr_files_count', 0)} files)"
|
||||
)
|
||||
for f in b.get("inherited_files", [])[:5]:
|
||||
lines.append(f" - inherited: `{f}`")
|
||||
if len(b.get("inherited_files", [])) > 5:
|
||||
lines.append(
|
||||
f" - ... and {len(b.get('inherited_files', [])) - 5} more"
|
||||
)
|
||||
else:
|
||||
lines.append(" - Branch is up to date with target")
|
||||
elif sig == "agent_tag_comments":
|
||||
for agent, res in b.get("results", {}).items():
|
||||
v = res.get("verdict", "MISSING")
|
||||
@@ -523,6 +670,7 @@ def run(repo: str, pr_number: int, post_comment: bool = False) -> dict:
|
||||
signal_1_comment_scan(pr_number, repo),
|
||||
signal_2_reviews(pr_number, repo),
|
||||
signal_3_staleness(pr_number, repo),
|
||||
signal_4_branch_divergence(pr_number, repo, pr_data=pr),
|
||||
signal_6_ci(pr_number, repo, branch=base_ref, pr_data=pr),
|
||||
]
|
||||
verdict, blockers = compute_verdict(gates)
|
||||
|
||||
@@ -121,6 +121,9 @@ def test_signal_1_null_user_in_review_does_not_crash(monkeypatch):
|
||||
assert result["results"]["core-devops"]["verdict"] == "APPROVED"
|
||||
|
||||
|
||||
# ── Signal 2: Draft REQUEST_CHANGES guard ───────────────────────────────────
|
||||
|
||||
|
||||
def test_signal_2_draft_request_changes_does_not_block(monkeypatch):
|
||||
"""official=False REQUEST_CHANGES is a draft/pending review and must NOT
|
||||
block the gate (matching review-check.sh post-#1818 official-filter)."""
|
||||
@@ -171,3 +174,147 @@ def test_signal_2_null_user_in_request_changes_does_not_crash(monkeypatch):
|
||||
result = mod.signal_2_reviews(903, "molecule-ai/molecule-core")
|
||||
assert result["verdict"] == "CLEAR"
|
||||
assert result["blocking_reviews"] == []
|
||||
|
||||
|
||||
# ── Signal 4: Branch divergence / scope-creep guard ─────────────────────────
|
||||
|
||||
|
||||
def test_signal_4_no_divergence_returns_clear(monkeypatch):
|
||||
"""When PR.base.sha equals target branch HEAD, divergence is zero."""
|
||||
mod = load_gate_check()
|
||||
|
||||
shared_sha = "abc123"
|
||||
|
||||
def fake_api_get(path):
|
||||
if path == "/repos/molecule-ai/molecule-core/pulls/100":
|
||||
return {
|
||||
"base": {"sha": shared_sha, "ref": "main"},
|
||||
"head": {"sha": "def456"},
|
||||
}
|
||||
if path == "/repos/molecule-ai/molecule-core/branches/main":
|
||||
return {"commit": {"id": shared_sha}}
|
||||
raise AssertionError(f"unexpected api_get: {path}")
|
||||
|
||||
monkeypatch.setattr(mod, "api_get", fake_api_get)
|
||||
|
||||
result = mod.signal_4_branch_divergence(100, "molecule-ai/molecule-core")
|
||||
|
||||
assert result["verdict"] == "CLEAR"
|
||||
assert result["diverged"] is False
|
||||
assert result["commits_behind"] == 0
|
||||
assert result["inherited_fraction"] == 0.0
|
||||
|
||||
|
||||
def test_signal_4_divergence_with_inherited_files_warning(monkeypatch):
|
||||
"""Stale branch with overlapping files triggers WARNING and correct fractions."""
|
||||
mod = load_gate_check()
|
||||
|
||||
base_sha = "base000"
|
||||
target_head = "head111"
|
||||
|
||||
def fake_api_get(path):
|
||||
if path == "/repos/molecule-ai/molecule-core/pulls/101":
|
||||
return {
|
||||
"base": {"sha": base_sha, "ref": "main"},
|
||||
"head": {"sha": "pr222"},
|
||||
}
|
||||
if path == "/repos/molecule-ai/molecule-core/branches/main":
|
||||
return {"commit": {"id": target_head}}
|
||||
if path == "/repos/molecule-ai/molecule-core/commits?sha=main&page=1&limit=50":
|
||||
return [
|
||||
{
|
||||
"sha": target_head,
|
||||
"files": [
|
||||
{"filename": "ci.yml"},
|
||||
{"filename": "README.md"},
|
||||
],
|
||||
},
|
||||
{"sha": base_sha, "files": []},
|
||||
]
|
||||
raise AssertionError(f"unexpected api_get: {path}")
|
||||
|
||||
def fake_api_list(path):
|
||||
if path == "/repos/molecule-ai/molecule-core/pulls/101/files":
|
||||
return [
|
||||
{"filename": "ci.yml"},
|
||||
{"filename": "README.md"},
|
||||
{"filename": "new_feature.go"},
|
||||
]
|
||||
raise AssertionError(f"unexpected api_list: {path}")
|
||||
|
||||
monkeypatch.setattr(mod, "api_get", fake_api_get)
|
||||
monkeypatch.setattr(mod, "api_list", fake_api_list)
|
||||
|
||||
result = mod.signal_4_branch_divergence(101, "molecule-ai/molecule-core")
|
||||
|
||||
assert result["verdict"] == "WARNING"
|
||||
assert result["diverged"] is True
|
||||
assert result["commits_behind"] == 1
|
||||
assert result["pr_files_count"] == 3
|
||||
assert result["inherited_files"] == ["README.md", "ci.yml"]
|
||||
assert result["new_work_files"] == ["new_feature.go"]
|
||||
assert result["inherited_fraction"] == round(2 / 3, 2)
|
||||
|
||||
|
||||
def test_signal_4_divergence_no_inherited_files_clear(monkeypatch):
|
||||
"""Stale branch but zero file overlap → still CLEAR (no scope-creep risk)."""
|
||||
mod = load_gate_check()
|
||||
|
||||
base_sha = "base000"
|
||||
target_head = "head111"
|
||||
|
||||
def fake_api_get(path):
|
||||
if path == "/repos/molecule-ai/molecule-core/pulls/102":
|
||||
return {
|
||||
"base": {"sha": base_sha, "ref": "main"},
|
||||
"head": {"sha": "pr222"},
|
||||
}
|
||||
if path == "/repos/molecule-ai/molecule-core/branches/main":
|
||||
return {"commit": {"id": target_head}}
|
||||
if path == "/repos/molecule-ai/molecule-core/commits?sha=main&page=1&limit=50":
|
||||
return [
|
||||
{
|
||||
"sha": target_head,
|
||||
"files": [{"filename": "other.go"}],
|
||||
},
|
||||
{"sha": base_sha, "files": []},
|
||||
]
|
||||
raise AssertionError(f"unexpected api_get: {path}")
|
||||
|
||||
def fake_api_list(path):
|
||||
if path == "/repos/molecule-ai/molecule-core/pulls/102/files":
|
||||
return [{"filename": "new_feature.go"}]
|
||||
raise AssertionError(f"unexpected api_list: {path}")
|
||||
|
||||
monkeypatch.setattr(mod, "api_get", fake_api_get)
|
||||
monkeypatch.setattr(mod, "api_list", fake_api_list)
|
||||
|
||||
result = mod.signal_4_branch_divergence(102, "molecule-ai/molecule-core")
|
||||
|
||||
assert result["verdict"] == "CLEAR"
|
||||
assert result["diverged"] is True
|
||||
assert result["inherited_files"] == []
|
||||
assert result["new_work_files"] == ["new_feature.go"]
|
||||
assert result["inherited_fraction"] == 0.0
|
||||
|
||||
|
||||
def test_signal_4_branch_api_error_returns_na(monkeypatch):
|
||||
"""If the branch endpoint 404s, signal degrades to N/A rather than crashing."""
|
||||
mod = load_gate_check()
|
||||
|
||||
def fake_api_get(path):
|
||||
if path == "/repos/molecule-ai/molecule-core/pulls/103":
|
||||
return {
|
||||
"base": {"sha": "base000", "ref": "main"},
|
||||
"head": {"sha": "pr222"},
|
||||
}
|
||||
if path == "/repos/molecule-ai/molecule-core/branches/main":
|
||||
raise mod.GiteaError("GET .../branches/main → 404: not found")
|
||||
raise AssertionError(f"unexpected api_get: {path}")
|
||||
|
||||
monkeypatch.setattr(mod, "api_get", fake_api_get)
|
||||
|
||||
result = mod.signal_4_branch_divergence(103, "molecule-ai/molecule-core")
|
||||
|
||||
assert result["verdict"] == "N/A"
|
||||
assert "error" in result
|
||||
|
||||
Reference in New Issue
Block a user