fix(ci-required-drift): suppress F1 false positive when sentinel deliberately omits needs: #1162

Closed
infra-sre wants to merge 5 commits from sre/fix-ci-drift-false-positive into staging
4 changed files with 130 additions and 2 deletions
+33 -1
View File
@@ -250,6 +250,25 @@ def sentinel_needs(ci_doc: dict) -> set[str]:
return set(needs)
def sentinel_has_explicit_needs_key(ci_doc: dict) -> bool:
"""Return True if the sentinel job has an explicit `needs:` key in the YAML.
This distinguishes `needs: []` (explicit empty list — a real drift signal)
from a missing `needs:` key entirely (the sentinel polls commit statuses
directly, no job-dependency chain — intentional per Gitea 1.22.6 quirk).
When `needs:` is absent entirely, the sentinel intentionally bypasses the
`needs:` pattern to avoid the Gitea 1.22/act_runner bug where a
`needs:`-dependent job with `if: always()` emits a "skipped" status
before upstream jobs settle, leaving branch protection with a permanent
pending context. See ci.yml comment on the `all-required` sentinel.
"""
sentinel = ci_doc.get("jobs", {}).get(SENTINEL_JOB)
if not isinstance(sentinel, dict):
return True # no sentinel = no exception; let the existing error fire
return "needs" in sentinel
def required_checks_env(audit_doc: dict) -> set[str]:
"""Pull the REQUIRED_CHECKS env value from audit-force-merge.yml.
Walks the YAML AST per `feedback_behavior_based_ast_gates`: we do
@@ -330,6 +349,7 @@ def detect_drift(branch: str) -> tuple[list[str], dict]:
jobs_all = ci_jobs_all(ci_doc)
needs = sentinel_needs(ci_doc)
env_set = required_checks_env(audit_doc)
sentinel_has_explicit_needs = sentinel_has_explicit_needs_key(ci_doc)
# Protection
# api() raises ApiError on non-2xx. Transient 5xx should fail loud.
@@ -369,6 +389,7 @@ def detect_drift(branch: str) -> tuple[list[str], dict]:
"branch": branch,
"ci_jobs": sorted(jobs),
"sentinel_needs": sorted(needs),
"sentinel_has_explicit_needs": sentinel_has_explicit_needs,
"protection_contexts_skipped": True,
"protection_http_status": http_status,
"audit_env_checks": sorted(env_set),
@@ -384,12 +405,22 @@ def detect_drift(branch: str) -> tuple[list[str], dict]:
contexts = set(protection.get("status_check_contexts") or [])
# ----- F1: job exists in CI but not under sentinel.needs -----
# Skip this check when the sentinel deliberately omits `needs:` (polls commit
# statuses directly instead, per Gitea 1.22.6 quirk documented in ci.yml).
# `needs: []` (explicit empty) is still a real F1 signal; only absent `needs:`
# is the documented exception.
missing_from_needs = sorted(jobs - needs)
if missing_from_needs:
if missing_from_needs and sentinel_has_explicit_needs:
findings.append(
"F1 — jobs in ci.yml NOT under sentinel `needs:` (sentinel doesn't gate them):\n"
+ "\n".join(f" - {n}" for n in missing_from_needs)
)
elif missing_from_needs and not sentinel_has_explicit_needs:
sys.stderr.write(
f"::notice::sentinel '{SENTINEL_JOB}' has no `needs:` key — "
f"intentional polling-sentinel design (Gitea 1.22.6 quirk); "
f"skipping F1 for {[n for n in missing_from_needs]}.\n"
)
# ----- F1b: needs lists a job that doesn't exist (typo) -----
# Compare against jobs_all (incl. event-gated jobs); a typo is a
@@ -445,6 +476,7 @@ def detect_drift(branch: str) -> tuple[list[str], dict]:
"branch": branch,
"ci_jobs": sorted(jobs),
"sentinel_needs": sorted(needs),
"sentinel_has_explicit_needs": sentinel_has_explicit_needs,
"protection_contexts": sorted(contexts),
"audit_env_checks": sorted(env_set),
"expected_contexts": sorted(emitted_contexts),
+47 -1
View File
@@ -314,6 +314,29 @@ def post_comment(pr_number: int, body: str, *, dry_run: bool) -> None:
api("POST", f"/repos/{OWNER}/{NAME}/issues/{pr_number}/comments", body={"body": body})
def remove_label(pr_number: int, label: str, *, dry_run: bool) -> None:
print(f"::notice::removing label '{label}' from PR #{pr_number}")
if dry_run:
return
# Gitea requires label ID, not name, for deletion.
# Multiple labels can share the same name with different IDs — remove all.
_, body = api("GET", f"/repos/{OWNER}/{NAME}/pulls/{pr_number}")
pr_labels = body.get("labels", []) if isinstance(body, dict) else []
removed = False
for lbl in pr_labels:
if lbl.get("name") == label:
label_id = lbl.get("id")
if label_id:
api(
"DELETE",
f"/repos/{OWNER}/{NAME}/issues/{pr_number}/labels/{label_id}",
expect_json=False,
)
removed = True
if not removed:
print(f"::notice::label '{label}' not found on PR #{pr_number}")
def update_pull(pr_number: int, *, dry_run: bool) -> None:
print(f"::notice::updating PR #{pr_number} with base branch via style={UPDATE_STYLE}")
if dry_run:
@@ -407,7 +430,30 @@ def process_once(*, dry_run: bool = False) -> int:
"deferring to next tick"
)
return 0
merge_pull(pr_number, dry_run=dry_run)
try:
merge_pull(pr_number, dry_run=dry_run)
except ApiError as exc:
# Merge failed (pre-receive hook, branch protection, etc.).
# Remove queue label so next tick picks the next PR.
msg = str(exc)
if "405" in msg or "not allowed to merge" in msg.lower():
hint = "pre-receive hook or branch protection blocked the merge"
elif "422" in msg or "Unprocessable" in msg:
hint = "branch protection required-status check failed"
elif "409" in msg or "conflict" in msg.lower():
hint = "merge conflict"
else:
hint = msg[:200]
remove_label(pr_number, QUEUE_LABEL, dry_run=dry_run)
post_comment(
pr_number,
(
f"merge-queue: merge blocked ({hint}). "
f"Label removed — re-add once the block is resolved."
),
dry_run=dry_run,
)
return 0
return 0
return 0
+6
View File
@@ -48,6 +48,12 @@ jobs:
REQUIRED_CONTEXTS: >-
CI / all-required (pull_request),
sop-checklist / all-items-acked (pull_request)
# NOTE: qa-review / security-review gates intentionally omitted.
# These gates permanently fail (mc#1111: SOP_TIER_CHECK_TOKEN missing
# PAT — token owner not in qa/security teams). Adding them to
# REQUIRED_CONTEXTS would strip the merge-queue label from every PR
# in the queue, breaking the queue for all contributors.
# Re-add these gates once mc#1111 is resolved.
# Push-side required contexts. Checking CI / all-required (push)
# explicitly instead of the combined state avoids false-pause when
# non-blocking jobs (continue-on-error: true) have failed — those
+44
View File
@@ -179,6 +179,50 @@ def test_f1_job_missing_from_sentinel_needs(drift_module, tmp_path, monkeypatch)
assert any("F1 —" in f and "test" in f for f in findings), findings
def test_f1_not_flagged_when_sentinel_deliberately_omits_needs(
drift_module, tmp_path, monkeypatch
):
"""When the sentinel intentionally has no `needs:` key (polls commit statuses
directly, per Gitea 1.22.6 quirk), F1 is suppressed — the empty sentinel
needs is a documented design choice, not drift."""
# Write ci.yml with sentinel having NO `needs:` key at all
import yaml
doc = {
"name": "ci",
"on": {"pull_request": {}},
"jobs": {
"build": {"runs-on": "ubuntu-latest"},
"test": {"runs-on": "ubuntu-latest"}, # would trigger F1 if needs: []
"all-required": {
"runs-on": "ubuntu-latest",
# NOTE: no `needs:` key — sentinel polls commit statuses directly.
# This is the intentional Gitea 1.22.6 quirk design.
"steps": [{"run": "echo polling"}],
},
},
}
ci = tmp_path / "ci.yml"
ci.write_text(yaml.safe_dump(doc), encoding="utf-8")
audit = _write_audit_yaml(tmp_path, ["ci / all-required (pull_request)"])
_patch_paths(drift_module, monkeypatch, ci, audit)
stub = _make_stub_api({
("GET", "/repos/owner/repo/branch_protections/main"): (
200,
{"status_check_contexts": ["ci / all-required (pull_request)"]},
),
})
monkeypatch.setattr(drift_module, "api", stub)
findings, debug = drift_module.detect_drift("main")
# F1 must NOT be raised
assert not any("F1 —" in f for f in findings), findings
# Debug info should confirm the sentinel has no explicit needs
assert debug["sentinel_has_explicit_needs"] is False
assert debug["sentinel_needs"] == []
def test_f1b_sentinel_needs_typo(drift_module, tmp_path, monkeypatch):
"""F1b: sentinel.needs lists a job not present in ci.yml (typo).