fix(merge-queue): fail-closed on red/skipped Platform(Go) + all-required (incl force_merge) — RCA core#1676 #3207

Merged
core-devops merged 2 commits from harden-merge-failclosed-1676-probe into main 2026-06-24 05:43:39 +00:00
2 changed files with 234 additions and 2 deletions
+117
View File
@@ -173,6 +173,40 @@ GOVERNANCE_REQUIRED_CONTEXTS = [
"security-review / approved (pull_request_target)",
"sop-checklist / all-items-acked (pull_request_target)",
]
# --------------------------------------------------------------------------
# CRITICAL fail-closed contexts (RCA: core PR #1676 merged 2026-06-24 with
# `CI / Platform (Go) = failure` AND `CI / all-required = skipped`, turning
# main RED. Same class as #3181.)
# --------------------------------------------------------------------------
# These two contexts MUST be green before ANY merge, INCLUDING a force_merge.
# They are checked here UNCONDITIONALLY and INDEPENDENTLY of whatever branch
# protection happens to enumerate in status_check_contexts — the #1676 gap was
# precisely that when BP did NOT list these (or listed a different suffix),
# Platform(Go)=failure + all-required=skipped fell through the required-set
# check and were then swept up as "non-required reds" and FORCE-MERGED past.
#
# Rules (all fail-closed — unverifiable == BLOCK):
# * `CI / Platform (Go)` : must be `success`. failure/error/skipped/missing
# all BLOCK. (skipped/missing = the gate did not
# actually run → cannot prove green.)
# * `CI / all-required` : the aggregator sentinel. `skipped` is FATAL —
# a skipped all-required means the required jobs
# it gates did NOT actually run, so nothing
# gated the merge. failure/error/missing also
# BLOCK; only `success` passes.
# The context names are matched by their base prefix (the part before the
# trailing " (pull_request*)" event suffix) so the guard catches the context
# regardless of which event variant Gitea emitted it under.
CRITICAL_REQUIRED_CONTEXT_PREFIXES = [
name.strip()
for name in _env(
"CRITICAL_REQUIRED_CONTEXT_PREFIXES",
default="CI / Platform (Go),CI / all-required",
).split(",")
if name.strip()
]
REQUIRED_CONTEXTS_RAW = _env(
"REQUIRED_CONTEXTS",
default=(
@@ -1102,6 +1136,70 @@ def _non_required_red_present(
return False
def _context_base_name(context: str) -> str:
"""Strip the trailing ` (pull_request*)` / ` (push)` event suffix from a
Gitea status context, returning the stable base name.
`"CI / Platform (Go) (pull_request)"` -> `"CI / Platform (Go)"`.
Note: the inner `(Go)` is preserved — only the FINAL parenthesized event
token is removed (it always names a Gitea webhook event).
"""
stripped = context.strip()
if stripped.endswith(")"):
open_idx = stripped.rfind(" (")
if open_idx != -1:
inner = stripped[open_idx + 2 : -1]
if inner.startswith(("pull_request", "push")):
return stripped[:open_idx].strip()
return stripped
def critical_contexts_block(latest: dict[str, dict]) -> list[str]:
"""FAIL-CLOSED guard for the CRITICAL_REQUIRED_CONTEXT_PREFIXES.
Returns a list of human-readable block reasons; an EMPTY list means every
critical context is provably green. A NON-empty list means the merge MUST
be refused (this gates the normal AND the force_merge path — see
evaluate_merge_readiness, which calls this BEFORE any merge decision incl.
force).
Fail-closed semantics (RCA core#1676):
* A critical context that is failure/error/skipped/missing → BLOCK.
`skipped` and `missing` are FATAL: the gate did not actually run, so
we cannot prove it green. (#1676's `all-required = skipped` is the
canonical case — a skipped aggregator sentinel means nothing gated
the required jobs.)
* Only `success` clears a critical context.
Each critical PREFIX must be matched by at least one observed context whose
base name equals it AND that context must be `success`. If the prefix is
not present at all in the statuses, that is ALSO a block (missing == not
proven green).
"""
# Map base-name -> set of observed states for that base name.
observed: dict[str, list[str]] = {}
for context, status in latest.items():
if not isinstance(context, str):
continue
base = _context_base_name(context)
observed.setdefault(base, []).append(status_state(status))
reasons: list[str] = []
for prefix in CRITICAL_REQUIRED_CONTEXT_PREFIXES:
states = observed.get(prefix)
if not states:
reasons.append(f"{prefix}=missing (critical context not reported — cannot prove green)")
continue
# Every occurrence of the critical context must be success. A single
# failure/error/skipped occurrence blocks (newest-wins was already
# applied by latest_statuses_by_context, but a critical context that
# appears under multiple event suffixes must ALL be green).
bad = [s or "missing" for s in states if s != "success"]
if bad:
reasons.append(f"{prefix}={','.join(sorted(set(bad)))} (critical context not green)")
return reasons
def evaluate_merge_readiness(
*,
main_status: dict,
@@ -1115,6 +1213,25 @@ def evaluate_merge_readiness(
pr_labels: set[str] | None = None,
runtime_bump_exempt: bool = False,
) -> MergeDecision:
# 0) CRITICAL fail-closed guard (RCA core#1676). Before ANY other gate —
# and BEFORE step 5 computes the force_merge flag — the PR head's
# critical contexts (CI / Platform (Go), CI / all-required) MUST be
# green. This is checked UNCONDITIONALLY, independent of branch
# protection's status_check_contexts, so a force_merge can NEVER bypass
# a red/skipped Platform(Go) or a skipped all-required. #1676 merged red
# because these fell out of the BP-required set and were then swept up
# as "non-required reds" and force-merged. There is no exemption from
# this gate — runtime-bump exemption only bypasses the APPROVALS bar,
# never a red required CI status.
pr_latest_critical = latest_statuses_by_context(pr_status.get("statuses") or [])
critical_block = critical_contexts_block(pr_latest_critical)
if critical_block:
return MergeDecision(
False, "wait",
"CRITICAL required context(s) not green (fail-closed, force_merge "
"cannot bypass): " + "; ".join(critical_block),
)
# 1) Main's push-required contexts must be green. Combined state can be
# "failure" due to non-blocking jobs (continue-on-error: true) that do
# not gate merges, so check the explicit required set, not combined.
+117 -2
View File
@@ -129,6 +129,9 @@ def _ready_kwargs(**overrides):
"state": "success",
"statuses": [
{"context": "CI / all-required (pull_request)", "status": "success"},
# CRITICAL fail-closed contexts (RCA core#1676) — a genuinely
# ready PR has these green; the step-0 guard requires them.
{"context": "CI / Platform (Go) (pull_request)", "status": "success"},
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
{"context": "security-review / approved (pull_request_target)", "status": "success"},
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
@@ -321,6 +324,7 @@ def test_governance_red_blocks_merge():
"state": "failure",
"statuses": [
{"context": "CI / all-required (pull_request)", "status": "success"},
{"context": "CI / Platform (Go) (pull_request)", "status": "success"},
{"context": "qa-review / approved (pull_request_target)", "status": "failure"},
{"context": "security-review / approved (pull_request_target)", "status": "pending"},
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "failure"},
@@ -341,6 +345,7 @@ def test_non_required_red_does_not_block_merge():
"state": "failure",
"statuses": [
{"context": "CI / all-required (pull_request)", "status": "success"},
{"context": "CI / Platform (Go) (pull_request)", "status": "success"},
{"context": "qa-review / approved (pull_request)", "status": "failure"},
{"context": "security-review / approved (pull_request)", "status": "pending"},
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "failure"},
@@ -361,6 +366,7 @@ def test_non_required_advisory_red_does_not_block_merge():
"state": "failure", # combined polluted by advisory non-required reds
"statuses": [
{"context": "CI / all-required (pull_request)", "status": "success"},
{"context": "CI / Platform (Go) (pull_request)", "status": "success"},
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
{"context": "security-review / approved (pull_request_target)", "status": "success"},
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
@@ -384,7 +390,109 @@ def test_failing_required_context_blocks_even_with_approvals():
assert decision.ready is False
assert decision.action == "wait"
assert decision.force is False
assert "required contexts not green" in decision.reason
# all-required IS a CRITICAL fail-closed context (RCA core#1676); a failing
# all-required is now caught by the step-0 critical guard (an even stronger
# block — force_merge cannot bypass it).
assert "required context" in decision.reason.lower()
# --------------------------------------------------------------------------
# CRITICAL fail-closed contexts (RCA core#1676 — merged with
# CI/Platform(Go)=failure AND CI/all-required=skipped onto a red main; the
# force_merge path swept these up as "non-required reds" and bypassed them).
# --------------------------------------------------------------------------
def _rca_1676_statuses():
"""The exact critical-context shape that let core PR #1676 merge red."""
return {
"state": "failure",
"statuses": [
{"context": "CI / Platform (Go) (pull_request)", "status": "failure"},
{"context": "CI / all-required (pull_request)", "status": "skipped"},
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
{"context": "security-review / approved (pull_request_target)", "status": "success"},
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
],
}
def test_critical_contexts_block_helper_flags_1676():
latest = mq.latest_statuses_by_context(_rca_1676_statuses()["statuses"])
reasons = mq.critical_contexts_block(latest)
joined = " ".join(reasons)
assert "CI / Platform (Go)" in joined
assert "CI / all-required" in joined
def test_critical_guard_blocks_1676_even_force_merge_path():
# mergeable=True + genuine approvals is the EXACT force_merge precondition
# that let #1676 through. The step-0 critical guard must block it anyway,
# and the decision must NOT be a (forced) merge.
decision = mq.evaluate_merge_readiness(
**_ready_kwargs(
pr_status=_rca_1676_statuses(),
# The #1676 gap: BP-required set did NOT enumerate the critical
# contexts, so step-4 alone would have let them slip to force.
required_contexts=["sop-checklist / all-items-acked (pull_request_target)"],
mergeable=True,
)
)
assert decision.ready is False
assert decision.action == "wait"
assert decision.force is False
assert "CRITICAL" in decision.reason
def test_critical_guard_blocks_skipped_all_required():
statuses = {
"state": "success", # combined can mask a skipped sentinel as non-failing
"statuses": [
{"context": "CI / Platform (Go) (pull_request)", "status": "success"},
{"context": "CI / all-required (pull_request)", "status": "skipped"},
],
}
decision = mq.evaluate_merge_readiness(
**_ready_kwargs(pr_status=statuses, required_contexts=[], mergeable=True)
)
assert decision.ready is False
assert "CI / all-required" in decision.reason
def test_critical_guard_blocks_missing_platform_go():
statuses = {
"state": "success",
"statuses": [
{"context": "CI / all-required (pull_request)", "status": "success"},
# CI / Platform (Go) entirely absent → cannot prove green → BLOCK.
],
}
decision = mq.evaluate_merge_readiness(
**_ready_kwargs(pr_status=statuses, required_contexts=[], mergeable=True)
)
assert decision.ready is False
assert "CI / Platform (Go)=missing" in decision.reason
def test_critical_guard_allows_when_both_green():
statuses = {
"state": "success",
"statuses": [
{"context": "CI / Platform (Go) (pull_request)", "status": "success"},
{"context": "CI / all-required (pull_request)", "status": "success"},
],
}
decision = mq.evaluate_merge_readiness(
**_ready_kwargs(
pr_status=statuses,
required_contexts=[
"CI / Platform (Go) (pull_request)",
"CI / all-required (pull_request)",
],
mergeable=True,
)
)
assert decision.ready is True
assert decision.action == "merge"
def test_unmergeable_pr_blocks():
@@ -471,6 +579,7 @@ def test_process_once_holds_pr_on_permanent_merge_error(monkeypatch):
return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
return {"state": "success", "statuses": [
{"context": "CI / all-required (pull_request)", "status": "success"},
{"context": "CI / Platform (Go) (pull_request)", "status": "success"},
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
{"context": "security-review / approved (pull_request_target)", "status": "success"},
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
@@ -544,6 +653,7 @@ def _fully_ready_process_once_monkeypatch(monkeypatch, mergeable, calls):
return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
return {"state": "success", "statuses": [
{"context": "CI / all-required (pull_request)", "status": "success"},
{"context": "CI / Platform (Go) (pull_request)", "status": "success"},
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
{"context": "security-review / approved (pull_request_target)", "status": "success"},
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
@@ -717,7 +827,8 @@ def test_process_once_pauses_when_main_not_green_no_direct_merge(monkeypatch):
return {"state": "failure",
"statuses": [{"context": "CI / all-required (push)", "status": "failure"}]}
return {"state": "success",
"statuses": [{"context": "CI / all-required (pull_request)", "status": "success"}]}
"statuses": [{"context": "CI / all-required (pull_request)", "status": "success"},
{"context": "CI / Platform (Go) (pull_request)", "status": "success"}]}
monkeypatch.setattr(mq, "get_combined_status", red_main_combined)
rc = mq.process_once(dry_run=False)
@@ -955,6 +1066,7 @@ def _stale_pr_update_409_monkeypatch(monkeypatch, queued_issues, calls):
return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
return {"state": "success", "statuses": [
{"context": "CI / all-required (pull_request)", "status": "success"},
{"context": "CI / Platform (Go) (pull_request)", "status": "success"},
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
{"context": "security-review / approved (pull_request_target)", "status": "success"},
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
@@ -1232,6 +1344,7 @@ def _wire_ready_process_once(monkeypatch, *, issues, pr_payload, calls):
]}
return {"state": "success", "statuses": [
{"context": "CI / all-required (pull_request)", "status": "success"},
{"context": "CI / Platform (Go) (pull_request)", "status": "success"},
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
{"context": "security-review / approved (pull_request_target)", "status": "success"},
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
@@ -1420,6 +1533,7 @@ def _wire_multi_candidate_process_once(monkeypatch, *, issues, pulls, reviews, c
return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
return {"state": "success", "statuses": [
{"context": "CI / all-required (pull_request)", "status": "success"},
{"context": "CI / Platform (Go) (pull_request)", "status": "success"},
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
{"context": "security-review / approved (pull_request_target)", "status": "success"},
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
@@ -1557,6 +1671,7 @@ def test_hol_unready_red_required_ci_is_skipped_for_ready_pr(monkeypatch):
return {"state": state,
"statuses": [
{"context": "CI / all-required (pull_request)", "status": state},
{"context": "CI / Platform (Go) (pull_request)", "status": state},
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
{"context": "security-review / approved (pull_request_target)", "status": "success"},
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},