fix(merge-queue): fail-closed on empty PUSH_REQUIRED_CONTEXTS + untrustworthy snapshot ts (#3210 tail) #3224
@@ -379,23 +379,41 @@ def load_conductor_snapshot() -> dict | None:
|
||||
if not isinstance(snapshot, dict):
|
||||
return None
|
||||
|
||||
# Freshness is fail-closed: an UNVERIFIABLE age must NOT be trusted as
|
||||
# current. A snapshot with no `ts` (absent/empty) or an unparseable `ts`
|
||||
# could be arbitrarily old (e.g. written by a wedged conductor), so we
|
||||
# CANNOT prove it is within the 10-minute window — discard it and let the
|
||||
# caller self-fetch the live state via the API (the None-return path every
|
||||
# consumer already handles on a cache miss). The previous behaviour skipped
|
||||
# the age check on an empty `ts` and swallowed a strptime failure as
|
||||
# "treat as fresh (conservative)" — that was ANTI-conservative: it trusted
|
||||
# an undated/old snapshot as current.
|
||||
ts_str = snapshot.get("ts", "")
|
||||
if ts_str:
|
||||
try:
|
||||
from datetime import datetime, timezone
|
||||
if not ts_str:
|
||||
print(
|
||||
"::notice::conductor snapshot has no ts (freshness unverifiable); "
|
||||
"self-fetching"
|
||||
)
|
||||
return None
|
||||
try:
|
||||
from datetime import datetime, timezone
|
||||
|
||||
ts = datetime.strptime(ts_str, "%Y-%m-%dT%H:%M:%SZ").replace(
|
||||
tzinfo=timezone.utc
|
||||
)
|
||||
age_sec = (datetime.now(timezone.utc) - ts).total_seconds()
|
||||
if age_sec > 600: # 10 minutes
|
||||
print(
|
||||
f"::notice::conductor snapshot stale ({int(age_sec)}s); "
|
||||
"self-fetching"
|
||||
)
|
||||
return None
|
||||
except ValueError:
|
||||
pass # malformed ts, treat as fresh (conservative)
|
||||
ts = datetime.strptime(ts_str, "%Y-%m-%dT%H:%M:%SZ").replace(
|
||||
tzinfo=timezone.utc
|
||||
)
|
||||
except ValueError:
|
||||
print(
|
||||
f"::notice::conductor snapshot ts unparseable ({ts_str!r}; freshness "
|
||||
"unverifiable); self-fetching"
|
||||
)
|
||||
return None
|
||||
age_sec = (datetime.now(timezone.utc) - ts).total_seconds()
|
||||
if age_sec > 600: # 10 minutes
|
||||
print(
|
||||
f"::notice::conductor snapshot stale ({int(age_sec)}s); "
|
||||
"self-fetching"
|
||||
)
|
||||
return None
|
||||
|
||||
return snapshot
|
||||
|
||||
@@ -443,6 +461,24 @@ class EnforcedContextsUnavailable(ApiError):
|
||||
error. Only an OSError opening/reading the file raises this."""
|
||||
|
||||
|
||||
class PushRequiredContextsUnavailable(ApiError):
|
||||
"""The push-required context set (env `PUSH_REQUIRED_CONTEXTS`) parsed to
|
||||
EMPTY. The queue must HOLD rather than treat the main-green backstop as
|
||||
satisfied (internal#3210 HIGH).
|
||||
|
||||
The main-green gate checks `required_contexts_green(main_latest, <set>)`;
|
||||
with an empty set that call returns `(True, [])` — a VACUOUS pass for ANY
|
||||
main state, INCLUDING all-red. The backstop that makes the direct-merge
|
||||
path safe (it PAUSES the queue when main's required CI goes red after a
|
||||
semantic main-break) would silently disappear, and the queue would keep
|
||||
merging onto a red main.
|
||||
|
||||
An empty parse is a MISCONFIGURATION (env unset/blank/all-comma), not a
|
||||
transient Gitea blip, so — like EnforcedContextsUnavailable — it propagates
|
||||
to main()'s ApiError handler (rc 1: no merge + operators paged) rather than
|
||||
being held quietly with rc 0."""
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class MergeDecision:
|
||||
ready: bool
|
||||
@@ -548,8 +584,26 @@ def required_contexts(raw: str) -> list[str]:
|
||||
|
||||
|
||||
def push_required_contexts() -> list[str]:
|
||||
"""Required contexts for push (branch) CI runs. See PUSH_REQUIRED_CONTEXTS_RAW."""
|
||||
return required_contexts(PUSH_REQUIRED_CONTEXTS_RAW)
|
||||
"""Required contexts for push (branch) CI runs. See PUSH_REQUIRED_CONTEXTS_RAW.
|
||||
|
||||
Fail-closed (internal#3210 HIGH): if `PUSH_REQUIRED_CONTEXTS` is empty,
|
||||
whitespace-only, or all-comma, the parse is [] and the main-green backstop
|
||||
(`required_contexts_green(main_latest, [])`) would PASS VACUOUSLY for any
|
||||
main state, including all-red — letting the queue merge onto a red main.
|
||||
Raise PushRequiredContextsUnavailable instead so the gate HOLDS rather than
|
||||
silently disabling its own main-green guard. Mirrors the
|
||||
EnforcedContextsUnavailable fail-closed convention; never returns []."""
|
||||
contexts = required_contexts(PUSH_REQUIRED_CONTEXTS_RAW)
|
||||
if not contexts:
|
||||
raise PushRequiredContextsUnavailable(
|
||||
"PUSH_REQUIRED_CONTEXTS parsed to an empty set "
|
||||
f"(raw={PUSH_REQUIRED_CONTEXTS_RAW!r}); the main-green backstop "
|
||||
"would pass vacuously for ANY main state (including all-red). "
|
||||
"Merge gate HOLDS the tick (fail-closed) rather than disabling the "
|
||||
"main-green guard. Set at least the default 'CI / all-required "
|
||||
"(push)'."
|
||||
)
|
||||
return contexts
|
||||
|
||||
|
||||
def status_state(status: dict) -> str:
|
||||
|
||||
@@ -2190,6 +2190,79 @@ def test_load_conductor_snapshot_ignores_stale_snapshot(monkeypatch):
|
||||
os.unlink(path)
|
||||
|
||||
|
||||
def test_load_conductor_snapshot_discards_snapshot_without_ts(monkeypatch):
|
||||
"""internal#3210 MEDIUM (fail-closed): a snapshot with NO `ts` field has an
|
||||
UNVERIFIABLE age — it could be arbitrarily old (a wedged conductor). The old
|
||||
`if ts_str:` guard skipped the age check entirely on an absent/empty ts and
|
||||
trusted the snapshot as fresh. It must instead be discarded (return None →
|
||||
the caller self-fetches live state), never trusted as current."""
|
||||
snapshot = { # no "ts" key at all
|
||||
"repo": "molecule-ai/molecule-core",
|
||||
"prs": [{"number": 60, "head_sha": "a" * 40, "labels": []}],
|
||||
}
|
||||
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
|
||||
json.dump(snapshot, f)
|
||||
path = f.name
|
||||
try:
|
||||
monkeypatch.setenv("CONDUCTOR_SNAPSHOT_FILE", path)
|
||||
assert mq.load_conductor_snapshot() is None
|
||||
finally:
|
||||
os.unlink(path)
|
||||
|
||||
# An explicitly EMPTY ts is the same case (freshness unverifiable).
|
||||
snapshot_empty = _make_snapshot(
|
||||
[{"number": 61, "head_sha": "b" * 40, "labels": []}], ts=""
|
||||
)
|
||||
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
|
||||
json.dump(snapshot_empty, f)
|
||||
path2 = f.name
|
||||
try:
|
||||
monkeypatch.setenv("CONDUCTOR_SNAPSHOT_FILE", path2)
|
||||
assert mq.load_conductor_snapshot() is None
|
||||
finally:
|
||||
os.unlink(path2)
|
||||
|
||||
|
||||
def test_load_conductor_snapshot_discards_snapshot_with_malformed_ts(monkeypatch):
|
||||
"""internal#3210 MEDIUM (fail-closed): a snapshot whose `ts` does not parse
|
||||
has an UNVERIFIABLE age. The old `except ValueError: pass` swallowed the
|
||||
strptime failure and fell through to `return snapshot` ('treat as fresh
|
||||
(conservative)') — which was ANTI-conservative (an old snapshot from a
|
||||
wedged conductor would be trusted as current). It must be discarded (return
|
||||
None → self-fetch)."""
|
||||
snapshot = _make_snapshot(
|
||||
[{"number": 70, "head_sha": "c" * 40, "labels": []}],
|
||||
ts="not-a-timestamp",
|
||||
)
|
||||
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
|
||||
json.dump(snapshot, f)
|
||||
path = f.name
|
||||
try:
|
||||
monkeypatch.setenv("CONDUCTOR_SNAPSHOT_FILE", path)
|
||||
assert mq.load_conductor_snapshot() is None
|
||||
finally:
|
||||
os.unlink(path)
|
||||
|
||||
|
||||
def test_load_conductor_snapshot_honors_fresh_dated_snapshot(monkeypatch):
|
||||
"""The fail-closed ts guard does NOT regress the happy path: a present,
|
||||
parseable, in-window ts is still trusted and returned (so the existing
|
||||
snapshot-consumption fast path is preserved)."""
|
||||
snapshot = _make_snapshot(
|
||||
[{"number": 80, "head_sha": "d" * 40, "labels": []}]
|
||||
) # _make_snapshot defaults ts to now → fresh + parseable
|
||||
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
|
||||
json.dump(snapshot, f)
|
||||
path = f.name
|
||||
try:
|
||||
monkeypatch.setenv("CONDUCTOR_SNAPSHOT_FILE", path)
|
||||
loaded = mq.load_conductor_snapshot()
|
||||
assert loaded is not None
|
||||
assert loaded["prs"][0]["number"] == 80
|
||||
finally:
|
||||
os.unlink(path)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Fix 3: non-main base PRs are skipped loudly/observably, not silently.
|
||||
# core#2548 stopped the per-tick PR-comment flood; the skip must still be
|
||||
@@ -3095,6 +3168,67 @@ def test_process_once_fails_closed_when_enforced_contexts_unreadable(monkeypatch
|
||||
assert merged["called"] is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# internal#3210 HIGH: an empty PUSH_REQUIRED_CONTEXTS must NOT vacuously pass
|
||||
# the main-green backstop. push_required_contexts() raises
|
||||
# PushRequiredContextsUnavailable (an ApiError) so the gate HOLDS rather than
|
||||
# letting required_contexts_green(main_latest, []) == (True, []) wave through
|
||||
# ANY main state (including all-red).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_PushRequiredContextsUnavailable_inherits_from_ApiError():
|
||||
# So main()'s `except ApiError` handler catches it → rc 1 (no merge + page).
|
||||
assert issubclass(mq.PushRequiredContextsUnavailable, mq.ApiError)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("raw", ["", " ", ",", " , , "])
|
||||
def test_push_required_contexts_raises_when_parse_empty(monkeypatch, raw):
|
||||
"""Empty / whitespace-only / all-comma PUSH_REQUIRED_CONTEXTS parses to []
|
||||
and must fail closed — NOT return [] (which would vacuously green main)."""
|
||||
monkeypatch.setattr(mq, "PUSH_REQUIRED_CONTEXTS_RAW", raw)
|
||||
with pytest.raises(mq.PushRequiredContextsUnavailable):
|
||||
mq.push_required_contexts()
|
||||
|
||||
|
||||
def test_push_required_contexts_returns_configured_set():
|
||||
"""Regression guard: a normal configured value still parses to the set
|
||||
(the fail-closed raise must not break the happy path)."""
|
||||
# The module default is non-empty; assert it parses to a non-empty list.
|
||||
assert mq.push_required_contexts() == ["CI / all-required (push)"]
|
||||
|
||||
|
||||
def test_process_once_holds_tick_when_push_required_contexts_empty(monkeypatch):
|
||||
"""internal#3210 HIGH integration: with PUSH_REQUIRED_CONTEXTS empty, the
|
||||
main-green check in process_once would VACUOUSLY pass (the backstop that
|
||||
pauses the queue on a red main disappears). push_required_contexts() raises
|
||||
PushRequiredContextsUnavailable BEFORE the candidate loop; process_once lets
|
||||
it propagate so main() surfaces rc 1 — and crucially NO PR is merged.
|
||||
|
||||
Main is wired ALL-RED here: the bug let the queue keep merging onto a red
|
||||
main; the fix must HOLD instead."""
|
||||
merged = {"called": False}
|
||||
monkeypatch.setattr(mq, "WATCH_BRANCH", "main")
|
||||
monkeypatch.setattr(mq, "PUSH_REQUIRED_CONTEXTS_RAW", "") # parses to []
|
||||
monkeypatch.setattr(mq, "get_branch_protection", lambda branch: mq.BranchProtection(
|
||||
required_contexts=["CI / all-required (pull_request)"],
|
||||
required_approvals=2,
|
||||
block_on_rejected_reviews=True,
|
||||
))
|
||||
main_sha = "b" * 40
|
||||
monkeypatch.setattr(mq, "get_branch_head", lambda branch: main_sha)
|
||||
# Main is RED — the exact state the old vacuous pass would have ignored.
|
||||
monkeypatch.setattr(mq, "get_combined_status", lambda sha: {
|
||||
"state": "failure",
|
||||
"statuses": [{"context": "CI / all-required (push)", "status": "failure"}],
|
||||
})
|
||||
monkeypatch.setattr(mq, "merge_pull", lambda *a, **k: merged.__setitem__("called", True))
|
||||
|
||||
with pytest.raises(mq.PushRequiredContextsUnavailable):
|
||||
mq.process_once(dry_run=False)
|
||||
assert merged["called"] is False
|
||||
|
||||
|
||||
def test_enforced_file_contexts_green_event_insensitive_match():
|
||||
latest = mq.latest_statuses_by_context([
|
||||
{"context": "E2E Staging SaaS (full lifecycle) / X (pull_request)", "status": "success"},
|
||||
|
||||
Reference in New Issue
Block a user