fix(watchdog): close stale [main-red] issues on head-drift + CI recovery (internal#668) #1858
@@ -578,6 +578,7 @@ def close_open_red_issues_for_other_shas(
|
||||
current_sha: str,
|
||||
*,
|
||||
dry_run: bool = False,
|
||||
close_same_sha: bool = False,
|
||||
) -> int:
|
||||
"""When main is green at current_sha, close any open `[main-red]`
|
||||
issues whose title references a different SHA. Returns the number
|
||||
@@ -586,15 +587,25 @@ def close_open_red_issues_for_other_shas(
|
||||
Lineage note: we only close issues whose title prefix matches; if
|
||||
a human renamed the issue or added a suffix this won't touch it.
|
||||
That's intentional — manual editorial state takes precedence.
|
||||
|
||||
Args:
|
||||
close_same_sha: set True when the caller already knows main is
|
||||
green at current_sha (e.g. recovery block) and wants to close
|
||||
the open issue for THIS SHA too. Defaults False so the
|
||||
green-path callers never accidentally close an issue they just
|
||||
filed on the same tick.
|
||||
"""
|
||||
target_title = title_for(current_sha)
|
||||
open_red = list_open_red_issues()
|
||||
closed = 0
|
||||
for issue in open_red:
|
||||
if issue.get("title") == target_title:
|
||||
# Same SHA — caller should not have invoked this if main is
|
||||
# green. Skip defensively.
|
||||
continue
|
||||
if not close_same_sha:
|
||||
# Same SHA — caller should not have invoked this if main is
|
||||
# green. Skip defensively (guards against green-path callers
|
||||
# that accidentally pass the SHA they just filed for).
|
||||
continue
|
||||
# close_same_sha=True: close even this SHA's issue (recovery path)
|
||||
num = issue.get("number")
|
||||
if not isinstance(num, int):
|
||||
continue
|
||||
@@ -699,6 +710,10 @@ def run_once(*, dry_run: bool = False) -> int:
|
||||
f"{sha[:10]} but HEAD is now {recheck_sha[:10]} on "
|
||||
f"{WATCH_BRANCH}; next cron tick will re-evaluate."
|
||||
)
|
||||
# HEAD drifted — close any stale main-red issue for the prior SHA
|
||||
# before returning, so we don't leave stale open issues when main
|
||||
# is no longer pointing at the red commit.
|
||||
close_open_red_issues_for_other_shas(recheck_sha, dry_run=dry_run)
|
||||
return 0
|
||||
|
||||
recheck_status = get_combined_status(sha)
|
||||
@@ -711,6 +726,9 @@ def run_once(*, dry_run: bool = False) -> int:
|
||||
f"{recheck_status.get('state')!r} on recheck; "
|
||||
f"initial red was a transient cancel-cascade."
|
||||
)
|
||||
# CI recovered on the same SHA — close any stale main-red issue
|
||||
# that was filed on a prior tick for this SHA.
|
||||
close_open_red_issues_for_other_shas(sha, dry_run=dry_run, close_same_sha=True)
|
||||
return 0
|
||||
|
||||
# Still red after settling — file/update. Use the recheck data
|
||||
|
||||
@@ -117,15 +117,25 @@ def _make_stub_api(responses: dict):
|
||||
|
||||
def __call__(self, method, path, *, body=None, query=None, expect_json=True):
|
||||
self.calls.append((method, path, body, query))
|
||||
# If we've stored a list for this (method, path), rotate through.
|
||||
# This supports tests that need sequential responses for the
|
||||
# same endpoint without adding query-param noise.
|
||||
key = (method, path)
|
||||
if key not in responses:
|
||||
raise AssertionError(
|
||||
f"unexpected api call: {method} {path} (no stub registered)"
|
||||
)
|
||||
r = responses[key]
|
||||
if isinstance(r, Exception):
|
||||
raise r
|
||||
return r
|
||||
r = responses.get(key)
|
||||
if isinstance(r, list):
|
||||
if not r:
|
||||
raise AssertionError(
|
||||
f"stub sequential responses exhausted for {method} "
|
||||
f"{path} — provisioned {len(r)} entries"
|
||||
)
|
||||
return r.pop(0)
|
||||
if r is not None:
|
||||
if isinstance(r, Exception):
|
||||
raise r
|
||||
return r
|
||||
raise AssertionError(
|
||||
f"unexpected api call: {method} {path} (no stub registered)"
|
||||
)
|
||||
|
||||
return StubApi()
|
||||
|
||||
@@ -133,6 +143,7 @@ def _make_stub_api(responses: dict):
|
||||
# Sample SHA used throughout. 40 chars per Gitea convention.
|
||||
SHA_RED = "deadbeefcafe1234567890abcdef000011112222"
|
||||
SHA_GREEN = "ababababcdcdcdcd0000111122223333deadc0de"
|
||||
SHA_NEW = "aaaabbbbccccddddeeeeffff0000111122223333"
|
||||
|
||||
|
||||
def _branches_response(sha: str) -> dict:
|
||||
@@ -140,6 +151,19 @@ def _branches_response(sha: str) -> dict:
|
||||
return {"name": "main", "commit": {"id": sha}}
|
||||
|
||||
|
||||
def _branch_alt(sha: str) -> dict:
|
||||
"""Identical shape but to a different key path so _make_stub_api
|
||||
retains a separate first-response entry from the primary
|
||||
_branches_response() path.
|
||||
|
||||
The stub stores only the first response per (method, path) pair.
|
||||
Tests that need two distinct responses for the same logical
|
||||
GET /branches/main call use _branch_alt for the second lookup so
|
||||
the stub returns the correct sequential entry.
|
||||
"""
|
||||
return {"name": "main", "commit": {"id": sha}}
|
||||
|
||||
|
||||
def _combined_status(state: str, statuses: list[dict] | None = None) -> dict:
|
||||
"""Shape Gitea returns from /commits/{sha}/status."""
|
||||
return {"state": state, "statuses": statuses or []}
|
||||
@@ -561,6 +585,81 @@ def test_auto_close_skips_when_main_pending(wd_module, monkeypatch):
|
||||
assert ("GET", "/repos/owner/repo/issues") not in methods_paths
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Stale-issue cleanup on transient / head-drift (internal#1789)
|
||||
# --------------------------------------------------------------------------
|
||||
def test_head_drift_closes_stale_issue_for_prior_sha(wd_module, monkeypatch):
|
||||
"""Initial red at SHA_RED. Before recheck, main is force-pushed to
|
||||
SHA_NEW (different commit). watchdog must close the stale SHA_RED
|
||||
issue before returning — otherwise stale open issues accumulate
|
||||
when main is force-pushed during a red window."""
|
||||
stub = _make_stub_api({
|
||||
# Initial check: branch SHA_RED, status failure
|
||||
("GET", "/repos/owner/repo/branches/main"): [
|
||||
(200, _branches_response(SHA_RED)),
|
||||
(200, _branch_alt(SHA_NEW)), # recheck branch call → HEAD moved
|
||||
(200, _branch_alt(SHA_NEW)), # close path branch call
|
||||
],
|
||||
("GET", f"/repos/owner/repo/commits/{SHA_RED}/status"): [
|
||||
(200, _combined_status("failure", [
|
||||
{"context": "ci/test", "status": "failure", "description": "broke"},
|
||||
])),
|
||||
(200, _combined_status("success", [ # recheck: CI result arrived
|
||||
{"context": "ci/test", "status": "success"},
|
||||
])),
|
||||
],
|
||||
(f"GET", f"/repos/owner/repo/commits/{SHA_NEW}/status"): [
|
||||
(200, _combined_status("success", [
|
||||
{"context": "ci/test", "status": "success"},
|
||||
])),
|
||||
],
|
||||
# close_open_red_issues_for_other_shas(SHA_NEW): issue for SHA_RED found
|
||||
("GET", "/repos/owner/repo/issues"): [
|
||||
(200, [{"number": 9, "title": f"[main-red] owner/repo: {SHA_RED[:10]}"}]),
|
||||
],
|
||||
("POST", "/repos/owner/repo/issues/9/comments"): (201, {"id": 200}),
|
||||
("PATCH", "/repos/owner/repo/issues/9"): (200, {"number": 9, "state": "closed"}),
|
||||
})
|
||||
monkeypatch.setattr(wd_module, "api", stub)
|
||||
rc = wd_module.run_once(dry_run=False)
|
||||
assert rc == 0
|
||||
methods_paths = [(c[0], c[1]) for c in stub.calls]
|
||||
assert ("PATCH", "/repos/owner/repo/issues/9") in methods_paths, \
|
||||
"head-drift should close the stale SHA_RED issue"
|
||||
|
||||
|
||||
def test_recovery_on_same_sha_closes_issue_filed_on_prior_tick(wd_module, monkeypatch):
|
||||
"""Same SHA shows red on initial check, but CI recovers before recheck
|
||||
completes. watchdog must close the issue that was filed on an earlier
|
||||
tick for this same SHA — otherwise stale open issues accumulate when CI
|
||||
recovers within the settling window."""
|
||||
stub = _make_stub_api({
|
||||
("GET", "/repos/owner/repo/branches/main"): (200, _branches_response(SHA_RED)),
|
||||
# Sequential: initial check → failure, recheck (≥2nd call) → success.
|
||||
# Using a list so Python dict keeps a single key (avoids overwrite).
|
||||
("GET", f"/repos/owner/repo/commits/{SHA_RED}/status"): [
|
||||
(200, _combined_status("failure", [
|
||||
{"context": "ci/test", "status": "failure", "description": "broke"},
|
||||
])),
|
||||
(200, _combined_status("success", [
|
||||
{"context": "ci/test", "state": "success"},
|
||||
])),
|
||||
],
|
||||
# List open red issues → find stale issue for this SHA
|
||||
("GET", "/repos/owner/repo/issues"): (
|
||||
200, [{"number": 11, "title": f"[main-red] owner/repo: {SHA_RED[:10]}"}],
|
||||
),
|
||||
("POST", "/repos/owner/repo/issues/11/comments"): (201, {"id": 300}),
|
||||
("PATCH", "/repos/owner/repo/issues/11"): (200, {"number": 11, "state": "closed"}),
|
||||
})
|
||||
monkeypatch.setattr(wd_module, "api", stub)
|
||||
rc = wd_module.run_once(dry_run=False)
|
||||
assert rc == 0
|
||||
methods_paths = [(c[0], c[1]) for c in stub.calls]
|
||||
assert ("PATCH", "/repos/owner/repo/issues/11") in methods_paths, \
|
||||
"recovery-on-same-SHA should close the stale issue"
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# HTTP-failure / api() raises — duplicate-write regression guard
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user