fix(watchdog): close stale [main-red] issues on head-drift + CI recovery (internal#668) #1858

Merged
agent-dev-a merged 1 commits from agent-dev-b/core-1789-stale-issue-close-on-recovery into main 2026-05-26 00:20:19 +00:00
2 changed files with 128 additions and 11 deletions
+21 -3
View File
@@ -578,6 +578,7 @@ def close_open_red_issues_for_other_shas(
current_sha: str,
*,
dry_run: bool = False,
close_same_sha: bool = False,
) -> int:
"""When main is green at current_sha, close any open `[main-red]`
issues whose title references a different SHA. Returns the number
@@ -586,15 +587,25 @@ def close_open_red_issues_for_other_shas(
Lineage note: we only close issues whose title prefix matches; if
a human renamed the issue or added a suffix this won't touch it.
That's intentional — manual editorial state takes precedence.
Args:
close_same_sha: set True when the caller already knows main is
green at current_sha (e.g. recovery block) and wants to close
the open issue for THIS SHA too. Defaults False so the
green-path callers never accidentally close an issue they just
filed on the same tick.
"""
target_title = title_for(current_sha)
open_red = list_open_red_issues()
closed = 0
for issue in open_red:
if issue.get("title") == target_title:
# Same SHA — caller should not have invoked this if main is
# green. Skip defensively.
continue
if not close_same_sha:
# Same SHA — caller should not have invoked this if main is
# green. Skip defensively (guards against green-path callers
# that accidentally pass the SHA they just filed for).
continue
# close_same_sha=True: close even this SHA's issue (recovery path)
num = issue.get("number")
if not isinstance(num, int):
continue
@@ -699,6 +710,10 @@ def run_once(*, dry_run: bool = False) -> int:
f"{sha[:10]} but HEAD is now {recheck_sha[:10]} on "
f"{WATCH_BRANCH}; next cron tick will re-evaluate."
)
# HEAD drifted — close any stale main-red issue for the prior SHA
# before returning, so we don't leave stale open issues when main
# is no longer pointing at the red commit.
close_open_red_issues_for_other_shas(recheck_sha, dry_run=dry_run)
return 0
recheck_status = get_combined_status(sha)
@@ -711,6 +726,9 @@ def run_once(*, dry_run: bool = False) -> int:
f"{recheck_status.get('state')!r} on recheck; "
f"initial red was a transient cancel-cascade."
)
# CI recovered on the same SHA — close any stale main-red issue
# that was filed on a prior tick for this SHA.
close_open_red_issues_for_other_shas(sha, dry_run=dry_run, close_same_sha=True)
return 0
# Still red after settling — file/update. Use the recheck data
+107 -8
View File
@@ -117,15 +117,25 @@ def _make_stub_api(responses: dict):
def __call__(self, method, path, *, body=None, query=None, expect_json=True):
self.calls.append((method, path, body, query))
# If we've stored a list for this (method, path), rotate through.
# This supports tests that need sequential responses for the
# same endpoint without adding query-param noise.
key = (method, path)
if key not in responses:
raise AssertionError(
f"unexpected api call: {method} {path} (no stub registered)"
)
r = responses[key]
if isinstance(r, Exception):
raise r
return r
r = responses.get(key)
if isinstance(r, list):
if not r:
raise AssertionError(
f"stub sequential responses exhausted for {method} "
f"{path} — provisioned {len(r)} entries"
)
return r.pop(0)
if r is not None:
if isinstance(r, Exception):
raise r
return r
raise AssertionError(
f"unexpected api call: {method} {path} (no stub registered)"
)
return StubApi()
@@ -133,6 +143,7 @@ def _make_stub_api(responses: dict):
# Sample SHA used throughout. 40 chars per Gitea convention.
SHA_RED = "deadbeefcafe1234567890abcdef000011112222"
SHA_GREEN = "ababababcdcdcdcd0000111122223333deadc0de"
SHA_NEW = "aaaabbbbccccddddeeeeffff0000111122223333"
def _branches_response(sha: str) -> dict:
@@ -140,6 +151,19 @@ def _branches_response(sha: str) -> dict:
return {"name": "main", "commit": {"id": sha}}
def _branch_alt(sha: str) -> dict:
"""Identical shape but to a different key path so _make_stub_api
retains a separate first-response entry from the primary
_branches_response() path.
The stub stores only the first response per (method, path) pair.
Tests that need two distinct responses for the same logical
GET /branches/main call use _branch_alt for the second lookup so
the stub returns the correct sequential entry.
"""
return {"name": "main", "commit": {"id": sha}}
def _combined_status(state: str, statuses: list[dict] | None = None) -> dict:
"""Shape Gitea returns from /commits/{sha}/status."""
return {"state": state, "statuses": statuses or []}
@@ -561,6 +585,81 @@ def test_auto_close_skips_when_main_pending(wd_module, monkeypatch):
assert ("GET", "/repos/owner/repo/issues") not in methods_paths
# --------------------------------------------------------------------------
# Stale-issue cleanup on transient / head-drift (internal#1789)
# --------------------------------------------------------------------------
def test_head_drift_closes_stale_issue_for_prior_sha(wd_module, monkeypatch):
"""Initial red at SHA_RED. Before recheck, main is force-pushed to
SHA_NEW (different commit). watchdog must close the stale SHA_RED
issue before returning — otherwise stale open issues accumulate
when main is force-pushed during a red window."""
stub = _make_stub_api({
# Initial check: branch SHA_RED, status failure
("GET", "/repos/owner/repo/branches/main"): [
(200, _branches_response(SHA_RED)),
(200, _branch_alt(SHA_NEW)), # recheck branch call → HEAD moved
(200, _branch_alt(SHA_NEW)), # close path branch call
],
("GET", f"/repos/owner/repo/commits/{SHA_RED}/status"): [
(200, _combined_status("failure", [
{"context": "ci/test", "status": "failure", "description": "broke"},
])),
(200, _combined_status("success", [ # recheck: CI result arrived
{"context": "ci/test", "status": "success"},
])),
],
(f"GET", f"/repos/owner/repo/commits/{SHA_NEW}/status"): [
(200, _combined_status("success", [
{"context": "ci/test", "status": "success"},
])),
],
# close_open_red_issues_for_other_shas(SHA_NEW): issue for SHA_RED found
("GET", "/repos/owner/repo/issues"): [
(200, [{"number": 9, "title": f"[main-red] owner/repo: {SHA_RED[:10]}"}]),
],
("POST", "/repos/owner/repo/issues/9/comments"): (201, {"id": 200}),
("PATCH", "/repos/owner/repo/issues/9"): (200, {"number": 9, "state": "closed"}),
})
monkeypatch.setattr(wd_module, "api", stub)
rc = wd_module.run_once(dry_run=False)
assert rc == 0
methods_paths = [(c[0], c[1]) for c in stub.calls]
assert ("PATCH", "/repos/owner/repo/issues/9") in methods_paths, \
"head-drift should close the stale SHA_RED issue"
def test_recovery_on_same_sha_closes_issue_filed_on_prior_tick(wd_module, monkeypatch):
"""Same SHA shows red on initial check, but CI recovers before recheck
completes. watchdog must close the issue that was filed on an earlier
tick for this same SHA — otherwise stale open issues accumulate when CI
recovers within the settling window."""
stub = _make_stub_api({
("GET", "/repos/owner/repo/branches/main"): (200, _branches_response(SHA_RED)),
# Sequential: initial check → failure, recheck (≥2nd call) → success.
# Using a list so Python dict keeps a single key (avoids overwrite).
("GET", f"/repos/owner/repo/commits/{SHA_RED}/status"): [
(200, _combined_status("failure", [
{"context": "ci/test", "status": "failure", "description": "broke"},
])),
(200, _combined_status("success", [
{"context": "ci/test", "state": "success"},
])),
],
# List open red issues → find stale issue for this SHA
("GET", "/repos/owner/repo/issues"): (
200, [{"number": 11, "title": f"[main-red] owner/repo: {SHA_RED[:10]}"}],
),
("POST", "/repos/owner/repo/issues/11/comments"): (201, {"id": 300}),
("PATCH", "/repos/owner/repo/issues/11"): (200, {"number": 11, "state": "closed"}),
})
monkeypatch.setattr(wd_module, "api", stub)
rc = wd_module.run_once(dry_run=False)
assert rc == 0
methods_paths = [(c[0], c[1]) for c in stub.calls]
assert ("PATCH", "/repos/owner/repo/issues/11") in methods_paths, \
"recovery-on-same-SHA should close the stale issue"
# --------------------------------------------------------------------------
# HTTP-failure / api() raises — duplicate-write regression guard
# --------------------------------------------------------------------------