fix(ci): retry status reaper api timeouts #890
@ -58,9 +58,10 @@ What this script does, per `.gitea/workflows/status-reaper.yml` invocation:
|
||||
even if another tick happens before the runner finishes.
|
||||
|
||||
What it does NOT do:
|
||||
- Touch any context NOT ending in ` (push)`. The required-checks on
|
||||
main (verified 2026-05-11) all have ` (pull_request)` suffixes;
|
||||
they CANNOT be reached by this code path.
|
||||
- Touch ` (pull_request)` contexts unless the exact same
|
||||
workflow/job has a successful ` (push)` context on the same
|
||||
default-branch SHA. That case is post-merge status pollution, not
|
||||
an unproven PR gate.
|
||||
- Compensate `error`/`pending` states. Only `failure` — the only one
|
||||
Gitea emits for the hardcoded-suffix bug.
|
||||
- Write to non-default branches. WATCH_BRANCH is sourced from
|
||||
@ -91,7 +92,9 @@ from __future__ import annotations
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import socket
|
||||
import sys
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
@ -118,19 +121,28 @@ WORKFLOWS_DIR = _env("WORKFLOWS_DIR", default=".gitea/workflows")
|
||||
|
||||
OWNER, NAME = (REPO.split("/", 1) + [""])[:2] if REPO else ("", "")
|
||||
API = f"https://{GITEA_HOST}/api/v1" if GITEA_HOST else ""
|
||||
API_TIMEOUT_SEC = int(_env("STATUS_REAPER_API_TIMEOUT_SEC", default="30") or "30")
|
||||
API_RETRIES = int(_env("STATUS_REAPER_API_RETRIES", default="3") or "3")
|
||||
API_RETRY_SLEEP_SEC = float(_env("STATUS_REAPER_API_RETRY_SLEEP_SEC", default="2") or "2")
|
||||
|
||||
# Compensating-status description prefix. Used as the marker so a human
|
||||
# auditing commit statuses can tell at a glance that the green was
|
||||
# synthetic, not a real CI pass. Kept stable; downstream tooling
|
||||
# (e.g. main-red-watchdog visual diff) MAY key on it.
|
||||
COMPENSATION_DESCRIPTION = (
|
||||
PUSH_COMPENSATION_DESCRIPTION = (
|
||||
"Compensated by status-reaper (workflow has no push: trigger; "
|
||||
"Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)"
|
||||
)
|
||||
PR_SHADOW_COMPENSATION_DESCRIPTION = (
|
||||
"Compensated by status-reaper (default-branch pull_request status "
|
||||
"shadowed by successful push status on same SHA; see "
|
||||
".gitea/scripts/status-reaper.py)"
|
||||
)
|
||||
|
||||
# Context suffix the reaper acts on. Gitea hardcodes this for ALL
|
||||
# default-branch workflow runs.
|
||||
PUSH_SUFFIX = " (push)"
|
||||
PULL_REQUEST_SUFFIX = " (pull_request)"
|
||||
|
||||
|
||||
def _require_runtime_env() -> None:
|
||||
@ -182,13 +194,27 @@ def api(
|
||||
data = json.dumps(body).encode("utf-8")
|
||||
headers["Content-Type"] = "application/json"
|
||||
req = urllib.request.Request(url, method=method, data=data, headers=headers)
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||
raw = resp.read()
|
||||
status = resp.status
|
||||
except urllib.error.HTTPError as e:
|
||||
raw = e.read()
|
||||
status = e.code
|
||||
attempts = max(API_RETRIES, 1)
|
||||
for attempt in range(1, attempts + 1):
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=API_TIMEOUT_SEC) as resp:
|
||||
raw = resp.read()
|
||||
status = resp.status
|
||||
break
|
||||
except urllib.error.HTTPError as e:
|
||||
raw = e.read()
|
||||
status = e.code
|
||||
break
|
||||
except (TimeoutError, socket.timeout, urllib.error.URLError, OSError) as e:
|
||||
if attempt >= attempts:
|
||||
raise ApiError(
|
||||
f"{method} {path} failed after {attempts} attempts: {e}"
|
||||
) from e
|
||||
print(
|
||||
f"::warning::{method} {path} transient API error "
|
||||
f"(attempt {attempt}/{attempts}): {e}; retrying"
|
||||
)
|
||||
time.sleep(API_RETRY_SLEEP_SEC)
|
||||
|
||||
if not (200 <= status < 300):
|
||||
snippet = raw[:500].decode("utf-8", errors="replace") if raw else ""
|
||||
@ -357,24 +383,38 @@ def get_combined_status(sha: str) -> dict:
|
||||
# --------------------------------------------------------------------------
|
||||
# Context parsing
|
||||
# --------------------------------------------------------------------------
|
||||
def parse_push_context(context: str) -> tuple[str, str] | None:
|
||||
"""Parse `<workflow_name> / <job_name> (push)` into
|
||||
def parse_suffixed_context(context: str, suffix: str) -> tuple[str, str] | None:
|
||||
"""Parse `<workflow_name> / <job_name> (<event>)` into
|
||||
(workflow_name, job_name).
|
||||
|
||||
Returns None if the context doesn't match the shape (caller skips).
|
||||
Strict: requires the trailing ` (push)` and at least one ` / `
|
||||
Strict: requires the trailing suffix and at least one ` / `
|
||||
separator. Anything else is left alone.
|
||||
"""
|
||||
if not context.endswith(PUSH_SUFFIX):
|
||||
if not context.endswith(suffix):
|
||||
return None
|
||||
head = context[: -len(PUSH_SUFFIX)] # strip " (push)"
|
||||
head = context[: -len(suffix)]
|
||||
if " / " not in head:
|
||||
# No workflow/job separator — not the bug shape we compensate.
|
||||
return None
|
||||
workflow_name, job_name = head.split(" / ", 1)
|
||||
return workflow_name, job_name
|
||||
|
||||
|
||||
def parse_push_context(context: str) -> tuple[str, str] | None:
    """Parse a ` (push)`-suffixed context into (workflow_name, job_name).

    Thin convenience wrapper over parse_suffixed_context; returns None
    when the context is not the push-suffixed workflow/job shape.
    """
    return parse_suffixed_context(context, PUSH_SUFFIX)
|
||||
|
||||
|
||||
def push_equivalent_context(context: str) -> str | None:
    """Map a ` (pull_request)` context to its ` (push)` twin.

    Returns None when *context* is not a parseable pull_request context,
    so callers can fall through to the other preservation rules.
    """
    parts = parse_suffixed_context(context, PULL_REQUEST_SUFFIX)
    if parts is None:
        return None
    return parts[0] + " / " + parts[1] + PUSH_SUFFIX
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Compensating POST
|
||||
# --------------------------------------------------------------------------
|
||||
@ -383,6 +423,7 @@ def post_compensating_status(
|
||||
context: str,
|
||||
target_url: str | None,
|
||||
*,
|
||||
description: str = PUSH_COMPENSATION_DESCRIPTION,
|
||||
dry_run: bool = False,
|
||||
) -> None:
|
||||
"""POST a `state=success` to /repos/{o}/{r}/statuses/{sha} with the
|
||||
@ -394,7 +435,7 @@ def post_compensating_status(
|
||||
payload: dict[str, Any] = {
|
||||
"context": context,
|
||||
"state": "success",
|
||||
"description": COMPENSATION_DESCRIPTION,
|
||||
"description": description,
|
||||
}
|
||||
# Echo the original target_url when present so a human auditing
|
||||
# the (now-green) compensated status can still reach the run logs
|
||||
@ -431,7 +472,8 @@ def reap(
|
||||
Returns counters for observability:
|
||||
{compensated, preserved_real_push, preserved_unknown,
|
||||
preserved_non_failure, preserved_non_push_suffix,
|
||||
preserved_unparseable,
|
||||
preserved_unparseable, compensated_pr_shadowed_by_push_success,
|
||||
preserved_pr_without_push_success,
|
||||
compensated_contexts: [<context>, ...]}
|
||||
|
||||
`compensated_contexts` is rev2-added so `reap_branch` can build
|
||||
@ -444,10 +486,17 @@ def reap(
|
||||
"preserved_non_failure": 0,
|
||||
"preserved_non_push_suffix": 0,
|
||||
"preserved_unparseable": 0,
|
||||
"compensated_pr_shadowed_by_push_success": 0,
|
||||
"preserved_pr_without_push_success": 0,
|
||||
"compensated_contexts": [],
|
||||
}
|
||||
|
||||
statuses = combined.get("statuses") or []
|
||||
successful_contexts = {
|
||||
(s.get("context") or "")
|
||||
for s in statuses
|
||||
if isinstance(s, dict) and (s.get("status") or s.get("state") or "") == "success"
|
||||
}
|
||||
for s in statuses:
|
||||
if not isinstance(s, dict):
|
||||
continue
|
||||
@ -471,9 +520,31 @@ def reap(
|
||||
counters["preserved_non_failure"] += 1
|
||||
continue
|
||||
|
||||
# Default-branch `pull_request` contexts can be stale shadows of
|
||||
# the exact same workflow/job already proven by the successful
|
||||
# `push` context on the same SHA. Compensate only that narrow
|
||||
# shape; a missing or failed push equivalent remains a real gate
|
||||
# signal and is preserved.
|
||||
push_equivalent = push_equivalent_context(context)
|
||||
if push_equivalent is not None:
|
||||
if push_equivalent in successful_contexts:
|
||||
post_compensating_status(
|
||||
sha,
|
||||
context,
|
||||
s.get("target_url"),
|
||||
description=PR_SHADOW_COMPENSATION_DESCRIPTION,
|
||||
dry_run=dry_run,
|
||||
)
|
||||
counters["compensated"] += 1
|
||||
counters["compensated_pr_shadowed_by_push_success"] += 1
|
||||
counters["compensated_contexts"].append(context)
|
||||
else:
|
||||
counters["preserved_pr_without_push_success"] += 1
|
||||
continue
|
||||
|
||||
# Only `(push)`-suffix contexts hit the hardcoded-suffix bug.
|
||||
# Branch-protection required checks (e.g. `Secret scan / Scan
|
||||
# diff (pull_request)`) are NOT reachable from this path.
|
||||
# Other failed contexts are preserved unless handled by the
|
||||
# pull-request-shadow rule above.
|
||||
if not context.endswith(PUSH_SUFFIX):
|
||||
counters["preserved_non_push_suffix"] += 1
|
||||
continue
|
||||
@ -595,6 +666,8 @@ def reap_branch(
|
||||
"preserved_non_failure": 0,
|
||||
"preserved_non_push_suffix": 0,
|
||||
"preserved_unparseable": 0,
|
||||
"compensated_pr_shadowed_by_push_success": 0,
|
||||
"preserved_pr_without_push_success": 0,
|
||||
"compensated_per_sha": {},
|
||||
}
|
||||
|
||||
@ -632,6 +705,8 @@ def reap_branch(
|
||||
"preserved_non_failure",
|
||||
"preserved_non_push_suffix",
|
||||
"preserved_unparseable",
|
||||
"compensated_pr_shadowed_by_push_success",
|
||||
"preserved_pr_without_push_success",
|
||||
):
|
||||
aggregate[key] += per_sha[key]
|
||||
|
||||
|
||||
169
.gitea/scripts/tests/test_status_reaper_api.py
Normal file
169
.gitea/scripts/tests/test_status_reaper_api.py
Normal file
@ -0,0 +1,169 @@
|
||||
import importlib.util
|
||||
import json
|
||||
import pathlib
|
||||
import urllib.error
|
||||
|
||||
|
||||
ROOT = pathlib.Path(__file__).resolve().parents[1]
|
||||
SCRIPT = ROOT / "status-reaper.py"
|
||||
|
||||
|
||||
def load_reaper():
    """Import status-reaper.py fresh from disk and pin test-friendly
    API settings (fake host/token, tight timeout, zero retry sleep)."""
    spec = importlib.util.spec_from_file_location("status_reaper", SCRIPT)
    module = importlib.util.module_from_spec(spec)
    assert spec.loader is not None
    spec.loader.exec_module(module)
    overrides = {
        "API": "https://git.example.test/api/v1",
        "GITEA_TOKEN": "test-token",
        "API_TIMEOUT_SEC": 1,
        "API_RETRIES": 3,
        "API_RETRY_SLEEP_SEC": 0,
    }
    for name, value in overrides.items():
        setattr(module, name, value)
    return module
|
||||
|
||||
|
||||
class FakeResponse:
    """Minimal stand-in for a urllib response: context manager + read().

    Always reports HTTP 200 and serialises *payload* as UTF-8 JSON.
    """

    # Class attribute: every fake response is a success.
    status = 200

    def __init__(self, payload):
        self.payload = payload

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        # Never suppress exceptions raised inside the `with` body.
        return False

    def read(self):
        body = json.dumps(self.payload)
        return body.encode("utf-8")
|
||||
|
||||
|
||||
def test_api_retries_transient_timeout(monkeypatch):
    """First attempt times out; the retry succeeds and its body is returned."""
    mod = load_reaper()
    attempts = []

    def fake_urlopen(req, timeout):
        attempts.append(timeout)
        if len(attempts) == 1:
            raise TimeoutError("simulated slow Gitea API")
        return FakeResponse({"ok": True})

    monkeypatch.setattr(mod.urllib.request, "urlopen", fake_urlopen)

    status, body = mod.api("GET", "/repos/o/r/commits")

    assert (status, body) == (200, {"ok": True})
    assert len(attempts) == 2
|
||||
|
||||
|
||||
def test_api_raises_after_retry_budget(monkeypatch):
    """Every attempt fails: api() must raise ApiError naming the budget."""
    mod = load_reaper()

    def always_fail(req, timeout):
        raise urllib.error.URLError("connection reset")

    monkeypatch.setattr(mod.urllib.request, "urlopen", always_fail)

    raised = None
    try:
        mod.api("GET", "/repos/o/r/commits")
    except mod.ApiError as exc:
        raised = exc
    assert raised is not None, "expected ApiError"
    assert "failed after 3 attempts" in str(raised)
|
||||
|
||||
|
||||
def test_reap_compensates_failed_pr_context_when_push_equivalent_passed(monkeypatch):
    """Failed PR contexts shadowed by a green push twin on the same SHA
    are compensated in status order with the PR-shadow description."""
    mod = load_reaper()
    sha = "db3b7a93e31adc0cb072a6d177d92dd73275a191"
    handlers = "Handlers Postgres Integration / Handlers Postgres Integration"
    posted = []

    def record_post(sha, context, target_url, *, description="", dry_run=False):
        posted.append((sha, context, target_url, description, dry_run))

    monkeypatch.setattr(mod, "post_compensating_status", record_post)

    combined = {
        "statuses": [
            {
                "context": "CI / Platform (Go) (pull_request)",
                "status": "failure",
                "target_url": "https://git.example.test/ci-pr",
            },
            {
                "context": "CI / Platform (Go) (push)",
                "status": "success",
            },
            {
                "context": handlers + " (pull_request)",
                "status": "failure",
                "target_url": "https://git.example.test/handlers-pr",
            },
            {
                "context": handlers + " (push)",
                "status": "success",
            },
        ],
    }

    counters = mod.reap(
        {"CI": True, "Handlers Postgres Integration": True},
        combined,
        sha,
    )

    assert counters["compensated_pr_shadowed_by_push_success"] == 2
    assert posted == [
        (
            sha,
            "CI / Platform (Go) (pull_request)",
            "https://git.example.test/ci-pr",
            mod.PR_SHADOW_COMPENSATION_DESCRIPTION,
            False,
        ),
        (
            sha,
            handlers + " (pull_request)",
            "https://git.example.test/handlers-pr",
            mod.PR_SHADOW_COMPENSATION_DESCRIPTION,
            False,
        ),
    ]
|
||||
|
||||
|
||||
def test_reap_preserves_failed_pr_context_without_push_success(monkeypatch):
    """PR failures whose push twin is missing or also failed stay red."""
    mod = load_reaper()
    posted = []

    def record_post(sha, context, target_url, *, description="", dry_run=False):
        posted.append(context)

    monkeypatch.setattr(mod, "post_compensating_status", record_post)

    statuses = [
        {
            "context": "CI / Platform (Go) (pull_request)",
            "status": "failure",
        },
        {
            "context": "CI / Platform (Go) (push)",
            "status": "failure",
        },
        {
            "context": "CI / Shellcheck (pull_request)",
            "status": "failure",
        },
    ]

    counters = mod.reap(
        {"CI": True},
        {"statuses": statuses},
        "db3b7a93e31adc0cb072a6d177d92dd73275a191",
    )

    assert counters["preserved_pr_without_push_success"] == 2
    assert posted == []
|
||||
@ -84,7 +84,7 @@ permissions:
|
||||
jobs:
|
||||
reap:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 3
|
||||
timeout-minutes: 8
|
||||
steps:
|
||||
- name: Check out repo at default-branch HEAD
|
||||
# BASE checkout per `feedback_pull_request_target_workflow_from_base`.
|
||||
@ -118,4 +118,7 @@ jobs:
|
||||
REPO: ${{ github.repository }}
|
||||
WATCH_BRANCH: ${{ github.event.repository.default_branch }}
|
||||
WORKFLOWS_DIR: .gitea/workflows
|
||||
STATUS_REAPER_API_RETRIES: "4"
|
||||
STATUS_REAPER_API_TIMEOUT_SEC: "20"
|
||||
STATUS_REAPER_API_RETRY_SLEEP_SEC: "2"
|
||||
run: python3 .gitea/scripts/status-reaper.py
|
||||
|
||||
Loading…
Reference in New Issue
Block a user