From 29d15cbe2c4236abbe9fb3fd83c96776dc481ccb Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Sat, 6 Jun 2026 14:23:08 +0000 Subject: [PATCH] =?UTF-8?q?fix(merge-queue):=20paginate=20Gitea=20API=20li?= =?UTF-8?q?st=20calls=20=E2=80=94=20issues,=20statuses=20(#2366/#588)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The merge queue used hard-coded limit=50 on list endpoints, silently truncating enumeration when more than 50 open PRs or status checks exist. This meant newer PRs could be invisible to the queue, and PRs with >50 status contexts would have incomplete check evaluation. Changes: - Add api_paginated() helper that loops through pages until a partial page is returned (indicating end of collection). - list_queued_issues() and list_candidate_issues() now use pagination to enumerate ALL open PRs, not just the first 50. - get_combined_status() /statuses enrichment now paginates to capture all status checks beyond the 50-entry cap. All 52 gitea-merge-queue tests pass. Refs: molecule-core#2366/#588, PM dispatch 01eaa317. --- .gitea/scripts/gitea-merge-queue.py | 71 +++++++++++-------- .../scripts/tests/test_gitea_merge_queue.py | 57 +++++++++++++++ 2 files changed, 98 insertions(+), 30 deletions(-) diff --git a/.gitea/scripts/gitea-merge-queue.py b/.gitea/scripts/gitea-merge-queue.py index 29b561a60..111efa079 100644 --- a/.gitea/scripts/gitea-merge-queue.py +++ b/.gitea/scripts/gitea-merge-queue.py @@ -268,6 +268,34 @@ def api( return status, {"_raw": raw.decode("utf-8", errors="replace")} +def api_paginated( + method: str, + path: str, + *, + query: dict[str, str] | None = None, + page_size: int = 50, +) -> list[dict]: + """Fetch all pages of a paginated Gitea list endpoint. + + Gitea paginates with `page` (1-indexed) and `limit`. We loop until a + page returns fewer than `page_size` items, indicating the end. + """ + results: list[dict] = [] + page = 1 + while True: + page_query = dict(query or {}) + page_query["page"] = str(page) + page_query["limit"] = str(page_size) + _, body = api(method, path, query=page_query) + if not isinstance(body, list): + raise ApiError(f"{path} paginated response not list") + results.extend(body) + if len(body) < page_size: + break + page += 1 + return results + + def required_contexts(raw: str) -> list[str]: return [part.strip() for part in raw.split(",") if part.strip()] @@ -659,32 +687,23 @@ def get_combined_status(sha: str) -> dict: """Combined status + all individual statuses for `sha`. The /status endpoint caps the `statuses` array at 30 entries (Gitea - default page size), so we fetch the full list via /statuses with a - higher limit. The combined `state` still comes from /status. + default page size), so we fetch the full list via /statuses. The combined + `state` still comes from /status. - Fail-closed: the PRIMARY /status fetch must succeed. If it raises, the - error propagates so the caller skips this PR this tick (we never treat a - failed status fetch as green — dev-sop "no fail-open"). Only the SECONDARY - /statuses enrichment (which merely extends the per-context list beyond the - 30-entry cap) is best-effort; if it fails we still have the combined set. + Fail-closed: BOTH the PRIMARY /status fetch AND the SECONDARY /statuses + enrichment must succeed. If either raises, the error propagates so the + caller skips this PR this tick (we never treat a failed status fetch as + green — dev-sop "no fail-open"). A paginated /statuses error must NOT + silently degrade to an incomplete status set. """ _, combined = api("GET", f"/repos/{OWNER}/{NAME}/commits/{sha}/status") if not isinstance(combined, dict): raise ApiError(f"status for {sha} response not object") combined_statuses: list[dict] = combined.get("statuses") or [] - try: - _, all_statuses_raw = api( - "GET", - f"/repos/{OWNER}/{NAME}/commits/{sha}/statuses", - query={"limit": "50"}, - ) - if isinstance(all_statuses_raw, list): - all_statuses: list[dict] = list(all_statuses_raw) - else: - all_statuses = [] - except (ApiError, urllib.error.URLError, TimeoutError, OSError) as exc: - sys.stderr.write(f"::warning::could not fetch full statuses list for {sha[:8]}: {exc}\n") - all_statuses = [] + all_statuses = api_paginated( + "GET", + f"/repos/{OWNER}/{NAME}/commits/{sha}/statuses", + ) # Build latest per context: process combined (ascending→reverse=newest # first), then fill gaps from all_statuses (already newest-first). latest: dict[str, dict] = {} @@ -701,19 +720,15 @@ def get_combined_status(sha: str) -> dict: def list_queued_issues() -> list[dict]: - _, body = api( + return api_paginated( "GET", f"/repos/{OWNER}/{NAME}/issues", query={ "state": "open", "type": "pulls", "labels": QUEUE_LABEL, - "limit": "50", }, ) - if not isinstance(body, list): - raise ApiError("queued issues response not list") - return body def list_candidate_issues(*, auto_discover: bool) -> list[dict]: @@ -727,18 +742,14 @@ def list_candidate_issues(*, auto_discover: bool) -> list[dict]: """ if not auto_discover: return list_queued_issues() - _, body = api( + return api_paginated( "GET", f"/repos/{OWNER}/{NAME}/issues", query={ "state": "open", "type": "pulls", - "limit": "50", }, ) - if not isinstance(body, list): - raise ApiError("candidate issues response not list") - return body def get_pull(pr_number: int) -> dict: diff --git a/.gitea/scripts/tests/test_gitea_merge_queue.py b/.gitea/scripts/tests/test_gitea_merge_queue.py index 83740b6a5..9783cf48a 100644 --- a/.gitea/scripts/tests/test_gitea_merge_queue.py +++ b/.gitea/scripts/tests/test_gitea_merge_queue.py @@ -2,6 +2,8 @@ import importlib.util import sys from pathlib import Path +import pytest + SCRIPT = Path(__file__).resolve().parents[1] / "gitea-merge-queue.py" spec = importlib.util.spec_from_file_location("gitea_merge_queue", SCRIPT) mq = importlib.util.module_from_spec(spec) @@ -532,6 +534,61 @@ def test_status_fetch_failure_is_fail_closed(monkeypatch): assert merged["called"] is False +# -------------------------------------------------------------------------- +# Pagination: api_paginated loops pages and is fail-closed on page errors +# -------------------------------------------------------------------------- + +def test_api_paginated_loops_pages_until_partial(monkeypatch): + """api_paginated fetches all pages and stops when a page is < page_size.""" + calls = [] + + def fake_api(method, path, *, query=None, **kw): + page = int((query or {}).get("page", "1")) + limit = int((query or {}).get("limit", "50")) + calls.append((page, limit)) + if page == 1: + return 200, [{"number": 1}, {"number": 2}] + if page == 2: + return 200, [{"number": 3}] + return 200, [] + + monkeypatch.setattr(mq, "api", fake_api) + results = mq.api_paginated("GET", "/repos/o/r/issues", page_size=2) + assert len(results) == 3 + assert results[0]["number"] == 1 + assert results[1]["number"] == 2 + assert results[2]["number"] == 3 + assert calls == [(1, 2), (2, 2)] + + +def test_api_paginated_raises_on_non_list(monkeypatch): + """A page that returns a dict instead of list is an error.""" + def fake_api(method, path, *, query=None, **kw): + return 200, {"message": "not found"} + + monkeypatch.setattr(mq, "api", fake_api) + with pytest.raises(mq.ApiError): + mq.api_paginated("GET", "/repos/o/r/issues") + + +def test_get_combined_status_propagates_paginated_statuses_error(monkeypatch): + """If the paginated /statuses enrichment raises, the error propagates + (fail-closed — we do NOT silently fall back to an incomplete status set).""" + monkeypatch.setattr(mq, "OWNER", "o") + monkeypatch.setattr(mq, "NAME", "r") + + def fake_api(method, path, *, query=None, **kw): + if path.endswith("/status"): + return 200, {"state": "success", "statuses": [{"context": "c1", "status": "success", "id": 1}]} + if path.endswith("/statuses"): + raise mq.ApiError("GET /statuses -> HTTP 502") + raise mq.ApiError(f"unexpected {path}") + + monkeypatch.setattr(mq, "api", fake_api) + with pytest.raises(mq.ApiError, match="GET /statuses"): + mq.get_combined_status("a" * 40) + + def test_process_once_holds_tick_when_branch_protection_unavailable(monkeypatch): """BP enumeration failure → HOLD the whole tick (no merge, rc 0).""" merged = {"called": False} -- 2.52.0