Compare commits

...

1 Commits

Author SHA1 Message Date
Molecule AI Dev Engineer A (Kimi) 29d15cbe2c fix(merge-queue): paginate Gitea API list calls — issues, statuses (#2366/#588)
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 6s
CI / Python Lint & Test (pull_request) Successful in 15s
CI / Detect changes (pull_request) Successful in 18s
E2E API Smoke Test / detect-changes (pull_request) Successful in 10s
E2E Chat / detect-changes (pull_request) Successful in 10s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 10s
CI / Platform (Go) (pull_request) Successful in 2s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 11s
Lint forbidden tenant-env keys / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 10s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 12s
CI / Canvas (Next.js) (pull_request) Successful in 1s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 3s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 4s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 5s
CI / all-required (pull_request) Successful in 2s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 14s
gate-check-v3 / gate-check (pull_request_target) Failing after 7s
qa-review / approved (pull_request_target) Failing after 5s
security-review / approved (pull_request_target) Failing after 3s
E2E Chat / E2E Chat (pull_request) Successful in 6s
sop-checklist / all-items-acked (pull_request) acked: 0/7 — missing: comprehensive-testing, local-postgres-e2e, staging-smoke, +4 — body-unfilled: comprehensive-testing, local-postgres-e2
sop-checklist / review-refire (pull_request_target) Has been skipped
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request_target) Successful in 4s
CI / Canvas Deploy Status (pull_request) Has been skipped
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 2s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 2s
sop-tier-check / tier-check (pull_request_target) Failing after 4s
Ops Scripts Tests / Ops scripts (unittest) (pull_request) Successful in 1m23s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m17s
qa-review / approved (pull_request_review) Has been skipped
security-review / approved (pull_request_review) Has been skipped
sop-tier-check / tier-check (pull_request_review) Failing after 5s
audit-force-merge / audit (pull_request_target) Successful in 4s
The merge queue used hard-coded limit=50 on list endpoints, silently
truncating enumeration when more than 50 open PRs or status checks exist.
This meant newer PRs could be invisible to the queue, and PRs with >50
status contexts would have incomplete check evaluation.

Changes:
- Add api_paginated() helper that loops through pages until a partial
  page is returned (indicating end of collection).
- list_queued_issues() and list_candidate_issues() now use pagination
  to enumerate ALL open PRs, not just the first 50.
- get_combined_status() /statuses enrichment now paginates to capture
  all status checks beyond the 50-entry cap.

All 52 gitea-merge-queue tests pass.

Refs: molecule-core#2366/#588, PM dispatch 01eaa317.
2026-06-06 14:40:32 +00:00
2 changed files with 98 additions and 30 deletions
+41 -30
View File
@@ -268,6 +268,34 @@ def api(
return status, {"_raw": raw.decode("utf-8", errors="replace")}
def api_paginated(
method: str,
path: str,
*,
query: dict[str, str] | None = None,
page_size: int = 50,
) -> list[dict]:
"""Fetch all pages of a paginated Gitea list endpoint.
Gitea paginates with `page` (1-indexed) and `limit`. We loop until a
page returns fewer than `page_size` items, indicating the end.
"""
results: list[dict] = []
page = 1
while True:
page_query = dict(query or {})
page_query["page"] = str(page)
page_query["limit"] = str(page_size)
_, body = api(method, path, query=page_query)
if not isinstance(body, list):
raise ApiError(f"{path} paginated response not list")
results.extend(body)
if len(body) < page_size:
break
page += 1
return results
def required_contexts(raw: str) -> list[str]:
return [part.strip() for part in raw.split(",") if part.strip()]
@@ -659,32 +687,23 @@ def get_combined_status(sha: str) -> dict:
"""Combined status + all individual statuses for `sha`.
The /status endpoint caps the `statuses` array at 30 entries (Gitea
default page size), so we fetch the full list via /statuses with a
higher limit. The combined `state` still comes from /status.
default page size), so we fetch the full list via /statuses. The combined
`state` still comes from /status.
Fail-closed: the PRIMARY /status fetch must succeed. If it raises, the
error propagates so the caller skips this PR this tick (we never treat a
failed status fetch as green — dev-sop "no fail-open"). Only the SECONDARY
/statuses enrichment (which merely extends the per-context list beyond the
30-entry cap) is best-effort; if it fails we still have the combined set.
Fail-closed: BOTH the PRIMARY /status fetch AND the SECONDARY /statuses
enrichment must succeed. If either raises, the error propagates so the
caller skips this PR this tick (we never treat a failed status fetch as
green — dev-sop "no fail-open"). A paginated /statuses error must NOT
silently degrade to an incomplete status set.
"""
_, combined = api("GET", f"/repos/{OWNER}/{NAME}/commits/{sha}/status")
if not isinstance(combined, dict):
raise ApiError(f"status for {sha} response not object")
combined_statuses: list[dict] = combined.get("statuses") or []
try:
_, all_statuses_raw = api(
"GET",
f"/repos/{OWNER}/{NAME}/commits/{sha}/statuses",
query={"limit": "50"},
)
if isinstance(all_statuses_raw, list):
all_statuses: list[dict] = list(all_statuses_raw)
else:
all_statuses = []
except (ApiError, urllib.error.URLError, TimeoutError, OSError) as exc:
sys.stderr.write(f"::warning::could not fetch full statuses list for {sha[:8]}: {exc}\n")
all_statuses = []
all_statuses = api_paginated(
"GET",
f"/repos/{OWNER}/{NAME}/commits/{sha}/statuses",
)
# Build latest per context: process combined (ascending→reverse=newest
# first), then fill gaps from all_statuses (already newest-first).
latest: dict[str, dict] = {}
@@ -701,19 +720,15 @@ def get_combined_status(sha: str) -> dict:
def list_queued_issues() -> list[dict]:
_, body = api(
return api_paginated(
"GET",
f"/repos/{OWNER}/{NAME}/issues",
query={
"state": "open",
"type": "pulls",
"labels": QUEUE_LABEL,
"limit": "50",
},
)
if not isinstance(body, list):
raise ApiError("queued issues response not list")
return body
def list_candidate_issues(*, auto_discover: bool) -> list[dict]:
@@ -727,18 +742,14 @@ def list_candidate_issues(*, auto_discover: bool) -> list[dict]:
"""
if not auto_discover:
return list_queued_issues()
_, body = api(
return api_paginated(
"GET",
f"/repos/{OWNER}/{NAME}/issues",
query={
"state": "open",
"type": "pulls",
"limit": "50",
},
)
if not isinstance(body, list):
raise ApiError("candidate issues response not list")
return body
def get_pull(pr_number: int) -> dict:
@@ -2,6 +2,8 @@ import importlib.util
import sys
from pathlib import Path
import pytest
SCRIPT = Path(__file__).resolve().parents[1] / "gitea-merge-queue.py"
spec = importlib.util.spec_from_file_location("gitea_merge_queue", SCRIPT)
mq = importlib.util.module_from_spec(spec)
@@ -532,6 +534,61 @@ def test_status_fetch_failure_is_fail_closed(monkeypatch):
assert merged["called"] is False
# --------------------------------------------------------------------------
# Pagination: api_paginated loops pages and is fail-closed on page errors
# --------------------------------------------------------------------------
def test_api_paginated_loops_pages_until_partial(monkeypatch):
"""api_paginated fetches all pages and stops when a page is < page_size."""
calls = []
def fake_api(method, path, *, query=None, **kw):
page = int((query or {}).get("page", "1"))
limit = int((query or {}).get("limit", "50"))
calls.append((page, limit))
if page == 1:
return 200, [{"number": 1}, {"number": 2}]
if page == 2:
return 200, [{"number": 3}]
return 200, []
monkeypatch.setattr(mq, "api", fake_api)
results = mq.api_paginated("GET", "/repos/o/r/issues", page_size=2)
assert len(results) == 3
assert results[0]["number"] == 1
assert results[1]["number"] == 2
assert results[2]["number"] == 3
assert calls == [(1, 2), (2, 2)]
def test_api_paginated_raises_on_non_list(monkeypatch):
"""A page that returns a dict instead of list is an error."""
def fake_api(method, path, *, query=None, **kw):
return 200, {"message": "not found"}
monkeypatch.setattr(mq, "api", fake_api)
with pytest.raises(mq.ApiError):
mq.api_paginated("GET", "/repos/o/r/issues")
def test_get_combined_status_propagates_paginated_statuses_error(monkeypatch):
"""If the paginated /statuses enrichment raises, the error propagates
(fail-closed — we do NOT silently fall back to an incomplete status set)."""
monkeypatch.setattr(mq, "OWNER", "o")
monkeypatch.setattr(mq, "NAME", "r")
def fake_api(method, path, *, query=None, **kw):
if path.endswith("/status"):
return 200, {"state": "success", "statuses": [{"context": "c1", "status": "success", "id": 1}]}
if path.endswith("/statuses"):
raise mq.ApiError("GET /statuses -> HTTP 502")
raise mq.ApiError(f"unexpected {path}")
monkeypatch.setattr(mq, "api", fake_api)
with pytest.raises(mq.ApiError, match="GET /statuses"):
mq.get_combined_status("a" * 40)
def test_process_once_holds_tick_when_branch_protection_unavailable(monkeypatch):
"""BP enumeration failure → HOLD the whole tick (no merge, rc 0)."""
merged = {"called": False}