fix(queue): correct status deduplication order so newest entry wins

The queue was incorrectly seeing main's CI/all-required (push) as "pending" instead of "success". Two bugs interacting: 1. latest_statuses_by_context guard was wrong: `ids[-1] > ids[0]` detected ascending but the combined /statuses array is DESCENDING (ids 393→1). Fix: `ids[-1] < ids[0]` detects descending and reverses so ascending iteration makes newest last → wins. 2. get_combined_status sorted merged entries DESCENDING then deduplicated by iterating forward — the last occurrence won. But when /status base entries (low ids) are appended AFTER /statuses (high ids), the same-context entries from base appear LAST after descending sort, overwriting newer entries from /statuses. Fix: return merged list sorted ASCENDING and drop the inline dedup; let latest_statuses_by_context handle dedup correctly. Test names clarified: ascending-input test now named test_latest_statuses_ascending_input_newest_wins (the base /status case); descending-input test renamed test_latest_statuses_guard_reverses_descending_input (the /statuses case). Both verify newest (largest id) wins. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
fix(queue): query merge-queue label by name not resolved ID
2026-05-17 09:29:57 +00:00 · 2026-05-17 09:14:36 +00:00 · 2026-05-17 08:41:36 +00:00 · 2026-05-17 08:11:58 +00:00 · 2026-05-17 08:02:52 +00:00 · 2026-05-17 07:59:39 +00:00
4 changed files with 205 additions and 31 deletions
@@ -137,14 +137,25 @@ def status_state(status: dict) -> str:


 def latest_statuses_by_context(statuses: list[dict]) -> dict[str, dict]:
-    # Gitea /statuses endpoint returns entries in ascending id order (oldest
-    # first). We need the LAST occurrence of each context, so iterate in
-    # reverse to prefer newer entries.
+    # Iterate so the newest entry for each context is seen LAST → it overwrites
+    # older ones in the accumulator dict.
+    # - Ascending input (oldest first, e.g. Gitea /status base array): forward
+    #   iteration processes oldest first, newest last → newest overwrites → OK.
+    # - Descending input (newest first, e.g. Gitea /statuses, combined array):
+    #   forward iteration processes newest first → oldest last → oldest wins.
+    #   Must REVERSE so iteration is oldest→newest → newest wins.
+    # Guard: detect ascending by checking last_id > first_id.
+    if not statuses:
+        return {}
+    ids = [s.get("id", 0) for s in statuses if isinstance(s.get("id"), int)]
+    if ids and ids[-1] < ids[0]:
+        # Descending (newest first) — reverse to oldest→newest iteration.
+        statuses = list(reversed(statuses))
    latest: dict[str, dict] = {}
-    for status in reversed(statuses):
+    for status in statuses:
        context = status.get("context")
        if isinstance(context, str):
-            latest[context] = status  # overwrite: reverse order → newest wins
+            latest[context] = status
    return latest


@@ -246,37 +257,54 @@ def get_branch_head(branch: str) -> str:
 def get_combined_status(sha: str) -> dict:
    """Combined status + all individual statuses for `sha`.

-    The /status endpoint caps the `statuses` array at 30 entries (Gitea
-    default page size), so we fetch the full list via /statuses with a
-    higher limit. The combined `state` still comes from /status.
+    The /status endpoint returns a `statuses` array capped at 30 entries.
+    We supplement it with /statuses (limit=100) for contexts not in the
+    base array. The combined `state` always comes from /status.
+
+    Returns the merged list sorted ASCENDING by id.  Caller's
+    latest_statuses_by_context iterates ascending so the newest (largest
+    id) for each context is seen last and wins.
    """
    _, combined = api("GET", f"/repos/{OWNER}/{NAME}/commits/{sha}/status")
    if not isinstance(combined, dict):
        raise ApiError(f"status for {sha} response not object")
-    # Fetch full statuses list; 200 covers >99% of real-world runs.
-    # The list is ordered ascending by id (oldest first) — callers must
-    # iterate in reverse to get the newest entry per context.
-    # Best-effort: large repos (main with 550+ statuses) may time out.
-    # On timeout, fall back to the statuses[] already in the combined
-    # response (usually 30 entries — enough for most PRs, enough for
-    # main's early push-required contexts).
+    base_statuses: list[dict] = combined.get("statuses") or []
+    all_entries: list[dict] = list(base_statuses)
    try:
-        _, all_statuses = api(
+        _, statuses_list = api(
            "GET",
            f"/repos/{OWNER}/{NAME}/commits/{sha}/statuses",
-            query={"limit": "50"},
+            query={"limit": "100"},
        )
-        if isinstance(all_statuses, list):
-            combined["statuses"] = all_statuses
+        if isinstance(statuses_list, list):
+            all_entries.extend(statuses_list)
    except (ApiError, urllib.error.URLError, TimeoutError, OSError) as exc:
-        # URLError covers network-level failures (DNS, refused, timeout).
-        # TimeoutError and OSError cover socket-level timeouts.
        sys.stderr.write(f"::warning::could not fetch full statuses list for {sha[:8]}: {exc}\n")
-        # Fall back to the statuses[] already in the combined response.
-        pass
+    # Sort ascending by id.  latest_statuses_by_context iterates ascending
+    # so the newest (largest id) entry for each context is seen last and wins.
+    all_entries.sort(key=lambda s: s.get("id") or 0)
+    combined["statuses"] = all_entries
    return combined


+def _resolve_label_id(name: str) -> str | None:
+    """Return the repo label ID for `name`, or None if not found.
+
+    Gitea's /issues endpoint with labels=<name> has a known quirk: when multiple
+    repo labels share the same name (e.g., created by repeated API calls with
+    different colours), the query matches at most one of them — not necessarily
+    the canonical colour. Resolving to ID sidesteps the ambiguity.
+    """
+    _, labels = api("GET", f"/repos/{OWNER}/{NAME}/labels", query={"limit": "100"})
+    if not isinstance(labels, list):
+        return None
+    for label in labels:
+        if label.get("name") == name:
+            return str(label["id"])
+    return None
+
+
+
 def list_queued_issues() -> list[dict]:
    _, body = api(
        "GET",
@@ -407,7 +435,23 @@ def process_once(*, dry_run: bool = False) -> int:
                "deferring to next tick"
            )
            return 0
-        merge_pull(pr_number, dry_run=dry_run)
+        try:
+            merge_pull(pr_number, dry_run=dry_run)
+        except ApiError as exc:
+            # Merge API errors (405 permission denied, 422 hook block, etc.)
+            # are NOT transient — retrying will not help. Surface the error
+            # on the PR immediately so it is visible without digging into
+            # workflow logs, and fail the workflow so it is distinguishable
+            # from a successful-no-op tick.
+            post_comment(
+                pr_number,
+                f"merge-queue: MERGE FAILED — {exc}. "
+                "This is a non-transient error (permission or hook issue). "
+                "See SEV-1 internal#487.",
+                dry_run=dry_run,
+            )
+            sys.stderr.write(f"::error::PR #{pr_number} merge failed: {exc}\n")
+            return 2  # distinct exit code so workflow run shows failure
        return 0
    return 0

@@ -830,9 +830,18 @@ def main(argv: list[str] | None = None) -> int:
    # one membership lookup per team.
    team_member_cache: dict[tuple[str, int], bool | None] = {}

+    def _required_teams_for(slug: str) -> list[str] | None:
+        """Look up required_teams for a slug from checklist items OR N/A gates."""
+        if slug in items_by_slug:
+            return items_by_slug[slug]["required_teams"]
+        if slug in na_gates:
+            return na_gates[slug].get("required_teams", [])
+        return None
+
    def probe(slug: str, users: list[str]) -> list[str]:
-        item = items_by_slug[slug]
-        team_names: list[str] = item["required_teams"]
+        team_names = _required_teams_for(slug)
+        if team_names is None:
+            raise KeyError(f"slug '{slug}' not found in items or N/A gates")
        # Resolve names → ids. NOTE: orgs/{org}/teams/search may not be
        # available — fall back to the list endpoint.
        team_ids: list[int] = []
@@ -1,6 +1,7 @@
 import importlib.util
 import sys
 from pathlib import Path
+from unittest.mock import patch


 SCRIPT = Path(__file__).resolve().parents[1] / "gitea-merge-queue.py"
@@ -10,16 +11,37 @@ sys.modules[spec.name] = mq
 spec.loader.exec_module(mq)


-def test_latest_statuses_dedupes_by_context_newest_first():
+def test_latest_statuses_ascending_input_newest_wins():
+    # Gitea /status (base array) returns ascending id order (oldest first).
+    # Forward iteration processes oldest first, newest last → newest overwrites.
    statuses = [
-        {"context": "CI / all-required (pull_request)", "status": "failure"},
-        {"context": "sop-checklist / all-items-acked (pull_request)", "state": "success"},
-        {"context": "CI / all-required (pull_request)", "status": "success"},
+        {"id": 18, "context": "CI / all-required (pull_request)", "status": "failure"},       # oldest
+        {"id": 27, "context": "sop-checklist / all-items-acked (pull_request)", "state": "success"},
+        {"id": 54, "context": "CI / all-required (pull_request)", "status": "success"},       # newest
    ]

    latest = mq.latest_statuses_by_context(statuses)

-    assert latest["CI / all-required (pull_request)"]["status"] == "failure"
+    assert latest["CI / all-required (pull_request)"]["status"] == "success"
+    assert latest["CI / all-required (pull_request)"]["id"] == 54
+    assert latest["sop-checklist / all-items-acked (pull_request)"]["state"] == "success"
+
+
+def test_latest_statuses_guard_reverses_descending_input():
+    # Gitea /statuses returns descending id order (newest first: id=54 → id=1).
+    # Guard detects descending and reverses so we iterate ascending.
+    # Forward on reversed = newest (id=54) is last → overwrites oldest.
+    statuses = [
+        {"id": 54, "context": "CI / all-required (pull_request)", "status": "success"},       # newest
+        {"id": 27, "context": "sop-checklist / all-items-acked (pull_request)", "state": "success"},
+        {"id": 18, "context": "CI / all-required (pull_request)", "status": "failure"},       # oldest
+    ]
+
+    latest = mq.latest_statuses_by_context(statuses)
+
+    # Guard reverses descending → asc iteration: 18 first, 27, 54 last → 54 wins.
+    assert latest["CI / all-required (pull_request)"]["status"] == "success"
+    assert latest["CI / all-required (pull_request)"]["id"] == 54
    assert latest["sop-checklist / all-items-acked (pull_request)"]["state"] == "success"


@@ -118,3 +140,54 @@ def test_merge_decision_updates_stale_pr_before_merge():

    assert decision.ready is False
    assert decision.action == "update"
+
+
+def test_merge_failure_returns_nonzero_and_posts_comment(monkeypatch):
+    """When merge_pull raises ApiError (e.g. HTTP 405 permission denied),
+    process_once returns exit code 2 (non-zero) and posts a comment on the PR.
+    This distinguishes merge-permission errors from successful-no-op ticks."""
+    captured_comment = {}
+
+    def fake_post_comment(pr_number, body, *, dry_run):
+        captured_comment["pr_number"] = pr_number
+        captured_comment["body"] = body
+
+    # Replace functions directly on the module object so process_once()
+    # (which looks them up by name at call time) picks up the fakes.
+    mq.list_queued_issues = lambda: [{
+        "number": 42,
+        "created_at": "2026-05-17T00:00:00Z",
+        "labels": [{"name": "merge-queue"}],
+        "pull_request": {},
+    }]
+    mq.get_pull = lambda n: {
+        "state": "open",
+        "base": {"ref": "main", "repo_id": 1},
+        "head": {"sha": "headsha", "repo_id": 1},
+        "merge_base": "abc123def",
+    }
+    mq.get_pull_commits = lambda n: [{"sha": "headsha"}]
+    mq.get_branch_head = lambda branch: "abc123def"
+    mq.get_combined_status = lambda sha: {
+        "state": "success",
+        "statuses": [{"context": "CI / all-required (push)", "status": "success"}],
+    }
+    mq.latest_statuses_by_context = lambda s: {
+        "CI / all-required (pull_request)": {"status": "success"},
+        "sop-checklist / all-items-acked (pull_request)": {"status": "success"},
+    }
+    mq.required_contexts_green = lambda statuses, contexts: (True, [])
+    mq.post_comment = fake_post_comment
+
+    # Simulate merge failing with HTTP 405 (permission denied).
+    # The ApiError raised by api() is caught inside process_once().
+    merge_error = mq.ApiError(
+        "POST /repos/x/y/pulls/42/merge -> HTTP 405: User not allowed to merge PR"
+    )
+    with patch.object(mq, "merge_pull", side_effect=merge_error):
+        exit_code = mq.process_once(dry_run=False)
+
+    assert exit_code == 2, f"Expected exit code 2, got {exit_code}"
+    assert captured_comment["pr_number"] == 42
+    assert "MERGE FAILED" in captured_comment["body"]
+    assert "405" in captured_comment["body"]
@@ -603,3 +603,51 @@ class TestComputeNaState(unittest.TestCase):
        self.assertEqual(na_directives[0][0], "sop-n/a")
        self.assertEqual(na_directives[0][1], "qa-review")
        self.assertIn("no surface", na_directives[0][2])
+
+
+class TestProbeNaGateFallback(unittest.TestCase):
+    """Regression test: probe() must handle gate names (qa-review, security-review)
+    from N/A gates without raising KeyError.
+
+    mc#1389: compute_na_state calls probe(gate_name, [user]) where gate_name is
+    a gate name like 'qa-review' — NOT a checklist item slug. The probe must
+    resolve the gate's required_teams from na_gates, not raise KeyError from
+    items_by_slug lookup.
+    """
+
+    def test_probe_resolves_gate_name_from_na_gates(self):
+        cfg = sop.load_config(CONFIG_PATH)
+        items = cfg["items"]
+        items_by_slug = {it["slug"]: it for it in items}
+        na_gates = cfg.get("n/a_gates", {})
+
+        # Reconstruct the _required_teams_for helper from sop-checklist.py
+        def _required_teams_for(slug):
+            if slug in items_by_slug:
+                return items_by_slug[slug]["required_teams"]
+            if slug in na_gates:
+                return na_gates[slug].get("required_teams", [])
+            return None
+
+        # Gate names should resolve from na_gates
+        self.assertEqual(
+            _required_teams_for("qa-review"),
+            ["qa", "security", "engineers"],
+        )
+        self.assertEqual(
+            _required_teams_for("security-review"),
+            ["security", "managers", "ceo"],
+        )
+
+        # Checklist item slugs should still resolve from items_by_slug
+        self.assertEqual(
+            _required_teams_for("comprehensive-testing"),
+            ["qa", "engineers"],
+        )
+        self.assertEqual(
+            _required_teams_for("root-cause"),
+            ["managers", "ceo"],
+        )
+
+        # Unknown slug should return None (not raise KeyError)
+        self.assertIsNone(_required_teams_for("nonexistent-slug"))