Compare commits

...

1 Commits

Author SHA1 Message Date
infra-sre ce4b179c4b fix(queue): handle merge conflicts + pre-receive hook during branch sync
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 26s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 42s
CI / Detect changes (pull_request) Successful in 1m8s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 25s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 32s
qa-review / approved (pull_request) Failing after 42s
E2E API Smoke Test / detect-changes (pull_request) Successful in 1m39s
security-review / approved (pull_request) Failing after 50s
gate-check-v3 / gate-check (pull_request) Successful in 1m18s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m48s
sop-tier-check / tier-check (pull_request) Successful in 59s
Runtime PR-Built Compatibility / detect-changes (pull_request) Successful in 2m19s
Ops Scripts Tests / Ops scripts (unittest) (pull_request) Failing after 2m6s
CI / Python Lint & Test (pull_request) Successful in 9m6s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Failing after 12m5s
CI / Canvas (Next.js) (pull_request) Successful in 21m32s
CI / Canvas Deploy Reminder (pull_request) Successful in 7s
CI / Platform (Go) (pull_request) Successful in 23m1s
CI / all-required (pull_request) Successful in 23m4s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Failing after 14m35s
Runtime PR-Built Compatibility / PR-built wheel + import smoke (pull_request) Failing after 14m30s
sop-checklist / all-items-acked (pull_request) [info tier:low] acked: 0/7 — missing: comprehensive-testing, local-postgres-e2e, staging-smoke, +4 — body-unfilled: comprehensive-testing, l
audit-force-merge / audit (pull_request) Has been skipped
Two queue stall conditions fixed:

1. HTTP 409 from /pulls/{n}/update — merge conflicts with current main.
   Queue raised generic ApiError → exit 0 → infinite retry loop.
2. HTTP 405 from /pulls/{n}/merge — pre-receive hook blocks API merges.
   Queue raised generic ApiError → exit 0 → infinite retry loop.

Key behavior change: after successfully updating a PR's base branch, the
queue now REMOVES the merge-queue label. This prevents the queue from
blocking on one PR's CI while newer PRs wait. The author re-adds the
label once CI passes, which confirms the sync is valid and triggers a
fresh CI run on the updated head. Serialization against current main is
preserved because the label can only be re-added after CI passes.

Changes:
- PreReceiveBlocked(ApiError): raised on HTTP 405 from merge.
  main() catches it, posts UI-merge comment, skips PR.
- MergeConflict(ApiError): raised on HTTP 409 from update.
  update_pull() detects 409 and raises it.
- process_once() handles MergeConflict:
  - If UPDATE_STYLE=merge: retry with rebase (one-shot fallback).
  - If rebase also conflicts: post conflict comment + remove queue label.
  - After any successful update: remove queue label + move to next PR.
- merge_pull() detects 405 and raises PreReceiveBlocked.
- add remove_label() helper.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-15 04:43:32 +00:00
+169 -15
View File
@@ -65,6 +65,35 @@ class ApiError(RuntimeError):
pass
class PreReceiveBlocked(ApiError):
"""Raised when the pre-receive hook blocks a merge (HTTP 405).
Distinguishes "retryable transient failure" (network, auth, rate-limit)
from "permanent block that requires human UI intervention".
"""
def __init__(self, path: str, status: int, body: str, pr_number: int):
self.status = status
self.body = body
self.pr_number = pr_number
super().__init__(f"{path} -> HTTP {status}: {body[:200]}")
class MergeConflict(ApiError):
"""Raised when /pulls/{n}/update returns HTTP 409 Conflict.
The branch cannot be updated with the base branch due to merge conflicts.
The queue must NOT retry indefinitely — the PR needs human intervention.
"""
def __init__(self, path: str, status: int, body: str, pr_number: int, attempted_style: str):
self.status = status
self.body = body
self.pr_number = pr_number
self.attempted_style = attempted_style
super().__init__(f"{path} -> HTTP {status}: {body[:200]}")
@dataclasses.dataclass(frozen=True)
class MergeDecision:
ready: bool
@@ -314,18 +343,38 @@ def post_comment(pr_number: int, body: str, *, dry_run: bool) -> None:
api("POST", f"/repos/{OWNER}/{NAME}/issues/{pr_number}/comments", body={"body": body})
def update_pull(pr_number: int, *, dry_run: bool) -> None:
print(f"::notice::updating PR #{pr_number} with base branch via style={UPDATE_STYLE}")
def remove_label(pr_number: int, label: str, *, dry_run: bool) -> None:
"""Remove a label from a PR."""
print(f"::notice::removing label '{label}' from PR #{pr_number}")
if dry_run:
return
api(
"POST",
f"/repos/{OWNER}/{NAME}/pulls/{pr_number}/update",
query={"style": UPDATE_STYLE},
expect_json=False,
"DELETE",
f"/repos/{OWNER}/{NAME}/issues/{pr_number}/labels/{urllib.parse.quote(label)}",
)
def update_pull(pr_number: int, *, dry_run: bool, style: str | None = None) -> None:
"""Update PR base branch. Raises MergeConflict on HTTP 409."""
effective_style = style or UPDATE_STYLE
print(f"::notice::updating PR #{pr_number} with base branch via style={effective_style}")
if dry_run:
return
path = f"/repos/{OWNER}/{NAME}/pulls/{pr_number}/update"
try:
api(
"POST",
path,
query={"style": effective_style},
expect_json=False,
)
except ApiError as exc:
msg: str = str(exc)
if "409" in msg or "conflict" in msg.lower():
raise MergeConflict(path, 409, msg, pr_number, effective_style) from exc
raise
def merge_pull(pr_number: int, *, dry_run: bool) -> None:
payload = {
"Do": "merge",
@@ -338,7 +387,20 @@ def merge_pull(pr_number: int, *, dry_run: bool) -> None:
print(f"::notice::merging PR #{pr_number}")
if dry_run:
return
api("POST", f"/repos/{OWNER}/{NAME}/pulls/{pr_number}/merge", body=payload, expect_json=False)
path = f"/repos/{OWNER}/{NAME}/pulls/{pr_number}/merge"
try:
api("POST", path, body=payload, expect_json=False)
except ApiError as exc:
# Gitea pre-receive hook returns HTTP 405 with body like
# '{"message":"User not allowed to merge PR"}'. The hook blocks
# all API-originated merges regardless of token permissions.
# Detect: 405 + "not allowed" or "pre-receive" in the error body.
msg: str = str(exc)
body_snippet = msg.split("HTTP 405:")[1].strip() if "HTTP 405:" in msg else ""
if "405" in msg or "not allowed" in body_snippet.lower() or "pre-receive" in body_snippet.lower():
raise PreReceiveBlocked(path, 405, body_snippet, pr_number) from exc
# Other API errors (auth, rate-limit, server error) are retryable.
raise
def process_once(*, dry_run: bool = False) -> int:
@@ -389,7 +451,51 @@ def process_once(*, dry_run: bool = False) -> int:
print(f"::notice::PR #{pr_number} decision={decision.action}: {decision.reason}")
if decision.action == "update":
update_pull(pr_number, dry_run=dry_run)
try:
update_pull(pr_number, dry_run=dry_run)
except MergeConflict as exc:
if exc.attempted_style == "merge" and UPDATE_STYLE == "merge":
# Merge-style conflict: try rebase as a one-shot fallback.
print(
f"::notice::merge-style update for PR #{pr_number} conflicted; "
f"retrying with rebase"
)
try:
update_pull(pr_number, dry_run=dry_run, style="rebase")
post_comment(
pr_number,
(
f"merge-queue: rebase-sync succeeded — the branch has been "
f"rebased onto `{WATCH_BRANCH}` at `{main_sha[:12]}`. "
"Waiting for CI on the refreshed head."
),
dry_run=dry_run,
)
# Rebase succeeded: remove the queue label so the queue moves
# on to other PRs. The author re-adds the label once CI passes,
# which also confirms the sync is good. This prevents the queue
# from blocking on one PR's CI while newer PRs wait.
remove_label(pr_number, QUEUE_LABEL, dry_run=dry_run)
print(f"::notice::PR #{pr_number} removed from queue; re-add label after CI passes")
return 0
except MergeConflict:
pass # Fall through to conflict-handling below.
# Rebase also conflicted, or UPDATE_STYLE=rebase already.
msg = (
f"merge-queue: **merge conflict** — "
f"the branch cannot be automatically synced with `{WATCH_BRANCH}` "
f"(conflicts in both merge and rebase styles). "
"Please resolve the conflicts locally and push the fix, or rebase "
"the branch onto the latest main. Once conflicts are resolved, "
"re-add the `merge-queue` label to re-enter the queue."
)
post_comment(pr_number, msg, dry_run=dry_run)
remove_label(pr_number, QUEUE_LABEL, dry_run=dry_run)
sys.stderr.write(
f"::error::queue: PR #{pr_number} has merge conflicts with "
f"{WATCH_BRANCH}; removed queue label and posted comment.\n"
)
return 0
post_comment(
pr_number,
(
@@ -398,6 +504,11 @@ def process_once(*, dry_run: bool = False) -> int:
),
dry_run=dry_run,
)
# Remove the queue label so the queue moves on to other PRs.
# The author re-adds the label once CI passes, which confirms the
# sync is good and triggers a fresh CI run on the updated head.
remove_label(pr_number, QUEUE_LABEL, dry_run=dry_run)
print(f"::notice::PR #{pr_number} removed from queue; re-add label after CI passes")
return 0
if decision.ready:
latest_main_sha = get_branch_head(WATCH_BRANCH)
@@ -407,7 +518,20 @@ def process_once(*, dry_run: bool = False) -> int:
"deferring to next tick"
)
return 0
merge_pull(pr_number, dry_run=dry_run)
try:
merge_pull(pr_number, dry_run=dry_run)
except PreReceiveBlocked as exc:
msg = (
"merge-queue: **blocked by pre-receive hook** — "
"the Gitea server-side hook is preventing API merges for this PR. "
"Please merge via the UI at the link above, or ask a repo admin "
"to temporarily disable the hook if an emergency merge is needed."
)
post_comment(exc.pr_number, msg, dry_run=dry_run)
sys.stderr.write(
f"::error::queue: PR #{exc.pr_number} blocked by pre-receive hook "
f"(HTTP {exc.status}); posted comment and skipping.\n"
)
return 0
return 0
@@ -419,12 +543,42 @@ def main() -> int:
_require_runtime_env()
try:
return process_once(dry_run=args.dry_run)
except ApiError as exc:
# API errors (401/403/404/500) are transient for a queue tick —
# log and exit 0 so the workflow is not marked failed and the next
# tick can retry. Returning non-zero would permanently fail the
# workflow run, blocking future ticks.
sys.stderr.write(f"::error::queue API error: {exc}\n")
except PreReceiveBlocked as exc:
# Pre-receive hook is blocking API merges. Post a comment so humans
# know the PR is in the queue but blocked, then skip it.
msg = (
"merge-queue: **blocked by pre-receive hook** — "
"the Gitea server-side hook is preventing API merges for this PR. "
"Please merge via the UI at the link above, or ask a repo admin "
"to temporarily disable the hook if an emergency merge is needed."
)
try:
post_comment(exc.pr_number, msg, dry_run=args.dry_run)
except Exception:
pass # Don't fail the tick if commenting also fails.
sys.stderr.write(
f"::error::queue: PR #{exc.pr_number} blocked by pre-receive hook "
f"(HTTP {exc.status}); posted comment and skipping.\n"
)
return 0
except MergeConflict as exc:
# MergeConflict is handled inline in process_once (update step).
# This catch-all handles any edge case where it escapes.
msg = (
f"merge-queue: **merge conflict** — "
f"the branch cannot be automatically synced with `{WATCH_BRANCH}`. "
"Please resolve the conflicts locally and push, then re-add "
"`merge-queue` to re-enter the queue."
)
try:
post_comment(exc.pr_number, msg, dry_run=args.dry_run)
remove_label(exc.pr_number, QUEUE_LABEL, dry_run=args.dry_run)
except Exception:
pass
sys.stderr.write(
f"::error::queue: PR #{exc.pr_number} merge conflict "
f"(style={exc.attempted_style}); removed queue label and skipping.\n"
)
return 0
except urllib.error.URLError as exc:
sys.stderr.write(f"::error::queue network error: {exc}\n")