forked from molecule-ai/molecule-core
Part of the post-#66 sweep to remove `gh` CLI dependencies that fail
silently against Gitea (which exposes /api/v1 only — no GraphQL → 405,
no /api/v3 → 404). Class A covers `gh pr list / view / diff / comment`
shapes.
Affected:
- `.github/workflows/auto-tag-runtime.yml`
Replaced `gh pr list --search SHA --json number,labels` with a curl
to `/api/v1/repos/.../pulls?state=closed&sort=newest&limit=50` +
jq filter on `merge_commit_sha == github.sha`. Same end-to-end
behaviour: locate the merged PR for this push, read its labels,
pick the bump kind. Defensive `?.name // empty` jq guard handles
unlabelled PRs without erroring. The 50-PR window is comfortably
larger than the volume of staging→main promotes that close in any
reasonable detection window.
- `scripts/check-stale-promote-pr.sh`
Rewrote `fetch_prs` and `post_comment` to call Gitea's REST API
directly. Gitea doesn't expose GitHub's compound `mergeStateStatus`
/ `reviewDecision` fields, so the new fetcher pulls
`/pulls?state=open&base=main` then for each PR pulls
`/pulls/{n}/reviews` and synthesizes the GitHub-shape JSON the rest
of the script (and the existing fixture-based unit tests) consume:
BLOCKED + REVIEW_REQUIRED ↔ mergeable=true AND 0 APPROVED reviews
DIRTY ↔ mergeable=false (alarm doesn't fire)
CLEAN + APPROVED ↔ mergeable=true AND ≥1 APPROVED review
Comment-posting moves to `POST /repos/.../issues/{n}/comments`
(Gitea treats PRs as issues for the comment surface, same as
GitHub's REST). All 23 fixture-driven unit tests still pass —
fixtures pass GitHub-shape JSON via PR_FIXTURE which short-circuits
the live fetch path.
- `scripts/ops/check_migration_collisions.py`
Replaced `gh pr list` + `gh pr diff` calls with stdlib `urllib`
against /api/v1. Helper `_gitea_get` centralizes auth + error
handling; uses GITEA_TOKEN env, falling back to GITHUB_TOKEN
(act_runner) and GH_TOKEN. Return shape from
`open_prs_with_migration_prefix` mimics the historical
`--json number,headRefName` so the call sites are unchanged. All 9
regex-classifier unit tests still pass; live integration test
against the production Gitea API returns 0 collisions for prefix=999
as expected.
curl invocation pattern is `curl --fail-with-body -sS` (NOT `-fsS` combined
with `--fail-with-body` — `-f`/`--fail` and `--fail-with-body` are mutually
exclusive in modern curl; caught by `curl: You must select either --fail or
--fail-with-body, not both` during local verification).
Token model: workflows pass act_runner's GITHUB_TOKEN (per-run, repo
read scope) — same surface used by the auto-sync fix in PR #66 plus
the surrounding workflows. No new repo secrets required.
Verification: bash unit tests (23/23 pass), python unittest (9/9 pass),
live curl call against production Gitea returns 200 with the expected
shape, YAML / shell / Python syntax all validate.
Closes part of #75. Other classes (D — `gh api`; F — `gh run list`)
land in follow-up PRs.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
291 lines
11 KiB
Python
Executable File
291 lines
11 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""check_migration_collisions.py — fail-loud detector for two open PRs adding
|
|
the same migration version number.
|
|
|
|
Why this exists: two PRs targeting staging can each add a migration with the
|
|
same numeric prefix (e.g. 044_*.up.sql). Each passes CI independently. They
|
|
collide at merge time. Worst-case the second migration silently doesn't apply
|
|
and the schema drifts from what the code expects. Caught manually 2026-04-30
|
|
during PR #2276 rebase: 044_runtime_image_pins collided with
|
|
044_platform_inbound_secret from RFC #2312.
|
|
|
|
This check runs on every PR and asserts the migration prefixes added by THIS
|
|
PR don't collide with:
|
|
|
|
1. The base branch's tip (someone else already used this number)
|
|
2. Any other open PR (race-window collision — both pass CI independently)
|
|
|
|
Exit codes:
|
|
0 — no collisions
|
|
1 — collision detected; output names the conflicting PR(s) for the author
|
|
|
|
Designed to run from a Gitea Actions PR check. Reads PR metadata via direct
|
|
HTTP calls to Gitea's REST API (`/api/v1/`), which on the molecule-ai fleet
|
|
lives at https://git.moleculesai.app. Runs in under 10s against a typical PR.
|
|
|
|
Post-2026-05-06 (Gitea migration, issue #75): the previous version called
|
|
the GitHub CLI (``gh pr list``, ``gh pr diff``). On Gitea those calls hit
|
|
either the GraphQL endpoint (HTTP 405) or /api/v3 (HTTP 404). This module
|
|
now talks to /api/v1 directly via urllib so it works against any Gitea
|
|
host without a `gh` install or extra dependencies.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
import urllib.error
|
|
import urllib.parse
|
|
import urllib.request
|
|
from pathlib import Path
|
|
|
|
# Repository-relative directory passed to `git diff` / `git ls-tree` as the
# pathspec that limits results to migration files.
MIGRATIONS_DIR = "workspace-server/migrations"
|
|
# Matched against a file's basename. Group 1 is the numeric version prefix
# (e.g. "044" in "044_runtime_image_pins.up.sql"); group 2 is the direction,
# "up" or "down". Basenames without a leading "<digits>_" do not match.
MIGRATION_FILE_RE = re.compile(r"^(\d+)_[^/]+\.(up|down)\.sql$")
|
|
|
|
|
|
def _gitea_api_url() -> str:
|
|
"""Resolve the Gitea API base URL.
|
|
|
|
act_runner forwards github.server_url as GITHUB_SERVER_URL; for the
|
|
molecule-ai fleet that's https://git.moleculesai.app. Append /api/v1
|
|
to get the REST root. Override directly via GITEA_API_URL for tests
|
|
or non-default hosts.
|
|
"""
|
|
env_override = os.environ.get("GITEA_API_URL", "").rstrip("/")
|
|
if env_override:
|
|
return env_override
|
|
server = os.environ.get("GITHUB_SERVER_URL", "https://git.moleculesai.app").rstrip("/")
|
|
return f"{server}/api/v1"
|
|
|
|
|
|
def _gitea_token() -> str:
|
|
"""Resolve the Gitea token from env. GITEA_TOKEN wins; falls back
|
|
to GITHUB_TOKEN (set by act_runner) and GH_TOKEN (operator habit
|
|
from the GitHub era)."""
|
|
return (
|
|
os.environ.get("GITEA_TOKEN")
|
|
or os.environ.get("GITHUB_TOKEN")
|
|
or os.environ.get("GH_TOKEN")
|
|
or ""
|
|
)
|
|
|
|
|
|
def _gitea_get(path: str, params: dict[str, str] | None = None) -> bytes | None:
    """GET ``path`` under the Gitea API root and return the raw response body.

    Args:
        path: Endpoint path relative to the API root; a leading ``/`` is
            tolerated.
        params: Optional query parameters, URL-encoded onto the request.

    Returns:
        The response body as bytes, or ``None`` on any request failure.

    Failures return ``None`` (rather than raise) because callers handle
    missing data by emitting an actionable workflow message instead of
    crashing the PR check on a transient API blip. Every failure is reported
    on stderr. Besides HTTP error statuses this covers:

    * ``OSError`` — includes ``URLError``, ``TimeoutError``, connection
      resets while reading the body, and ``socket.timeout`` on interpreters
      where that is not yet an alias of ``TimeoutError``;
    * ``http.client.HTTPException`` — protocol-level failures such as a
      truncated response;
    * ``ValueError`` — a malformed URL or an illegal header value.
    """
    # Function-scope import: needed only for the exception class below.
    import http.client

    base = _gitea_api_url()
    query = f"?{urllib.parse.urlencode(params)}" if params else ""
    url = f"{base}/{path.lstrip('/')}{query}"

    headers = {"Accept": "application/json"}
    token = _gitea_token()
    if token:
        headers["Authorization"] = f"token {token}"

    try:
        # Request construction is inside the try: it validates the URL and
        # raises ValueError when the scheme is missing.
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=20) as resp:  # noqa: S310
            return resp.read()
    except urllib.error.HTTPError as e:
        # Must precede the OSError clause: HTTPError is a URLError subclass.
        sys.stderr.write(f"Gitea API HTTP {e.code} on {path}: {e.reason}\n")
        return None
    except (OSError, http.client.HTTPException) as e:
        sys.stderr.write(f"Gitea API network error on {path}: {e}\n")
        return None
    except ValueError as e:
        sys.stderr.write(f"Gitea API request error on {path}: {e}\n")
        return None
|
|
|
|
|
|
def run(cmd: list[str], check: bool = True) -> str:
    """Run a subprocess and return its captured stdout as text.

    Args:
        cmd: Argument vector; executed directly, without a shell.
        check: When true, any failure is reported on stderr and the process
            exits with status 1.

    Returns:
        The command's stdout. With ``check=False`` this is returned even when
        the command exits non-zero.

    Raises:
        SystemExit: With ``check=True``, if the command exits non-zero or
            cannot be started at all.
        OSError: With ``check=False``, if the command cannot be started
            (e.g. the executable is missing).
    """
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except OSError as e:
        if not check:
            raise
        # Executable missing or not runnable: give the same "command failed"
        # diagnostic as a non-zero exit instead of a bare traceback.
        sys.stderr.write(f"command failed: {' '.join(cmd)}\n{e}\n")
        sys.exit(1)
    if check and result.returncode != 0:
        sys.stderr.write(f"command failed: {' '.join(cmd)}\n{result.stderr}\n")
        sys.exit(1)
    return result.stdout
|
|
|
|
|
|
def migrations_in_diff(base_ref: str, head_ref: str) -> set[int]:
    """Return the numeric migration prefixes added or modified between two refs.

    Runs ``git diff --name-only`` over ``base_ref...head_ref`` restricted to
    the migrations directory.

    ``--diff-filter=AM`` keeps Added and Modified paths, so a deleted
    migration doesn't count. ``--no-renames`` switches off git's rename
    detection, so a renamed file is listed as an addition of the new path
    plus a deletion of the old one. With rename detection active, a
    renumbered migration (say ``044_x`` -> ``045_x``) would carry status R,
    be dropped by the AM filter, and its new prefix would never be checked.

    Files whose basename has no numeric prefix (e.g.
    ``workflow_checkpoints.up.sql``) are intentional and skipped silently.
    """
    out = run([
        "git", "diff", "--name-only", "--no-renames", "--diff-filter=AM",
        f"{base_ref}...{head_ref}", "--", MIGRATIONS_DIR,
    ])
    prefixes: set[int] = set()
    for line in out.splitlines():
        name = Path(line.strip()).name
        if not name:
            continue
        m = MIGRATION_FILE_RE.match(name)
        if m:
            prefixes.add(int(m.group(1)))
    return prefixes
|
|
|
|
|
|
def migrations_on_ref(ref: str) -> set[int]:
    """Collect the numeric migration prefixes present at ``ref``.

    The migrations directory is listed with ``git ls-tree`` at the given ref
    rather than read from the working tree, so any branch or SHA can be
    inspected without checking it out. Entries whose basename does not match
    the migration filename pattern are ignored.
    """
    listing = run([
        "git", "ls-tree", "-r", "--name-only", ref, "--", MIGRATIONS_DIR,
    ])
    basenames = (Path(entry.strip()).name for entry in listing.splitlines())
    return {
        int(hit.group(1))
        for basename in basenames
        if basename and (hit := MIGRATION_FILE_RE.match(basename))
    }
|
|
|
|
|
|
def _gitea_get_paginated(
    path: str,
    params: dict[str, str] | None = None,
    page_size: int = 50,
    max_pages: int = 20,
) -> list | None:
    """Fetch every page of a Gitea list endpoint and return the merged items.

    Requests ``page=1, 2, ...`` with ``limit=page_size`` until an empty page
    comes back. Stopping on an empty page, rather than on a short one, stays
    correct if the server caps the page size below what was requested; the
    cost is one extra request per listing.

    Returns ``None`` when nothing could be fetched (the first page failed or
    was not a JSON list). If a later page fails, the items gathered so far
    are returned: partial data is more useful to a best-effort check than
    none. ``max_pages`` bounds the loop against a misbehaving server.
    """
    items: list = []
    previous: list | None = None
    for page in range(1, max_pages + 1):
        query = dict(params or {})
        query["page"] = str(page)
        query["limit"] = str(page_size)
        body = _gitea_get(path, query)
        if body is None:
            # _gitea_get has already reported the failure on stderr.
            return items or None
        try:
            chunk = json.loads(body)
        except ValueError as e:  # JSONDecodeError and decode errors
            sys.stderr.write(f"Gitea API returned invalid JSON on {path}: {e}\n")
            return items or None
        if not isinstance(chunk, list):
            sys.stderr.write(f"Gitea API returned a non-list payload on {path}\n")
            return items or None
        if not chunk or chunk == previous:
            # Empty page: done. Identical page: the server is ignoring
            # `page`, so stop instead of collecting duplicates.
            break
        items.extend(chunk)
        previous = chunk
    return items


def open_prs_with_migration_prefix(
    repo: str, prefix: int, exclude_pr: int
) -> list[dict]:
    """Return open PRs (other than `exclude_pr`) that touch a migration with
    `prefix`.

    Walks all pages of Gitea's `/repos/{owner}/{repo}/pulls` (state=open) and,
    for each PR, all pages of its changed-file list at `/pulls/{n}/files`.
    The cost is bounded by the open-PR count. The return shape mimics the
    GitHub CLI's `--json number,headRefName`:
    ``[{"number": int, "headRefName": str}, ...]``.

    Best-effort by design: if the PR list cannot be fetched an empty list is
    returned, and a PR whose file list cannot be fetched is skipped with a
    warning, so a transient Gitea blip doesn't fail the PR check (the
    base-branch collision check runs locally and is the more common failure
    mode).
    """
    prs = _gitea_get_paginated(f"repos/{repo}/pulls", {"state": "open"})
    if prs is None:
        return []

    matches: list[dict] = []
    for pr in prs:
        if not isinstance(pr, dict) or "number" not in pr:
            continue  # unexpected payload element; nothing to inspect
        num = pr["number"]
        if num == exclude_pr:
            continue
        # Gitea returns the head ref under .head.ref (REST shape);
        # GitHub CLI's --json headRefName flattens it. Normalize on
        # the way out so callers see the historical shape.
        head_ref_name = (pr.get("head") or {}).get("ref", "")

        files = _gitea_get_paginated(f"repos/{repo}/pulls/{num}/files")
        if files is None:
            sys.stderr.write(
                f"warning: could not list files for PR #{num}; "
                f"it is excluded from the collision check\n"
            )
            continue

        for f in files:
            if not isinstance(f, dict):
                continue
            # NOTE(review): assumes Gitea reports removed files with
            # status == "deleted" — confirm against the API schema. A PR
            # that only deletes a migration does not claim its prefix,
            # matching the local --diff-filter=AM semantics.
            if f.get("status") == "deleted":
                continue
            # Gitea's /pulls/{n}/files returns objects with `.filename`
            # (same as GitHub's REST). Older Gitea versions emit
            # `.name` instead — handle both.
            raw = f.get("filename") or f.get("name") or ""
            if not isinstance(raw, str):
                continue
            name = Path(raw.strip()).name
            if not name:
                continue
            m = MIGRATION_FILE_RE.match(name)
            if m and int(m.group(1)) == prefix:
                matches.append({"number": num, "headRefName": head_ref_name})
                break  # one hit is enough to flag this PR
    return matches
|
|
|
|
|
|
def _migration_names_by_prefix(listing: str) -> dict[int, set[str]]:
    """Group the migration basenames in a newline-separated path listing by
    their numeric prefix; non-migration paths are ignored."""
    grouped: dict[int, set[str]] = {}
    for line in listing.splitlines():
        name = Path(line.strip()).name
        m = MIGRATION_FILE_RE.match(name) if name else None
        if m:
            grouped.setdefault(int(m.group(1)), set()).add(name)
    return grouped


def main() -> int:
    """Run both collision checks for the current PR.

    Environment:
        PR_NUMBER: Required; the PR's number (must be an integer).
        BASE_REF: Target branch ref; defaults to ``origin/staging``.
        HEAD_REF: PR head ref/SHA; defaults to ``HEAD``.
        GITHUB_REPOSITORY: ``owner/repo``; defaults to
            ``molecule-ai/molecule-core``.

    Blank values for the optional variables are treated as unset.

    Returns:
        0 when no collision is found (or the PR touches no migrations);
        1 when a collision is detected or PR_NUMBER is missing/invalid.
    """
    pr_number_env = os.environ.get("PR_NUMBER", "").strip()
    if not pr_number_env:
        sys.stderr.write(
            "PR_NUMBER not set — this script is intended to run from a PR "
            "context. Set PR_NUMBER (e.g. ${{ github.event.pull_request.number }}) "
            "and BASE_REF (target branch) and HEAD_REF (PR head SHA).\n"
        )
        return 1
    try:
        pr_number = int(pr_number_env)
    except ValueError:
        sys.stderr.write(f"PR_NUMBER must be an integer, got {pr_number_env!r}\n")
        return 1

    # `or` rather than a .get() default so an exported-but-empty variable
    # still falls back to the default.
    base_ref = os.environ.get("BASE_REF") or "origin/staging"
    head_ref = os.environ.get("HEAD_REF") or "HEAD"
    # Default kept lowercase to match the Gitea-canonical org name
    # (post-2026-05-06 migration). Tests + workflow context override
    # via GITHUB_REPOSITORY which act_runner sets per-run.
    repo = os.environ.get("GITHUB_REPOSITORY") or "molecule-ai/molecule-core"

    added = migrations_in_diff(base_ref, head_ref)
    if not added:
        print("no migrations added or modified by this PR — nothing to check")
        return 0

    print(f"this PR adds/modifies migrations: {sorted(added)}")

    # Collision check 1: base branch already has this prefix on a different
    # filename. This happens when the PR was branched off an old base and
    # didn't rebase — base advanced and another PR landed the same number.
    base_collisions = added & migrations_on_ref(base_ref)
    real_base_collisions: set[int] = set()
    if base_collisions:
        # Both listings are independent of the prefix, so fetch them once
        # instead of once per colliding prefix.
        base_names = _migration_names_by_prefix(run([
            "git", "ls-tree", "-r", "--name-only", base_ref, "--",
            MIGRATIONS_DIR,
        ]))
        # --no-renames: list a renumbered (renamed) file as an addition so
        # the AM filter does not hide it.
        pr_names = _migration_names_by_prefix(run([
            "git", "diff", "--name-only", "--no-renames", "--diff-filter=AM",
            f"{base_ref}...{head_ref}", "--", MIGRATIONS_DIR,
        ]))
        # Filter to "different filename, same prefix" — same filename means
        # the PR is updating an existing migration in place, which is fine.
        real_base_collisions = {
            prefix
            for prefix in base_collisions
            if pr_names.get(prefix, set()) - base_names.get(prefix, set())
        }

    # Collision check 2: another open PR claims the same prefix.
    open_pr_collisions: dict[int, list[dict]] = {}
    for prefix in added:
        peers = open_prs_with_migration_prefix(repo, prefix, pr_number)
        if peers:
            open_pr_collisions[prefix] = peers

    if not real_base_collisions and not open_pr_collisions:
        print("no migration version collisions detected")
        return 0

    print()
    print("::error::migration version collision detected")
    if real_base_collisions:
        print(f"::error::these prefixes already exist on {base_ref} with different filenames: "
              f"{sorted(real_base_collisions)}")
        print("::error::rebase onto current base and renumber to the next available prefix")
    for prefix, peers in sorted(open_pr_collisions.items()):
        peer_str = ", ".join(f"#{p['number']} ({p['headRefName']})" for p in peers)
        print(f"::error::migration prefix {prefix:03d} also claimed by open PR(s): {peer_str}")
        print("::error::rebase coordination needed — only one PR can land a given prefix; "
              "renumber yours or theirs")
    return 1
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the process exit
    # status (0 = no collisions, 1 = collision or bad invocation).
    raise SystemExit(main())
|