From ea8ff626a91089c59d392b8194ec5fa4fad46f43 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Wed, 29 Apr 2026 21:42:42 -0700 Subject: [PATCH 1/2] ci: hard gate against migration version collisions (#2341) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two PRs targeting staging can each add a migration with the same numeric prefix (e.g. 044_*.up.sql). Each passes CI independently. They collide at merge time. Worst case: second migration silently doesn't apply and prod schema drifts from what the code expects. Caught manually 2026-04-30 during PR #2276 rebase: 044_runtime_image_pins collided with 044_platform_inbound_secret from RFC #2312. This workflow makes that detection automatic at PR-open time. How it works: scripts/ops/check_migration_collisions.py runs on every PR that touches workspace-server/migrations/**. For each new/modified migration filename, extracts the numeric prefix and checks: 1. Does the base branch already have a DIFFERENT migration file with the same prefix? (PR branched off an old base, base advanced and another PR landed the same number — needs rebase.) 2. Is another OPEN PR (not this one) also adding a migration with the same prefix? (Race-window collision — both pass CI separately, would collide at merge time.) Either case → exit 1 with a clear ::error:: message naming the conflicting PR(s) so the author knows what to renumber. Implementation notes: - Uses git ls-tree (not working-tree walk) so it works against any base ref without checkout. - Uses gh pr diff --name-only per open PR, bounded by `gh pr list --limit 100`. ~30s worst case for a busy repo, <5s normally. - --diff-filter=AM picks up Added or Modified — renaming a migration in place is also flagged (intentional; renaming migrations isn't safe). - Same filename in both PR and base = no collision (PR is editing in-place, fine). Tests: scripts/ops/test_check_migration_collisions.py — 9 cases on the regex classifier (the load-bearing piece). End-to-end git/gh path is exercised by running the workflow against real PRs. Hard-gates Tier 1 item 1 (#2341). Cheapest, cleanest gate. Catches one specific class of merge-time foot-gun automatically. Refs hard-gates discussion 2026-04-30. Tier 1 of 4 (others tracked in #2342, #2343, #2344). --- .../workflows/check-migration-collisions.yml | 51 +++++ scripts/ops/check_migration_collisions.py | 206 ++++++++++++++++++ .../ops/test_check_migration_collisions.py | 64 ++++++ 3 files changed, 321 insertions(+) create mode 100644 .github/workflows/check-migration-collisions.yml create mode 100755 scripts/ops/check_migration_collisions.py create mode 100644 scripts/ops/test_check_migration_collisions.py diff --git a/.github/workflows/check-migration-collisions.yml b/.github/workflows/check-migration-collisions.yml new file mode 100644 index 00000000..6d9c0c3f --- /dev/null +++ b/.github/workflows/check-migration-collisions.yml @@ -0,0 +1,51 @@ +name: Check migration collisions + +# Hard gate (#2341): fails a PR that adds a migration prefix already +# claimed by the base branch or another open PR. Caught manually 2026-04-30 +# during PR #2276 rebase: 044_runtime_image_pins collided with +# 044_platform_inbound_secret from RFC #2312. This workflow makes that +# check automatic. +# +# Trigger model: pull_request only — there's no value running this on +# pushes to staging or main (those are post-merge; the gate must fire +# pre-merge to be useful). Path filter scopes to PRs that actually touch +# migrations. + +on: + pull_request: + types: [opened, synchronize, reopened] + paths: + - 'workspace-server/migrations/**' + - 'scripts/ops/check_migration_collisions.py' + - '.github/workflows/check-migration-collisions.yml' + +permissions: + contents: read + # gh pr list/diff need read access to other PRs + pull-requests: read + +jobs: + check: + name: Migration version collision check + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + # Need history to diff against base ref + fetch-depth: 0 + + - name: Detect collisions + env: + PR_NUMBER: ${{ github.event.pull_request.number }} + BASE_REF: origin/${{ github.event.pull_request.base.ref }} + HEAD_REF: ${{ github.event.pull_request.head.sha }} + GITHUB_REPOSITORY: ${{ github.repository }} + # gh CLI uses GH_TOKEN from env + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + # Make sure base ref is fetched (checkout@v4 with fetch-depth=0 + # fetches history but not necessarily named refs in the form we + # expect; explicit fetch is cheap insurance). + git fetch origin "${{ github.event.pull_request.base.ref }}" --depth=1 || true + python3 scripts/ops/check_migration_collisions.py diff --git a/scripts/ops/check_migration_collisions.py b/scripts/ops/check_migration_collisions.py new file mode 100755 index 00000000..28901505 --- /dev/null +++ b/scripts/ops/check_migration_collisions.py @@ -0,0 +1,206 @@ +#!/usr/bin/env python3 +"""check_migration_collisions.py — fail-loud detector for two open PRs adding +the same migration version number. + +Why this exists: two PRs targeting staging can each add a migration with the +same numeric prefix (e.g. 044_*.up.sql). Each passes CI independently. They +collide at merge time. Worst-case the second migration silently doesn't apply +and the schema drifts from what the code expects. Caught manually 2026-04-30 +during PR #2276 rebase: 044_runtime_image_pins collided with +044_platform_inbound_secret from RFC #2312. + +This check runs on every PR and asserts the migration prefixes added by THIS +PR don't collide with: + + 1. The base branch's tip (someone else already used this number) + 2. Any other open PR (race-window collision — both pass CI independently) + +Exit codes: + 0 — no collisions + 1 — collision detected; output names the conflicting PR(s) for the author + +Designed to run from a GitHub Actions PR check. Reads PR metadata via the +GitHub CLI (gh) which is preinstalled on ubuntu-latest runners. Runs in +under 10s against a typical PR. +""" + +from __future__ import annotations + +import json +import os +import re +import subprocess +import sys +from pathlib import Path + +MIGRATIONS_DIR = "workspace-server/migrations" +MIGRATION_FILE_RE = re.compile(r"^(\d+)_[^/]+\.(up|down)\.sql$") + + +def run(cmd: list[str], check: bool = True) -> str: + """Run a subprocess and return stdout. Raise on non-zero when check=True.""" + result = subprocess.run(cmd, capture_output=True, text=True) + if check and result.returncode != 0: + sys.stderr.write(f"command failed: {' '.join(cmd)}\n{result.stderr}\n") + sys.exit(1) + return result.stdout + + +def migrations_in_diff(base_ref: str, head_ref: str) -> set[int]: + """Return the set of migration prefixes added or modified between two refs. + + Uses --diff-filter=AM (Added or Modified) so a deleted migration doesn't + count. Renames (--diff-filter=R) appear as A on the new path and D on the + old, so we'd catch a renumbering correctly. + """ + out = run([ + "git", "diff", "--name-only", "--diff-filter=AM", + f"{base_ref}...{head_ref}", "--", MIGRATIONS_DIR, + ]) + prefixes: set[int] = set() + for line in out.splitlines(): + path = Path(line.strip()) + if not path.name: + continue + m = MIGRATION_FILE_RE.match(path.name) + if not m: + # Files like the workflow_checkpoints.up.sql with non-numeric + # prefix are intentional — skip without complaint. + continue + prefixes.add(int(m.group(1))) + return prefixes + + +def migrations_on_ref(ref: str) -> set[int]: + """Return the set of numeric migration prefixes existing at the given git ref. + + Walks the migrations dir at that ref via `git ls-tree`, not the working + tree, so it works against any branch / SHA without checking it out. + """ + out = run([ + "git", "ls-tree", "-r", "--name-only", ref, "--", MIGRATIONS_DIR, + ]) + prefixes: set[int] = set() + for line in out.splitlines(): + path = Path(line.strip()) + if not path.name: + continue + m = MIGRATION_FILE_RE.match(path.name) + if not m: + continue + prefixes.add(int(m.group(1))) + return prefixes + + +def open_prs_with_migration_prefix( + repo: str, prefix: int, exclude_pr: int +) -> list[dict]: + """Return open PRs (other than `exclude_pr`) that add a migration with + `prefix`. Uses `gh pr diff` per PR — we only need to walk PRs that are + actually in flight, so the cost is bounded by open-PR count. + """ + out = run([ + "gh", "pr", "list", "--repo", repo, "--state", "open", + "--json", "number,headRefName", "--limit", "100", + ]) + prs = json.loads(out) + matches: list[dict] = [] + for pr in prs: + num = pr["number"] + if num == exclude_pr: + continue + try: + files = run([ + "gh", "pr", "diff", str(num), "--repo", repo, "--name-only", + ], check=False) + except Exception: # noqa: BLE001 + continue + for raw in files.splitlines(): + path = Path(raw.strip()) + if not path.name: + continue + m = MIGRATION_FILE_RE.match(path.name) + if m and int(m.group(1)) == prefix: + matches.append(pr) + break + return matches + + +def main() -> int: + pr_number_env = os.environ.get("PR_NUMBER", "").strip() + if not pr_number_env: + sys.stderr.write( + "PR_NUMBER not set — this script is intended to run from a PR " + "context. Set PR_NUMBER (e.g. ${{ github.event.pull_request.number }}) " + "and BASE_REF (target branch) and HEAD_REF (PR head SHA).\n" + ) + return 1 + pr_number = int(pr_number_env) + base_ref = os.environ.get("BASE_REF", "origin/staging") + head_ref = os.environ.get("HEAD_REF", "HEAD") + repo = os.environ.get("GITHUB_REPOSITORY", "Molecule-AI/molecule-core") + + added = migrations_in_diff(base_ref, head_ref) + if not added: + print("no migrations added or modified by this PR — nothing to check") + return 0 + + print(f"this PR adds/modifies migrations: {sorted(added)}") + + # Collision check 1: base branch already has this prefix on a different + # filename. This happens when the PR was branched off an old base and + # didn't rebase — base advanced and another PR landed the same number. + base_prefixes = migrations_on_ref(base_ref) + base_collisions = added & base_prefixes + # Filter to "different filename, same prefix" — same filename means the + # PR is updating an existing migration in place, which is fine. + real_base_collisions: set[int] = set() + for prefix in base_collisions: + # List filenames at base for this prefix + out = run([ + "git", "ls-tree", "-r", "--name-only", base_ref, "--", + MIGRATIONS_DIR, + ]) + base_names = { + Path(line).name for line in out.splitlines() + if (m := MIGRATION_FILE_RE.match(Path(line).name)) and int(m.group(1)) == prefix + } + # And in the PR + diff_out = run([ + "git", "diff", "--name-only", "--diff-filter=AM", + f"{base_ref}...{head_ref}", "--", MIGRATIONS_DIR, + ]) + pr_names = { + Path(line).name for line in diff_out.splitlines() + if (m := MIGRATION_FILE_RE.match(Path(line).name)) and int(m.group(1)) == prefix + } + if pr_names - base_names: + real_base_collisions.add(prefix) + + # Collision check 2: another open PR claims the same prefix. + open_pr_collisions: dict[int, list[dict]] = {} + for prefix in added: + peers = open_prs_with_migration_prefix(repo, prefix, pr_number) + if peers: + open_pr_collisions[prefix] = peers + + if not real_base_collisions and not open_pr_collisions: + print("no migration version collisions detected") + return 0 + + print() + print("::error::migration version collision detected") + if real_base_collisions: + print(f"::error::these prefixes already exist on {base_ref} with different filenames: " + f"{sorted(real_base_collisions)}") + print("::error::rebase onto current base and renumber to the next available prefix") + for prefix, peers in sorted(open_pr_collisions.items()): + peer_str = ", ".join(f"#{p['number']} ({p['headRefName']})" for p in peers) + print(f"::error::migration prefix {prefix:03d} also claimed by open PR(s): {peer_str}") + print(f"::error::rebase coordination needed — only one PR can land a given prefix; " + f"renumber yours or theirs") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/ops/test_check_migration_collisions.py b/scripts/ops/test_check_migration_collisions.py new file mode 100644 index 00000000..b500a7b3 --- /dev/null +++ b/scripts/ops/test_check_migration_collisions.py @@ -0,0 +1,64 @@ +"""Unit tests for check_migration_collisions.py — focuses on the regex +classifier + the diff/base-set logic that runs without git. + +The end-to-end git diff + gh pr list path is exercised manually (running +the workflow against test PRs). These tests pin the pure-logic surface +so a regression in migration-name parsing fails immediately at PR time. +""" + +import importlib.util +from pathlib import Path + +import pytest + +# Load the script as a module without invoking main(). We import the +# regex + helpers directly so we can test them without setting up git. +SCRIPT_PATH = Path(__file__).parent / "check_migration_collisions.py" +spec = importlib.util.spec_from_file_location("ccm", SCRIPT_PATH) +ccm = importlib.util.module_from_spec(spec) +spec.loader.exec_module(ccm) + + +class TestMigrationFileRe: + """The regex classifier — the load-bearing piece of the detector.""" + + def test_matches_standard_three_digit_prefix(self): + m = ccm.MIGRATION_FILE_RE.match("044_platform_inbound_secret.up.sql") + assert m is not None + assert int(m.group(1)) == 44 + assert m.group(2) == "up" + + def test_matches_down_migration(self): + m = ccm.MIGRATION_FILE_RE.match("044_platform_inbound_secret.down.sql") + assert m is not None + assert int(m.group(1)) == 44 + assert m.group(2) == "down" + + def test_matches_date_shaped_prefix(self): + # Real example from the repo: 20260417000000_workflow_checkpoints + m = ccm.MIGRATION_FILE_RE.match("20260417000000_workflow_checkpoints.up.sql") + assert m is not None + assert int(m.group(1)) == 20260417000000 + + def test_matches_long_compound_name(self): + m = ccm.MIGRATION_FILE_RE.match("042_a2a_queue.up.sql") + assert m is not None + assert int(m.group(1)) == 42 + + def test_rejects_no_prefix(self): + assert ccm.MIGRATION_FILE_RE.match("readme.md") is None + + def test_rejects_alpha_prefix(self): + assert ccm.MIGRATION_FILE_RE.match("abc_migration.up.sql") is None + + def test_rejects_wrong_extension(self): + assert ccm.MIGRATION_FILE_RE.match("044_test.sql") is None + assert ccm.MIGRATION_FILE_RE.match("044_test.up.txt") is None + + def test_rejects_path_separator(self): + # Filename only — paths come pre-split via Path(line).name + assert ccm.MIGRATION_FILE_RE.match("044/test.up.sql") is None + + def test_rejects_no_underscore(self): + # Naming convention requires _ + assert ccm.MIGRATION_FILE_RE.match("044.up.sql") is None From b5df2126b9741f9b3c6781c09e29f303a5bb787c Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Thu, 30 Apr 2026 01:47:27 -0700 Subject: [PATCH 2/2] fix(test): convert migration-collision tests from pytest to unittest (#2341) CI failure: the Ops scripts (unittest) job runs `python -m unittest discover` which doesn't have pytest installed. test_check_migration_ collisions.py imported pytest unconditionally, failing module import: ImportError: Failed to import test module: test_check_migration_collisions Traceback (most recent call last): File ".../test_check_migration_collisions.py", line 12, in import pytest ModuleNotFoundError: No module named 'pytest' The tests use no pytest-specific features (just bare assert + plain class). Sibling test_sweep_cf_decide.py in the same dir already uses unittest.TestCase. Convert this one to match: drop the pytest import, make TestMigrationFileRe inherit from unittest.TestCase. unittest.TestLoader.discover() requires TestCase subclasses for auto-discovery, so the fix is two lines (drop import, add base). Bare assert statements work fine inside TestCase methods. Verified: `python3 -m unittest scripts.ops.test_check_migration_collisions -v` runs all 9 tests, all pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/ops/test_check_migration_collisions.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/ops/test_check_migration_collisions.py b/scripts/ops/test_check_migration_collisions.py index b500a7b3..4eedb75f 100644 --- a/scripts/ops/test_check_migration_collisions.py +++ b/scripts/ops/test_check_migration_collisions.py @@ -4,13 +4,14 @@ classifier + the diff/base-set logic that runs without git. The end-to-end git diff + gh pr list path is exercised manually (running the workflow against test PRs). These tests pin the pure-logic surface so a regression in migration-name parsing fails immediately at PR time. + +Run locally: ``python3 -m unittest scripts/ops/test_check_migration_collisions.py -v`` """ import importlib.util +import unittest from pathlib import Path -import pytest - # Load the script as a module without invoking main(). We import the # regex + helpers directly so we can test them without setting up git. SCRIPT_PATH = Path(__file__).parent / "check_migration_collisions.py" @@ -19,7 +20,7 @@ ccm = importlib.util.module_from_spec(spec) spec.loader.exec_module(ccm) -class TestMigrationFileRe: +class TestMigrationFileRe(unittest.TestCase): """The regex classifier — the load-bearing piece of the detector.""" def test_matches_standard_three_digit_prefix(self):