refactor(ops): apply simplify findings on #2027 PR

Code-quality + efficiency review of PR #2079: - Hoist all_slugs = prod_slugs | staging_slugs out of decide() into the caller (was rebuilt on every record — 1k records × ~50-slug union per call). decide() signature now (r, all_slugs, ec2_names). - Compile regexes at module scope (_WS_RE, _E2E_RE, _TENANT_RE) + hoist platform-core literal set (_PLATFORM_CORE_NAMES). Same change mirrored in the bash heredoc. - Drop decorative # Rule N: comments (numbering was out of order, 3 before 2 — actively confusing). - Move the "edits must mirror" reminder OUTSIDE the CANONICAL DECIDE block in the .sh file, eliminating the .replace() comment-skip hack in TestParityWithBashScript. - Drop per-line .strip() in _slice_canonical (would mask a real indentation bug; both blocks already at column 0). - subTest() in TestPlatformCore loops so a single failure no longer short-circuits the rest of the items. - merge_group + concurrency on test-ops-scripts.yml (parity with ci.yml gate behaviour). - Fix don't apostrophe in inline comment that closed the python heredoc's single-quote and broke bash -n. All 25 tests still pass. bash -n clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 00:28:15 -07:00 · 2026-04-26 00:28:15 -07:00 · 6494e9192b
commit 6494e9192b
parent ba78a5c00d
4 changed files with 86 additions and 116 deletions
--- a/.github/workflows/test-ops-scripts.yml
+++ b/.github/workflows/test-ops-scripts.yml
@ -15,6 +15,12 @@ on:
    paths:
      - 'scripts/ops/**'
      - '.github/workflows/test-ops-scripts.yml'
+  merge_group:
+    types: [checks_requested]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true

 jobs:
  test:
--- a/scripts/ops/sweep-cf-orphans.sh
+++ b/scripts/ops/sweep-cf-orphans.sh
@ -117,66 +117,69 @@ log "  CF records: $TOTAL_CF"
 #   5. Anything else → keep (we only sweep patterns we understand).

 export PROD_SLUGS STAGING_SLUGS EC2_NAMES TOTAL_CF
+# Edits inside the CANONICAL DECIDE block below must mirror
+# scripts/ops/sweep_cf_decide.py — the parity test in
+# test_sweep_cf_decide.py asserts they match byte-for-byte.
 DECISIONS=$(echo "$CF_JSON" | python3 -c '
 import json, os, re, sys
 d = json.load(sys.stdin)
 prod_slugs = set(os.environ["PROD_SLUGS"].split())
 staging_slugs = set(os.environ["STAGING_SLUGS"].split())
+all_slugs = prod_slugs | staging_slugs
 ec2_names = set(n for n in os.environ["EC2_NAMES"].split() if n)

+_PLATFORM_CORE_NAMES = {
+    "api.moleculesai.app", "app.moleculesai.app", "doc.moleculesai.app",
+    "send.moleculesai.app", "status.moleculesai.app", "www.moleculesai.app",
+    "staging-api.moleculesai.app",
+}
+_WS_RE = re.compile(r"^(ws-[a-f0-9]{8}-[a-f0-9]+)(?:\.staging)?\.moleculesai\.app$")
+_E2E_RE = re.compile(r"^(e2e-[^.]+)(?:\.staging)?\.moleculesai\.app$")
+_TENANT_RE = re.compile(r"^([a-z0-9][a-z0-9-]*)(?:\.staging)?\.moleculesai\.app$")
+
 # CANONICAL DECIDE BEGIN
-# Edits inside this block must mirror scripts/ops/sweep_cf_decide.py — the
-# parity test in test_sweep_cf_decide.py asserts they match byte-for-byte.
-def decide(r, prod_slugs, staging_slugs, ec2_names):
+def decide(r, all_slugs, ec2_names):
    n = r["name"]
    rid = r["id"]
    typ = r["type"]
-    all_slugs = prod_slugs | staging_slugs

-    # Rule 1: platform core — leave alone
    if n == "moleculesai.app":
        return ("keep", "apex", rid, n, typ)
    if n.startswith("_") or n.endswith("._domainkey.moleculesai.app"):
        return ("keep", "verification/key", rid, n, typ)
-    if n in {"api.moleculesai.app","app.moleculesai.app","doc.moleculesai.app",
-            "send.moleculesai.app","status.moleculesai.app","www.moleculesai.app",
-            "staging-api.moleculesai.app"}:
+    if n in _PLATFORM_CORE_NAMES:
        return ("keep", "platform-core", rid, n, typ)

-    # Rule 3: ws-<hex8>-<rest>.(staging.)moleculesai.app
-    m = re.match(r"^(ws-[a-f0-9]{8}-[a-f0-9]+)(?:\.staging)?\.moleculesai\.app$", n)
+    m = _WS_RE.match(n)
    if m:
        prefix = m.group(1)
-        # Live EC2 names are like "ws-d3605ef2-f7d" — same shape as DNS subdomain.
+        # Live EC2 names share the ws-<hex8>-<rest> shape with the DNS subdomain.
        for ename in ec2_names:
            if ename.startswith(prefix):
                return ("keep", "live-ec2", rid, n, typ)
        return ("delete", "orphan-ws", rid, n, typ)

-    # Rule 4: e2e-* tenants (includes canary, canvas variants)
-    m = re.match(r"^(e2e-[^.]+)(?:\.staging)?\.moleculesai\.app$", n)
+    m = _E2E_RE.match(n)
    if m:
        slug = m.group(1)
        if slug in all_slugs:
            return ("keep", "live-e2e-tenant", rid, n, typ)
        return ("delete", "orphan-e2e-tenant", rid, n, typ)

-    # Rule 2: any other tenant subdomain (slug.moleculesai.app or slug.staging.moleculesai.app)
-    m = re.match(r"^([a-z0-9][a-z0-9-]*)(?:\.staging)?\.moleculesai\.app$", n)
+    m = _TENANT_RE.match(n)
    if m:
        slug = m.group(1)
        if slug in all_slugs:
            return ("keep", "live-tenant", rid, n, typ)
-        # Only flag as orphan if name looks like a tenant (not a one-off like "hermes-final-*")
-        # To avoid false-positive nukes on ad-hoc records, we KEEP anything that
-        # does not match a known pattern. Orphan only for explicit tenant-shaped names.
+        # KEEP unknown tenant-shaped names — avoid false-positive nukes on
+        # ad-hoc records (e.g. hermes-final-*) that do not match a known slug.
        return ("keep", "unknown-subdomain-kept-for-safety", rid, n, typ)

    return ("keep", "not-a-pattern-we-sweep", rid, n, typ)
 # CANONICAL DECIDE END

 for r in d["result"]:
-    action, reason, rid, name, typ = decide(r, prod_slugs, staging_slugs, ec2_names)
+    action, reason, rid, name, typ = decide(r, all_slugs, ec2_names)
    print(json.dumps({"action": action, "reason": reason, "id": rid, "name": name, "type": typ}))
 ')

--- a/scripts/ops/sweep_cf_decide.py
+++ b/scripts/ops/sweep_cf_decide.py
@ -11,10 +11,11 @@ If you change the rules: edit BOTH this file AND the inline block in
 ``# CANONICAL DECIDE BEGIN`` to ``# CANONICAL DECIDE END`` markers in
 both files; the parity check compares those slices).

-Inputs to ``decide(record, prod_slugs, staging_slugs, ec2_names)``:
+Inputs to ``decide(record, all_slugs, ec2_names)``:
  record       Cloudflare DNS record dict {name, id, type}
-  prod_slugs   set of CP-prod org slugs (live tenants)
-  staging_slugs set of CP-staging org slugs
+  all_slugs    set of CP org slugs (prod ∪ staging) — caller computes the
+               union once instead of per-record (decide is hot-path: 100s
+               to 1000s of records per sweep)
  ec2_names    set of live EC2 Name tags (e.g. ``ws-d3605ef2-f7d``)

 Returns ``(action, reason, id, name, type)`` matching the bash heredoc.
@ -22,53 +23,57 @@ Returns ``(action, reason, id, name, type)`` matching the bash heredoc.
 from __future__ import annotations

 import re
-from typing import Iterable
+
+
+# Pre-compile per-record regexes once at module load — saves the per-call
+# pattern-cache lookup across 1000s of CF records per sweep. Mirrored at
+# the same scope in sweep-cf-orphans.sh's heredoc.
+_PLATFORM_CORE_NAMES = {
+    "api.moleculesai.app", "app.moleculesai.app", "doc.moleculesai.app",
+    "send.moleculesai.app", "status.moleculesai.app", "www.moleculesai.app",
+    "staging-api.moleculesai.app",
+}
+_WS_RE = re.compile(r"^(ws-[a-f0-9]{8}-[a-f0-9]+)(?:\.staging)?\.moleculesai\.app$")
+_E2E_RE = re.compile(r"^(e2e-[^.]+)(?:\.staging)?\.moleculesai\.app$")
+_TENANT_RE = re.compile(r"^([a-z0-9][a-z0-9-]*)(?:\.staging)?\.moleculesai\.app$")


 # CANONICAL DECIDE BEGIN
-def decide(r, prod_slugs, staging_slugs, ec2_names):
+def decide(r, all_slugs, ec2_names):
    n = r["name"]
    rid = r["id"]
    typ = r["type"]
-    all_slugs = prod_slugs | staging_slugs

-    # Rule 1: platform core — leave alone
    if n == "moleculesai.app":
        return ("keep", "apex", rid, n, typ)
    if n.startswith("_") or n.endswith("._domainkey.moleculesai.app"):
        return ("keep", "verification/key", rid, n, typ)
-    if n in {"api.moleculesai.app","app.moleculesai.app","doc.moleculesai.app",
-            "send.moleculesai.app","status.moleculesai.app","www.moleculesai.app",
-            "staging-api.moleculesai.app"}:
+    if n in _PLATFORM_CORE_NAMES:
        return ("keep", "platform-core", rid, n, typ)

-    # Rule 3: ws-<hex8>-<rest>.(staging.)moleculesai.app
-    m = re.match(r"^(ws-[a-f0-9]{8}-[a-f0-9]+)(?:\.staging)?\.moleculesai\.app$", n)
+    m = _WS_RE.match(n)
    if m:
        prefix = m.group(1)
-        # Live EC2 names are like "ws-d3605ef2-f7d" — same shape as DNS subdomain.
+        # Live EC2 names share the ws-<hex8>-<rest> shape with the DNS subdomain.
        for ename in ec2_names:
            if ename.startswith(prefix):
                return ("keep", "live-ec2", rid, n, typ)
        return ("delete", "orphan-ws", rid, n, typ)

-    # Rule 4: e2e-* tenants (includes canary, canvas variants)
-    m = re.match(r"^(e2e-[^.]+)(?:\.staging)?\.moleculesai\.app$", n)
+    m = _E2E_RE.match(n)
    if m:
        slug = m.group(1)
        if slug in all_slugs:
            return ("keep", "live-e2e-tenant", rid, n, typ)
        return ("delete", "orphan-e2e-tenant", rid, n, typ)

-    # Rule 2: any other tenant subdomain (slug.moleculesai.app or slug.staging.moleculesai.app)
-    m = re.match(r"^([a-z0-9][a-z0-9-]*)(?:\.staging)?\.moleculesai\.app$", n)
+    m = _TENANT_RE.match(n)
    if m:
        slug = m.group(1)
        if slug in all_slugs:
            return ("keep", "live-tenant", rid, n, typ)
-        # Only flag as orphan if name looks like a tenant (not a one-off like "hermes-final-*")
-        # To avoid false-positive nukes on ad-hoc records, we KEEP anything that
-        # does not match a known pattern. Orphan only for explicit tenant-shaped names.
+        # KEEP unknown tenant-shaped names — avoid false-positive nukes on
+        # ad-hoc records (e.g. hermes-final-*) that do not match a known slug.
        return ("keep", "unknown-subdomain-kept-for-safety", rid, n, typ)

    return ("keep", "not-a-pattern-we-sweep", rid, n, typ)
@ -78,13 +83,12 @@ def decide(r, prod_slugs, staging_slugs, ec2_names):
 def safety_gate(total: int, delete_count: int, max_delete_pct: int = 50) -> bool:
    """Return True iff the sweep is safe to execute.

-    Mirrors the shell-side gate in sweep-cf-orphans.sh: if the deletion
-    fraction exceeds ``max_delete_pct`` the sweep refuses to run. The
-    bash script computes the integer percentage as ``DELETE_COUNT*100/TOTAL``
-    — keeping the same arithmetic here so a future "raise to 75%" tweak
-    needs to be made in only one place semantically.
+    Mirrors the shell-side gate: if the deletion fraction exceeds
+    ``max_delete_pct`` the sweep refuses to run. Same integer arithmetic
+    as the bash script (``DELETE_COUNT*100/TOTAL``) so a future threshold
+    tweak only needs to land in one semantic place.
    """
    if total <= 0:
-        return True  # nothing to delete; gate is trivially satisfied
+        return True
    pct = delete_count * 100 // total
    return pct <= max_delete_pct
--- a/scripts/ops/test_sweep_cf_decide.py
+++ b/scripts/ops/test_sweep_cf_decide.py
@ -17,10 +17,8 @@ import unittest
 import sweep_cf_decide as M


-# --- Fixtures ---------------------------------------------------------------
-
-PROD = {"acme", "globex", "initech"}
-STAGING = {"e2e-test-runner", "soak", "playground"}
+# Caller responsibility (per the new decide signature): compute the union once.
+ALL_SLUGS = {"acme", "globex", "initech", "e2e-test-runner", "soak", "playground"}
 LIVE_EC2 = {"ws-d3605ef2-f7d", "ws-aaaaaaaa-bbb", "ws-cafef00d-dec"}


@ -29,10 +27,7 @@ def rec(name: str, rid: str = "rid-x", typ: str = "A") -> dict:


 def call(record: dict) -> tuple:
-    return M.decide(record, PROD, STAGING, LIVE_EC2)
-
-
-# --- Rule 1: platform core --------------------------------------------------
+    return M.decide(record, ALL_SLUGS, LIVE_EC2)


 class TestPlatformCore(unittest.TestCase):
@ -43,10 +38,10 @@ class TestPlatformCore(unittest.TestCase):
        self.assertEqual((action, reason), ("keep", "apex"))

    def test_underscore_records_kept(self):
-        # _vercel, _domainkey, _railway-verify, etc.
        for n in ("_vercel.moleculesai.app", "_railway-verify.moleculesai.app"):
-            action, reason, *_ = call(rec(n))
-            self.assertEqual((action, reason), ("keep", "verification/key"), n)
+            with self.subTest(name=n):
+                action, reason, *_ = call(rec(n))
+                self.assertEqual((action, reason), ("keep", "verification/key"))

    def test_dkim_kept(self):
        action, reason, *_ = call(rec("send._domainkey.moleculesai.app"))
@ -62,11 +57,9 @@ class TestPlatformCore(unittest.TestCase):
            "www.moleculesai.app",
            "staging-api.moleculesai.app",
        ):
-            action, reason, *_ = call(rec(n))
-            self.assertEqual((action, reason), ("keep", "platform-core"), n)
-
-
-# --- Rule 3: ws-<hex8>-<rest> -----------------------------------------------
+            with self.subTest(name=n):
+                action, reason, *_ = call(rec(n))
+                self.assertEqual((action, reason), ("keep", "platform-core"))


 class TestWsRule(unittest.TestCase):
@ -89,9 +82,6 @@ class TestWsRule(unittest.TestCase):
        self.assertEqual((action, reason), ("delete", "orphan-ws"))


-# --- Rule 4: e2e-* tenants --------------------------------------------------
-
-
 class TestE2ERule(unittest.TestCase):
    def test_live_e2e_kept(self):
        action, reason, *_ = call(rec("e2e-test-runner.staging.moleculesai.app"))
@ -102,14 +92,10 @@ class TestE2ERule(unittest.TestCase):
        self.assertEqual((action, reason), ("delete", "orphan-e2e-tenant"))

    def test_dead_e2e_on_prod_deleted(self):
-        # e2e-* on prod (no .staging) is also tenant-shaped — deletion path.
        action, reason, *_ = call(rec("e2e-ghost.moleculesai.app"))
        self.assertEqual((action, reason), ("delete", "orphan-e2e-tenant"))


-# --- Rule 2: generic tenant subdomain ---------------------------------------
-
-
 class TestTenantSubdomainRule(unittest.TestCase):
    def test_live_prod_tenant_kept(self):
        action, reason, *_ = call(rec("acme.moleculesai.app"))
@ -120,14 +106,10 @@ class TestTenantSubdomainRule(unittest.TestCase):
        self.assertEqual((action, reason), ("keep", "live-tenant"))

    def test_unknown_subdomain_kept_for_safety(self):
-        # The script intentionally KEEPS unknown patterns to avoid blast.
        action, reason, *_ = call(rec("hermes-final-2.moleculesai.app"))
        self.assertEqual((action, reason), ("keep", "unknown-subdomain-kept-for-safety"))


-# --- Rule 5 / fallthrough ---------------------------------------------------
-
-
 class TestNotASweepPattern(unittest.TestCase):
    def test_external_domain_kept(self):
        # Domain-spoofing attempt — must NOT match any of the moleculesai.app rules.
@ -139,13 +121,10 @@ class TestNotASweepPattern(unittest.TestCase):
        self.assertEqual((action, reason), ("keep", "not-a-pattern-we-sweep"))


-# --- Rule priority ----------------------------------------------------------
-
-
 class TestRulePriority(unittest.TestCase):
-    """Rule 1 (platform-core) wins over later rules even if the name shape
-    overlaps — e.g. ``api.moleculesai.app`` matches Rule 2's tenant pattern
-    but must be classified as platform-core."""
+    """Platform-core check must precede the tenant-subdomain regex —
+    e.g. ``api.moleculesai.app`` matches the tenant pattern but must
+    classify as platform-core."""

    def test_api_subdomain_classified_as_platform_not_tenant(self):
        action, reason, *_ = call(rec("api.moleculesai.app"))
@ -156,9 +135,6 @@ class TestRulePriority(unittest.TestCase):
        self.assertEqual(reason, "verification/key")


-# --- Safety gate ------------------------------------------------------------
-
-
 class TestSafetyGate(unittest.TestCase):
    """The bash gate refuses to delete >MAX_DELETE_PCT (default 50%)."""

@ -179,48 +155,42 @@ class TestSafetyGate(unittest.TestCase):
        self.assertFalse(M.safety_gate(total=100, delete_count=76, max_delete_pct=75))


-# --- Empty live-sets behavior (incident-prevention) -------------------------
-
-
 class TestEmptyLiveSets(unittest.TestCase):
    """If the CP admin API returns no orgs (auth broken, network blip),
-    every tenant-shaped record looks orphan. The decide function alone
-    has no defense — that's the safety_gate's job. This test pins the
-    expected behavior so the safety-gate contract is documented."""
+    every tenant-shaped record looks orphan. decide() alone has no
+    defense — that's safety_gate's job. This test pins the contract so
+    a future "make decide() defensive" change doesn't silently bypass
+    the gate."""

    def test_dead_e2e_orphans_when_live_set_empty(self):
-        empty = set()
        action, reason, *_ = M.decide(
            rec("e2e-test-runner.staging.moleculesai.app"),
-            empty, empty, set(),
+            set(), set(),
        )
-        # decide() classifies as orphan — gate is the line of defense.
        self.assertEqual((action, reason), ("delete", "orphan-e2e-tenant"))

    def test_live_ws_still_kept_when_ec2_set_empty(self):
-        # Symmetric: ws-* without matching EC2 = orphan.
        action, reason, *_ = M.decide(
            rec("ws-cafef00d-dec.moleculesai.app"),
-            PROD, STAGING, set(),
+            ALL_SLUGS, set(),
        )
        self.assertEqual((action, reason), ("delete", "orphan-ws"))


-# --- Parity check -----------------------------------------------------------
-
-
 class TestParityWithBashScript(unittest.TestCase):
    """The decision logic exists in two places: the canonical block in
    sweep_cf_decide.py and the inline heredoc in sweep-cf-orphans.sh.
-    This test asserts the two byte-for-byte match between the
+    This test asserts the two match between the
    ``# CANONICAL DECIDE BEGIN`` / ``# CANONICAL DECIDE END`` markers,
-    so an edit to one without the other fails CI loudly."""
+    so an edit to one without the other fails CI loudly. The mirror-
+    reminder comment lives OUTSIDE the markers in the .sh file so we
+    don't need to special-case it here."""

    @staticmethod
-    def _slice_canonical(text: str) -> str:
-        """Return the canonical block, line-anchored (mentions of the marker
-        words inside docstrings are ignored — only an exact-match line
-        ``# CANONICAL DECIDE BEGIN`` opens the slice)."""
+    def _slice_canonical(text: str) -> list[str]:
+        """Return the lines between the canonical markers, exclusive.
+        Markers are matched line-anchored (a stripped-line literal match)
+        so the docstring's prose mention is ignored."""
        lines = text.splitlines()
        begin_idx = end_idx = None
        for i, line in enumerate(lines):
@ -235,27 +205,14 @@ class TestParityWithBashScript(unittest.TestCase):
                "missing CANONICAL DECIDE BEGIN/END markers — "
                "first 30 lines were:\n" + "\n".join(lines[:30])
            )
-        block = lines[begin_idx + 1:end_idx]
-        # Strip leading whitespace per-line so the .sh heredoc (no indent)
-        # and the .py module (also no indent at function scope) compare equal
-        # even if a future move into a class adds indent on one side.
-        return "\n".join(line.strip() for line in block if line.strip())
+        return lines[begin_idx + 1:end_idx]

    def test_blocks_match(self):
        here = os.path.dirname(__file__)
        with open(os.path.join(here, "sweep_cf_decide.py"), "r", encoding="utf-8") as f:
            py_block = self._slice_canonical(f.read())
        with open(os.path.join(here, "sweep-cf-orphans.sh"), "r", encoding="utf-8") as f:
-            # Strip the bash-only marker comment line (the .py file doesn't
-            # carry the "Edits inside this block must mirror …" reminder).
-            sh_text = f.read().replace(
-                "# Edits inside this block must mirror scripts/ops/sweep_cf_decide.py — the\n",
-                "",
-            ).replace(
-                "# parity test in test_sweep_cf_decide.py asserts they match byte-for-byte.\n",
-                "",
-            )
-            sh_block = self._slice_canonical(sh_text)
+            sh_block = self._slice_canonical(f.read())
        self.assertEqual(
            py_block,
            sh_block,