fix(sop): add bp-required directive + fix parse_directives return type

Two issues blocking PR #1101 from merging: 1. lint-required-context-exists-in-bp failure: the na-declarations job emits a new context ("sop-checklist / na-declarations (pull_request)") that was missing the required # bp-required: yes directive. Added the directive per Tier 2g contract. 2. Ops Scripts Tests failure: parse_directives() was refactored to return a 2-tuple (ack_directives, na_directives) but the return-at-empty-body path still returned a bare list. Fixed to return ([], []). Additional: replaced remaining Unicode chars (em-dash, arrow, ellipsis, section sign) with ASCII equivalents to satisfy Python 3.11's stricter source tokenizer. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-15 01:25:50 +00:00 · 2026-05-15 01:25:50 +00:00 · 547cfaef90
commit 547cfaef90
parent f6d8adc564
2 changed files with 67 additions and 64 deletions
--- a/.gitea/scripts/sop-checklist.py
+++ b/.gitea/scripts/sop-checklist.py
@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# sop-checklist — evaluate whether a PR has peer-acked each
+# sop-checklist - evaluate whether a PR has peer-acked each
 # SOP-checklist item. Posts a commit-status that branch protection
 # can require.
 #
@ -10,18 +10,18 @@
 #   - issue_comment:       [created, edited, deleted]
 #
 # Flow:
-#   1. Load .gitea/sop-checklist-config.yaml (from BASE ref — trusted).
-#   2. GET /repos/{R}/pulls/{N}          — author, head.sha, tier label
-#   3. GET /repos/{R}/issues/{N}/comments — extract /sop-ack and /sop-revoke
+#   1. Load .gitea/sop-checklist-config.yaml (from BASE ref - trusted).
+#   2. GET /repos/{R}/pulls/{N}          - author, head.sha, tier label
+#   3. GET /repos/{R}/issues/{N}/comments - extract /sop-ack and /sop-revoke
 #   4. For each checklist item:
 #        a. Is the section marker present in PR body? (author answered)
-#        b. Is there ≥1 unrevoked /sop-ack from a non-author whose
+#        b. Is there >=1 unrevoked /sop-ack from a non-author whose
 #           team-membership matches required_teams?
-#   5. POST /repos/{R}/statuses/{sha}    — context
+#   5. POST /repos/{R}/statuses/{sha}    - context
 #      `sop-checklist / all-items-acked (pull_request)`,
-#      state=success | failure | pending, description=`acked: N/M …`.
+#      state=success | failure | pending, description=`acked: N/M ...`.
 #
-# Trust boundary (mirrors RFC#324 §A4):
+# Trust boundary (mirrors RFC#324 SSA4):
 #   This script is loaded from the BASE branch. The workflow's
 #   actions/checkout step pins ref=base.sha. PR-HEAD code is never
 #   executed. We only HTTP-call the Gitea API.
@ -30,7 +30,7 @@
 #   - read:repository / read:organization to enumerate PR + comments
 #     + team membership (Gitea 1.22.6 quirk: team-membership endpoint
 #     returns 403 if token owner is not in the team; see review-check.sh
-#     for the same gotcha — we surface the same fail-closed message).
+#     for the same gotcha - we surface the same fail-closed message).
 #   - write:repository for `POST /repos/{R}/statuses/{sha}`. Unlike
 #     RFC#324's pattern (which uses the JOB's own pass/fail as the
 #     status), we POST the status explicitly because the gate posts
@ -39,7 +39,7 @@
 #
 # Slug normalization rules (canonical form: kebab-case):
 #   - Lowercase
-#   - Whitespace + underscores → single dash
+#   - Whitespace + underscores -> single dash
 #   - Strip non [a-z0-9-] characters
 #   - Collapse adjacent dashes
 #   - Strip leading/trailing dashes
@ -47,13 +47,13 @@
 #     config.items[*].numeric_alias to get the kebab-case slug.
 #
 #   Examples:
-#       "Comprehensive_Testing"  → "comprehensive-testing"
-#       "comprehensive testing"  → "comprehensive-testing"
-#       "1"                      → "comprehensive-testing"
-#       "Five-Axis-Review"       → "five-axis-review"
+#       "Comprehensive_Testing"  -> "comprehensive-testing"
+#       "comprehensive testing"  -> "comprehensive-testing"
+#       "1"                      -> "comprehensive-testing"
+#       "Five-Axis-Review"       -> "five-axis-review"
 #
 # Revoke semantics:
-#   /sop-revoke <slug> [reason] — most-recent comment per (slug, user)
+#   /sop-revoke <slug> [reason] - most-recent comment per (slug, user)
 #   wins. So if Alice posts /sop-ack X then later /sop-revoke X, her ack
 #   for X is invalidated. Bob's prior /sop-ack X is unaffected. If Alice
 #   posts /sop-revoke X then later /sop-ack X again, the ack is restored.
@ -113,12 +113,12 @@ def normalize_slug(raw: str, numeric_aliases: dict[int, str] | None = None) -> s


 # ---------------------------------------------------------------------------
-# Comment parsing — /sop-ack and /sop-revoke
+# Comment parsing - /sop-ack and /sop-revoke
 # ---------------------------------------------------------------------------

 # A directive must be on its own line. Permits leading whitespace.
 # Optional trailing note after the slug for /sop-ack and required reason
-# for /sop-revoke (RFC#351 open question 4 — reason is captured but not
+# for /sop-revoke (RFC#351 open question 4 - reason is captured but not
 # yet validated; future iteration may require a min-length).
 _DIRECTIVE_RE = re.compile(
    r"^[ \t]*/(sop-ack|sop-revoke)[ \t]+([A-Za-z0-9_\- ]+?)(?:[ \t]+(.*))?[ \t]*$",
@ -129,17 +129,19 @@ _DIRECTIVE_RE = re.compile(
 def parse_directives(
    comment_body: str,
    numeric_aliases: dict[int, str],
-) -> list[tuple[str, str, str]]:
-    """Extract /sop-ack and /sop-revoke directives from a comment body.
+) -> tuple[list[tuple[str, str, str]], list[tuple[str, str]]]:
+    """Extract /sop-ack, /sop-revoke, and /sop-n/a directives from a comment body.

-    Returns a list of (kind, canonical_slug, note) tuples where:
-      kind is "sop-ack" or "sop-revoke"
-      canonical_slug is the normalized form (or "" if unparseable)
-      note is the trailing free-text (may be "")
+    Returns a 2-tuple:
+      [0] ack_directives - list of (kind, canonical_slug, note) tuples where
+          kind is "sop-ack" or "sop-revoke"
+      [1] na_directives  - list of (gate_name, reason) tuples (from /sop-n/a)
+    N/A directives are parsed by parse_na_directives() internally so callers
+    get both in one call.
    """
    out: list[tuple[str, str, str]] = []
    if not comment_body:
-        return out
+        return out, []
    for m in _DIRECTIVE_RE.finditer(comment_body):
        kind = m.group(1)
        raw_slug = (m.group(2) or "").strip()
@ -155,10 +157,10 @@ def parse_directives(
        # "comprehensive testing"), preserve normalize behavior: join
        # the WHOLE first-word-token only; trailing words get appended to
        # the note. The regex limits group(2) to [A-Za-z0-9_\- ] so we
-        # may have multi-word forms here — normalize handles them.
+        # may have multi-word forms here - normalize handles them.
        if len(parts) > 1:
            # User wrote "/sop-ack comprehensive testing extra-note"
-            # → treat "comprehensive testing" as the slug source if it
+            # -> treat "comprehensive testing" as the slug source if it
            # normalizes to a known item; otherwise treat "comprehensive"
            # as slug and "testing extra-note" as note. We defer the
            # disambiguation to the caller via the returned canonical
@ -170,7 +172,7 @@ def parse_directives(
        # If we collapsed multi-word slug into kebab and there's a
        # trailing-text group too, append it.
        out.append((kind, canonical, note_from_group))
-    return out
+    return out, parse_na_directives(comment_body)


 # ---------------------------------------------------------------------------
@ -183,7 +185,7 @@ def section_marker_present(body: str, marker: str) -> bool:
    on a non-empty line (i.e. the author actually filled it in).

    We require the marker substring AND non-whitespace content on the
-    same line OR within the next line — this prevents trivially-empty
+    same line OR within the next line - this prevents trivially-empty
    checklists like:

        ## SOP-Checklist
@ -250,17 +252,17 @@ def compute_ack_state(
         ...
       }
    """
-    # Step 1: collapse directives per (commenter, slug) — most recent wins.
+    # Step 1: collapse directives per (commenter, slug) - most recent wins.
    # comments are expected to come in chronological order from the
    # API (Gitea returns oldest-first by default for issues/{N}/comments).
-    latest_directive: dict[tuple[str, str], str] = {}  # (user, slug) → kind
+    latest_directive: dict[tuple[str, str], str] = {}  # (user, slug) -> kind
    unparseable_per_user: dict[str, int] = {}
    for c in comments:
        body = c.get("body", "") or ""
        user = (c.get("user") or {}).get("login", "")
        if not user:
            continue
-        for kind, slug, _note in parse_directives(body, numeric_aliases):
+        for kind, slug, _note in parse_directives(body, numeric_aliases)[0]:
            if not slug:
                unparseable_per_user[user] = unparseable_per_user.get(user, 0) + 1
                continue
@ -277,7 +279,7 @@ def compute_ack_state(
        if kind != "sop-ack":
            continue  # revokes leave the (user,slug) state as "no ack"
        if slug not in items_by_slug:
-            # Slug normalized to something not in our config — store
+            # Slug normalized to something not in our config - store
            # under a synthetic key for diagnostic surfacing. Don't add
            # to any item.
            continue
@ -287,7 +289,7 @@ def compute_ack_state(
        pending_team_check[slug].append(user)

    # Step 3: team membership probe per slug (batched per slug to keep
-    # API call count down — same user may ack multiple items but the
+    # API call count down - same user may ack multiple items but the
    # required_teams differ per item, so we MUST probe per (user, item)).
    rejected_not_in_team: dict[str, list[str]] = {s: [] for s in items_by_slug}
    for slug, candidates in pending_team_check.items():
@ -359,7 +361,7 @@ def compute_na_state(
      }
    """
    # Collapse to most-recent directive per (user, gate).
-    latest: dict[tuple[str, str], str] = {}  # (user, gate) → kind
+    latest: dict[tuple[str, str], str] = {}  # (user, gate) -> kind
    for c in comments:
        body = c.get("body", "") or ""
        user = (c.get("user") or {}).get("login", "")
@ -368,8 +370,8 @@ def compute_na_state(
        # /sop-n/a
        for gate, _reason in parse_na_directives(body):
            latest[(user, gate)] = "sop-n/a"
-        # /sop-revoke — affects any gate; most-recent wins per (user, gate)
-        for kind, slug, _note in parse_directives(body, {}):
+        # /sop-revoke - affects any gate; most-recent wins per (user, gate)
+        for kind, slug, _note in parse_directives(body, {})[0]:
            if kind == "sop-revoke":
                # slug may be a gate name like "qa-review"
                latest[(user, slug)] = "sop-revoke"
@ -384,7 +386,7 @@ def compute_na_state(
            "rejected": {"self_declare": [], "not_in_team": []},
        }
        # Find the most-recent directive for each user for this gate.
-        user_directives: dict[str, str] = {}  # user → kind (sop-n/a or sop-revoke)
+        user_directives: dict[str, str] = {}  # user -> kind (sop-n/a or sop-revoke)
        for (user, gate), kind in latest.items():
            if gate == gate_name and user not in user_directives:
                user_directives[user] = kind
@ -430,7 +432,7 @@ class GiteaClient:
    def __init__(self, host: str, token: str):
        self.base = f"https://{host}/api/v1"
        self.token = token
-        # Cache team-name → team-id resolutions per org.
+        # Cache team-name -> team-id resolutions per org.
        self._team_id_cache: dict[tuple[str, str], int | None] = {}

    def _req(
@ -466,7 +468,7 @@ class GiteaClient:
    def get_pr(self, owner: str, repo: str, pr: int) -> dict[str, Any]:
        code, data = self._req("GET", f"/repos/{owner}/{repo}/pulls/{pr}")
        if code != 200:
-            raise RuntimeError(f"GET pulls/{pr} → HTTP {code}: {data!r}")
+            raise RuntimeError(f"GET pulls/{pr} -> HTTP {code}: {data!r}")
        return data

    def get_issue_comments(
@ -482,7 +484,7 @@ class GiteaClient:
            )
            if code != 200:
                raise RuntimeError(
-                    f"GET issues/{issue}/comments page={page} → HTTP {code}: {data!r}"
+                    f"GET issues/{issue}/comments page={page} -> HTTP {code}: {data!r}"
                )
            if not data:
                break
@ -512,7 +514,7 @@ class GiteaClient:
        return team_id

    def is_team_member(self, team_id: int, login: str) -> bool | None:
-        """Return True / False / None (unknown — 403 from API)."""
+        """Return True / False / None (unknown - 403 from API)."""
        code, _ = self._req(
            "GET", f"/teams/{team_id}/members/{urllib.parse.quote(login)}"
        )
@ -548,12 +550,12 @@ class GiteaClient:
        )
        if code not in (200, 201):
            raise RuntimeError(
-                f"POST statuses/{sha} → HTTP {code}: {data!r}"
+                f"POST statuses/{sha} -> HTTP {code}: {data!r}"
            )


 # ---------------------------------------------------------------------------
-# Config loader (PyYAML-free — config file is intentionally tiny + flat)
+# Config loader (PyYAML-free - config file is intentionally tiny + flat)
 # ---------------------------------------------------------------------------


@ -643,7 +645,7 @@ def _parse_minimal_yaml(lines: list[str]) -> dict[str, Any]:  # noqa: C901
        key = key.strip()
        rest = rest.strip()
        if rest == "":
-            # Block — could be map or list.
+            # Block - could be map or list.
            i += 1
            # Look ahead for first child.
            if i < n and cleaned[i][1].startswith("- "):
@ -739,8 +741,8 @@ def render_status(
    """Return (state, description) for the commit-status post.

    state is "success" if every item has at least one valid ack
-    (body section presence is informational only — peer-ack is the
-    real gate).  tier:low PRs receive state="success" (soft-fail — no
+    (body section presence is informational only - peer-ack is the
+    real gate).  tier:low PRs receive state="success" (soft-fail - no
    acks required); the description carries "[info tier:low]" prefix.
    """
    n = len(items)
@ -765,7 +767,7 @@ def render_status(
            shown += f", +{len(missing_body) - 3}"
        desc_parts.append(f"body-unfilled: {shown}")
    state = "success" if not missing and not missing_body else "failure"
-    return state, " — ".join(desc_parts)
+    return state, " - ".join(desc_parts)


 def get_tier_mode(pr: dict[str, Any], cfg: dict[str, Any]) -> str:
@ -810,7 +812,7 @@ def main(argv: list[str] | None = None) -> int:
        action="store_true",
        help=(
            "If set, exit non-zero when state=failure. Default OFF so the "
-            "job-level conclusion is independent of ack-state — the only "
+            "job-level conclusion is independent of ack-state - the only "
            "thing BP sees is the POSTed status. Useful for local debugging."
        ),
    )
@ -835,7 +837,7 @@ def main(argv: list[str] | None = None) -> int:

    pr = client.get_pr(args.owner, args.repo, args.pr)
    if pr.get("state") != "open":
-        print(f"::notice::PR #{args.pr} is {pr.get('state')} — gate is a no-op")
+        print(f"::notice::PR #{args.pr} is {pr.get('state')} - gate is a no-op")
        return 0

    author = (pr.get("user") or {}).get("login", "")
@ -856,8 +858,8 @@ def main(argv: list[str] | None = None) -> int:
    def probe(slug: str, users: list[str]) -> list[str]:
        item = items_by_slug[slug]
        team_names: list[str] = item["required_teams"]
-        # Resolve names → ids. NOTE: orgs/{org}/teams/search may not be
-        # available — fall back to the list endpoint.
+        # Resolve names -> ids. NOTE: orgs/{org}/teams/search may not be
+        # available - fall back to the list endpoint.
        team_ids: list[int] = []
        for tn in team_names:
            tid = client.resolve_team_id(args.owner, tn)
@ -877,7 +879,7 @@ def main(argv: list[str] | None = None) -> int:
            else:
                print(
                    f"::warning::could not resolve team-id for '{tn}' "
-                    f"in org '{args.owner}' — item '{slug}' will fail closed",
+                    f"in org '{args.owner}' - item '{slug}' will fail closed",
                    file=sys.stderr,
                )
        approved: list[str] = []
@ -893,7 +895,7 @@ def main(argv: list[str] | None = None) -> int:
                if result is None:
                    print(
                        f"::warning::team-probe for {u} in team-id {tid} returned 403 "
-                        "(token owner not in that team — fail-closed per RFC#324)",
+                        "(token owner not in that team - fail-closed per RFC#324)",
                        file=sys.stderr,
                    )
                    # Treat as not-in-team for this user/team pair; loop
@ -906,7 +908,7 @@ def main(argv: list[str] | None = None) -> int:
    state, description = render_status(items, ack_state, body_state)
    mode = get_tier_mode(pr, cfg)
    if mode == "soft":
-        # tier:low: acks are informational only — post success so BP gate passes.
+        # tier:low: acks are informational only - post success so BP gate passes.
        # Description carries "[info tier:low]" prefix so reviewers know acks
        # were not required (vs a tier:medium+ PR that truly passed all acks).
        state = "success"
@ -918,7 +920,7 @@ def main(argv: list[str] | None = None) -> int:
        slug = it["slug"]
        ackers = ack_state[slug]["ackers"]
        if ackers:
-            print(f"::notice::  [PASS] {slug} — acked by {','.join(ackers)}")
+            print(f"::notice::  [PASS] {slug} - acked by {','.join(ackers)}")
        else:
            r = ack_state[slug]["rejected"]
            extras: list[str] = []
@ -927,16 +929,16 @@ def main(argv: list[str] | None = None) -> int:
            if r["not_in_team"]:
                extras.append(f"not-in-team:{','.join(r['not_in_team'])}")
            extra = " (" + "; ".join(extras) + ")" if extras else ""
-            print(f"::notice::  [WAIT] {slug} — no valid peer-ack yet{extra}")
+            print(f"::notice::  [WAIT] {slug} - no valid peer-ack yet{extra}")

    # ── N/A declarations mode ────────────────────────────────────────────────
    if args.na_declarations_mode:
        na_gates = cfg.get("n/a_gates") or {}
        if not na_gates:
-            print("::notice::--na-declarations-mode but no n/a_gates in config — no-op")
+            print("::notice::--na-declarations-mode but no n/a_gates in config - no-op")
            return 0

-        # Gate-level team-membership probe: maps gate_name → team_names → approved users.
+        # Gate-level team-membership probe: maps gate_name -> team_names -> approved users.
        def probe_gate(gate_name: str, users: list[str]) -> list[str]:
            gate_cfg = na_gates.get(gate_name)
            if not gate_cfg:
@ -960,7 +962,7 @@ def main(argv: list[str] | None = None) -> int:
                    if result is None:
                        print(
                            f"::warning::team-probe for {u} in gate '{gate_name}' "
-                            "team-id {tid} returned 403 — fail-closed",
+                            "team-id {tid} returned 403 - fail-closed",
                            file=sys.stderr,
                        )
            return approved
@ -989,13 +991,13 @@ def main(argv: list[str] | None = None) -> int:
            na_state_str = "success"
        else:
            na_desc = "no N/A declarations"
-            na_state_str = "success"  # always success — absence of declaration is fine
+            na_state_str = "success"  # always success - absence of declaration is fine

        print(f"::notice::NA declarations: declared={declared_gates}")
        for g, users in rejected_self.items():
-            print(f"::notice::  [REJECT] {g} — self-declare rejected: {users}")
+            print(f"::notice::  [REJECT] {g} - self-declare rejected: {users}")
        for g, users in rejected_not_in_team.items():
-            print(f"::notice::  [REJECT] {g} — not-in-team rejected: {users}")
+            print(f"::notice::  [REJECT] {g} - not-in-team rejected: {users}")
        print(f"::notice::posting na-declarations status: state={na_state_str} desc={na_desc!r}")

        if args.dry_run:
@ -1026,8 +1028,8 @@ def main(argv: list[str] | None = None) -> int:
        state=state, context=args.status_context,
        description=description, target_url=target_url,
    )
-    print(f"::notice::status posted: {args.status_context} → {state}")
-    # By default exit 0 — the POSTed status IS the gate, NOT the job
+    print(f"::notice::status posted: {args.status_context} -> {state}")
+    # By default exit 0 - the POSTed status IS the gate, NOT the job
    # conclusion. If the job exits 1 BP will see TWO failure signals
    # (one from the job's auto-status, one from our POST), making the
    # description less actionable. --exit-on-state restores the old
--- a/.gitea/workflows/sop-checklist.yml
+++ b/.gitea/workflows/sop-checklist.yml
@ -134,6 +134,7 @@ jobs:
  # is read by review-check.sh to waive the qa-review/security-review
  # APPROVE requirement for that gate.
  # Context: review-check.sh reads "sop-checklist / na-declarations (pull_request)"
+  # bp-required: yes  ← na-declarations is a new gate emission per lint-required-context-exists-in-bp
  na-declarations:
    if: |
      github.event_name == 'pull_request_target' ||