diff --git a/.gitea/scripts/sop-checklist.py b/.gitea/scripts/sop-checklist.py index 9759f06c2..7745c149e 100644 --- a/.gitea/scripts/sop-checklist.py +++ b/.gitea/scripts/sop-checklist.py @@ -174,6 +174,16 @@ def parse_directives( if not parts: continue first = parts[0] + # Em-dash (U+2014) is a common visual separator in user-written + # notes, e.g. /sop-ack Five-Axis — five-axis-review + # If raw_slug contains an em-dash, split on the first one so + # the part before becomes the slug and the rest becomes the note. + note_from_slug = "" + slug_source = raw_slug + emdash_idx = raw_slug.find("—") + if emdash_idx != -1: + slug_source = raw_slug[:emdash_idx].strip() + note_from_slug = raw_slug[emdash_idx + 1 :].strip() # If the slug-capture greedily matched multiple words (e.g. # "comprehensive testing"), preserve normalize behavior: join # the WHOLE first-word-token only; trailing words get appended to @@ -186,13 +196,19 @@ def parse_directives( # as slug and "testing extra-note" as note. We defer the # disambiguation to the caller via the returned canonical # slug. For simplicity: try the WHOLE captured string first. - canonical = normalize_slug(raw_slug, numeric_aliases) + canonical = normalize_slug(slug_source, numeric_aliases) else: - canonical = normalize_slug(first, numeric_aliases) + canonical = normalize_slug(slug_source, numeric_aliases) note_from_group = (m.group(3) or "").strip() - # If we collapsed multi-word slug into kebab and there's a - # trailing-text group too, append it. - entry = (kind, canonical, note_from_group) + # The em-dash (U+2014) is a visual separator; the regex puts it + # in group(3) because it is outside the slug character class. + # Strip it so "/sop-ack slug — note" yields just "note". + if note_from_group.startswith("—"): + note_from_group = note_from_group[1:].strip() + # Combine note_from_slug (em-dash split) with note_from_group + # (trailing text after the slug captured by the regex group). 
+ combined_note = (note_from_slug + " " + note_from_group).strip() + entry = (kind, canonical, combined_note) if kind == "sop-n/a": na_directives.append(entry) else: diff --git a/.gitea/scripts/tests/test_sop_checklist.py b/.gitea/scripts/tests/test_sop_checklist.py index 23c06cc55..257966b63 100644 --- a/.gitea/scripts/tests/test_sop_checklist.py +++ b/.gitea/scripts/tests/test_sop_checklist.py @@ -208,6 +208,22 @@ class TestParseDirectives(unittest.TestCase): d = self.parse_ack_revoke("/sop-ack Comprehensive_Testing") self.assertEqual(d[0][1], "comprehensive-testing") + def test_emdash_separator_parsed_correctly(self): + # Em-dash (U+2014) between slug and note is common in practice. + # /sop-ack Five-Axis — five-axis-review + # → slug = five-axis, note = five-axis-review + d = self.parse_ack_revoke("/sop-ack Five-Axis — five-axis-review") + self.assertEqual(len(d), 1) + self.assertEqual(d[0][1], "five-axis") + self.assertIn("five-axis-review", d[0][2]) + + def test_emdash_no_note(self): + # Em-dash at end of slug: only slug, no note content + d = self.parse_ack_revoke("/sop-ack Five-Axis —") + self.assertEqual(len(d), 1) + self.assertEqual(d[0][1], "five-axis") + self.assertEqual(d[0][2], "") # em-dash is separator-only → empty note + # --------------------------------------------------------------------------- # section_marker_present diff --git a/.gitea/sop-checklist-config.yaml b/.gitea/sop-checklist-config.yaml index 3ede62cb5..ef180536a 100644 --- a/.gitea/sop-checklist-config.yaml +++ b/.gitea/sop-checklist-config.yaml @@ -205,5 +205,5 @@ n/a_gates: required_teams: [security, managers, ceo] description: >- Security review N/A when this change has no security surface - (docs-only, pure-frontend, dependency-only). A security/owners + (docs-only, pure-frontend, dependency-only). A security/managers/ceo member must post /sop-n/a security-review to activate.