fix(sop-checklist): split slug on em-dash so notes parse correctly #1408

Merged
devops-engineer merged 5 commits from fix/sop-checklist-emdash-slug-parse into main 2026-06-06 08:32:59 +00:00
3 changed files with 38 additions and 6 deletions
+21 -5
View File
@@ -174,6 +174,16 @@ def parse_directives(
if not parts:
continue
first = parts[0]
# Em-dash (U+2014) is a common visual separator in user-written
# notes, e.g. /sop-ack Five-Axis — five-axis-review
# If raw_slug contains an em-dash, split on the first one so
# the part before becomes the slug and the rest becomes the note.
note_from_slug = ""
slug_source = raw_slug
emdash_idx = raw_slug.find("")
if emdash_idx != -1:
slug_source = raw_slug[:emdash_idx].strip()
note_from_slug = raw_slug[emdash_idx + 1 :].strip()
# If the slug-capture greedily matched multiple words (e.g.
# "comprehensive testing"), preserve normalize behavior: join
# the WHOLE first-word-token only; trailing words get appended to
@@ -186,13 +196,19 @@ def parse_directives(
# as slug and "testing extra-note" as note. We defer the
# disambiguation to the caller via the returned canonical
# slug. For simplicity: try the WHOLE captured string first.
canonical = normalize_slug(raw_slug, numeric_aliases)
canonical = normalize_slug(slug_source, numeric_aliases)
else:
canonical = normalize_slug(first, numeric_aliases)
canonical = normalize_slug(slug_source, numeric_aliases)
note_from_group = (m.group(3) or "").strip()
# If we collapsed multi-word slug into kebab and there's a
# trailing-text group too, append it.
entry = (kind, canonical, note_from_group)
# The em-dash (U+2014) is a visual separator; the regex puts it
# in group(3) because it is outside the slug character class.
# Strip it so "/sop-ack slug — note" yields just "note".
if note_from_group.startswith(""):
note_from_group = note_from_group[1:].strip()
# Combine note_from_slug (em-dash split) with note_from_group
# (trailing text after the slug captured by the regex group).
combined_note = (note_from_slug + " " + note_from_group).strip()
entry = (kind, canonical, combined_note)
if kind == "sop-n/a":
na_directives.append(entry)
else:
@@ -208,6 +208,22 @@ class TestParseDirectives(unittest.TestCase):
d = self.parse_ack_revoke("/sop-ack Comprehensive_Testing")
self.assertEqual(d[0][1], "comprehensive-testing")
def test_emdash_separator_parsed_correctly(self):
# Em-dash (U+2014) between slug and note is common in practice.
# /sop-ack Five-Axis — five-axis-review
# → slug = five-axis, note = — five-axis-review
d = self.parse_ack_revoke("/sop-ack Five-Axis — five-axis-review")
self.assertEqual(len(d), 1)
self.assertEqual(d[0][1], "five-axis")
self.assertIn("five-axis-review", d[0][2])
def test_emdash_no_note(self):
# Em-dash at end of slug: only slug, no note content
d = self.parse_ack_revoke("/sop-ack Five-Axis —")
self.assertEqual(len(d), 1)
self.assertEqual(d[0][1], "five-axis")
self.assertEqual(d[0][2], "") # em-dash is separator-only → empty note
# ---------------------------------------------------------------------------
# section_marker_present
+1 -1
View File
@@ -205,5 +205,5 @@ n/a_gates:
required_teams: [security, managers, ceo]
description: >-
Security review N/A when this change has no security surface
(docs-only, pure-frontend, dependency-only). A security/owners
(docs-only, pure-frontend, dependency-only). A security/managers/ceo
member must post /sop-n/a security-review to activate.