fix(sop-checklist): split slug on em-dash so notes parse correctly #1408
@@ -174,6 +174,16 @@ def parse_directives(
|
||||
if not parts:
|
||||
continue
|
||||
first = parts[0]
|
||||
# Em-dash (U+2014) is a common visual separator in user-written
|
||||
# notes, e.g. /sop-ack Five-Axis — five-axis-review
|
||||
# If raw_slug contains an em-dash, split on the first one so
|
||||
# the part before becomes the slug and the rest becomes the note.
|
||||
note_from_slug = ""
|
||||
slug_source = raw_slug
|
||||
emdash_idx = raw_slug.find("—")
|
||||
if emdash_idx != -1:
|
||||
slug_source = raw_slug[:emdash_idx].strip()
|
||||
note_from_slug = raw_slug[emdash_idx + 1 :].strip()
|
||||
# If the slug-capture greedily matched multiple words (e.g.
|
||||
# "comprehensive testing"), preserve normalize behavior: join
|
||||
# the WHOLE first-word-token only; trailing words get appended to
|
||||
@@ -186,13 +196,19 @@ def parse_directives(
|
||||
# as slug and "testing extra-note" as note. We defer the
|
||||
# disambiguation to the caller via the returned canonical
|
||||
# slug. For simplicity: try the WHOLE captured string first.
|
||||
canonical = normalize_slug(raw_slug, numeric_aliases)
|
||||
canonical = normalize_slug(slug_source, numeric_aliases)
|
||||
else:
|
||||
canonical = normalize_slug(first, numeric_aliases)
|
||||
canonical = normalize_slug(slug_source, numeric_aliases)
|
||||
note_from_group = (m.group(3) or "").strip()
|
||||
# If we collapsed multi-word slug into kebab and there's a
|
||||
# trailing-text group too, append it.
|
||||
entry = (kind, canonical, note_from_group)
|
||||
# The em-dash (U+2014) is a visual separator; the regex puts it
|
||||
# in group(3) because it is outside the slug character class.
|
||||
# Strip it so "/sop-ack slug — note" yields just "note".
|
||||
if note_from_group.startswith("—"):
|
||||
note_from_group = note_from_group[1:].strip()
|
||||
# Combine note_from_slug (em-dash split) with note_from_group
|
||||
# (trailing text after the slug captured by the regex group).
|
||||
combined_note = (note_from_slug + " " + note_from_group).strip()
|
||||
entry = (kind, canonical, combined_note)
|
||||
if kind == "sop-n/a":
|
||||
na_directives.append(entry)
|
||||
else:
|
||||
|
||||
@@ -208,6 +208,22 @@ class TestParseDirectives(unittest.TestCase):
|
||||
d = self.parse_ack_revoke("/sop-ack Comprehensive_Testing")
|
||||
self.assertEqual(d[0][1], "comprehensive-testing")
|
||||
|
||||
def test_emdash_separator_parsed_correctly(self):
|
||||
# Em-dash (U+2014) between slug and note is common in practice.
|
||||
# /sop-ack Five-Axis — five-axis-review
|
||||
# → slug = five-axis, note = five-axis-review (em-dash separator stripped)
|
||||
d = self.parse_ack_revoke("/sop-ack Five-Axis — five-axis-review")
|
||||
self.assertEqual(len(d), 1)
|
||||
self.assertEqual(d[0][1], "five-axis")
|
||||
self.assertIn("five-axis-review", d[0][2])
|
||||
|
||||
def test_emdash_no_note(self):
|
||||
# Em-dash at end of slug: only slug, no note content
|
||||
d = self.parse_ack_revoke("/sop-ack Five-Axis —")
|
||||
self.assertEqual(len(d), 1)
|
||||
self.assertEqual(d[0][1], "five-axis")
|
||||
self.assertEqual(d[0][2], "") # em-dash is separator-only → empty note
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# section_marker_present
|
||||
|
||||
@@ -205,5 +205,5 @@ n/a_gates:
|
||||
required_teams: [security, managers, ceo]
|
||||
description: >-
|
||||
Security review N/A when this change has no security surface
|
||||
(docs-only, pure-frontend, dependency-only). A security/owners
|
||||
(docs-only, pure-frontend, dependency-only). A security/managers/ceo
|
||||
member must post /sop-n/a security-review to activate.
|
||||
|
||||
Reference in New Issue
Block a user