2026-06-03 00:12:42 +00:00
5 changed files with 445 additions and 4 deletions
@@ -895,6 +895,47 @@ def resolve_required_teams(item: dict[str, Any], high_risk: bool) -> list[str]:
    return list(item.get("required_teams") or [])


+# ---------------------------------------------------------------------------
+# CI status validation for testing-class AI acks (internal#760 CTO hardening)
+# ---------------------------------------------------------------------------
+
+# Slugs that require CI / all-required green before an AI ack is valid.
+_TESTING_CLASS_SLUGS = {"comprehensive-testing", "local-postgres-e2e", "staging-smoke"}
+
+# Human-only carve-out: these items can NEVER be acked by AI, regardless
+# of config drift. Any item in this set MUST NOT have ai_ack_eligible.
+# migration / schema are future-proofing — not yet in config items, but
+# the code guard rejects them proactively (CTO hardening, msg 1388c76f).
+_HUMAN_ONLY_SLUGS = {"root-cause", "no-backwards-compat", "migration", "schema"}
+
+
+def get_ci_status(client: GiteaClient, owner: str, repo: str, sha: str) -> str:
+    """Return the state of CI / all-required (pull_request) for `sha`.
+
+    Looks through the commit statuses and returns the state string
+    ("success", "failure", "pending", "error") or "missing" if the
+    context is not found. This prevents an AI agent from attesting
+    "tests pass" independently of the actual CI run.
+    """
+    code, data = client._req(  # noqa: SLF001
+        "GET", f"/repos/{owner}/{repo}/statuses/{sha}"
+    )
+    if code != 200:
+        return "unknown"
+    if not data or not isinstance(data, list):
+        return "missing"
+    # Gitea returns statuses newest-first. Find the latest for our context.
+    for status in data:
+        if status.get("context") == "CI / all-required (pull_request)":
+            return status.get("state", "unknown")
+    return "missing"
+
+
+# ---------------------------------------------------------------------------
+# Main entry point
+# ---------------------------------------------------------------------------
+
+
 def main(argv: list[str] | None = None) -> int:
    p = argparse.ArgumentParser()
    p.add_argument("--owner", required=True)
@@ -988,6 +1029,9 @@ def main(argv: list[str] | None = None) -> int:
    # one membership lookup per team.
    team_member_cache: dict[tuple[str, int], bool | None] = {}

+    # Pre-resolve the ai-sop-ack team id once (None if the team does not exist).
+    ai_sop_ack_team_id = client.resolve_team_id(args.owner, "ai-sop-ack")
+
    def probe(slug: str, users: list[str]) -> list[str]:
        # `slug` may be either an items-key (compute_ack_state caller) OR
        # an n/a-gate key (compute_na_state caller). Previously this hard
@@ -1042,14 +1086,18 @@ def main(argv: list[str] | None = None) -> int:
                    file=sys.stderr,
                )
        approved: list[str] = []
+        rejected_ai_ineligible: list[str] = []
+        rejected_ci_not_green: list[str] = []
        for u in users:
+            # 1) Human required_teams membership check
+            in_human_team = False
            for tid in team_ids:
                cache_key = (u, tid)
                if cache_key not in team_member_cache:
                    team_member_cache[cache_key] = client.is_team_member(tid, u)
                result = team_member_cache[cache_key]
                if result is True:
-                    approved.append(u)
+                    in_human_team = True
                    break
                if result is None:
                    print(
@@ -1059,6 +1107,44 @@ def main(argv: list[str] | None = None) -> int:
                    )
                    # Treat as not-in-team for this user/team pair; loop
                    # may still find membership in another team.
+            if in_human_team:
+                approved.append(u)
+                continue
+
+            # 2) AI-sop-ack team membership check (only for items that allow it).
+            if slug in items_by_slug:
+                item = items_by_slug[slug]
+                # Defensive: human-only carve-out is enforced in code, not just
+                # config. Even if ai_ack_eligible were mistakenly added to a
+                # migration/schema item, the AI path is rejected here.
+                if slug in _HUMAN_ONLY_SLUGS:
+                    rejected_ai_ineligible.append(u)
+                    continue
+                if item.get("ai_ack_eligible") and ai_sop_ack_team_id is not None:
+                    cache_key = (u, ai_sop_ack_team_id)
+                    if cache_key not in team_member_cache:
+                        team_member_cache[cache_key] = client.is_team_member(
+                            ai_sop_ack_team_id, u
+                        )
+                    result = team_member_cache[cache_key]
+                    if result is True:
+                        # 2a) Testing-class items require real CI artifact evidence.
+                        if slug in _TESTING_CLASS_SLUGS:
+                            ci_state = get_ci_status(
+                                client, args.owner, args.repo, head_sha
+                            )
+                            if ci_state != "success":
+                                print(
+                                    f"::warning::AI ack for {slug} rejected: "
+                                    f"CI / all-required is {ci_state}, not success",
+                                    file=sys.stderr,
+                                )
+                                rejected_ci_not_green.append(u)
+                                continue
+                        approved.append(u)
+                        continue
+            # If we get here, user is not approved for this slug.
+            rejected_ai_ineligible.append(u)
        return approved

    ack_state = compute_ack_state(
@@ -21,6 +21,7 @@ Scenarios:
  T16_comments_generic_approval — reviews empty; comments have "APPROVED" by team member → exit 0
  T17_comments_no_approval   — reviews empty; comments have no approval keywords → exit 1
  T18_review_wrong_team_comment_right_team — review candidate 404s, comment candidate passes
+  T19_ai_sop_ack_approved — ai-sop-ack member APPROVED review → team probe 404 → exit 1

 Usage:
  FIXTURE_STATE_DIR=/tmp/x python3 _review_check_fixture.py 8080
@@ -116,6 +117,12 @@ class Handler(http.server.BaseHTTPRequestHandler):
                    {"state": "CHANGES_REQUESTED", "dismissed": False, "user": {"login": "bob"}, "commit_id": "abc1234"},
                    {"state": "APPROVED", "dismissed": False, "user": {"login": "core-devops"}, "commit_id": "abc1234"},
                ])
+            if sc == "T19_ai_sop_ack_approved":
+                # ai-sop-ack member submitted APPROVED review — must NOT count
+                # toward qa-review (team_id=20) or security-review (team_id=21).
+                return self._json(200, [
+                    {"state": "APPROVED", "dismissed": False, "user": {"login": "ai-reviewer"}, "commit_id": "abc1234"},
+                ])
            # Default: one non-author APPROVED
            return self._json(200, [
                {"state": "APPROVED", "dismissed": False, "user": {"login": "core-devops"}, "commit_id": "abc1234"},
@@ -157,6 +164,9 @@ class Handler(http.server.BaseHTTPRequestHandler):
                return self._empty(403)
            if sc == "T18_review_wrong_team_comment_right_team" and login == "core-devops":
                return self._empty(404)
+            if sc == "T19_ai_sop_ack_approved" and login == "ai-reviewer":
+                # ai-sop-ack member is NOT in qa (20) or security (21).
+                return self._empty(404)
            # T7_team_member: member
            return self._empty(204)

@@ -205,6 +205,8 @@ chmod +x "$FIXTURE_DIR/bin/curl"
 # Helper: run the script with fixture environment
 run_review_check() {
  local scenario="$1"
+  local team="${2:-qa}"
+  local team_id="${3:-20}"
  echo "$scenario" >"$FIX_STATE_DIR/scenario"
  local out
  set +e
@@ -215,8 +217,8 @@ run_review_check() {
    REPO="molecule-ai/molecule-core" \
    PR_NUMBER="999" \
    DEFAULT_BRANCH="main" \
-    TEAM="qa" \
-    TEAM_ID="20" \
+    TEAM="$team" \
+    TEAM_ID="$team_id" \
    REVIEW_CHECK_DEBUG="0" \
    REVIEW_CHECK_STRICT="0" \
    bash "$SCRIPT" 2>&1
@@ -372,6 +374,25 @@ assert_eq "T18 exit code 0 (comment approval still considered)" "0" "$T18_RC"
 assert_contains "T18 comment candidate notice" "comment-based approval" "$T18_OUT"
 assert_contains "T18 comment approver accepted" "APPROVED by core-qa-agent" "$T18_OUT"

+# T19 — ai-sop-ack member APPROVED review must NOT count toward qa-review
+# or security-review (R1 hardening refinement, msg 1388c76f).
+echo
+echo "== T19 ai-sop-ack APPROVED review excluded from qa-review gate =="
+T19_OUT=$(run_review_check "T19_ai_sop_ack_approved" "qa" "20")
+T19_RC=$(cat "$FIX_STATE_DIR/last_rc")
+assert_eq "T19 exit code 1 (ai-sop-ack not in qa team)" "1" "$T19_RC"
+assert_contains "T19 ai-reviewer excluded from qa" "candidates: ai-reviewer" "$T19_OUT"
+assert_contains "T19 none are in qa team" "none are in team" "$T19_OUT"
+
+# T20 — same ai-sop-ack member must also be excluded from security-review gate.
+echo
+echo "== T20 ai-sop-ack APPROVED review excluded from security-review gate =="
+T20_OUT=$(run_review_check "T19_ai_sop_ack_approved" "security" "21")
+T20_RC=$(cat "$FIX_STATE_DIR/last_rc")
+assert_eq "T20 exit code 1 (ai-sop-ack not in security team)" "1" "$T20_RC"
+assert_contains "T20 ai-reviewer excluded from security" "candidates: ai-reviewer" "$T20_OUT"
+assert_contains "T20 none are in security team" "none are in team" "$T20_OUT"
+
 echo
 echo "------"
 echo "PASS=$PASS FAIL=$FAIL"
@@ -1003,3 +1003,299 @@ class TestComputeNaStateAcceptsGateNotInItems(unittest.TestCase):
            comments, "alice", na_gates, lambda *_: ["alice"]
        )
        self.assertFalse(na_state["security-review"]["declared"])
+
+
+# ---------------------------------------------------------------------------
+# internal#760 ceremony — ai-sop-ack team + ai_ack_eligible per-item flag
+# ---------------------------------------------------------------------------
+
+
+class TestAIAckEligibleConfig(unittest.TestCase):
+    """CTO-controlled allowlist (msg 1388c76f):
+      ai_ack_eligible: comprehensive-testing, local-postgres-e2e, staging-smoke,
+                       five-axis-review, memory-consulted
+      human-only:      root-cause, no-backwards-compat
+    """
+
+    def test_ai_ack_eligible_items(self):
+        cfg = sop.load_config(CONFIG_PATH)
+        items_by_slug = {it["slug"]: it for it in cfg["items"]}
+        eligible = {
+            "comprehensive-testing",
+            "local-postgres-e2e",
+            "staging-smoke",
+            "five-axis-review",
+            "memory-consulted",
+        }
+        for slug in eligible:
+            self.assertTrue(
+                items_by_slug[slug].get("ai_ack_eligible"),
+                f"{slug} must be ai_ack_eligible",
+            )
+
+    def test_human_only_items(self):
+        cfg = sop.load_config(CONFIG_PATH)
+        items_by_slug = {it["slug"]: it for it in cfg["items"]}
+        human_only = {"root-cause", "no-backwards-compat"}
+        for slug in human_only:
+            self.assertFalse(
+                items_by_slug[slug].get("ai_ack_eligible", False),
+                f"{slug} must NOT be ai_ack_eligible (human-only)",
+            )
+
+    def test_testing_class_slugs_constant(self):
+        """_TESTING_CLASS_SLUGS must match the three testing items."""
+        self.assertEqual(
+            sop._TESTING_CLASS_SLUGS,
+            {"comprehensive-testing", "local-postgres-e2e", "staging-smoke"},
+        )
+
+    def test_human_only_slugs_constant(self):
+        """_HUMAN_ONLY_SLUGS encodes the migration/schema carve-out.
+
+        If this set changes, the CTO must approve the widening.
+        """
+        self.assertEqual(
+            sop._HUMAN_ONLY_SLUGS,
+            {"root-cause", "no-backwards-compat", "migration", "schema"},
+        )
+
+    def test_human_only_invariant_enforced_in_code_and_config(self):
+        """Every config-present slug in _HUMAN_ONLY_SLUGS must be human-only.
+
+        This test fails if a migration/schema-class item accidentally
+        acquires ai_ack_eligible via config drift.  migration/schema are
+        future-proofing slugs not yet in the live config; they are checked
+        by the production probe closure but skipped here.
+        """
+        cfg = sop.load_config(CONFIG_PATH)
+        items_by_slug = {it["slug"]: it for it in cfg["items"]}
+        for slug in sop._HUMAN_ONLY_SLUGS:
+            if slug not in items_by_slug:
+                # Future-proofing slug (e.g. migration, schema) — not yet
+                # in config, but the code guard still rejects AI acks.
+                continue
+            self.assertFalse(
+                items_by_slug[slug].get("ai_ack_eligible", False),
+                f"{slug} is in _HUMAN_ONLY_SLUGS and must NEVER be ai_ack_eligible",
+            )
+
+
+class TestAIAckEligibilityProbe(unittest.TestCase):
+    """The probe closure in main() delegates to compute_ack_state.
+    We simulate the AI-ack path by injecting a probe that behaves like
+    the production probe (human team first, then ai-sop-ack fallback).
+    """
+
+    def setUp(self):
+        self.items = _items_by_slug()
+        self.aliases = _numeric_aliases()
+
+    def _probe_human_then_ai(self, human_users, ai_users):
+        """Return users in human_users immediately; users in ai_users only
+        if the item is ai_ack_eligible."""
+        def probe(slug, users):
+            item = self.items.get(slug, {})
+            approved = []
+            for u in users:
+                if u in human_users:
+                    approved.append(u)
+                elif u in ai_users and item.get("ai_ack_eligible"):
+                    approved.append(u)
+            return approved
+        return probe
+
+    def test_ai_ack_passes_for_eligible_item(self):
+        comments = [_comment("ai-bot", "/sop-ack five-axis-review")]
+        probe = self._probe_human_then_ai(human_users=set(), ai_users={"ai-bot"})
+        state = sop.compute_ack_state(
+            comments, "alice", self.items, self.aliases, probe
+        )
+        self.assertEqual(state["five-axis-review"]["ackers"], ["ai-bot"])
+
+    def test_ai_ack_rejected_for_human_only_item(self):
+        comments = [_comment("ai-bot", "/sop-ack root-cause")]
+        probe = self._probe_human_then_ai(human_users=set(), ai_users={"ai-bot"})
+        state = sop.compute_ack_state(
+            comments, "alice", self.items, self.aliases, probe
+        )
+        self.assertEqual(state["root-cause"]["ackers"], [])
+        self.assertIn("ai-bot", state["root-cause"]["rejected"]["not_in_team"])
+
+    def test_human_ack_still_works_for_ai_eligible_item(self):
+        comments = [_comment("bob", "/sop-ack comprehensive-testing")]
+        probe = self._probe_human_then_ai(human_users={"bob"}, ai_users=set())
+        state = sop.compute_ack_state(
+            comments, "alice", self.items, self.aliases, probe
+        )
+        self.assertEqual(state["comprehensive-testing"]["ackers"], ["bob"])
+
+    def test_ai_ack_rejected_for_testing_item_when_ci_red(self):
+        # Simulate the production probe that checks CI status for testing items.
+        # When CI is not green, ai-sop-ack member is rejected.
+        def probe(slug, users):
+            item = self.items.get(slug, {})
+            approved = []
+            for u in users:
+                if u == "ai-bot" and item.get("ai_ack_eligible"):
+                    # Testing items require CI green; simulate CI red.
+                    if slug in sop._TESTING_CLASS_SLUGS:
+                        continue  # rejected: CI not green
+                    approved.append(u)
+            return approved
+
+        comments = [_comment("ai-bot", "/sop-ack comprehensive-testing")]
+        state = sop.compute_ack_state(
+            comments, "alice", self.items, self.aliases, probe
+        )
+        self.assertEqual(state["comprehensive-testing"]["ackers"], [])
+
+    def test_ai_ack_passes_for_testing_item_when_ci_green(self):
+        # Simulate CI green → AI ack passes.
+        def probe(slug, users):
+            item = self.items.get(slug, {})
+            approved = []
+            for u in users:
+                if u == "ai-bot" and item.get("ai_ack_eligible"):
+                    if slug in sop._TESTING_CLASS_SLUGS:
+                        # CI is green → allow
+                        pass
+                    approved.append(u)
+            return approved
+
+        comments = [_comment("ai-bot", "/sop-ack comprehensive-testing")]
+        state = sop.compute_ack_state(
+            comments, "alice", self.items, self.aliases, probe
+        )
+        self.assertEqual(state["comprehensive-testing"]["ackers"], ["ai-bot"])
+
+
+class TestAIAckHumanOnlyMigrationSchema(unittest.TestCase):
+    """RC 8322: migration and schema items are human-only regardless of
+    any future config that might accidentally mark them ai_ack_eligible.
+
+    These slugs are not yet in the live config items list; the tests use
+    synthetic items so the production guard can be exercised directly.
+    """
+
+    def setUp(self):
+        # Synthetic items — if live config ever adds migration/schema,
+        # they MUST stay human-only. The probe below mirrors the actual
+        # production closure logic (human team first, then AI fallback
+        # with _HUMAN_ONLY_SLUGS guard).
+        self.items = {
+            "migration": {
+                "slug": "migration",
+                "ai_ack_eligible": True,
+                "required_teams": ["engineers"],
+            },
+            "schema": {
+                "slug": "schema",
+                "ai_ack_eligible": True,
+                "required_teams": ["engineers"],
+            },
+        }
+        self.aliases = {}
+
+    def _production_like_probe(self, human_users, ai_users):
+        """Return a probe that mirrors the production closure's guard."""
+
+        def probe(slug, users):
+            item = self.items.get(slug, {})
+            approved = []
+            for u in users:
+                if u in human_users:
+                    approved.append(u)
+                elif u in ai_users:
+                    # Production guard: _HUMAN_ONLY_SLUGS rejects AI acks
+                    # regardless of the ai_ack_eligible flag.
+                    if slug in sop._HUMAN_ONLY_SLUGS:
+                        continue
+                    if item.get("ai_ack_eligible"):
+                        approved.append(u)
+            return approved
+
+        return probe
+
+    def test_ai_ack_rejected_for_migration(self):
+        comments = [_comment("ai-bot", "/sop-ack migration")]
+        probe = self._production_like_probe(human_users=set(), ai_users={"ai-bot"})
+        state = sop.compute_ack_state(
+            comments, "alice", self.items, self.aliases, probe
+        )
+        self.assertEqual(state["migration"]["ackers"], [])
+        self.assertIn("ai-bot", state["migration"]["rejected"]["not_in_team"])
+
+    def test_ai_ack_rejected_for_schema(self):
+        comments = [_comment("ai-bot", "/sop-ack schema")]
+        probe = self._production_like_probe(human_users=set(), ai_users={"ai-bot"})
+        state = sop.compute_ack_state(
+            comments, "alice", self.items, self.aliases, probe
+        )
+        self.assertEqual(state["schema"]["ackers"], [])
+        self.assertIn("ai-bot", state["schema"]["rejected"]["not_in_team"])
+
+    def test_human_ack_still_works_for_migration(self):
+        # Human team member acking migration/schema is unaffected.
+        comments = [_comment("bob", "/sop-ack migration")]
+        probe = self._production_like_probe(human_users={"bob"}, ai_users=set())
+        state = sop.compute_ack_state(
+            comments, "alice", self.items, self.aliases, probe
+        )
+        self.assertEqual(state["migration"]["ackers"], ["bob"])
+
+    def test_human_ack_still_works_for_schema(self):
+        comments = [_comment("bob", "/sop-ack schema")]
+        probe = self._production_like_probe(human_users={"bob"}, ai_users=set())
+        state = sop.compute_ack_state(
+            comments, "alice", self.items, self.aliases, probe
+        )
+        self.assertEqual(state["schema"]["ackers"], ["bob"])
+
+
+class TestGetCIStatus(unittest.TestCase):
+    """Verify get_ci_status reads the correct context from commit statuses."""
+
+    def _client_with_statuses(self, statuses):
+        client = sop.GiteaClient("git.example.com", "tok")
+
+        def fake_req(method, path, body=None, ok_codes=(200, 201, 204)):
+            return 200, statuses
+
+        client._req = fake_req  # type: ignore[method-assign]
+        return client
+
+    def test_ci_green_returns_success(self):
+        client = self._client_with_statuses([
+            {"context": "CI / all-required (pull_request)", "state": "success"},
+        ])
+        self.assertEqual(
+            sop.get_ci_status(client, "o", "r", "sha1"), "success"
+        )
+
+    def test_ci_red_returns_failure(self):
+        client = self._client_with_statuses([
+            {"context": "CI / all-required (pull_request)", "state": "failure"},
+        ])
+        self.assertEqual(
+            sop.get_ci_status(client, "o", "r", "sha1"), "failure"
+        )
+
+    def test_missing_context_returns_missing(self):
+        client = self._client_with_statuses([
+            {"context": "some-other-context", "state": "success"},
+        ])
+        self.assertEqual(
+            sop.get_ci_status(client, "o", "r", "sha1"), "missing"
+        )
+
+    def test_api_error_returns_unknown(self):
+        client = sop.GiteaClient("git.example.com", "tok")
+
+        def fake_req(method, path, body=None, ok_codes=(200, 201, 204)):
+            return 500, {"error": "boom"}
+
+        client._req = fake_req  # type: ignore[method-assign]
+        self.assertEqual(
+            sop.get_ci_status(client, "o", "r", "sha1"), "unknown"
+        )
@@ -32,6 +32,26 @@
 # AUTHOR SELF-ACK IS FORBIDDEN regardless of which team contains them
 # — the gate script enforces commenter != PR author before checking
 # team membership.
+#
+# AI-SOP-ACK TEAM (internal#760 ceremony design, CTO-approved):
+#   The `ai-sop-ack` team contains AI agent identities that can ack
+#   SOP-checklist items ON BEHALF OF automated evidence.  An AI ack is
+#   only valid when:
+#     1. the item has `ai_ack_eligible: true`
+#     2. the item is NOT in the human-only carve-out (migration/schema)
+#     3. for testing-class items, CI / all-required (pull_request) is
+#        green on the current head SHA
+#
+#   AI acks NEVER count toward qa-review or security-review gates —
+#   those remain human-team-only (enforced by review-check.sh team
+#   probe against TEAM_ID 20/21).
+#
+#   INITIAL ai_ack_eligible allowlist (CTO-controlled, msg 1388c76f):
+#     comprehensive-testing, local-postgres-e2e, staging-smoke,
+#     five-axis-review, memory-consulted
+#   HUMAN-ONLY carve-out:
+#     root-cause, no-backwards-compat
+#   Any widening requires an explicit config change reviewed by CTO.

 version: 1

@@ -83,25 +103,31 @@ items:
    numeric_alias: 1
    pr_section_marker: "Comprehensive testing performed"
    required_teams: [qa, engineers]
+    ai_ack_eligible: true
    description: >-
      What was tested, how, edge cases covered. Ack from any qa-team
-      member (or engineers fallback while qa is small).
+      member (or engineers fallback while qa is small). AI ack valid
+      only when CI / all-required (pull_request) is green.

  - slug: local-postgres-e2e
    numeric_alias: 2
    pr_section_marker: "Local-postgres E2E run"
    required_teams: [engineers]
+    ai_ack_eligible: true
    description: >-
      Link to local CI artifact, or "N/A: pure-frontend change". Ack
      from any engineer who can verify the local DB test actually ran.
+      AI ack valid only when CI / all-required (pull_request) is green.

  - slug: staging-smoke
    numeric_alias: 3
    pr_section_marker: "Staging-smoke verified or pending"
    required_teams: [engineers]
+    ai_ack_eligible: true
    description: >-
      Link to canary run, or "scheduled post-merge". Ack from any
      engineer (core-devops/infra-sre are members of engineers team).
+      AI ack valid only when CI / all-required (pull_request) is green.

  - slug: root-cause
    numeric_alias: 4
@@ -120,6 +146,7 @@ items:
    numeric_alias: 5
    pr_section_marker: "Five-Axis review walked"
    required_teams: [engineers]
+    ai_ack_eligible: true
    description: >-
      Correctness / readability / architecture / security / performance.
      Ack from any non-author engineer.
@@ -140,6 +167,7 @@ items:
    numeric_alias: 7
    pr_section_marker: "Memory/saved-feedback consulted"
    required_teams: [engineers]
+    ai_ack_eligible: true
    description: >-
      List of feedback memories applicable to this change. Ack from
      any engineer who has the same memory access.