diff --git a/.gitea/scripts/sop-checklist.py b/.gitea/scripts/sop-checklist.py index 4ea0c1df2..84d8a0077 100644 --- a/.gitea/scripts/sop-checklist.py +++ b/.gitea/scripts/sop-checklist.py @@ -895,6 +895,47 @@ def resolve_required_teams(item: dict[str, Any], high_risk: bool) -> list[str]: return list(item.get("required_teams") or []) +# --------------------------------------------------------------------------- +# CI status validation for testing-class AI acks (internal#760 CTO hardening) +# --------------------------------------------------------------------------- + +# Slugs that require CI / all-required green before an AI ack is valid. +_TESTING_CLASS_SLUGS = {"comprehensive-testing", "local-postgres-e2e", "staging-smoke"} + +# Human-only carve-out: these items can NEVER be acked by AI, regardless +# of config drift. Any item in this set MUST NOT have ai_ack_eligible. +# migration / schema are future-proofing — not yet in config items, but +# the code guard rejects them proactively (CTO hardening, msg 1388c76f). +_HUMAN_ONLY_SLUGS = {"root-cause", "no-backwards-compat", "migration", "schema"} + + +def get_ci_status(client: GiteaClient, owner: str, repo: str, sha: str) -> str: + """Return the state of CI / all-required (pull_request) for `sha`. + + Looks through the commit statuses and returns the state string + ("success", "failure", "pending", "error") or "missing" if the + context is not found. This prevents an AI agent from attesting + "tests pass" independently of the actual CI run. + """ + code, data = client._req( # noqa: SLF001 + "GET", f"/repos/{owner}/{repo}/statuses/{sha}" + ) + if code != 200: + return "unknown" + if not data or not isinstance(data, list): + return "missing" + # Gitea returns statuses newest-first. Find the latest for our context. + for status in data: + if status.get("context") == "CI / all-required (pull_request)": + return status.get("state", "unknown") + return "missing" + + +# --------------------------------------------------------------------------- +# Main entry point +# --------------------------------------------------------------------------- + + def main(argv: list[str] | None = None) -> int: p = argparse.ArgumentParser() p.add_argument("--owner", required=True) @@ -988,6 +1029,9 @@ def main(argv: list[str] | None = None) -> int: # one membership lookup per team. team_member_cache: dict[tuple[str, int], bool | None] = {} + # Pre-resolve the ai-sop-ack team id once (None if the team does not exist). + ai_sop_ack_team_id = client.resolve_team_id(args.owner, "ai-sop-ack") + def probe(slug: str, users: list[str]) -> list[str]: # `slug` may be either an items-key (compute_ack_state caller) OR # an n/a-gate key (compute_na_state caller). Previously this hard @@ -1042,14 +1086,18 @@ def main(argv: list[str] | None = None) -> int: file=sys.stderr, ) approved: list[str] = [] + rejected_ai_ineligible: list[str] = [] + rejected_ci_not_green: list[str] = [] for u in users: + # 1) Human required_teams membership check + in_human_team = False for tid in team_ids: cache_key = (u, tid) if cache_key not in team_member_cache: team_member_cache[cache_key] = client.is_team_member(tid, u) result = team_member_cache[cache_key] if result is True: - approved.append(u) + in_human_team = True break if result is None: print( @@ -1059,6 +1107,44 @@ def main(argv: list[str] | None = None) -> int: ) # Treat as not-in-team for this user/team pair; loop # may still find membership in another team. + if in_human_team: + approved.append(u) + continue + + # 2) AI-sop-ack team membership check (only for items that allow it). + if slug in items_by_slug: + item = items_by_slug[slug] + # Defensive: human-only carve-out is enforced in code, not just + # config. Even if ai_ack_eligible were mistakenly added to a + # migration/schema item, the AI path is rejected here. + if slug in _HUMAN_ONLY_SLUGS: + rejected_ai_ineligible.append(u) + continue + if item.get("ai_ack_eligible") and ai_sop_ack_team_id is not None: + cache_key = (u, ai_sop_ack_team_id) + if cache_key not in team_member_cache: + team_member_cache[cache_key] = client.is_team_member( + ai_sop_ack_team_id, u + ) + result = team_member_cache[cache_key] + if result is True: + # 2a) Testing-class items require real CI artifact evidence. + if slug in _TESTING_CLASS_SLUGS: + ci_state = get_ci_status( + client, args.owner, args.repo, head_sha + ) + if ci_state != "success": + print( + f"::warning::AI ack for {slug} rejected: " + f"CI / all-required is {ci_state}, not success", + file=sys.stderr, + ) + rejected_ci_not_green.append(u) + continue + approved.append(u) + continue + # If we get here, user is not approved for this slug. + rejected_ai_ineligible.append(u) return approved ack_state = compute_ack_state( diff --git a/.gitea/scripts/tests/_review_check_fixture.py b/.gitea/scripts/tests/_review_check_fixture.py index bb0a52f8d..e31e92b86 100644 --- a/.gitea/scripts/tests/_review_check_fixture.py +++ b/.gitea/scripts/tests/_review_check_fixture.py @@ -21,6 +21,7 @@ Scenarios: T16_comments_generic_approval — reviews empty; comments have "APPROVED" by team member → exit 0 T17_comments_no_approval — reviews empty; comments have no approval keywords → exit 1 T18_review_wrong_team_comment_right_team — review candidate 404s, comment candidate passes + T19_ai_sop_ack_approved — ai-sop-ack member APPROVED review → team probe 404 → exit 1 Usage: FIXTURE_STATE_DIR=/tmp/x python3 _review_check_fixture.py 8080 @@ -116,6 +117,12 @@ class Handler(http.server.BaseHTTPRequestHandler): {"state": "CHANGES_REQUESTED", "dismissed": False, "user": {"login": "bob"}, "commit_id": "abc1234"}, {"state": "APPROVED", "dismissed": False, "user": {"login": "core-devops"}, "commit_id": "abc1234"}, ]) + if sc == "T19_ai_sop_ack_approved": + # ai-sop-ack member submitted APPROVED review — must NOT count + # toward qa-review (team_id=20) or security-review (team_id=21). + return self._json(200, [ + {"state": "APPROVED", "dismissed": False, "user": {"login": "ai-reviewer"}, "commit_id": "abc1234"}, + ]) # Default: one non-author APPROVED return self._json(200, [ {"state": "APPROVED", "dismissed": False, "user": {"login": "core-devops"}, "commit_id": "abc1234"}, @@ -157,6 +164,9 @@ class Handler(http.server.BaseHTTPRequestHandler): return self._empty(403) if sc == "T18_review_wrong_team_comment_right_team" and login == "core-devops": return self._empty(404) + if sc == "T19_ai_sop_ack_approved" and login == "ai-reviewer": + # ai-sop-ack member is NOT in qa (20) or security (21). + return self._empty(404) # T7_team_member: member return self._empty(204) diff --git a/.gitea/scripts/tests/test_review_check.sh b/.gitea/scripts/tests/test_review_check.sh index 795acab9c..683b41dc0 100755 --- a/.gitea/scripts/tests/test_review_check.sh +++ b/.gitea/scripts/tests/test_review_check.sh @@ -205,6 +205,8 @@ chmod +x "$FIXTURE_DIR/bin/curl" # Helper: run the script with fixture environment run_review_check() { local scenario="$1" + local team="${2:-qa}" + local team_id="${3:-20}" echo "$scenario" >"$FIX_STATE_DIR/scenario" local out set +e @@ -215,8 +217,8 @@ run_review_check() { REPO="molecule-ai/molecule-core" \ PR_NUMBER="999" \ DEFAULT_BRANCH="main" \ - TEAM="qa" \ - TEAM_ID="20" \ + TEAM="$team" \ + TEAM_ID="$team_id" \ REVIEW_CHECK_DEBUG="0" \ REVIEW_CHECK_STRICT="0" \ bash "$SCRIPT" 2>&1 @@ -372,6 +374,25 @@ assert_eq "T18 exit code 0 (comment approval still considered)" "0" "$T18_RC" assert_contains "T18 comment candidate notice" "comment-based approval" "$T18_OUT" assert_contains "T18 comment approver accepted" "APPROVED by core-qa-agent" "$T18_OUT" +# T19 — ai-sop-ack member APPROVED review must NOT count toward qa-review +# or security-review (R1 hardening refinement, msg 1388c76f). +echo +echo "== T19 ai-sop-ack APPROVED review excluded from qa-review gate ==" +T19_OUT=$(run_review_check "T19_ai_sop_ack_approved" "qa" "20") +T19_RC=$(cat "$FIX_STATE_DIR/last_rc") +assert_eq "T19 exit code 1 (ai-sop-ack not in qa team)" "1" "$T19_RC" +assert_contains "T19 ai-reviewer excluded from qa" "candidates: ai-reviewer" "$T19_OUT" +assert_contains "T19 none are in qa team" "none are in team" "$T19_OUT" + +# T20 — same ai-sop-ack member must also be excluded from security-review gate. +echo +echo "== T20 ai-sop-ack APPROVED review excluded from security-review gate ==" +T20_OUT=$(run_review_check "T19_ai_sop_ack_approved" "security" "21") +T20_RC=$(cat "$FIX_STATE_DIR/last_rc") +assert_eq "T20 exit code 1 (ai-sop-ack not in security team)" "1" "$T20_RC" +assert_contains "T20 ai-reviewer excluded from security" "candidates: ai-reviewer" "$T20_OUT" +assert_contains "T20 none are in security team" "none are in team" "$T20_OUT" + echo echo "------" echo "PASS=$PASS FAIL=$FAIL" diff --git a/.gitea/scripts/tests/test_sop_checklist.py b/.gitea/scripts/tests/test_sop_checklist.py index 3ac2c1321..5a81e45f1 100644 --- a/.gitea/scripts/tests/test_sop_checklist.py +++ b/.gitea/scripts/tests/test_sop_checklist.py @@ -1003,3 +1003,299 @@ class TestComputeNaStateAcceptsGateNotInItems(unittest.TestCase): comments, "alice", na_gates, lambda *_: ["alice"] ) self.assertFalse(na_state["security-review"]["declared"]) + + +# --------------------------------------------------------------------------- +# internal#760 ceremony — ai-sop-ack team + ai_ack_eligible per-item flag +# --------------------------------------------------------------------------- + + +class TestAIAckEligibleConfig(unittest.TestCase): + """CTO-controlled allowlist (msg 1388c76f): + ai_ack_eligible: comprehensive-testing, local-postgres-e2e, staging-smoke, + five-axis-review, memory-consulted + human-only: root-cause, no-backwards-compat + """ + + def test_ai_ack_eligible_items(self): + cfg = sop.load_config(CONFIG_PATH) + items_by_slug = {it["slug"]: it for it in cfg["items"]} + eligible = { + "comprehensive-testing", + "local-postgres-e2e", + "staging-smoke", + "five-axis-review", + "memory-consulted", + } + for slug in eligible: + self.assertTrue( + items_by_slug[slug].get("ai_ack_eligible"), + f"{slug} must be ai_ack_eligible", + ) + + def test_human_only_items(self): + cfg = sop.load_config(CONFIG_PATH) + items_by_slug = {it["slug"]: it for it in cfg["items"]} + human_only = {"root-cause", "no-backwards-compat"} + for slug in human_only: + self.assertFalse( + items_by_slug[slug].get("ai_ack_eligible", False), + f"{slug} must NOT be ai_ack_eligible (human-only)", + ) + + def test_testing_class_slugs_constant(self): + """_TESTING_CLASS_SLUGS must match the three testing items.""" + self.assertEqual( + sop._TESTING_CLASS_SLUGS, + {"comprehensive-testing", "local-postgres-e2e", "staging-smoke"}, + ) + + def test_human_only_slugs_constant(self): + """_HUMAN_ONLY_SLUGS encodes the migration/schema carve-out. + + If this set changes, the CTO must approve the widening. + """ + self.assertEqual( + sop._HUMAN_ONLY_SLUGS, + {"root-cause", "no-backwards-compat", "migration", "schema"}, + ) + + def test_human_only_invariant_enforced_in_code_and_config(self): + """Every config-present slug in _HUMAN_ONLY_SLUGS must be human-only. + + This test fails if a migration/schema-class item accidentally + acquires ai_ack_eligible via config drift. migration/schema are + future-proofing slugs not yet in the live config; they are checked + by the production probe closure but skipped here. + """ + cfg = sop.load_config(CONFIG_PATH) + items_by_slug = {it["slug"]: it for it in cfg["items"]} + for slug in sop._HUMAN_ONLY_SLUGS: + if slug not in items_by_slug: + # Future-proofing slug (e.g. migration, schema) — not yet + # in config, but the code guard still rejects AI acks. + continue + self.assertFalse( + items_by_slug[slug].get("ai_ack_eligible", False), + f"{slug} is in _HUMAN_ONLY_SLUGS and must NEVER be ai_ack_eligible", + ) + + +class TestAIAckEligibilityProbe(unittest.TestCase): + """The probe closure in main() delegates to compute_ack_state. + We simulate the AI-ack path by injecting a probe that behaves like + the production probe (human team first, then ai-sop-ack fallback). + """ + + def setUp(self): + self.items = _items_by_slug() + self.aliases = _numeric_aliases() + + def _probe_human_then_ai(self, human_users, ai_users): + """Return users in human_users immediately; users in ai_users only + if the item is ai_ack_eligible.""" + def probe(slug, users): + item = self.items.get(slug, {}) + approved = [] + for u in users: + if u in human_users: + approved.append(u) + elif u in ai_users and item.get("ai_ack_eligible"): + approved.append(u) + return approved + return probe + + def test_ai_ack_passes_for_eligible_item(self): + comments = [_comment("ai-bot", "/sop-ack five-axis-review")] + probe = self._probe_human_then_ai(human_users=set(), ai_users={"ai-bot"}) + state = sop.compute_ack_state( + comments, "alice", self.items, self.aliases, probe + ) + self.assertEqual(state["five-axis-review"]["ackers"], ["ai-bot"]) + + def test_ai_ack_rejected_for_human_only_item(self): + comments = [_comment("ai-bot", "/sop-ack root-cause")] + probe = self._probe_human_then_ai(human_users=set(), ai_users={"ai-bot"}) + state = sop.compute_ack_state( + comments, "alice", self.items, self.aliases, probe + ) + self.assertEqual(state["root-cause"]["ackers"], []) + self.assertIn("ai-bot", state["root-cause"]["rejected"]["not_in_team"]) + + def test_human_ack_still_works_for_ai_eligible_item(self): + comments = [_comment("bob", "/sop-ack comprehensive-testing")] + probe = self._probe_human_then_ai(human_users={"bob"}, ai_users=set()) + state = sop.compute_ack_state( + comments, "alice", self.items, self.aliases, probe + ) + self.assertEqual(state["comprehensive-testing"]["ackers"], ["bob"]) + + def test_ai_ack_rejected_for_testing_item_when_ci_red(self): + # Simulate the production probe that checks CI status for testing items. + # When CI is not green, ai-sop-ack member is rejected. + def probe(slug, users): + item = self.items.get(slug, {}) + approved = [] + for u in users: + if u == "ai-bot" and item.get("ai_ack_eligible"): + # Testing items require CI green; simulate CI red. + if slug in sop._TESTING_CLASS_SLUGS: + continue # rejected: CI not green + approved.append(u) + return approved + + comments = [_comment("ai-bot", "/sop-ack comprehensive-testing")] + state = sop.compute_ack_state( + comments, "alice", self.items, self.aliases, probe + ) + self.assertEqual(state["comprehensive-testing"]["ackers"], []) + + def test_ai_ack_passes_for_testing_item_when_ci_green(self): + # Simulate CI green → AI ack passes. + def probe(slug, users): + item = self.items.get(slug, {}) + approved = [] + for u in users: + if u == "ai-bot" and item.get("ai_ack_eligible"): + if slug in sop._TESTING_CLASS_SLUGS: + # CI is green → allow + pass + approved.append(u) + return approved + + comments = [_comment("ai-bot", "/sop-ack comprehensive-testing")] + state = sop.compute_ack_state( + comments, "alice", self.items, self.aliases, probe + ) + self.assertEqual(state["comprehensive-testing"]["ackers"], ["ai-bot"]) + + +class TestAIAckHumanOnlyMigrationSchema(unittest.TestCase): + """RC 8322: migration and schema items are human-only regardless of + any future config that might accidentally mark them ai_ack_eligible. + + These slugs are not yet in the live config items list; the tests use + synthetic items so the production guard can be exercised directly. + """ + + def setUp(self): + # Synthetic items — if live config ever adds migration/schema, + # they MUST stay human-only. The probe below mirrors the actual + # production closure logic (human team first, then AI fallback + # with _HUMAN_ONLY_SLUGS guard). + self.items = { + "migration": { + "slug": "migration", + "ai_ack_eligible": True, + "required_teams": ["engineers"], + }, + "schema": { + "slug": "schema", + "ai_ack_eligible": True, + "required_teams": ["engineers"], + }, + } + self.aliases = {} + + def _production_like_probe(self, human_users, ai_users): + """Return a probe that mirrors the production closure's guard.""" + + def probe(slug, users): + item = self.items.get(slug, {}) + approved = [] + for u in users: + if u in human_users: + approved.append(u) + elif u in ai_users: + # Production guard: _HUMAN_ONLY_SLUGS rejects AI acks + # regardless of the ai_ack_eligible flag. + if slug in sop._HUMAN_ONLY_SLUGS: + continue + if item.get("ai_ack_eligible"): + approved.append(u) + return approved + + return probe + + def test_ai_ack_rejected_for_migration(self): + comments = [_comment("ai-bot", "/sop-ack migration")] + probe = self._production_like_probe(human_users=set(), ai_users={"ai-bot"}) + state = sop.compute_ack_state( + comments, "alice", self.items, self.aliases, probe + ) + self.assertEqual(state["migration"]["ackers"], []) + self.assertIn("ai-bot", state["migration"]["rejected"]["not_in_team"]) + + def test_ai_ack_rejected_for_schema(self): + comments = [_comment("ai-bot", "/sop-ack schema")] + probe = self._production_like_probe(human_users=set(), ai_users={"ai-bot"}) + state = sop.compute_ack_state( + comments, "alice", self.items, self.aliases, probe + ) + self.assertEqual(state["schema"]["ackers"], []) + self.assertIn("ai-bot", state["schema"]["rejected"]["not_in_team"]) + + def test_human_ack_still_works_for_migration(self): + # Human team member acking migration/schema is unaffected. + comments = [_comment("bob", "/sop-ack migration")] + probe = self._production_like_probe(human_users={"bob"}, ai_users=set()) + state = sop.compute_ack_state( + comments, "alice", self.items, self.aliases, probe + ) + self.assertEqual(state["migration"]["ackers"], ["bob"]) + + def test_human_ack_still_works_for_schema(self): + comments = [_comment("bob", "/sop-ack schema")] + probe = self._production_like_probe(human_users={"bob"}, ai_users=set()) + state = sop.compute_ack_state( + comments, "alice", self.items, self.aliases, probe + ) + self.assertEqual(state["schema"]["ackers"], ["bob"]) + + +class TestGetCIStatus(unittest.TestCase): + """Verify get_ci_status reads the correct context from commit statuses.""" + + def _client_with_statuses(self, statuses): + client = sop.GiteaClient("git.example.com", "tok") + + def fake_req(method, path, body=None, ok_codes=(200, 201, 204)): + return 200, statuses + + client._req = fake_req # type: ignore[method-assign] + return client + + def test_ci_green_returns_success(self): + client = self._client_with_statuses([ + {"context": "CI / all-required (pull_request)", "state": "success"}, + ]) + self.assertEqual( + sop.get_ci_status(client, "o", "r", "sha1"), "success" + ) + + def test_ci_red_returns_failure(self): + client = self._client_with_statuses([ + {"context": "CI / all-required (pull_request)", "state": "failure"}, + ]) + self.assertEqual( + sop.get_ci_status(client, "o", "r", "sha1"), "failure" + ) + + def test_missing_context_returns_missing(self): + client = self._client_with_statuses([ + {"context": "some-other-context", "state": "success"}, + ]) + self.assertEqual( + sop.get_ci_status(client, "o", "r", "sha1"), "missing" + ) + + def test_api_error_returns_unknown(self): + client = sop.GiteaClient("git.example.com", "tok") + + def fake_req(method, path, body=None, ok_codes=(200, 201, 204)): + return 500, {"error": "boom"} + + client._req = fake_req # type: ignore[method-assign] + self.assertEqual( + sop.get_ci_status(client, "o", "r", "sha1"), "unknown" + ) diff --git a/.gitea/sop-checklist-config.yaml b/.gitea/sop-checklist-config.yaml index 3f02b1bf3..3ede62cb5 100644 --- a/.gitea/sop-checklist-config.yaml +++ b/.gitea/sop-checklist-config.yaml @@ -32,6 +32,26 @@ # AUTHOR SELF-ACK IS FORBIDDEN regardless of which team contains them # — the gate script enforces commenter != PR author before checking # team membership. +# +# AI-SOP-ACK TEAM (internal#760 ceremony design, CTO-approved): +# The `ai-sop-ack` team contains AI agent identities that can ack +# SOP-checklist items ON BEHALF OF automated evidence. An AI ack is +# only valid when: +# 1. the item has `ai_ack_eligible: true` +# 2. the item is NOT in the human-only carve-out (migration/schema) +# 3. for testing-class items, CI / all-required (pull_request) is +# green on the current head SHA +# +# AI acks NEVER count toward qa-review or security-review gates — +# those remain human-team-only (enforced by review-check.sh team +# probe against TEAM_ID 20/21). +# +# INITIAL ai_ack_eligible allowlist (CTO-controlled, msg 1388c76f): +# comprehensive-testing, local-postgres-e2e, staging-smoke, +# five-axis-review, memory-consulted +# HUMAN-ONLY carve-out: +# root-cause, no-backwards-compat +# Any widening requires an explicit config change reviewed by CTO. version: 1 @@ -83,25 +103,31 @@ items: numeric_alias: 1 pr_section_marker: "Comprehensive testing performed" required_teams: [qa, engineers] + ai_ack_eligible: true description: >- What was tested, how, edge cases covered. Ack from any qa-team - member (or engineers fallback while qa is small). + member (or engineers fallback while qa is small). AI ack valid + only when CI / all-required (pull_request) is green. - slug: local-postgres-e2e numeric_alias: 2 pr_section_marker: "Local-postgres E2E run" required_teams: [engineers] + ai_ack_eligible: true description: >- Link to local CI artifact, or "N/A: pure-frontend change". Ack from any engineer who can verify the local DB test actually ran. + AI ack valid only when CI / all-required (pull_request) is green. - slug: staging-smoke numeric_alias: 3 pr_section_marker: "Staging-smoke verified or pending" required_teams: [engineers] + ai_ack_eligible: true description: >- Link to canary run, or "scheduled post-merge". Ack from any engineer (core-devops/infra-sre are members of engineers team). + AI ack valid only when CI / all-required (pull_request) is green. - slug: root-cause numeric_alias: 4 @@ -120,6 +146,7 @@ items: numeric_alias: 5 pr_section_marker: "Five-Axis review walked" required_teams: [engineers] + ai_ack_eligible: true description: >- Correctness / readability / architecture / security / performance. Ack from any non-author engineer. @@ -140,6 +167,7 @@ items: numeric_alias: 7 pr_section_marker: "Memory/saved-feedback consulted" required_teams: [engineers] + ai_ack_eligible: true description: >- List of feedback memories applicable to this change. Ack from any engineer who has the same memory access.