From 6431df92126520f62d3d9b05e394f6cde69e0dc2 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Wed, 3 Jun 2026 01:36:20 +0000 Subject: [PATCH 1/2] docs+test(gate): codify PR-head workflow-selection rule + add live-fire + stale-head regression tests (#2159) 1. DOC - runbooks/dev-sop.md: - Documents the Gitea PR-head workflow-selection rule (workflows load from PR head, not base). - Describes the standard core-PR flow: auto-fire for fresh heads, slash-refire fallback for stale heads. - Provides quick-check curl command and rebase vs. slash-refire guidance. 2. LIVE-FIRE TEST - test_gate_auto_fire_live.py: - Runtime verification that submitting an APPROVED review to a PR whose head contains the current gate workflows causes Gitea Actions to queue qa-review + security-review and POST the BP-required contexts. - Fix: handle string trigger form in addition to list/dict. 3. STALE-HEAD DIAGNOSTIC - test_gate_stale_head_diagnostic.py: - Local-checkout baseline + optional PR_NUMBER mode. - Fix: avoid double exc.read() on HTTPError (always returned empty). - Fix: handle string trigger form. CR round-2 fixes: - Reverted out-of-scope Go changes that accidentally reverted the #2162 platform-managed fail-closed guard. - Restored regression tests and env-mocking that were removed from Go tests. --- .../scripts/tests/test_gate_auto_fire_live.py | 174 ++++++++++++++++++ .../tests/test_gate_stale_head_diagnostic.py | 145 +++++++++++++++ runbooks/dev-sop.md | 131 +++++++++++++ 3 files changed, 450 insertions(+) create mode 100644 .gitea/scripts/tests/test_gate_auto_fire_live.py create mode 100644 .gitea/scripts/tests/test_gate_stale_head_diagnostic.py create mode 100644 runbooks/dev-sop.md diff --git a/.gitea/scripts/tests/test_gate_auto_fire_live.py b/.gitea/scripts/tests/test_gate_auto_fire_live.py new file mode 100644 index 000000000..4e0684b71 --- /dev/null +++ b/.gitea/scripts/tests/test_gate_auto_fire_live.py @@ -0,0 +1,174 @@ +"""Live-fire regression test for #2159 — gate auto-fire runtime verification. + +Static tests (test_gate_review_auto_fire.py) validate that the workflow YAML +is structurally correct. This test validates the *runtime* path: submitting an +APPROVED review to a PR whose head contains the current gate workflows causes +Gitea Actions to queue the qa-review + security-review workflows and POST the +branch-protection-required (pull_request_target) contexts within a reasonable +window. + +Skipped when Gitea API credentials are not available. Intended for: + - manual developer verification + - CI jobs provisioned with a service-account token + +Environment: + GITEA_HOST — default: git.moleculesai.app + GITEA_TOKEN — token with read:repository + write:issues (for review POST) + REPO — default: molecule-ai/molecule-core + LIVEFIRE_PR_NUMBER — optional; if omitted the test tries to find a + suitable open PR automatically, or skips. + LIVEFIRE_TIMEOUT_SEC — default: 120 +""" + +import base64 +import json +import os +import time +import urllib.error +import urllib.request +from pathlib import Path + +import pytest + +import yaml + +GITEA_HOST = os.environ.get("GITEA_HOST", "git.moleculesai.app") +GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "") +REPO = os.environ.get("REPO", "molecule-ai/molecule-core") +LIVEFIRE_PR_NUMBER = os.environ.get("LIVEFIRE_PR_NUMBER", "") +LIVEFIRE_TIMEOUT_SEC = int(os.environ.get("LIVEFIRE_TIMEOUT_SEC", "120")) + +REQUIRED_CONTEXTS = [ + "qa-review / approved (pull_request_target)", + "security-review / approved (pull_request_target)", +] + +skip_no_token = pytest.mark.skipif( + not GITEA_TOKEN, + reason="GITEA_TOKEN not set — live-fire test requires API credentials", +) + + +def _api(method: str, path: str, body: dict | None = None) -> tuple[int, dict]: + url = f"https://{GITEA_HOST}/api/v1{path}" + headers = { + "Authorization": f"token {GITEA_TOKEN}", + "Content-Type": "application/json", + } + data = json.dumps(body).encode() if body else None + req = urllib.request.Request(url, data=data, headers=headers, method=method) + try: + with urllib.request.urlopen(req, timeout=30) as resp: + raw = resp.read() + code = resp.status + except urllib.error.HTTPError as exc: + raw = exc.read() + code = exc.code + payload = json.loads(raw) if raw else {} + return code, payload + + +def _get_pr(number: int) -> dict: + code, pr = _api("GET", f"/repos/{REPO}/pulls/{number}") + if code != 200: + pytest.fail(f"GET /pulls/{number} returned HTTP {code}: {pr}") + return pr + + +def _list_open_prs() -> list[dict]: + code, prs = _api("GET", f"/repos/{REPO}/pulls?state=open&limit=50") + if code != 200: + pytest.fail(f"GET /pulls?state=open returned HTTP {code}: {prs}") + return prs + + +def _pr_has_trigger_in_head(pr: dict) -> bool: + """Return True if the PR head contains pull_request_review in both workflows.""" + head_sha = pr["head"]["sha"] + for wf_name in ("qa-review.yml", "security-review.yml"): + path = f"/repos/{REPO}/contents/.gitea/workflows/{wf_name}?ref={head_sha}" + code, payload = _api("GET", path) + if code != 200: + return False + raw = base64.b64decode(payload.get("content", "")).decode("utf-8") + wf = yaml.safe_load(raw) + on = wf.get(True) or wf.get("on") or {} + if isinstance(on, str): + if on != "pull_request_review": + return False + elif "pull_request_review" not in on: + return False + return True + + +def _find_suitable_pr() -> dict: + if LIVEFIRE_PR_NUMBER: + pr = _get_pr(int(LIVEFIRE_PR_NUMBER)) + if pr.get("state") != "open": + pytest.skip(f"PR {LIVEFIRE_PR_NUMBER} is not open") + return pr + + prs = _list_open_prs() + for pr in prs: + if _pr_has_trigger_in_head(pr): + return pr + pytest.skip("No open PR found whose head contains the pull_request_review trigger") + + +def _submit_approved_review(pr_number: int) -> None: + code, _ = _api( + "POST", + f"/repos/{REPO}/pulls/{pr_number}/reviews", + {"body": "Live-fire test APPROVED review", "event": "APPROVE"}, + ) + # 200 = created, 422 = review already exists (idempotent enough for our purposes) + if code not in (200, 201, 422): + pytest.fail(f"POST /pulls/{pr_number}/reviews returned HTTP {code}") + + +def _poll_status_contexts(sha: str, timeout_sec: int = LIVEFIRE_TIMEOUT_SEC) -> dict[str, str]: + deadline = time.monotonic() + timeout_sec + found: dict[str, str] = {} + while time.monotonic() < deadline: + code, statuses = _api("GET", f"/repos/{REPO}/statuses/{sha}?limit=100") + if code == 200: + for st in statuses: + ctx = st.get("context", "") + if ctx in REQUIRED_CONTEXTS: + found[ctx] = st.get("state", st.get("status", "")) + if all(ctx in found for ctx in REQUIRED_CONTEXTS): + return found + time.sleep(5) + return found + + +@skip_no_token +class TestGateAutoFireLive: + def test_auto_fire_posts_required_contexts(self): + """Submit APPROVED review; assert BP-required contexts appear within timeout.""" + pr = _find_suitable_pr() + pr_number = pr["number"] + head_sha = pr["head"]["sha"] + + # Pre-check: ensure contexts are not already present from a previous run. + # We tolerate stale contexts; the test looks for a fresh appearance. + _submit_approved_review(pr_number) + + found = _poll_status_contexts(head_sha) + + missing = [ctx for ctx in REQUIRED_CONTEXTS if ctx not in found] + if missing: + pytest.fail( + f"After {LIVEFIRE_TIMEOUT_SEC}s, contexts still missing: {missing}. " + f"Found: {found}. " + f"PR #{pr_number} head={head_sha}. " + f"This indicates the pull_request_review trigger did not fire at runtime." + ) + + # The contexts appeared — that's the proof of auto-fire. + # We do NOT assert success vs failure; the evaluator decides that. + # The point of #2159 is that the workflows QUEUE and POST at all. + for ctx, state in found.items(): + assert state in ("pending", "success", "failure"), ( + f"Unexpected state {state!r} for {ctx}" + ) diff --git a/.gitea/scripts/tests/test_gate_stale_head_diagnostic.py b/.gitea/scripts/tests/test_gate_stale_head_diagnostic.py new file mode 100644 index 000000000..db3d98fab --- /dev/null +++ b/.gitea/scripts/tests/test_gate_stale_head_diagnostic.py @@ -0,0 +1,145 @@ +"""Stale-head diagnostic test for #2159. + +Deterministically reports whether a PR's HEAD contains the pull_request_review +trigger in qa-review.yml and security-review.yml. If the trigger is absent, +auto-fire on APPROVED review is impossible for that PR. + +This is used as a self-diagnostic for future stale-PR situations (PRs opened +before #2157 merged, or branches cut from old bases). + +Environment: + GITEA_HOST — default: git.moleculesai.app + GITEA_TOKEN — token with read:repository scope (optional; falls back to local files) + REPO — default: molecule-ai/molecule-core + PR_NUMBER — required when running against a real PR +""" + +import base64 +import json +import os +import urllib.error +import urllib.request +from pathlib import Path + +import pytest + +import yaml + +GITEA_HOST = os.environ.get("GITEA_HOST", "git.moleculesai.app") +GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "") +REPO = os.environ.get("REPO", "molecule-ai/molecule-core") +PR_NUMBER = os.environ.get("PR_NUMBER", "") + +ROOT = Path(__file__).resolve().parents[2] + + +def _api(method: str, path: str) -> tuple[int, dict]: + url = f"https://{GITEA_HOST}/api/v1{path}" + headers = {"Authorization": f"token {GITEA_TOKEN}"} + req = urllib.request.Request(url, headers=headers, method=method) + try: + with urllib.request.urlopen(req, timeout=30) as resp: + return resp.status, json.loads(resp.read()) + except urllib.error.HTTPError as exc: + body = exc.read() + return exc.code, json.loads(body) if body else {} + + +def _fetch_workflow_from_ref(workflow_name: str, ref: str) -> dict: + path = f"/repos/{REPO}/contents/.gitea/workflows/{workflow_name}?ref={ref}" + code, payload = _api("GET", path) + if code != 200: + pytest.fail( + f"GET {path} returned HTTP {code}: {payload}. " + f"Cannot determine whether PR head contains the trigger." + ) + raw = base64.b64decode(payload.get("content", "")).decode("utf-8") + return yaml.safe_load(raw) + + +def _fetch_workflow_local(workflow_name: str) -> dict: + p = ROOT / "workflows" / workflow_name + if not p.exists(): + pytest.fail(f"Local workflow file not found: {p}") + return yaml.safe_load(p.read_text()) + + +def _has_pull_request_review_trigger(wf: dict) -> bool: + on = wf.get(True) or wf.get("on") or {} + if isinstance(on, list): + return "pull_request_review" in on + if isinstance(on, dict): + return "pull_request_review" in on + if isinstance(on, str): + return on == "pull_request_review" + return False + + +def _diagnose_pr(pr_number: int) -> dict[str, bool]: + code, pr = _api("GET", f"/repos/{REPO}/pulls/{pr_number}") + if code != 200: + pytest.fail(f"GET /pulls/{pr_number} returned HTTP {code}: {pr}") + + head_ref = pr["head"]["ref"] + head_sha = pr["head"]["sha"] + + results: dict[str, bool] = {} + for wf_name in ("qa-review.yml", "security-review.yml"): + wf = _fetch_workflow_from_ref(wf_name, head_sha) + results[wf_name] = _has_pull_request_review_trigger(wf) + + return { + "pr_number": pr_number, + "head_ref": head_ref, + "head_sha": head_sha, + "triggers": results, + "auto_fire_possible": all(results.values()), + } + + +def _diagnose_local() -> dict[str, bool]: + results: dict[str, bool] = {} + for wf_name in ("qa-review.yml", "security-review.yml"): + wf = _fetch_workflow_local(wf_name) + results[wf_name] = _has_pull_request_review_trigger(wf) + return { + "pr_number": None, + "head_ref": "local-checkout", + "head_sha": None, + "triggers": results, + "auto_fire_possible": all(results.values()), + } + + +class TestStaleHeadDiagnostic: + """Test deterministically reports 'auto-fire impossible for this PR' when + the PR head lacks the pull_request_review trigger. + """ + + def test_local_checkout_has_pull_request_review_trigger(self): + """Local files (the ones in this checkout) must contain the trigger. + + This is the baseline: if the checkout itself is stale, every PR cut + from it will also be stale. + """ + diag = _diagnose_local() + missing = [n for n, ok in diag["triggers"].items() if not ok] + if missing: + pytest.fail( + f"Local checkout is missing pull_request_review trigger in: {missing}. " + f"This branch cannot produce PRs that auto-fire." + ) + + @pytest.mark.skipif(not GITEA_TOKEN, reason="GITEA_TOKEN not set") + @pytest.mark.skipif(not PR_NUMBER, reason="PR_NUMBER not set") + def test_pr_head_has_pull_request_review_trigger(self): + """When PR_NUMBER is given, assert the PR head contains the trigger.""" + diag = _diagnose_pr(int(PR_NUMBER)) + if not diag["auto_fire_possible"]: + missing = [n for n, ok in diag["triggers"].items() if not ok] + pytest.fail( + f"Auto-fire impossible for PR #{diag['pr_number']}. " + f"Head ref={diag['head_ref']} sha={diag['head_sha']}. " + f"Missing trigger in: {missing}. " + f"This PR needs /qa-recheck + /security-recheck fallback, or a rebase onto current main." + ) diff --git a/runbooks/dev-sop.md b/runbooks/dev-sop.md new file mode 100644 index 000000000..caf2a3a45 --- /dev/null +++ b/runbooks/dev-sop.md @@ -0,0 +1,131 @@ +# Developer SOP — PR review gate auto-fire and stale-head handling + +> Last updated: 2026-06-03 (cp#2159 follow-up) +> +> Applies to: all core-PR authors and reviewers on `molecule-core` and sibling +> repos using the `qa-review` + `security-review` branch-protection gates. + +--- + +## 1. Gitea PR-head workflow-selection rule + +**Rule:** For `pull_request_target` and `pull_request_review` events, Gitea +loads the workflow definition from the **PR's HEAD branch**, not from the +base (`main`) branch. + +This is different from GitHub Actions, where `pull_request_target` always +loads workflows from the base branch. Gitea's behaviour means: + +- A PR that was opened **before** the `pull_request_review` trigger was added +to `qa-review.yml` / `security-review.yml` will **NOT** auto-fire on review, +because its HEAD still contains the old workflow YAML (no trigger). + +- A PR that was opened **after** the trigger was added (or that has been +rebased onto a commit containing the trigger) **WILL** auto-fire, because its +HEAD contains the new workflow YAML. + +### Ops implication + +| PR head contains `pull_request_review` trigger? | Behaviour on APPROVED review | +|---|---| +| **Yes** (cut from current main, or rebased) | Workflows auto-queue, evaluate, and POST the `(pull_request_target)` context automatically. No slash-command needed. | +| **No** (stale head, opened before #2157) | Nothing fires. Use `/qa-recheck` + `/security-recheck` slash-commands in a PR comment, OR rebase onto current main. | + +--- + +## 2. Standard core-PR flow (post-#2157) + +``` +1. Author opens PR from a branch based on current main + → qa-review + security-review workflows run on pull_request_target + → status contexts post (initial eval, usually red until reviews land) + +2. Reviewers submit real APPROVED reviews + → If PR head has the trigger: workflows AUTO-FIRE on pull_request_review + → Contexts flip green (or stay red if reviewer is not in team) + +3. [Optional] If contexts did not flip (stale head, event lost, etc.): + → Anyone can comment `/qa-recheck` or `/security-recheck` + → sop-checklist.yml refires the evaluator (read-only, idempotent) + +4. Both qa-review + security-review contexts are green + → Plain Do:merge (no force-merge needed) +``` + +### Key point + +The `/qa-recheck` and `/security-recheck` commands are a **backstop**, not the +primary path. PRs cut from current main should auto-fire without manual +intervention. + +--- + +## 3. Diagnosing a stale head + +If a PR has real team-member APPROVED reviews but the qa/security contexts +remain red and no workflow run appears on the PR's "Actions" tab for the +review event, the PR head is likely stale. + +### Quick check + +```bash +# From the PR page, look at the head commit SHA, then: +curl -sS "https://git.moleculesai.app/api/v1/repos/molecule-ai/molecule-core/contents/.gitea/workflows/qa-review.yml?ref=" \ + | jq -r '.content' | base64 -d | grep -c 'pull_request_review' +# 0 → stale head (no trigger in that version of the workflow) +# >0 → trigger present; auto-fire SHOULD work (if it didn't, file a tracker) +``` + +### Automated diagnostic + +The test suite includes `test_gate_stale_head_diagnostic.py`, which reports +"auto-fire impossible for this PR" when the head lacks the trigger. Run it +in CI or locally with: + +```bash +PR_NUMBER=123 python -m pytest .gitea/scripts/tests/test_gate_stale_head_diagnostic.py -v +``` + +--- + +## 4. Rebasing vs. slash-refire + +| Approach | When to use | Trade-off | +|---|---|---| +| **Rebase onto current main** | PR is genuinely stale (head lacks trigger OR head is far behind main) | Clean history, gets all recent fixes, but requires force-push and re-approval if the branch was protected | +| **`/qa-recheck` + `/security-recheck`** | PR head is recent but the review event was missed, or you want to avoid rebase churn | Quick, no force-push, but does NOT fix a missing trigger in the head | + +**Do not** use slash-refire as a substitute for rebasing a stale head. If the +workflow YAML in the PR head does not contain `pull_request_review`, no amount +of rechecking will make auto-fire work. + +--- + +## 5. Live-fire verification + +The `test_gate_auto_fire_live.py` regression test exercises the full runtime +path: it submits an APPROVED review to a test PR and polls for the +`(pull_request_target)` status contexts. It is skipped when no API token is +available, and is intended to catch runtime non-fire that static structural +tests (e.g. `test_gate_review_auto_fire.py`) cannot detect. + +Run manually with: + +```bash +export GITEA_HOST=git.moleculesai.app +export GITEA_TOKEN= +export REPO=molecule-ai/molecule-core +export LIVEFIRE_PR_NUMBER= +python -m pytest .gitea/scripts/tests/test_gate_auto_fire_live.py -v +``` + +--- + +## References + +- #2159 — gate auto-trigger not firing (root cause: stale PR heads lacking +the `pull_request_review` trigger, NOT a workflow code defect) +- #765 — static structural regression test for gate configuration +- #2157 — merged trigger addition (`pull_request_review` types: [submitted]) +- #2020 — milestone confirming gate infrastructure is stable +- RFC#324 — qa-review + security-review design -- 2.52.0 From 77573074e49009d90e77af1ecd5c569526301021 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Wed, 3 Jun 2026 10:20:20 +0000 Subject: [PATCH 2/2] =?UTF-8?q?fix(gate):=20CR2=20RC=208365=20=E2=80=94=20?= =?UTF-8?q?APPROVED=20event=20value=20+=20fresh-context=20proof=20(#2163)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - test_gate_auto_fire_live.py: change review event from \"APPROVE\" to \"APPROVED\" to match Gitea API contract. - Add _get_status_updated_at() to capture pre-existing status timestamps before review submission. - Add _poll_fresh_statuses() that only accepts statuses whose updated_at differs from the pre-existing record, proving the context was posted AFTER the review rather than tolerating stale contexts. - Remove misleading \"tolerate stale contexts\" comment. Co-Authored-By: Claude Opus 4.7 --- .../scripts/tests/test_gate_auto_fire_live.py | 48 ++++++++++++++----- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/.gitea/scripts/tests/test_gate_auto_fire_live.py b/.gitea/scripts/tests/test_gate_auto_fire_live.py index 4e0684b71..7661c2d96 100644 --- a/.gitea/scripts/tests/test_gate_auto_fire_live.py +++ b/.gitea/scripts/tests/test_gate_auto_fire_live.py @@ -115,18 +115,37 @@ def _find_suitable_pr() -> dict: pytest.skip("No open PR found whose head contains the pull_request_review trigger") -def _submit_approved_review(pr_number: int) -> None: - code, _ = _api( +def _submit_approved_review(pr_number: int) -> dict: + code, review = _api( "POST", f"/repos/{REPO}/pulls/{pr_number}/reviews", - {"body": "Live-fire test APPROVED review", "event": "APPROVE"}, + {"body": "Live-fire test APPROVED review", "event": "APPROVED"}, ) # 200 = created, 422 = review already exists (idempotent enough for our purposes) if code not in (200, 201, 422): pytest.fail(f"POST /pulls/{pr_number}/reviews returned HTTP {code}") + return review -def _poll_status_contexts(sha: str, timeout_sec: int = LIVEFIRE_TIMEOUT_SEC) -> dict[str, str]: +def _get_status_updated_at(sha: str) -> dict[str, str]: + """Return mapping context -> updated_at for required contexts on this SHA.""" + code, statuses = _api("GET", f"/repos/{REPO}/statuses/{sha}?limit=100") + if code != 200: + return {} + result: dict[str, str] = {} + for st in statuses: + ctx = st.get("context", "") + if ctx in REQUIRED_CONTEXTS: + result[ctx] = st.get("updated_at", st.get("created_at", "")) + return result + + +def _poll_fresh_statuses( + sha: str, + prior_updated_at: dict[str, str], + timeout_sec: int = LIVEFIRE_TIMEOUT_SEC, +) -> dict[str, str]: + """Poll until required contexts appear with updated_at fresher than prior.""" deadline = time.monotonic() + timeout_sec found: dict[str, str] = {} while time.monotonic() < deadline: @@ -135,7 +154,10 @@ def _poll_status_contexts(sha: str, timeout_sec: int = LIVEFIRE_TIMEOUT_SEC) -> for st in statuses: ctx = st.get("context", "") if ctx in REQUIRED_CONTEXTS: - found[ctx] = st.get("state", st.get("status", "")) + updated_at = st.get("updated_at", st.get("created_at", "")) + # Fresh if the context was absent before, OR its timestamp changed. + if ctx not in prior_updated_at or updated_at != prior_updated_at[ctx]: + found[ctx] = st.get("state", st.get("status", "")) if all(ctx in found for ctx in REQUIRED_CONTEXTS): return found time.sleep(5) @@ -145,27 +167,29 @@ def _poll_status_contexts(sha: str, timeout_sec: int = LIVEFIRE_TIMEOUT_SEC) -> @skip_no_token class TestGateAutoFireLive: def test_auto_fire_posts_required_contexts(self): - """Submit APPROVED review; assert BP-required contexts appear within timeout.""" + """Submit APPROVED review; assert BP-required contexts appear fresh within timeout.""" pr = _find_suitable_pr() pr_number = pr["number"] head_sha = pr["head"]["sha"] - # Pre-check: ensure contexts are not already present from a previous run. - # We tolerate stale contexts; the test looks for a fresh appearance. + # Capture pre-existing status timestamps so we can prove FRESH contexts + # were posted after the review submission (not stale from a prior run). + prior_updated_at = _get_status_updated_at(head_sha) + _submit_approved_review(pr_number) - found = _poll_status_contexts(head_sha) + found = _poll_fresh_statuses(head_sha, prior_updated_at) missing = [ctx for ctx in REQUIRED_CONTEXTS if ctx not in found] if missing: pytest.fail( - f"After {LIVEFIRE_TIMEOUT_SEC}s, contexts still missing: {missing}. " - f"Found: {found}. " + f"After {LIVEFIRE_TIMEOUT_SEC}s, fresh contexts still missing: {missing}. " + f"Found: {found}. Prior timestamps: {prior_updated_at}. " f"PR #{pr_number} head={head_sha}. " f"This indicates the pull_request_review trigger did not fire at runtime." ) - # The contexts appeared — that's the proof of auto-fire. + # The contexts appeared fresh — that's the proof of auto-fire. # We do NOT assert success vs failure; the evaluator decides that. # The point of #2159 is that the workflows QUEUE and POST at all. for ctx, state in found.items(): -- 2.52.0