From 29f8010d69b9f3b7379ce4f33c154d44cc8794cd Mon Sep 17 00:00:00 2001
From: "Molecule AI Dev Engineer A (Kimi)"
Date: Mon, 15 Jun 2026 02:43:23 +0000
Subject: [PATCH 1/4] ci(gate): add PR diff guard to block stale destructive diffs (#2875)

Adds .gitea/workflows/pr-diff-guard.yml + .gitea/scripts/pr-diff-guard.py.

The guard compares the PR head against the merge base of the target branch
and fails when:
- changed files > 100 (configurable)
- insertions > 10000 (configurable)
- deletions > 5000 (configurable)
- any protected path is deleted (CI workflows, scripts, E2E harness,
  handlers, provisioner, middleware, canvas src)

Motivated by core#2875 / PR #1100, where a stale branch carried a
481-file, ~55k-line destructive diff while CI/all-required stayed green.

Fixes #2875

Co-Authored-By: Claude
---
 .gitea/scripts/pr-diff-guard.py    | 135 +++++++++++++++++++++++++++++
 .gitea/workflows/pr-diff-guard.yml |  30 +++++++
 2 files changed, 165 insertions(+)
 create mode 100644 .gitea/scripts/pr-diff-guard.py
 create mode 100644 .gitea/workflows/pr-diff-guard.yml

diff --git a/.gitea/scripts/pr-diff-guard.py b/.gitea/scripts/pr-diff-guard.py
new file mode 100644
index 00000000..4039b191
--- /dev/null
+++ b/.gitea/scripts/pr-diff-guard.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python3
+"""PR diff-size / destructive-diff guard.
+
+Implements core#2875: block stale branches whose head has drifted into a
+massive destructive diff against current main (e.g., PR #1100: 481 files
+changed, ~55k deletions). The guard runs on every PR and fails loudly when
+any of the configured thresholds are exceeded.
+
+The check compares the PR head against the merge base of the target branch,
+so rebasing a stale branch to a clean, narrow diff will clear the guard.
+"""
+
+from __future__ import annotations
+
+import os
+import subprocess
+import sys
+
+
+PROTECTED_PATHS = (
+    ".gitea/workflows/",
+    ".gitea/scripts/",
+    "tests/e2e/",
+    "workspace-server/internal/handlers/",
+    "workspace-server/internal/provisioner/",
+    "workspace-server/internal/middleware/",
+    "canvas/src/",
+)
+
+DEFAULT_MAX_CHANGED_FILES = int(os.environ.get("DIFFGUARD_MAX_CHANGED_FILES", "100"))
+DEFAULT_MAX_DELETIONS = int(os.environ.get("DIFFGUARD_MAX_DELETIONS", "5000"))
+DEFAULT_MAX_INSERTIONS = int(os.environ.get("DIFFGUARD_MAX_INSERTIONS", "10000"))
+
+
+def git(*args: str) -> str:
+    result = subprocess.run(
+        ["git", *args],
+        capture_output=True,
+        text=True,
+        check=True,
+    )
+    return result.stdout
+
+
+def main() -> int:
+    base_ref = os.environ.get("PR_BASE_REF", os.environ.get("GITHUB_BASE_REF", "main"))
+    head_sha = os.environ.get("PR_HEAD_SHA", os.environ.get("GITHUB_SHA", ""))
+
+    if not head_sha:
+        # In a pull_request workflow, GITHUB_SHA is the merge commit. Use the
+        # PR head ref instead when available.
+        head_sha = os.environ.get("GITHUB_EVENT_PULL_REQUEST_HEAD_SHA", "HEAD")
+    if not head_sha:
+        head_sha = "HEAD"
+
+    # Ensure base ref is available.
+    try:
+        git("rev-parse", f"origin/{base_ref}")
+    except subprocess.CalledProcessError:
+        git("fetch", "origin", base_ref)
+
+    # Find merge base so the diff reflects only what the PR added, not main
+    # drift since the branch was created.
+    merge_base = git("merge-base", f"origin/{base_ref}", head_sha).strip()
+
+    # Diff stat.
+    numstat = git("diff", "--numstat", f"{merge_base}..{head_sha}").strip()
+    changed_files = 0
+    insertions = 0
+    deletions = 0
+    for line in numstat.splitlines():
+        parts = line.split()
+        if len(parts) < 3:
+            continue
+        add, rem = parts[0], parts[1]
+        if add == "-" or rem == "-":
+            continue  # binary
+        insertions += int(add)
+        deletions += int(rem)
+        changed_files += 1
+
+    # Deleted files and protected-path deletions.
+    name_status = git("diff", "--name-status", f"{merge_base}..{head_sha}").strip()
+    deleted_files: list[str] = []
+    protected_deletions: list[str] = []
+    for line in name_status.splitlines():
+        if not line:
+            continue
+        status, path = line.split("\t", 1)
+        if status.startswith("D"):
+            deleted_files.append(path)
+            if any(path.startswith(p) for p in PROTECTED_PATHS):
+                protected_deletions.append(path)
+
+    # Evaluate thresholds.
+    failures: list[str] = []
+    if changed_files > DEFAULT_MAX_CHANGED_FILES:
+        failures.append(
+            f"changed files ({changed_files}) exceeds threshold ({DEFAULT_MAX_CHANGED_FILES})"
+        )
+    if insertions > DEFAULT_MAX_INSERTIONS:
+        failures.append(
+            f"insertions (+{insertions}) exceeds threshold ({DEFAULT_MAX_INSERTIONS})"
+        )
+    if deletions > DEFAULT_MAX_DELETIONS:
+        failures.append(
+            f"deletions (-{deletions}) exceeds threshold ({DEFAULT_MAX_DELETIONS})"
+        )
+    if protected_deletions:
+        failures.append(
+            f"deleted {len(protected_deletions)} protected path(s): "
+            + ", ".join(protected_deletions[:10])
+        )
+
+    # Report.
+    print(f"Diff guard: {changed_files} files changed, +{insertions}/-{deletions} lines")
+    print(f"Deleted files: {len(deleted_files)}")
+    if protected_deletions:
+        print(f"Protected-path deletions: {len(protected_deletions)}")
+
+    if failures:
+        print("::error::PR diff guard failed:")
+        for f in failures:
+            print(f" - {f}")
+        print(
+            "If this diff is intentional, split the PR or request a threshold override from the PM."
+        )
+        return 1
+
+    print("Diff guard passed.")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/.gitea/workflows/pr-diff-guard.yml b/.gitea/workflows/pr-diff-guard.yml
new file mode 100644
index 00000000..dfeb7aef
--- /dev/null
+++ b/.gitea/workflows/pr-diff-guard.yml
@@ -0,0 +1,30 @@
+name: PR Diff Guard
+
+# core#2875: block stale PR branches that have drifted into massive,
+# destructive diffs against current main (e.g., PR #1100: 481 files,
+# ~55k deletions). Runs on every PR and fails when configured size or
+# protected-path deletion thresholds are exceeded.
+
+on:
+  pull_request:
+    branches: [main, staging]
+
+permissions:
+  contents: read
+
+jobs:
+  diff-guard:
+    name: PR diff guard
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          # Full history is required to compute the merge-base against origin/main.
+          fetch-depth: 0
+
+      - name: Run PR diff guard
+        env:
+          PR_BASE_REF: ${{ github.base_ref }}
+          PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+        run: python3 .gitea/scripts/pr-diff-guard.py
-- 
2.52.0

From d96c37120c4d432fdbf743515654bbcc17c005f1 Mon Sep 17 00:00:00 2001
From: "Molecule AI Dev Engineer A (Kimi)"
Date: Mon, 15 Jun 2026 02:47:11 +0000
Subject: [PATCH 2/4] ci(gate): add bp-required pending directive to diff guard (#2875)

Marks the new PR diff guard job with # bp-required: pending #2875 so
lint-required-context-exists-in-bp passes while we calibrate thresholds
before wiring it into branch protection.

Addresses #2875

Co-Authored-By: Claude
---
 .gitea/workflows/pr-diff-guard.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitea/workflows/pr-diff-guard.yml b/.gitea/workflows/pr-diff-guard.yml
index dfeb7aef..29c25e2c 100644
--- a/.gitea/workflows/pr-diff-guard.yml
+++ b/.gitea/workflows/pr-diff-guard.yml
@@ -14,6 +14,9 @@ permissions:
 
 jobs:
   diff-guard:
+    # bp-required: pending #2875
+    # New gate; wiring into branch protection will follow once it has
+    # green history and the thresholds are calibrated.
     name: PR diff guard
     runs-on: ubuntu-latest
     timeout-minutes: 5
-- 
2.52.0

From 01957874ab776c18fdf8a5658fbcd4dc685586ba Mon Sep 17 00:00:00 2001
From: "Molecule AI Dev Engineer A (Kimi)"
Date: Mon, 15 Jun 2026 02:51:49 +0000
Subject: [PATCH 3/4] ci(gate): move bp-required directive above job key (#2875)

lint_required_context_exists_in_bp scans a 3-line window above the
job-key line, so the directive must precede rather than sit inside the
job body.

Addresses #2875

Co-Authored-By: Claude
---
 .gitea/workflows/pr-diff-guard.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.gitea/workflows/pr-diff-guard.yml b/.gitea/workflows/pr-diff-guard.yml
index 29c25e2c..8c2d479d 100644
--- a/.gitea/workflows/pr-diff-guard.yml
+++ b/.gitea/workflows/pr-diff-guard.yml
@@ -13,10 +13,10 @@ permissions:
   contents: read
 
 jobs:
+  # bp-required: pending #2875
+  # New gate; wiring into branch protection will follow once it has
+  # green history and the thresholds are calibrated.
   diff-guard:
-    # bp-required: pending #2875
-    # New gate; wiring into branch protection will follow once it has
-    # green history and the thresholds are calibrated.
     name: PR diff guard
     runs-on: ubuntu-latest
     timeout-minutes: 5
-- 
2.52.0

From a3e5a91b51f072a01fca8621fbbdf6c44dfdbf21 Mon Sep 17 00:00:00 2001
From: "Molecule AI Dev Engineer A (Kimi)"
Date: Mon, 15 Jun 2026 02:58:21 +0000
Subject: [PATCH 4/4] ci(gate): robust diff guard fallback when merge base is missing (#2875)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If a PR branch has no merge base with origin/main, fall back to a direct
diff against origin/main so the guard still catches massive destructive
diffs instead of crashing.

Verified against PR #1100 head (485 files, ~56k deletions) — guard fails
loudly as intended.

Addresses #2875

Co-Authored-By: Claude
---
 .gitea/scripts/pr-diff-guard.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/.gitea/scripts/pr-diff-guard.py b/.gitea/scripts/pr-diff-guard.py
index 4039b191..28657773 100644
--- a/.gitea/scripts/pr-diff-guard.py
+++ b/.gitea/scripts/pr-diff-guard.py
@@ -60,8 +60,14 @@ def main() -> int:
         git("fetch", "origin", base_ref)
 
     # Find merge base so the diff reflects only what the PR added, not main
-    # drift since the branch was created.
-    merge_base = git("merge-base", f"origin/{base_ref}", head_sha).strip()
+    # drift since the branch was created. If no merge base exists (e.g.,
+    # unrelated-history branch), fall back to the base ref itself — the guard
+    # should still catch a massive destructive diff.
+    try:
+        merge_base = git("merge-base", f"origin/{base_ref}", head_sha).strip()
+    except subprocess.CalledProcessError:
+        print(f"::warning::no merge base with origin/{base_ref}; falling back to direct diff")
+        merge_base = f"origin/{base_ref}"
 
     # Diff stat.
     numstat = git("diff", "--numstat", f"{merge_base}..{head_sha}").strip()
-- 
2.52.0