From 44ab45720f5df69a92bb4345ea9021c829974048 Mon Sep 17 00:00:00 2001 From: devops-engineer Date: Tue, 9 Jun 2026 04:26:26 +0000 Subject: [PATCH] ci(gate): close all-required name-vs-coverage hole via cross-workflow drift check (F4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `CI / all-required (pull_request)` is a fail-closed aggregator over the CI workflow's OWN jobs (plain `needs:` + explicit per-need `result == success` assertion). That mechanism is correct. But its name implies it covers every branch-protection required check, which it does NOT and CANNOT: Gitea Actions has no cross-workflow `needs:`, so the sibling required workflows `E2E API Smoke Test` (e2e-api.yml) and `Handlers Postgres Integration` (handlers-postgres-integration.yml) emit their own status contexts that `all-required` structurally cannot gate. Observed latent hole (core PR #1086 @ 9136d05a): `CI / all-required` posted success while `E2E API Smoke Test` (id 48) and `Handlers Postgres Integration` (id 47) had already posted failure. Not currently exploitable — main BP requires all three contexts independently — but if BP were ever trimmed to just `CI / all-required` on the false assumption that it covers them, a red-CI PR would look mergeable (the falsely-GREEN-aggregate class, inverse of #2448's green-but-empty). Root cause is NOT a bug in the aggregator (it is fail-closed for its scope); it is the missing cross-workflow coverage guarantee. ci-required-drift.py's F2 was meant to catch BP contexts with no emitter, but it is scoped to the hard-coded lowercase literal `ci / ` prefix + bare job-KEYs — a shape that does not match this repo (workflow is `name: CI`; CI jobs set their own `name:`), so F2 is effectively dormant here. Fix (robust, minimal, behavior-based): - F4 in ci-required-drift.py: parse EVERY workflow's real `name:` + each job's `name|key` and assert every BP `status_check_contexts` entry has a live emitting workflow. This is the case-correct, repo-wide generalization of F2 and closes the inverse-of-F2 hole — a renamed/deleted sibling workflow that BP still requires now fails drift loudly instead of degrading to a silent absent-as-pending advisory gate. This is what keeps the `all-required` name honest at the repo level. - Document the aggregator's SCOPE on the `all-required` job in ci.yml: it covers CI's own jobs only; E2E + Handlers MUST stay required in BP independently; do not trim BP to just `CI / all-required`. - 7 new unit tests: emitter computation (job name>key, no-name, dir union, skip-unparseable) + detect_drift F4 silent-when-emitted, fires-on-stale- cross-workflow-context, and no-false-positive-on-lone-context. All 23 ci-required-drift tests pass; 28 incl. ci-workflow-bookkeeping; lint-workflow-yaml clean (59 files, 0 warnings). No existing behavior changed. Co-Authored-By: Claude Opus 4.8 (1M context) --- .gitea/scripts/ci-required-drift.py | 104 +++++++++++++++ .../scripts/tests/test_ci_required_drift.py | 122 ++++++++++++++++++ .gitea/workflows/ci.yml | 21 +++ 3 files changed, 247 insertions(+) diff --git a/.gitea/scripts/ci-required-drift.py b/.gitea/scripts/ci-required-drift.py index eed421f83..473c5f663 100755 --- a/.gitea/scripts/ci-required-drift.py +++ b/.gitea/scripts/ci-required-drift.py @@ -24,6 +24,17 @@ Three failure classes: F3 (B) and (C) are not set-equal. Audit env wider than protection → audit flags non-force-merges as force; narrower → real force-merges are missed. + F4 Context in (B) is emitted by NO workflow in .gitea/workflows/ at + all (repo-wide, case-correct generalization of F2, which only + covers `ci / `-prefixed names). This is the inverse-of-F2 hole and + the one that makes the `CI / all-required` aggregator's + name-vs-coverage gap safe: `all-required` is fail-closed over CI's + OWN jobs but CANNOT cover sibling required workflows + (`E2E API Smoke Test`, `Handlers Postgres Integration` — Gitea has + no cross-workflow `needs:`). F4 verifies each cross-workflow + required context still has a live emitter, so a renamed/deleted + sibling workflow that BP still requires is caught instead of + degrading to a silent absent-as-pending advisory gate. Idempotency: Searches OPEN issues by exact title prefix @@ -380,6 +391,68 @@ def expected_context(job_key: str, workflow_name: str = "ci") -> str: return f"{workflow_name} / {job_key} (pull_request)" +def workflow_emitted_contexts(wf_doc: dict) -> set[str]: + """The set of `pull_request` status-check contexts a SINGLE workflow + emits, computed from its real `name:` + each job's `name or key`. + + Gitea reports a context as `{workflow.name} / {job.name|job.key} + (pull_request)`. Unlike `expected_context()` (which hard-codes the + lowercase literal `ci` and the bare job-KEY — a shape that does NOT + match this repo, whose workflow is `name: CI` and whose CI jobs DO + set per-job `name:`), this reads the authoritative names straight + from the parsed YAML, so the contexts it produces are byte-equal to + what BP records. Used by F4 (cross-workflow emitter existence). + + Jobs whose `if:` gates on `github.event_name`/`github.ref` are still + emitters on the events they DO run — they remain in the set; F4 only + asserts *existence of an emitter*, never that it ran on a given + trigger.""" + name = wf_doc.get("name") + if not isinstance(name, str) or not name: + return set() + jobs = wf_doc.get("jobs") + if not isinstance(jobs, dict): + return set() + out: set[str] = set() + for key, spec in jobs.items(): + job_name = key + if isinstance(spec, dict) and isinstance(spec.get("name"), str) and spec["name"]: + job_name = spec["name"] + out.add(f"{name} / {job_name} (pull_request)") + return out + + +def all_emitted_contexts(workflows_dir: str = ".gitea/workflows") -> set[str]: + """Union of `pull_request` contexts emitted by EVERY workflow in the + repo. F4 uses this to assert that each BP-required + `status_check_contexts` entry corresponds to a real emitting + workflow+job — closing the inverse-of-F2 hole where BP requires a + context that NO workflow produces (e.g. a sibling workflow like + `E2E API Smoke Test` or `Handlers Postgres Integration` was renamed + or deleted while still required, leaving BP demanding a green it can + never receive; Gitea treats absent-as-pending → silent advisory + gate). This is what makes the misleadingly-named `CI / all-required` + aggregator safe at the repo level: it only covers CI's own jobs, but + F4 guarantees the cross-workflow required contexts it CANNOT cover + are real and present.""" + import glob as _glob + + emitted: set[str] = set() + for path in sorted(_glob.glob(os.path.join(workflows_dir, "*.yml"))): + try: + with open(path, encoding="utf-8") as f: + doc = yaml.safe_load(f) + except (OSError, yaml.YAMLError): + # A single unparseable sibling workflow must not blind F4 to + # the rest. Skip it loudly; lint-workflow-yaml gates parse + # validity separately. + sys.stderr.write(f"::warning::F4: could not parse {path}, skipping\n") + continue + if isinstance(doc, dict): + emitted |= workflow_emitted_contexts(doc) + return emitted + + # -------------------------------------------------------------------------- # Drift detection # -------------------------------------------------------------------------- @@ -531,6 +604,36 @@ def detect_drift(branch: str) -> tuple[list[str], dict]: + "\n".join(f" - {c}" for c in stale_protection) ) + # ----- F4: cross-workflow required context has no emitting workflow ----- + # F2 (above) is scoped to `ci / `-prefixed contexts ONLY, and built + # from the hard-coded lowercase literal `ci` + bare job-keys — a shape + # that does NOT match this repo (workflow is `name: CI`, jobs set their + # own `name:`), so F2 is effectively dormant here. F4 is the + # case-correct, REPO-WIDE generalization: it parses every workflow's + # real `name:` + job `name|key` and asserts that EVERY BP-required + # context is actually emitted by some workflow. + # + # This is the gate that makes the `CI / all-required` aggregator's + # name-vs-coverage gap safe. `all-required` is fail-closed over CI's + # OWN jobs but — by Gitea's design (no cross-workflow `needs:`) — it + # CANNOT and does not cover sibling required workflows + # (`E2E API Smoke Test`, `Handlers Postgres Integration`). Those MUST + # be listed in BP independently. F4 verifies each such BP context + # still has a live emitter, so the inverse-of-F2 hole — BP requires a + # context that no workflow produces (rename/delete a sibling workflow + # while still required → Gitea treats absent-as-pending → silent + # advisory gate, and a red PR can look mergeable) — is caught. + repo_emitted = all_emitted_contexts(os.path.dirname(CI_WORKFLOW_PATH)) + unemitted = sorted(c for c in contexts if c not in repo_emitted) + if unemitted: + findings.append( + "F4 — branch_protections/{br}.status_check_contexts entries that " + "NO workflow in .gitea/workflows/ emits " + "(stale required name → silent advisory gate; a red PR can look " + "mergeable):\n".format(br=branch) + + "\n".join(f" - {c}" for c in unemitted) + ) + # ----- F3: audit env vs protection contexts (set-equal) ----- only_in_env = sorted(env_set - contexts) only_in_protection = sorted(contexts - env_set) @@ -556,6 +659,7 @@ def detect_drift(branch: str) -> tuple[list[str], dict]: "protection_contexts": sorted(contexts), "audit_env_checks": sorted(env_set), "expected_contexts": sorted(emitted_contexts), + "repo_emitted_contexts": sorted(repo_emitted), } return findings, debug diff --git a/.gitea/scripts/tests/test_ci_required_drift.py b/.gitea/scripts/tests/test_ci_required_drift.py index b74f91961..4a4843f7d 100644 --- a/.gitea/scripts/tests/test_ci_required_drift.py +++ b/.gitea/scripts/tests/test_ci_required_drift.py @@ -275,3 +275,125 @@ def test_detect_drift_no_f1_when_needs_empty_even_with_jobs(): findings, _ = drift.detect_drift("main") assert not any("F1 —" in f for f in findings) + + +# --------------------------------------------------------------------------- +# F4 — cross-workflow required-context emitter existence +# (closes the `CI / all-required` name-vs-coverage hole: the sentinel is +# fail-closed over CI's own jobs but CANNOT cover sibling required +# workflows — Gitea has no cross-workflow `needs:` — so F4 guarantees each +# BP-required context still has a live emitting workflow.) +# --------------------------------------------------------------------------- + +def test_workflow_emitted_contexts_uses_job_name_over_key(): + """Job `name:` wins over key; missing name falls back to key.""" + doc = { + "name": "E2E API Smoke Test", + "jobs": { + "detect-changes": {}, # no name -> key + "e2e-api": {"name": "E2E API Smoke Test"}, + }, + } + got = drift.workflow_emitted_contexts(doc) + assert got == { + "E2E API Smoke Test / detect-changes (pull_request)", + "E2E API Smoke Test / E2E API Smoke Test (pull_request)", + } + + +def test_workflow_emitted_contexts_empty_when_no_name(): + """A workflow with no top-level `name:` emits nothing F4 can match.""" + assert drift.workflow_emitted_contexts({"jobs": {"x": {}}}) == set() + + +def test_all_emitted_contexts_unions_workflow_dir(tmp_path): + """all_emitted_contexts globs *.yml and unions their emitter sets.""" + wf = tmp_path / "wf" + wf.mkdir() + (wf / "a.yml").write_text( + "name: CI\njobs:\n all-required:\n runs-on: x\n", encoding="utf-8" + ) + (wf / "b.yml").write_text( + "name: Handlers Postgres Integration\n" + "jobs:\n integration:\n name: Handlers Postgres Integration\n" + " runs-on: x\n", + encoding="utf-8", + ) + got = drift.all_emitted_contexts(str(wf)) + assert "CI / all-required (pull_request)" in got + assert "Handlers Postgres Integration / Handlers Postgres Integration (pull_request)" in got + + +def test_all_emitted_contexts_skips_unparseable(tmp_path): + """A single broken sibling workflow must not blind F4 to the rest.""" + wf = tmp_path / "wf" + wf.mkdir() + (wf / "good.yml").write_text("name: CI\njobs:\n j:\n runs-on: x\n", encoding="utf-8") + (wf / "bad.yml").write_text("name: [unterminated\n : : :\n", encoding="utf-8") + got = drift.all_emitted_contexts(str(wf)) + assert "CI / j (pull_request)" in got + + +# A BP fixture that includes the two cross-workflow required contexts. +_BP_WITH_SIBLINGS = { + "status_check_contexts": [ + "CI / all-required (pull_request)", + "E2E API Smoke Test / E2E API Smoke Test (pull_request)", + "Handlers Postgres Integration / Handlers Postgres Integration (pull_request)", + ] +} + +# The matching set of repo-wide emitted contexts (what a correct repo produces). +_EMITTED_OK = { + "CI / all-required (pull_request)", + "E2E API Smoke Test / E2E API Smoke Test (pull_request)", + "Handlers Postgres Integration / Handlers Postgres Integration (pull_request)", +} + + +def test_detect_drift_f4_silent_when_all_contexts_emitted(): + """No F4 when every BP context has a live emitting workflow.""" + ci = _make_ci_doc({"all-required": {}}) + audit = _make_audit_doc(sorted(_BP_WITH_SIBLINGS["status_check_contexts"])) + with patch.object(drift, "load_yaml", side_effect=[ci, audit]): + with patch.object(drift, "api", return_value=(200, _BP_WITH_SIBLINGS)): + with patch.object(drift, "all_emitted_contexts", return_value=set(_EMITTED_OK)): + findings, debug = drift.detect_drift("main") + assert not any("F4 —" in f for f in findings) + assert debug["repo_emitted_contexts"] == sorted(_EMITTED_OK) + + +def test_detect_drift_f4_fires_on_stale_cross_workflow_context(): + """The core gate-hole regression: BP requires a cross-workflow context + (e.g. a renamed/deleted sibling workflow) that NO workflow emits. + F4 must fire — this is the inverse-of-F2 hole that makes a red PR look + mergeable if BP is ever trimmed/renamed around `CI / all-required`.""" + ci = _make_ci_doc({"all-required": {}}) + audit = _make_audit_doc(sorted(_BP_WITH_SIBLINGS["status_check_contexts"])) + # Handlers workflow got renamed -> its OLD BP context now has no emitter. + emitted_after_rename = { + "CI / all-required (pull_request)", + "E2E API Smoke Test / E2E API Smoke Test (pull_request)", + # Handlers context absent (renamed away) + } + with patch.object(drift, "load_yaml", side_effect=[ci, audit]): + with patch.object(drift, "api", return_value=(200, _BP_WITH_SIBLINGS)): + with patch.object(drift, "all_emitted_contexts", return_value=emitted_after_rename): + findings, _ = drift.detect_drift("main") + assert any("F4 —" in f for f in findings) + assert any("Handlers Postgres Integration" in f for f in findings) + + +def test_detect_drift_f4_catches_all_required_only_trim(): + """If BP is trimmed to JUST `CI / all-required` but E2E/Handlers are + still real workflows, F4 does NOT fire (no stale context) — but F3b + (env vs BP) / operator policy must keep them required. This asserts F4 + does not false-positive on a correctly-emitted lone context.""" + bp = {"status_check_contexts": ["CI / all-required (pull_request)"]} + ci = _make_ci_doc({"all-required": {}}) + audit = _make_audit_doc(["CI / all-required (pull_request)"]) + with patch.object(drift, "load_yaml", side_effect=[ci, audit]): + with patch.object(drift, "api", return_value=(200, bp)): + with patch.object(drift, "all_emitted_contexts", return_value=set(_EMITTED_OK)): + findings, _ = drift.detect_drift("main") + assert not any("F4 —" in f for f in findings) diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index 6555b80b6..9e0288703 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -500,6 +500,27 @@ jobs: all-required: # Aggregator sentinel — RFC internal#219 §2 (Phase 4 — closes internal#286). # + # ── SCOPE (read before trusting the name) ───────────────────────── + # "all-required" means "all of THIS workflow's (CI's) required jobs" + # — NOT "all of branch-protection's required checks". It is fail- + # closed over its `needs:` (the CI jobs below), but Gitea Actions has + # NO cross-workflow `needs:`, so this sentinel STRUCTURALLY CANNOT and + # does not cover sibling required workflows that live in their own + # files — notably: + # • `E2E API Smoke Test` (.gitea/workflows/e2e-api.yml) + # • `Handlers Postgres Integration`(.gitea/workflows/handlers-postgres-integration.yml) + # Those emit their OWN status contexts and MUST be listed in branch + # protection `status_check_contexts` INDEPENDENTLY of this sentinel. + # They are today; do NOT trim BP down to just `CI / all-required` on + # the assumption that it covers them — it does not, and a red E2E / + # Handlers run would then look mergeable (observed: core PR #1086 @ + # 9136d05a — `CI / all-required` green while E2E (id 48) + Handlers + # (id 47) were red; not exploitable only because BP still requires + # all three). The cross-workflow coverage is enforced separately by + # ci-required-drift.py's F4 check (every BP context must have a live + # emitting workflow), which is what keeps this name honest. + # ────────────────────────────────────────────────────────────────── + # # Emits `CI / all-required ()` where is the workflow trigger # (e.g. `CI / all-required (pull_request)`, `CI / all-required (push)`). # Branch protection requires the event-suffixed name — -- 2.52.0