From 1aa1d14e57ad9cf24a3c1bb8372da79f04c42a2f Mon Sep 17 00:00:00 2001 From: devops-engineer Date: Wed, 10 Jun 2026 15:14:32 +0000 Subject: [PATCH] ci(lint): forbid continue-on-error on required branch-protection jobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Makes SOP#765 mechanical (the mc#1982 masking incident). A job that is continue-on-error: true AND emits a context in .gitea/required-contexts.txt fails the lint. continue-on-error rolls a failed step up to SUCCESS (Gitea Quirk #10) — on a required context that turns a real failure green. .gitea/required-contexts.txt is the checked-in SSOT (CI cannot always read branch_protections); when DRIFT_BOT_TOKEN is present the lint also live-reads BP and fails on allowlist drift, degrading gracefully on 403/absent token. 6 pytest cases + verified clean against current core (3 required contexts, all coe=false) with live BP cross-check passing. Co-Authored-By: Claude Fable 5 --- .gitea/required-contexts.txt | 12 ++ .gitea/scripts/lint_no_coe_on_required.py | 172 +++++++++++++++++++ .gitea/workflows/lint-no-coe-on-required.yml | 81 +++++++++ tests/test_lint_no_coe_on_required.py | 93 ++++++++++ 4 files changed, 358 insertions(+) create mode 100644 .gitea/required-contexts.txt create mode 100644 .gitea/scripts/lint_no_coe_on_required.py create mode 100644 .gitea/workflows/lint-no-coe-on-required.yml create mode 100644 tests/test_lint_no_coe_on_required.py diff --git a/.gitea/required-contexts.txt b/.gitea/required-contexts.txt new file mode 100644 index 000000000..f52937206 --- /dev/null +++ b/.gitea/required-contexts.txt @@ -0,0 +1,12 @@ +# SSOT: the branch-protection REQUIRED status contexts on molecule-core +# main. Authoritative for lint-no-coe-on-required (CI cannot always read +# branch_protections; when a repo-admin token is present the lint also +# live-cross-checks this list against BP and fails on drift). +# +# Form: "{workflow name} / {job name or job key}" (event suffix stripped).
#!/usr/bin/env python3
"""lint_no_coe_on_required — forbid continue-on-error on REQUIRED jobs.

Forbidden shape
---------------
A job in `.gitea/workflows/*.yml` that BOTH:
  - has `continue-on-error: true` (job-level), AND
  - emits a commit-status context that is in the repo's required
    branch-protection set.

`continue-on-error: true` makes a failed step roll up to a *success*
job status (Gitea Quirk #10). On a job whose context branch-protection
treats as REQUIRED, that converts a real failure into a green gate —
exactly the mc#1982 masking incident (continue-on-error on platform-build
hid regressions for ~3 weeks; SOP#765). This makes SOP#765 mechanical.

Required-context SSOT
---------------------
A checked-in allowlist (REQUIRED_CONTEXTS_FILE, default
.gitea/required-contexts.txt — one context per line, `#` comments). This
is authoritative because the CI token cannot always read
branch_protections (cp returns 403). When a token IS available
(GITEA_TOKEN + repo admin) the script ALSO live-reads branch_protections
and fails if live BP requires a context that the checked-in allowlist
does not list. The check is one-directional: allowlist entries that are
absent from live BP are not reported here. A 403/absent token degrades
gracefully to allowlist-only (warn, don't fail on the read).

Context derivation
------------------
Gitea emits the per-job status context as `"{workflow_name} / {job_name
or job_key}{suffix}"` where suffix is ` (pull_request)` / ` (push)` on
those events. The allowlist stores the bare `workflow / job` form; we
match a required context if its event-stripped form equals a job's
`workflow / job`.

Exit codes
----------
0 — clean (or no workflows directory); 1 — lint failure, missing
allowlist, or allowlist drift; 2 — PyYAML is not installed.
"""
import os
import re
import sys

try:
    import yaml
except ImportError:
    # Reported by main() (same message, same exit code 2). Exiting at import
    # time would make the module un-importable — SystemExit escapes test
    # collection — even for helpers that never touch YAML.
    yaml = None

WORKFLOWS_DIR = os.environ.get("WORKFLOWS_DIR", ".gitea/workflows")
REQUIRED_FILE = os.environ.get("REQUIRED_CONTEXTS_FILE", ".gitea/required-contexts.txt")
GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "")
GITEA_HOST = os.environ.get("GITEA_HOST", "git.moleculesai.app")
REPO = os.environ.get("REPO", "")

# Trailing " (pull_request)" / " (push)" / " (pull_request_target)" marker.
EVENT_SUFFIX = re.compile(r"\s*\((pull_request|push|pull_request_target)\)\s*$")


def strip_event(ctx):
    """Return *ctx* with any trailing event suffix and outer whitespace removed."""
    return EVENT_SUFFIX.sub("", ctx).strip()


def load_required_allowlist(path):
    """Load the checked-in required-context allowlist.

    Each line holds one context; everything from the first `#` on a line is
    treated as a comment, and blank lines are ignored. Entries are stored in
    event-stripped form.

    Returns:
        A set of context strings, or None when *path* is not a file.
    """
    if not os.path.isfile(path):
        return None
    required = set()
    # Explicit UTF-8: do not depend on the runner's locale encoding.
    with open(path, encoding="utf-8") as f:
        for line in f:
            entry = line.split("#", 1)[0].strip()
            if entry:
                required.add(strip_event(entry))
    return required


def job_contexts(workflows_dir):
    """Return dict context -> (file, job_key, continue_on_error_bool).

    Scans every `*.yml` / `*.yaml` file in *workflows_dir* (sorted by name)
    and derives each job's context as `"{workflow name} / {job name or key}"`,
    falling back to the file stem when the workflow has no `name`.

    When several jobs derive the same context, an entry with
    continue-on-error set is never replaced by one without it: a clean
    duplicate must not hide a masking job from the lint.

    Raises:
        RuntimeError: PyYAML is not installed.
    """
    if yaml is None:
        raise RuntimeError("PyYAML not available")
    contexts = {}
    for fn in sorted(os.listdir(workflows_dir)):
        if not fn.endswith((".yml", ".yaml")):
            continue
        path = os.path.join(workflows_dir, fn)
        try:
            with open(path, encoding="utf-8") as f:
                doc = yaml.safe_load(f)
        except yaml.YAMLError as e:
            # Still best-effort, but say so: a silently skipped file looks
            # exactly like a file with no offending jobs.
            print(f"::warning:: {path}: unparsable YAML, workflow skipped ({type(e).__name__})")
            continue
        if not isinstance(doc, dict):
            continue
        wf_name = doc.get("name") or os.path.splitext(fn)[0]
        jobs = doc.get("jobs") or {}
        if not isinstance(jobs, dict):
            continue
        for jkey, jval in jobs.items():
            if not isinstance(jval, dict):
                continue
            jname = jval.get("name") or jkey
            coe = jval.get("continue-on-error", False)
            # Gitea coerces string "true" truthy.
            coe_bool = coe is True or (isinstance(coe, str) and coe.strip().lower() == "true")
            ctx = strip_event(f"{wf_name} / {jname}")
            prev = contexts.get(ctx)
            # Sticky continue-on-error: keep the first entry unless this one
            # is the first for the context to carry continue-on-error.
            if prev is None or (coe_bool and not prev[2]):
                contexts[ctx] = (path, jkey, coe_bool)
    return contexts


def live_required_contexts():
    """Best-effort live BP read. Returns set or None (degrade).

    Returns None without any network access when GITEA_TOKEN or REPO is
    empty. Otherwise GETs the repo's branch_protections and collects the
    event-stripped `status_check_contexts` of every entry whose
    `branch_name` is "main" or absent. Any failure (HTTP error, timeout,
    unexpected payload) prints a `::warning::` line and returns None so the
    caller falls back to the checked-in allowlist.
    """
    if not (GITEA_TOKEN and REPO):
        return None
    try:
        import json
        import urllib.request
        url = f"https://{GITEA_HOST}/api/v1/repos/{REPO}/branch_protections"
        req = urllib.request.Request(url, headers={"Authorization": f"token {GITEA_TOKEN}"})
        with urllib.request.urlopen(req, timeout=20) as r:
            data = json.load(r)
        out = set()
        for b in data:
            if b.get("branch_name") in ("main", None):
                for c in (b.get("status_check_contexts") or []):
                    out.add(strip_event(c))
        return out
    except Exception as e:
        # Deliberately broad: the live read is optional and must never turn
        # an API/network hiccup into a lint failure.
        print(f"::warning:: live branch_protections read failed ({e}); using checked-in allowlist only")
        return None


def main():
    """Run the lint and return the process exit code (0, 1 or 2)."""
    if yaml is None:
        print("FAIL: PyYAML not available", file=sys.stderr)
        return 2
    if not os.path.isdir(WORKFLOWS_DIR):
        print(f"OK: no {WORKFLOWS_DIR}")
        return 0
    required = load_required_allowlist(REQUIRED_FILE)
    if required is None:
        print(f"FAIL: required-contexts allowlist {REQUIRED_FILE} is missing — "
              f"this file is the SSOT for which contexts are merge-required.")
        return 1

    # Optional live-BP drift check (graceful).
    live = live_required_contexts()
    if live is not None:
        only_live = live - required
        if only_live:
            print("FAIL: branch-protection required contexts NOT in the checked-in allowlist "
                  f"({REQUIRED_FILE}) — allowlist has drifted from live BP:")
            for c in sorted(only_live):
                print(f"  - {c}")
            print("  Add them to the allowlist (or remove from BP).")
            return 1

    ctxs = job_contexts(WORKFLOWS_DIR)
    fails = []
    for ctx in sorted(required):
        info = ctxs.get(ctx)
        if info is None:
            # The context is required but no job currently emits it — that's
            # a different lint's concern (required-context-exists). Skip.
            continue
        path, jkey, coe = info
        if coe:
            fails.append(f"{path}: job `{jkey}` (context `{ctx}`) is branch-protection REQUIRED "
                         f"but has continue-on-error: true")
    if fails:
        print("FAIL: continue-on-error: true on a REQUIRED branch-protection job (mc#1982 / SOP#765):")
        for f in fails:
            print(f"  - {f}")
        print()
        print("Why: continue-on-error makes a failed step roll up to a SUCCESS")
        print("     job status (Gitea Quirk #10). On a REQUIRED context that turns")
        print("     a real failure into a green gate — the mc#1982 masking incident.")
        print("     Remove continue-on-error from required jobs (SOP#765).")
        return 1
    print(f"OK: no continue-on-error on any of the {len(required)} required contexts.")
    return 0


if __name__ == "__main__":
    sys.exit(main())
name: lint-no-coe-on-required

# Static workflow-shape lint: forbid `continue-on-error: true` on any job
# that emits a REQUIRED branch-protection status context. Makes SOP#765
# mechanical (the mc#1982 masking incident).
#
# Forbidden shape
# ---------------
# A job in `.gitea/workflows/*.yml` that is BOTH continue-on-error: true
# AND emits a context in `.gitea/required-contexts.txt` (the checked-in
# SSOT of merge-required contexts).
#
# Why this rule exists (mc#1982 / SOP#765)
# ----------------------------------------
# `continue-on-error: true` makes a failed step roll up to a SUCCESS job
# status (Gitea Quirk #10). On a REQUIRED context that silently converts
# a real failure into a green gate — continue-on-error on platform-build
# masked regressions for ~3 weeks before #656 surfaced them. SOP#765
# banned it on required jobs; this lint enforces it at PR time.
#
# Required-context SSOT + drift guard
# -----------------------------------
# `.gitea/required-contexts.txt` is authoritative (CI cannot always read
# branch_protections — cp returns 403). When a repo-admin token is
# present (DRIFT_BOT_TOKEN) the lint ALSO live-reads BP and fails if the
# checked-in allowlist has DRIFTED from live BP. A 403/absent token
# degrades gracefully to allowlist-only (warn, not fail).
#
# Trigger note: only pull_request runs are path-filtered (see `paths`
# below). push, schedule and workflow_dispatch runs are unfiltered because
# BP can change out-of-band; the live cross-check catches that on every
# such run.

on:
  pull_request:
    types: [opened, synchronize, reopened]
    # Only the files this lint reads or is implemented by.
    paths:
      - '.gitea/workflows/**'
      - '.gitea/required-contexts.txt'
      - '.gitea/scripts/lint_no_coe_on_required.py'
      - 'tests/test_lint_no_coe_on_required.py'
  push:
    branches: [main, staging]
  schedule:
    # Daily — catches BP-drift introduced out-of-band (a required context
    # added to BP whose emitting job already has continue-on-error).
    - cron: '23 14 * * *'
  workflow_dispatch:

env:
  GITHUB_SERVER_URL: https://git.moleculesai.app

permissions:
  contents: read

concurrency:
  # One group per PR number, or per ref when there is no PR.
  group: lint-no-coe-on-required-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  # bp-exempt: meta-lint guarding the required set; standalone red-status
  # lint, not itself a branch-protection required context.
  lint:
    name: lint-no-coe-on-required
    runs-on: ubuntu-latest
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
        with:
          python-version: '3.12'
      - name: Install PyYAML
        run: python -m pip install --quiet 'PyYAML==6.0.2'
      - name: Run lint-no-coe-on-required
        env:
          # The script skips the live BP read when GITEA_TOKEN or REPO is
          # empty, so an unset DRIFT_BOT_TOKEN means allowlist-only mode.
          GITEA_TOKEN: ${{ secrets.DRIFT_BOT_TOKEN }}
          GITEA_HOST: git.moleculesai.app
          REPO: ${{ github.repository }}
        run: python3 .gitea/scripts/lint_no_coe_on_required.py
      - name: Run unit tests
        run: |
          python -m pip install --quiet pytest
          python3 -m pytest tests/test_lint_no_coe_on_required.py -q
"""Unit tests for lint_no_coe_on_required — fixture catch + clean.

The script lives under `.gitea/scripts/` rather than in an importable
package, so it is loaded by file path. The helper-level tests exercise
`job_contexts`, `strip_event` and `load_required_allowlist`; the `main()`
tests pin the exit code that the workflow step actually depends on.
"""
import importlib.util
import os
import textwrap

HERE = os.path.dirname(__file__)
SCRIPT = os.path.join(HERE, "..", ".gitea", "scripts", "lint_no_coe_on_required.py")
spec = importlib.util.spec_from_file_location("lint_no_coe_on_required", SCRIPT)
mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mod)


def _wf(tmp_path, name, body):
    """Write workflow file *name* with dedented *body* under tmp_path/.gitea/workflows."""
    d = tmp_path / ".gitea" / "workflows"
    d.mkdir(parents=True, exist_ok=True)
    (d / name).write_text(textwrap.dedent(body))


def _allow(tmp_path, contexts):
    """Write *contexts*, one per line, to tmp_path/.gitea/required-contexts.txt."""
    (tmp_path / ".gitea").mkdir(parents=True, exist_ok=True)
    (tmp_path / ".gitea" / "required-contexts.txt").write_text("\n".join(contexts) + "\n")


def _run_main(monkeypatch, tmp_path):
    """Run mod.main() against the tmp_path fixture tree and return its exit code.

    The module reads its configuration into globals at import time, so the
    globals are patched directly. Token and repo are blanked so the live
    branch-protection read is skipped and no network access happens.
    """
    monkeypatch.setattr(mod, "WORKFLOWS_DIR", str(tmp_path / ".gitea" / "workflows"))
    monkeypatch.setattr(mod, "REQUIRED_FILE", str(tmp_path / ".gitea" / "required-contexts.txt"))
    monkeypatch.setattr(mod, "GITEA_TOKEN", "")
    monkeypatch.setattr(mod, "REPO", "")
    return mod.main()


def test_coe_on_required_job_flagged(tmp_path):
    _wf(tmp_path, "ci.yml", """\
        name: CI
        on: [pull_request]
        jobs:
          all-required:
            runs-on: ubuntu-latest
            continue-on-error: true
            steps:
              - run: echo gate
        """)
    ctxs = mod.job_contexts(str(tmp_path / ".gitea" / "workflows"))
    info = ctxs["CI / all-required"]
    assert info[2] is True  # continue-on-error detected


def test_coe_string_true_flagged(tmp_path):
    _wf(tmp_path, "ci.yml", """\
        name: CI
        on: [pull_request]
        jobs:
          gate:
            runs-on: ubuntu-latest
            continue-on-error: "true"
            steps:
              - run: echo hi
        """)
    ctxs = mod.job_contexts(str(tmp_path / ".gitea" / "workflows"))
    assert ctxs["CI / gate"][2] is True


def test_required_job_without_coe_clean(tmp_path):
    _wf(tmp_path, "ci.yml", """\
        name: CI
        on: [pull_request]
        jobs:
          all-required:
            runs-on: ubuntu-latest
            steps:
              - run: echo gate
        """)
    ctxs = mod.job_contexts(str(tmp_path / ".gitea" / "workflows"))
    assert ctxs["CI / all-required"][2] is False


def test_named_job_context_uses_name_not_key(tmp_path):
    _wf(tmp_path, "e2e.yml", """\
        name: E2E API Smoke Test
        on: [pull_request]
        jobs:
          e2e-api:
            name: E2E API Smoke Test
            runs-on: ubuntu-latest
            steps:
              - run: echo hi
        """)
    ctxs = mod.job_contexts(str(tmp_path / ".gitea" / "workflows"))
    assert "E2E API Smoke Test / E2E API Smoke Test" in ctxs


def test_strip_event_suffix():
    assert mod.strip_event("CI / all-required (pull_request)") == "CI / all-required"
    assert mod.strip_event("ci / build (push)") == "ci / build"
    assert mod.strip_event("X / y") == "X / y"


def test_allowlist_load(tmp_path):
    _allow(tmp_path, ["# comment", "CI / all-required", "  ci / build (push)  "])
    got = mod.load_required_allowlist(str(tmp_path / ".gitea" / "required-contexts.txt"))
    assert got == {"CI / all-required", "ci / build"}


def test_main_fails_on_coe_required_job(tmp_path, monkeypatch, capsys):
    _wf(tmp_path, "ci.yml", """\
        name: CI
        on: [pull_request]
        jobs:
          all-required:
            runs-on: ubuntu-latest
            continue-on-error: true
            steps:
              - run: echo gate
        """)
    _allow(tmp_path, ["CI / all-required"])
    assert _run_main(monkeypatch, tmp_path) == 1
    assert "continue-on-error: true" in capsys.readouterr().out


def test_main_passes_when_required_job_has_no_coe(tmp_path, monkeypatch):
    _wf(tmp_path, "ci.yml", """\
        name: CI
        on: [pull_request]
        jobs:
          all-required:
            runs-on: ubuntu-latest
            steps:
              - run: echo gate
        """)
    _allow(tmp_path, ["CI / all-required"])
    assert _run_main(monkeypatch, tmp_path) == 0


def test_main_ignores_coe_on_non_required_job(tmp_path, monkeypatch):
    _wf(tmp_path, "ci.yml", """\
        name: CI
        on: [pull_request]
        jobs:
          all-required:
            runs-on: ubuntu-latest
            steps:
              - run: echo gate
          optional:
            runs-on: ubuntu-latest
            continue-on-error: true
            steps:
              - run: echo maybe
        """)
    _allow(tmp_path, ["CI / all-required"])
    assert _run_main(monkeypatch, tmp_path) == 0


def test_main_fails_when_allowlist_missing(tmp_path, monkeypatch):
    _wf(tmp_path, "ci.yml", """\
        name: CI
        on: [pull_request]
        jobs:
          all-required:
            runs-on: ubuntu-latest
            steps:
              - run: echo gate
        """)
    # No _allow() call: the SSOT file is absent, which must fail the lint.
    assert _run_main(monkeypatch, tmp_path) == 1