diff --git a/.gitea/scripts/lint-required-no-paths.py b/.gitea/scripts/lint-required-no-paths.py new file mode 100755 index 00000000..911e8884 --- /dev/null +++ b/.gitea/scripts/lint-required-no-paths.py @@ -0,0 +1,404 @@ +#!/usr/bin/env python3 +"""lint-required-no-paths — structural enforcement of +`feedback_path_filtered_workflow_cant_be_required`. + +For every workflow whose status-check context appears in +`branch_protections/.status_check_contexts`, assert that the +workflow's `on:` block has NO `paths:` and NO `paths-ignore:` filter. + +A required-check workflow with a paths filter silently degrades the +merge gate: + + - If the PR's diff doesn't match the `paths:` glob, the workflow + never fires. + - Gitea (1.22.6) reports the required context as `pending` (never as + `skipped == success`), so the PR cannot merge. + - For a docs-only PR against `paths: ['**.go']`, the PR is + blocked forever — no human action can produce a green. + +The class was previously prevented only by reviewer vigilance + the +saved memory `feedback_path_filtered_workflow_cant_be_required`. This +script makes it a hard CI gate so a future PR adding `paths:` to a +required workflow fails fast at PR time, not after merge when the next +docs PR wedges main. + +The lint runs as `.gitea/workflows/lint-required-no-paths.yml` on every +PR. The lint workflow ITSELF must not have a paths-filter (otherwise it +could be circumvented by a paths-non-matching PR) — that's enforced by +self-reference and by the workflow's own `on:` block deliberately +omitting filters. + +Sources of truth: + - `branch_protections/` `status_check_contexts` (the merge gate) + - `.gitea/workflows/*.yml` `name:` + `on:` (the workflow set) + +Context-format note (Gitea 1.22.6): + Status-check contexts are formatted `{workflow_name} / {job_name_or_key} ({event})`. + We parse the workflow_name prefix and walk `.gitea/workflows/*.yml` for + a file whose `name:` attr matches. 
(The filename is NOT the source of + truth; `name:` is, because Gitea formats the context from `name:`.) + +Exit codes: + 0 — no required workflow has a paths/paths-ignore filter (clean) OR + branch_protections endpoint returned 403/404 (token-scope issue; + surfaced via ::error:: but non-fatal so a missing scope doesn't + red-X every PR — fix the token, not the lint). + 1 — at least one required workflow has a paths/paths-ignore filter + (the gate-degrading defect class). + 2 — env contract violation (missing GITEA_TOKEN/HOST/REPO/BRANCH). + 3 — workflows directory missing or workflow YAML unparseable. + 4 — protection response shape unexpected (non-dict body on 2xx). + +Auth note: `GET /repos/.../branch_protections/{branch}` requires +repo-admin role in Gitea 1.22.6. The workflow-default `GITHUB_TOKEN` +is non-admin; we re-use `DRIFT_BOT_TOKEN` (same persona that powers +ci-required-drift.yml). If `DRIFT_BOT_TOKEN` is unavailable in a future +context, the script falls through gracefully (exit 0 + ::error::). +""" +from __future__ import annotations + +import json +import os +import re +import sys +import urllib.error +import urllib.parse +import urllib.request +from pathlib import Path +from typing import Any + +import yaml # PyYAML 6.0.2 — installed by the workflow before this runs. 
# --------------------------------------------------------------------------
# Environment
# --------------------------------------------------------------------------
def _env(key: str, *, required: bool = True, default: str | None = None) -> str:
    """Look up environment variable `key`, falling back to `default`.

    A missing or empty value is fatal when `required` is true: an
    `::error::` line is written to stderr and the process exits with
    status 2. Otherwise a missing/empty value comes back as "".
    """
    value = os.environ.get(key, default)
    if value:
        return value
    if required:
        sys.stderr.write(f"::error::missing required env var: {key}\n")
        sys.exit(2)
    return ""


# Module-level configuration, captured once at import time. Every lookup
# is non-required so the module can be imported without the full env
# contract; `run()` enforces the contract via `_require_runtime_env()`.
GITEA_TOKEN = _env("GITEA_TOKEN", required=False)
GITEA_HOST = _env("GITEA_HOST", required=False)
REPO = _env("REPO", required=False)
BRANCH = _env("BRANCH", required=False, default="main")
WORKFLOWS_DIR = _env("WORKFLOWS_DIR", required=False, default=".gitea/workflows")

# "owner/name" → ("owner", "name"); an empty or slash-less REPO leaves
# the missing half as "".
OWNER, NAME = REPO.partition("/")[::2]
API = "" if not GITEA_HOST else f"https://{GITEA_HOST}/api/v1"


def _require_runtime_env() -> None:
    """Enforce the env contract — called from `run()` only.

    Exits with status 2 (after an `::error::` line on stderr) at the
    first of GITEA_TOKEN / GITEA_HOST / REPO / BRANCH that is unset or
    empty. Tests import individual functions without the full contract.
    """
    for key in ("GITEA_TOKEN", "GITEA_HOST", "REPO", "BRANCH"):
        _env(key)


# --------------------------------------------------------------------------
# Tiny HTTP helper (mirrors ci-required-drift.py contract:
# raise on non-2xx and on JSON-decode-fail when JSON expected, per
# `feedback_api_helper_must_raise_not_return_dict`).
# --------------------------------------------------------------------------
class ApiError(RuntimeError):
    """Raised when a Gitea API call cannot be trusted to have succeeded.

    Attributes:
        status: HTTP status code of the failing response, or None when
            there was no HTTP response (transport failure) or the raiser
            did not supply one.
    """

    def __init__(self, message: str, *, status: int | None = None) -> None:
        super().__init__(message)
        self.status = status


def api(
    method: str,
    path: str,
    *,
    body: dict | None = None,
    query: dict[str, str] | None = None,
    expect_json: bool = True,
) -> tuple[int, Any]:
    """Perform one authenticated Gitea API request.

    Args:
        method: HTTP verb, e.g. "GET".
        path: API path appended to the module-level `API` base URL.
        body: Optional JSON-serialisable request body.
        query: Optional query-string parameters.
        expect_json: When true, a 2xx response with a non-JSON body is an
            error; when false it is returned as `{"_raw": <text>}`.

    Returns:
        `(status, parsed_json)`; `parsed_json` is None for an empty body.

    Raises:
        ApiError: on a non-2xx status, on a non-JSON body when
            `expect_json` is true, or when the request never produced an
            HTTP response (DNS failure, refused connection, timeout).
    """
    url = f"{API}{path}"
    if query:
        url = f"{url}?{urllib.parse.urlencode(query)}"
    data = None
    headers = {
        "Authorization": f"token {GITEA_TOKEN}",
        "Accept": "application/json",
    }
    if body is not None:
        data = json.dumps(body).encode("utf-8")
        headers["Content-Type"] = "application/json"
    req = urllib.request.Request(url, method=method, data=data, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            raw = resp.read()
            status = resp.status
    except urllib.error.HTTPError as e:
        # HTTPError doubles as the response object; read it, then release
        # the underlying connection.
        try:
            raw = e.read()
        finally:
            e.close()
        status = e.code
    except (urllib.error.URLError, TimeoutError) as e:
        # No HTTP response at all. Surface it through the same exception
        # type as every other "cannot trust this call" failure.
        raise ApiError(f"{method} {path} → request failed: {e}") from e

    if not (200 <= status < 300):
        snippet = raw[:500].decode("utf-8", errors="replace") if raw else ""
        raise ApiError(
            f"{method} {path} → HTTP {status}: {snippet}", status=status
        )

    if not raw:
        return status, None
    try:
        return status, json.loads(raw)
    except json.JSONDecodeError as e:
        if expect_json:
            raise ApiError(
                f"{method} {path} → HTTP {status} but body is not JSON: {e}",
                status=status,
            ) from e
        return status, {"_raw": raw.decode("utf-8", errors="replace")}


# --------------------------------------------------------------------------
# Status-check context parser
# --------------------------------------------------------------------------
# Format: "<workflow> / <job> (<event>)"
# Examples observed on molecule-core/main:
#   "Secret scan / Scan diff for credential-shaped strings (pull_request)"
#   "sop-tier-check / tier-check (pull_request)"
#
# Split strategy: peel off the trailing ` (<event>)` first, then split
# the leading `<workflow> / <job>` on the FIRST ` / ` (workflow names
# come from `name:` attrs which conventionally don't embed ' / '; job
# names CAN, so we keep the rest of the slash-divided text
# as the job name). This matches Gitea's `name: <workflow>` semantics.
_CONTEXT_RE = re.compile(
    r"^(?P<workflow>.+?) / (?P<job>.+) \((?P<event>[^)]+)\)$"
)


def parse_context(ctx: str) -> tuple[str, str, str] | None:
    """Parse `<workflow> / <job> (<event>)` → (workflow, job, event) or None.

    The workflow name ends at the FIRST ` / `; everything between it and
    the trailing ` (<event>)` is the job name (which may itself contain
    ` / ` or parentheses). Empty, non-string, or non-matching input
    yields None so the caller can warn and skip.
    """
    if not ctx or not isinstance(ctx, str):
        return None
    m = _CONTEXT_RE.match(ctx)
    if m is None:
        return None
    return m.group("workflow"), m.group("job"), m.group("event")


# --------------------------------------------------------------------------
# workflow-name → file resolution
# --------------------------------------------------------------------------
def _iter_workflow_files() -> list[Path]:
    """Return the sorted `*.yml` and `*.yaml` files in `WORKFLOWS_DIR`.

    Exits with status 3 (after an `::error::` line on stderr) when the
    directory does not exist.
    """
    d = Path(WORKFLOWS_DIR)
    if not d.is_dir():
        sys.stderr.write(f"::error::workflows directory not found: {d}\n")
        sys.exit(3)
    # `.yml` and `.yaml` — Gitea accepts both (rarely used `.yaml`, but
    # don't silently miss it if a future port uses it).
    return sorted(list(d.glob("*.yml")) + list(d.glob("*.yaml")))


def resolve_workflow_file(workflow_name: str) -> Path | None:
    """Find the YAML file whose `name:` attr matches `workflow_name`.

    Returns None if no match. Filename is NOT used as a fallback —
    Gitea's context format uses `name:`, so a `name:`-less workflow
    won't even appear in the protection list. (A YAML with no `name:`
    would default the context to the file basename, but our protection
    contexts on molecule-core are all `name:`-derived; we trust the
    same.)

    Files are scanned in sorted order and the first match wins. A YAML
    parse error in any scanned file exits with status 3.
    """
    for f in _iter_workflow_files():
        try:
            doc = yaml.safe_load(f.read_text(encoding="utf-8"))
        except yaml.YAMLError as e:
            sys.stderr.write(f"::error::YAML parse error in {f}: {e}\n")
            sys.exit(3)
        if isinstance(doc, dict) and doc.get("name") == workflow_name:
            return f
    return None


# --------------------------------------------------------------------------
# paths-filter detection
# --------------------------------------------------------------------------
# Triggers that accept `paths:` / `paths-ignore:` (per GitHub Actions /
# Gitea Actions docs): pull_request, pull_request_target, push.
# We don't enumerate — any sub-key named `paths` or `paths-ignore`
# inside an event mapping is flagged.
_PATHS_KEYS = ("paths", "paths-ignore")


def detect_paths_filters(workflow_path: Path) -> list[str]:
    """Report every `paths` / `paths-ignore` key in a workflow's `on:` block.

    Returns:
        One human-readable string per offending key, naming the event,
        the key and the filter contents; an empty list when the trigger
        block carries no such filter. A YAML parse error exits with
        status 3.
    """
    try:
        parsed = yaml.safe_load(workflow_path.read_text(encoding="utf-8"))
    except yaml.YAMLError as e:
        sys.stderr.write(f"::error::YAML parse error in {workflow_path}: {e}\n")
        sys.exit(3)
    if not isinstance(parsed, dict):
        return []

    # YAML 1.1 reads a bare `on` as a boolean, so PyYAML's safe loader
    # can hand back the key `True` instead of "on". Accept either.
    triggers = parsed.get("on") or parsed.get(True)

    # Only the mapping form (`on: { event: {...} }`) can hold a filter;
    # the string shorthand (`on: pull_request`), the list shorthand
    # (`on: [pull_request, push]`) and a missing block cannot.
    if not isinstance(triggers, dict):
        return []

    # Defensive: a malformed top-level `on.paths` (someone wrote
    # `on: { paths: ['x'] }` thinking it's a workflow-level filter).
    # Invalid syntax, but surface it rather than leave a false sense
    # that a filter exists.
    findings: list[str] = [
        f"top-level `on.{k}` filter (malformed but present): {triggers[k]!r}"
        for k in _PATHS_KEYS
        if k in triggers
    ]

    for event, event_body in triggers.items():
        # Top-level filter keys were reported above; `pull_request: null`
        # and `pull_request: [opened]` shapes have nowhere to put a filter.
        if event in _PATHS_KEYS or not isinstance(event_body, dict):
            continue
        findings.extend(
            f"`on.{event}.{k}` filter present: {event_body[k]!r}"
            for k in _PATHS_KEYS
            if k in event_body
        )
    return findings


# --------------------------------------------------------------------------
# Driver
# --------------------------------------------------------------------------
def run() -> int:
    """Main lint entrypoint. Returns the process exit code.

    Exit semantics (see module docstring for full table):
      0 — clean (no offending paths-filter on any required workflow),
          OR protection unreadable (403/404) — surfaced as ::error::
          but treated as non-fatal so token-scope issues don't red-X
          every PR.
      1 — at least one required workflow carries a paths/paths-ignore
          filter — the regression class this lint exists to prevent.
      4 — the protection response was not a JSON object.

    A missing env var exits 2 via `_require_runtime_env()`. Any other
    `ApiError` (e.g. HTTP 500) propagates to the caller.

    Each distinct workflow is resolved and inspected once, even when
    several required contexts (one per job) point at it, so a single
    offending workflow yields a single set of annotations.
    """
    _require_runtime_env()

    protection_path = f"/repos/{OWNER}/{NAME}/branch_protections/{BRANCH}"
    try:
        _, protection = api("GET", protection_path)
    except ApiError as e:
        # Prefer a structured status when the exception carries one; fall
        # back to the "HTTP NNN" marker embedded in the message.
        http_status = getattr(e, "status", None)
        if http_status is None:
            m = re.search(r"HTTP (\d{3})", str(e))
            http_status = int(m.group(1)) if m else None
        if http_status in (403, 404):
            sys.stderr.write(
                f"::error::GET {protection_path} returned HTTP {http_status} — "
                f"DRIFT_BOT_TOKEN lacks repo-admin scope (Gitea 1.22.6 "
                f"requires it for this endpoint) OR branch '{BRANCH}' has "
                f"no protection configured. Cannot enumerate required "
                f"checks; skipping lint with exit 0 to avoid red-X on "
                f"every PR. Fix: grant repo-admin to mc-drift-bot.\n"
            )
            return 0
        raise

    if not isinstance(protection, dict):
        sys.stderr.write(
            f"::error::protection response for {BRANCH} not a JSON object\n"
        )
        return 4

    contexts: list[str] = list(protection.get("status_check_contexts") or [])
    if not contexts:
        print(
            f"::notice::branch_protections/{BRANCH} has 0 required "
            f"status_check_contexts; nothing to lint. (no required contexts)"
        )
        return 0

    print(f"::notice::Linting {len(contexts)} required context(s) for paths-filter regressions:")
    for c in contexts:
        print(f" - {c}")

    offenders: list[tuple[str, Path, list[str]]] = []
    unresolved: list[str] = []
    # workflow name → resolved file (None when no file carries that name).
    resolved: dict[str, Path | None] = {}
    clean_count = 0

    for ctx in contexts:
        parsed = parse_context(ctx)
        if parsed is None:
            print(
                f"::warning::could not parse context '{ctx}' "
                f"(expected `<workflow> / <job> (<event>)`); skipping"
            )
            unresolved.append(ctx)
            continue
        workflow_name = parsed[0]
        already_checked = workflow_name in resolved
        if not already_checked:
            resolved[workflow_name] = resolve_workflow_file(workflow_name)
        wf_path = resolved[workflow_name]
        if wf_path is None:
            print(
                f"::warning::no workflow file in {WORKFLOWS_DIR} has "
                f"`name: {workflow_name}` (required context '{ctx}'); "
                f"skipping paths-filter check. "
                f"(orphaned-context detection is ci-required-drift's job.)"
            )
            unresolved.append(ctx)
            continue
        if already_checked:
            # Another job of a workflow that was already inspected.
            continue
        findings = detect_paths_filters(wf_path)
        if findings:
            offenders.append((workflow_name, wf_path, findings))
        else:
            clean_count += 1
            print(f"::notice::OK {wf_path.name} ({workflow_name}) — no paths filter")

    if offenders:
        print("")
        print(f"::error::Found {len(offenders)} required workflow(s) with paths/paths-ignore filters:")
        for workflow_name, wf_path, findings in offenders:
            for finding in findings:
                # ::error file=... lets Gitea Actions surface a per-file
                # annotation in the PR UI (when annotations are wired).
                print(
                    f"::error file={wf_path}::Required workflow "
                    f"'{workflow_name}' ({wf_path.name}) has a paths "
                    f"filter that would degrade the merge gate to a "
                    f"silent indefinite pending: {finding}. "
                    f"See feedback_path_filtered_workflow_cant_be_required. "
                    f"Fix: remove the filter and instead gate per-step "
                    f"inside the job with `if: contains(steps.changed.outputs.files, ...)` "
                    f"or refactor to a single-job-with-per-step-if shape."
                )
        return 1

    print("")
    print(
        f"::notice::OK — all {clean_count} resolvable "
        f"required workflow(s) clean (no paths/paths-ignore filters)."
    )
    if unresolved:
        print(
            f"::notice::{len(unresolved)} required context(s) were not "
            f"resolved to a workflow file (warn-not-fail); see warnings above."
        )
    return 0


if __name__ == "__main__":
    sys.exit(run())
# lint-required-no-paths — structural enforcement of
# `feedback_path_filtered_workflow_cant_be_required`.
#
# Fails the PR if ANY workflow whose status-check context appears in
# `branch_protections/main.status_check_contexts` carries a
# `paths:` or `paths-ignore:` filter in its `on:` block.
#
# Why this exists:
#   A required-check workflow with a paths filter silently degrades the
#   merge gate. If a PR's diff doesn't touch the filter, the workflow
#   never fires; Gitea (1.22.6) reports the required context as
#   `pending` (NOT `skipped == success`), so the PR cannot merge. For a
#   docs-only PR against `paths: ['**.go']`, the PR is wedged forever.
#
#   Previously prevented only by reviewer vigilance + the saved memory
#   `feedback_path_filtered_workflow_cant_be_required`. This workflow
#   makes it a hard CI gate.
#
# Forward-compat scope:
#   Today (2026-05-11) molecule-core/main protects 3 contexts:
#     - "Secret scan / Scan diff for credential-shaped strings (pull_request)"
#     - "sop-tier-check / tier-check (pull_request)"
#     - "CI / all-required (pull_request)"
#   Per RFC#324 Step 2 the required-list expands to ~5 contexts
#   (qa-review, security-review added). Each new required context's
#   workflow must remain unconditional. This lint pins that contract.
#
# Meta-required-check:
#   This workflow ITSELF deliberately has NO `paths:` filter on its `on:`
#   block — otherwise a paths-non-matching PR could bypass the check.
#   Self-evident from this file: only `pull_request` types + no paths.
#
# Auth:
#   `GET /repos/.../branch_protections/{branch}` requires repo-admin
#   role in Gitea 1.22.6. The workflow-default `GITHUB_TOKEN` is
#   non-admin (read-only), so we re-use `DRIFT_BOT_TOKEN` (same persona
#   that powers `ci-required-drift.yml` — verified working there).
#   If the token cannot read the endpoint (HTTP 403/404), the script
#   exits 0 with a loud `::error::` rather than red-X every PR —
#   token-scope issues should be fixed at the token, not surfaced as a
#   gate failure on every unrelated PR.
#   NOTE: an unset or empty `DRIFT_BOT_TOKEN` secret is a different
#   path — the script treats an empty `GITEA_TOKEN` as an env-contract
#   violation and exits 2, which DOES fail the job.
#
# Behavior-based gate per `feedback_behavior_based_ast_gates`:
#   YAML AST walk (PyYAML), NOT grep. Workflow renames, formatting
#   changes (block-scalar vs flow-style), or moving `paths:` between
#   `pull_request:` and `pull_request_target:` all still detect.
#
# IMPORTANT — Gitea 1.22.6 parser quirk per
# `feedback_gitea_workflow_dispatch_inputs_unsupported`: do NOT add an
# `inputs:` block to `workflow_dispatch:` — Gitea 1.22.6 rejects the
# entire workflow as "unknown on type" and it registers for ZERO events.

name: lint-required-no-paths

# No `paths:` / `paths-ignore:` here, on purpose (see Meta-required-check).
on:
  pull_request:
    types: [opened, synchronize, reopened]
  workflow_dispatch:

# Read protection + read local YAML. No writes.
permissions:
  contents: read

# Only one in-flight run per PR — re-pushes cancel the previous run to
# keep the queue short. Required-list reads are cheap (one GET); the
# cancellation is just hygiene.
concurrency:
  group: lint-required-no-paths-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  lint:
    name: lint-required-no-paths
    runs-on: ubuntu-latest
    timeout-minutes: 5
    steps:
      - name: Check out repo (we read the workflow YAML files locally)
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - name: Set up Python (PyYAML for AST parsing)
        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
        with:
          python-version: '3.12'
      - name: Install PyYAML
        run: python -m pip install --quiet 'PyYAML==6.0.2'
      - name: Run lint-required-no-paths
        env:
          # DRIFT_BOT_TOKEN is owned by mc-drift-bot, a least-privilege
          # Gitea persona with repo-admin role for branch_protections
          # read. Same secret used by ci-required-drift.yml — see that
          # workflow's header for provisioning trail (internal#329).
          GITEA_TOKEN: ${{ secrets.DRIFT_BOT_TOKEN }}
          GITEA_HOST: git.moleculesai.app
          REPO: ${{ github.repository }}
          BRANCH: main
          WORKFLOWS_DIR: .gitea/workflows
        run: python3 .gitea/scripts/lint-required-no-paths.py
+ GITEA_TOKEN: ${{ secrets.DRIFT_BOT_TOKEN }} + GITEA_HOST: git.moleculesai.app + REPO: ${{ github.repository }} + BRANCH: main + WORKFLOWS_DIR: .gitea/workflows + run: python3 .gitea/scripts/lint-required-no-paths.py diff --git a/tests/test_lint_required_no_paths.py b/tests/test_lint_required_no_paths.py new file mode 100644 index 00000000..a30282de --- /dev/null +++ b/tests/test_lint_required_no_paths.py @@ -0,0 +1,554 @@ +"""Tests for `.gitea/scripts/lint-required-no-paths.py`. + +Structural enforcement of `feedback_path_filtered_workflow_cant_be_required`: +no workflow whose status-check context is in `branch_protections/main` +`status_check_contexts` may use `paths:` or `paths-ignore:` filters in its +`on:` block. A path-filtered workflow silently does not fire on a PR whose +diff doesn't touch the filter — Gitea treats that as `pending` forever, +not `skipped`-as-`success`, so the gate degrades to an indefinite block. +Worse, a docs-only PR could never satisfy a required check whose filter +excludes docs paths, and the protected branch becomes unreachable. + +Five test classes: + - test_no_required_workflows_succeeds — empty status_check_contexts → exit 0 + - test_required_workflow_no_paths_passes — required workflow with no + paths filter → exit 0 + - test_required_workflow_with_paths_filter_fails — required workflow + with `paths: ['**.go']` → exit 1, error names workflow + - test_required_workflow_with_paths_ignore_fails — same shape for + `paths-ignore` + - test_unknown_required_context_warns_not_fails — context whose + workflow file is missing → warn, do NOT fail (graceful — could be a + cross-repo context name or a workflow renamed mid-PR; the lint is for + paths-filter detection, not orphaned-context detection — that's + ci-required-drift's job) + +Also covers the workflow-name → file-path mapping (parses the +` / ()` context format) and the +multi-event `on:` block edge cases (paths under `on.push` vs `on.pull_request` +vs top-level `on.paths`). 
+ +Run: + python3 -m pytest tests/test_lint_required_no_paths.py -v + +Dependencies: stdlib + PyYAML (already required by the script itself). +No network. No live Gitea calls — `api()` is stubbed. +""" +from __future__ import annotations + +import importlib.util +import os +import sys +from pathlib import Path +from unittest import mock + +import pytest + + +# -------------------------------------------------------------------------- +# Module import fixture — mirror of tests/test_ci_required_drift.py shape +# -------------------------------------------------------------------------- +SCRIPT_PATH = ( + Path(__file__).resolve().parent.parent + / ".gitea" + / "scripts" + / "lint-required-no-paths.py" +) + + +@pytest.fixture() +def lint_module(tmp_path, monkeypatch): + """Import the script as a module with a clean env per test. + + Tests need a per-test workflows directory under tmp_path; the module + reads `WORKFLOWS_DIR` from env. Fresh import per test means tests + cannot leak global state into each other. + """ + env = { + "GITEA_TOKEN": "test-token", + "GITEA_HOST": "git.example.test", + "REPO": "owner/repo", + "BRANCH": "main", + "WORKFLOWS_DIR": str(tmp_path / ".gitea" / "workflows"), + } + (tmp_path / ".gitea" / "workflows").mkdir(parents=True) + monkeypatch.setattr(os, "environ", {**os.environ, **env}) + spec = importlib.util.spec_from_file_location( + f"lint_required_no_paths_{id(tmp_path)}", SCRIPT_PATH + ) + m = importlib.util.module_from_spec(spec) + spec.loader.exec_module(m) + # Force-set the globals from env (they were captured at import time; + # we mutate them so the per-test tmp_path is what the script reads). 
def _write_workflow(workflows_dir: str, filename: str, content: str) -> Path:
    """Write `content` to `workflows_dir/filename` (UTF-8); return the path."""
    p = Path(workflows_dir) / filename
    p.write_text(content, encoding="utf-8")
    return p


def _make_stub_api(responses: dict):
    """Build a fake `api()` callable.

    `responses` maps (method, path) tuples to either:
      - (status_int, body) → returned as-is
      - Exception instance → raised
    Calls are recorded in `.calls` for later assertion. A call with no
    registered response raises AssertionError.
    """
    class StubApi:
        def __init__(self):
            self.calls: list[tuple] = []

        def __call__(self, method, path, *, body=None, query=None, expect_json=True):
            self.calls.append((method, path, body, query))
            key = (method, path)
            if key not in responses:
                raise AssertionError(
                    f"unexpected api call: {method} {path} (no stub registered)"
                )
            r = responses[key]
            if isinstance(r, Exception):
                raise r
            return r

    return StubApi()


# --------------------------------------------------------------------------
# context → (workflow_name, job_name, event) parser
# --------------------------------------------------------------------------
def test_parse_context_standard_shape(lint_module):
    """`<workflow> / <job> (<event>)` round-trips cleanly."""
    parsed = lint_module.parse_context(
        "Secret scan / Scan diff for credential-shaped strings (pull_request)"
    )
    assert parsed == (
        "Secret scan",
        "Scan diff for credential-shaped strings",
        "pull_request",
    )


def test_parse_context_with_slash_in_job_name(lint_module):
    """Job names CAN contain ' / ' literally in Gitea; the parser must
    split on the FIRST ' / ' and keep the remainder (up to the trailing
    ' (event)' suffix) as the job name."""
    parsed = lint_module.parse_context(
        "ci / setup / install-deps (pull_request)"
    )
    # Workflow = first segment; job = everything between first ' / ' and
    # the trailing ' (event)'. Pragmatic split: the workflow name is
    # `name:` from the YAML, so multi-slash workflow names are unlikely;
    # treat the first ' / ' as the divider.
    assert parsed[0] == "ci"
    assert parsed[1] == "setup / install-deps"
    assert parsed[2] == "pull_request"


def test_parse_context_unparseable_returns_none(lint_module):
    """Malformed context string → None so the caller can warn-and-skip."""
    assert lint_module.parse_context("garbage no event marker") is None
    assert lint_module.parse_context("") is None


# --------------------------------------------------------------------------
# workflow-name → file resolution
# --------------------------------------------------------------------------
def test_resolve_workflow_file_matches_name_attr(lint_module):
    """Resolution scans workflows/*.yml for a `name:` matching the
    context's workflow_name. Filename is NOT the source of truth — the
    `name:` attribute is, because Gitea's context format uses
    `name:` (not the filename).
    """
    _write_workflow(
        lint_module.WORKFLOWS_DIR,
        "some-file.yml",
        "name: Secret scan\n"
        "on:\n"
        "  pull_request:\n"
        "    types: [opened]\n"
        "jobs:\n"
        "  scan:\n"
        "    runs-on: ubuntu-latest\n",
    )
    p = lint_module.resolve_workflow_file("Secret scan")
    assert p is not None
    assert p.name == "some-file.yml"


def test_resolve_workflow_file_returns_none_when_missing(lint_module):
    """No matching `name:` found → None."""
    _write_workflow(
        lint_module.WORKFLOWS_DIR,
        "other.yml",
        "name: Other\n"
        "on:\n"
        "  pull_request: {}\n"
        "jobs:\n"
        "  x:\n"
        "    runs-on: ubuntu-latest\n",
    )
    assert lint_module.resolve_workflow_file("Secret scan") is None


# --------------------------------------------------------------------------
# paths-filter detection
# --------------------------------------------------------------------------
def test_workflow_has_no_paths_filter_clean(lint_module):
    """No paths/paths-ignore → returns empty list (no findings)."""
    _write_workflow(
        lint_module.WORKFLOWS_DIR,
        "clean.yml",
        "name: Clean\n"
        "on:\n"
        "  pull_request:\n"
        "    types: [opened, synchronize]\n"
        "jobs:\n"
        "  x:\n"
        "    runs-on: ubuntu-latest\n",
    )
    findings = lint_module.detect_paths_filters(
        Path(lint_module.WORKFLOWS_DIR) / "clean.yml"
    )
    assert findings == []


def test_workflow_with_pull_request_paths_filter_detected(lint_module):
    """`on.pull_request.paths` → ONE finding naming pull_request + paths."""
    # `paths` must be nested UNDER `pull_request`; at the same level it
    # would be the malformed top-level `on.paths` case instead.
    _write_workflow(
        lint_module.WORKFLOWS_DIR,
        "bad.yml",
        "name: Bad\n"
        "on:\n"
        "  pull_request:\n"
        "    paths: ['**.go', 'workspace/**']\n"
        "jobs:\n"
        "  x:\n"
        "    runs-on: ubuntu-latest\n",
    )
    findings = lint_module.detect_paths_filters(
        Path(lint_module.WORKFLOWS_DIR) / "bad.yml"
    )
    assert len(findings) == 1
    f = findings[0]
    assert "pull_request" in f
    assert "paths" in f
    assert "**.go" in f or "workspace/**" in f  # filter content surfaced


def test_workflow_with_paths_ignore_filter_detected(lint_module):
    """`on.pull_request.paths-ignore` → finding naming paths-ignore.

    paths-ignore is the SAME class of defect: a docs-only PR (that
    matches the ignore pattern) silently won't fire the workflow, and the
    required context stays pending.
    """
    _write_workflow(
        lint_module.WORKFLOWS_DIR,
        "bad.yml",
        "name: Bad\n"
        "on:\n"
        "  pull_request:\n"
        "    paths-ignore: ['docs/**']\n"
        "jobs:\n"
        "  x:\n"
        "    runs-on: ubuntu-latest\n",
    )
    findings = lint_module.detect_paths_filters(
        Path(lint_module.WORKFLOWS_DIR) / "bad.yml"
    )
    assert len(findings) == 1
    assert "paths-ignore" in findings[0]


def test_workflow_with_push_paths_filter_detected(lint_module):
    """`on.push.paths` → also a finding. A required check on a PR is
    typically `(pull_request)`-event, but a workflow may ALSO have a
    push trigger; a paths filter on the push side affects the same
    workflow file, and a future PR might add `paths:` to the wrong
    event-branch and trip the gate. Surface all paths-filter sites.
    """
    _write_workflow(
        lint_module.WORKFLOWS_DIR,
        "bad.yml",
        "name: Bad\n"
        "on:\n"
        "  pull_request:\n"
        "    types: [opened]\n"
        "  push:\n"
        "    branches: [main]\n"
        "    paths: ['**.py']\n"
        "jobs:\n"
        "  x:\n"
        "    runs-on: ubuntu-latest\n",
    )
    findings = lint_module.detect_paths_filters(
        Path(lint_module.WORKFLOWS_DIR) / "bad.yml"
    )
    assert len(findings) == 1
    assert "push" in findings[0]
    assert "paths" in findings[0]


def test_workflow_with_both_paths_and_paths_ignore_two_findings(lint_module):
    """Both filters under one event → two findings (one per offending
    key). Test ensures the detector doesn't short-circuit after the
    first."""
    _write_workflow(
        lint_module.WORKFLOWS_DIR,
        "bad.yml",
        "name: Bad\n"
        "on:\n"
        "  pull_request:\n"
        "    paths: ['**.go']\n"
        "    paths-ignore: ['docs/**']\n"
        "jobs:\n"
        "  x:\n"
        "    runs-on: ubuntu-latest\n",
    )
    findings = lint_module.detect_paths_filters(
        Path(lint_module.WORKFLOWS_DIR) / "bad.yml"
    )
    assert len(findings) == 2
    # Both must be event-level findings, one per key.
    assert all("on.pull_request." in f for f in findings)


def test_workflow_with_on_shorthand_string_passes(lint_module):
    """`on: pull_request` (string shorthand, no sub-keys) cannot have a
    paths filter — detector treats it as clean."""
    _write_workflow(
        lint_module.WORKFLOWS_DIR,
        "clean.yml",
        "name: Clean\n"
        "on: pull_request\n"
        "jobs:\n"
        "  x:\n"
        "    runs-on: ubuntu-latest\n",
    )
    findings = lint_module.detect_paths_filters(
        Path(lint_module.WORKFLOWS_DIR) / "clean.yml"
    )
    assert findings == []


def test_workflow_with_on_list_shorthand_passes(lint_module):
    """`on: [pull_request, push]` (list shorthand) cannot carry filters
    either — clean."""
    _write_workflow(
        lint_module.WORKFLOWS_DIR,
        "clean.yml",
        "name: Clean\n"
        "on: [pull_request, push]\n"
        "jobs:\n"
        "  x:\n"
        "    runs-on: ubuntu-latest\n",
    )
    findings = lint_module.detect_paths_filters(
        Path(lint_module.WORKFLOWS_DIR) / "clean.yml"
    )
    assert findings == []


def test_workflow_on_event_with_null_value_passes(lint_module):
    """`pull_request:` with no body (None / null) is event-shorthand —
    no filter possible."""
    _write_workflow(
        lint_module.WORKFLOWS_DIR,
        "clean.yml",
        "name: Clean\n"
        "on:\n"
        "  pull_request:\n"
        "  push:\n"
        "    branches: [main]\n"
        "jobs:\n"
        "  x:\n"
        "    runs-on: ubuntu-latest\n",
    )
    findings = lint_module.detect_paths_filters(
        Path(lint_module.WORKFLOWS_DIR) / "clean.yml"
    )
    assert findings == []


# --------------------------------------------------------------------------
# End-to-end lint (main) — required-checks fan-out
# --------------------------------------------------------------------------
def test_no_required_workflows_succeeds(lint_module, monkeypatch, capsys):
    """Empty status_check_contexts → exit 0, no findings reported."""
    stub = _make_stub_api({
        ("GET", "/repos/owner/repo/branch_protections/main"): (
            200,
            {"status_check_contexts": []},
        ),
    })
    monkeypatch.setattr(lint_module, "api", stub)
    rc = lint_module.run()
    assert rc == 0
    out = capsys.readouterr().out
    assert "no required contexts" in out.lower() or "0 required" in out.lower()


def test_required_workflow_no_paths_passes(lint_module, monkeypatch, capsys):
    """A required workflow with no paths filter → exit 0."""
    _write_workflow(
        lint_module.WORKFLOWS_DIR,
        "secret-scan.yml",
        "name: Secret scan\n"
        "on:\n"
        "  pull_request:\n"
        "    types: [opened]\n"
        "jobs:\n"
        "  scan:\n"
        "    runs-on: ubuntu-latest\n",
    )
    stub = _make_stub_api({
        ("GET", "/repos/owner/repo/branch_protections/main"): (
            200,
            {
                "status_check_contexts": [
                    "Secret scan / scan (pull_request)",
                ]
            },
        ),
    })
    monkeypatch.setattr(lint_module, "api", stub)
    rc = lint_module.run()
    assert rc == 0


def test_required_workflow_with_paths_filter_fails(
    lint_module, monkeypatch, capsys
):
    """A required workflow that has `paths:` filter → exit 1 + error
    names the offending workflow + the filter."""
    _write_workflow(
        lint_module.WORKFLOWS_DIR,
        "secret-scan.yml",
        "name: Secret scan\n"
        "on:\n"
        "  pull_request:\n"
        "    paths: ['**.go']\n"
        "jobs:\n"
        "  scan:\n"
        "    runs-on: ubuntu-latest\n",
    )
    stub = _make_stub_api({
        ("GET", "/repos/owner/repo/branch_protections/main"): (
            200,
            {"status_check_contexts": ["Secret scan / scan (pull_request)"]},
        ),
    })
    monkeypatch.setattr(lint_module, "api", stub)
    rc = lint_module.run()
    assert rc == 1
    out = capsys.readouterr().out
    assert "secret-scan.yml" in out
    assert "Secret scan" in out
    assert "paths" in out
    assert "::error::" in out


def test_required_workflow_with_paths_ignore_fails(
    lint_module, monkeypatch, capsys
):
    """Same defect class for `paths-ignore` — exit 1, named."""
    _write_workflow(
        lint_module.WORKFLOWS_DIR,
        "sop-tier-check.yml",
        "name: sop-tier-check\n"
        "on:\n"
        "  pull_request_target:\n"
        "    paths-ignore: ['docs/**']\n"
        "jobs:\n"
        "  tier-check:\n"
        "    runs-on: ubuntu-latest\n",
    )
    stub = _make_stub_api({
        ("GET", "/repos/owner/repo/branch_protections/main"): (
            200,
            {
                "status_check_contexts": [
                    "sop-tier-check / tier-check (pull_request_target)"
                ]
            },
        ),
    })
    monkeypatch.setattr(lint_module, "api", stub)
    rc = lint_module.run()
    assert rc == 1
    out = capsys.readouterr().out
    assert "sop-tier-check.yml" in out
    assert "paths-ignore" in out


defect class for `paths-ignore` — exit 1, named.""" + _write_workflow( + lint_module.WORKFLOWS_DIR, + "sop-tier-check.yml", + "name: sop-tier-check\n" + "on:\n" + " pull_request_target:\n" + " paths-ignore: ['docs/**']\n" + "jobs:\n" + " tier-check:\n" + " runs-on: ubuntu-latest\n", + ) + stub = _make_stub_api({ + ("GET", "/repos/owner/repo/branch_protections/main"): ( + 200, + { + "status_check_contexts": [ + "sop-tier-check / tier-check (pull_request_target)" + ] + }, + ), + }) + monkeypatch.setattr(lint_module, "api", stub) + rc = lint_module.run() + assert rc == 1 + out = capsys.readouterr().out + assert "sop-tier-check.yml" in out + assert "paths-ignore" in out + + +def test_unknown_required_context_warns_not_fails( + lint_module, monkeypatch, capsys +): + """Required context with no matching workflow file → warn, don't + fail. This is gracefully bounded — the lint's mandate is paths-filter + detection, not orphaned-context detection (`ci-required-drift` is the + canonical detector for that). + """ + # No workflows written → all required contexts will be unresolved. + stub = _make_stub_api({ + ("GET", "/repos/owner/repo/branch_protections/main"): ( + 200, + { + "status_check_contexts": [ + "Mystery / job (pull_request)", + ] + }, + ), + }) + monkeypatch.setattr(lint_module, "api", stub) + rc = lint_module.run() + assert rc == 0 # warn-not-fail + out = capsys.readouterr().out + assert "::warning::" in out + assert "Mystery" in out + + +def test_multi_required_one_bad_one_good_fails( + lint_module, monkeypatch, capsys +): + """Two required contexts; one workflow is bad. 
Lint still fails + (one defect is enough) and the error names ONLY the bad workflow.""" + _write_workflow( + lint_module.WORKFLOWS_DIR, + "good.yml", + "name: Good\non:\n pull_request:\n types: [opened]\njobs:\n x:\n runs-on: ubuntu-latest\n", + ) + _write_workflow( + lint_module.WORKFLOWS_DIR, + "bad.yml", + "name: Bad\n" + "on:\n" + " pull_request:\n" + " paths: ['src/**']\n" + "jobs:\n x:\n runs-on: ubuntu-latest\n", + ) + stub = _make_stub_api({ + ("GET", "/repos/owner/repo/branch_protections/main"): ( + 200, + { + "status_check_contexts": [ + "Good / x (pull_request)", + "Bad / x (pull_request)", + ] + }, + ), + }) + monkeypatch.setattr(lint_module, "api", stub) + rc = lint_module.run() + assert rc == 1 + out = capsys.readouterr().out + assert "bad.yml" in out + # `good.yml` should NOT show up in the error block — only the bad one. + # (It may appear as a "checked" notice; assert it's not flagged as bad.) + assert "::error::" in out + error_lines = [ln for ln in out.split("\n") if ln.startswith("::error::") or "paths" in ln.lower() and "good" in ln.lower()] + # The good workflow must not appear under an ::error:: line referencing paths. + for ln in error_lines: + if ln.startswith("::error::"): + # The error line itself shouldn't name good.yml as offending. + assert "good.yml" not in ln + + +def test_protection_403_treated_as_skip(lint_module, monkeypatch, capsys): + """If the token can't read branch_protections (HTTP 403), exit 0 + with a clear ::error::-but-non-fatal note. Same scope-fallback shape + as ci-required-drift.py per the precedent. + + Rationale: if the lint workflow itself can't read protection, the PR + can't make THIS state worse (a paths-filter PR was already addable + without the lint). Better to surface a token-scope problem loudly + than to red-X every PR until the token is fixed. 
+ """ + stub = _make_stub_api({ + ("GET", "/repos/owner/repo/branch_protections/main"): ( + lint_module.ApiError( + "GET /repos/owner/repo/branch_protections/main → HTTP 403: forbidden" + ) + ), + }) + monkeypatch.setattr(lint_module, "api", stub) + rc = lint_module.run() + assert rc == 0 + err = capsys.readouterr().err + assert "::error::" in err + assert "403" in err