#!/usr/bin/env bash
# tools/branch-protection/check_name_parity.sh — assert every required-
# check name listed in apply.sh maps to a workflow job whose "always
# emits this status" shape is intact.
#
# Closes #144 / encodes the saved memory
# feedback_branch_protection_check_name_parity:
#
#   "Path filters (e.g., detect-changes → conditional skip) silently
#    break branch protection because no job emits the protected
#    sentinel status when path-filter returns false."
#
# Two safe shapes for a required-check job:
#
#   1. Single-job-with-per-step-if (path-filter case):
#      The workflow has NO top-level `paths:` filter; the always-running
#      job has steps gated on `if: needs.<job>.outputs.<flag> == 'true'`
#      so the no-op step alone fires when paths exclude the commit.
#      Used by ci.yml's Platform/Canvas/Python/Shellcheck and by
#      e2e-api.yml / e2e-staging-canvas.yml / runtime-prbuild-compat.yml.
#
#   2. Aggregator-with-needs+always() (matrix-refactor case):
#      An aggregator job named after the protected check `needs:` the
#      matrix children + uses `if: always()` + checks each child's
#      result. (Not currently in this repo but supported.)
#
# Unsafe shape this script catches:
#   - Workflow has top-level `paths:` filter AND the protected check
#     name is on a single job. When paths-filter excludes a commit, the
#     workflow doesn't fire — branch protection waits forever.
#
# Exit codes:
#   0 — every required check name has at least one safe-shape match
#   1 — a required name has no match OR matches an unsafe shape
#   2 — script-internal error (apply.sh missing, awk failure, etc.)

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
WORKFLOWS_DIR="$REPO_ROOT/.github/workflows"
APPLY_SH="$SCRIPT_DIR/apply.sh"

if [[ ! -f "$APPLY_SH" ]]; then
  echo "check_name_parity: missing apply.sh at $APPLY_SH" >&2
  exit 2
fi
if [[ ! -d "$WORKFLOWS_DIR" ]]; then
  echo "check_name_parity: missing .github/workflows at $WORKFLOWS_DIR" >&2
  exit 2
fi

# ─── Extract the union of required check names from apply.sh ──────
# apply.sh has STAGING_CHECKS and MAIN_CHECKS heredocs; union them so
# we audit any name that gates EITHER branch. Filters out blank lines
# and the heredoc end marker. Sorted + uniq so the audit output is stable.
#
# Captures the heredoc end-marker dynamically from the `<<'MARKER'`
# token on the opening line — the token can be `EOF` (production
# apply.sh), `EOF2` (test fixtures with nested heredocs), or any other
# bash-legal identifier. Without dynamic extraction, test fixtures
# with nested heredocs would either skip-capture (wrong end marker)
# or capture the inner end marker as a stray check name.
#
# Two-step approach to keep awk-portable across BSD awk (macOS) and
# gawk (Linux): grep finds the heredoc-opening lines, sed extracts the
# marker, then awk does the capture. Pure-awk attempts hit BSD-vs-GNU
# regex/variable-init differences that regress silently — this shape
# stays in POSIX-portable territory.

#######################################
# Print the non-blank body lines of every heredoc in a file that is
# opened with the given marker.
# Arguments:
#   $1 - file to scan
#   $2 - heredoc marker (identifier made of [A-Za-z0-9_])
# Outputs:
#   heredoc body lines on stdout, one per line, blank lines dropped
#######################################
extract_heredoc_block() {
  local file="$1"
  local marker="$2"
  awk -v marker="$marker" '
    BEGIN {
      # "<<", optional "-", optional quote, the marker, optional closing
      # quote, then a non-identifier character or end of line. The
      # trailing guard keeps marker EOF from also matching an EOF2
      # opener, which would start a capture that EOF2 never closes.
      open_re = "<<-?[\"\047]?" marker "[\"\047]?([^A-Za-z0-9_]|$)"
    }
    $0 ~ open_re            { capture = 1; next }
    $0 == marker && capture { capture = 0; next }
    capture && NF           { print }
  ' "$file"
}

#######################################
# Succeed when a workflow file contains a `name:` line (workflow-level
# or job-level; `- name:` step lines are not matched) whose value equals
# the wanted check name.
# Arguments:
#   $1 - workflow file
#   $2 - required check name
# Returns:
#   0 when a matching `name:` line exists, 1 otherwise
#######################################
workflow_owns_name() {
  local file="$1"
  local want="$2"
  # The name travels through the environment rather than `awk -v`,
  # because -v assignments undergo backslash-escape processing.
  WANT_NAME="$want" awk '
    BEGIN { want = ENVIRON["WANT_NAME"] }
    /^[[:space:]]*name:[[:space:]]*/ {
      value = $0
      sub(/^[[:space:]]*name:[[:space:]]*/, "", value)
      quote = substr(value, 1, 1)
      if (quote == "\"" || quote == "\047") {
        # Quoted scalar: the value runs up to the matching closing
        # quote, so a trailing comment after the quote is ignored and
        # a "#" inside the quotes is kept.
        value = substr(value, 2)
        stop = index(value, quote)
        if (stop > 0) value = substr(value, 1, stop - 1)
        sub(/[[:space:]]+$/, "", value)
      } else {
        # Plain scalar: a comment starts at "#" only when it opens the
        # value or follows whitespace.
        sub(/(^|[[:space:]]+)#.*$/, "", value)
        sub(/[[:space:]]+$/, "", value)
      }
      if (value == want) found = 1
    }
    END { exit !found }
  ' "$file"
}

#######################################
# Succeed when a workflow file has a `paths:` key inside its top-level
# `on:` block. Heuristic line scan, not a YAML parse.
# Arguments:
#   $1 - workflow file
# Returns:
#   0 when a `paths:` key is found under `on:`, 1 otherwise
#######################################
workflow_has_top_level_paths() {
  local file="$1"
  awk '
    # Track whether we are inside the `on:` block. It starts at the
    # column-0 `on:` key (optionally quoted, optionally followed by a
    # comment) and ends when the next column-0 key appears. Column-0
    # comment lines do not end it.
    /^["\047]?on["\047]?:[[:space:]]*(#.*)?$/ { in_on = 1; next }
    in_on && /^[^[:space:]#]/ { in_on = 0 }
    # Block-list form (`paths:` alone or with a trailing comment) and
    # flow-list form (`paths: [a, b]`).
    in_on && /^[[:space:]]+paths:[[:space:]]*(\[.*|#.*)?$/ { hit = 1; exit }
    END { exit !hit }
  ' "$file"
}

# Find every heredoc-end marker used in apply.sh (typically just EOF
# in the production script, but EOF2 / TAG / ABC are all valid in
# fixtures or future expansions). Each marker maps to one or more
# heredoc blocks; we union all of them.
#
# grep exits 1 when nothing matches; with `set -e` + pipefail that
# would abort here with status 1 and no message. Tolerate it so the
# empty result reaches the explicit exit-2 diagnostic below.
markers=$(
  { grep -E "<<['\"]?[A-Za-z0-9_]+['\"]?[[:space:]]*\\|\\|" "$APPLY_SH" || true; } \
    | sed -E "s/.*<<['\"]?([A-Za-z0-9_]+)['\"]?.*/\\1/" \
    | sort -u
)

required_names=""
while IFS= read -r marker; do
  [[ -z "$marker" ]] && continue
  block=$(extract_heredoc_block "$APPLY_SH" "$marker")
  if [[ -n "$block" ]]; then
    required_names+="$block"$'\n'
  fi
done <<< "$markers"

required_names=$(printf '%s' "$required_names" | sort -u | sed '/^$/d')

if [[ -z "$required_names" ]]; then
  echo "check_name_parity: failed to extract required check names from apply.sh" >&2
  exit 2
fi

# ─── For each required name, find the workflow file that owns it ──
# A workflow "owns" a name if any `name:` line in the file equals the
# required name. We look at job-level names AND the workflow-level
# `name:` (the latter prefixes "Analyze" jobs in codeql.yml).
#
# Then we check whether the owning workflow has a top-level `paths:`
# filter:
#
#   - "no top-level paths:" → safe by construction (workflow always
#                             fires; any per-step if-gates only skip
#                             steps, the job still reports).
#   - "paths: present"      → unsafe either way, because the workflow
#                             may not fire at all. We only distinguish
#                             the two flavours for the message:
#                             UNSAFE-MIX when per-step
#                             `if: needs.<job>.outputs.<flag>` gates
#                             also exist, UNSAFE-PATH-FILTER when they
#                             don't.
#
# Heuristic so it stays a portable bash + awk + grep tool — full YAML
# parsing would need yq which isn't a dependency. The known unsafe
# shape (workflow-level paths: filter on a workflow that owns a
# required check) is what we're trying to catch.
#
# NOTE(review): a top-level `paths-ignore:` filter can also keep a
# workflow from firing but is not detected here — confirm whether it
# should be.

failed=0
declare -a unsafe_findings=()

while IFS= read -r name; do
  [[ -z "$name" ]] && continue

  # Find every workflow file that contains a job with `name: <name>` or
  # whose top-level workflow `name:` plus matrix substitution would
  # produce <name>. YAML allows `name: Foo`, `name: "Foo"`,
  # `name: 'Foo'`; workflow_owns_name normalises all three. The
  # workflow-level name line is at column 0; job-level names are
  # indented. Either is acceptable for parity — what matters is
  # whether the EMITTED check-run name is the one we required.
  # The file list is sorted so findings come out in a stable order.
  matches=()
  while IFS= read -r f; do
    if workflow_owns_name "$f" "$name"; then
      matches+=("$f")
    fi
  done < <(find "$WORKFLOWS_DIR" \( -name '*.yml' -o -name '*.yaml' \) | LC_ALL=C sort)

  if [[ ${#matches[@]} -eq 0 ]]; then
    # Special case — Analyze (go/javascript-typescript/python) is
    # generated by codeql.yml's matrix expansion of `Analyze (${{
    # matrix.language }})`. Don't flag those as missing if codeql.yml
    # exists with the expected base name.
    case "$name" in
      "Analyze (go)"|"Analyze (javascript-typescript)"|"Analyze (python)")
        # shellcheck disable=SC2016
        # The literal `${{ matrix.language }}` is the GHA template
        # syntax we're searching FOR — not a shell expansion. SC2016
        # would have us add quotes that defeat the search.
        if [[ -f "$WORKFLOWS_DIR/codeql.yml" ]] && \
           grep -q 'name: Analyze (${{[[:space:]]*matrix.language[[:space:]]*}})' "$WORKFLOWS_DIR/codeql.yml"; then
          matches=("$WORKFLOWS_DIR/codeql.yml")
        fi
        ;;
    esac
  fi

  if [[ ${#matches[@]} -eq 0 ]]; then
    unsafe_findings+=("MISSING: required check name '$name' has no matching workflow job")
    failed=1
    continue
  fi

  # For each owning workflow, classify safe vs unsafe.
  for f in "${matches[@]}"; do
    rel="${f#"$REPO_ROOT"/}"

    # Heuristic: does the workflow have a top-level `paths:` filter?
    # Top-level here means under the `on:` key, not under
    # jobs.<job_id>.if. Workflow-level paths filters appear at indent
    # depth 4 (under `push:` or `pull_request:`). Job-level `if:`
    # paths-filter doesn't block the workflow from firing.
    if ! workflow_has_top_level_paths "$f"; then
      # Safe: workflow always fires. If there are inner per-step if-
      # gates (single-job-with-per-step-if pattern), the no-op step
      # produces SUCCESS for the protected name — branch-protection-clean.
      continue
    fi

    # Unsafe — the workflow has a top-level paths: filter. Look for any
    # `if:` referencing a paths-filter / detect-changes output to pick
    # the more helpful of the two messages.
    #
    # The regex is intentionally anchored loosely — actual workflow
    # YAML writes per-step if-gates as `  - if: needs.X.outputs.Y`
    # (with the `-` step-marker between the leading spaces and the
    # `if`). Anchoring on `^[[:space:]]+if:` would miss those. Digits
    # are accepted in the job id (e.g. `needs.detect2.outputs.x`).
    if grep -qE 'if:[[:space:]]+needs\.[A-Za-z0-9_-]+\.outputs\.' "$f"; then
      # Per-step if-gates exist. Combined with top-level paths: this
      # would be a buggy mix (the workflow might still skip entirely
      # when paths exclude). Flag as unsafe — the safe pattern omits
      # the top-level paths: filter altogether and gates per-step.
      unsafe_findings+=("UNSAFE-MIX: $rel has top-level paths: AND per-step if-gates — when paths exclude the commit, the workflow doesn't fire and the required check '$name' is silently absent. Drop the top-level paths: filter; keep the per-step if-gates.")
      failed=1
    else
      # Top-level paths: with no per-step if-gates: the canonical
      # check-name parity bug.
      unsafe_findings+=("UNSAFE-PATH-FILTER: $rel has top-level paths: filter and no per-step if-gates. When paths exclude the commit, no job emits the required check '$name' — branch protection waits forever. Either drop the paths: filter and add per-step if-gates against a detect-changes output, or add an aggregator-with-needs+always() job that emits '$name'.")
      failed=1
    fi
  done
done <<< "$required_names"

if [[ "$failed" -eq 0 ]]; then
  echo "check_name_parity: OK — every required check name maps to a safe workflow shape."
  exit 0
fi

echo "check_name_parity: FOUND ${#unsafe_findings[@]} issue(s):" >&2
for finding in "${unsafe_findings[@]}"; do
  echo "  - $finding" >&2
done
exit 1