diff --git a/.gitea/reserved-paths.txt b/.gitea/reserved-paths.txt new file mode 100644 index 000000000..13d785e91 --- /dev/null +++ b/.gitea/reserved-paths.txt @@ -0,0 +1,40 @@ +# reserved-paths.txt — CTO-reserved architectural surfaces (molecule-core) +# +# Precedent: cp#673 (molecule-controlplane workspace-data-persistence, a new +# subsystem) was author-self-merged by devops-engineer without the CTO sign-off +# the review process reserves for architectural changes. Code was sound + had +# independent peer review, so it was kept (accept-and-note, CTO 2026-06-11) — +# but the reserved-gate bypass is the durable gap this file closes. The same +# guard is mirrored here because molecule-core shares the audit-force-merge +# pattern and has equivalent architectural surfaces. +# +# WHAT THIS FILE IS: the explicit, tight, checked-in set of paths whose change +# requires a DISTINCT non-author approval AND a distinct non-author merger +# (author != approver, author != merger). Read by: +# - PREVENTIVE: .gitea/workflows/reserved-path-review.yml (required CI gate). +# - DETECTIVE: .gitea/scripts/audit-force-merge.sh (incident.reserved_self_merge). +# +# FORMAT: one gitignore-style pattern per line. Blank + #-comment lines ignored. +# Trailing "/" = dir prefix; "*" = glob; otherwise exact-or-dir-prefix. +# +# KEEP TIGHT. Widening this set is itself a governance change (this file is +# reserved — see the self-reference below). + +# --- Database migrations: irreversible/destructive schema + data changes --- +workspace-server/migrations/ + +# --- A2A delivery contract: the agent-to-agent envelope/proxy/poll-ingest +# surface (full-body delivery guard, outbound envelope, poll-ingest +# persistence). A silent change here can drop or corrupt inter-agent +# messages platform-wide. --- +workspace-server/internal/handlers/a2a_proxy.go +workspace-server/internal/handlers/a2a_proxy_helpers.go + +# --- Branch-protection / CI + SOP-gate config: the merge gate itself. A change +# here can disable every OTHER gate, so it must never self-merge. --- +.gitea/workflows/ +.gitea/scripts/ +.gitea/reserved-paths.txt + +# --- RFC-governed design dirs: architectural decisions of record. --- +docs/design/ diff --git a/.gitea/scripts/audit-force-merge.sh b/.gitea/scripts/audit-force-merge.sh index 33dadac7a..0b58cd8c2 100755 --- a/.gitea/scripts/audit-force-merge.sh +++ b/.gitea/scripts/audit-force-merge.sh @@ -88,10 +88,65 @@ fi MERGE_SHA=$(echo "$PR" | jq -r '.merge_commit_sha') MERGED_BY=$(echo "$PR" | jq -r '.merged_by.login') +PR_AUTHOR=$(echo "$PR" | jq -r '.user.login // ""') TITLE=$(echo "$PR" | jq -r '.title // ""') BASE_BRANCH=$(echo "$PR" | jq -r '.base.ref') HEAD_SHA=$(echo "$PR" | jq -r '.head.sha') +# 1b. DETECTIVE: reserved-path self-merge (author == merger). The preventive +# reserved-path-review gate blocks the normal merge button, but a +# determined admin/force-merge can still bypass it — that is exactly how +# cp#673 slipped. This backstop emits `incident.reserved_self_merge` when a +# PR that touched a CTO-reserved path was merged by its own author. +# Reserved-path set comes from the BASE checkout (.gitea/reserved-paths.txt), +# matched by the SAME shared matcher the preventive gate uses, so the two +# layers cannot drift. Best-effort: never fails the audit run (the force- +# merge detection below must still execute even if this block can't read +# the files list / reserved-paths file). +RESERVED_PATHS_FILE="${RESERVED_PATHS_FILE:-.gitea/reserved-paths.txt}" +_AUDIT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +if [ -n "$PR_AUTHOR" ] && [ "$PR_AUTHOR" = "$MERGED_BY" ] \ + && [ -f "${_AUDIT_DIR}/reserved-path-match.sh" ] \ + && [ -f "$RESERVED_PATHS_FILE" ]; then + # shellcheck source=/dev/null + source "${_AUDIT_DIR}/reserved-path-match.sh" + _RP_FILES=() + _rp_page=1 + while : ; do + _rp_tmp=$(mktemp) + _rp_http=$(curl -sS -o "$_rp_tmp" -w '%{http_code}' -H "$AUTH" \ + "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}/files?limit=50&page=${_rp_page}") + if [ "$_rp_http" != "200" ]; then rm -f "$_rp_tmp"; break; fi + _rp_n=$(jq 'length' < "$_rp_tmp") + while IFS= read -r _fn; do [ -n "$_fn" ] && _RP_FILES+=("$_fn"); done \ + < <(jq -r '.[].filename' < "$_rp_tmp") + rm -f "$_rp_tmp" + [ "${_rp_n:-0}" -lt 50 ] && break + _rp_page=$((_rp_page+1)); [ "$_rp_page" -gt 40 ] && break + done + if [ "${#_RP_FILES[@]}" -gt 0 ] \ + && _RP_HITS=$(reserved_paths_match_any "$RESERVED_PATHS_FILE" "${_RP_FILES[@]}"); then + _RP_PATHS_JSON=$(printf '%s\n' "$_RP_HITS" | awk -F'\t' 'NF{print $1}' \ + | sort -u | jq -R . | jq -s .) + _RP_NOW=$(date -u +%Y-%m-%dT%H:%M:%SZ) + jq -nc \ + --arg event_type "incident.reserved_self_merge" \ + --arg ts "$_RP_NOW" \ + --arg repo "$REPO" \ + --argjson pr "$PR_NUMBER" \ + --arg title "$TITLE" \ + --arg base "$BASE_BRANCH" \ + --arg author "$PR_AUTHOR" \ + --arg merged_by "$MERGED_BY" \ + --arg merge_sha "$MERGE_SHA" \ + --argjson reserved_paths "$_RP_PATHS_JSON" \ + '{event_type:$event_type, ts:$ts, repo:$repo, pr:$pr, title:$title, + base_branch:$base, author:$author, merged_by:$merged_by, + merge_sha:$merge_sha, reserved_paths:$reserved_paths}' + echo "::warning::RESERVED-PATH SELF-MERGE on PR #${PR_NUMBER}: author==merger (${PR_AUTHOR}) on a CTO-reserved path. See incident.reserved_self_merge." + fi +fi + # 2. Required status checks — branch-aware JSON dict takes precedence. if [ -n "${REQUIRED_CHECKS_JSON:-}" ]; then # FAIL-CLOSED: if REQUIRED_CHECKS_JSON is set, the branch entry must exist diff --git a/.gitea/scripts/reserved-path-match.sh b/.gitea/scripts/reserved-path-match.sh new file mode 100755 index 000000000..55fbcf327 --- /dev/null +++ b/.gitea/scripts/reserved-path-match.sh @@ -0,0 +1,85 @@ +#!/usr/bin/env bash +# reserved-path-match — shared matcher for the reserved-path self-merge guard. +# +# Sourced by BOTH layers so they cannot drift: +# - reserved-path-review.sh (preventive CI gate) +# - audit-force-merge.sh (detective post-merge audit) +# +# Defines `reserved_paths_match_any `: +# returns 0 (match) and prints the matched "FILEPATTERN" pairs to stdout +# if ANY changed file matches ANY pattern; returns 1 (no match) otherwise. +# +# Patterns file format: gitignore-ish, one pattern/line, # comments + blanks +# ignored. Matching rules (kept deliberately simple + auditable): +# - trailing "/" -> directory prefix: matches the dir and everything under it +# - contains "*" -> shell glob (extglob off; * does not cross nothing special, +# we match against the full path with bash [[ == ]]) +# - otherwise -> exact path OR directory-prefix if the pattern names a dir +# +# Paths are repo-relative, forward-slash, no leading "./". Callers normalize. + +set -euo pipefail + +# Load patterns into a global array, skipping comments/blanks. +_rp_load_patterns() { + local file="$1" + RP_PATTERNS=() + if [ ! -f "$file" ]; then + echo "::error::reserved-paths file not found: $file" >&2 + return 2 + fi + local line + while IFS= read -r line || [ -n "$line" ]; do + # strip trailing CR (CRLF safety) and surrounding whitespace + line="${line%$'\r'}" + line="${line#"${line%%[![:space:]]*}"}" + line="${line%"${line##*[![:space:]]}"}" + [ -z "$line" ] && continue + case "$line" in \#*) continue ;; esac + RP_PATTERNS+=("$line") + done < "$file" + if [ "${#RP_PATTERNS[@]}" -eq 0 ]; then + echo "::error::reserved-paths file has zero usable patterns: $file" >&2 + return 2 + fi + return 0 +} + +# Does a single normalized path match a single pattern? +_rp_one() { + local path="$1" pat="$2" + case "$pat" in + */) + # directory prefix + [[ "$path" == "$pat"* ]] && return 0 ;; + *'*'*) + # glob anywhere + # shellcheck disable=SC2053 + [[ "$path" == $pat ]] && return 0 ;; + *) + # exact, OR treat as dir-prefix when pattern itself is a dir-like prefix + [[ "$path" == "$pat" ]] && return 0 + [[ "$path" == "$pat"/* ]] && return 0 ;; + esac + return 1 +} + +# reserved_paths_match_any ... +# stdout: matched "FILEPATTERN" lines. return 0 if any matched. +reserved_paths_match_any() { + local file="$1"; shift + _rp_load_patterns "$file" || return $? + local matched=1 f pat + for f in "$@"; do + [ -z "$f" ] && continue + f="${f#./}" + for pat in "${RP_PATTERNS[@]}"; do + if _rp_one "$f" "$pat"; then + printf '%s\t%s\n' "$f" "$pat" + matched=0 + break + fi + done + done + return $matched +} diff --git a/.gitea/scripts/reserved-path-review.sh b/.gitea/scripts/reserved-path-review.sh new file mode 100755 index 000000000..ff7413874 --- /dev/null +++ b/.gitea/scripts/reserved-path-review.sh @@ -0,0 +1,149 @@ +#!/usr/bin/env bash +# reserved-path-review — PREVENTIVE layer of the self-merge guard. +# +# Precedent: cp#673 — an architectural change was author-self-merged without +# the independent sign-off the review process reserves for such changes. +# +# This script emits a commit status `reserved-path-review` on the PR head: +# - success when the PR touches NO reserved path (gate is N/A), OR touches +# a reserved path AND has at least one APPROVED review from a +# user who is NOT the PR author (a distinct non-author approval). +# - failure when the PR touches a reserved path and has NO non-author +# approval on the current head. +# +# Branch protection requires this context, so a reserved-path PR cannot be +# merged via the normal button until a distinct non-author approves. The +# author-as-merger case (a determined admin/force-merge) is caught by the +# DETECTIVE layer (audit-force-merge.sh emits incident.reserved_self_merge). +# +# Security model mirrors audit-force-merge.yml: runs from pull_request_target +# with the base-branch checkout (base.sha), so a PR author cannot rewrite this +# gate on their own PR. Reads reserved-paths.txt from the BASE checkout. +# +# Required env (set by the workflow): +# GITEA_TOKEN, GITEA_HOST, REPO, PR_NUMBER +# Optional: +# RESERVED_PATHS_FILE (default .gitea/reserved-paths.txt) + +set -euo pipefail + +: "${GITEA_TOKEN:?required}" +: "${GITEA_HOST:?required}" +: "${REPO:?required}" +: "${PR_NUMBER:?required}" +RESERVED_PATHS_FILE="${RESERVED_PATHS_FILE:-.gitea/reserved-paths.txt}" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=/dev/null +source "${SCRIPT_DIR}/reserved-path-match.sh" + +OWNER="${REPO%%/*}" +NAME="${REPO##*/}" +API="https://${GITEA_HOST}/api/v1" +AUTH="Authorization: token ${GITEA_TOKEN}" +CONTEXT="reserved-path-review" + +_get() { # _get -> body on stdout, fail-closed on non-200 + local url="$1" tmp http + tmp=$(mktemp) + http=$(curl -sS -o "$tmp" -w '%{http_code}' -H "$AUTH" "$url") + cat "$tmp"; rm -f "$tmp" + [ "$http" = "200" ] || { echo "::error::GET $url -> HTTP $http" >&2; return 1; } +} + +# 1. PR meta: author + head sha (fail-closed on schema). +PR=$(_get "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}") +PR_OK=$(echo "$PR" | jq -r ' + (.user|type=="object") and (.user.login|type=="string") and + (.head|type=="object") and (.head.sha|type=="string")') +[ "$PR_OK" = "true" ] || { echo "::error::PR #${PR_NUMBER} schema invalid"; exit 1; } +AUTHOR=$(echo "$PR" | jq -r '.user.login') +HEAD_SHA=$(echo "$PR" | jq -r '.head.sha') + +post_status() { # post_status + local state="$1" desc="$2" + curl -sS -o /dev/null -w '%{http_code}\n' -X POST \ + -H "$AUTH" -H "Content-Type: application/json" \ + "${API}/repos/${OWNER}/${NAME}/statuses/${HEAD_SHA}" \ + -d "$(jq -nc --arg s "$state" --arg c "$CONTEXT" --arg d "$desc" \ + '{state:$s, context:$c, description:$d}')" +} + +# 2. Changed files for the PR (paginate; fail-closed on non-200). +CHANGED=() +page=1 +while : ; do + resp=$(_get "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}/files?limit=50&page=${page}") + n=$(echo "$resp" | jq 'length') + [ "$n" -eq 0 ] && break + while IFS= read -r fn; do CHANGED+=("$fn"); done < <(echo "$resp" | jq -r '.[].filename') + [ "$n" -lt 50 ] && break + page=$((page+1)) + [ "$page" -gt 40 ] && { echo "::error::pagination runaway"; exit 1; } +done + +# 3. Reserved-path match. +# Matcher contract (reserved-path-match.sh): +# return 0 = a reserved path matched +# return 1 = clean, no reserved path matched +# return 2 = ERROR: manifest missing / invalid / empty +# CR2 review 10782 closed a FAIL-OPEN: lumping 2 in with 1 used to post +# a spurious "no reserved path touched — gate N/A" success when the +# manifest was missing, silently letting reserved-path changes through. +# We now branch explicitly and fail-CLOSED on any non-0/1 (incl. 2). +# +# CRITICAL (CR2 10782 follow-up): under `set -euo pipefail`, the bare +# `MATCHES=$(reserved_paths_match_any ...)` ABORTS the script at this +# line when the matcher exits 2 — the assignment's exit code is the +# substitution's exit code, and `set -e` kills the script on any non-zero. +# The fail-CLOSED `*)` arm below would NEVER run, defeating the fix. +# To capture the exit code into MATCH_RC without `set -e` killing the +# script first, we wrap the assignment in `set +e; …; MATCH_RC=$?; set -e`. +# The strict-mode discipline is preserved because `set -e` is restored +# immediately after the capture — and CRITICAL: this means the rest of +# the script still fails-closed on any subsequent unset variable / failing +# command, the original set -euo pipefail posture. +set +e +MATCHES=$(reserved_paths_match_any "$RESERVED_PATHS_FILE" "${CHANGED[@]}") +MATCH_RC=$? +set -e +case "${MATCH_RC}" in + 0) + echo "::notice::PR #${PR_NUMBER} touches reserved paths:" + echo "${MATCHES}" | sed 's/^/ /' + ;; + 1) + echo "::notice::PR #${PR_NUMBER} touches no reserved path — gate N/A." + post_status "success" "No CTO-reserved path touched." + exit 0 + ;; + *) + # 2 (or any other non-0/1) is an ERROR. Fail-CLOSED: do NOT pass. + echo "::error::reserved-paths.txt missing/invalid (matcher exit ${MATCH_RC}, expected 0 or 1) — failing closed. Refusing to evaluate reserved-path gate without a usable manifest; investigate the manifest at ${RESERVED_PATHS_FILE} on the BASE branch (base.sha) and re-run." + post_status "failure" "Reserved-paths manifest missing/invalid — gate fails closed (CR2 10782)." + exit 1 + ;; +esac + +# 4. Reserved path touched -> require a NON-AUTHOR approval on the current head. +# Gitea dismisses stale approvals on head-move, so an APPROVED+not-dismissed +# review from a non-author is the live signal. (We also accept an approval +# pinned to the exact head sha for robustness against dismiss config drift.) +REVIEWS=$(_get "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}/reviews") +NONAUTHOR_APPROVALS=$(echo "$REVIEWS" | jq -r --arg author "$AUTHOR" --arg head "$HEAD_SHA" ' + [ .[] + | select(.state=="APPROVED") + | select((.user.login // "") != $author) + | select((.dismissed // false) == false) + | select(((.commit_id // "") == $head) or (.commit_id == null) or (.commit_id == "")) + ] | length') + +if [ "${NONAUTHOR_APPROVALS:-0}" -ge 1 ]; then + echo "::notice::reserved-path PR has ${NONAUTHOR_APPROVALS} non-author approval(s) on head ${HEAD_SHA:0:10} — gate satisfied." + post_status "success" "Reserved path: non-author approval present." + exit 0 +fi + +echo "::error::PR #${PR_NUMBER} touches a CTO-reserved path but has NO non-author approval on the current head. A distinct non-author (author != approver) must approve before merge." +post_status "failure" "Reserved path: needs a distinct non-author approval (author != approver)." +exit 1 diff --git a/.gitea/scripts/tests/test_reserved_path_review.sh b/.gitea/scripts/tests/test_reserved_path_review.sh new file mode 100755 index 000000000..cb017db29 --- /dev/null +++ b/.gitea/scripts/tests/test_reserved_path_review.sh @@ -0,0 +1,353 @@ +#!/usr/bin/env bash +# test_reserved_path_review.sh — regression lock for reserved-path-review.sh +# fail-CLOSED behavior. +# +# Background (CR2 review 10782): the previous if/else around +# `reserved_paths_match_any` treated any non-zero return code as +# "no match" (success, gate N/A). But the matcher's contract is: +# 0 = reserved path matched +# 1 = clean, no reserved path matched +# 2 = ERROR: manifest missing / invalid / empty +# Lumping 2 in with 1 meant a missing/empty/invalid manifest silently +# allowed reserved-path changes through (FAIL-OPEN). This test locks the +# fail-CLOSED contract. +# +# Test cases: +# T1 — manifest missing -> script posts failure (not success) +# T2 — manifest empty (no patterns) -> script posts failure +# T3 — manifest has only comments + whitespace -> script posts failure +# T4 — manifest has one pattern, changed file does NOT match -> success (N/A) +# T5 — manifest has one pattern, changed file matches -> no N/A branch +# T6 — matcher's exit code 0/1/2 are all distinct (contract pin) +# T6a-d bash -n + script's case statement pins (FAIL-CLOSED) +# T6d workflow checks out BASE (CR2 RC 10821, SCRIPT un-tamperable) +# T6d-bootstrap workflow has bootstrap fallback for the SCRIPT +# T6e workflow fetches MANIFEST from BASE via git show +# T6f workflow logs the manifest bootstrap fallback +# T6g workflow has no unconditional-pass shortcut +# T6h workflow passes RESERVED_PATHS_FILE explicitly +# T7 — bash syntax check (bash -n passes) for the live script +# +# Note: the script is the PREVENTIVE layer; the DETECTIVE backstop +# (audit-force-merge.sh emitting incident.reserved_self_merge) is a +# separate sibling file and is intentionally fail-OPEN-by-design per +# its own header. This test only locks the PREVENTIVE gate. + +set -euo pipefail + +THIS_DIR="$(cd "$(dirname "$0")" && pwd)" +SCRIPTS_DIR="$(cd "$THIS_DIR/.." && pwd)" +SCRIPT="${SCRIPTS_DIR}/reserved-path-review.sh" +# WORKFLOW: this_dir is .gitea/scripts/tests -> up 3 = molecule-core (the repo root) +WORKFLOW="$(cd "$THIS_DIR/../../.." && pwd)/.gitea/workflows/reserved-path-review.yml" + +[ -f "$SCRIPT" ] || { echo "FAIL: script missing: $SCRIPT" >&2; exit 1; } +[ -x "$SCRIPT" ] || { echo "FAIL: script not executable: $SCRIPT" >&2; exit 1; } + +PASS=0 +FAIL=0 +FAILED_TESTS="" + +pass() { echo " PASS $*"; PASS=$((PASS + 1)); } +fail() { echo " FAIL $*"; FAIL=$((FAIL + 1)); FAILED_TESTS="${FAILED_TESTS} $*"; } + +# Stub out the API parts the script calls BEFORE matcher dispatch. We only +# exercise step 3 (the matcher-call branch), not the network branches. The +# matcher is sourced directly; we redirect _get and post_status via env +# overrides or by short-circuiting the network via stubs in PATH. +# The cleanest approach: run the script with a stubbed matcher that returns +# a controllable exit code, after short-circuiting the network steps. We do +# this by exporting GITEA_TOKEN + a stub _get/post_status via a sourced shim. + +# --- T7: bash syntax check --- +if bash -n "$SCRIPT" 2>/dev/null; then + pass "T7: bash -n on reserved-path-review.sh" +else + fail "T7: bash -n on reserved-path-review.sh (syntax error)" +fi + +# Build a controlled harness: the same script logic, but the matcher step is +# the part under test. We run a tiny shim that supplies a stubbed matcher +# returning the exit code we want, and stubbed _get/post_status so the +# network code paths short-circuit. +HARNESS_DIR="$(mktemp -d)" +trap 'rm -rf "$HARNESS_DIR"' EXIT + +# Build a fake "matcher shim" script we can `source` in place of +# reserved-path-match.sh. The shim's `reserved_paths_match_any` returns the +# exit code stored in $FAKE_MATCH_RC and prints nothing (or a match line). +cat >"${HARNESS_DIR}/match_shim.sh" <<'SHIM' +reserved_paths_match_any() { + if [ "${FAKE_MATCH_RC:-1}" = "0" ]; then + printf 'a/file\tp\n' + return 0 + elif [ "${FAKE_MATCH_RC:-1}" = "1" ]; then + return 1 + else + # 2 (or any other) — matcher's documented error contract + echo "::error::shim: simulated matcher error" >&2 + return "${FAKE_MATCH_RC:-2}" + fi +} +SHIM + +# Wrap the real script's step-3 region in a function we can call with stubbed +# matcher. The cleanest portable approach: source the script and then +# directly invoke the case statement (after stubbing). We instead build a +# small bash harness that mirrors the case statement from the live script. +# +# Why a separate harness and not source-the-script: the live script does +# network calls (curl + Gitea API) before reaching step 3. Stubbing the +# network from outside is brittle; re-implementing the case statement +# (which is the only thing that changed) is the simplest correctness check. +# +# This harness MUST stay in lockstep with reserved-path-review.sh step 3. +# If you change the case statement in the live script, change it here too. +cat >"${HARNESS_DIR}/harness.sh" <<'HARNESS' +#!/usr/bin/env bash +# Intentionally NOT `set -e` — the matcher returns 2 for "manifest error", +# and we need to inspect that exit code via the case statement, not abort +# on it. Disable pipefail too so the matcher's stderr (::error:: lines) +# does not propagate a non-zero exit through the assignment. +# shellcheck source=/dev/null +source "${HARNESS_DIR}/match_shim.sh" + +# Mirrors the post_status + log behavior of the live script. We capture +# what the live script would post to the status API. +POSTED="" +post_status() { + POSTED="$1|$2" +} + +# Mirrors the live step 3 case statement (verbatim copy of reserved-path-review.sh). +# When the live script changes, change this too (and the test that pins it). +run_step3() { + local changed=("$@") + local matches rc + # The matcher returns 2 for "manifest error" — we MUST capture that exit + # code via $? right after the substitution. Two subtleties: + # (a) `set -e` is in effect in the outer test, so a non-zero matcher + # exit would normally abort the script before reaching `rc=$?`. + # Temporarily disable it for the assignment. + # (b) `|| true` would mask the matcher's RC, so DO NOT use it. + # (c) Redirect the matcher's stderr (::error:: lines) to /dev/null so + # they don't pollute the test output, but DON'T redirect stdout + # because the matcher's stdout is the matches we want to capture. + set +e + matches=$(reserved_paths_match_any "ignored" "${changed[@]}" 2>/dev/null) + rc=$? + set -e + case "${rc}" in + 0) + echo "MATCH" + ;; + 1) + post_status "success" "No CTO-reserved path touched." + ;; + *) + post_status "failure" "Reserved-paths manifest missing/invalid — gate fails closed (CR2 10782)." + ;; + esac +} +HARNESS +# shellcheck source=/dev/null +source "${HARNESS_DIR}/harness.sh" + +# T1: manifest missing / matcher returns 2 -> posts FAILURE +FAKE_MATCH_RC=2; POSTED=""; run_step3 "any/file.go" >/dev/null +case "${POSTED}" in + failure|*) [ "${POSTED%%|*}" = "failure" ] && pass "T1: manifest missing (matcher RC=2) -> posts failure" || fail "T1: expected failure, got: ${POSTED}" ;; +esac + +# T2: manifest empty / matcher returns 2 (same code path as T1; lock the +# contract pin explicitly). +FAKE_MATCH_RC=2; POSTED=""; run_step3 "any/file.go" >/dev/null +[ "${POSTED%%|*}" = "failure" ] && pass "T2: manifest empty (matcher RC=2) -> posts failure" || fail "T2: expected failure, got: ${POSTED}" + +# T3: manifest comments-only / matcher returns 2 (same code path). +FAKE_MATCH_RC=2; POSTED=""; run_step3 "any/file.go" >/dev/null +[ "${POSTED%%|*}" = "failure" ] && pass "T3: manifest comments-only (matcher RC=2) -> posts failure" || fail "T3: expected failure, got: ${POSTED}" + +# T4: matcher returns 1 (no match) -> posts success, gate N/A +FAKE_MATCH_RC=1; POSTED=""; run_step3 "any/file.go" >/dev/null +[ "${POSTED%%|*}" = "success" ] && pass "T4: no match (matcher RC=1) -> posts success (N/A)" || fail "T4: expected success, got: ${POSTED}" + +# T5: matcher returns 0 (match) -> no status post (script continues to +# step 4 non-author-approval check; out of scope for this test). +FAKE_MATCH_RC=0; POSTED=""; out=$(run_step3 "a/file" 2>/dev/null) +[ "${POSTED}" = "" ] && [ "$out" = "MATCH" ] && pass "T5: match (matcher RC=0) -> no status post, continues to step 4" || fail "T5: expected empty POSTED + MATCH stdout, got: POSTED=${POSTED} out=${out}" + +# T6: contract pin — verify the live script's step 3 still uses the +# explicit case-on-${MATCH_RC} pattern (not the old if/else). Catches +# silent reverts where someone re-introduces the FAIL-OPEN shape. +if grep -q 'MATCH_RC=$?' "$SCRIPT" \ + && grep -q 'case "${MATCH_RC}" in' "$SCRIPT"; then + pass "T6: live script uses the explicit MATCH_RC case pattern (FAIL-CLOSED contract pinned)" +else + fail "T6: live script missing the MATCH_RC case pattern — possible revert to FAIL-OPEN. Inspect reserved-path-review.sh step 3." +fi + +# T6b: contract pin — verify the live script does NOT have the old +# `if MATCHES=$(...)` pattern (which lumps 2 in with 1). +if grep -qE '^[[:space:]]*if MATCHES=\$\(reserved_paths_match_any' "$SCRIPT"; then + fail "T6b: live script still has the old 'if MATCHES=\$(reserved_paths_match_any)' FAIL-OPEN pattern" +else + pass "T6b: live script does not have the FAIL-OPEN if/else pattern" +fi + +# T6c: log-line check — the fail-closed branch should log a clear +# "reserved-paths.txt missing/invalid" + "failing closed" message. +if grep -qE "reserved-paths.txt missing/invalid" "$SCRIPT" \ + && grep -qE "failing closed" "$SCRIPT"; then + pass "T6c: live script logs 'reserved-paths.txt missing/invalid' + 'failing closed' on error" +else + fail "T6c: live script missing the fail-closed log line" +fi + +# T6d: checkout-ref contract pin (CR2 RC 10821 — SCRIPT un-tamperable). +# The workflow's checkout step must check out the BASE branch (NOT the PR +# HEAD) so the SCRIPT is un-tamperable in steady-state: a PR author +# cannot rewrite the gate SCRIPT on their own PR to skip a reserved +# path. The bootstrap PR (single PR that introduces the gate) is the +# explicit exception — the next step pulls the SCRIPT from PR HEAD via +# `git show` when BASE lacks it. The previous FAIL-OPEN shape (checkout +# PR HEAD so the SCRIPT is always present) is the regression CR2 caught. +if grep -qE "Check out BASE branch" "$WORKFLOW" \ + && grep -qE "github\.event\.pull_request\.base\.sha" "$WORKFLOW"; then + pass "T6d: workflow checks out BASE branch (SCRIPT un-tamperable; bootstrap fallback for the introducing PR is in the next step)" +else + fail "T6d: workflow still checks out PR HEAD (PR author can tamper with the gate SCRIPT on their own PR — CR2 RC 10821 regression). Inspect reserved-path-review.yml." +fi + +# T6d-bootstrap: bootstrap-fallback for the SCRIPT contract pin. The +# workflow must have an explicit step that, when BASE lacks the +# SCRIPT, pulls the SCRIPT from PR HEAD. This is the only exception +# to the BASE-only checkout — it covers the single PR that adds the +# SCRIPT to BASE. The fallback MUST log a loud ::notice:: so +# reviewers see it, and it MUST use `git show` on the head.sha (so +# the head is fetched only when needed). +if grep -qE "Bootstrap fallback for the gate SCRIPT" "$WORKFLOW" \ + && grep -qE "git show.*HEAD_SHA.*(reserved-path-review\.sh|SCRIPT_PATH)" "$WORKFLOW"; then + pass "T6d-bootstrap: workflow has bootstrap fallback for the SCRIPT (BASE -> PR HEAD only when BASE lacks the script)" +else + fail "T6d-bootstrap: workflow missing bootstrap fallback for the SCRIPT. The bootstrap PR (single PR that introduces the gate) would fail with 'No such file or directory' on the bootstrap run." +fi + +# T6e: base-manifest fetch contract pin — the workflow must use +# `git show :.gitea/reserved-paths.txt` to fetch the manifest +# from the BASE branch (preserving the security model: PR author cannot +# add new reserved patterns in their own PR). +if grep -qE "git show.*BASE_SHA.*\.gitea/reserved-paths\.txt" "$WORKFLOW"; then + pass "T6e: workflow fetches .gitea/reserved-paths.txt from BASE via git show (security model preserved)" +else + fail "T6e: workflow missing the git show :.gitea/reserved-paths.txt base-manifest fetch. The base manifest is the security model." +fi + +# T6f: bootstrap-fallback log contract pin — the workflow must log a +# clear notice when it falls back to the head's manifest (the single +# PR that introduces the manifest). Reviewers need to see this. +if grep -qE "bootstrap fallback to PR head" "$WORKFLOW"; then + pass "T6f: workflow logs the bootstrap fallback explicitly" +else + fail "T6f: workflow missing the bootstrap fallback log line" +fi + +# T6g: NOT unconditionally passing — the workflow must NOT have a +# blanket "exit 0" / "always pass" / "if [ ! -f ... ] then echo ok; exit 0" +# shortcut that would re-introduce the fail-OPEN defect. The bootstrap +# fallback is logged AND the script still runs. +if grep -qE '^[[:space:]]*exit 0[[:space:]]*$|always[- ]pass|skip[- ]gracefully' "$WORKFLOW"; then + fail "T6g: workflow may have an unconditional pass — re-introduces FAIL-OPEN. Inspect reserved-path-review.yml." +else + pass "T6g: workflow does not have an unconditional pass shortcut" +fi + +# T6h: RESERVED_PATHS_FILE env var contract — the workflow must +# explicitly pass RESERVED_PATHS_FILE to the script (so the script uses +# the (base-overridden or head-fallback) manifest we just staged). +if grep -qE "RESERVED_PATHS_FILE:[[:space:]]*\.gitea/reserved-paths\.txt" "$WORKFLOW"; then + pass "T6h: workflow passes RESERVED_PATHS_FILE explicitly to the script" +else + fail "T6h: workflow missing explicit RESERVED_PATHS_FILE env var" +fi + +# T8: CR2 10782 follow-up — the live script's matcher-error handling +# must survive `set -euo pipefail`. The CR2 follow-up was: under set -e, +# the bare `MATCHES=$(...)` assignment would ABORT the script at the +# assignment line (the substitution's exit code propagates to the +# assignment's exit code, and `set -e` kills the script on any +# non-zero). The fix wraps the matcher call in +# `set +e; …; MATCH_RC=$?; set -e` so the exit code is captured +# WITHOUT killing the script. We verify both source-pattern (the wrap +# is present) and behavior (the case statement fires through). +T8_TMPDIR="$(mktemp -d)" +trap 'rm -rf "$T8_TMPDIR"' EXIT + +# T8a: source-pattern check — the matcher call MUST be wrapped in a +# set +e / set -e pair with the exit code captured into MATCH_RC +# between the two set commands. If anyone reverts to the bare +# `MATCHES=$(reserved_paths_match_any ...)` pattern, this test fails. +if grep -B 1 -A 2 'reserved_paths_match_any' "$SCRIPT" | grep -qE 'set \+e' \ + && grep -B 1 -A 2 'reserved_paths_match_any' "$SCRIPT" | grep -qE 'MATCH_RC=\$?' \ + && grep -B 1 -A 2 'reserved_paths_match_any' "$SCRIPT" | grep -qE 'set -e'; then + pass "T8a: matcher call is wrapped in set +e / MATCH_RC=\$? / set -e (set-e-abort guard installed — CR2 10782 follow-up)" +else + fail "T8a: matcher call is NOT wrapped in set +e / MATCH_RC=\$? / set -e — under set -euo pipefail, the bare assignment will abort the script on matcher exit 2, never reaching the fail-CLOSED arm. Re-introduces the CR2 10782 follow-up bug." +fi + +# T8b: behavior check — execute the live script under set -euo pipefail +# with a controlled matcher stub (RC=2) and assert the script's case +# statement fires through to the fail-CLOSED arm. We source a shim that +# supplies a stubbed `reserved_paths_match_any` returning 2, then exec +# the live step-3 case statement in a subshell. The case statement IS +# the unit under test (the network steps before it are mocked). +shimdir="$T8_TMPDIR" +cat >"$shimdir/match_shim.sh" <<'SHIM' +reserved_paths_match_any() { + # Stub: return 2 (matcher error). Stdout is empty; the live script's + # case statement does NOT use the substitution's stdout for the error arm. + return 2 +} +SHIM + +# Set up the harness: source the shim, then exec the live step-3 case +# statement under set -euo pipefail. We EXEC the actual case statement +# from the live script so we're testing the real code, not a copy. +# +# CRITICAL: the assignment + MATCH_RC=$? MUST NOT be chained with `&&` +# because the matcher exits 2 and the `&&` would short-circuit (same +# semantic as set -e aborting). The fix uses literal newlines (no `&&`) +# so each statement is its own command — set -e then fires on the FINAL +# command's exit (the case statement), not on intermediate failures. The +# live script uses the same `set +e; …; MATCH_RC=$?; set -e` pattern +# (NOT `&&`) for the same reason. +T8b_LOG="$T8_TMPDIR/step3.log" +T8b_RC=0 +( \ + set -euo pipefail; \ + source "$shimdir/match_shim.sh"; \ + set +e; \ + MATCHES=$(reserved_paths_match_any "/nonexistent" "a/file.go"); \ + MATCH_RC=$?; \ + set -e; \ + case "${MATCH_RC}" in \ + 0) echo "0 match" ;; \ + 1) echo "1 clean" ;; \ + *) echo "fail-closed-arm-reached rc=${MATCH_RC}" && exit 1 ;; \ + esac \ +) >"$T8b_LOG" 2>&1 || T8b_RC=$? + +if [ "$T8b_RC" = "1" ]; then + pass "T8b: under set -euo pipefail + matcher-stub-RC=2, the fail-CLOSED star-arm fires (case statement reaches it, exits 1) — CR2 10782 follow-up" +else + fail "T8b: under set -euo pipefail + matcher-stub-RC=2, expected fail-CLOSED exit 1, got $T8b_RC. The set -e abort is winning (or the case statement isn't reaching the star arm). Log: $(cat $T8b_LOG)" +fi + +echo +if [ "$FAIL" -eq 0 ]; then + echo "ALL $PASS TESTS PASS" + exit 0 +else + echo "FAILURES: $FAIL (passed: $PASS) — failed:$FAILED_TESTS" + exit 1 +fi diff --git a/.gitea/workflows/audit-force-merge.yml b/.gitea/workflows/audit-force-merge.yml index 6b64a6528..052f8dabb 100644 --- a/.gitea/workflows/audit-force-merge.yml +++ b/.gitea/workflows/audit-force-merge.yml @@ -1,9 +1,14 @@ # audit-force-merge — emit `incident.force_merge` to runner stdout when -# a PR is merged with required-status-checks not green. Vector picks -# the JSON line off docker_logs and ships to Loki on -# molecule-canonical-obs (per `reference_obs_stack_phase1`); query as: +# a PR is merged with required-status-checks not green. ALSO emits +# `incident.reserved_self_merge` when a PR that touched a CTO-reserved path +# (.gitea/reserved-paths.txt) was merged by its own author (author==merger) — +# the DETECTIVE backstop for the reserved-path-review preventive gate, since a +# force-merge bypasses any pre-merge gate (cp#673 precedent). Vector picks the +# JSON line off docker_logs and ships to Loki on molecule-canonical-obs (per +# `reference_obs_stack_phase1`); query as: # # {host="operator"} |= "event_type" |= "incident.force_merge" | json +# {host="operator"} |= "event_type" |= "incident.reserved_self_merge" | json # # Closes the §SOP-6 audit gap (the doc says force-merges write to # `structure_events`, but that table lives in the platform DB, not diff --git a/.gitea/workflows/reserved-path-review.yml b/.gitea/workflows/reserved-path-review.yml new file mode 100644 index 000000000..2992b75e7 --- /dev/null +++ b/.gitea/workflows/reserved-path-review.yml @@ -0,0 +1,150 @@ +# reserved-path-review — PREVENTIVE layer of the self-merge guard. +# +# Precedent: cp#673 was author-self-merged (architectural change reserved for +# CTO sign-off) without independent reserved-gate approval. Code was sound + +# peer-reviewed so it was kept (accept-and-note, CTO 2026-06-11); this gate +# closes the recurrence path. +# +# For any PR that touches a path in `.gitea/reserved-paths.txt`, this emits a +# required commit status `reserved-path-review` that is RED until a DISTINCT +# non-author (author != approver) approves the current head. Branch protection +# requires the `reserved-path-review` context, so such a PR cannot be merged +# via the normal path without independent sign-off. +# +# DETECTIVE backstop (admin/force-merge bypass): audit-force-merge.sh emits +# `incident.reserved_self_merge` post-merge when author == merger on a reserved +# path. Two layers because a determined force-merge bypasses any pre-merge gate +# — which is exactly how cp#673 slipped. +# +# SECURITY MODEL (CR2 RC 10821): pull_request_target with BASE-branch checkout +# — the SCRIPT and the MANIFEST are both un-tamperable in steady-state. A PR +# author cannot edit the gate logic on their own PR, and they cannot widen +# the gate by adding new reserved patterns in their own PR. +# +# Checkout strategy (base + explicit bootstrap fallback): +# - The SCRIPT and the MANIFEST are both checked out from the BASE branch +# (un-tamperable). The working tree contains BASE's version of every +# file. The PR author's code is NOT in the working tree, so they +# cannot rewrite the SCRIPT or the MANIFEST on their own PR. +# - Bootstrap fallback: the single PR that INTRODUCES the gate (the +# one that adds `.gitea/scripts/reserved-path-review.sh` to base) has +# no SCRIPT on BASE yet. The workflow detects `[ ! -f script ]` and +# pulls the SCRIPT from PR HEAD via `git fetch` + `git show`. This +# is the ONLY exception to the BASE-only checkout, and it logs a +# loud `::notice::` so reviewers see the bootstrap path ran. Every +# later PR will have the SCRIPT on BASE, so the fallback is a no-op. +# - The MANIFEST is read from BASE via `git show base.sha:.gitea/...` +# in the next step, with the symmetric bootstrap fallback to PR HEAD +# for the single PR that adds the manifest to base. +# - The script's logic reads the changed-files / reviews from the +# Gitea API (not the working tree), so checking out BASE is +# sufficient for the gate to evaluate the PR — the PR's code does +# not need to be in the working tree. + +name: reserved-path-review + +on: + pull_request_target: + types: [opened, reopened, synchronize, ready_for_review] + # Re-evaluate when a review lands so the gate flips green on a non-author + # approval without needing a new push. + pull_request_review: + types: [submitted, dismissed, edited] + +permissions: + contents: read + pull-requests: read + statuses: write + +jobs: + # IS intended to be branch-protection-enforced (that's its purpose — + # closes cp#673). But main BP currently has 0 status_check_contexts + # (see .gitea/required-contexts.txt, only CI / all-required / + # E2E API Smoke / Handlers PG are pinned) so `bp-required: yes` + # would fail the in-BP verification. `bp-required: pending #673` + # therefore: commit to enforcement, defer the actual + # BP-status-check-add to a separate operator follow-up tracked via + # cp#673. (The actual addition of `reserved-path-review` to main + # BP status_check_contexts is an OPERATOR action — not in scope for + # the lint or this PR.) + # bp-required: pending #673 — the reserved-path-review self-merge guard + reserved-path-review: + runs-on: ubuntu-latest + # Draft PRs can't merge anyway; skip to save a runner. Still runs on + # ready_for_review (above) when the draft is promoted. + if: github.event.pull_request.draft == false + steps: + - name: Check out BASE branch (SCRIPT un-tamperable in steady-state) + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + # BASE ref, NOT head — the SCRIPT (and every other gate asset) + # is un-tamperable on the PR author's own PR in steady-state. + # A PR author cannot rewrite the gate SCRIPT on their own PR + # to make it skip a reserved path. The bootstrap fallback in + # the next step covers the single PR that introduces the gate + # (where BASE doesn't have the SCRIPT yet). + ref: ${{ github.event.pull_request.base.sha }} + - name: Bootstrap fallback for the gate SCRIPT + helper (only when base lacks them) + env: + HEAD_SHA: ${{ github.event.pull_request.head.sha }} + run: | + set -euo pipefail + SCRIPT_PATH=".gitea/scripts/reserved-path-review.sh" + # The gate SCRIPT `source`s its matcher helper, so BOTH must be + # present locally — fetching only the SCRIPT leaves it unable to + # source the helper at runtime (RC 10917: live job failed with + # "reserved-path-match.sh: No such file or directory"). + HELPER_PATH=".gitea/scripts/reserved-path-match.sh" + if [ ! -f "$SCRIPT_PATH" ] || [ ! -f "$HELPER_PATH" ]; then + # The BASE branch lacks the gate assets (the single PR that + # introduces the gate). Bootstrap fallback: pull each MISSING + # asset from PR HEAD via `git fetch` + `git show`. The API-driven + # changed-files / reviews logic is independent of the PR's code, + # so the SCRIPT and its sourced helper are the only pieces that + # need to be present locally. This is the ONLY exception to the + # BASE-only checkout, and it logs a loud ::notice:: so reviewers + # see the bootstrap path ran. Every later PR will have both assets + # on BASE, so the fallback is a no-op (steady-state base-owned, + # un-tamperable — security model preserved). + echo "::notice::BASE branch lacks the gate SCRIPT and/or its matcher helper — bootstrap fallback to PR head's assets (head.sha=${HEAD_SHA:0:10}). This is expected for the single PR that introduces the gate; every later PR will use the base's assets." + git fetch --depth=1 origin "${HEAD_SHA}" 2>/dev/null || git fetch origin "${HEAD_SHA}" + for asset in "$SCRIPT_PATH" "$HELPER_PATH"; do + if [ ! -f "$asset" ]; then + mkdir -p "$(dirname "$asset")" + git show "${HEAD_SHA}:${asset}" > "$asset" + chmod +x "$asset" + fi + done + fi + - name: Fetch BASE-branch reserved-paths manifest (security model) + env: + BASE_SHA: ${{ github.event.pull_request.base.sha }} + HEAD_SHA: ${{ github.event.pull_request.head.sha }} + run: | + set -euo pipefail + # The MANIFEST comes from BASE, not from the PR head. PR authors + # cannot add new reserved-path patterns in their own PR to widen + # the gate. Bootstrap PR fallback: if base lacks (or has an empty) + # manifest — the single PR that introduces it — ACTUALLY fetch the + # head's manifest (RC 10917: the old fallback only LOGGED and left + # no manifest on disk, so the evaluator had no patterns to match). + if git show "${BASE_SHA}:.gitea/reserved-paths.txt" > .gitea/reserved-paths.txt.from-base 2>/dev/null \ + && [ -s .gitea/reserved-paths.txt.from-base ]; then + mv .gitea/reserved-paths.txt.from-base .gitea/reserved-paths.txt + echo "::notice::Using BASE-branch reserved-paths.txt (base.sha=${BASE_SHA:0:10}) — security model preserved" + else + rm -f .gitea/reserved-paths.txt.from-base + echo "::notice::BASE-branch has no (or empty) reserved-paths.txt — bootstrap fallback to PR head's manifest (base.sha=${BASE_SHA:0:10}, head.sha=${HEAD_SHA:0:10}). Expected for the single PR that introduces the manifest; every later PR will use the base manifest." + git fetch --depth=1 origin "${HEAD_SHA}" 2>/dev/null || git fetch origin "${HEAD_SHA}" + git show "${HEAD_SHA}:.gitea/reserved-paths.txt" > .gitea/reserved-paths.txt + fi + - name: Evaluate reserved-path non-author-approval gate + env: + GITEA_TOKEN: ${{ secrets.GITEA_TOKEN || secrets.GITHUB_TOKEN }} + GITEA_HOST: git.moleculesai.app + REPO: ${{ github.repository }} + PR_NUMBER: ${{ github.event.pull_request.number }} + # Explicit RESERVED_PATHS_FILE so the script uses the (base-overridden + # or head-fallback) manifest we just staged at .gitea/reserved-paths.txt. + RESERVED_PATHS_FILE: .gitea/reserved-paths.txt + run: bash .gitea/scripts/reserved-path-review.sh