From 6818f01447bd49ea197fae51e7c1919840f14de1 Mon Sep 17 00:00:00 2001
From: "claude-ceo-assistant (Claude Opus 4.7 on Hongming's MacBook)"
Date: Fri, 8 May 2026 20:09:35 -0700
Subject: [PATCH 1/3] =?UTF-8?q?ci(audit-force-merge):=20fan=20=C2=A7SOP-6?=
 =?UTF-8?q?=20force-merge=20audit=20to=20molecule-core?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mirrors the canonical workflow shipped on internal#120 + #122. Same
shape: pull_request_target on closed, base.sha checkout, structured
JSON event to runner stdout that Vector ships to Loki on
molecule-canonical-obs.

REQUIRED_CHECKS env declares both molecule-core/main protected
contexts (sop-tier-check + Secret scan). Mirror against branch
protection if either is added/removed.

Verified end-to-end on internal: synthetic force-merge of
internal#123 emitted incident.force_merge with all expected fields,
indexable in Loki via {host="molecule-canonical-1"} |=
"incident.force_merge".

Tier: low (CI workflow, no platform code path).
---
 .gitea/scripts/audit-force-merge.sh    | 118 +++++++++++++++++++++++++
 .gitea/workflows/audit-force-merge.yml |  58 ++++++++++++
 2 files changed, 176 insertions(+)
 create mode 100755 .gitea/scripts/audit-force-merge.sh
 create mode 100644 .gitea/workflows/audit-force-merge.yml

diff --git a/.gitea/scripts/audit-force-merge.sh b/.gitea/scripts/audit-force-merge.sh
new file mode 100755
index 00000000..d2c34fe3
--- /dev/null
+++ b/.gitea/scripts/audit-force-merge.sh
@@ -0,0 +1,118 @@
+#!/usr/bin/env bash
+# audit-force-merge — detect a §SOP-6 force-merge after PR close, emit
+# `incident.force_merge` to stdout as structured JSON.
+#
+# Vector's docker_logs source picks up runner stdout; the JSON gets
+# shipped to Loki on molecule-canonical-obs, indexable by event_type.
+# Query example:
+#
+#   {host="operator"} |= "event_type" |= "incident.force_merge" | json
+#
+# A force-merge is detected when a PR closed-with-merged=true had at
+# least one of the repo's required-status-check contexts in a state
+# other than "success" at the PR's head commit. That's exactly what
+# the Gitea force_merge:true API call lets through, so it's a faithful
+# detector of the override path.
+#
+# Triggers on `pull_request_target: closed` (loaded from base branch
+# per §SOP-6 security model). No-op when merged=false.
+#
+# Required env (set by the workflow):
+#   GITEA_TOKEN, GITEA_HOST, REPO, PR_NUMBER, REQUIRED_CHECKS
+#
+# REQUIRED_CHECKS is a newline-separated list of status-check context
+# names that branch protection requires. Declared in the workflow YAML
+# rather than fetched from /branch_protections (which needs admin
+# scope — sop-tier-bot has read-only). Trade dynamism for simplicity:
+# when the required-check set changes, update both branch protection
+# AND this env. Keeping them in sync is less complexity than granting
+# the audit bot admin perms on every repo.
+
+set -euo pipefail
+
+: "${GITEA_TOKEN:?required}"
+: "${GITEA_HOST:?required}"
+: "${REPO:?required}"
+: "${PR_NUMBER:?required}"
+: "${REQUIRED_CHECKS:?required (newline-separated context names)}"
+
+OWNER="${REPO%%/*}"
+NAME="${REPO##*/}"
+API="https://${GITEA_HOST}/api/v1"
+AUTH="Authorization: token ${GITEA_TOKEN}"
+
+# 1. Fetch the PR; -f aborts on HTTP errors (else a 401 reads as "not merged").
+PR=$(curl -fsS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}")
+MERGED=$(echo "$PR" | jq -r '.merged // false')
+if [ "$MERGED" != "true" ]; then
+  echo "::notice::PR #${PR_NUMBER} closed without merge — no audit emission."
+  exit 0
+fi
+
+MERGE_SHA=$(echo "$PR" | jq -r '.merge_commit_sha // empty')
+MERGED_BY=$(echo "$PR" | jq -r '.merged_by.login // "unknown"')
+TITLE=$(echo "$PR" | jq -r '.title // ""')
+BASE_BRANCH=$(echo "$PR" | jq -r '.base.ref // "main"')
+HEAD_SHA=$(echo "$PR" | jq -r '.head.sha // empty')
+
+if [ -z "$MERGE_SHA" ] || [ -z "$HEAD_SHA" ]; then
+  echo "::warning::PR #${PR_NUMBER} merged=true but merge_commit_sha or head.sha is missing — cannot evaluate force-merge."
+  exit 0
+fi
+
+# 2. Required status checks declared in the workflow env.
+REQUIRED="$REQUIRED_CHECKS"
+if [ -z "${REQUIRED//[[:space:]]/}" ]; then
+  echo "::notice::REQUIRED_CHECKS empty — force-merge not applicable."
+  exit 0
+fi
+
+# 3. Status-check state at the PR HEAD (where checks ran). The merge
+#    commit doesn't get its own checks; we evaluate the PR's last
+#    commit, which is what branch protection compared against.
+STATUS=$(curl -fsS -H "$AUTH" \
+  "${API}/repos/${OWNER}/${NAME}/commits/${HEAD_SHA}/status")
+declare -A CHECK_STATE
+while IFS=$'\t' read -r ctx state; do
+  [ -n "$ctx" ] && CHECK_STATE[$ctx]="$state"
+done < <(echo "$STATUS" | jq -r '.statuses // [] | .[] | "\(.context)\t\(.status)"')
+
+# 4. For each required check, was it green at merge? YAML block scalars
+#    (`|`) leave a trailing newline; skip blank/whitespace-only lines.
+FAILED_CHECKS=()
+while IFS= read -r req; do
+  trimmed="${req#"${req%%[![:space:]]*}"}" # ltrim
+  trimmed="${trimmed%"${trimmed##*[![:space:]]}"}" # rtrim
+  [ -z "$trimmed" ] && continue
+  state="${CHECK_STATE[$trimmed]:-missing}"
+  if [ "$state" != "success" ]; then
+    FAILED_CHECKS+=("${trimmed}=${state}")
+  fi
+done <<< "$REQUIRED"
+
+if [ "${#FAILED_CHECKS[@]}" -eq 0 ]; then
+  echo "::notice::PR #${PR_NUMBER} merged with all required checks green — not a force-merge."
+  exit 0
+fi
+
+# 5. Emit structured audit event.
+NOW=$(date -u +%Y-%m-%dT%H:%M:%SZ)
+FAILED_JSON=$(printf '%s\n' "${FAILED_CHECKS[@]}" | jq -R . | jq -s .)
+
+# Print as a single-line JSON so Vector's parse_json transform can pick
+# it up cleanly from docker_logs.
+jq -nc \
+  --arg event_type "incident.force_merge" \
+  --arg ts "$NOW" \
+  --arg repo "$REPO" \
+  --argjson pr "$PR_NUMBER" \
+  --arg title "$TITLE" \
+  --arg base "$BASE_BRANCH" \
+  --arg merged_by "$MERGED_BY" \
+  --arg merge_sha "$MERGE_SHA" \
+  --argjson failed_checks "$FAILED_JSON" \
+  '{event_type: $event_type, ts: $ts, repo: $repo, pr: $pr, title: $title,
+    base_branch: $base, merged_by: $merged_by, merge_sha: $merge_sha,
+    failed_checks: $failed_checks}'
+
+echo "::warning::FORCE-MERGE detected on PR #${PR_NUMBER} by ${MERGED_BY}: ${#FAILED_CHECKS[@]} required check(s) not green at merge time."
diff --git a/.gitea/workflows/audit-force-merge.yml b/.gitea/workflows/audit-force-merge.yml
new file mode 100644
index 00000000..09f4eb7b
--- /dev/null
+++ b/.gitea/workflows/audit-force-merge.yml
@@ -0,0 +1,58 @@
+# audit-force-merge — emit `incident.force_merge` to runner stdout when
+# a PR is merged with required-status-checks not green. Vector picks
+# the JSON line off docker_logs and ships to Loki on
+# molecule-canonical-obs (per `reference_obs_stack_phase1`); query as:
+#
+#   {host="operator"} |= "event_type" |= "incident.force_merge" | json
+#
+# Closes the §SOP-6 audit gap (the doc says force-merges write to
+# `structure_events`, but that table lives in the platform DB, not
+# Gitea-side; Loki is the practical equivalent for Gitea Actions
+# events). When the credential / observability stack converges later,
+# this can sync into structure_events from Loki via a backfill job —
+# the structured JSON shape is forward-compatible.
+#
+# Logic in `.gitea/scripts/audit-force-merge.sh` per the same script-
+# extract pattern as sop-tier-check.
+
+name: audit-force-merge
+
+# pull_request_target loads from the base branch — same security model
+# as sop-tier-check. Without this, an attacker could rewrite the
+# workflow on a PR and skip the audit emission for their own
+# force-merge. See `.gitea/workflows/sop-tier-check.yml` for the full
+# rationale.
+on:
+  pull_request_target:
+    types: [closed]
+
+jobs:
+  audit:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: read
+    # Skip when PR is closed without merge — saves a runner.
+    if: github.event.pull_request.merged == true
+    steps:
+      - name: Check out base branch (for the script)
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          ref: ${{ github.event.pull_request.base.sha }}
+      - name: Detect force-merge + emit audit event
+        env:
+          # Same org-level secret the sop-tier-check workflow uses.
+          GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
+          GITEA_HOST: git.moleculesai.app
+          REPO: ${{ github.repository }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          # Required-status-check contexts to evaluate at merge time.
+          # Newline-separated. Mirror this against branch protection
+          # (settings → branches → protected branch → required checks).
+          # Declared here rather than fetched from /branch_protections
+          # because that endpoint requires admin write — sop-tier-bot is
+          # read-only by design (least-privilege).
+          REQUIRED_CHECKS: |
+            sop-tier-check / tier-check (pull_request)
+            Secret scan / Scan diff for credential-shaped strings (pull_request)
+        run: bash .gitea/scripts/audit-force-merge.sh
-- 
2.45.2

From 0529bc246a3f731981c68dc6b619f26d9d1b52ff Mon Sep 17 00:00:00 2001
From: "claude-ceo-assistant (Claude Opus 4.7 on Hongming's MacBook)"
Date: Fri, 8 May 2026 20:10:26 -0700
Subject: [PATCH 2/3] trigger: re-run sop-tier-check after dev-lead approval

-- 
2.45.2

From 323bbb4ec247f52ed18b876dfe7fd2683509f8d3 Mon Sep 17 00:00:00 2001
From: "claude-ceo-assistant (Claude Opus 4.7 on Hongming's MacBook)"
Date: Fri, 8 May 2026 20:13:06 -0700
Subject: [PATCH 3/3] =?UTF-8?q?ci(secret-scan):=20port=20from=20.github/?=
 =?UTF-8?q?=20to=20.gitea/=20=E2=80=94=20fix=20unsatisfiable=20required=20?=
 =?UTF-8?q?check?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

molecule-core/main branch protection requires the status-check context
'Secret scan / Scan diff for credential-shaped strings (pull_request)'
but the workflow lived only in .github/workflows/, which Gitea Actions
doesn't see — every PR's required-status-checks rollup left the context
in 'expected' / never-fires state, blocking merge.

Port to .gitea/workflows/secret-scan.yml. Drops:
- merge_group event (Gitea has no merge queue)
- workflow_call (no cross-repo reusable invocation on Gitea)

SELF exclude lists both .github/ and .gitea/ paths so a future sync
between them stays clean. Job + step names match the GitHub workflow
so the produced status-check context name matches branch protection
unchanged.

Same regex set as the runtime's pre-commit hook
(molecule-ai-workspace-runtime:
molecule_runtime/scripts/pre-commit-checks.sh).

This unblocks PR #150 (audit-force-merge fan-out) and every future PR
on molecule-core/main.
---
 .gitea/workflows/secret-scan.yml | 197 +++++++++++++++++++++++++++++++
 1 file changed, 197 insertions(+)
 create mode 100644 .gitea/workflows/secret-scan.yml

diff --git a/.gitea/workflows/secret-scan.yml b/.gitea/workflows/secret-scan.yml
new file mode 100644
index 00000000..6f1583f4
--- /dev/null
+++ b/.gitea/workflows/secret-scan.yml
@@ -0,0 +1,197 @@
+name: Secret scan
+
+# Hard CI gate. Refuses any PR / push whose diff additions contain a
+# recognisable credential. Defense-in-depth for the #2090-class incident
+# (2026-04-24): GitHub's hosted Copilot Coding Agent leaked a ghs_*
+# installation token into tenant-proxy/package.json via `npm init`
+# slurping the URL from a token-embedded origin remote. We can't fix
+# upstream's clone hygiene, so we gate here.
+#
+# Same regex set as the runtime's bundled pre-commit hook
+# (molecule-ai-workspace-runtime: molecule_runtime/scripts/pre-commit-checks.sh).
+# Keep the two sides aligned when adding patterns.
+#
+# Ported from .github/workflows/secret-scan.yml so the gate actually
+# fires on Gitea Actions. Differences from the GitHub version:
+#   - drops `merge_group` event (Gitea has no merge queue)
+#   - drops `workflow_call` (no cross-repo reusable invocation on Gitea)
+#   - SELF path updated to .gitea/workflows/secret-scan.yml
+# The job name + step name are identical to the GitHub workflow so the
+# status-check context (`Secret scan / Scan diff for credential-shaped
+# strings (pull_request)`) matches branch protection on molecule-core/main.
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+  push:
+    branches: [main, staging]
+
+jobs:
+  scan:
+    name: Scan diff for credential-shaped strings
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          fetch-depth: 2 # need previous commit to diff against on push events
+
+      # For pull_request events the diff base may be many commits behind
+      # HEAD and absent from the shallow clone. Fetch it explicitly.
+      - name: Fetch PR base SHA (pull_request events only)
+        if: github.event_name == 'pull_request'
+        run: git fetch --depth=1 origin ${{ github.event.pull_request.base.sha }}
+
+      - name: Refuse if credential-shaped strings appear in diff additions
+        env:
+          # Plumb event-specific SHAs through env so the script doesn't
+          # need conditional `${{ ... }}` interpolation per event type.
+          # github.event.before/after only exist on push events;
+          # pull_request has pull_request.base.sha / pull_request.head.sha.
+          PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
+          PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+          PUSH_BEFORE: ${{ github.event.before }}
+          PUSH_AFTER: ${{ github.event.after }}
+        run: |
+          # Pattern set covers GitHub family (the actual #2090 vector),
+          # Anthropic / OpenAI / Slack / AWS. Anchored on prefixes with low
+          # false-positive rates against agent-generated content. Mirror of
+          # molecule-ai-workspace-runtime/molecule_runtime/scripts/pre-commit-checks.sh
+          # — keep aligned.
+          SECRET_PATTERNS=(
+            'ghp_[A-Za-z0-9]{36,}'  # GitHub PAT (classic)
+            'ghs_[A-Za-z0-9]{36,}'  # GitHub App installation token
+            'gho_[A-Za-z0-9]{36,}'  # GitHub OAuth user-to-server
+            'ghu_[A-Za-z0-9]{36,}'  # GitHub OAuth user
+            'ghr_[A-Za-z0-9]{36,}'  # GitHub OAuth refresh
+            'github_pat_[A-Za-z0-9_]{82,}'  # GitHub fine-grained PAT
+            'sk-ant-[A-Za-z0-9_-]{40,}'  # Anthropic API key
+            'sk-proj-[A-Za-z0-9_-]{40,}'  # OpenAI project key
+            'sk-svcacct-[A-Za-z0-9_-]{40,}'  # OpenAI service-account key
+            'sk-cp-[A-Za-z0-9_-]{60,}'  # MiniMax API key (F1088 vector — caught only after the fact)
+            'xox[baprs]-[A-Za-z0-9-]{20,}'  # Slack tokens
+            'AKIA[0-9A-Z]{16}'  # AWS access key ID
+            'ASIA[0-9A-Z]{16}'  # AWS STS temp access key ID
+          )
+
+          # Determine the diff base. Each event type stores its SHAs in
+          # a different place — see the env block above.
+          case "${{ github.event_name }}" in
+            pull_request)
+              BASE="$PR_BASE_SHA"
+              HEAD="$PR_HEAD_SHA"
+              ;;
+            *)
+              BASE="$PUSH_BEFORE"
+              HEAD="$PUSH_AFTER"
+              ;;
+          esac
+
+          # On push events with shallow clones, BASE may be present in
+          # the event payload but absent from the local object DB
+          # (fetch-depth=2 doesn't always reach the previous commit
+          # across true merges). Try fetching it on demand. If the
+          # fetch fails — e.g. the SHA was force-overwritten — we fall
+          # through to the empty-BASE branch below, which scans the
+          # entire tree as if every file were new. Correct, just slow.
+          if [ -n "$BASE" ] && ! echo "$BASE" | grep -qE '^0+$'; then
+            if ! git cat-file -e "$BASE" 2>/dev/null; then
+              git fetch --depth=1 origin "$BASE" 2>/dev/null || true
+            fi
+          fi
+
+          # Files added or modified in this change. Two git knobs matter here:
+          #   --no-renames  — report a rename as delete + add, so a file that is
+          #                   renamed AND edited is not dropped by --diff-filter=AM.
+          #   core.quotePath=false — print non-ASCII paths verbatim (unquoted), so
+          #                   the per-file lookups below can actually find them.
+          if [ -z "$BASE" ] || echo "$BASE" | grep -qE '^0+$' || ! git cat-file -e "$BASE" 2>/dev/null; then
+            # New branch / no previous SHA / BASE unreachable — check the
+            # entire tree as added content. Slower, but correct on first
+            # push.
+            CHANGED=$(git -c core.quotePath=false ls-tree -r --name-only HEAD)
+            DIFF_RANGE=""
+          else
+            CHANGED=$(git -c core.quotePath=false diff --no-renames --name-only --diff-filter=AM "$BASE" "$HEAD")
+            DIFF_RANGE="$BASE $HEAD"
+          fi
+
+          if [ -z "$CHANGED" ]; then
+            echo "No changed files to inspect."
+            exit 0
+          fi
+
+          # Self-exclude: this workflow file legitimately contains the
+          # pattern strings as regex literals. Without an exclude it would
+          # block its own merge. Both the .github/ original and this
+          # .gitea/ port are excluded so a sync between them stays clean.
+          SELF_GITHUB=".github/workflows/secret-scan.yml"
+          SELF_GITEA=".gitea/workflows/secret-scan.yml"
+
+          OFFENDING=""
+          # `while IFS= read -r` (not `for f in $CHANGED`) so filenames
+          # containing whitespace don't word-split silently — a path
+          # with a space would otherwise produce two iterations on
+          # tokens that aren't real filenames, breaking the
+          # self-exclude + diff lookup.
+          while IFS= read -r f; do
+            [ -z "$f" ] && continue
+            [ "$f" = "$SELF_GITHUB" ] && continue
+            [ "$f" = "$SELF_GITEA" ] && continue
+            if [ -n "$DIFF_RANGE" ]; then
+              # Drop the per-file header (everything before the first @@),
+              # then keep every added line — even text that starts with `+`.
+              ADDED=$(git diff --no-color --unified=0 "$BASE" "$HEAD" -- "$f" 2>/dev/null | sed -n '/^@@/,$p' | grep -E '^\+' || true)
+            else
+              # No diff range (new branch first push) — scan the full file
+              # contents as if every line were new.
+              ADDED=$(cat "$f" 2>/dev/null || true)
+            fi
+            [ -z "$ADDED" ] && continue
+            for pattern in "${SECRET_PATTERNS[@]}"; do
+              if echo "$ADDED" | grep -qE "$pattern"; then
+                OFFENDING="${OFFENDING}${f} (matched: ${pattern})\n"
+                break
+              fi
+            done
+          done <<< "$CHANGED"
+
+          if [ -n "$OFFENDING" ]; then
+            echo "::error::Credential-shaped strings detected in diff additions:"
+            # `printf '%b' "$OFFENDING"` interprets backslash escapes
+            # (the literal `\n` we appended above becomes a newline)
+            # WITHOUT treating OFFENDING as a format string. Plain
+            # `printf "$OFFENDING"` is a format-string sink: a filename
+            # containing `%` would be interpreted as a conversion
+            # specifier, corrupting the error message (or printing
+            # `%(missing)` artifacts).
+            printf '%b' "$OFFENDING"
+            echo ""
+            echo "The actual matched values are NOT echoed here, deliberately —"
+            echo "round-tripping a leaked credential into CI logs widens the blast"
+            echo "radius (logs are searchable + retained)."
+            echo ""
+            echo "Recovery:"
+            echo " 1. Remove the secret from the file. Replace with an env var"
+            echo " reference (e.g. \${{ secrets.GITHUB_TOKEN }} in workflows,"
+            echo " process.env.X in code)."
+            echo " 2. If the credential was already pushed (this PR's commit"
+            echo " history reaches a public ref), treat it as compromised —"
+            echo " ROTATE it immediately, do not just remove it. The token"
+            echo " remains valid in git history forever and may be in any"
+            echo " log/cache that consumed this branch."
+            echo " 3. Force-push the cleaned commit (or stack a revert) and"
+            echo " re-run CI."
+            echo ""
+            echo "If the match is a false positive (test fixture, docs example,"
+            echo "or this workflow's own regex literals): use a clearly-fake"
+            echo "placeholder like ghs_EXAMPLE_DO_NOT_USE that doesn't satisfy"
+            echo "the length suffix, OR add the file path to the SELF exclude"
+            echo "list in this workflow with a short reason."
+            echo ""
+            echo "Mirror of the regex set lives in the runtime's bundled"
+            echo "pre-commit hook (molecule-ai-workspace-runtime:"
+            echo "molecule_runtime/scripts/pre-commit-checks.sh) — keep aligned."
+            exit 1
+          fi
+
+          echo "✓ No credential-shaped strings in this change."
-- 
2.45.2