diff --git a/.gitea/workflows/manifest-entry-existence-check.yml b/.gitea/workflows/manifest-entry-existence-check.yml
new file mode 100644
index 000000000..3af8cb947
--- /dev/null
+++ b/.gitea/workflows/manifest-entry-existence-check.yml
@@ -0,0 +1,58 @@
+name: manifest-entry-existence-check
+
+# PR-time defense against bad manifest.json entries (#2185).
+#
+# A manifest entry whose `repo` does not exist on Gitea only surfaces
+# as a failure in publish-workspace-server-image.yml *after* merge,
+# which fires the main-red watchdog. This workflow checks every
+# (plugin + workspace_template + org_template) entry at PR-review
+# time so broken entries are caught before they reach main.
+#
+# Design notes
+# ------------
+# - Uses the same AUTO_SYNC_TOKEN secret that publish-workspace-server-image.yml
+#   uses for cloning manifest deps. Some entries are platform-private, so an
+#   anonymous API check would 404 on valid entries; auth is required.
+# - Only checks repo existence, not ref resolvability (#2185 scope).
+# - Mirrors clone-manifest.sh retry behavior (3 attempts, backoff).
+# - Triggers only when manifest.json or the gate's own files change, to keep
+#   CI load minimal.
+#
+# Related: #2183 (incident), #2184 (fix), #2185 (this gate).
+
+on:
+  pull_request:
+    branches: [main, staging]
+    paths:
+      - 'manifest.json'
+      # The gate's own files: without these the regression-test step below
+      # never runs on a PR that changes the checker itself.
+      - 'scripts/manifest-entry-existence-check.sh'
+      - 'scripts/test-manifest-entry-existence-check.sh'
+      - '.gitea/workflows/manifest-entry-existence-check.yml'
+
+permissions:
+  contents: read
+
+jobs:
+  # bp-required: pending #2185 — new PR-time gate; branch-protection update tracked in the same issue.
+  check-entries:
+    name: Verify manifest entries exist on Gitea
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      - name: Check out PR head
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Verify each manifest entry resolves on Gitea
+        env:
+          GITEA_HOST: git.moleculesai.app
+          GITEA_TOKEN: ${{ secrets.AUTO_SYNC_TOKEN }}
+        run: |
+          set -euo pipefail
+          bash scripts/manifest-entry-existence-check.sh
+
+      - name: Regression test manifest-entry-existence-check script
+        run: |
+          set -euo pipefail
+          bash scripts/test-manifest-entry-existence-check.sh
diff --git a/scripts/manifest-entry-existence-check.sh b/scripts/manifest-entry-existence-check.sh
new file mode 100755
index 000000000..e03f918be
--- /dev/null
+++ b/scripts/manifest-entry-existence-check.sh
@@ -0,0 +1,132 @@
+#!/usr/bin/env bash
+# manifest-entry-existence-check.sh — PR-time guard: verify every repo listed in
+# manifest.json actually exists on Gitea before merge.
+#
+# Mirrors clone-manifest.sh retry behavior (3 attempts, linear backoff) and
+# fails closed on any exhausted non-200 status (404, 500, 403, auth/network
+# failures, etc.) so bad manifest entries cannot slip through.
+#
+# Usage:
+#   GITEA_HOST=git.example.com GITEA_TOKEN=xxx ./manifest-entry-existence-check.sh [manifest.json]
+#
+# Optional env:
+#   RETRY_BACKOFF_SECS  base of the linear backoff between attempts (default 2;
+#                       the regression tests set 0 so they do not sleep)
+#
+# Exit:
+#   0  all repos exist / were reachable
+#   1  one or more entries could not be validated
+#   2  bad usage / missing inputs / required env not set
+
+set -euo pipefail
+
+MANIFEST="${1:-manifest.json}"
+GITEA_HOST="${GITEA_HOST:-}"
+GITEA_TOKEN="${GITEA_TOKEN:-${MOLECULE_GITEA_TOKEN:-}}"
+GITEA_API="${GITEA_API:-https://${GITEA_HOST}/api/v1/repos}"
+RETRY_BACKOFF_SECS="${RETRY_BACKOFF_SECS:-2}"
+
+if [ ! -f "$MANIFEST" ]; then
+  echo "::error::manifest not found: $MANIFEST" >&2
+  exit 2
+fi
+
+if [ -z "$GITEA_HOST" ]; then
+  echo "::error::GITEA_HOST is not set" >&2
+  exit 2
+fi
+
+if [ -z "$GITEA_TOKEN" ]; then
+  echo "::error::GITEA_TOKEN (or MOLECULE_GITEA_TOKEN) is not set" >&2
+  exit 2
+fi
+
+# The backoff feeds shell arithmetic, so reject anything but plain digits.
+case "$RETRY_BACKOFF_SECS" in
+  *[!0-9]*)
+    echo "::error::RETRY_BACKOFF_SECS must be a non-negative integer" >&2
+    exit 2
+    ;;
+esac
+
+# Strip JSON5-style // comments before parsing (same as clone-manifest.sh)
+_strip_comments() {
+  sed '/^[[:space:]]*\/\//d' "$MANIFEST"
+}
+
+MANIFEST_JSON="$(_strip_comments)"
+
+TOTAL=0
+MISSING=()
+
+# Probe one manifest entry against the Gitea repo API.
+#   $1  entry name (used in messages)
+#   $2  repo path appended to GITEA_API
+# 200 is success. 404 is recorded in MISSING at once, without retrying.
+# Anything else (5xx, 403, an empty or 000 code after a network failure) is
+# retried with linear backoff and recorded in MISSING once all 3 attempts
+# are spent. Always returns 0 so that every entry gets checked; the exit
+# code of the script is decided afterwards from MISSING.
+_check_entry() {
+  local name="$1" repo="$2"
+  local attempt http_code last_http_code=""
+
+  for attempt in 1 2 3; do
+    # `|| true`: curl exits non-zero on network errors; the captured status
+    # code, not curl's exit code, drives the retry decision.
+    http_code=$(curl -sS -o /dev/null -w '%{http_code}' --max-time 10 \
+      -H "Authorization: token ${GITEA_TOKEN}" \
+      "${GITEA_API}/${repo}" 2>/dev/null || true)
+    last_http_code="$http_code"
+
+    if [ "$http_code" = "200" ]; then
+      echo " OK: $name -> $repo"
+      return 0
+    elif [ "$http_code" = "404" ]; then
+      echo "::error::manifest entry '$name' points at $repo which does not exist on Gitea (404)"
+      MISSING+=("$name:$repo (404)")
+      return 0
+    elif [ "$attempt" -lt 3 ]; then
+      echo " attempt $attempt: '$name' -> $repo returned HTTP ${http_code:-(none)}, retrying"
+      sleep $((attempt * 10#$RETRY_BACKOFF_SECS))
+    else
+      # Last attempt: nothing follows, so neither sleep nor promise a retry.
+      echo " attempt $attempt: '$name' -> $repo returned HTTP ${http_code:-(none)}, giving up"
+    fi
+  done
+
+  # After exhausting retries, any non-200 status that wasn't already recorded
+  # as 404 is a validation failure (500, 403, auth/network gateway errors, etc.).
+  echo "::error::manifest entry '$name' -> $repo could not be validated after 3 attempts (last HTTP ${last_http_code:-(none)})"
+  MISSING+=("$name:$repo (last HTTP ${last_http_code:-(none)})")
+}
+
+# Categories to check — must match manifest.json schema
+_check_category() {
+  local category="$1"
+  local count
+  count=$(echo "$MANIFEST_JSON" | jq -r ".${category} | length")
+
+  local i=0
+  while [ "$i" -lt "$count" ]; do
+    local name repo
+    name=$(echo "$MANIFEST_JSON" | jq -r ".${category}[$i].name")
+    repo=$(echo "$MANIFEST_JSON" | jq -r ".${category}[$i].repo")
+    TOTAL=$((TOTAL + 1))
+    _check_entry "$name" "$repo"
+    i=$((i + 1))
+  done
+}
+
+_check_category "plugins"
+_check_category "workspace_templates"
+_check_category "org_templates"
+
+if [ "${#MISSING[@]}" -gt 0 ]; then
+  echo "::error::${#MISSING[@]} of ${TOTAL} manifest entries are broken:"
+  printf ' - %s\n' "${MISSING[@]}"
+  exit 1
+fi
+
+echo "::notice::All ${TOTAL} manifest entries resolve to existing Gitea repos."
+exit 0
diff --git a/scripts/test-manifest-entry-existence-check.sh b/scripts/test-manifest-entry-existence-check.sh
new file mode 100755
index 000000000..45e046d77
--- /dev/null
+++ b/scripts/test-manifest-entry-existence-check.sh
@@ -0,0 +1,226 @@
+#!/usr/bin/env bash
+# scripts/test-manifest-entry-existence-check.sh
+#
+# Regression tests for scripts/manifest-entry-existence-check.sh.
+# Verifies the retry loop fails closed on persistent non-200 statuses
+# (500, 403, network failures) and succeeds when retries eventually return 200.
+#
+# Run: bash scripts/test-manifest-entry-existence-check.sh
+# Expected: "All tests passed." + exit 0.
+
+set -euo pipefail
+
+SCRIPT="$(cd "$(dirname "$0")" && pwd)/manifest-entry-existence-check.sh"
+TMP="$(mktemp -d)"
+trap 'rm -rf "$TMP"' EXIT
+
+PASS=0
+FAIL=0
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Helpers
+# ─────────────────────────────────────────────────────────────────────────────
+
+run_script() {
+  # Args: <fixture> [extra-env...]
+  # Runs the checker on <fixture> with the mocks in $TMP first on PATH and the
+  # retry backoff zeroed (no real sleeping); extra-env assignments come last
+  # and so override these defaults. Prints merged output + "EXIT_CODE=<rc>".
+  local fixture="$1"
+  shift
+  set +e
+  env \
+    GITEA_HOST="git.example.com" \
+    GITEA_TOKEN="test-token" \
+    RETRY_BACKOFF_SECS=0 \
+    PATH="$TMP:$PATH" \
+    "$@" \
+    bash "$SCRIPT" "$fixture" 2>&1
+  local rc=$?
+  set -e
+  echo "EXIT_CODE=$rc"
+}
+
+assert_match() {
+  local name="$1" got="$2" pattern="$3"
+  if printf '%s' "$got" | grep -qE "$pattern"; then
+    PASS=$((PASS + 1))
+    printf ' ✓ %s\n' "$name"
+  else
+    FAIL=$((FAIL + 1))
+    printf ' ✗ %s\n want pattern: %s\n got:\n%s\n' "$name" "$pattern" "$got"
+  fi
+}
+
+assert_not_match() {
+  local name="$1" got="$2" pattern="$3"
+  if printf '%s' "$got" | grep -qE "$pattern"; then
+    FAIL=$((FAIL + 1))
+    printf ' ✗ %s\n bad pattern matched: %s\n got:\n%s\n' "$name" "$pattern" "$got"
+  else
+    PASS=$((PASS + 1))
+    printf ' ✓ %s\n' "$name"
+  fi
+}
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Mock curl
+# ─────────────────────────────────────────────────────────────────────────────
+
+# The mock curl reads MOCK_MODE to decide what status to return.
+# It accepts the same flags the script uses and echoes the status code.
+# jq is also mocked so tests run on hosts without jq installed.
+mkdir -p "$TMP"
+
+cat > "$TMP/jq" <<'EOF'
+#!/usr/bin/env python3
+import json, re, sys
+# The checker invokes jq as: jq -r ".<category> | length" or jq -r ".<category>[N].<field>".
+# The query is always the last argument; -r can be ignored for the mock.
+query = sys.argv[-1]
+obj = json.load(sys.stdin)
+
+# Support queries used by the script: .<category> | length and .<category>[N].<field>
+m = re.fullmatch(r'\.([A-Za-z_][A-Za-z0-9_]*)\s*\|\s*length', query)
+if m:
+    print(len(obj.get(m.group(1), [])))
+    sys.exit(0)
+
+m = re.fullmatch(r'\.([A-Za-z_][A-Za-z0-9_]*)\[(\d+)\]\.([A-Za-z_][A-Za-z0-9_]*)', query)
+if m:
+    cat = obj.get(m.group(1), [])
+    idx = int(m.group(2))
+    field = m.group(3)
+    if idx < len(cat):
+        print(cat[idx].get(field, ''))
+    else:
+        print('')
+    sys.exit(0)
+
+print(json.dumps(obj))
+EOF
+chmod +x "$TMP/jq"
+
+cat > "$TMP/curl" <<'EOF'
+#!/usr/bin/env bash
+# Mock curl for manifest-entry-existence-check tests.
+# Returns the status stored in MOCK_MODE for every URL.
+set -euo pipefail
+mode="${MOCK_MODE-200}"
+# Consume and ignore flags; the script always passes -sS -o /dev/null -w etc.
+while [ "$#" -gt 0 ]; do
+  case "$1" in
+    -o|-w|-H|--max-time) shift 2 ;;  # options that carry a value
+    -*) shift ;;                     # bare flags such as -sS
+    *) URL="$1"; shift ;;
+  esac
+done
+printf '%s\n' "$mode"
+EOF
+chmod +x "$TMP/curl"
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Fixtures
+# ─────────────────────────────────────────────────────────────────────────────
+
+cat > "$TMP/all-good.json" <<'EOF'
+{
+  "plugins": [
+    {"name": "plugin-a", "repo": "molecule-ai/plugin-a"}
+  ],
+  "workspace_templates": [
+    {"name": "template-a", "repo": "molecule-ai/template-a"}
+  ],
+  "org_templates": []
+}
+EOF
+
+cat > "$TMP/mixed.json" <<'EOF'
+{
+  "plugins": [
+    {"name": "plugin-a", "repo": "molecule-ai/plugin-a"},
+    {"name": "plugin-b", "repo": "molecule-ai/plugin-b"}
+  ],
+  "workspace_templates": [],
+  "org_templates": []
+}
+EOF
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Test cases
+# ─────────────────────────────────────────────────────────────────────────────
+
+echo "1. All entries return HTTP 200 — clean exit"
+got=$(MOCK_MODE=200 run_script "$TMP/all-good.json")
+assert_match "all-good-success-message" "$got" "All .* manifest entries resolve"
+assert_match "all-good-exit-zero" "$got" "EXIT_CODE=0"
+
+echo
+echo "2. Persistent HTTP 404 — fails loudly"
+got=$(MOCK_MODE=404 run_script "$TMP/all-good.json")
+assert_match "404-reports-entry" "$got" "does not exist on Gitea \(404\)"
+assert_match "404-exit-one" "$got" "EXIT_CODE=1"
+
+echo
+echo "3. Persistent HTTP 500 after retries — fails closed"
+got=$(MOCK_MODE=500 run_script "$TMP/all-good.json")
+assert_match "500-reports-last-code" "$got" "last HTTP 500"
+assert_match "500-exit-one" "$got" "EXIT_CODE=1"
+assert_match "500-attempts-three" "$got" "attempt 3"
+assert_match "500-final-attempt-gives-up" "$got" "attempt 3: .* giving up"
+assert_not_match "500-no-retry-after-last" "$got" "attempt 3: .* retrying"
+
+echo
+echo "4. Persistent HTTP 403 after retries — fails closed"
+got=$(MOCK_MODE=403 run_script "$TMP/all-good.json")
+assert_match "403-reports-last-code" "$got" "last HTTP 403"
+assert_match "403-exit-one" "$got" "EXIT_CODE=1"
+
+echo
+echo "5. Empty HTTP code (network/gateway failure) — fails closed"
+got=$(MOCK_MODE="" run_script "$TMP/all-good.json")
+assert_match "empty-code-reports-failure" "$got" "could not be validated after 3 attempts"
+assert_match "empty-code-exit-one" "$got" "EXIT_CODE=1"
+
+echo
+echo "6. Mixed entries with one 404 and one 200 — counts correctly"
+# Use a per-URL mock: plugin-a 200, plugin-b 404
+cat > "$TMP/curl" <<'EOF'
+#!/usr/bin/env bash
+set -euo pipefail
+while [ "$#" -gt 0 ]; do
+  case "$1" in
+    -o|-w|-H|--max-time) shift 2 ;;  # options that carry a value
+    -*) shift ;;                     # bare flags such as -sS
+    *) URL="$1"; shift ;;
+  esac
+done
+case "${URL:-}" in
+  *plugin-b*) printf '404\n' ;;
+  *) printf '200\n' ;;
+esac
+EOF
+chmod +x "$TMP/curl"
+got=$(run_script "$TMP/mixed.json")
+assert_match "mixed-reports-404" "$got" "does not exist on Gitea \(404\)"
+assert_match "mixed-reports-count" "$got" "1 of 2 manifest entries are broken"
+assert_match "mixed-exit-one" "$got" "EXIT_CODE=1"
+assert_not_match "mixed-does-not-report-500" "$got" "last HTTP 500"
+
+echo
+echo "7. Non-numeric RETRY_BACKOFF_SECS — rejected as bad usage"
+got=$(run_script "$TMP/all-good.json" RETRY_BACKOFF_SECS=soon)
+assert_match "bad-backoff-message" "$got" "RETRY_BACKOFF_SECS must be a non-negative integer"
+assert_match "bad-backoff-exit-two" "$got" "EXIT_CODE=2"
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Summary
+# ─────────────────────────────────────────────────────────────────────────────
+
+echo
+echo "─────────────────────────────────────────────"
+echo "Tests: $PASS passed, $FAIL failed"
+if [ "$FAIL" -gt 0 ]; then
+  exit 1
+fi
+echo "All tests passed."