molecule-core/tests/harness/compose.yml
Hongming Wang 9dae0503ee fix(harness): generate SECRETS_ENCRYPTION_KEY per-run instead of hardcoding
Replaces the hardcoded base64 sentinel (630dd0da) with a per-run
generation in up.sh, exported into compose's interpolation environment.

Why:
- Hardcoding a 32-byte base64 string in the repo, even one labelled
  "test-only", sets a bad muscle-memory pattern. The next agent or
  contributor copies the shape into another harness — or worse, into a
  staging .env — and the test-only sentinel turns into something
  someone treats as a real key.
- Secret scanners flag key-shaped values regardless of the surrounding
  comment claiming intent. Avoiding the literal entirely sidesteps the
  false-positive.
- A fresh key per harness lifetime more closely mimics prod's
  per-tenant isolation, exercising the same code paths without any
  pretense of stable encrypted-data fixtures (which the harness wipes
  on every ./down.sh anyway).

Implementation:
- up.sh: `openssl rand -base64 32` if SECRETS_ENCRYPTION_KEY isn't
  already set in the caller's env. Honoring a pre-set value lets a
  debug session pin a key for reproducibility (e.g. when investigating
  encrypted-row corruption).
- compose.yml: `${SECRETS_ENCRYPTION_KEY:?…}` makes a misuse loud —
  running `docker compose up` directly bypassing up.sh fails fast with
  a clear error pointing at the right entry point, rather than a 100s
  unhealthy-tenant timeout.

Both paths verified via `docker compose config`:
- with key exported: value interpolates cleanly
- without it: "required variable SECRETS_ENCRYPTION_KEY is missing a
  value: must be set — run via tests/harness/up.sh, which generates
  one per run"
2026-04-30 13:30:14 -07:00

141 lines
5.3 KiB
YAML

# Production-shape harness for local E2E.
#
# Reproduces the SaaS tenant topology on localhost using the SAME
# images that ship to production:
#
#   client → cf-proxy (nginx, mimics CF tunnel headers)
#          → tenant (workspace-server/Dockerfile.tenant — combined platform + canvas)
#          → cp-stub (control-plane stand-in) for /cp/* and CP-callback paths
#          → postgres + redis (same versions as production)
#
# Why this matters: the workspace-server binary IS identical between
# local and production. The bugs that survive local E2E are topology
# bugs — env-gated middleware (TenantGuard, CP proxy, Canvas proxy),
# auth state, header rewrites, real production image. This harness
# activates ALL of them.
#
# Quickstart:
#   cd tests/harness && ./up.sh
#   ./seed.sh
#   ./replays/peer-discovery-404.sh   # reproduces issue #2397
#
# Env config:
#   GIT_SHA            — passed to the tenant build for /buildinfo verification.
#                        Defaults to "harness" so /buildinfo distinguishes the
#                        harness build from any cached image.
#   CP_STUB_PEERS_MODE — peers failure mode for replay scripts.
#                        "" / "404" / "401" / "500" / "timeout".
services:
  # PostgreSQL backing store for the tenant. The user, password and database
  # name set here are the ones the tenant's DATABASE_URL below connects with,
  # so change them together.
  postgres:
    image: postgres:16-alpine
    environment:
      POSTGRES_USER: harness
      POSTGRES_PASSWORD: harness
      POSTGRES_DB: molecule
    networks: [harness-net]
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U harness"]
      interval: 2s
      timeout: 5s
      retries: 10

  # Redis instance the tenant reaches via REDIS_URL.
  redis:
    image: redis:7-alpine
    networks: [harness-net]
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 2s
      timeout: 5s
      retries: 10

  # Control-plane stand-in, built from ./cp-stub. The tenant proxies /cp/*
  # to it through CP_UPSTREAM_URL.
  cp-stub:
    build:
      context: ./cp-stub
    environment:
      PORT: "9090"
      # Forwarded from the caller's environment; empty when unset.
      CP_STUB_PEERS_MODE: "${CP_STUB_PEERS_MODE:-}"
    networks: [harness-net]
    healthcheck:
      test: ["CMD-SHELL", "wget -q -O- http://localhost:9090/healthz || exit 1"]
      interval: 2s
      timeout: 5s
      retries: 10

  # The actual production tenant image — same Dockerfile.tenant CI publishes.
  # This is the load-bearing part of the harness: every bug class that hides
  # behind "but it works locally" is reproducible HERE, against this image,
  # not against `go run ./cmd/server`.
  tenant:
    build:
      # Build context is the repository root, two levels above this file.
      context: ../..
      dockerfile: workspace-server/Dockerfile.tenant
      args:
        GIT_SHA: "${GIT_SHA:-harness}"
    # Start only once every backing service reports healthy.
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
      cp-stub:
        condition: service_healthy
    environment:
      DATABASE_URL: "postgres://harness:harness@postgres:5432/molecule?sslmode=disable"
      REDIS_URL: "redis://redis:6379"
      PORT: "8080"
      PLATFORM_URL: "http://tenant:8080"
      MOLECULE_ENV: "production"
      # SECRETS_ENCRYPTION_KEY is required when MOLECULE_ENV=production —
      # crypto.InitStrict() refuses to boot without it. up.sh generates a
      # fresh 32-byte key per harness lifetime via `openssl rand -base64 32`
      # and exports it into this compose file's interpolation environment.
      # The :? sentinel makes the misuse loud — running `docker compose up`
      # directly without going through up.sh fails fast with a clear error
      # rather than getting a confusing tenant-unhealthy timeout.
      SECRETS_ENCRYPTION_KEY: "${SECRETS_ENCRYPTION_KEY:?must be set — run via tests/harness/up.sh, which generates one per run}"
      # ADMIN_TOKEN flips the platform into strict-auth mode (matches
      # production's CP-minted token configuration). Seeded value lets
      # E2E scripts authenticate without going through CP.
      ADMIN_TOKEN: "harness-admin-token"
      # MOLECULE_ORG_ID — activates TenantGuard middleware. Every request
      # must carry X-Molecule-Org-Id matching this value. Replays bugs
      # that only fire in SaaS mode.
      MOLECULE_ORG_ID: "harness-org"
      # CP_UPSTREAM_URL — activates the /cp/* reverse proxy mount in
      # router.go. Without this set, /cp/* would 404 and the canvas
      # bootstrap would silently drift from production behavior.
      CP_UPSTREAM_URL: "http://cp-stub:9090"
      RATE_LIMIT: "1000"
      # Canvas auto-proxy — entrypoint-tenant.sh exports CANVAS_PROXY_URL
      # by default; keeping it explicit here makes the topology readable.
      CANVAS_PROXY_URL: "http://localhost:3000"
    networks: [harness-net]
    # 20 retries at a 5s interval gives the tenant roughly 100s to come up
    # before it is marked unhealthy.
    healthcheck:
      test: ["CMD-SHELL", "wget -q -O- http://localhost:8080/health || exit 1"]
      interval: 5s
      timeout: 5s
      retries: 20

  # Cloudflare-tunnel-shape proxy — strips the :8080 suffix, rewrites
  # Host to the tenant subdomain, injects X-Forwarded-*. Tests target
  # http://harness-tenant.localhost:8080 and exercise the production
  # routing layer.
  cf-proxy:
    image: nginx:1.27-alpine
    depends_on:
      tenant:
        condition: service_healthy
    volumes:
      - ./cf-proxy/nginx.conf:/etc/nginx/nginx.conf:ro
    # Bind to 127.0.0.1 only — the harness uses a hardcoded ADMIN_TOKEN
    # ("harness-admin-token") so binding 0.0.0.0 (compose's default)
    # would expose admin access to anyone on the local network or VPN.
    # Loopback-only is safe for E2E and prevents a known-token leak.
    ports:
      - "127.0.0.1:8080:8080"
    networks: [harness-net]

# Single network shared by every service above. The explicit `name` pins the
# network name instead of leaving it to Compose's project-prefixed default.
networks:
  harness-net:
    name: molecule-harness-net