harden(security): remove dev-mode fail-open auth — fail-closed everywhere + dev-token + regression gate #2291

Merged
core-devops merged 2 commits from harden/no-fail-open-auth into main 2026-06-05 08:34:28 +00:00
21 changed files with 872 additions and 387 deletions
+14 -5
View File
@@ -19,13 +19,22 @@ REDIS_URL=redis://localhost:6379
# itself to 3000 in canvas/package.json, so sourcing this file before
# `npm run dev` won't accidentally make Next.js try to bind 8080.
PORT=8080
# ---- Admin credential — REQUIRED to close issue #684 (AdminAuth bearer bypass) ----
# ---- Admin credential — REQUIRED in EVERY environment (auth is fail-closed) ----
# Auth is fail-CLOSED everywhere now (harden/no-fail-open-auth): there is NO
# dev-mode escape hatch. AdminAuth / WorkspaceAuth / discovery all require a
# real credential. The canvas authenticates by sending this value as a bearer
# (it reads NEXT_PUBLIC_ADMIN_TOKEN — set it to the SAME value).
# When ADMIN_TOKEN is set, only this value is accepted on /admin/* and /approvals/* routes.
# Without it, any valid workspace bearer token can call admin endpoints (backward compat
# fallback, still vulnerable). Set this in every environment, rotate when compromised.
# Generate: openssl rand -base64 32
# (When unset, a fresh install 401s on admin routes and any valid workspace bearer
# is the only deprecated fallback once tokens exist — set ADMIN_TOKEN to close #684.)
# Generate: openssl rand -base64 32 (scripts/dev-start.sh provisions a fixed dev value)
# Store in fly secrets / deployment env — NEVER commit the actual value here.
ADMIN_TOKEN=
# NEXT_PUBLIC_ADMIN_TOKEN= # Canvas-side mirror of ADMIN_TOKEN. The canvas
# bakes this into its bundle and sends it as the
# bearer. MUST equal ADMIN_TOKEN (next.config.ts
# warns if the pair is half-set). dev-start.sh
# exports it for you.
SECRETS_ENCRYPTION_KEY= # 32-byte key (raw or base64). Leave empty for plaintext (dev only).
CONFIGS_DIR= # Path to workspace-configs-templates/ (auto-discovered if empty)
PLUGINS_DIR= # Path to plugins/ directory (default: /plugins in container)
@@ -34,7 +43,7 @@ PLUGINS_DIR= # Path to plugins/ directory (default: /plugins i
# MOLECULE_MCP_ALLOW_SEND_MESSAGE= # Set to "true" to include send_message_to_user in the MCP bridge tool list (issue #810). Excluded by default to prevent unintended WebSocket pushes from CLI sessions.
# MOLECULE_MCP_URL=http://localhost:8080 # Platform URL for opencode MCP config (opencode.json). Same as PLATFORM_URL; separate var so opencode configs can reference it without ambiguity.
# WORKSPACE_DIR= # Optional global host path bind-mounted to /workspace in every container. Per-workspace workspace_dir column overrides this; if neither is set each workspace gets an isolated Docker named volume.
MOLECULE_ENV=development # Environment label (development/staging/production). Used for log tagging and for the AdminAuth dev-mode escape hatch (lets the Canvas dashboard keep working after the first workspace is created, when ADMIN_TOKEN is unset). SaaS deployments MUST set MOLECULE_ENV=production.
MOLECULE_ENV=development # Environment label (development/staging/production). Used for log tagging and for NON-security local-dev conveniences (loopback HTTP bind, relaxed rate-limit bucket). It is NOT an auth lever — auth is fail-closed in every environment. SaaS deployments MUST set MOLECULE_ENV=production.
# MOLECULE_ENABLE_TEST_TOKENS= # Set to 1 to expose GET /admin/workspaces/:id/test-token (mints a fresh bearer token for E2E scripts). The route is auto-enabled when MOLECULE_ENV != production; this flag is the explicit override. Leave unset/0 in prod — the route 404s unless enabled.
# MOLECULE_ORG_ID= # SaaS only: org UUID set by control plane on tenant machines. When set, workspace provisioning auto-routes through the control plane API instead of Docker.
# CP_PROVISION_URL= # Override control plane URL for workspace provisioning (default: https://api.moleculesai.app). Only needed for testing against a non-production control plane.
+1 -1
View File
@@ -114,7 +114,7 @@ Opt-in pattern: when `idle_prompt` is non-empty in `config.yaml`, the workspace
Three Gin middleware classes gate server-side routes. Full contract in `docs/runbooks/admin-auth.md`.
- **`middleware.AdminAuth(db.DB)`** — strict bearer-only. Used for any route where a forged request could leak prompts/memory, create/mutate workspaces, or leak ops intel. Lazy-bootstrap fail-open when `HasAnyLiveTokenGlobal` returns 0.
- **`middleware.AdminAuth(db.DB)`** — strict bearer-only and **fail-closed in every environment** (harden/no-fail-open-auth). Used for any route where a forged request could leak prompts/memory, create/mutate workspaces, or leak ops intel. The former lazy-bootstrap fail-open (pass when `HasAnyLiveTokenGlobal` returns 0) and the dev-mode escape hatch have both been removed — a fresh install must provision `ADMIN_TOKEN` to reach admin routes.
- **`middleware.CanvasOrBearer(db.DB)`** — accepts a bearer token OR an Origin matching `CORS_ORIGINS`. Used **only** for cosmetic routes where a forged request has zero data/security impact. Currently only on `PUT /canvas/viewport`. Do not extend this to any route that leaks data or creates resources — see the runbook.
- **`middleware.WorkspaceAuth(db.DB)`** — binds a bearer token to `:id`. Workspace A's token cannot hit workspace B's sub-routes. Used for the entire `/workspaces/:id/*` group except the A2A proxy (which has its own `CanCommunicate` layer).
+9 -3
View File
@@ -24,7 +24,7 @@ cd molecule-core
That single script:
1. Generates an `ADMIN_TOKEN` into `.env` (first run only — preserved on re-runs)
1. Generates an `ADMIN_TOKEN` into `.env` (first run only — preserved on re-runs) and exports the matching `NEXT_PUBLIC_ADMIN_TOKEN` so the canvas authenticates with it. Auth is **fail-closed in every environment** (including local dev) — there is no dev-mode fail-open; the canvas reaches admin/workspace routes only because it sends this bearer.
2. Brings up Postgres, Redis, Langfuse, ClickHouse, and Temporal via `infra/scripts/setup.sh`
3. Populates the workspace template + plugin registry from `manifest.json`
4. Builds and starts the platform on `http://localhost:8080`
@@ -62,11 +62,17 @@ If you only want the raw compose flow:
docker compose -f docker-compose.infra.yml up -d
```
> **Auth is fail-closed even in local dev.** Pick any local admin token and
> set it on *both* sides — the platform (`ADMIN_TOKEN`) and the canvas
> (`NEXT_PUBLIC_ADMIN_TOKEN`, same value). Without it the canvas 401s on every
> admin/workspace call. (`scripts/dev-start.sh` does this for you; the manual
> steps below set it explicitly.)
### Step 3: Start the platform
```bash
cd workspace-server
go run ./cmd/server
ADMIN_TOKEN=dev-local-admin-token MOLECULE_ENV=development go run ./cmd/server
```
The control plane listens on `http://localhost:8080`.
@@ -78,7 +84,7 @@ In a new terminal:
```bash
cd canvas
npm install
npm run dev
NEXT_PUBLIC_ADMIN_TOKEN=dev-local-admin-token npm run dev # MUST match ADMIN_TOKEN above
```
Open `http://localhost:3000`.
+32 -4
View File
@@ -1,5 +1,29 @@
# Admin Authentication Runbook
## Auth is fail-CLOSED in every environment — `ADMIN_TOKEN` is the bootstrap credential
Per the CTO "nothing should be fail-open" directive, **every** auth path on the
workspace-server fails closed — there is no dev-mode / zero-token / DB-outage
hatch that grants access. This includes:
- `AdminAuth` and `WorkspaceAuth` (admin + per-workspace routes),
- `CanvasOrBearer` (the cosmetic `PUT /canvas/viewport` route), and
- `validateDiscoveryCaller` (`/registry/:id/peers`, `/registry/discover/:id`).
Consequence for **bootstrap**: a brand-new self-hosted / dev install has **no
DB-backed tokens yet**, and there is no longer a fail-open that lets the first
request through. The **only** way to reach admin routes (and to mint the first
workspace token via `POST /admin/workspaces/:id/tokens`) is to set `ADMIN_TOKEN`
in the platform environment and present it as the bearer. This is the "local
mimics production" principle: there is no zero-config bootstrap.
- **Local dev:** `scripts/dev-start.sh` provisions a deterministic
`ADMIN_TOKEN` into `.env` (and exports the matching `NEXT_PUBLIC_ADMIN_TOKEN`
so the canvas authenticates with it). See `docs/quickstart.md`.
- **Self-hosted / SaaS:** set `ADMIN_TOKEN` to a strong random secret
(`openssl rand -base64 32`) in the platform env and bake the matching
`NEXT_PUBLIC_ADMIN_TOKEN` into the canvas bundle.
## Required: set `MOLECULE_ENV` in all non-dev environments
```bash
@@ -7,8 +31,10 @@
MOLECULE_ENV=production
```
This matches the production tenant default and disables development-only
shortcuts. Staging and production smoke tests should use the real user/API
This matches the production tenant default. NOTE: `MOLECULE_ENV` no longer gates
any auth decision — it only drives NON-security local-dev conveniences (loopback
bind, relaxed rate limit). Setting it to `dev`/`development` does **not** relax
authentication. Staging and production smoke tests should use the real user/API
workflow: create a workspace, then mint a one-time displayed workspace bearer
with `POST /admin/workspaces/:id/tokens`.
@@ -23,5 +49,7 @@ The platform uses `ADMIN_TOKEN` as the bearer credential for admin-gated endpoin
| `POST /org/import` | `Authorization: Bearer <ADMIN_TOKEN>` |
| `POST /admin/workspaces/:id/tokens` | `Authorization: Bearer <ADMIN_TOKEN>`; plaintext token returned once |
Missing or invalid `ADMIN_TOKEN` → AdminAuth fails open in dev mode (no token set), or
returns 401 in production mode (token set but invalid).
Missing or invalid bearer → **401 in every environment** (fail-closed; no
dev-mode fail-open). If the auth datastore is unreachable, auth-gated routes
return **503** (`platform_unavailable`) — an availability tradeoff that grants no
access — rather than allowing the request through.
+51 -22
View File
@@ -46,46 +46,67 @@ cleanup() {
trap cleanup EXIT INT TERM
# ─────────────────────────────────────────────── 1. dev-mode auth posture
# The AdminAuth middleware closes its fail-open the moment the first
# workspace token lands in the DB — at which point /workspaces and
# other admin routes 401 unless the caller has either ADMIN_TOKEN or
# the dev-mode escape hatch. The canvas at localhost:3000 has no
# bearer token to send, so without one of those two paths it can't
# call admin endpoints after a workspace exists.
#
# For local dev the right posture is the dev-mode escape hatch:
# SECURITY (harden/no-fail-open-auth): the workspace-server auth chain is
# now fail-CLOSED in EVERY environment, dev included. There is NO dev-mode
# fail-open escape hatch anymore — AdminAuth / WorkspaceAuth / discovery all
# require a real credential. So local dev must AUTHENTICATE, not run open.
#
# MOLECULE_ENV=development AND ADMIN_TOKEN unset
# The clean way to keep the canvas working locally is to provision a
# deterministic ADMIN_TOKEN and hand the matching NEXT_PUBLIC_ADMIN_TOKEN to
# the canvas bundle. The canvas already attaches `Authorization: Bearer
# $NEXT_PUBLIC_ADMIN_TOKEN` on every platform call (canvas/src/lib/api.ts),
# and next.config.ts warns if the pair is half-set. We set BOTH here.
#
# That makes middleware.isDevModeFailOpen() return true and lets the
# canvas keep working without a bearer. Setting ADMIN_TOKEN here
# would BREAK the canvas (it has no way to read that token in dev).
# MOLECULE_ENV=development — dev conveniences (loopback bind, relaxed
# rate limit). NOT an auth lever.
# ADMIN_TOKEN=<dev value> — server-side bearer AdminAuth/WorkspaceAuth
# enforce (Tier-2b). Real credential.
# NEXT_PUBLIC_ADMIN_TOKEN — same value, baked into the canvas bundle so
# the browser sends the matching bearer.
#
# For SaaS the platform is provisioned with ADMIN_TOKEN set AND
# MOLECULE_ENV=production — either one closes the hatch. So the dev
# mode signal here is safe (it's only active when both other knobs
# are absent).
# For SaaS the platform is provisioned with a random ADMIN_TOKEN + the
# canvas image baked with the matching NEXT_PUBLIC_ADMIN_TOKEN, plus
# MOLECULE_ENV=production. Same shape, stronger secret.
if [ -f "$ENV_FILE" ] && grep -q '^MOLECULE_ENV=' "$ENV_FILE"; then
echo "==> Reusing MOLECULE_ENV from existing .env"
else
echo "==> Setting MOLECULE_ENV=development in .env (dev-mode auth hatch)"
echo "==> Setting MOLECULE_ENV=development in .env"
{
if [ -f "$ENV_FILE" ]; then
cat "$ENV_FILE"
echo ""
fi
echo "# Generated by scripts/dev-start.sh on $(date -u +%Y-%m-%dT%H:%M:%SZ)"
echo "# Local-dev auth posture: dev-mode fail-open lets the canvas at"
echo "# localhost:3000 call admin endpoints without a bearer token."
echo "# DO NOT set ADMIN_TOKEN here in dev — it would close the hatch"
echo "# and the canvas would 401 on every admin call."
echo "# Local-dev conveniences (loopback bind, relaxed rate limit)."
echo "# Auth is fail-closed even in dev — see ADMIN_TOKEN below."
echo "MOLECULE_ENV=development"
} > "$ENV_FILE.tmp"
mv "$ENV_FILE.tmp" "$ENV_FILE"
echo " Saved to $ENV_FILE"
fi
# Provision a deterministic dev ADMIN_TOKEN (idempotent — preserved across
# re-runs). This is the credential the canvas authenticates with locally; it
# is NOT a secret (it only guards your own localhost stack), so a fixed,
# well-known value is fine and keeps re-runs reproducible.
DEV_ADMIN_TOKEN="dev-local-admin-token"
if [ -f "$ENV_FILE" ] && grep -q '^ADMIN_TOKEN=' "$ENV_FILE"; then
echo "==> Reusing ADMIN_TOKEN from existing .env"
else
echo "==> Provisioning dev ADMIN_TOKEN in .env (fail-closed auth, authenticated canvas)"
{
cat "$ENV_FILE"
echo ""
echo "# Dev ADMIN_TOKEN — the canvas authenticates with this locally."
echo "# Auth is fail-closed; without a matching bearer the canvas 401s."
echo "# Fixed value is fine: it only guards your localhost stack."
echo "ADMIN_TOKEN=$DEV_ADMIN_TOKEN"
} > "$ENV_FILE.tmp"
mv "$ENV_FILE.tmp" "$ENV_FILE"
echo " Saved to $ENV_FILE"
fi
# Source .env so the platform inherits ADMIN_TOKEN (and anything else
# the user has added — e.g. ANTHROPIC_API_KEY for skipping the canvas
# Secrets UI). `set -a` exports every assignment in the sourced file
@@ -95,6 +116,12 @@ set -a
. "$ENV_FILE"
set +a
# The canvas reads NEXT_PUBLIC_ADMIN_TOKEN at build/dev time and attaches it
# as the bearer on every platform call. Mirror the server-side ADMIN_TOKEN
# into it so the matched-pair guard in canvas/next.config.ts is satisfied and
# the browser authenticates. Exported for the `npm run dev` child below.
export NEXT_PUBLIC_ADMIN_TOKEN="$ADMIN_TOKEN"
# ─────────────────────────────────────────────── 2. infra + templates
# Use setup.sh (not raw docker-compose) so the template registry gets
@@ -195,7 +222,9 @@ cat <<EOF
Molecule AI dev environment ready
Canvas: http://localhost:3000
Platform: http://localhost:8080
Platform: http://localhost:8080 (bound to loopback in dev)
Auth: fail-closed — canvas authenticates with the dev ADMIN_TOKEN
(ADMIN_TOKEN + NEXT_PUBLIC_ADMIN_TOKEN, see .env)
Logs: /tmp/molecule-platform.log
/tmp/molecule-canvas.log
+72 -45
View File
@@ -1,24 +1,30 @@
#!/usr/bin/env bash
# E2E regression suite for the local-dev escape hatches added in
# fix/quickstart-bugless. These cover the exact user-facing breakages
# that dropped out of the partial squash-merge of PR #1871:
# E2E regression suite asserting that "dev mode" is fail-CLOSED.
#
# 1. GET /workspaces returns 200 with no bearer after tokens exist in
# the DB — exercises the AdminAuth Tier-1b dev-mode hatch
# (middleware/devmode.go::isDevModeFailOpen).
# 2. GET /workspaces/:id/activity returns 200 with no bearer — the
# same hatch applied to WorkspaceAuth.
# 3. POST /workspaces/:id/a2a doesn't 502-SSRF on a loopback workspace
# URL — exercises handlers/ssrf.go::devModeAllowsLoopback.
# 4. GET /org/templates returns the curated set populated by
# clone-manifest.sh — exercises infra/scripts/setup.sh + the
# ListTemplates failure logging in handlers/org.go.
# History: this file used to assert the local-dev fail-open escape hatches
# (GET /workspaces 200 with NO bearer, /workspaces/:id/activity 200 with no
# bearer) added in fix/quickstart-bugless. Under the CTO "nothing should be
# fail-open" directive (harden/no-fail-open-auth) those hatches were REMOVED:
# auth is fail-CLOSED in EVERY environment, local dev included. This suite now
# pins the inverse contract — bearer-less admin/workspace requests 401, and the
# SAME requests with the dev ADMIN_TOKEN bearer succeed.
#
# Requires: platform running on :8080 with MOLECULE_ENV=development and
# ADMIN_TOKEN unset. Matches the README quickstart env.
# What it verifies:
# 1. GET /workspaces 401s with NO bearer once tokens exist (was: 200 via the
# removed AdminAuth Tier-1b dev-mode hatch); 200 WITH the admin bearer.
# 2. GET /workspaces/:id/activity (and /delegations, /approvals/pending) 401
# with no bearer (was: 200 via the WorkspaceAuth hatch); 200 WITH bearer.
# 3. GET /org/templates returns the curated set populated by clone-manifest.sh
# (unauth-readable bootstrap surface — unchanged).
#
# Requires: platform running on :8080 with MOLECULE_ENV=development AND
# ADMIN_TOKEN set (the dev value), with MOLECULE_ADMIN_TOKEN (or
# ADMIN_TOKEN) exported here so the suite can present the bearer.
# scripts/dev-start.sh provisions ADMIN_TOKEN locally; the e2e-api CI
# job sets it on the platform and exports the matching bearer.
#
# Usage:
# bash tests/e2e/test_dev_mode.sh
# MOLECULE_ADMIN_TOKEN=dev-local-admin-token bash tests/e2e/test_dev_mode.sh
set -euo pipefail
# shellcheck source=_lib.sh
@@ -46,35 +52,44 @@ check_http() {
fi
}
echo "=== Dev-mode escape-hatch regression tests ==="
echo "=== Dev-mode fail-CLOSED regression tests ==="
echo ""
# Pre-test: ensure MOLECULE_ENV=development and no ADMIN_TOKEN are in the
# platform's env. The request path doesn't let us read the platform's
# env directly, but we can verify the hatch is active by confirming the
# expected behaviour under the conditions the test otherwise sets up.
# The platform is fail-closed in every environment now, so the suite MUST have
# the admin bearer to drive the authenticated (200) assertions. Without it we
# cannot create / clean up workspaces — bail loudly rather than silently skip.
ADMIN_BEARER="${MOLECULE_ADMIN_TOKEN:-${ADMIN_TOKEN:-}}"
if [ -z "$ADMIN_BEARER" ]; then
echo "FAIL: MOLECULE_ADMIN_TOKEN/ADMIN_TOKEN not set — auth is fail-closed in"
echo " every environment, so this suite needs the dev ADMIN_TOKEN bearer."
echo " e.g. MOLECULE_ADMIN_TOKEN=dev-local-admin-token bash $0"
exit 1
fi
ADMIN_AUTH=(-H "Authorization: Bearer $ADMIN_BEARER")
e2e_cleanup_all_workspaces
# ----------------------------------------------------------------------
# Section 1 — AdminAuth dev-mode hatch
# Section 1 — AdminAuth is fail-CLOSED (dev-mode hatch removed)
# ----------------------------------------------------------------------
# Before fix: once any workspace had tokens in the DB, GET /workspaces
# closed to unauthenticated callers and the Canvas broke. The hatch
# keeps it open specifically in dev mode.
echo "--- Section 1: AdminAuth dev-mode hatch ---"
echo "--- Section 1: AdminAuth fail-closed ---"
# No bearer → 401 in dev mode (the removed hatch used to return 200).
R=$(curl -s -o /dev/null -w "%{http_code}" "$BASE/workspaces")
check_http "GET /workspaces (empty DB)" "200" "$R"
check_http "GET /workspaces (no bearer) is fail-CLOSED" "401" "$R"
# Create a workspace so tokens land in the DB.
# With the dev admin bearer → 200.
R=$(curl -s -o /dev/null -w "%{http_code}" "$BASE/workspaces" "${ADMIN_AUTH[@]}")
check_http "GET /workspaces (with admin bearer)" "200" "$R"
# Create a workspace (authenticated) so tokens land in the DB.
R=$(curl -s -w "\n%{http_code}" -X POST "$BASE/workspaces" \
"${ADMIN_AUTH[@]}" \
-H "Content-Type: application/json" \
-d '{"name":"Dev-Mode-Test","tier":1,"runtime":"external","external":true}')
CODE=$(echo "$R" | tail -n1)
BODY=$(echo "$R" | sed '$d')
check_http "POST /workspaces (create)" "201" "$CODE"
check_http "POST /workspaces (create, with admin bearer)" "201" "$CODE"
WS_ID=$(echo "$BODY" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || true)
if [ -z "$WS_ID" ]; then
@@ -83,43 +98,55 @@ if [ -z "$WS_ID" ]; then
exit 1
fi
# Ensure a real workspace token exists so AdminAuth now sees a live token. On
# pre-fix builds the next /workspaces call would 401 — on post-fix it
# must stay 200 because MOLECULE_ENV=development + ADMIN_TOKEN unset.
# Ensure a real workspace token exists so AdminAuth sees a live token globally.
TOKEN=$(echo "$BODY" | e2e_extract_token)
if [ -z "$TOKEN" ]; then
e2e_mint_workspace_token "$WS_ID" >/dev/null
fi
# With tokens now in the DB, the bearer-less call STILL 401s (no lazy-bootstrap
# / dev-mode fall-through), and the authenticated call still 200s.
R=$(curl -s -o /dev/null -w "%{http_code}" "$BASE/workspaces")
check_http "GET /workspaces (after token minted, no bearer)" "200" "$R"
check_http "GET /workspaces (after token minted, no bearer) is fail-CLOSED" "401" "$R"
R=$(curl -s -o /dev/null -w "%{http_code}" "$BASE/workspaces" "${ADMIN_AUTH[@]}")
check_http "GET /workspaces (after token minted, with admin bearer)" "200" "$R"
# ----------------------------------------------------------------------
# Section 2 — WorkspaceAuth dev-mode hatch
# Section 2 — WorkspaceAuth is fail-CLOSED (dev-mode hatch removed)
# ----------------------------------------------------------------------
# Before fix: /workspaces/:id/activity 401'd once tokens existed —
# the Canvas side panel's chat history load broke.
echo ""
echo "--- Section 2: WorkspaceAuth dev-mode hatch ---"
echo "--- Section 2: WorkspaceAuth fail-closed ---"
# No bearer → 401 (the removed hatch used to return 200).
R=$(curl -s -o /dev/null -w "%{http_code}" \
"$BASE/workspaces/$WS_ID/activity?type=a2a_receive&limit=50")
check_http "GET /workspaces/:id/activity (no bearer)" "200" "$R"
check_http "GET /workspaces/:id/activity (no bearer) is fail-CLOSED" "401" "$R"
R=$(curl -s -o /dev/null -w "%{http_code}" \
"$BASE/workspaces/$WS_ID/delegations")
check_http "GET /workspaces/:id/delegations (no bearer)" "200" "$R"
check_http "GET /workspaces/:id/delegations (no bearer) is fail-CLOSED" "401" "$R"
R=$(curl -s -o /dev/null -w "%{http_code}" "$BASE/approvals/pending")
check_http "GET /approvals/pending (no bearer)" "200" "$R"
check_http "GET /approvals/pending (no bearer) is fail-CLOSED" "401" "$R"
# Same requests WITH the admin bearer → 200.
R=$(curl -s -o /dev/null -w "%{http_code}" \
"$BASE/workspaces/$WS_ID/activity?type=a2a_receive&limit=50" "${ADMIN_AUTH[@]}")
check_http "GET /workspaces/:id/activity (with admin bearer)" "200" "$R"
R=$(curl -s -o /dev/null -w "%{http_code}" \
"$BASE/workspaces/$WS_ID/delegations" "${ADMIN_AUTH[@]}")
check_http "GET /workspaces/:id/delegations (with admin bearer)" "200" "$R"
R=$(curl -s -o /dev/null -w "%{http_code}" "$BASE/approvals/pending" "${ADMIN_AUTH[@]}")
check_http "GET /approvals/pending (with admin bearer)" "200" "$R"
# ----------------------------------------------------------------------
# Section 3 — Template registry populated by setup.sh
# ----------------------------------------------------------------------
# Before fix: setup.sh didn't run clone-manifest.sh so the template
# palette was empty and the molecule-dev in-tree copy was broken.
# GET /org/templates is an unauthenticated bootstrap surface (the template
# palette must render before the user has a credential) — unchanged.
echo ""
echo "--- Section 3: Template registry ---"
+12 -4
View File
@@ -2,10 +2,15 @@ package main
import "testing"
// TestResolveBindHost pins the precedence: BIND_ADDR explicit > dev-mode
// fail-open default of 127.0.0.1 > production-shape empty (all interfaces).
// TestResolveBindHost pins the precedence: BIND_ADDR explicit > local-dev
// loopback default of 127.0.0.1 > production-shape empty (all interfaces).
//
// Mutation-test invariant: removing the IsDevModeFailOpen() branch makes
// (harden/no-fail-open-auth) The loopback default is now keyed on
// MOLECULE_ENV alone (IsLocalDevEnv), decoupled from ADMIN_TOKEN — a dev box
// defaults to loopback even when it provisions an ADMIN_TOKEN. This is
// defense-in-depth, not an auth lever; auth is fail-closed in every env.
//
// Mutation-test invariant: removing the IsLocalDevEnv() branch makes
// "no_bindaddr_devmode_unset_admin" fail (returns "" instead of "127.0.0.1").
// Removing the BIND_ADDR branch makes "explicit_bindaddr_*" cases fail.
func TestResolveBindHost(t *testing.T) {
@@ -35,7 +40,10 @@ func TestResolveBindHost(t *testing.T) {
bindAddr: "",
adminToken: "secret",
molEnv: "dev",
want: "", // ADMIN_TOKEN flips IsDevModeFailOpen to false → all interfaces
// harden/no-fail-open-auth: loopback default is keyed on
// MOLECULE_ENV alone now — a dev box defaults to loopback even
// with ADMIN_TOKEN provisioned (which dev-start.sh now does).
want: "127.0.0.1",
},
{
name: "no_bindaddr_production_env",
+15 -15
View File
@@ -474,12 +474,12 @@ func main() {
// HTTP server with graceful shutdown.
//
// Bind host: in dev-mode (no ADMIN_TOKEN, MOLECULE_ENV=dev|development)
// the AdminAuth chain fails open by design; pairing that with a wildcard
// bind would expose unauth /workspaces to any same-LAN peer. Default to
// loopback when fail-open is active. Operators who need LAN exposure set
// BIND_ADDR=0.0.0.0 explicitly. Production (ADMIN_TOKEN set) is unchanged.
// See molecule-core#7.
// Bind host: in local dev (MOLECULE_ENV=dev|development) default the
// listener to loopback as defense-in-depth — a dev box shouldn't be
// reachable from the LAN. This is NOT an auth lever (auth is fail-closed
// in every env now); it's strictly the safer default. Operators who need
// LAN exposure set BIND_ADDR=0.0.0.0 explicitly. Production binds all
// interfaces (existing shape). See molecule-core#7.
bindHost := resolveBindHost()
srv := &http.Server{
Addr: fmt.Sprintf("%s:%s", bindHost, port),
@@ -489,7 +489,7 @@ func main() {
// Start server in goroutine
go func() {
log.Printf("Platform starting on %s:%s (dev-mode-fail-open=%v)", bindHost, port, middleware.IsDevModeFailOpen())
log.Printf("Platform starting on %s:%s (local-dev-env=%v)", bindHost, port, middleware.IsLocalDevEnv())
if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
log.Fatalf("Server failed: %v", err)
}
@@ -528,20 +528,20 @@ func envOr(key, fallback string) string {
//
// Precedence:
// 1. BIND_ADDR — explicit operator override (any value, including "0.0.0.0").
// 2. dev-mode fail-open active → "127.0.0.1" (loopback only).
// 2. local dev (MOLECULE_ENV=dev|development) → "127.0.0.1" (loopback only).
// 3. otherwise → "" (Go binds every interface; existing prod/self-host shape).
//
// Coupling the loopback default to middleware.IsDevModeFailOpen() means the
// two safety levers — bind narrowness and auth strength — move together. A
// production deploy (ADMIN_TOKEN set) keeps binding to all interfaces because
// the auth chain is doing its job; a dev Mac (no ADMIN_TOKEN, MOLECULE_ENV=dev)
// is reachable only via loopback because the auth chain is fail-open. See
// molecule-core#7 for the original LAN exposure finding.
// NOTE (harden/no-fail-open-auth): this is a defense-in-depth default, NOT an
// auth lever. Auth is fail-closed in every environment now, so the loopback
// default no longer compensates for a weak auth chain — it simply keeps a dev
// box off the LAN by default. It is keyed on MOLECULE_ENV alone (decoupled
// from ADMIN_TOKEN), because dev now provisions an ADMIN_TOKEN yet should
// still default to loopback. See molecule-core#7 for the original LAN finding.
func resolveBindHost() string {
if v := os.Getenv("BIND_ADDR"); v != "" {
return v
}
if middleware.IsDevModeFailOpen() {
if middleware.IsLocalDevEnv() {
return "127.0.0.1"
}
return ""
@@ -68,6 +68,10 @@ func TestPeers_CrossTenant_OrgRootNotLeaked(t *testing.T) {
caller := "org-a-root" // parent_id IS NULL — an org root for tenant A
// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
// (Unordered match is set above, so this can be consumed at any point.)
seedDiscoveryGrandfather(mock, caller)
// parent_id lookup → NULL (caller is an org root)
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
WithArgs(caller).
@@ -128,6 +132,9 @@ func TestPeers_SameOrg_SiblingsStillWork(t *testing.T) {
caller := "org-a-child-1"
parent := "org-a-root"
// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
seedDiscoveryGrandfather(mock, caller)
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
WithArgs(caller).
WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(parent))
+20 -15
View File
@@ -422,28 +422,33 @@ func (h *DiscoveryHandler) CheckAccess(c *gin.Context) {
// workspaces with tokens must present a matching Bearer, token binding
// is strict (A's token cannot authenticate caller B).
//
// Fail-open on DB hiccups. Unlike secrets.Values (which returns plaintext
// secrets and must fail closed), discovery only exposes peer URLs that
// are already behind the existing `CanCommunicate` hierarchy check — a
// momentary DB outage shouldn't take agent-to-agent discovery offline.
// (harden/no-fail-open-auth) Fails CLOSED on DB error. This used to return nil
// (allow) on a HasAnyLiveToken hiccup "because discovery only exposes peer URLs
// already behind CanCommunicate" — but the CTO "nothing fail-open" directive is
// absolute, and a request must never gain access because the auth datastore is
// unreachable. A datastore error now writes 503 (availability tradeoff that
// grants NO access) and returns a non-nil error; the caller already does
// `if err != nil { return }` so the 503 body is what the client sees.
func validateDiscoveryCaller(ctx context.Context, c *gin.Context, workspaceID string) error {
hasLive, err := wsauth.HasAnyLiveToken(ctx, db.DB, workspaceID)
if err != nil {
log.Printf("wsauth: discovery HasAnyLiveToken(%s) failed: %v — allowing request", workspaceID, err)
return nil
log.Printf("wsauth: discovery HasAnyLiveToken(%s): datastore lookup failed (returning 503): %v", workspaceID, err)
c.JSON(http.StatusServiceUnavailable, gin.H{
"error": "platform datastore unavailable — retry shortly",
"code": "platform_unavailable",
})
return errors.New("auth datastore unavailable")
}
if !hasLive {
return nil // legacy / pre-upgrade
}
// Tier-1b dev-mode hatch — same escape hatch AdminAuth and
// WorkspaceAuth apply on a local Docker setup. Without this, the
// canvas Details tab can never load peers for a workspace that has
// registered its live token, producing the 401 the user sees.
// Gated by MOLECULE_ENV=development + empty ADMIN_TOKEN, so SaaS
// production stays strict.
if middleware.IsDevModeFailOpen() {
return nil
}
// (harden/no-fail-open-auth) The former dev-mode escape hatch that
// returned nil (allow) here when MOLECULE_ENV=dev + ADMIN_TOKEN unset
// has been REMOVED. Discovery callers must present a verified CP
// session or a valid bearer in every environment. Local dev now
// authenticates the Canvas with a provisioned ADMIN_TOKEN /
// NEXT_PUBLIC_ADMIN_TOKEN (see scripts/dev-start.sh), so the Details
// tab loads peers with a real credential rather than via fail-open.
// Try session cookie auth first (SaaS canvas path).
// verifiedCPSession returns (valid, presented):
@@ -49,6 +49,10 @@ func TestDiscover_WorkspaceNotFound_WithCaller(t *testing.T) {
setupTestRedis(t)
handler := NewDiscoveryHandler()
// validateDiscoveryCaller probes HasAnyLiveToken(callerID) first;
// grandfather (count=0) so the bearer-less request is allowed through.
seedDiscoveryGrandfather(mock, "ws-caller")
// CanCommunicate will need DB lookups — both workspace name lookups
// For the access check: caller lookup succeeds, target lookup fails
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
@@ -113,6 +117,9 @@ func TestPeers_WithParent(t *testing.T) {
setupTestRedis(t)
handler := NewDiscoveryHandler()
// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
seedDiscoveryGrandfather(mock, "ws-sibling-1")
// Expect parent_id lookup for the requesting workspace
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
WithArgs("ws-sibling-1").
@@ -165,6 +172,9 @@ func TestPeers_NotFound(t *testing.T) {
setupTestRedis(t)
handler := NewDiscoveryHandler()
// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
seedDiscoveryGrandfather(mock, "ws-ghost")
// Workspace not found
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
WithArgs("ws-ghost").
@@ -191,6 +201,11 @@ func TestPeers_DBError(t *testing.T) {
setupTestRedis(t)
handler := NewDiscoveryHandler()
// Auth probe grandfathers; this test targets a DB error on the
// *handler-body* parent_id query → 500 (distinct from the auth-probe
// DB error which now fails closed with 503).
seedDiscoveryGrandfather(mock, "ws-dberr")
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
WithArgs("ws-dberr").
WillReturnError(sql.ErrConnDone)
@@ -216,6 +231,9 @@ func TestPeers_RootWorkspace_NoPeers(t *testing.T) {
setupTestRedis(t)
handler := NewDiscoveryHandler()
// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
seedDiscoveryGrandfather(mock, "ws-root-alone")
// Root workspace (parent_id is NULL)
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
WithArgs("ws-root-alone").
@@ -270,6 +288,9 @@ func peersFilterFixture(t *testing.T) (*DiscoveryHandler, sqlmock.Sqlmock) {
mock := setupTestDB(t)
setupTestRedis(t)
// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
seedDiscoveryGrandfather(mock, "ws-self")
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
WithArgs("ws-self").
WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow("ws-pm"))
@@ -927,13 +948,14 @@ func TestDiscoverHostPeer_Smoke_Success(t *testing.T) {
}
}
// ==================== Peers auth — dev-mode fail-open gate ====================
// ==================== Peers auth — fail-CLOSED gate ====================
//
// validateDiscoveryCaller applies a Tier-1b dev-mode hatch so the canvas
// user session (which holds no workspace-scoped bearer) can still load
// the Details → PEERS list on a local Docker setup. The gate must pass
// ONLY when MOLECULE_ENV is development AND ADMIN_TOKEN is empty.
// These tests pin that contract against accidental polarity flips.
// (harden/no-fail-open-auth) validateDiscoveryCaller USED to apply a
// Tier-1b dev-mode hatch that let the bearer-less canvas session load the
// Details → PEERS list when MOLECULE_ENV=development AND ADMIN_TOKEN empty.
// That hatch has been REMOVED — discovery callers must present a verified
// CP session or a valid bearer in every environment. These tests pin the
// fail-closed contract against accidental re-introduction.
// peersAuthFixtureHasLiveToken seeds the mock rows required for the
// Peers handler to reach the auth branch: HasAnyLiveToken → true (a
@@ -946,10 +968,30 @@ func peersAuthFixtureHasLiveToken(mock sqlmock.Sqlmock, workspaceID string) {
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
}
func TestPeers_DevModeFailOpen_AllowsBearerlessRequest(t *testing.T) {
// Dev mode: MOLECULE_ENV=development AND ADMIN_TOKEN empty. Canvas
// sends no bearer token; validateDiscoveryCaller must return nil
// (allow) and the handler must proceed to return the peer list.
// seedDiscoveryGrandfather seeds the FIRST query validateDiscoveryCaller
// issues (HasAnyLiveToken → 0 = legacy / pre-upgrade) so a bearer-less
// discovery request grandfathers through and the test can exercise the
// handler body.
//
// (harden/no-fail-open-auth) Before this branch, validateDiscoveryCaller
// returned nil (allow) when the HasAnyLiveToken probe ERRORED — so these
// handler-body tests never had to seed the probe at all; the unmatched
// COUNT query erred and the fail-open swallowed it. Now that the DB-error
// path fails CLOSED (503), the probe must be seeded explicitly. count=0 is
// the legitimate grandfather path (no live tokens for this workspace yet),
// which is what these pre-existing tests intend.
func seedDiscoveryGrandfather(mock sqlmock.Sqlmock, workspaceID string) {
mock.ExpectQuery("SELECT COUNT.+workspace_auth_tokens").
WithArgs(workspaceID).
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
}
func TestPeers_DevMode_BearerlessRequest_FailsClosed(t *testing.T) {
// (harden/no-fail-open-auth) Exact old-hatch conditions:
// MOLECULE_ENV=development AND ADMIN_TOKEN empty, with a live token in
// the DB. The bearer-less canvas-style request must now 401 — the
// dev-mode hatch that returned nil (allow) here is gone. Local dev
// authenticates via a provisioned ADMIN_TOKEN (scripts/dev-start.sh).
t.Setenv("MOLECULE_ENV", "development")
t.Setenv("ADMIN_TOKEN", "")
@@ -957,22 +999,10 @@ func TestPeers_DevModeFailOpen_AllowsBearerlessRequest(t *testing.T) {
setupTestRedis(t)
handler := NewDiscoveryHandler()
// Only the HasAnyLiveToken probe runs; auth 401s before the peer
// queries, so no further expectations are seeded.
peersAuthFixtureHasLiveToken(mock, "ws-dev")
// Root workspace → children+parent queries still fire but the
// parent_id lookup comes first.
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
WithArgs("ws-dev").
WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
peerCols := []string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}
mock.ExpectQuery("SELECT w.id.+WHERE w.parent_id IS NULL AND w.id").
WithArgs("ws-dev").
WillReturnRows(sqlmock.NewRows(peerCols))
// #383 — children query gained explicit `w.id != $2` self-filter.
mock.ExpectQuery("SELECT w.id.+WHERE w.parent_id = \\$1 AND w.id != \\$2 AND w.status").
WithArgs("ws-dev", "ws-dev").
WillReturnRows(sqlmock.NewRows(peerCols))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "ws-dev"}}
@@ -980,8 +1010,8 @@ func TestPeers_DevModeFailOpen_AllowsBearerlessRequest(t *testing.T) {
handler.Peers(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200 under dev-mode hatch, got %d: %s", w.Code, w.Body.String())
if w.Code != http.StatusUnauthorized {
t.Fatalf("expected 401 (fail-closed) under old dev-mode hatch conditions, got %d: %s", w.Code, w.Body.String())
}
}
@@ -1034,6 +1064,70 @@ func TestPeers_DevModeFailOpen_ClosedInProduction(t *testing.T) {
}
}
// TestPeers_AuthProbeDBError_FailsClosed pins the removal of
// validateDiscoveryCaller's fail-open-on-DB-error branch
// (harden/no-fail-open-auth). When the HasAnyLiveToken auth probe ERRORS, the
// request must NOT be allowed through — it now returns 503 (availability
// tradeoff that grants NO access). Before this branch the function returned nil
// (allow) on a DB hiccup, so the request reached the peer queries.
//
// Watch-it-fail: restore `if err != nil { log; return nil }` in
// validateDiscoveryCaller → this flips 503→(200/handler path) and fails.
func TestPeers_AuthProbeDBError_FailsClosed(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
handler := NewDiscoveryHandler()
// The FIRST query validateDiscoveryCaller issues (HasAnyLiveToken) errors.
// No further expectations: a fail-closed 503 must be written before the
// peer-list queries run.
mock.ExpectQuery("SELECT COUNT.+workspace_auth_tokens").
WithArgs("ws-dberr-auth").
WillReturnError(sql.ErrConnDone)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "ws-dberr-auth"}}
c.Request = httptest.NewRequest("GET", "/registry/ws-dberr-auth/peers", nil)
handler.Peers(c)
if w.Code != http.StatusServiceUnavailable {
t.Fatalf("auth-probe DB error must fail CLOSED: expected 503, got %d: %s", w.Code, w.Body.String())
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestDiscover_AuthProbeDBError_FailsClosed is the Discover-endpoint companion
// to TestPeers_AuthProbeDBError_FailsClosed: a HasAnyLiveToken error on the
// caller's discovery request fails CLOSED with 503 (was: fail-open allow).
func TestDiscover_AuthProbeDBError_FailsClosed(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
handler := NewDiscoveryHandler()
mock.ExpectQuery("SELECT COUNT.+workspace_auth_tokens").
WithArgs("ws-caller").
WillReturnError(sql.ErrConnDone)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "ws-target"}}
c.Request = httptest.NewRequest("GET", "/registry/discover/ws-target", nil)
c.Request.Header.Set("X-Workspace-ID", "ws-caller")
handler.Discover(c)
if w.Code != http.StatusServiceUnavailable {
t.Fatalf("Discover auth-probe DB error must fail CLOSED: expected 503, got %d: %s", w.Code, w.Body.String())
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// ==================== Peers — #383 self never appears in result ====================
// TestPeers_ExcludeSelf_DefenseInDepth verifies the final-line filter in
@@ -1056,6 +1150,9 @@ func TestPeers_ExcludeSelf_DefenseInDepth(t *testing.T) {
const selfID = "ws-xiaodong"
// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
seedDiscoveryGrandfather(mock, selfID)
// parent_id lookup — workspace has a parent.
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
WithArgs(selfID).
@@ -551,6 +551,9 @@ func TestDiscover_AccessDenied(t *testing.T) {
setupTestRedis(t)
handler := NewDiscoveryHandler()
// validateDiscoveryCaller probes HasAnyLiveToken(callerID) first; grandfather.
seedDiscoveryGrandfather(mock, "ws-child-a")
// CanCommunicate: different parents → denied
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
WithArgs("ws-child-a").
@@ -582,6 +585,9 @@ func TestDiscover_TargetOffline(t *testing.T) {
setupTestRedis(t)
handler := NewDiscoveryHandler()
// validateDiscoveryCaller probes HasAnyLiveToken(callerID) first; grandfather.
seedDiscoveryGrandfather(mock, "ws-caller")
// Share a parent so communication is allowed under post-#1955 rules
sharedParent := "ws-parent"
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
@@ -373,6 +373,9 @@ func TestExtended_DiscoverWithCallerID(t *testing.T) {
setupTestRedis(t)
handler := NewDiscoveryHandler()
// validateDiscoveryCaller probes HasAnyLiveToken(callerID) first; grandfather.
seedDiscoveryGrandfather(mock, "ws-caller")
// CanCommunicate needs to look up both workspaces
// Share a parent so communication is allowed under post-#1955 rules
sharedParent := "ws-parent"
@@ -464,6 +467,9 @@ func TestExtended_Peers(t *testing.T) {
setupTestRedis(t)
handler := NewDiscoveryHandler()
// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
seedDiscoveryGrandfather(mock, "ws-peer")
// Expect parent_id lookup for requesting workspace (root-level, no parent)
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
WithArgs("ws-peer").
@@ -89,13 +89,16 @@ func TestSecurity_GetTemplates_NoAuth_Returns401(t *testing.T) {
}
}
// TestSecurity_GetTemplates_FreshInstall_FailsOpen verifies that GET /templates
// still succeeds on a fresh install (zero enrolled workspaces → AdminAuth fail-open).
// This is the regression check: the auth gate must not break new deployments.
func TestSecurity_GetTemplates_FreshInstall_FailsOpen(t *testing.T) {
// TestSecurity_GetTemplates_FreshInstall_FailsClosed pins the post-hardening
// contract (harden/no-fail-open-auth): GET /templates on a fresh install (zero
// enrolled workspaces, no ADMIN_TOKEN) now 401s with no bearer. The former
// AdminAuth Tier-1 lazy-bootstrap fail-open (fresh install ⇒ 200) is gone — a
// new deployment must provision ADMIN_TOKEN (dev does so via dev-start.sh).
func TestSecurity_GetTemplates_FreshInstall_FailsClosed(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
t.Setenv("ADMIN_TOKEN", "")
t.Setenv("MOLECULE_ENV", "")
authDB, authMock := newFreshInstallAuthDB(t)
tmpDir := t.TempDir()
@@ -108,8 +111,8 @@ func TestSecurity_GetTemplates_FreshInstall_FailsOpen(t *testing.T) {
req, _ := http.NewRequest(http.MethodGet, "/templates", nil)
r.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Errorf("#686 GET /templates fresh-install: want 200 (fail-open), got %d body=%s", w.Code, w.Body.String())
if w.Code != http.StatusUnauthorized {
t.Errorf("#686 GET /templates fresh-install fail-closed: want 401, got %d body=%s", w.Code, w.Body.String())
}
if err := authMock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet auth mock expectations: %v", err)
@@ -148,12 +151,14 @@ func TestSecurity_GetOrgTemplates_NoAuth_Returns401(t *testing.T) {
}
}
// TestSecurity_GetOrgTemplates_FreshInstall_FailsOpen mirrors the /templates
// regression check for /org/templates — fresh installs must still work.
func TestSecurity_GetOrgTemplates_FreshInstall_FailsOpen(t *testing.T) {
// TestSecurity_GetOrgTemplates_FreshInstall_FailsClosed mirrors the /templates
// fail-closed check for /org/templates (harden/no-fail-open-auth): a fresh
// install with no bearer / no ADMIN_TOKEN now 401s rather than fail-open.
func TestSecurity_GetOrgTemplates_FreshInstall_FailsClosed(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
t.Setenv("ADMIN_TOKEN", "")
t.Setenv("MOLECULE_ENV", "")
authDB, authMock := newFreshInstallAuthDB(t)
tmpDir := t.TempDir()
@@ -167,8 +172,8 @@ func TestSecurity_GetOrgTemplates_FreshInstall_FailsOpen(t *testing.T) {
req, _ := http.NewRequest(http.MethodGet, "/org/templates", nil)
r.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Errorf("#686 GET /org/templates fresh-install: want 200 (fail-open), got %d body=%s", w.Code, w.Body.String())
if w.Code != http.StatusUnauthorized {
t.Errorf("#686 GET /org/templates fresh-install fail-closed: want 401, got %d body=%s", w.Code, w.Body.String())
}
if err := authMock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet auth mock expectations: %v", err)
+34 -42
View File
@@ -5,61 +5,53 @@ import (
"strings"
)
// Dev-mode escape hatch — factored out of AdminAuth + WorkspaceAuth so a
// future third caller (or a change to what "dev mode" means) touches one
// place. Narrowing the exposed seam also makes it grep-able from security
// reviews: every `isDevModeFailOpen()` call is an intentional fail-open.
// Local-dev environment detection.
//
// Why the helper exists at all: on `go run ./cmd/server` the Canvas (at
// localhost:3000) calls the platform (at localhost:8080) cross-port. Both
// `isSameOriginCanvas` (Referer==Host) and the AdminAuth Tier-1 fail-open
// (no tokens in DB) close the moment the user creates their first
// workspace. Without this hatch the Canvas 401s on every /workspaces
// enumeration and every /workspaces/:id/* read until the operator sets
// `ADMIN_TOKEN` and rebuilds the Canvas bundle with a matching
// `NEXT_PUBLIC_ADMIN_TOKEN`. That's too much friction for a local smoke
// test — hence the hatch.
// SECURITY (harden/no-fail-open-auth): this file used to export an auth
// escape hatch — `isDevModeFailOpen()` — that let AdminAuth, WorkspaceAuth,
// and the discovery handler serve admin/workspace-protected endpoints with
// NO bearer token whenever `ADMIN_TOKEN` was unset and `MOLECULE_ENV` was a
// dev value. The CTO directive is "nothing should be fail-open": auth is now
// fail-CLOSED in every environment, dev included. The hatch is GONE.
//
// Why it's safe for SaaS: hosted tenants are provisioned with both
// `ADMIN_TOKEN` (a random secret, checked by Tier-2 above) and
// `MOLECULE_ENV=production`. Either one being set makes this helper
// return false, so the fail-open branch is unreachable in production.
// Real token minting goes through AdminAuth, so local development keeps a
// narrow fail-open mode for browser/API smoke tests without an admin secret.
// What remains here is a NON-security predicate, `isLocalDevEnv()`, that
// reports ONLY whether `MOLECULE_ENV` names a local-dev environment. It does
// NOT consult `ADMIN_TOKEN` and it does NOT influence authentication. It is
// used for two convenience/defense-in-depth knobs that never grant access:
//
// - ratelimit.go: relax the per-caller request bucket on a single-user
// local stack (a DoS knob, not a credential — relaxing it cannot expose
// any protected data).
// - cmd/server resolveBindHost(): default the HTTP listener to loopback
// (127.0.0.1) in local dev. This is strictly *safer* than binding all
// interfaces and is unrelated to whether a request is authenticated.
//
// Local dev now stays AUTHENTICATED, not open: scripts/dev-start.sh
// provisions a deterministic `ADMIN_TOKEN` and hands the matching
// `NEXT_PUBLIC_ADMIN_TOKEN` to the Canvas, so the browser sends a real
// bearer. See scripts/dev-start.sh and canvas/src/lib/api.ts.
// devModeEnvValues is the set of MOLECULE_ENV values that count as
// "explicit dev mode". Production callers don't set any of these.
// "explicit local dev". Production callers don't set any of these.
// Case-insensitive compare via strings.ToLower below.
var devModeEnvValues = map[string]struct{}{
"development": {},
"dev": {},
}
// isDevModeFailOpen reports whether the AdminAuth / WorkspaceAuth
// middleware should let a bearer-less request through despite live
// workspace tokens existing in the DB.
//
// True only when BOTH:
// - `ADMIN_TOKEN` is empty (operator has not opted in to the #684
// closure), AND
// - `MOLECULE_ENV` is explicitly a dev value ("development" / "dev").
//
// Either condition failing returns false — that's the SaaS safety
// guarantee. Tests: `devmode_test.go` covers every branch.
func isDevModeFailOpen() bool {
if os.Getenv("ADMIN_TOKEN") != "" {
return false
}
// isLocalDevEnv reports whether MOLECULE_ENV names a local-dev environment
// ("development" / "dev"). It carries NO authentication semantics — callers
// must never use it to bypass a credential check. It exists only for
// dev-convenience / defense-in-depth knobs (rate-limit relaxation, loopback
// bind default) that cannot expose protected data.
func isLocalDevEnv() bool {
env := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_ENV")))
_, ok := devModeEnvValues[env]
return ok
}
// IsDevModeFailOpen exposes isDevModeFailOpen to packages outside the
// middleware module (handlers, discovery, etc.) so they can apply the
// same Tier-1b escape hatch their sibling AdminAuth / WorkspaceAuth
// already do. Keep every call site audit-tagged so security review can
// grep them.
func IsDevModeFailOpen() bool {
return isDevModeFailOpen()
// IsLocalDevEnv exposes isLocalDevEnv to packages outside the middleware
// module (cmd/server bind-host default). NON-security: see isLocalDevEnv.
func IsLocalDevEnv() bool {
return isLocalDevEnv()
}
@@ -4,74 +4,66 @@ import (
"testing"
)
// Unit tests for the isDevModeFailOpen predicate. The AdminAuth and
// WorkspaceAuth middleware tests exercise the same helper indirectly via
// HTTP, but a direct predicate test locks the pure-logic behaviour:
// future callers can add themselves to `devmode.go` with confidence.
// Unit tests for the isLocalDevEnv predicate.
//
// (harden/no-fail-open-auth) This predicate replaced the old
// isDevModeFailOpen() auth escape hatch. It carries NO authentication
// semantics and does NOT consult ADMIN_TOKEN — it reports ONLY whether
// MOLECULE_ENV names a local-dev environment. It gates non-security knobs
// (rate-limit relaxation, loopback bind default). The fail-CLOSED auth
// behaviour is enforced by no_fail_open_test.go.
func TestIsDevModeFailOpen_DevModeNoAdminToken_True(t *testing.T) {
func TestIsLocalDevEnv_Development_True(t *testing.T) {
t.Setenv("MOLECULE_ENV", "development")
t.Setenv("ADMIN_TOKEN", "")
if !isDevModeFailOpen() {
t.Error("expected dev mode + no admin token to return true")
if !isLocalDevEnv() {
t.Error("expected MOLECULE_ENV=development to be local dev")
}
}
func TestIsDevModeFailOpen_DevModeShortAlias_True(t *testing.T) {
// "dev" is a valid alias for "development".
func TestIsLocalDevEnv_ShortAlias_True(t *testing.T) {
t.Setenv("MOLECULE_ENV", "dev")
t.Setenv("ADMIN_TOKEN", "")
if !isDevModeFailOpen() {
t.Error("expected MOLECULE_ENV=dev to be treated as dev mode")
if !isLocalDevEnv() {
t.Error("expected MOLECULE_ENV=dev to be treated as local dev")
}
}
func TestIsDevModeFailOpen_AdminTokenSet_False(t *testing.T) {
// Setting ADMIN_TOKEN is the operator's explicit opt-in to the #684
// closure. Dev mode must NOT silently override that signal.
func TestIsLocalDevEnv_IgnoresAdminToken(t *testing.T) {
// Decoupled from ADMIN_TOKEN: dev now provisions one, but the bind /
// rate-limit knobs still treat the env as local dev. Crucially this
// predicate grants no access, so the coupling no longer matters.
t.Setenv("MOLECULE_ENV", "development")
t.Setenv("ADMIN_TOKEN", "operator-explicitly-set-this")
if isDevModeFailOpen() {
t.Error("explicit ADMIN_TOKEN must suppress the dev-mode hatch")
t.Setenv("ADMIN_TOKEN", "operator-set-this")
if !isLocalDevEnv() {
t.Error("ADMIN_TOKEN must not affect isLocalDevEnv (env-only predicate)")
}
}
func TestIsDevModeFailOpen_Production_False(t *testing.T) {
// The SaaS-safety guarantee: production tenants always have
// MOLECULE_ENV=production, so the hatch is unreachable even if a
// misconfigured deployment also leaves ADMIN_TOKEN unset.
func TestIsLocalDevEnv_Production_False(t *testing.T) {
t.Setenv("MOLECULE_ENV", "production")
t.Setenv("ADMIN_TOKEN", "")
if isDevModeFailOpen() {
t.Error("production must never hit the dev-mode fail-open branch")
if isLocalDevEnv() {
t.Error("production must not count as local dev")
}
}
func TestIsDevModeFailOpen_CaseInsensitive(t *testing.T) {
// Operators shouldn't have to remember exact casing for a dev-only
// convenience. "Development", "DEV", " dev " all count.
func TestIsLocalDevEnv_CaseInsensitive(t *testing.T) {
cases := []string{"Development", "DEVELOPMENT", "Dev", "DEV", " dev "}
for _, env := range cases {
t.Run(env, func(t *testing.T) {
t.Setenv("MOLECULE_ENV", env)
t.Setenv("ADMIN_TOKEN", "")
if !isDevModeFailOpen() {
t.Errorf("MOLECULE_ENV=%q should count as dev mode", env)
if !isLocalDevEnv() {
t.Errorf("MOLECULE_ENV=%q should count as local dev", env)
}
})
}
}
func TestIsDevModeFailOpen_UnknownEnv_False(t *testing.T) {
// Arbitrary / unset MOLECULE_ENV values are NOT treated as dev mode.
// Keeps the fail-open branch narrow — no silent opt-in from a typo.
func TestIsLocalDevEnv_UnknownEnv_False(t *testing.T) {
cases := []string{"", "staging", "local", "preview", "test", "devel"}
for _, env := range cases {
t.Run(env, func(t *testing.T) {
t.Setenv("MOLECULE_ENV", env)
t.Setenv("ADMIN_TOKEN", "")
if isDevModeFailOpen() {
t.Errorf("MOLECULE_ENV=%q must not enable fail-open", env)
if isLocalDevEnv() {
t.Errorf("MOLECULE_ENV=%q must not count as local dev", env)
}
})
}
@@ -0,0 +1,245 @@
package middleware
import (
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
"github.com/DATA-DOG/go-sqlmock"
"github.com/gin-gonic/gin"
)
// no_fail_open_test.go is the regression gate for the CTO directive
// "nothing should be fail-open" (branch harden/no-fail-open-auth).
//
// It asserts that AdminAuth and WorkspaceAuth fail CLOSED (401) under the
// EXACT conditions that used to trigger the removed dev-mode fail-open hatch:
// - ADMIN_TOKEN unset, AND
// - MOLECULE_ENV is a dev value ("development" / "dev"), AND
// - any HasAnyLiveTokenGlobal state (0 = fresh install, 1 = post-workspace).
//
// To prove this is RED against the old behaviour: temporarily restore the
// `if isDevModeFailOpen() { c.Next(); return }` short-circuit in
// wsauth_middleware.go (and the Tier-1 `if adminSecret == "" { c.Next() }`
// branch) — every sub-case below flips from 401 to 200 and fails. After the
// hardening, all sub-cases are 401.
// failOpenConditions enumerates the (MOLECULE_ENV, hasLiveTokens) combinations
// that the removed hatch keyed on. ADMIN_TOKEN is always unset here — that was
// a precondition of the old fail-open.
var failOpenConditions = []struct {
name string
molEnv string
liveCount int
}{
{"dev_alias_fresh_install", "dev", 0},
{"dev_alias_post_workspace", "dev", 1},
{"development_fresh_install", "development", 0},
{"development_post_workspace", "development", 1},
}
func TestAdminAuth_NoFailOpen_UnderOldHatchConditions(t *testing.T) {
for _, tc := range failOpenConditions {
t.Run(tc.name, func(t *testing.T) {
t.Setenv("ADMIN_TOKEN", "")
t.Setenv("MOLECULE_ENV", tc.molEnv)
// Ensure no CP-session path can accidentally pass.
t.Setenv("CP_UPSTREAM_URL", "")
mockDB, mock, err := sqlmock.New()
if err != nil {
t.Fatalf("sqlmock.New: %v", err)
}
defer mockDB.Close()
// AdminAuth always probes HasAnyLiveTokenGlobal (for the 503-on-
// outage semantics), so it must be expected for both counts.
mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(tc.liveCount))
r := gin.New()
r.GET("/admin/secrets", AdminAuth(mockDB), func(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{"ok": true})
})
w := httptest.NewRecorder()
req, _ := http.NewRequest(http.MethodGet, "/admin/secrets", nil)
r.ServeHTTP(w, req)
if w.Code != http.StatusUnauthorized {
t.Errorf("AdminAuth must fail CLOSED under old hatch conditions "+
"(MOLECULE_ENV=%q, ADMIN_TOKEN unset, liveTokens=%d): expected 401, got %d: %s",
tc.molEnv, tc.liveCount, w.Code, w.Body.String())
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
})
}
}
func TestWorkspaceAuth_NoFailOpen_UnderOldHatchConditions(t *testing.T) {
for _, tc := range failOpenConditions {
t.Run(tc.name, func(t *testing.T) {
t.Setenv("ADMIN_TOKEN", "")
t.Setenv("MOLECULE_ENV", tc.molEnv)
t.Setenv("CP_UPSTREAM_URL", "")
// WorkspaceAuth 401s before any DB lookup when there is no
// bearer / cookie, so no queries are expected regardless of
// the nominal live-token count.
mockDB, _, err := sqlmock.New()
if err != nil {
t.Fatalf("sqlmock.New: %v", err)
}
defer mockDB.Close()
r := gin.New()
r.GET("/workspaces/:id/activity", WorkspaceAuth(mockDB), func(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{"ok": true})
})
w := httptest.NewRecorder()
req, _ := http.NewRequest(http.MethodGet,
"/workspaces/00000000-0000-0000-0000-000000000000/activity", nil)
r.ServeHTTP(w, req)
if w.Code != http.StatusUnauthorized {
t.Errorf("WorkspaceAuth must fail CLOSED under old hatch conditions "+
"(MOLECULE_ENV=%q, ADMIN_TOKEN unset): expected 401, got %d: %s",
tc.molEnv, w.Code, w.Body.String())
}
})
}
}
// TestCanvasOrBearer_NoFailOpen_UnderOldHatchConditions is the regression gate
// for the two fail-open branches removed from CanvasOrBearer
// (harden/no-fail-open-auth, "nothing fail-open" pass 2):
//
// (a) lazy-bootstrap pass: `if !hasLive { c.Next(); return }` — a zero-token
// install used to pass EVERYTHING through. Now a bearer-less request on a
// fresh install (HasAnyLiveTokenGlobal → 0) fails CLOSED with 401.
// (b) fail-open-on-DB-error: `if err != nil { log; c.Next(); return }` — a
// HasAnyLiveTokenGlobal error used to ALLOW. Now it fails CLOSED with 503.
//
// Watch-it-fail: restore either short-circuit in CanvasOrBearer and the
// matching sub-case flips (401→200 / 503→200) and fails.
func TestCanvasOrBearer_NoFailOpen_UnderOldHatchConditions(t *testing.T) {
// (a) Fresh install (0 live tokens), no bearer, no ADMIN_TOKEN → 401.
t.Run("zero_token_install_no_bearer_fails_closed_401", func(t *testing.T) {
t.Setenv("ADMIN_TOKEN", "")
t.Setenv("CORS_ORIGINS", "")
mockDB, mock, err := sqlmock.New()
if err != nil {
t.Fatalf("sqlmock.New: %v", err)
}
defer mockDB.Close()
mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
handlerCalled := false
r := gin.New()
r.PUT("/canvas/viewport", CanvasOrBearer(mockDB), func(c *gin.Context) {
handlerCalled = true
c.JSON(http.StatusOK, gin.H{"ok": true})
})
w := httptest.NewRecorder()
req, _ := http.NewRequest(http.MethodPut, "/canvas/viewport", nil)
r.ServeHTTP(w, req)
if w.Code != http.StatusUnauthorized {
t.Errorf("CanvasOrBearer lazy-bootstrap fail-open removed: zero-token install must 401, got %d: %s",
w.Code, w.Body.String())
}
if handlerCalled {
t.Error("handler reached on a fresh-install bearer-less request — lazy-bootstrap fail-open not removed")
}
})
// (b) Auth datastore error → 503 (NOT allow).
t.Run("db_error_fails_closed_503", func(t *testing.T) {
mockDB, mock, err := sqlmock.New()
if err != nil {
t.Fatalf("sqlmock.New: %v", err)
}
defer mockDB.Close()
mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
WillReturnError(http.ErrAbortHandler) // any non-nil error suffices
handlerCalled := false
r := gin.New()
r.PUT("/canvas/viewport", CanvasOrBearer(mockDB), func(c *gin.Context) {
handlerCalled = true
c.JSON(http.StatusOK, gin.H{"ok": true})
})
w := httptest.NewRecorder()
req, _ := http.NewRequest(http.MethodPut, "/canvas/viewport", nil)
r.ServeHTTP(w, req)
if w.Code != http.StatusServiceUnavailable {
t.Errorf("CanvasOrBearer DB-error fail-open removed: must 503, got %d: %s", w.Code, w.Body.String())
}
if handlerCalled {
t.Error("handler reached on a datastore-error request — DB-error fail-open not removed")
}
})
}
// TestNoFailOpenAuthHelperReexists is a source-guard: it asserts that no
// fail-open auth helper (the removed isDevModeFailOpen / IsDevModeFailOpen)
// has crept back into the middleware package as real code. The replacement
// predicate is the NON-security isLocalDevEnv (bind / rate-limit only);
// re-introducing the old fail-open identifier as a declaration or call is a
// regression of the CTO directive.
//
// It matches the *invocation/declaration* form `isDevModeFailOpen(` (which
// only appears in live code) and deliberately ignores prose mentions in
// `//` comments, so the historical references kept in doc comments don't
// trip the guard.
func TestNoFailOpenAuthHelperReexists(t *testing.T) {
forbidden := []string{"isDevModeFailOpen(", "IsDevModeFailOpen("}
entries, err := os.ReadDir(".")
if err != nil {
t.Fatalf("ReadDir: %v", err)
}
for _, e := range entries {
name := e.Name()
if e.IsDir() || !strings.HasSuffix(name, ".go") {
continue
}
// Skip this guard file itself (it names the forbidden tokens on
// purpose, including inside a comment).
if name == "no_fail_open_test.go" {
continue
}
data, err := os.ReadFile(filepath.Clean(name))
if err != nil {
t.Fatalf("ReadFile %s: %v", name, err)
}
for i, line := range strings.Split(string(data), "\n") {
// Ignore single-line comments — historical mentions live there.
code := line
if idx := strings.Index(code, "//"); idx >= 0 {
code = code[:idx]
}
for _, f := range forbidden {
if strings.Contains(code, f) {
t.Errorf("%s:%d uses forbidden fail-open auth helper %q — "+
"the dev-mode fail-open hatch must stay removed (harden/no-fail-open-auth). "+
"Use isLocalDevEnv (NON-security) for dev-only knobs instead.",
name, i+1, strings.TrimSuffix(f, "("))
}
}
}
}
}
@@ -102,15 +102,16 @@ func (rl *RateLimiter) keyFor(c *gin.Context) string {
// the priority list and rationale.
func (rl *RateLimiter) Middleware() gin.HandlerFunc {
return func(c *gin.Context) {
// Tier-1b dev-mode hatch — same gate as AdminAuth / WorkspaceAuth /
// discovery. On a local single-user Docker setup the 600-req/min
// bucket fills fast: a 15-workspace canvas + activity polling +
// approvals polling + A2A overlay + initial hydration all land in
// one bucket (whichever keyFor returns — typically the dev user's
// IP or shared admin token), so a minute of active use can trip
// 429 and blank the page. Gated by MOLECULE_ENV=development +
// empty ADMIN_TOKEN so SaaS production keeps the bucket.
if isDevModeFailOpen() {
// Local-dev rate-limit relaxation (NON-security; see devmode.go).
// On a local single-user stack the 600-req/min bucket fills fast:
// a 15-workspace canvas + activity polling + approvals polling +
// A2A overlay + initial hydration all land in one bucket, so a
// minute of active use can trip 429 and blank the page. This only
// relaxes a DoS knob — it grants no access and is unrelated to
// authentication (auth is fail-closed in every env). Gated solely
// by MOLECULE_ENV=dev/development so SaaS production keeps the
// bucket. Decoupled from ADMIN_TOKEN (dev now provisions one).
if isLocalDevEnv() {
c.Header("X-RateLimit-Limit", "unlimited")
c.Next()
return
@@ -120,12 +120,12 @@ func WorkspaceAuth(database *sql.DB) gin.HandlerFunc {
return
}
}
// Local-dev escape hatch — see devmode.go. Unreachable on SaaS
// (hosted tenants always have ADMIN_TOKEN + MOLECULE_ENV=production).
if isDevModeFailOpen() {
c.Next()
return
}
// No bearer, no verified CP session: fail CLOSED in EVERY
// environment (harden/no-fail-open-auth). The old local-dev
// escape hatch that let bearer-less requests through when
// ADMIN_TOKEN was unset + MOLECULE_ENV=dev has been removed —
// local dev now authenticates with a provisioned ADMIN_TOKEN
// (see scripts/dev-start.sh).
c.AbortWithStatusJSON(http.StatusUnauthorized, gin.H{"error": "missing workspace auth token"})
}
}
@@ -133,11 +133,18 @@ func WorkspaceAuth(database *sql.DB) gin.HandlerFunc {
// AdminAuth returns a Gin middleware for global/admin routes (e.g.
// /settings/secrets, /admin/secrets) that have no per-workspace scope.
//
// FAIL-CLOSED in every environment (harden/no-fail-open-auth): there is no
// bearer-less path through this middleware. A request reaches the handler
// ONLY by presenting a valid credential (verified CP session cookie, org
// token, ADMIN_TOKEN, or — deprecated — a live workspace token). The former
// "Tier-1 lazy-bootstrap fail-open" (no live tokens + no ADMIN_TOKEN ⇒ pass)
// has been removed: it let an attacker pre-empt the first user by POSTing
// /org/import before any token was minted (C4 SaaS-launch finding). A fresh
// install must set ADMIN_TOKEN to reach admin routes.
//
// # Credential tier (evaluated in order)
//
// 1. Lazy-bootstrap fail-open: if no live workspace token exists anywhere on
// the platform (fresh install / pre-Phase-30 upgrade), every request passes
// through so existing deployments keep working.
// 1. Verified CP session cookie (SaaS canvas) — upstream-confirmed.
//
// 2. ADMIN_TOKEN env var (recommended, closes #684): when set, the bearer
// MUST equal this value exactly (constant-time comparison). Workspace
@@ -163,33 +170,17 @@ func AdminAuth(database *sql.DB) gin.HandlerFunc {
ctx := c.Request.Context()
adminSecret := os.Getenv("ADMIN_TOKEN")
hasLive, err := wsauth.HasAnyLiveTokenGlobal(ctx, database)
if err != nil {
// (harden/no-fail-open-auth) Both former fail-open branches have
// been REMOVED here:
// - Tier-1 lazy-bootstrap (no live tokens + no ADMIN_TOKEN ⇒ pass)
// - Tier-1b local-dev escape hatch (isDevModeFailOpen ⇒ pass)
// Admin auth is now fail-CLOSED in every environment. We still probe
// HasAnyLiveTokenGlobal so a datastore outage returns a structured
// 503 (not a silent pass), but its result no longer opens any path.
if _, err := wsauth.HasAnyLiveTokenGlobal(ctx, database); err != nil {
abortAuthLookupError(c, "AdminAuth: HasAnyLiveTokenGlobal", err)
return
}
if !hasLive {
// Tier 1: fail-open is ONLY safe when ADMIN_TOKEN is unset
// (self-hosted dev, pre-Phase-30 upgrade). Hosted SaaS always
// sets ADMIN_TOKEN at provision time, and C4 (SaaS-launch
// blocker) showed that without this guard an attacker can
// pre-empt the first user by POSTing /org/import before any
// token gets minted. When ADMIN_TOKEN is set we fall through
// into the same bearer-check path Tier-2 uses below.
if adminSecret == "" {
c.Next()
return
}
}
// Tier 1b: Local-dev escape hatch — see devmode.go. Lets the
// Canvas dashboard keep working after the first workspace token
// lands in the DB on `go run ./cmd/server`. Unreachable on SaaS
// (hosted tenants always have ADMIN_TOKEN + MOLECULE_ENV=production).
if isDevModeFailOpen() {
c.Next()
return
}
// SaaS-canvas path: when the request carries a WorkOS session
// cookie AND the CP confirms it's valid, accept without a
@@ -281,34 +272,46 @@ func cpSessionActor(cookieHeader string) string {
// Accepts either:
//
// 1. A valid bearer token (same contract as AdminAuth) — covers molecli,
// agent-to-platform calls, and anyone using the API directly.
// 2. A browser Origin header that matches CORS_ORIGINS (canvas itself).
// This is NOT a strict auth boundary — curl can forge Origin — but for
// cosmetic-only routes the trade-off is acceptable. Non-cosmetic routes
// MUST NOT use this middleware (see #194 review on why it would re-open
// #164 CRITICAL if applied to /bundles/import).
// agent-to-platform calls, the browser canvas (which now sends
// Authorization: Bearer $NEXT_PUBLIC_ADMIN_TOKEN on every platform
// call — see canvas/src/lib/api.ts platformAuthHeaders), and anyone
// using the API directly.
// 2. A same-origin canvas request (Referer/Host match), but ONLY when the
// combined-tenant canvas proxy is active (CANVAS_PROXY_URL set). This is
// a real same-origin check the browser cannot forge cross-origin (see
// isSameOriginCanvas / IsVerifiedCanvasSession, #623/#194) — NOT the
// trivially-forgeable cross-origin Origin header. The forgeable
// CORS_ORIGINS Origin-match path was REMOVED under the CTO
// "nothing fail-open" directive (a no-bearer request passing purely on a
// spoofable Origin is effectively open even for a cosmetic route, and is
// no longer needed now that the canvas always sends a bearer).
//
// Lazy-bootstrap fail-open preserved: zero-token installs pass everything
// through so fresh self-hosted / dev sessions aren't bricked.
// Non-cosmetic routes MUST NOT use this middleware (see #194 review on why it
// would re-open #164 CRITICAL if applied to /bundles/import).
//
// (harden/no-fail-open-auth) Two former fail-open branches are REMOVED:
// - DB-error on HasAnyLiveTokenGlobal used to `c.Next()` (allow); it now
// fails CLOSED with 503 (availability tradeoff that grants NO access).
// - The lazy-bootstrap pass (`!hasLive ⇒ c.Next()`) used to let a
// zero-token install through EVERYTHING; it is gone. Bootstrap is now via
// ADMIN_TOKEN (provisioned by scripts/dev-start.sh for local dev,
// operator/SaaS-set in production) — local mimics production.
func CanvasOrBearer(database *sql.DB) gin.HandlerFunc {
return func(c *gin.Context) {
ctx := c.Request.Context()
hasLive, err := wsauth.HasAnyLiveTokenGlobal(ctx, database)
if err != nil {
log.Printf("wsauth: CanvasOrBearer HasAnyLiveTokenGlobal failed: %v — allowing request", err)
c.Next()
return
}
if !hasLive {
c.Next()
// Probe global token state for the (no-bearer) same-origin path
// below. Fail CLOSED on a datastore error — an availability tradeoff
// that does NOT grant access (was: log + c.Next() fail-open).
if _, err := wsauth.HasAnyLiveTokenGlobal(ctx, database); err != nil {
abortAuthLookupError(c, "CanvasOrBearer: HasAnyLiveTokenGlobal", err)
return
}
// Path 1: bearer present → bearer MUST validate. Do not fall through
// to Origin on an invalid bearer — an attacker with a revoked /
// expired token + a matching Origin would otherwise bypass auth.
// Empty bearer → skip to Origin path (canvas never sends one).
// to the same-origin path on an invalid bearer — an attacker with a
// revoked / expired token would otherwise bypass auth.
// Empty bearer → fall to the same-origin canvas path.
if tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization")); tok != "" {
// Admin token accepted for canvas dashboard
adminSecret := os.Getenv("ADMIN_TOKEN")
@@ -324,13 +327,10 @@ func CanvasOrBearer(database *sql.DB) gin.HandlerFunc {
return
}
// Path 2: canvas origin match (cross-origin canvas).
if canvasOriginAllowed(c.GetHeader("Origin")) {
c.Next()
return
}
// Path 3: same-origin canvas (tenant image).
// Path 2: same-origin canvas (combined-tenant image). Gated behind
// canvasProxyActive (CANVAS_PROXY_URL) and a non-forgeable
// Referer/Host same-origin check — NOT the spoofable cross-origin
// Origin header (that path was removed, see doc comment above).
if isSameOriginCanvas(c) {
c.Next()
return
@@ -340,30 +340,14 @@ func CanvasOrBearer(database *sql.DB) gin.HandlerFunc {
}
}
// canvasOriginAllowed returns true if origin matches any entry in the
// CORS_ORIGINS env var (comma-separated) or the localhost defaults.
// Exact-match only; no prefix or wildcard logic — that's handled by the
// real CORS middleware upstream. The intent here is "did this request come
// from the canvas page the user is already logged into?" — a binary check.
func canvasOriginAllowed(origin string) bool {
if origin == "" {
return false
}
allowed := []string{"http://localhost:3000", "http://localhost:3001"}
if v := os.Getenv("CORS_ORIGINS"); v != "" {
for _, o := range strings.Split(v, ",") {
if o = strings.TrimSpace(o); o != "" {
allowed = append(allowed, o)
}
}
}
for _, a := range allowed {
if a == origin {
return true
}
}
return false
}
// (harden/no-fail-open-auth) canvasOriginAllowed was REMOVED. It matched a
// request's (trivially forgeable, cross-origin) Origin header against
// CORS_ORIGINS and was the basis of CanvasOrBearer's no-bearer Origin-match
// pass — effectively open to any curl that sets a matching Origin. Under the
// CTO "nothing fail-open" directive that path is gone; the canvas now always
// sends a bearer (NEXT_PUBLIC_ADMIN_TOKEN), so nothing legitimate relied on it.
// The CORS *response-header* allowlist is handled by the real CORS middleware
// upstream, unaffected by this removal.
// isSameOriginCanvas returns true when the request appears to come from the
// canvas UI served by the same Go process (tenant image). In this topology,
@@ -143,11 +143,15 @@ func TestCanvasOrBearer_AdminTokenEnv_Passes(t *testing.T) {
}
}
// TestCanvasOrBearer_DBError_FailOpen pins the documented behavior on a
// HasAnyLiveTokenGlobal failure. The middleware logs and falls open so a
// flaky DB doesn't lock canvas users out of cosmetic routes. Hardcoded in
// the comment block; this is a reminder if anyone changes that semantic.
func TestCanvasOrBearer_DBError_FailOpen(t *testing.T) {
// TestCanvasOrBearer_DBError_FailsClosed pins the removal of the
// fail-open-on-DB-error branch (harden/no-fail-open-auth). A
// HasAnyLiveTokenGlobal failure used to log + c.Next() (allow); it now fails
// CLOSED with 503 — an availability tradeoff that grants NO access. The
// handler must NOT be reached.
//
// Watch-it-fail: restore `if err != nil { log; c.Next(); return }` in
// CanvasOrBearer → this flips 503→200 and fails.
func TestCanvasOrBearer_DBError_FailsClosed(t *testing.T) {
mockDB, mock, err := sqlmock.New()
if err != nil {
t.Fatalf("sqlmock: %v", err)
@@ -156,8 +160,10 @@ func TestCanvasOrBearer_DBError_FailOpen(t *testing.T) {
mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
WillReturnError(http.ErrAbortHandler) // any non-nil error suffices
handlerCalled := false
r := gin.New()
r.PUT("/canvas/viewport", CanvasOrBearer(mockDB), func(c *gin.Context) {
handlerCalled = true
c.JSON(http.StatusOK, gin.H{"ok": true})
})
@@ -165,8 +171,11 @@ func TestCanvasOrBearer_DBError_FailOpen(t *testing.T) {
req, _ := http.NewRequest(http.MethodPut, "/canvas/viewport", nil)
r.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Errorf("DB error fail-open: got %d, want 200 (%s)", w.Code, w.Body.String())
if w.Code != http.StatusServiceUnavailable {
t.Errorf("DB error must fail CLOSED: got %d, want 503 (%s)", w.Code, w.Body.String())
}
if handlerCalled {
t.Error("handler reached on a datastore-error request — DB-error fail-open not removed")
}
}
@@ -339,15 +339,24 @@ func TestWorkspaceAuth_WrongWorkspace_Returns401(t *testing.T) {
// TestAdminAuth_FailOpen_NoTokensGlobally — C10/C11: on a fresh install (no
// live tokens anywhere) the middleware must let the request through so existing
// deployments keep working during the Phase-30 rollout.
func TestAdminAuth_FailOpen_NoTokensGlobally(t *testing.T) {
// TestAdminAuth_FreshInstallNoTokens_FailsClosed pins the post-hardening
// contract (harden/no-fail-open-auth): on a fresh install with NO live
// tokens anywhere AND no ADMIN_TOKEN, a bearer-less admin request now 401s.
// The former Tier-1 "lazy-bootstrap fail-open" (no tokens ⇒ 200) is GONE —
// it let an attacker pre-empt the first user via /org/import (C4). A fresh
// install must provision ADMIN_TOKEN to reach admin routes.
func TestAdminAuth_FreshInstallNoTokens_FailsClosed(t *testing.T) {
t.Setenv("ADMIN_TOKEN", "")
t.Setenv("MOLECULE_ENV", "")
mockDB, mock, err := sqlmock.New()
if err != nil {
t.Fatalf("sqlmock.New: %v", err)
}
defer mockDB.Close()
// HasAnyLiveTokenGlobal returns 0 — fresh install.
// HasAnyLiveTokenGlobal returns 0 — fresh install. We still probe it
// (so a DB outage yields a structured 503), but the result no longer
// opens any path.
mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
@@ -360,8 +369,8 @@ func TestAdminAuth_FailOpen_NoTokensGlobally(t *testing.T) {
req, _ := http.NewRequest(http.MethodGet, "/admin/secrets", nil)
r.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Errorf("C10 fail-open (no global tokens): expected 200, got %d: %s", w.Code, w.Body.String())
if w.Code != http.StatusUnauthorized {
t.Errorf("fresh-install no-token fail-closed: expected 401, got %d: %s", w.Code, w.Body.String())
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
@@ -831,18 +840,23 @@ func TestAdminAuth_Issue180_ApprovalsListing_NoBearer_Returns401(t *testing.T) {
}
}
// TestAdminAuth_Issue180_ApprovalsListing_FailOpen_NoTokens documents the
// fail-open contract: on a fresh install (no tokens anywhere), the middleware
// must not block the canvas from polling /approvals/pending.
func TestAdminAuth_Issue180_ApprovalsListing_FailOpen_NoTokens(t *testing.T) {
// TestAdminAuth_Issue180_ApprovalsListing_FreshInstall_FailsClosed pins the
// post-hardening contract (harden/no-fail-open-auth): on a fresh install (no
// tokens anywhere, no ADMIN_TOKEN), the canvas polling /approvals/pending with
// no bearer now gets 401. The former #180 fail-open (200 on no-tokens) is gone
// — local dev now provisions an ADMIN_TOKEN and the canvas authenticates with
// it (scripts/dev-start.sh).
func TestAdminAuth_Issue180_ApprovalsListing_FreshInstall_FailsClosed(t *testing.T) {
t.Setenv("ADMIN_TOKEN", "")
t.Setenv("MOLECULE_ENV", "")
mockDB, mock, err := sqlmock.New()
if err != nil {
t.Fatalf("sqlmock.New: %v", err)
}
defer mockDB.Close()
// HasAnyLiveTokenGlobal returns 0 — fresh install, no tokens yet.
// HasAnyLiveTokenGlobal returns 0 — fresh install, no tokens yet. Probed
// for the 503-on-outage semantics, but it opens no path now.
mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
@@ -855,24 +869,21 @@ func TestAdminAuth_Issue180_ApprovalsListing_FailOpen_NoTokens(t *testing.T) {
req, _ := http.NewRequest(http.MethodGet, "/approvals/pending", nil)
r.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Errorf("#180 fail-open (no tokens): expected 200, got %d: %s", w.Code, w.Body.String())
if w.Code != http.StatusUnauthorized {
t.Errorf("#180 fresh-install fail-closed: expected 401, got %d: %s", w.Code, w.Body.String())
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestWorkspaceAuth_DevModeEscapeHatch_NoBearer_FailsOpen documents the
// local-dev escape hatch on WorkspaceAuth. On `go run ./cmd/server` +
// `npm run dev`, Canvas at localhost:3000 calls the platform at
// localhost:8080 cross-port, so isSameOriginCanvas's Host==Referer
// check fails. Without this hatch the Canvas can't show per-workspace
// activity/delegations.
//
// SaaS never fires this branch because tenant provisioning sets both
// MOLECULE_ENV=production and ADMIN_TOKEN.
func TestWorkspaceAuth_DevModeEscapeHatch_NoBearer_FailsOpen(t *testing.T) {
// TestWorkspaceAuth_DevMode_NoBearer_FailsClosed pins the post-hardening
// contract (harden/no-fail-open-auth): the former local-dev escape hatch on
// WorkspaceAuth — which let a bearer-less request through when
// MOLECULE_ENV=dev + ADMIN_TOKEN unset — is GONE. Under exactly those
// conditions the request now 401s. Local dev authenticates with a
// provisioned ADMIN_TOKEN handed to the Canvas (scripts/dev-start.sh).
func TestWorkspaceAuth_DevMode_NoBearer_FailsClosed(t *testing.T) {
t.Setenv("MOLECULE_ENV", "development")
t.Setenv("ADMIN_TOKEN", "")
@@ -882,7 +893,9 @@ func TestWorkspaceAuth_DevModeEscapeHatch_NoBearer_FailsOpen(t *testing.T) {
}
defer mockDB.Close()
// No DB queries expected — the hatch short-circuits before any lookup.
// No DB queries expected — WorkspaceAuth 401s before any lookup when
// there is no bearer / cookie. The hatch that used to short-circuit
// here no longer exists.
r := gin.New()
r.GET("/workspaces/:id/activity", WorkspaceAuth(mockDB), func(c *gin.Context) {
@@ -894,8 +907,8 @@ func TestWorkspaceAuth_DevModeEscapeHatch_NoBearer_FailsOpen(t *testing.T) {
"/workspaces/00000000-0000-0000-0000-000000000000/activity", nil)
r.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Errorf("WorkspaceAuth dev-mode hatch: expected 200, got %d: %s", w.Code, w.Body.String())
if w.Code != http.StatusUnauthorized {
t.Errorf("WorkspaceAuth dev-mode fail-closed: expected 401, got %d: %s", w.Code, w.Body.String())
}
}
@@ -957,15 +970,14 @@ func TestWorkspaceAuth_DevModeEscapeHatch_IgnoredWhenAdminTokenSet(t *testing.T)
}
}
// TestAdminAuth_DevModeEscapeHatch_FailsOpenWithHasLiveTokens documents the
// Tier-1b dev-mode escape hatch. When the platform runs with MOLECULE_ENV=development
// and ADMIN_TOKEN is unset, AdminAuth must stay fail-open even after workspace
// tokens land in the DB. This keeps the Canvas dashboard usable in local dev
// after the first workspace is created (PR #1871 — quickstart bugless).
//
// SaaS never hits this path because tenant provisioning sets both
// ADMIN_TOKEN and MOLECULE_ENV=production.
func TestAdminAuth_DevModeEscapeHatch_FailsOpenWithHasLiveTokens(t *testing.T) {
// TestAdminAuth_DevMode_NoBearer_FailsClosed pins the post-hardening contract
// (harden/no-fail-open-auth): the former Tier-1b dev-mode escape hatch — which
// let AdminAuth pass a bearer-less request when MOLECULE_ENV=dev + ADMIN_TOKEN
// unset, even with live tokens in the DB — is GONE. Under exactly those
// conditions the request now 401s. Local dev authenticates with a provisioned
// ADMIN_TOKEN handed to the Canvas as NEXT_PUBLIC_ADMIN_TOKEN
// (scripts/dev-start.sh).
func TestAdminAuth_DevMode_NoBearer_FailsClosed(t *testing.T) {
t.Setenv("MOLECULE_ENV", "development")
t.Setenv("ADMIN_TOKEN", "")
@@ -976,7 +988,7 @@ func TestAdminAuth_DevModeEscapeHatch_FailsOpenWithHasLiveTokens(t *testing.T) {
defer mockDB.Close()
// HasAnyLiveTokenGlobal returns 1 — tokens exist (post first-workspace).
// The Tier-1 fail-open branch WOULD close here. Tier-1b must still open.
// Probed for the 503-on-outage semantics, but it opens no path now.
mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
@@ -989,8 +1001,8 @@ func TestAdminAuth_DevModeEscapeHatch_FailsOpenWithHasLiveTokens(t *testing.T) {
req, _ := http.NewRequest(http.MethodGet, "/workspaces", nil)
r.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Errorf("dev-mode escape hatch: expected 200, got %d: %s", w.Code, w.Body.String())
if w.Code != http.StatusUnauthorized {
t.Errorf("dev-mode fail-closed: expected 401, got %d: %s", w.Code, w.Body.String())
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
@@ -1104,7 +1116,16 @@ func TestAdminAuth_Issue120_PatchWorkspace_NoBearer_Returns401(t *testing.T) {
// Accepts bearer or a matching Origin header. MUST NOT be used anywhere a
// forged request would leak data or create resources.
func TestCanvasOrBearer_NoTokens_FailOpen(t *testing.T) {
// TestCanvasOrBearer_NoTokens_FailsClosed pins the removal of the
// lazy-bootstrap fail-open (harden/no-fail-open-auth): a zero-token install
// must NOT pass everything through. A bearer-less request on a fresh install
// (HasAnyLiveTokenGlobal → 0) now 401s. Bootstrap is via ADMIN_TOKEN
// (scripts/dev-start.sh provisions it for local dev; operator/SaaS sets it in
// production) — not a zero-config fail-open.
//
// Watch-it-fail: restore `if !hasLive { c.Next(); return }` in CanvasOrBearer
// → this flips 401→200 and fails.
func TestCanvasOrBearer_NoTokens_FailsClosed(t *testing.T) {
mockDB, mock, err := sqlmock.New()
if err != nil {
t.Fatalf("sqlmock: %v", err)
@@ -1114,8 +1135,10 @@ func TestCanvasOrBearer_NoTokens_FailOpen(t *testing.T) {
mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
handlerCalled := false
r := gin.New()
r.PUT("/canvas/viewport", CanvasOrBearer(mockDB), func(c *gin.Context) {
handlerCalled = true
c.JSON(http.StatusOK, gin.H{"ok": true})
})
@@ -1123,8 +1146,11 @@ func TestCanvasOrBearer_NoTokens_FailOpen(t *testing.T) {
req, _ := http.NewRequest(http.MethodPut, "/canvas/viewport", nil)
r.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Errorf("bootstrap fail-open: got %d, want 200 (%s)", w.Code, w.Body.String())
if w.Code != http.StatusUnauthorized {
t.Errorf("zero-token install must fail CLOSED (lazy-bootstrap fail-open removed): got %d, want 401 (%s)", w.Code, w.Body.String())
}
if handlerCalled {
t.Error("handler reached on a fresh-install bearer-less request — lazy-bootstrap fail-open not removed")
}
}
@@ -1195,7 +1221,16 @@ func TestCanvasOrBearer_TokensExist_WrongOrigin_Returns401(t *testing.T) {
}
}
func TestCanvasOrBearer_TokensExist_CanvasOrigin_Passes(t *testing.T) {
// TestCanvasOrBearer_TokensExist_ForgeableOrigin_NoBearer_FailsClosed pins the
// removal of the cross-origin Origin-match cosmetic path
// (harden/no-fail-open-auth). A no-bearer request whose forgeable Origin header
// matches CORS_ORIGINS used to pass; it now 401s. The canvas always sends a
// bearer (NEXT_PUBLIC_ADMIN_TOKEN), so legitimate traffic is unaffected, and a
// curl that forges Origin can no longer reach even a cosmetic route.
//
// Watch-it-fail: restore `if canvasOriginAllowed(c.GetHeader("Origin")) {
// c.Next(); return }` in CanvasOrBearer → this flips 401→200 and fails.
func TestCanvasOrBearer_TokensExist_ForgeableOrigin_NoBearer_FailsClosed(t *testing.T) {
mockDB, mock, err := sqlmock.New()
if err != nil {
t.Fatalf("sqlmock: %v", err)
@@ -1207,18 +1242,24 @@ func TestCanvasOrBearer_TokensExist_CanvasOrigin_Passes(t *testing.T) {
t.Setenv("CORS_ORIGINS", "https://acme.moleculesai.app,https://bob.moleculesai.app")
handlerCalled := false
r := gin.New()
r.PUT("/canvas/viewport", CanvasOrBearer(mockDB), func(c *gin.Context) {
handlerCalled = true
c.JSON(http.StatusOK, gin.H{"ok": true})
})
w := httptest.NewRecorder()
req, _ := http.NewRequest(http.MethodPut, "/canvas/viewport", nil)
// A matching-but-forgeable Origin with NO bearer must NOT pass anymore.
req.Header.Set("Origin", "https://acme.moleculesai.app")
r.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Errorf("canvas origin: got %d, want 200 (%s)", w.Code, w.Body.String())
if w.Code != http.StatusUnauthorized {
t.Errorf("no-bearer request on a forgeable matching Origin must fail CLOSED (Origin-match path removed): got %d, want 401 (%s)", w.Code, w.Body.String())
}
if handlerCalled {
t.Error("handler reached on a no-bearer forgeable-Origin request — Origin-match fail-open not removed")
}
}
@@ -1298,21 +1339,9 @@ func TestCanvasOrBearer_WrongOrigin_Blocked(t *testing.T) {
}
}
func TestCanvasOriginAllowed_EmptyOriginRejected(t *testing.T) {
if canvasOriginAllowed("") {
t.Error("empty Origin must not pass")
}
}
func TestCanvasOriginAllowed_LocalhostDefault(t *testing.T) {
t.Setenv("CORS_ORIGINS", "")
if !canvasOriginAllowed("http://localhost:3000") {
t.Error("localhost:3000 should be allowed by default")
}
if canvasOriginAllowed("http://evil.example.com") {
t.Error("random origin should not be allowed")
}
}
// (harden/no-fail-open-auth) TestCanvasOriginAllowed_* were REMOVED along with
// the canvasOriginAllowed helper they exercised — the forgeable cross-origin
// Origin-match cosmetic path no longer exists in CanvasOrBearer.
// ── Issue #623 regression ─────────────────────────────────────────────────────
// AdminAuth must NOT accept forged Origin headers. Any container on the Docker