feat(router): /cp/* reverse-proxy to CP + same-origin canvas fetches

Canvas's browser bundle issues fetches to both CP endpoints
(/cp/auth/me, /cp/orgs, ...) AND tenant-platform endpoints
(/canvas/viewport, /approvals/pending, /org/templates). They
share ONE build-time base URL. Baking api.moleculesai.app
broke tenant calls with 404; baking the tenant subdomain broke
auth. Tried both today and saw exactly one failure mode per
attempt.

Real fix: same-origin fetches + tenant-side split. Adds:

  internal/router/cp_proxy.go      # /cp/* → CP_UPSTREAM_URL

mounted before NoRoute(canvasProxy). Now a tenant serves:

  /cp/*              → reverse-proxy to api.moleculesai.app
  /canvas/viewport,
  /approvals/pending,
  /workspaces/:id/*,
  /ws, /registry,    → tenant platform (existing handlers)
  /metrics
  everything else    → canvas UI (existing reverse-proxy)

Canvas middleware reverts to `connect-src 'self' wss:` for the
same-origin path (keeping explicit PLATFORM_URL whitelist as a
self-hosted escape hatch when the build-arg is non-empty).

CI build-arg flips to NEXT_PUBLIC_PLATFORM_URL="" so the bundle
issues relative fetches.

Security of cp_proxy:
  - Cookie + Authorization PRESERVED across the hop (opposite of
    canvas proxy) — they carry the WorkOS session, which is the
    whole point.
  - Host rewritten to upstream so CORS + cookie-domain on the CP
    side see their own hostname.
  - Upstream URL validated at construction: must parse, must be
    http(s), must have a host — misconfig fails closed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hongming Wang 2026-04-20 13:01:40 -07:00
parent 43cd8e1661
commit 52235aeb27
4 changed files with 109 additions and 25 deletions

View File

@ -111,21 +111,19 @@ jobs:
${{ env.TENANT_IMAGE_NAME }}:staging-${{ steps.tags.outputs.sha }}
cache-from: type=gha
cache-to: type=gha,mode=max
# Bake the SaaS control-plane URL into the canvas bundle.
# Canvas's browser-side code uses PLATFORM_URL for every
# /cp/* call (auth, orgs, billing, terms). Leaving this empty
# made PLATFORM_URL fall back to http://localhost:8080 in the
# built bundle — which fails from the user's browser because
# localhost resolves to their own machine, not the tenant
# instance. Baking the CP origin here fixes browser-side auth
# for every tenant.
# Canvas uses same-origin fetches. The tenant Go platform
# reverse-proxies /cp/* to the SaaS CP via its CP_UPSTREAM_URL
# env; the tenant's /canvas/viewport, /approvals/pending,
# /org/templates etc. live on the tenant platform itself.
# Both legs share one origin (the tenant subdomain) so
# PLATFORM_URL="" forces canvas to fetch paths as relative,
# which land same-origin.
#
# Self-hosted / private-label deployments override this by
# rebuilding the image with a different NEXT_PUBLIC_PLATFORM_URL
# build-arg (e.g. https://api.their-domain.com). Same pattern
# molecule-app uses with NEXT_PUBLIC_CP_ORIGIN.
# Self-hosted / private-label deployments override this at
# build time with a specific backend (e.g. local dev:
# NEXT_PUBLIC_PLATFORM_URL=http://localhost:8080).
build-args: |
NEXT_PUBLIC_PLATFORM_URL=https://api.moleculesai.app
NEXT_PUBLIC_PLATFORM_URL=
labels: |
org.opencontainers.image.source=https://github.com/${{ github.repository }}
org.opencontainers.image.revision=${{ github.sha }}

View File

@ -35,23 +35,18 @@ export function buildCsp(nonce: string, isDev: boolean): string {
].join("; ") + ";";
}
// Canvas makes cross-origin fetches to the control plane for
// /cp/auth/*, /cp/orgs/*, /cp/billing/* — PLATFORM_URL points at
// it (baked in at build time via NEXT_PUBLIC_PLATFORM_URL). CSP
// has to whitelist that origin in connect-src or the browser
// refuses the fetch with "Refused to connect because it violates
// the document's Content Security Policy."
// connect-src: by default canvas calls are same-origin (the tenant
// forwards /cp/* upstream internally via its CP reverse proxy).
// 'self' + wss: is enough for that path.
//
// Self-hosted deployments override PLATFORM_URL at build time and
// the CSP adjusts automatically — no hardcoded hostname here.
// NEXT_PUBLIC_PLATFORM_URL is still honored for self-hosted /
// dev setups that bake a cross-origin backend into the bundle;
// when it's non-empty we add the origin + its wss sibling so
// those deployments don't break.
const platformURL = process.env.NEXT_PUBLIC_PLATFORM_URL ?? "";
const connectSrcParts = ["'self'", "wss:"];
if (platformURL) {
connectSrcParts.push(platformURL);
// Also allow the wss:// sibling of PLATFORM_URL explicitly.
// `wss:` scheme-wildcard covers it today but making the exact
// origin explicit survives a future CSP tightening without
// silently breaking auth.
connectSrcParts.push(platformURL.replace(/^http/, "ws"));
}

View File

@ -0,0 +1,75 @@
package router
import (
	"log"
	"net/http"
	"net/http/httputil"
	"net/url"
	"strings"

	"github.com/gin-gonic/gin"
)
// newCPProxy returns a Gin handler that reverse-proxies requests to the
// control plane (CP) located at targetURL. It sits beside newCanvasProxy
// because both solve the same problem — tenant browser fetches aimed at
// a single same-origin base — for mirror-image endpoint sets.
//
// Why this exists: canvas's browser bundle calls both CP endpoints
// (/cp/auth/me, /cp/orgs, /cp/billing/checkout) AND tenant-platform
// endpoints (/canvas/viewport, /approvals/pending). They share ONE
// build-time base URL (NEXT_PUBLIC_PLATFORM_URL). Baking the CP origin
// breaks tenant calls; baking the tenant origin breaks CP calls. The
// fix is same-origin fetches with the server splitting the traffic.
// This handler is the /cp/* leg of that split; newCanvasProxy is the
// UI leg.
//
// targetURL is validated once, at construction: it must parse, its
// scheme must be "http" or "https", and it must carry a host. Any
// violation terminates the process via log.Fatalf, so a misconfigured
// upstream fails closed at startup instead of at request time. Note
// that plain http IS accepted; nothing here forces https.
//
// If targetURL carries a path prefix (e.g. https://gw.example.com/cp-gw)
// the prefix is prepended to every forwarded request path. A bare host
// or a lone trailing "/" forwards the request path unchanged.
//
// Security:
//   - Cookie and Authorization are NOT stripped: they carry the WorkOS
//     session and must reach the CP to resolve the user. That is the
//     whole point of this proxy.
//   - The Host header IS rewritten to the upstream host so CORS and
//     cookie-domain logic upstream see themselves, not the tenant.
//   - X-Forwarded-For is left untouched by the Director, so
//     httputil.ReverseProxy's standard handling applies; upstream may
//     want it for audit and rate-limit keying.
//   - This handler does not filter paths itself; restricting it to
//     /cp/* is the job of the route it is mounted on.
//
// Timeout note: no custom Transport is configured, so no additional
// upstream timeouts are imposed here. If a future slow endpoint
// warrants a bound, add it here rather than at the gateway.
func newCPProxy(targetURL string) gin.HandlerFunc {
	target, err := url.Parse(targetURL)
	if err != nil {
		log.Fatalf("cp_proxy: invalid CP_UPSTREAM_URL %q: %v", targetURL, err)
	}
	if target.Scheme != "http" && target.Scheme != "https" {
		log.Fatalf("cp_proxy: CP_UPSTREAM_URL scheme must be http(s), got %q", target.Scheme)
	}
	if target.Host == "" {
		log.Fatalf("cp_proxy: CP_UPSTREAM_URL missing host: %q", targetURL)
	}

	// Optional path prefix of the upstream, without trailing slashes so
	// it can be glued directly in front of the (slash-led) request path.
	// Empty for a bare host or a lone "/".
	basePath := strings.TrimRight(target.Path, "/")
	baseRawPath := strings.TrimRight(target.EscapedPath(), "/")

	proxy := &httputil.ReverseProxy{
		Director: func(req *http.Request) {
			req.URL.Scheme = target.Scheme
			req.URL.Host = target.Host
			if basePath != "" {
				// Keep RawPath in step with Path whenever either side
				// has a non-default encoding; compute it BEFORE Path is
				// rewritten because EscapedPath reads the current Path.
				if req.URL.RawPath != "" || baseRawPath != basePath {
					req.URL.RawPath = baseRawPath + req.URL.EscapedPath()
				}
				req.URL.Path = basePath + req.URL.Path
			}
			// Host header rewrite: CP middleware (CORS, cookie-domain)
			// keys off Host; rewriting avoids "origin not allowed" on
			// upstream OPTIONS preflight.
			req.Host = target.Host
		},
		ErrorHandler: func(w http.ResponseWriter, _ *http.Request, err error) {
			// Upstream unreachable or failed mid-flight: log the cause
			// server-side and return a generic 502 to the browser.
			log.Printf("cp_proxy: %v", err)
			w.WriteHeader(http.StatusBadGateway)
			// Best-effort body; nothing useful to do if the client is gone.
			_, _ = w.Write([]byte("control plane unavailable"))
		},
	}

	return func(c *gin.Context) {
		proxy.ServeHTTP(c.Writer, c.Request)
	}
}

View File

@ -519,6 +519,22 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
sh := handlers.NewSocketHandler(hub)
r.GET("/ws", sh.HandleConnect)
// Control-plane reverse proxy — forwards /cp/* to the SaaS CP.
// Canvas's browser bundle fetches /cp/auth/me, /cp/orgs, etc. on
// SAME ORIGIN (the tenant's <slug>.moleculesai.app). Those paths
// aren't mounted on the tenant platform; without this proxy they
// 404 and login breaks. When CP_UPSTREAM_URL is empty (self-
// hosted / local dev where no CP exists), we skip the mount so
// Gin's default 404 surfaces cleanly instead of proxying to a
// placeholder.
//
// Registered as an explicit r.Any route, so the router matches /cp/*
// directly and it never falls through to the canvas NoRoute fallback.
if cpURL := os.Getenv("CP_UPSTREAM_URL"); cpURL != "" {
cpProxy := newCPProxy(cpURL)
r.Any("/cp/*path", cpProxy)
}
// Canvas reverse proxy — when running as a combined tenant image
// (Dockerfile.tenant), the Next.js canvas server runs on :3000 inside
// the same container. Any route not matched by the API handlers above