From 52235aeb276db4317bbc164d26c95d76a9c6f497 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Mon, 20 Apr 2026 13:01:40 -0700 Subject: [PATCH] feat(router): /cp/* reverse-proxy to CP + same-origin canvas fetches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Canvas's browser bundle issues fetches to both CP endpoints (/cp/auth/me, /cp/orgs, ...) AND tenant-platform endpoints (/canvas/viewport, /approvals/pending, /org/templates). They share ONE build-time base URL. Baking api.moleculesai.app broke tenant calls with 404; baking the tenant subdomain broke auth. Tried both today and saw exactly one failure mode per attempt. Real fix: same-origin fetches + tenant-side split. Adds: internal/router/cp_proxy.go # /cp/* → CP_UPSTREAM_URL mounted before NoRoute(canvasProxy). Now a tenant serves: /cp/* → reverse-proxy to api.moleculesai.app /canvas/viewport, /approvals/pending, /workspaces/:id/*, /ws, /registry, → tenant platform (existing handlers) /metrics everything else → canvas UI (existing reverse-proxy) Canvas middleware reverts to `connect-src 'self' wss:` for the same-origin path (keeping explicit PLATFORM_URL whitelist as a self-hosted escape hatch when the build-arg is non-empty). CI build-arg flips to NEXT_PUBLIC_PLATFORM_URL="" so the bundle issues relative fetches. Security of cp_proxy: - Cookie + Authorization PRESERVED across the hop (opposite of canvas proxy) — they carry the WorkOS session, which is the whole point. - Host rewritten to upstream so CORS + cookie-domain on the CP side see their own hostname. - Upstream URL validated at construction: must parse, must be http(s), must have a host — misconfig fails closed. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../publish-workspace-server-image.yml | 24 +++--- canvas/src/middleware.ts | 19 ++--- workspace-server/internal/router/cp_proxy.go | 75 +++++++++++++++++++ workspace-server/internal/router/router.go | 16 ++++ 4 files changed, 109 insertions(+), 25 deletions(-) create mode 100644 workspace-server/internal/router/cp_proxy.go diff --git a/.github/workflows/publish-workspace-server-image.yml b/.github/workflows/publish-workspace-server-image.yml index c72d6c8e..906322f0 100644 --- a/.github/workflows/publish-workspace-server-image.yml +++ b/.github/workflows/publish-workspace-server-image.yml @@ -111,21 +111,19 @@ jobs: ${{ env.TENANT_IMAGE_NAME }}:staging-${{ steps.tags.outputs.sha }} cache-from: type=gha cache-to: type=gha,mode=max - # Bake the SaaS control-plane URL into the canvas bundle. - # Canvas's browser-side code uses PLATFORM_URL for every - # /cp/* call (auth, orgs, billing, terms). Leaving this empty - # made PLATFORM_URL fall back to http://localhost:8080 in the - # built bundle — which fails from the user's browser because - # localhost resolves to their own machine, not the tenant - # instance. Baking the CP origin here fixes browser-side auth - # for every tenant. + # Canvas uses same-origin fetches. The tenant Go platform + # reverse-proxies /cp/* to the SaaS CP via its CP_UPSTREAM_URL + # env; the tenant's /canvas/viewport, /approvals/pending, + # /org/templates etc. live on the tenant platform itself. + # Both legs share one origin (the tenant subdomain) so + # PLATFORM_URL="" forces canvas to fetch paths as relative, + # which land same-origin. # - # Self-hosted / private-label deployments override this by - # rebuilding the image with a different NEXT_PUBLIC_PLATFORM_URL - # build-arg (e.g. https://api.their-domain.com). Same pattern - # molecule-app uses with NEXT_PUBLIC_CP_ORIGIN. + # Self-hosted / private-label deployments override this at + # build time with a specific backend (e.g. 
local dev: + # NEXT_PUBLIC_PLATFORM_URL=http://localhost:8080). build-args: | - NEXT_PUBLIC_PLATFORM_URL=https://api.moleculesai.app + NEXT_PUBLIC_PLATFORM_URL= labels: | org.opencontainers.image.source=https://github.com/${{ github.repository }} org.opencontainers.image.revision=${{ github.sha }} diff --git a/canvas/src/middleware.ts b/canvas/src/middleware.ts index 9903a443..9442eb99 100644 --- a/canvas/src/middleware.ts +++ b/canvas/src/middleware.ts @@ -35,23 +35,18 @@ export function buildCsp(nonce: string, isDev: boolean): string { ].join("; ") + ";"; } - // Canvas makes cross-origin fetches to the control plane for - // /cp/auth/*, /cp/orgs/*, /cp/billing/* — PLATFORM_URL points at - // it (baked in at build time via NEXT_PUBLIC_PLATFORM_URL). CSP - // has to whitelist that origin in connect-src or the browser - // refuses the fetch with "Refused to connect because it violates - // the document's Content Security Policy." + // connect-src: by default canvas calls are same-origin (the tenant + // forwards /cp/* upstream internally via its CP reverse proxy). + // 'self' + wss: is enough for that path. // - // Self-hosted deployments override PLATFORM_URL at build time and - // the CSP adjusts automatically — no hardcoded hostname here. + // NEXT_PUBLIC_PLATFORM_URL is still honored for self-hosted / + // dev setups that bake a cross-origin backend into the bundle; + // when it's non-empty we add the origin + its wss sibling so + // those deployments don't break. const platformURL = process.env.NEXT_PUBLIC_PLATFORM_URL ?? ""; const connectSrcParts = ["'self'", "wss:"]; if (platformURL) { connectSrcParts.push(platformURL); - // Also allow the wss:// sibling of PLATFORM_URL explicitly. - // `wss:` scheme-wildcard covers it today but making the exact - // origin explicit survives a future CSP tightening without - // silently breaking auth. 
connectSrcParts.push(platformURL.replace(/^http/, "ws")); } diff --git a/workspace-server/internal/router/cp_proxy.go b/workspace-server/internal/router/cp_proxy.go new file mode 100644 index 00000000..94e3555f --- /dev/null +++ b/workspace-server/internal/router/cp_proxy.go @@ -0,0 +1,75 @@ +package router + +import ( + "log" + "net/http" + "net/http/httputil" + "net/url" + + "github.com/gin-gonic/gin" +) + +// newCPProxy returns a Gin handler that reverse-proxies /cp/* requests +// to the control plane. Lives beside newCanvasProxy because they solve +// the same problem — tenant browser fetches targeted at a single +// same-origin base — for the mirror-image endpoint set. +// +// Why this exists: canvas's browser bundle calls both CP endpoints +// (/cp/auth/me, /cp/orgs, /cp/billing/checkout) AND tenant-platform +// endpoints (/canvas/viewport, /approvals/pending). They share ONE +// build-time base URL (NEXT_PUBLIC_PLATFORM_URL). Baking the CP +// origin breaks tenant calls; baking the tenant origin breaks CP +// calls. The only sane fix is same-origin fetches + let the server +// split the traffic. This handler is the /cp/* leg of that split; +// newCanvasProxy is the UI leg. +// +// Security: +// - We do NOT strip Cookie/Authorization here: those carry the +// WorkOS session cookie and must reach the CP to resolve the +// user. That's the whole point of this proxy. +// - We DO rewrite the Host header to the CP upstream so CORS and +// cookie-domain logic upstream see themselves, not the tenant. +// - We do NOT strip X-Forwarded-For — upstream may want it for +// audit and rate-limit keying. +// - The proxy ONLY forwards /cp/* paths. The upstream URL is +// passed in by the caller and validated at construction: it +// must parse, use http or https, and have a host. Plain http +// is accepted, so https in prod is up to the configuration. 
+// +// Rate / timeout note: we do NOT set a custom Transport with +// aggressive timeouts because CP endpoints are fast and any hang +// is already bounded by the caller's browser-level timeout. If a +// future slow endpoint warrants a bound, add here not at the +// gateway. +func newCPProxy(targetURL string) gin.HandlerFunc { + target, err := url.Parse(targetURL) + if err != nil { + log.Fatalf("cp_proxy: invalid CP_UPSTREAM_URL %q: %v", targetURL, err) + } + if target.Scheme != "http" && target.Scheme != "https" { + log.Fatalf("cp_proxy: CP_UPSTREAM_URL scheme must be http(s), got %q", target.Scheme) + } + if target.Host == "" { + log.Fatalf("cp_proxy: CP_UPSTREAM_URL missing host: %q", targetURL) + } + + proxy := &httputil.ReverseProxy{ + Director: func(req *http.Request) { + req.URL.Scheme = target.Scheme + req.URL.Host = target.Host + // Host header rewrite: CP middleware (CORS, cookie-domain) + // keys off Host; rewriting avoids "origin not allowed" on + // upstream OPTIONS preflight. + req.Host = target.Host + }, + ErrorHandler: func(w http.ResponseWriter, _ *http.Request, err error) { + log.Printf("cp_proxy: %v", err) + w.WriteHeader(http.StatusBadGateway) + _, _ = w.Write([]byte("control plane unavailable")) + }, + } + + return func(c *gin.Context) { + proxy.ServeHTTP(c.Writer, c.Request) + } +} diff --git a/workspace-server/internal/router/router.go b/workspace-server/internal/router/router.go index c636bf87..15668cbc 100644 --- a/workspace-server/internal/router/router.go +++ b/workspace-server/internal/router/router.go @@ -519,6 +519,22 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi sh := handlers.NewSocketHandler(hub) r.GET("/ws", sh.HandleConnect) + // Control-plane reverse proxy — forwards /cp/* to the SaaS CP. + // Canvas's browser bundle fetches /cp/auth/me, /cp/orgs, etc. on + // SAME ORIGIN (the tenant's .moleculesai.app). 
Those paths + // aren't mounted on the tenant platform; without this proxy they + // 404 and login breaks. When CP_UPSTREAM_URL is empty (self- + // hosted / local dev where no CP exists), we skip the mount so + // Gin's default 404 surfaces cleanly instead of proxying to a + // placeholder. + // + // Mounted as an explicit route: Gin only consults NoRoute when + // nothing matches, so /cp/* wins over the canvas UI fallback. + if cpURL := os.Getenv("CP_UPSTREAM_URL"); cpURL != "" { + cpProxy := newCPProxy(cpURL) + r.Any("/cp/*path", cpProxy) + } + // Canvas reverse proxy — when running as a combined tenant image // (Dockerfile.tenant), the Next.js canvas server runs on :3000 inside // the same container. Any route not matched by the API handlers above