From 522d0557581472952ebca9e36e4626497d79f59e Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Tue, 14 Apr 2026 17:54:13 -0700 Subject: [PATCH] fix(middleware): TenantGuard accepts org id via Fly-Replay-Src state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase B.3 pair-fix to the control plane's fly-replay state change. Background: the private molecule-controlplane's router emits `fly-replay: app=X;instance=Y;state=org-id=<org-id>`. Fly's edge replays the request to the tenant and injects `Fly-Replay-Src: instance=Z;...; state=org-id=<org-id>` on the replayed request. But response headers from the cp (like X-Molecule-Org-Id) never travel to the replayed tenant — only the state= param does. TenantGuard now checks both paths in order: 1. Primary: X-Molecule-Org-Id header (direct-access path, e.g. molecli) 2. Secondary: Fly-Replay-Src's `state=org-id=<org-id>` segment (production fly-replay path) Either matching configured MOLECULE_ORG_ID → allow. Neither matches → 404 (still don't leak tenant existence). New helper orgIDFromReplaySrc parses the semicolon-separated Fly-Replay-Src header per Fly's format. Covered by a table-driven test with 7 cases including malformed + empty-header + wrong-state-key. Tests: +3 new (TestTenantGuard_* FlyReplaySrc match + mismatch, TestOrgIDFromReplaySrc table). Co-Authored-By: Claude Opus 4.6 (1M context) --- platform/internal/middleware/tenant_guard.go | 53 +++++++++++++++++-- .../internal/middleware/tenant_guard_test.go | 52 ++++++++++++++++++ 2 files changed, 100 insertions(+), 5 deletions(-) diff --git a/platform/internal/middleware/tenant_guard.go b/platform/internal/middleware/tenant_guard.go index a48c0bbc..d59b37af 100644 --- a/platform/internal/middleware/tenant_guard.go +++ b/platform/internal/middleware/tenant_guard.go @@ -7,6 +7,16 @@ import ( "github.com/gin-gonic/gin" ) +// flyReplaySrcHeader is the header Fly injects on requests it replays via +// the `fly-replay: ...;state=...` mechanism. 
Format is a semicolon- +// separated list of k=v pairs, e.g. +// instance=91854...;region=ord;t=1700000000000;state=org-id=<org-id> +// We care only about the `state=` segment; the control plane encodes +// the org id as `state=org-id=<org-id>` so we can treat it equivalently +// to the X-Molecule-Org-Id header. +const flyReplaySrcHeader = "Fly-Replay-Src" +const flyReplayStatePrefix = "org-id=" + // Tenant-mode guard — public repo's only SaaS hook. // // The SaaS control plane (private `molecule-controlplane` repo) provisions one @@ -58,12 +68,45 @@ func TenantGuardWithOrgID(configuredOrgID string) gin.HandlerFunc { c.Next() return } - if c.GetHeader(tenantOrgIDHeader) != configuredOrgID { - // 404 not 403 — existence of this tenant must not be inferable by - // probing other orgs' machines. - c.AbortWithStatus(404) + // Primary: explicit X-Molecule-Org-Id header (direct access path, + // e.g. from molecli or internal tooling that sets it directly). + if c.GetHeader(tenantOrgIDHeader) == configuredOrgID { + c.Next() return } - c.Next() + // Secondary: org id encoded in Fly-Replay-Src state by the control + // plane. This is the path every production request takes, because + // response headers set by the cp don't travel to the replayed + // tenant — only the state= param does. + if orgIDFromReplaySrc(c.GetHeader(flyReplaySrcHeader)) == configuredOrgID { + c.Next() + return + } + // 404 not 403 — existence of this tenant must not be inferable by + // probing other orgs' machines. + c.AbortWithStatus(404) } } + +// orgIDFromReplaySrc extracts the org id the control plane encoded via +// `state=org-id=<org-id>` in the fly-replay response header. Returns "" if +// the header is missing, malformed, or the state segment isn't ours. +// Separated from TenantGuardWithOrgID so tests can round-trip header → +// id without spinning a full Gin context. 
+func orgIDFromReplaySrc(header string) string { + if header == "" { + return "" + } + for _, seg := range strings.Split(header, ";") { + seg = strings.TrimSpace(seg) + const statePrefix = "state=" + if !strings.HasPrefix(seg, statePrefix) { + continue + } + value := seg[len(statePrefix):] + if strings.HasPrefix(value, flyReplayStatePrefix) { + return value[len(flyReplayStatePrefix):] + } + } + return "" +} diff --git a/platform/internal/middleware/tenant_guard_test.go b/platform/internal/middleware/tenant_guard_test.go index 97c0679c..034e4dda 100644 --- a/platform/internal/middleware/tenant_guard_test.go +++ b/platform/internal/middleware/tenant_guard_test.go @@ -82,6 +82,58 @@ func TestTenantGuard_AllowlistBypassesCheck(t *testing.T) { } } +// Fly-Replay-Src state path: the production path. Control plane sends the +// org id as `state=org-id=` via fly-replay; Fly injects that into +// the replayed request as a segment of the Fly-Replay-Src header. +func TestTenantGuard_AcceptsFlyReplaySrcState(t *testing.T) { + gin.SetMode(gin.TestMode) + r := gin.New() + r.Use(TenantGuardWithOrgID("org-abc")) + r.GET("/workspaces", func(c *gin.Context) { c.String(200, "ok") }) + + req := httptest.NewRequest("GET", "/workspaces", nil) + req.Header.Set("Fly-Replay-Src", "instance=src-123;region=ord;t=1700000000000;state=org-id=org-abc") + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + + if w.Code != 200 { + t.Errorf("Fly-Replay-Src state match: expected 200, got %d", w.Code) + } +} + +func TestTenantGuard_RejectsFlyReplaySrcMismatch(t *testing.T) { + gin.SetMode(gin.TestMode) + r := gin.New() + r.Use(TenantGuardWithOrgID("org-abc")) + r.GET("/workspaces", func(c *gin.Context) { c.String(200, "ok") }) + + req := httptest.NewRequest("GET", "/workspaces", nil) + req.Header.Set("Fly-Replay-Src", "state=org-id=org-xyz") + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + + if w.Code != 404 { + t.Errorf("mismatched Fly-Replay-Src state: expected 404, got %d", w.Code) + } +} + 
+func TestOrgIDFromReplaySrc(t *testing.T) { + cases := map[string]string{ + "instance=x;region=ord;state=org-id=abc-123": "abc-123", + "state=org-id=abc-123;instance=x": "abc-123", + " state=org-id=abc-123 ": "abc-123", + "state=other=foo;instance=x": "", // wrong state key + "instance=x;region=ord": "", // no state + "": "", // empty header + "garbage": "", // unparseable + } + for in, want := range cases { + if got := orgIDFromReplaySrc(in); got != want { + t.Errorf("orgIDFromReplaySrc(%q) = %q, want %q", in, got, want) + } + } +} + // The allowlist is exact-match, not prefix. "/health/debug" must NOT bypass. func TestTenantGuard_AllowlistIsExactMatch(t *testing.T) { gin.SetMode(gin.TestMode)