Merge pull request #78 from Molecule-AI/feat/saas-tenant-guard-middleware

feat(platform): TenantGuard middleware — public repo's only SaaS hook (Phase 32 PR #1)
This commit is contained in:
Hongming Wang 2026-04-14 15:40:35 -07:00 committed by GitHub
commit 57a05686a4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 176 additions and 1 deletions

View File

@ -122,7 +122,7 @@ go run ./cmd/server # Run server (requires Postgres + Redis running)
go build -o molecli ./cmd/cli # Build TUI dashboard
./molecli # Run TUI dashboard (requires platform running)
```
Must run from `platform/` directory (not repo root). Env vars: `DATABASE_URL`, `REDIS_URL`, `PORT`, `PLATFORM_URL` (default `http://host.docker.internal:PORT` — passed to agent containers so they can reach the platform), `SECRETS_ENCRYPTION_KEY` (optional AES-256, 32 bytes), `CONFIGS_DIR` (auto-discovered), `PLUGINS_DIR` (deprecated — plugins are now installed per-workspace via API; the `plugins/` registry at repo root is auto-discovered), `ACTIVITY_RETENTION_DAYS` (default `7`), `ACTIVITY_CLEANUP_INTERVAL_HOURS` (default `6`), `CORS_ORIGINS` (comma-separated, default `http://localhost:3000,http://localhost:3001`), `RATE_LIMIT` (requests/min, default `600`), `WORKSPACE_DIR` (optional — global fallback host path for `/workspace` bind-mount; overridden by per-workspace `workspace_dir` column in DB; if neither is set, each workspace gets an isolated Docker named volume), `AWARENESS_URL` (optional — if set, injected into workspace containers along with a deterministic `AWARENESS_NAMESPACE` derived from workspace ID), `MOLECULE_IN_DOCKER` (optional — set to `1` when the platform itself runs inside Docker so the A2A proxy rewrites `127.0.0.1:<port>` URLs to container hostnames; auto-detected via `/.dockerenv`), `MOLECULE_ENV` (optional — set to `production` to hide the `/admin/workspaces/:id/test-token` E2E helper endpoint; unset or any other value leaves it enabled), `MOLECULE_ENABLE_TEST_TOKENS` (optional — set to `1` to force-enable the test-token endpoint even when `MOLECULE_ENV=production`; intended for staging runs only).
Must run from `platform/` directory (not repo root). Env vars: `DATABASE_URL`, `REDIS_URL`, `PORT`, `PLATFORM_URL` (default `http://host.docker.internal:PORT` — passed to agent containers so they can reach the platform), `SECRETS_ENCRYPTION_KEY` (optional AES-256, 32 bytes), `CONFIGS_DIR` (auto-discovered), `PLUGINS_DIR` (deprecated — plugins are now installed per-workspace via API; the `plugins/` registry at repo root is auto-discovered), `ACTIVITY_RETENTION_DAYS` (default `7`), `ACTIVITY_CLEANUP_INTERVAL_HOURS` (default `6`), `CORS_ORIGINS` (comma-separated, default `http://localhost:3000,http://localhost:3001`), `RATE_LIMIT` (requests/min, default `600`), `WORKSPACE_DIR` (optional — global fallback host path for `/workspace` bind-mount; overridden by per-workspace `workspace_dir` column in DB; if neither is set, each workspace gets an isolated Docker named volume), `AWARENESS_URL` (optional — if set, injected into workspace containers along with a deterministic `AWARENESS_NAMESPACE` derived from workspace ID), `MOLECULE_IN_DOCKER` (optional — set to `1` when the platform itself runs inside Docker so the A2A proxy rewrites `127.0.0.1:<port>` URLs to container hostnames; auto-detected via `/.dockerenv`), `MOLECULE_ENV` (optional — set to `production` to hide the `/admin/workspaces/:id/test-token` E2E helper endpoint; unset or any other value leaves it enabled), `MOLECULE_ENABLE_TEST_TOKENS` (optional — set to `1` to force-enable the test-token endpoint even when `MOLECULE_ENV=production`; intended for staging runs only), `MOLECULE_ORG_ID` (optional — the public repo's only SaaS hook. When set to a UUID, every non-allowlisted request must carry a matching `X-Molecule-Org-Id` header or gets a 404; when unset, the guard is a passthrough so self-hosted / dev / CI are unaffected. Set only by the private `molecule-controlplane` provisioner on Fly Machines tenant instances — never by self-hosters).
**Workspace tier resource limits** (issue #14 — override the per-tier memory/CPU caps in `provisioner.ApplyTierConfig`; CPU_SHARES follows Docker's 1024 = 1 CPU convention, translated to NanoCPUs for a hard cap):
- `TIER2_MEMORY_MB` / `TIER2_CPU_SHARES` — Standard tier (defaults `512` / `1024`)

View File

@ -0,0 +1,69 @@
package middleware
import (
"os"
"strings"
"github.com/gin-gonic/gin"
)
// Tenant-mode guard — public repo's only SaaS hook.
//
// The SaaS control plane (private `molecule-controlplane` repo) provisions one
// platform instance per customer org on Fly Machines and sets:
// - MOLECULE_ORG_ID=<uuid> (env on the machine)
// - forwards requests with X-Molecule-Org-Id=<uuid> (control-plane router)
//
// TenantGuard wraps every non-allowlisted route so a mis-routed request from
// another org bounces with 404 (not 403 — don't leak existence).
//
// When MOLECULE_ORG_ID is unset (self-hosted / dev / CI), the guard is a
// passthrough — self-hosters see no behavior change.
//
// The guard intentionally knows nothing about orgs, signup, billing, or
// provisioning. Those live in the private control-plane repo. All this code
// does is: "am I the tenant for this request? if not, 404."
// tenantOrgIDHeader is the HTTP header the control-plane router sets when it
// uses fly-replay to route a request to a tenant machine. The guard reads it
// through gin's Context.GetHeader and compares the value (not the name)
// byte-for-byte against the configured org ID.
//
// HTTP header names are case-insensitive on the wire.
// NOTE(review): the name canonicalization is presumably performed by
// net/http's Header.Get, which GetHeader delegates to — confirm.
const tenantOrgIDHeader = "X-Molecule-Org-Id"
// tenantGuardAllowlist is the set of request paths that MUST remain accessible
// in tenant mode even without the org header (health checks, Prometheus
// scrapes). It is consulted with the request's URL path as the map key, so
// matching is exact — there are no prefix semantics — which avoids
// accidentally exposing admin routes via e.g. "/health/debug/admin".
var tenantGuardAllowlist = map[string]struct{}{
"/health": {}, // health checks
"/metrics": {}, // Prometheus scrapes
}
// TenantGuard builds the tenant-guard middleware from the MOLECULE_ORG_ID
// environment variable.
//
// The variable is read exactly once, at the moment TenantGuard is called, and
// surrounding whitespace is trimmed before the value is handed to
// TenantGuardWithOrgID. The returned handler never looks at the environment
// again, so a later change to the variable only takes effect once the
// middleware is rebuilt (in practice: after a restart).
//
// Tests that need a specific configuration should call TenantGuardWithOrgID
// directly rather than mutating the process environment.
func TenantGuard() gin.HandlerFunc {
	orgID := strings.TrimSpace(os.Getenv("MOLECULE_ORG_ID"))
	return TenantGuardWithOrgID(orgID)
}
// TenantGuardWithOrgID returns the tenant-guard middleware for an explicit
// org ID. It is the constructor tests use; ordinary callers go through
// TenantGuard.
//
// Behavior of the returned handler:
//   - configuredOrgID == "": every request is passed straight to the next
//     handler (the guard is disabled).
//   - the request path is an exact key in tenantGuardAllowlist: passed through
//     without inspecting the org header.
//   - otherwise the tenantOrgIDHeader value must equal configuredOrgID exactly;
//     a missing or different value aborts the request with a bare 404.
func TenantGuardWithOrgID(configuredOrgID string) gin.HandlerFunc {
	if configuredOrgID == "" {
		// Guard disabled: hand every request to the rest of the chain.
		return func(c *gin.Context) {
			c.Next()
		}
	}

	return func(c *gin.Context) {
		_, allowlisted := tenantGuardAllowlist[c.Request.URL.Path]
		if !allowlisted && c.GetHeader(tenantOrgIDHeader) != configuredOrgID {
			// 404 rather than 403: a caller probing another org's machine
			// must not be able to infer that this tenant exists.
			c.AbortWithStatus(404)
			return
		}
		c.Next()
	}
}

View File

@ -0,0 +1,99 @@
package middleware
import (
"net/http"
"net/http/httptest"
"testing"
"github.com/gin-gonic/gin"
)
// newGuardedRouter returns a test-mode gin engine with the tenant guard
// installed for orgID and three GET routes behind it: the two allowlisted
// paths (/health, /metrics) and one ordinary API path (/workspaces). Each
// route answers 200 with a fixed body.
func newGuardedRouter(orgID string) *gin.Engine {
	gin.SetMode(gin.TestMode)

	router := gin.New()
	router.Use(TenantGuardWithOrgID(orgID))

	routes := []struct {
		path string
		body string
	}{
		{"/health", "ok"},
		{"/metrics", "metrics"},
		{"/workspaces", "workspaces"},
	}
	for _, route := range routes {
		body := route.body // per-iteration copy for the handler closure
		router.GET(route.path, func(c *gin.Context) { c.String(200, body) })
	}
	return router
}
// doRequest sends a body-less GET for path through r and returns the recorded
// response. When orgIDHeader is non-empty it is sent as the X-Molecule-Org-Id
// header; an empty string means the header is omitted entirely.
func doRequest(r *gin.Engine, path, orgIDHeader string) *httptest.ResponseRecorder {
	rec := httptest.NewRecorder()
	req := httptest.NewRequest(http.MethodGet, path, nil)
	if len(orgIDHeader) > 0 {
		req.Header.Set("X-Molecule-Org-Id", orgIDHeader)
	}
	r.ServeHTTP(rec, req)
	return rec
}
// With an empty org ID the guard is disabled: every route, allowlisted or not,
// answers 200 without any org header. This pins the existing self-hosted
// behavior.
func TestTenantGuard_UnsetIsPassthrough(t *testing.T) {
	router := newGuardedRouter("")
	paths := []string{"/health", "/metrics", "/workspaces"}
	for _, path := range paths {
		resp := doRequest(router, path, "")
		if resp.Code == 200 {
			continue
		}
		t.Errorf("%s: expected 200 with guard disabled, got %d", path, resp.Code)
	}
}
// Set + matching header → 200.
func TestTenantGuard_MatchingHeader(t *testing.T) {
r := newGuardedRouter("org-abc")
if w := doRequest(r, "/workspaces", "org-abc"); w.Code != 200 {
t.Errorf("matching header: expected 200, got %d", w.Code)
}
}
// Set + mismatching header → 404 (not 403 — don't leak tenant existence).
func TestTenantGuard_MismatchedHeaderIs404(t *testing.T) {
r := newGuardedRouter("org-abc")
w := doRequest(r, "/workspaces", "org-xyz")
if w.Code != 404 {
t.Errorf("mismatched header: expected 404, got %d", w.Code)
}
if w.Body.String() != "" {
// Bouncing via AbortWithStatus leaves an empty body, which is what we
// want — no response body means no tenant fingerprint.
t.Errorf("expected empty body on 404, got %q", w.Body.String())
}
}
// Set + missing header → 404.
func TestTenantGuard_MissingHeaderIs404(t *testing.T) {
r := newGuardedRouter("org-abc")
if w := doRequest(r, "/workspaces", ""); w.Code != 404 {
t.Errorf("missing header: expected 404, got %d", w.Code)
}
}
// Allowlisted paths stay reachable in tenant mode even when the request has no
// org header — needed for health probes (Fly Machines checks) and the
// Prometheus scrape.
func TestTenantGuard_AllowlistBypassesCheck(t *testing.T) {
	router := newGuardedRouter("org-abc")
	for _, path := range [...]string{"/health", "/metrics"} {
		// Empty header argument: the request is sent without X-Molecule-Org-Id.
		if resp := doRequest(router, path, ""); resp.Code != 200 {
			t.Errorf("%s: allowlisted path should return 200 without header, got %d", path, resp.Code)
		}
	}
}
// Allowlist matching is exact, not prefix-based: a registered route under
// "/health/..." must still be guarded, so a header-less request to
// "/health/debug" gets 404 rather than reaching the handler.
func TestTenantGuard_AllowlistIsExactMatch(t *testing.T) {
	const path = "/health/debug"

	gin.SetMode(gin.TestMode)
	router := gin.New()
	router.Use(TenantGuardWithOrgID("org-abc"))
	router.GET(path, func(c *gin.Context) { c.String(200, "debug") })

	// No org header is sent; only an allowlist bypass could produce a 200.
	rec := doRequest(router, path, "")
	if rec.Code != http.StatusNotFound {
		t.Errorf("expected /health/debug to be guarded (404), got %d", rec.Code)
	}
}

View File

@ -51,6 +51,13 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
// Must be registered after rate limiter so aborted requests are also counted.
r.Use(metrics.Middleware())
// Tenant guard — the public repo's only SaaS hook. When MOLECULE_ORG_ID is
// set (only by the private molecule-controlplane provisioner on tenant Fly
// Machines), rejects requests whose X-Molecule-Org-Id header doesn't match.
// Unset (self-hosted / dev / CI) → no-op. Registered after metrics so
// rejected requests still land on the 4xx counter.
r.Use(middleware.TenantGuard())
// Health
r.GET("/health", func(c *gin.Context) {
c.JSON(200, gin.H{"status": "ok"})