From 8516a8f9c65dd210b9b1a242d7544c437557ce49 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Thu, 30 Apr 2026 12:54:51 -0700 Subject: [PATCH] fix(tenant-guard): allowlist /buildinfo so redeploy verifier can reach it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The /buildinfo route added in #2398 to verify each tenant runs the published SHA was 404'd by TenantGuard on every production tenant — the allowlist had /health, /metrics, /registry/register, /registry/heartbeat, but not /buildinfo. The redeploy workflows curl /buildinfo from a CI runner with no X-Molecule-Org-Id header, TenantGuard 404'd them, gin's NoRoute proxied to canvas, canvas returned its HTML 404 page, jq read empty git_sha, and the verifier silently soft-warned every tenant as "unreachable" — which the workflow doesn't fail on. Confirmed externally: curl https://hongmingwang.moleculesai.app/buildinfo → HTTP 404 + Content-Type: text/html (Next.js "404: This page could not be found.") even though /health on the same host returns {"status":"ok"} from gin. The buildinfo package's own doc already declares /buildinfo public by design ("Public is intentional: it's a build identifier, not operational state. The same string is already published as org.opencontainers.image.revision on the container image, so no new info is exposed.") — the allowlist just missed it. Pin the alignment in tenant_guard_test.go: TestTenantGuard_AllowlistBypassesCheck now asserts /buildinfo returns 200 without an org header alongside /health and /metrics, so a future allowlist edit can't silently regress the verifier again. Closes the silent-success failure mode: stale tenants will now show up as STALE (hard-fail) rather than UNREACHABLE (soft-warn). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../internal/middleware/tenant_guard.go | 1 + .../internal/middleware/tenant_guard_test.go | 14 ++++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/workspace-server/internal/middleware/tenant_guard.go b/workspace-server/internal/middleware/tenant_guard.go index 4aaf7a4c..1363060d 100644 --- a/workspace-server/internal/middleware/tenant_guard.go +++ b/workspace-server/internal/middleware/tenant_guard.go @@ -53,6 +53,7 @@ const tenantOrgIDHeader = "X-Molecule-Org-Id" // here only bypasses the cross-org routing check, not auth. var tenantGuardAllowlist = map[string]struct{}{ "/health": {}, + "/buildinfo": {}, "/metrics": {}, "/registry/register": {}, "/registry/heartbeat": {}, diff --git a/workspace-server/internal/middleware/tenant_guard_test.go b/workspace-server/internal/middleware/tenant_guard_test.go index 5d2b4731..c2ab5792 100644 --- a/workspace-server/internal/middleware/tenant_guard_test.go +++ b/workspace-server/internal/middleware/tenant_guard_test.go @@ -8,13 +8,15 @@ import ( "github.com/gin-gonic/gin" ) -// helper: build a router with TenantGuard configured to `orgID` and two -// representative routes — a regular API route and two allowlisted ones. +// helper: build a router with TenantGuard configured to `orgID` and a +// representative API route plus the public allowlisted ones (/health, +// /buildinfo, /metrics). func newGuardedRouter(orgID string) *gin.Engine { gin.SetMode(gin.TestMode) r := gin.New() r.Use(TenantGuardWithOrgID(orgID)) r.GET("/health", func(c *gin.Context) { c.String(200, "ok") }) + r.GET("/buildinfo", func(c *gin.Context) { c.String(200, "buildinfo") }) r.GET("/metrics", func(c *gin.Context) { c.String(200, "metrics") }) r.GET("/workspaces", func(c *gin.Context) { c.String(200, "workspaces") }) return r @@ -71,10 +73,14 @@ func TestTenantGuard_MissingHeaderIs404(t *testing.T) { } // Allowlisted paths bypass the guard even in tenant mode — required for health -// probes (Fly Machines checks) and Prometheus scrape. +// probes (Fly Machines checks), Prometheus scrape, and the redeploy-fleet +// /buildinfo verification step. /buildinfo without an org header used to +// 404-via-NoRoute → canvas (HTML), which made the redeploy verifier think +// every tenant was stale even when the binary was current. Pin this so a +// future allowlist edit can't silently regress that check. func TestTenantGuard_AllowlistBypassesCheck(t *testing.T) { r := newGuardedRouter("org-abc") - for _, path := range []string{"/health", "/metrics"} { + for _, path := range []string{"/health", "/buildinfo", "/metrics"} { w := doRequest(r, path, "") // no header if w.Code != 200 { t.Errorf("%s: allowlisted path should return 200 without header, got %d", path, w.Code)