From dae7f500959d50385633cda371dca0167c50e7f7 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Thu, 23 Apr 2026 13:12:35 -0700 Subject: [PATCH] fix(wsauth): extend dev-mode escape hatch to WorkspaceAuth MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous commit on this branch added a dev-mode fail-open branch to AdminAuth so the Canvas dashboard could enumerate workspaces after the first token lands in the DB. Verification via Chrome (clicking a workspace to open its side panel) surfaced the same class of bug on a different middleware — `WorkspaceAuth` — triggering: API GET /workspaces/:id/activity?type=a2a_receive&source=canvas&limit=50: 401 {"error":"missing workspace auth token"} Root cause is identical to AdminAuth's: in local dev the Canvas (at localhost:3000) calls the platform (at localhost:8080) cross-port, so `isSameOriginCanvas`'s Host==Referer check fails. Without a bearer token, every per-workspace read (/activity, /delegations, /memories, /events/stream, /schedules, etc.) 401s and the side panel is unusable. ### Fix Symmetric extension in `WorkspaceAuth` (workspace-server/internal/middleware/wsauth_middleware.go): after the existing `isSameOriginCanvas` fallback, add a narrow escape hatch that stays fail-open only when BOTH - `ADMIN_TOKEN` is unset (operator has not opted in to the #684 closure), AND - `MOLECULE_ENV` is explicitly a dev mode (`development` / `dev`). SaaS tenants never hit this branch because hosted provisioning sets both `ADMIN_TOKEN` and `MOLECULE_ENV=production`. The comment in the code also links back to AdminAuth's Tier-1b for consistency. 
### Tests Three new tests in wsauth_middleware_test.go mirror the AdminAuth tier-1b suite, exercising the positive path and both negative cases: - `TestWorkspaceAuth_DevModeEscapeHatch_NoBearer_FailsOpen` — the happy path (dev mode, no admin token → 200) - `TestWorkspaceAuth_DevModeEscapeHatch_IgnoredInProduction` — the SaaS-safety guarantee (production + no admin token → 401) - `TestWorkspaceAuth_DevModeEscapeHatch_IgnoredWhenAdminTokenSet` — explicit `ADMIN_TOKEN` wins; dev mode does not silently override the opt-in ### Comprehensive audit of adjacent middlewares Re-scanned every file under workspace-server/internal/middleware/ and every handler that invokes `AbortWithStatusJSON(Unauthorized)` directly, to check for other surfaces where local dev might silently 401. Findings, already OK: - `CanvasOrBearer` — cosmetic routes already accept localhost:3000 via `canvasOriginAllowed` (Origin header check); no change needed. - `tenant_guard.go` — no-op when `MOLECULE_ORG_ID` is unset (self-hosted / dev); no change needed. - `session_auth.go` — verifies against `CP_UPSTREAM_URL`; returns (false, false) in local dev so callers fall through to bearer; no change needed. - `socket.go` `HandleConnect` — Canvas browser clients don't send `X-Workspace-ID` so skip the bearer check; agent clients do and validate as today. No change needed. - Handlers in handlers/{discovery,registry,secrets,plugins_install,a2a_proxy_helpers,schedules}.go — all workspace-scoped routes called by the workspace runtime, not the Canvas browser. Unaffected. - `handlers/admin_test_token.go` — already `MOLECULE_ENV`-aware (the convention this hatch mirrors). ### End-to-end verification 1. Fresh-nuked DB, platform + canvas restarted with `MOLECULE_ENV=development` 2. `POST /workspaces` → token lands in DB (Tier-1 would close here) 3. 
Probed every Canvas-hit endpoint with no bearer, with Canvas-like `Origin: http://localhost:3000`: 200 /workspaces 200 /workspaces/:id/activity 200 /workspaces/:id/delegations 200 /workspaces/:id/memories 200 /approvals/pending 200 /events 4. Chrome browser test: opened http://localhost:3000, clicked a workspace tile — the side panel rendered with the full 13-tab structure (Chat, Activity, Details, Skills, Terminal, Config, Schedule, Channels, Files, Memory, Traces, Events, Audit) and no `Failed to load chat history` error. "No messages yet" placeholder shows instead of the 401 retry screen. 5. `go test -race ./internal/middleware/` — clean 6. `bash tests/e2e/test_api.sh` — 61/61 pass Co-Authored-By: Claude Opus 4.7 (1M context) --- .../internal/middleware/wsauth_middleware.go | 15 +++ .../middleware/wsauth_middleware_test.go | 94 +++++++++++++++++++ 2 files changed, 109 insertions(+) diff --git a/workspace-server/internal/middleware/wsauth_middleware.go b/workspace-server/internal/middleware/wsauth_middleware.go index 50535bad..6775345c 100644 --- a/workspace-server/internal/middleware/wsauth_middleware.go +++ b/workspace-server/internal/middleware/wsauth_middleware.go @@ -90,6 +90,21 @@ func WorkspaceAuth(database *sql.DB) gin.HandlerFunc { c.Next() return } + // Local-dev escape hatch. Mirrors the Tier-1b branch in AdminAuth: + // on `go run ./cmd/server` + `npm run dev` the Canvas (at + // localhost:3000) calls the platform (at localhost:8080) cross-port, + // so isSameOriginCanvas's Host==Referer check fails. Without a + // bearer, every GET /workspaces/:id/activity / /delegations call + // 401s and the Canvas can't show chat history or agent comms. + // Gated on MOLECULE_ENV=development + ADMIN_TOKEN unset so SaaS + // (always MOLECULE_ENV=production + ADMIN_TOKEN set) never hits it. 
+ if os.Getenv("ADMIN_TOKEN") == "" { + env := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_ENV"))) + if env == "development" || env == "dev" { + c.Next() + return + } + } c.AbortWithStatusJSON(http.StatusUnauthorized, gin.H{"error": "missing workspace auth token"}) return } diff --git a/workspace-server/internal/middleware/wsauth_middleware_test.go b/workspace-server/internal/middleware/wsauth_middleware_test.go index b796dc75..54dd05b1 100644 --- a/workspace-server/internal/middleware/wsauth_middleware_test.go +++ b/workspace-server/internal/middleware/wsauth_middleware_test.go @@ -735,6 +735,100 @@ func TestAdminAuth_Issue180_ApprovalsListing_FailOpen_NoTokens(t *testing.T) { } } +// TestWorkspaceAuth_DevModeEscapeHatch_NoBearer_FailsOpen documents the +// local-dev escape hatch on WorkspaceAuth. On `go run ./cmd/server` + +// `npm run dev`, Canvas at localhost:3000 calls the platform at +// localhost:8080 cross-port, so isSameOriginCanvas's Host==Referer +// check fails. Without this hatch the Canvas can't show per-workspace +// activity/delegations. +// +// SaaS never fires this branch because tenant provisioning sets both +// MOLECULE_ENV=production and ADMIN_TOKEN. +func TestWorkspaceAuth_DevModeEscapeHatch_NoBearer_FailsOpen(t *testing.T) { + t.Setenv("MOLECULE_ENV", "development") + t.Setenv("ADMIN_TOKEN", "") + + mockDB, _, err := sqlmock.New() + if err != nil { + t.Fatalf("sqlmock.New: %v", err) + } + defer mockDB.Close() + + // No DB queries expected — the hatch short-circuits before any lookup. 
+ + r := gin.New() + r.GET("/workspaces/:id/activity", WorkspaceAuth(mockDB), func(c *gin.Context) { + c.JSON(http.StatusOK, gin.H{"activity": []interface{}{}}) + }) + + w := httptest.NewRecorder() + req, _ := http.NewRequest(http.MethodGet, + "/workspaces/00000000-0000-0000-0000-000000000000/activity", nil) + r.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Errorf("WorkspaceAuth dev-mode hatch: expected 200, got %d: %s", w.Code, w.Body.String()) + } +} + +// TestWorkspaceAuth_DevModeEscapeHatch_IgnoredInProduction verifies +// the hatch never fires in production mode. This is the SaaS-safety +// guarantee — no one should get a bearer-free 200 in prod just because +// MOLECULE_ENV leaks an unexpected value. +func TestWorkspaceAuth_DevModeEscapeHatch_IgnoredInProduction(t *testing.T) { + t.Setenv("MOLECULE_ENV", "production") + t.Setenv("ADMIN_TOKEN", "") + + mockDB, _, err := sqlmock.New() + if err != nil { + t.Fatalf("sqlmock.New: %v", err) + } + defer mockDB.Close() + + r := gin.New() + r.GET("/workspaces/:id/activity", WorkspaceAuth(mockDB), func(c *gin.Context) { + c.JSON(http.StatusOK, gin.H{"activity": []interface{}{}}) + }) + + w := httptest.NewRecorder() + req, _ := http.NewRequest(http.MethodGet, + "/workspaces/00000000-0000-0000-0000-000000000000/activity", nil) + r.ServeHTTP(w, req) + + if w.Code != http.StatusUnauthorized { + t.Errorf("production mode: expected 401, got %d: %s", w.Code, w.Body.String()) + } +} + +// TestWorkspaceAuth_DevModeEscapeHatch_IgnoredWhenAdminTokenSet verifies +// setting ADMIN_TOKEN on the server (the #684 opt-in) disables the +// dev-mode hatch — callers MUST present a valid bearer. Setting +// ADMIN_TOKEN is the explicit SaaS-mode opt-in. 
+func TestWorkspaceAuth_DevModeEscapeHatch_IgnoredWhenAdminTokenSet(t *testing.T) { + t.Setenv("MOLECULE_ENV", "development") + t.Setenv("ADMIN_TOKEN", "operator-set-this") + + mockDB, _, err := sqlmock.New() + if err != nil { + t.Fatalf("sqlmock.New: %v", err) + } + defer mockDB.Close() + + r := gin.New() + r.GET("/workspaces/:id/activity", WorkspaceAuth(mockDB), func(c *gin.Context) { + c.JSON(http.StatusOK, gin.H{"activity": []interface{}{}}) + }) + + w := httptest.NewRecorder() + req, _ := http.NewRequest(http.MethodGet, + "/workspaces/00000000-0000-0000-0000-000000000000/activity", nil) + r.ServeHTTP(w, req) + + if w.Code != http.StatusUnauthorized { + t.Errorf("dev-mode + ADMIN_TOKEN: expected 401, got %d: %s", w.Code, w.Body.String()) + } +} + // TestAdminAuth_DevModeEscapeHatch_FailsOpenWithHasLiveTokens documents the // Tier-1b dev-mode escape hatch. When the platform runs with MOLECULE_ENV=development // and ADMIN_TOKEN is unset, AdminAuth must stay fail-open even after workspace