diff --git a/canvas/src/app/page.tsx b/canvas/src/app/page.tsx
index 74291409..e64b5aba 100644
--- a/canvas/src/app/page.tsx
+++ b/canvas/src/app/page.tsx
@@ -7,13 +7,19 @@ import { CommunicationOverlay } from "@/components/CommunicationOverlay";
import { Spinner } from "@/components/Spinner";
import { connectSocket, disconnectSocket } from "@/store/socket";
import { useCanvasStore } from "@/store/canvas";
-import { api } from "@/lib/api";
+import { api, PlatformUnavailableError } from "@/lib/api";
import type { WorkspaceData } from "@/store/socket";
export default function Home() {
const hydrationError = useCanvasStore((s) => s.hydrationError);
const setHydrationError = useCanvasStore((s) => s.setHydrationError);
const [hydrating, setHydrating] = useState(true);
+ // Distinct from hydrationError: platform-down is its own UX path
+ // (different copy, different action — the user's next step is to
+ // check local services, not to retry the API call). Tracked
+ // separately rather than encoded into hydrationError so the
+ // generic-error branch can stay simple.
+ const [platformDown, setPlatformDown] = useState(false);
useEffect(() => {
connectSocket();
@@ -28,8 +34,11 @@ export default function Home() {
useCanvasStore.getState().setViewport(viewport);
}
}).catch((err) => {
- // Initial hydration failed — show error banner to user
console.error("Canvas: initial hydration failed", err);
+ if (err instanceof PlatformUnavailableError) {
+ setPlatformDown(true);
+ return;
+ }
useCanvasStore.getState().setHydrationError(
err instanceof Error && err.message ? err.message : "Failed to load canvas"
);
@@ -53,6 +62,10 @@ export default function Home() {
);
}
+ if (platformDown) {
+ return ;
+ }
+
return (
<>
@@ -78,3 +91,43 @@ export default function Home() {
>
);
}
+
+/**
+ * Dedicated diagnostic for the case where the platform reported that its
+ * datastore (Postgres / Redis) is unreachable. Deliberately separate from
+ * the generic API-error overlay: the user's next action is to check local
+ * services, not to retry the API call. Takes no props and renders static
+ * copy, including the exact commands for the common dev-host setup.
+ */
+function PlatformDownDiagnostic() {
+ return (
+
+
+ Platform infrastructure unreachable
+
+
+ The platform server returned 503 platform_unavailable.
+ That means it can't reach Postgres or Redis to validate your session.
+ Most common cause on a dev host: one of those services stopped.
+
+ If both are running, check /tmp/molecule-server.log for
+ the underlying error. If you're on hosted SaaS, this is a platform incident — try again in a moment.
+
+
+
+ );
+}
diff --git a/canvas/src/lib/__tests__/api.test.ts b/canvas/src/lib/__tests__/api.test.ts
index 09eb0eff..d95e367b 100644
--- a/canvas/src/lib/__tests__/api.test.ts
+++ b/canvas/src/lib/__tests__/api.test.ts
@@ -7,7 +7,7 @@ import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
const mockFetch = vi.fn();
globalThis.fetch = mockFetch;
-import { api } from "../api";
+import { api, PlatformUnavailableError } from "../api";
// ---------------------------------------------------------------------------
// Helpers
@@ -380,3 +380,99 @@ describe("api – request timeout signal", () => {
expect(sigA).not.toBe(sigB);
});
});
+
+// ---------------------------------------------------------------------------
+// PlatformUnavailableError classification
+// ---------------------------------------------------------------------------
+//
+// When the platform's wsauth middleware can't reach Postgres/Redis to
+// validate a token, it returns 503 + {error, code:"platform_unavailable"}.
+// api.ts must surface that as a typed error so the page-level renderer
+// can show a dedicated diagnostic instead of a generic 5xx toast.
+
+describe("PlatformUnavailableError classification", () => {
+  beforeEach(() => {
+    mockFetch.mockReset();
+  });
+
+  /**
+   * Queue a single failed fetch response with the given status. The body
+   * is only available through text(); json() rejects with "not used"
+   * because the client's error path reads the body as text.
+   */
+  function mockFailedResponse(status: number, body: string): void {
+    mockFetch.mockResolvedValueOnce({
+      ok: false,
+      status,
+      json: () => Promise.reject(new Error("not used")),
+      text: () => Promise.resolve(body),
+    } as unknown as Response);
+  }
+
+  /**
+   * Queue a 503 whose JSON body carries code "platform_unavailable";
+   * `detail` is sent as the body's `error` field.
+   */
+  function mock503Platform(detail = "platform datastore unavailable — retry shortly"): void {
+    mockFailedResponse(503, JSON.stringify({ error: detail, code: "platform_unavailable" }));
+  }
+
+  /**
+   * Await a call that is expected to reject and return what it rejected
+   * with. Throws if the call resolves instead, so no test can pass just
+   * because nothing was thrown.
+   */
+  async function rejectionOf(call: Promise<unknown>): Promise<unknown> {
+    try {
+      await call;
+    } catch (reason) {
+      return reason;
+    }
+    throw new Error("expected to throw");
+  }
+
+  it("throws PlatformUnavailableError on 503 + code=platform_unavailable", async () => {
+    // Status 503 together with the structured code must surface as the
+    // typed error, and the error must expose that same code.
+    mock503Platform();
+    const failure = await rejectionOf(api.get("/workspaces"));
+    expect(failure).toBeInstanceOf(PlatformUnavailableError);
+    expect((failure as PlatformUnavailableError).code).toBe("platform_unavailable");
+  });
+
+  it("preserves the server-provided error string as the Error message", async () => {
+    // The message must be the body's `error` field verbatim, not the
+    // generic "API <method> <path>: <status> <body>" string.
+    mock503Platform("Postgres unreachable");
+    const failure = await rejectionOf(api.get("/workspaces"));
+    expect(failure).toBeInstanceOf(PlatformUnavailableError);
+    expect((failure as Error).message).toBe("Postgres unreachable");
+  });
+
+  it("does NOT classify a generic 503 (no platform_unavailable code) as PlatformUnavailableError", async () => {
+    // Generic upstream-busy 503: valid JSON, but without the structured
+    // code. It must stay on the legacy generic-Error path so existing
+    // busy-retry UX isn't disrupted.
+    mockFailedResponse(503, JSON.stringify({ error: "upstream busy" }));
+    const failure = await rejectionOf(api.get("/workspaces/x/a2a"));
+    expect(failure).not.toBeInstanceOf(PlatformUnavailableError);
+    expect((failure as Error).message).toContain("503");
+  });
+
+  it("does NOT classify on 500 (server kept legacy 500 for true internal errors)", async () => {
+    // mockFailure is a helper defined outside this describe block; a 500
+    // must never be reported as platform-unavailable.
+    mockFailure(500, "boom");
+    const failure = await rejectionOf(api.get("/workspaces"));
+    expect(failure).not.toBeInstanceOf(PlatformUnavailableError);
+  });
+
+  it("falls back to generic Error when 503 body isn't JSON", async () => {
+    // A non-JSON body cannot carry the structured code: the parse failure
+    // must be swallowed and the generic Error thrown instead.
+    mockFailedResponse(503, "Service Unavailable");
+    const failure = await rejectionOf(api.get("/workspaces"));
+    expect(failure).not.toBeInstanceOf(PlatformUnavailableError);
+    expect((failure as Error).message).toContain("503");
+  });
+});
diff --git a/canvas/src/lib/api.ts b/canvas/src/lib/api.ts
index e65d92fd..79f6b9f6 100644
--- a/canvas/src/lib/api.ts
+++ b/canvas/src/lib/api.ts
@@ -77,11 +77,39 @@ async function request(
}
if (!res.ok) {
const text = await res.text();
+ // Recognise the platform's structured "datastore unreachable"
+ // shape (returned by wsauth_middleware.abortAuthLookupError when
+ // Postgres/Redis is down). Surface as a typed error so callers
+ // can render a dedicated diagnostic instead of a generic toast.
+ if (res.status === 503 && text) {
+ try {
+ const parsed = JSON.parse(text) as { code?: string; error?: string };
+ if (parsed.code === "platform_unavailable") {
+ throw new PlatformUnavailableError(parsed.error || "platform datastore unavailable");
+ }
+ } catch (err) {
+ // Re-throw the typed error if that's what we just constructed.
+ // JSON.parse failures fall through to the generic Error below.
+ if (err instanceof PlatformUnavailableError) throw err;
+ }
+ }
throw new Error(`API ${method} ${path}: ${res.status} ${text}`);
}
return res.json();
}
+/** Typed error for the platform's "datastore (Postgres/Redis) unreachable"
+ * response. Callers branch on it to show a dedicated diagnostic rather than
+ * a generic API-error toast: the user's next action is to check local
+ * services, not to retry the API call. */
+export class PlatformUnavailableError extends Error {
+  readonly code: "platform_unavailable" = "platform_unavailable";
+  constructor(message: string) {
+    super(message);
+    this.name = "PlatformUnavailableError";
+  }
+}
+
export const api = {
get: (path: string, options?: RequestOptions) => request("GET", path, undefined, 0, options),
post: (path: string, body?: unknown, options?: RequestOptions) => request("POST", path, body, 0, options),
diff --git a/workspace-server/internal/middleware/wsauth_middleware.go b/workspace-server/internal/middleware/wsauth_middleware.go
index 93538753..ef82d8e7 100644
--- a/workspace-server/internal/middleware/wsauth_middleware.go
+++ b/workspace-server/internal/middleware/wsauth_middleware.go
@@ -14,6 +14,30 @@ import (
"github.com/gin-gonic/gin"
)
+// abortAuthLookupError aborts the request with the one response shape used
+// when the auth middleware attempted to validate a token and the underlying
+// datastore lookup failed.
+//
+// The status is 503 rather than 500: the meaning is "platform
+// infrastructure unavailable, retry shortly", not "internal error in our
+// application logic". The structured `code` field lets the canvas tell this
+// apart from a generic 5xx and show a dedicated diagnostic
+// ("Postgres/Redis unreachable — check local services") instead of a
+// confusing `auth check failed` toast.
+//
+// `where` goes only into the log line, so an operator can grep for the call
+// site that fired (WorkspaceAuth vs AdminAuth, HasAnyLiveTokenGlobal vs
+// orgtoken.Validate). `err` is logged but never sent to the client, since
+// it could leak DB hostnames, connection-string fragments, or code paths.
+func abortAuthLookupError(c *gin.Context, where string, err error) {
+	log.Printf("wsauth: %s: datastore lookup failed (returning 503): %v", where, err)
+	body := gin.H{
+		"code":  "platform_unavailable",
+		"error": "platform datastore unavailable — retry shortly",
+	}
+	c.AbortWithStatusJSON(http.StatusServiceUnavailable, body)
+}
+
// WorkspaceAuth returns a Gin middleware that enforces per-workspace bearer-token
// authentication on /workspaces/:id/* sub-routes.
//
@@ -73,8 +97,7 @@ func WorkspaceAuth(database *sql.DB) gin.HandlerFunc {
c.Next()
return
} else if !errors.Is(err, orgtoken.ErrInvalidToken) {
- log.Printf("wsauth: WorkspaceAuth: orgtoken.Validate: %v", err)
- c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "auth check failed"})
+ abortAuthLookupError(c, "WorkspaceAuth: orgtoken.Validate", err)
return
}
// Per-workspace token — narrowest scope, bound to this :id.
@@ -136,8 +159,7 @@ func AdminAuth(database *sql.DB) gin.HandlerFunc {
hasLive, err := wsauth.HasAnyLiveTokenGlobal(ctx, database)
if err != nil {
- log.Printf("wsauth: AdminAuth: HasAnyLiveTokenGlobal failed: %v", err)
- c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "auth check failed"})
+ abortAuthLookupError(c, "AdminAuth: HasAnyLiveTokenGlobal", err)
return
}
if !hasLive {
@@ -214,8 +236,7 @@ func AdminAuth(database *sql.DB) gin.HandlerFunc {
return
} else if !errors.Is(err, orgtoken.ErrInvalidToken) {
// DB error — fail closed and log. Don't expose DB text.
- log.Printf("wsauth: AdminAuth: orgtoken.Validate: %v", err)
- c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "auth check failed"})
+ abortAuthLookupError(c, "AdminAuth: orgtoken.Validate", err)
return
}
diff --git a/workspace-server/internal/middleware/wsauth_middleware_test.go b/workspace-server/internal/middleware/wsauth_middleware_test.go
index edfd2230..6c802a79 100644
--- a/workspace-server/internal/middleware/wsauth_middleware_test.go
+++ b/workspace-server/internal/middleware/wsauth_middleware_test.go
@@ -2,8 +2,11 @@ package middleware
import (
"crypto/sha256"
+ "encoding/json"
+ "errors"
"net/http"
"net/http/httptest"
+ "strings"
"testing"
"github.com/DATA-DOG/go-sqlmock"
@@ -1699,3 +1702,57 @@ func TestAdminAuth_684_SpecificRoutes_NoBearer_Returns401(t *testing.T) {
})
}
}
+
+// ==================== platform-unavailable classification ====================
+//
+// abortAuthLookupError replaces the prior opaque
+// `500 {"error":"auth check failed"}` with a 503 + structured code so
+// the canvas can render a dedicated diagnostic instead of a confusing
+// toast. Pin both the status code and the body shape against
+// regression — this is the contract the canvas's
+// PlatformUnavailableError classifier reads at api.ts.
+
+func TestAdminAuth_DatastoreError_Returns503PlatformUnavailable(t *testing.T) {
+	db, sqlMock, newErr := sqlmock.New()
+	if newErr != nil {
+		t.Fatalf("sqlmock.New: %v", newErr)
+	}
+	defer db.Close()
+
+	// Postgres is "down": the COUNT query issued by HasAnyLiveTokenGlobal
+	// fails with a connection error.
+	dialErr := errors.New("dial tcp [::1]:5432: connect: connection refused")
+	sqlMock.ExpectQuery(hasAnyLiveTokenGlobalQuery).WillReturnError(dialErr)
+
+	router := gin.New()
+	router.GET("/workspaces", AdminAuth(db), func(c *gin.Context) {
+		c.JSON(http.StatusOK, gin.H{"ok": true})
+	})
+
+	rec := httptest.NewRecorder()
+	req, _ := http.NewRequest(http.MethodGet, "/workspaces", nil)
+	router.ServeHTTP(rec, req)
+	if got := rec.Code; got != http.StatusServiceUnavailable {
+		t.Errorf("expected 503, got %d: %s", got, rec.Body.String())
+	}
+	var payload map[string]interface{}
+	if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil {
+		t.Fatalf("response body must be JSON: %v (body=%s)", err, rec.Body.String())
+	}
+	if code := payload["code"]; code != "platform_unavailable" {
+		t.Errorf("response code = %v, want platform_unavailable (canvas reads this for the dedicated diagnostic)", code)
+	}
+	// One type assertion serves both checks: the message must be a string,
+	// and it must not echo the underlying DB error (hostnames or
+	// connection-string fragments would otherwise reach an error toast).
+	message, isString := payload["error"].(string)
+	if !isString {
+		t.Errorf("response must include human-readable error string, got %v", payload["error"])
+	}
+	if strings.Contains(message, "dial tcp") {
+		t.Errorf("response leaks underlying DB error: %q", message)
+	}
+	if err := sqlMock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}