diff --git a/canvas/src/components/ProvisioningTimeout.tsx b/canvas/src/components/ProvisioningTimeout.tsx index 1c09fa3b..008623c9 100644 --- a/canvas/src/components/ProvisioningTimeout.tsx +++ b/canvas/src/components/ProvisioningTimeout.tsx @@ -65,6 +65,12 @@ export function ProvisioningTimeout({ // banner even if they stay in provisioning. Cleared when the // workspace leaves provisioning (status changes). const [dismissed, setDismissed] = useState>(new Set()); + // Watch the live WS health. While it's not "connected", local node + // status reflects the last event we received before the drop — + // workspaces may have actually transitioned to online minutes ago. + // Suppress the banner until WS recovers + rehydrate confirms each + // workspace is genuinely still provisioning. + const wsStatus = useCanvasStore((s) => s.wsStatus); // Subscribe to provisioning nodes — use shallow compare to avoid infinite re-render // (filter+map creates new array reference on every store update). @@ -251,8 +257,11 @@ export function ProvisioningTimeout({ }, []); const visibleTimedOut = useMemo( - () => timedOut.filter((e) => !dismissed.has(e.workspaceId)), - [timedOut, dismissed], + () => + wsStatus === "connected" + ? timedOut.filter((e) => !dismissed.has(e.workspaceId)) + : [], + [timedOut, dismissed, wsStatus], ); if (visibleTimedOut.length === 0) return null; diff --git a/canvas/src/components/TemplatePalette.tsx b/canvas/src/components/TemplatePalette.tsx index 8c40d022..f3b9044b 100644 --- a/canvas/src/components/TemplatePalette.tsx +++ b/canvas/src/components/TemplatePalette.tsx @@ -1,6 +1,7 @@ "use client"; import { useState, useEffect, useCallback, useRef } from "react"; +import { flushSync } from "react-dom"; import { api } from "@/lib/api"; import { useCanvasStore } from "@/store/canvas"; import type { WorkspaceData } from "@/store/socket"; @@ -326,7 +327,18 @@ export function OrgTemplatesSection() { onSecretSaved={refreshConfiguredKeys} onProceed={() => { const org = preflight.org; - setPreflight(null); + // flushSync guarantees the modal unmounts BEFORE we kick + // off the import network call. Without it, React batches + // setPreflight(null) with the setImporting(...) from + // doImport's synchronous prefix, both commit at the end + // of this handler, AND the await import() POST may yield + // a microtask before React schedules the paint. Net + // effect: the modal backdrop sat over the canvas during + // the first wave of WORKSPACE_PROVISIONING WS events, + // hiding the spawn animation. Force the close to land + // first so the user sees the canvas reveal + agents + // popping into place. + flushSync(() => setPreflight(null)); void doImport(org); }} onCancel={() => setPreflight(null)} diff --git a/canvas/src/store/__tests__/socket.test.ts b/canvas/src/store/__tests__/socket.test.ts index c0a7c523..ee7f94b7 100644 --- a/canvas/src/store/__tests__/socket.test.ts +++ b/canvas/src/store/__tests__/socket.test.ts @@ -263,13 +263,59 @@ describe("WebSocket onclose – auto-reconnect", () => { const ws = getLastWS(); ws.triggerClose(); - // Fast-forward timers to trigger the reconnect - vi.runAllTimers(); + // First reconnect attempt is scheduled at 1s (Math.min(1000 * 2^0, + // 30000)). Advance just past that — vi.runAllTimers() would + // additionally re-fire the fallback poll setInterval forever and + // hit the 10000-timer abort. + vi.advanceTimersByTime(1100); expect(MockWebSocket.instances.length).toBeGreaterThan(1); }); }); +describe("HTTP fallback poll while WS unhealthy", () => { + it("starts a setInterval after onclose so /workspaces stays fresh", () => { + const setIntervalSpy = vi.spyOn(globalThis, "setInterval"); + connectSocket(); + const ws = getLastWS(); + ws.triggerClose(); + // The fallback poll runs at 10s; the reconnect uses setTimeout, so + // any setInterval registered between connect and close must be the + // fallback poll. + const fallbackCalls = setIntervalSpy.mock.calls.filter( + ([, delay]) => delay === 10_000, + ); + expect(fallbackCalls.length).toBeGreaterThan(0); + setIntervalSpy.mockRestore(); + }); + + it("clears the fallback poll once the WS reconnects (onopen)", () => { + const clearIntervalSpy = vi.spyOn(globalThis, "clearInterval"); + connectSocket(); + const ws = getLastWS(); + ws.triggerClose(); // starts fallback poll + clearIntervalSpy.mockClear(); + // Advance past the first reconnect delay so a fresh ws exists, + // then trigger its open. + vi.advanceTimersByTime(1100); + const ws2 = getLastWS(); + ws2.triggerOpen(); + expect(clearIntervalSpy).toHaveBeenCalled(); + clearIntervalSpy.mockRestore(); + }); + + it("clears the fallback poll on disconnect", () => { + const clearIntervalSpy = vi.spyOn(globalThis, "clearInterval"); + connectSocket(); + const ws = getLastWS(); + ws.triggerClose(); // starts fallback poll + clearIntervalSpy.mockClear(); + disconnectSocket(); + expect(clearIntervalSpy).toHaveBeenCalled(); + clearIntervalSpy.mockRestore(); + }); +}); + // --------------------------------------------------------------------------- // onerror handler // --------------------------------------------------------------------------- diff --git a/canvas/src/store/socket.ts b/canvas/src/store/socket.ts index 364c7ffb..f3b8f99f 100644 --- a/canvas/src/store/socket.ts +++ b/canvas/src/store/socket.ts @@ -56,6 +56,15 @@ export class RehydrateDedup { } } +/** Cadence for the HTTP fallback rehydrate that runs while the WS is + * in connecting/disconnected limbo. 10s is short enough that the user + * sees STARTING → ONLINE within one tick after the platform finishes + * provisioning, but long enough to not pound /workspaces if the + * network truly is down. The dedup gate inside rehydrate() collapses + * this against the post-onopen rehydrate, so reconnect doesn't pay + * for a duplicate fetch. */ +const FALLBACK_POLL_MS = 10_000; + class ReconnectingSocket { private ws: WebSocket | null = null; private attempt = 0; @@ -63,6 +72,13 @@ class ReconnectingSocket { private lastEventTime = 0; private healthCheckTimer: ReturnType | null = null; private reconnectTimer: ReturnType | null = null; + // Polls /workspaces while the WS is unhealthy so the canvas reflects + // truth even when realtime events aren't arriving. Without this the + // store can stay frozen for minutes — e.g. workspaces transition + // STARTING → ONLINE on the platform but the canvas keeps showing + // STARTING until the WS finally reconnects, triggering false + // "Provisioning Timeout" banners on already-online workspaces. + private fallbackPollTimer: ReturnType | null = null; // disposed signals that disconnect() has been called. Any in-flight // reconnect / handshake must abort early rather than attach to a // socket the caller no longer owns — otherwise React StrictMode's @@ -102,6 +118,7 @@ class ReconnectingSocket { this.attempt = 0; this.lastEventTime = Date.now(); useCanvasStore.getState().setWsStatus("connected"); + this.stopFallbackPoll(); this.rehydrate(); this.startHealthCheck(); }; @@ -125,6 +142,7 @@ class ReconnectingSocket { if (this.disposed || this.ws !== ws) return; this.stopHealthCheck(); useCanvasStore.getState().setWsStatus("connecting"); + this.startFallbackPoll(); const delay = Math.min(1000 * 2 ** this.attempt, 30000); this.attempt++; this.reconnectTimer = setTimeout(() => this.connect(), delay); @@ -157,6 +175,28 @@ class ReconnectingSocket { } } + /** While the WS is in connecting/disconnected limbo, poll /workspaces + * so the store stays fresh. The reconnect attempts continue in + * parallel; whichever recovers first wins. rehydrate()'s own dedup + * gate prevents this from racing with the open-time rehydrate. */ + private startFallbackPoll() { + if (this.fallbackPollTimer) return; + this.fallbackPollTimer = setInterval(() => { + if (this.disposed) { + this.stopFallbackPoll(); + return; + } + void this.rehydrate(); + }, FALLBACK_POLL_MS); + } + + private stopFallbackPoll() { + if (this.fallbackPollTimer) { + clearInterval(this.fallbackPollTimer); + this.fallbackPollTimer = null; + } + } + private rehydrate(): Promise { // Reuse an in-flight fetch — a second caller during the GET // shouldn't kick off a parallel one. @@ -191,6 +231,7 @@ class ReconnectingSocket { disconnect() { this.disposed = true; this.stopHealthCheck(); + this.stopFallbackPoll(); if (this.reconnectTimer) { clearTimeout(this.reconnectTimer); this.reconnectTimer = null;