diff --git a/canvas/src/lib/api.ts b/canvas/src/lib/api.ts index 86085081..5ee1d8ce 100644 --- a/canvas/src/lib/api.ts +++ b/canvas/src/lib/api.ts @@ -17,7 +17,8 @@ const DEFAULT_TIMEOUT_MS = 15_000; async function request( method: string, path: string, - body?: unknown + body?: unknown, + retryCount = 0, ): Promise { // SaaS cross-origin shape: // - X-Molecule-Org-Slug: derived from window.location.hostname by @@ -38,6 +39,18 @@ async function request( credentials: "include", signal: AbortSignal.timeout(DEFAULT_TIMEOUT_MS), }); + // Transient rate-limit recovery. A single IP bucket can momentarily + // spike on page load (several panels hydrate simultaneously). Instead + // of bubbling up a 429 that blanks the Canvas, wait the + // Retry-After window and try once — any further 429 surfaces normally. + // GET / idempotent methods only; never auto-retry mutations. + if (res.status === 429 && retryCount === 0 && method === "GET") { + const retryAfterHeader = res.headers.get("Retry-After"); + const retryAfter = retryAfterHeader ? parseInt(retryAfterHeader, 10) : NaN; + const delayMs = Number.isFinite(retryAfter) ? Math.min(retryAfter, 20) * 1000 : 2000; + await new Promise((resolve) => setTimeout(resolve, delayMs)); + return request(method, path, body, retryCount + 1); + } if (res.status === 401) { // Session expired or credentials lost. On SaaS (tenant subdomain) // the login page lives at /cp/auth/login and is mounted by the diff --git a/workspace-server/internal/middleware/ratelimit.go b/workspace-server/internal/middleware/ratelimit.go index 0e607762..1b2f50dd 100644 --- a/workspace-server/internal/middleware/ratelimit.go +++ b/workspace-server/internal/middleware/ratelimit.go @@ -57,6 +57,19 @@ func NewRateLimiter(rate int, interval time.Duration, ctx context.Context) *Rate // Middleware returns a Gin middleware that rate limits by client IP. func (rl *RateLimiter) Middleware() gin.HandlerFunc { return func(c *gin.Context) { + // Tier-1b dev-mode hatch — same gate as AdminAuth / WorkspaceAuth / + // discovery. On a local single-user Docker setup the 600-req/min + // bucket fills fast: a 15-workspace canvas + activity polling + + // approvals polling + A2A overlay + initial hydration all share + // one IP bucket, so a minute of active use can trip 429 and blank + // the page. Gated by MOLECULE_ENV=development + empty ADMIN_TOKEN + // so SaaS production keeps the bucket. + if isDevModeFailOpen() { + c.Header("X-RateLimit-Limit", "unlimited") + c.Next() + return + } + ip := c.ClientIP() rl.mu.Lock()