molecule-core/workspace-server/internal/middleware/ratelimit.go
Hongming Wang f2a4b6e0d3 fix: dev-mode bypass for IP rate limiter + 429 retry on GET
The 600-req/min/IP bucket is sized for SaaS where each tenant has
a distinct client IP. On a local Docker setup every panel shares
one IP — hydration (/workspaces + /templates + /org/templates +
/approvals/pending) plus polling (A2A overlay + activity tabs +
approvals + schedule + channels + audit trail) can burst past the
bucket inside a minute, blanking the canvas with 429s. The user
reported it after dragging workspaces — dragging itself is
release-only (savePosition in onNodeDragStop), but the polling
that's always running added onto startup tripped the limit.

Two-layer fix:

Server: RateLimiter.Middleware short-circuits when isDevModeFailOpen
is true (MOLECULE_ENV=development + empty ADMIN_TOKEN), matching
the Tier-1b hatch already applied to AdminAuth, WorkspaceAuth, and
discovery. SaaS production keeps the bucket.

Client: api.ts auto-retries a single 429 on idempotent GET requests,
waiting the server-provided Retry-After (capped at 20s). Mutations
(POST/PUT/PATCH/DELETE) never auto-retry to avoid double-applying.
Users on SaaS hitting a legitimate rate-limit spike get one
transparent recovery instead of an immediately-blank Canvas.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 20:44:09 -07:00

126 lines
3.3 KiB
Go

// Package middleware provides HTTP middleware for the platform API.
package middleware
import (
"context"
"net/http"
"strconv"
"sync"
"time"
"github.com/gin-gonic/gin"
)
// RateLimiter implements a simple token bucket rate limiter per IP.
type RateLimiter struct {
mu sync.Mutex
buckets map[string]*bucket
rate int // tokens per interval
interval time.Duration
}
type bucket struct {
tokens int
lastReset time.Time
}
// NewRateLimiter creates a rate limiter with the given rate per interval.
// Pass a context to stop the cleanup goroutine on shutdown.
func NewRateLimiter(rate int, interval time.Duration, ctx context.Context) *RateLimiter {
rl := &RateLimiter{
buckets: make(map[string]*bucket),
rate: rate,
interval: interval,
}
go func() {
ticker := time.NewTicker(5 * time.Minute)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
rl.mu.Lock()
cutoff := time.Now().Add(-10 * time.Minute)
for ip, b := range rl.buckets {
if b.lastReset.Before(cutoff) {
delete(rl.buckets, ip)
}
}
rl.mu.Unlock()
}
}
}()
return rl
}
// Middleware returns a Gin middleware that rate limits by client IP.
func (rl *RateLimiter) Middleware() gin.HandlerFunc {
return func(c *gin.Context) {
// Tier-1b dev-mode hatch — same gate as AdminAuth / WorkspaceAuth /
// discovery. On a local single-user Docker setup the 600-req/min
// bucket fills fast: a 15-workspace canvas + activity polling +
// approvals polling + A2A overlay + initial hydration all share
// one IP bucket, so a minute of active use can trip 429 and blank
// the page. Gated by MOLECULE_ENV=development + empty ADMIN_TOKEN
// so SaaS production keeps the bucket.
if isDevModeFailOpen() {
c.Header("X-RateLimit-Limit", "unlimited")
c.Next()
return
}
ip := c.ClientIP()
rl.mu.Lock()
b, exists := rl.buckets[ip]
if !exists {
b = &bucket{tokens: rl.rate, lastReset: time.Now()}
rl.buckets[ip] = b
}
// Reset tokens if interval has passed
if time.Since(b.lastReset) >= rl.interval {
b.tokens = rl.rate
b.lastReset = time.Now()
}
// Issue #105 — advertise the current bucket state so clients and
// monitoring tools can back off proactively. Headers are set on every
// response (both allowed and throttled) so they're observable against
// any endpoint — /health, /metrics, and every /workspaces/* route.
//
// The `reset` value is seconds until the current bucket refills,
// matching the RFC 6585 Retry-After spec for 429 responses and the
// de-facto X-RateLimit-Reset convention (GitHub, Stripe, etc.).
remaining := b.tokens - 1
if remaining < 0 {
remaining = 0
}
resetSeconds := int(time.Until(b.lastReset.Add(rl.interval)).Seconds())
if resetSeconds < 0 {
resetSeconds = 0
}
c.Header("X-RateLimit-Limit", strconv.Itoa(rl.rate))
c.Header("X-RateLimit-Remaining", strconv.Itoa(remaining))
c.Header("X-RateLimit-Reset", strconv.Itoa(resetSeconds))
if b.tokens <= 0 {
rl.mu.Unlock()
// Retry-After is the canonical 429 signal per RFC 6585.
c.Header("Retry-After", strconv.Itoa(resetSeconds))
c.JSON(http.StatusTooManyRequests, gin.H{
"error": "rate limit exceeded",
"retry_after": resetSeconds,
})
c.Abort()
return
}
b.tokens--
rl.mu.Unlock()
c.Next()
}
}