The 600-req/min/IP bucket is sized for SaaS where each tenant has a distinct client IP. On a local Docker setup every panel shares one IP — hydration (/workspaces + /templates + /org/templates + /approvals/pending) plus polling (A2A overlay + activity tabs + approvals + schedule + channels + audit trail) can burst past the bucket inside a minute, blanking the canvas with 429s. The user reported it after dragging workspaces — dragging itself is release-only (savePosition in onNodeDragStop), but the polling that's always running added onto startup tripped the limit. Two-layer fix: Server: RateLimiter.Middleware short-circuits when isDevModeFailOpen is true (MOLECULE_ENV=development + empty ADMIN_TOKEN), matching the Tier-1b hatch already applied to AdminAuth, WorkspaceAuth, and discovery. SaaS production keeps the bucket. Client: api.ts auto-retries a single 429 on idempotent GET requests, waiting the server-provided Retry-After (capped at 20s). Mutations (POST/PUT/PATCH/DELETE) never auto-retry to avoid double-applying. Users on SaaS hitting a legitimate rate-limit spike get one transparent recovery instead of an immediately-blank Canvas. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
126 lines
3.3 KiB
Go
126 lines
3.3 KiB
Go
// Package middleware provides HTTP middleware for the platform API.
|
|
package middleware
|
|
|
|
import (
|
|
"context"
|
|
"net/http"
|
|
"strconv"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/gin-gonic/gin"
|
|
)
|
|
|
|
// RateLimiter implements a simple token bucket rate limiter per IP.
|
|
type RateLimiter struct {
|
|
mu sync.Mutex
|
|
buckets map[string]*bucket
|
|
rate int // tokens per interval
|
|
interval time.Duration
|
|
}
|
|
|
|
// bucket is the rate-limit state tracked for one client IP.
type bucket struct {
	// tokens is the number of requests still allowed before the next reset.
	tokens int

	// lastReset is when tokens was last restored to the full allowance.
	lastReset time.Time
}
|
|
|
|
// NewRateLimiter creates a rate limiter with the given rate per interval.
|
|
// Pass a context to stop the cleanup goroutine on shutdown.
|
|
func NewRateLimiter(rate int, interval time.Duration, ctx context.Context) *RateLimiter {
|
|
rl := &RateLimiter{
|
|
buckets: make(map[string]*bucket),
|
|
rate: rate,
|
|
interval: interval,
|
|
}
|
|
go func() {
|
|
ticker := time.NewTicker(5 * time.Minute)
|
|
defer ticker.Stop()
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-ticker.C:
|
|
rl.mu.Lock()
|
|
cutoff := time.Now().Add(-10 * time.Minute)
|
|
for ip, b := range rl.buckets {
|
|
if b.lastReset.Before(cutoff) {
|
|
delete(rl.buckets, ip)
|
|
}
|
|
}
|
|
rl.mu.Unlock()
|
|
}
|
|
}
|
|
}()
|
|
return rl
|
|
}
|
|
|
|
// Middleware returns a Gin middleware that rate limits by client IP.
|
|
func (rl *RateLimiter) Middleware() gin.HandlerFunc {
|
|
return func(c *gin.Context) {
|
|
// Tier-1b dev-mode hatch — same gate as AdminAuth / WorkspaceAuth /
|
|
// discovery. On a local single-user Docker setup the 600-req/min
|
|
// bucket fills fast: a 15-workspace canvas + activity polling +
|
|
// approvals polling + A2A overlay + initial hydration all share
|
|
// one IP bucket, so a minute of active use can trip 429 and blank
|
|
// the page. Gated by MOLECULE_ENV=development + empty ADMIN_TOKEN
|
|
// so SaaS production keeps the bucket.
|
|
if isDevModeFailOpen() {
|
|
c.Header("X-RateLimit-Limit", "unlimited")
|
|
c.Next()
|
|
return
|
|
}
|
|
|
|
ip := c.ClientIP()
|
|
|
|
rl.mu.Lock()
|
|
b, exists := rl.buckets[ip]
|
|
if !exists {
|
|
b = &bucket{tokens: rl.rate, lastReset: time.Now()}
|
|
rl.buckets[ip] = b
|
|
}
|
|
|
|
// Reset tokens if interval has passed
|
|
if time.Since(b.lastReset) >= rl.interval {
|
|
b.tokens = rl.rate
|
|
b.lastReset = time.Now()
|
|
}
|
|
|
|
// Issue #105 — advertise the current bucket state so clients and
|
|
// monitoring tools can back off proactively. Headers are set on every
|
|
// response (both allowed and throttled) so they're observable against
|
|
// any endpoint — /health, /metrics, and every /workspaces/* route.
|
|
//
|
|
// The `reset` value is seconds until the current bucket refills,
|
|
// matching the RFC 6585 Retry-After spec for 429 responses and the
|
|
// de-facto X-RateLimit-Reset convention (GitHub, Stripe, etc.).
|
|
remaining := b.tokens - 1
|
|
if remaining < 0 {
|
|
remaining = 0
|
|
}
|
|
resetSeconds := int(time.Until(b.lastReset.Add(rl.interval)).Seconds())
|
|
if resetSeconds < 0 {
|
|
resetSeconds = 0
|
|
}
|
|
c.Header("X-RateLimit-Limit", strconv.Itoa(rl.rate))
|
|
c.Header("X-RateLimit-Remaining", strconv.Itoa(remaining))
|
|
c.Header("X-RateLimit-Reset", strconv.Itoa(resetSeconds))
|
|
|
|
if b.tokens <= 0 {
|
|
rl.mu.Unlock()
|
|
// Retry-After is the canonical 429 signal per RFC 6585.
|
|
c.Header("Retry-After", strconv.Itoa(resetSeconds))
|
|
c.JSON(http.StatusTooManyRequests, gin.H{
|
|
"error": "rate limit exceeded",
|
|
"retry_after": resetSeconds,
|
|
})
|
|
c.Abort()
|
|
return
|
|
}
|
|
|
|
b.tokens--
|
|
rl.mu.Unlock()
|
|
|
|
c.Next()
|
|
}
|
|
}
|