molecule-core/workspace-server/internal/middleware/ratelimit.go

// Package middleware provides HTTP middleware for the platform API.
package middleware

import (
	"context"
	"net/http"
	"strconv"
	"sync"
	"time"

	"github.com/gin-gonic/gin"
)

// RateLimiter implements a simple token bucket rate limiter per IP.
type RateLimiter struct {
	mu      sync.Mutex
	buckets map[string]*bucket
	rate    int           // tokens per interval
	interval time.Duration
}

type bucket struct {
	tokens    int
	lastReset time.Time
}

// NewRateLimiter creates a rate limiter with the given rate per interval.
// Pass a context to stop the cleanup goroutine on shutdown.
func NewRateLimiter(rate int, interval time.Duration, ctx context.Context) *RateLimiter {
	rl := &RateLimiter{
		buckets:  make(map[string]*bucket),
		rate:     rate,
		interval: interval,
	}
	go func() {
		ticker := time.NewTicker(5 * time.Minute)
		defer ticker.Stop()
		for {
			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
				rl.mu.Lock()
				cutoff := time.Now().Add(-10 * time.Minute)
				for ip, b := range rl.buckets {
					if b.lastReset.Before(cutoff) {
						delete(rl.buckets, ip)
					}
				}
				rl.mu.Unlock()
			}
		}
	}()
	return rl
}

// Middleware returns a Gin middleware that rate limits by client IP.
func (rl *RateLimiter) Middleware() gin.HandlerFunc {
	return func(c *gin.Context) {
		// Tier-1b dev-mode hatch — same gate as AdminAuth / WorkspaceAuth /
		// discovery. On a local single-user Docker setup the 600-req/min
		// bucket fills fast: a 15-workspace canvas + activity polling +
		// approvals polling + A2A overlay + initial hydration all share
		// one IP bucket, so a minute of active use can trip 429 and blank
		// the page. Gated by MOLECULE_ENV=development + empty ADMIN_TOKEN
		// so SaaS production keeps the bucket.
		if isDevModeFailOpen() {
			c.Header("X-RateLimit-Limit", "unlimited")
			c.Next()
			return
		}

		ip := c.ClientIP()

		rl.mu.Lock()
		b, exists := rl.buckets[ip]
		if !exists {
			b = &bucket{tokens: rl.rate, lastReset: time.Now()}
			rl.buckets[ip] = b
		}

		// Reset tokens if interval has passed
		if time.Since(b.lastReset) >= rl.interval {
			b.tokens = rl.rate
			b.lastReset = time.Now()
		}

		// Issue #105 — advertise the current bucket state so clients and
		// monitoring tools can back off proactively. Headers are set on every
		// response (both allowed and throttled) so they're observable against
		// any endpoint — /health, /metrics, and every /workspaces/* route.
		//
		// The `reset` value is seconds until the current bucket refills,
		// matching the RFC 6585 Retry-After spec for 429 responses and the
		// de-facto X-RateLimit-Reset convention (GitHub, Stripe, etc.).
		remaining := b.tokens - 1
		if remaining < 0 {
			remaining = 0
		}
		resetSeconds := int(time.Until(b.lastReset.Add(rl.interval)).Seconds())
		if resetSeconds < 0 {
			resetSeconds = 0
		}
		c.Header("X-RateLimit-Limit", strconv.Itoa(rl.rate))
		c.Header("X-RateLimit-Remaining", strconv.Itoa(remaining))
		c.Header("X-RateLimit-Reset", strconv.Itoa(resetSeconds))

		if b.tokens <= 0 {
			rl.mu.Unlock()
			// Retry-After is the canonical 429 signal per RFC 6585.
			c.Header("Retry-After", strconv.Itoa(resetSeconds))
			c.JSON(http.StatusTooManyRequests, gin.H{
				"error":       "rate limit exceeded",
				"retry_after": resetSeconds,
			})
			c.Abort()
			return
		}

		b.tokens--
		rl.mu.Unlock()

		c.Next()
	}
}