Implements automatic workspace hibernation for workspaces that have been idle longer than their configured hibernation_idle_minutes threshold.

Changes:
- migrations/029: Add hibernation_idle_minutes INT DEFAULT NULL column + partial index on workspaces table
- registry/hibernation.go: New StartHibernationMonitor goroutine that ticks every 2 min and calls hibernateIdleWorkspaces via the HibernateHandler callback (same import-cycle-prevention pattern as OfflineHandler)
- registry/hibernation_test.go: 5 unit tests covering handler calls, no-rows, DB error, tick behaviour, and context-cancel shutdown
- handlers/workspace_restart.go: New Hibernate() HTTP handler (POST /workspaces/:id/hibernate) + HibernateWorkspace(ctx, id) method — stops container, sets status='hibernated', clears Redis keys, broadcasts event
- handlers/a2a_proxy.go: Auto-wake in resolveAgentURL — when status='hibernated' and URL is empty, triggers async RestartByID and returns 503 + Retry-After: 15 so callers can retry transparently
- registry/liveness.go: Exclude 'hibernated' workspaces from offline detection
- router.go: Register POST /workspaces/:id/hibernate under wsAuth group
- cmd/server/main.go: Wire hibernation monitor via supervised.RunWithRecover

Closes #711

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
60 lines
1.5 KiB
Go
60 lines
1.5 KiB
Go
package registry
|
|
|
|
import (
|
|
"context"
|
|
"log"
|
|
"strings"
|
|
|
|
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
|
)
|
|
|
|
// OfflineHandler is the callback StartLivenessMonitor invokes when a
// workspace's liveness key expires, after the statement that marks the
// workspace offline has run without error. It receives the monitor's context
// and the workspace ID parsed from the expired key.
|
|
type OfflineHandler func(ctx context.Context, workspaceID string)
|
|
|
|
// StartLivenessMonitor subscribes to Redis keyspace expiry events.
|
|
// When a workspace's liveness key (ws:{id}) expires, it marks the workspace offline
|
|
// and calls the onOffline handler.
|
|
func StartLivenessMonitor(ctx context.Context, onOffline OfflineHandler) {
|
|
sub := db.RDB.PSubscribe(ctx, "__keyevent@0__:expired")
|
|
|
|
log.Println("Liveness monitor started — listening for Redis key expirations")
|
|
|
|
ch := sub.Channel()
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
sub.Close()
|
|
return
|
|
case msg := <-ch:
|
|
if msg == nil {
|
|
continue
|
|
}
|
|
key := msg.Payload
|
|
if !strings.HasPrefix(key, "ws:") {
|
|
continue
|
|
}
|
|
parts := strings.SplitN(key, ":", 3)
|
|
if len(parts) != 2 {
|
|
continue
|
|
}
|
|
workspaceID := parts[1]
|
|
|
|
log.Printf("Liveness: workspace %s TTL expired", workspaceID)
|
|
|
|
// Mark offline in Postgres — skip paused and hibernated workspaces (no active container)
|
|
_, err := db.DB.ExecContext(ctx, `
|
|
UPDATE workspaces SET status = 'offline', updated_at = now()
|
|
WHERE id = $1 AND status NOT IN ('removed', 'paused', 'hibernated')
|
|
`, workspaceID)
|
|
if err != nil {
|
|
log.Printf("Liveness: failed to mark %s offline: %v", workspaceID, err)
|
|
continue
|
|
}
|
|
|
|
if onOffline != nil {
|
|
onOffline(ctx, workspaceID)
|
|
}
|
|
}
|
|
}
|
|
}
|