Files
molecule-core/workspace-server/internal/registry/hibernation.go
T
claude-ceo-assistant f7e2976324
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 9s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 7s
Check migration collisions / Migration version collision check (pull_request) Successful in 10s
CI / Detect changes (pull_request) Successful in 7s
CI / Python Lint & Test (pull_request) Successful in 5s
E2E API Smoke Test / detect-changes (pull_request) Successful in 7s
E2E Chat / detect-changes (pull_request) Successful in 7s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (pull_request) Successful in 5s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 10s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Has been skipped
Handlers Postgres Integration / detect-changes (pull_request) Successful in 6s
Harness Replays / detect-changes (pull_request) Successful in 4s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 4s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 33s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (pull_request) Successful in 50s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 8s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 9s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 58s
gate-check-v3 / gate-check (pull_request) Successful in 4s
qa-review / approved (pull_request) Successful in 3s
security-review / approved (pull_request) Successful in 3s
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 4s
sop-checklist / review-refire (pull_request) Has been skipped
sop-tier-check / tier-check (pull_request) Successful in 4s
Ops Scripts Tests / Ops scripts (unittest) (pull_request) Successful in 1m6s
E2E Staging External Runtime / E2E Staging External Runtime (pull_request) Successful in 5m25s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 20s
E2E Chat / E2E Chat (pull_request) Successful in 33s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 11s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m58s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 2m44s
Harness Replays / Harness Replays (pull_request) Successful in 6s
CI / Platform (Go) (pull_request) Successful in 6m9s
CI / Canvas (Next.js) (pull_request) Successful in 7m41s
CI / all-required (pull_request) Successful in 32m0s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
audit-force-merge / audit (pull_request) Successful in 32s
chore: retire unmaintained workspace runtimes
2026-05-23 23:45:09 -07:00

103 lines
3.6 KiB
Go

package registry
import (
"context"
"log"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/supervised"
)
// HibernateHandler is the callback the hibernation monitor invokes once for
// each workspace it decides should be hibernated. It receives the monitor's
// context and the ID of the selected workspace.
//
// The handler is expected to stop the container, update the DB status, and
// broadcast the event; the monitor performs none of those steps itself. The
// handler returns nothing, so the monitor cannot observe whether hibernation
// succeeded — any failure must be handled or logged inside the handler.
type HibernateHandler func(ctx context.Context, workspaceID string)
// defaultHibernationInterval is how often the hibernation monitor polls the
// database for idle-too-long workspaces. Two minutes is fine-grained enough
// for typical hibernation_idle_minutes values (≥5) and cheap enough on a busy
// platform. Because detection happens only on a tick, a workspace can stay up
// for as much as one interval beyond its idle threshold.
// NOTE(review): the query is presumably served by a partial index with a
// small range scan; the index definition is not visible here — confirm
// against the migrations.
const defaultHibernationInterval = 2 * time.Minute
// StartHibernationMonitor periodically scans for workspaces that have been
// idle (active_tasks = 0) longer than their configured hibernation_idle_minutes
// and calls onHibernate for each, polling every defaultHibernationInterval.
// It blocks until ctx is done, so a caller that must not block should run it
// in its own goroutine.
// NOTE(review): no panic recovery happens in this function; it is presumably
// supplied by the caller wrapping this in supervised.RunWithRecover — confirm
// at the call site.
//
// Only workspaces with:
//   - status IN ('online', 'degraded')
//   - active_tasks = 0
//   - hibernation_idle_minutes IS NOT NULL AND > 0
//   - runtime != 'external' (external agents have no Docker container);
//     a NULL runtime is treated as 'claude-code'
//   - last_heartbeat_at IS NOT NULL and older than hibernation_idle_minutes
//     minutes ago
//
// are candidates. A workspace whose last_heartbeat_at is NULL is never
// selected: the query excludes it outright and does not fall back to a grace
// period measured from created_at, so such a workspace stays up until it
// records a heartbeat and then goes idle.
func StartHibernationMonitor(ctx context.Context, onHibernate HibernateHandler) {
StartHibernationMonitorWithInterval(ctx, defaultHibernationInterval, onHibernate)
}
// StartHibernationMonitorWithInterval behaves like StartHibernationMonitor but
// lets the caller pick the tick interval; it exists so tests do not have to
// wait out the 2-minute default.
//
// The call blocks. On every tick it runs one hibernation scan and then calls
// supervised.Heartbeat("hibernation-monitor"); it returns only once ctx is
// done. The first scan runs one full interval after the call, not
// immediately. interval must be positive, because time.NewTicker panics on a
// non-positive duration.
func StartHibernationMonitorWithInterval(ctx context.Context, interval time.Duration, onHibernate HibernateHandler) {
	tick := time.NewTicker(interval)
	defer tick.Stop()

	log.Printf("Hibernation monitor: started (interval=%s)", interval)

	// Done returns the same channel on every call, so fetch it once.
	done := ctx.Done()
	for {
		select {
		case <-tick.C:
			hibernateIdleWorkspaces(ctx, onHibernate)
			// Heartbeat only after the scan has returned.
			supervised.Heartbeat("hibernation-monitor")
		case <-done:
			log.Println("Hibernation monitor: context done; stopping")
			return
		}
	}
}
// hibernateIdleWorkspaces runs a single hibernation scan: it queries for
// hibernation candidates and calls onHibernate for each one, in the order the
// database returned them.
//
// Errors never propagate to the caller and never crash the monitor loop:
//   - a query error or a row-iteration error is logged and ends the scan
//     without hibernating anything;
//   - a row whose ID cannot be scanned is logged and skipped;
//   - if ctx is cancelled while handlers are running, the remaining
//     candidates are skipped and left for a later scan.
//
// A nil onHibernate turns the scan into a no-op apart from its log lines.
func hibernateIdleWorkspaces(ctx context.Context, onHibernate HibernateHandler) {
	// COALESCE treats a NULL runtime as 'claude-code', so only rows explicitly
	// marked 'external' are excluded. Rows with a NULL last_heartbeat_at are
	// never candidates.
	rows, err := db.DB.QueryContext(ctx, `
SELECT id
FROM workspaces
WHERE hibernation_idle_minutes IS NOT NULL
AND hibernation_idle_minutes > 0
AND status IN ('online', 'degraded')
AND active_tasks = 0
AND COALESCE(runtime, 'claude-code') != 'external'
AND last_heartbeat_at IS NOT NULL
AND last_heartbeat_at < now() - (hibernation_idle_minutes * INTERVAL '1 minute')
`)
	if err != nil {
		log.Printf("Hibernation monitor: query error: %v", err)
		return
	}
	defer rows.Close()

	// Drain the whole result set before invoking any handler so the handlers
	// do not run while the result set is still being read.
	var ids []string
	for rows.Next() {
		var id string
		if err := rows.Scan(&id); err != nil {
			// Previously dropped silently; surface it so a schema or driver
			// problem is visible, but keep processing the other rows.
			log.Printf("Hibernation monitor: row scan error: %v", err)
			continue
		}
		ids = append(ids, id)
	}
	if err := rows.Err(); err != nil {
		log.Printf("Hibernation monitor: row iteration error: %v", err)
		return
	}

	for _, id := range ids {
		// Stop promptly on shutdown instead of handing every remaining
		// handler an already-cancelled context.
		if err := ctx.Err(); err != nil {
			log.Printf("Hibernation monitor: context done; skipping remaining candidates: %v", err)
			return
		}
		log.Printf("Hibernation monitor: hibernating idle workspace %s", id)
		if onHibernate != nil {
			onHibernate(ctx, id)
		}
	}
}