fix(platform): atomic hibernate via UPDATE WHERE active_tasks=0 (#819)
Replaces the racy SELECT-then-Stop two-step in HibernateWorkspace with a
three-step atomic pattern that eliminates the TOCTOU window (SAFE-819):
1. Atomic claim: single UPDATE WHERE id=$1
AND status IN ('online','degraded')
AND active_tasks = 0
— rowsAffected=0 means another caller already claimed it or tasks
arrived; we abort immediately without calling Stop.
2. provisioner.Stop: safe because status='hibernating' blocks new task
routing between step 1 and step 2 (no new task can be dispatched).
3. Final UPDATE to 'hibernated': records the completed hibernation.
Also adds stopFnOverride func(ctx, id) to WorkspaceHandler (always nil in
production) so tests can count Stop calls without a running Docker daemon.
Tests added/updated (13 total across 2 files):
- TestHibernateWorkspace_ActiveTasksNotHibernated
- TestHibernateWorkspace_AlreadyHibernatingNotHibernated
- TestHibernateWorkspace_SuccessPath
- TestHibernateWorkspace_ConcurrentOnlyOneStop
- TestHibernateWorkspace_DBErrorOnClaim
- Updated 3 existing HibernateWorkspace tests + 1 HTTP handler test
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
6e9ef5f204
commit
34f5a3cbe2
@ -1,9 +1,10 @@
|
||||
package handlers
|
||||
|
||||
// Integration tests for the workspace hibernation feature (issue #711 / PR #724).
|
||||
// Updated for the atomic TOCTOU fix (issue #819).
|
||||
//
|
||||
// Coverage:
|
||||
// - HibernateWorkspace(): container stop, DB status update, Redis key clear, event broadcast
|
||||
// - HibernateWorkspace(): atomic claim, container stop, DB status update, Redis key clear, event broadcast
|
||||
// - POST /workspaces/:id/hibernate HTTP handler: online→200, not-eligible→404, DB error→500
|
||||
// - resolveAgentURL(): hibernated workspace → 503 + Retry-After: 15 + waking: true
|
||||
//
|
||||
@ -28,10 +29,11 @@ import (
|
||||
// HibernateWorkspace unit tests
|
||||
// ──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
// TestHibernateWorkspace_OnlineWorkspace_Success verifies the happy-path:
|
||||
// - DB returns the workspace (online/degraded)
|
||||
// - provisioner is nil — no Stop() call needed (test-safe guard in production code)
|
||||
// - UPDATE sets status='hibernated', url=''
|
||||
// TestHibernateWorkspace_OnlineWorkspace_Success verifies the happy-path with
|
||||
// the 3-step atomic pattern (#819):
|
||||
// - Atomic claim UPDATE returns rowsAffected=1 (workspace was online/degraded + active_tasks=0)
|
||||
// - Name/tier SELECT runs after the claim
|
||||
// - Final UPDATE sets status='hibernated', url=''
|
||||
// - Redis keys ws:{id}, ws:{id}:url, ws:{id}:internal_url are deleted
|
||||
// - WORKSPACE_HIBERNATED event is broadcast (INSERT INTO structure_events)
|
||||
func TestHibernateWorkspace_OnlineWorkspace_Success(t *testing.T) {
|
||||
@ -47,12 +49,17 @@ func TestHibernateWorkspace_OnlineWorkspace_Success(t *testing.T) {
|
||||
mr.Set(fmt.Sprintf("ws:%s:url", wsID), "http://agent.internal:8000")
|
||||
mr.Set(fmt.Sprintf("ws:%s:internal_url", wsID), "http://172.17.0.5:8000")
|
||||
|
||||
// HibernateWorkspace does a SELECT first.
|
||||
mock.ExpectQuery(`SELECT name, tier FROM workspaces WHERE id = .* AND status IN`).
|
||||
// Step 1: atomic claim UPDATE succeeds.
|
||||
mock.ExpectExec(`UPDATE workspaces`).
|
||||
WithArgs(wsID).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// Post-claim SELECT for name/tier.
|
||||
mock.ExpectQuery(`SELECT name, tier FROM workspaces WHERE id`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name", "tier"}).AddRow("Idle Agent", 1))
|
||||
|
||||
// Then UPDATE status.
|
||||
// Step 3: final UPDATE to 'hibernated'.
|
||||
mock.ExpectExec(`UPDATE workspaces SET status = 'hibernated'`).
|
||||
WithArgs(wsID).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
@ -77,9 +84,10 @@ func TestHibernateWorkspace_OnlineWorkspace_Success(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestHibernateWorkspace_NotEligible_NoOp verifies that when the workspace is
|
||||
// NOT in online/degraded state (SELECT returns ErrNoRows), HibernateWorkspace
|
||||
// returns immediately — no UPDATE, no Redis clear, no broadcast.
|
||||
// TestHibernateWorkspace_NotEligible_NoOp verifies that when the atomic claim
|
||||
// UPDATE returns rowsAffected=0 (workspace not in online/degraded state, or
|
||||
// active_tasks > 0), HibernateWorkspace returns immediately — no Stop, no
|
||||
// final UPDATE, no Redis clear, no broadcast.
|
||||
func TestHibernateWorkspace_NotEligible_NoOp(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
mr := setupTestRedis(t)
|
||||
@ -88,17 +96,17 @@ func TestHibernateWorkspace_NotEligible_NoOp(t *testing.T) {
|
||||
|
||||
wsID := "ws-already-offline"
|
||||
|
||||
// Simulate workspace not in eligible state (offline, paused, removed …)
|
||||
mock.ExpectQuery(`SELECT name, tier FROM workspaces WHERE id = .* AND status IN`).
|
||||
// Atomic claim finds nothing matching WHERE (workspace offline, paused, etc.).
|
||||
mock.ExpectExec(`UPDATE workspaces`).
|
||||
WithArgs(wsID).
|
||||
WillReturnError(sql.ErrNoRows)
|
||||
WillReturnResult(sqlmock.NewResult(0, 0))
|
||||
|
||||
// Set a Redis key to confirm it is NOT cleared by early return.
|
||||
mr.Set(fmt.Sprintf("ws:%s:url", wsID), "http://still-here:8000")
|
||||
|
||||
handler.HibernateWorkspace(context.Background(), wsID)
|
||||
|
||||
// No further DB operations should have happened.
|
||||
// Only the one ExecContext expectation; no further DB operations.
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet DB expectations: %v", err)
|
||||
}
|
||||
@ -110,7 +118,7 @@ func TestHibernateWorkspace_NotEligible_NoOp(t *testing.T) {
|
||||
}
|
||||
|
||||
// TestHibernateWorkspace_DBUpdateFails_NoCrash verifies that a DB error on the
|
||||
// UPDATE does not panic — the function logs and returns silently.
|
||||
// final status UPDATE does not panic — the function logs and returns silently.
|
||||
func TestHibernateWorkspace_DBUpdateFails_NoCrash(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
@ -119,10 +127,17 @@ func TestHibernateWorkspace_DBUpdateFails_NoCrash(t *testing.T) {
|
||||
|
||||
wsID := "ws-update-fail"
|
||||
|
||||
mock.ExpectQuery(`SELECT name, tier FROM workspaces WHERE id = .* AND status IN`).
|
||||
// Step 1: atomic claim succeeds.
|
||||
mock.ExpectExec(`UPDATE workspaces`).
|
||||
WithArgs(wsID).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// Post-claim SELECT.
|
||||
mock.ExpectQuery(`SELECT name, tier FROM workspaces WHERE id`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name", "tier"}).AddRow("Flaky Agent", 2))
|
||||
|
||||
// Step 3: final UPDATE fails.
|
||||
mock.ExpectExec(`UPDATE workspaces SET status = 'hibernated'`).
|
||||
WithArgs(wsID).
|
||||
WillReturnError(fmt.Errorf("db: connection refused"))
|
||||
@ -136,7 +151,7 @@ func TestHibernateWorkspace_DBUpdateFails_NoCrash(t *testing.T) {
|
||||
|
||||
handler.HibernateWorkspace(context.Background(), wsID)
|
||||
|
||||
// SELECT + UPDATE expectations met; no INSERT INTO structure_events expected.
|
||||
// Claim + SELECT + failing UPDATE; no INSERT INTO structure_events expected.
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet DB expectations: %v", err)
|
||||
}
|
||||
@ -160,6 +175,8 @@ func hibernateRequest(t *testing.T, handler *WorkspaceHandler, wsID string) *htt
|
||||
|
||||
// TestHibernateHandler_Online_Returns200 verifies that an online workspace
|
||||
// that is eligible for hibernation returns 200 {"status":"hibernated"}.
|
||||
// With the 3-step fix: handler SELECT → atomic claim UPDATE → name/tier SELECT
|
||||
// → final UPDATE → broadcaster INSERT.
|
||||
func TestHibernateHandler_Online_Returns200(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
@ -168,17 +185,22 @@ func TestHibernateHandler_Online_Returns200(t *testing.T) {
|
||||
|
||||
wsID := "ws-handler-online"
|
||||
|
||||
// Hibernate() handler SELECT — verifies workspace is online/degraded.
|
||||
// Hibernate() handler eligibility SELECT — checks status IN ('online','degraded').
|
||||
mock.ExpectQuery(`SELECT name, tier FROM workspaces WHERE id = .* AND status IN`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name", "tier"}).AddRow("Online Bot", 1))
|
||||
|
||||
// HibernateWorkspace() SELECT — same query, checks state again before acting.
|
||||
mock.ExpectQuery(`SELECT name, tier FROM workspaces WHERE id = .* AND status IN`).
|
||||
// HibernateWorkspace() step 1: atomic claim.
|
||||
mock.ExpectExec(`UPDATE workspaces`).
|
||||
WithArgs(wsID).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// Post-claim SELECT for name/tier.
|
||||
mock.ExpectQuery(`SELECT name, tier FROM workspaces WHERE id`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name", "tier"}).AddRow("Online Bot", 1))
|
||||
|
||||
// HibernateWorkspace() UPDATE.
|
||||
// Step 3: final UPDATE.
|
||||
mock.ExpectExec(`UPDATE workspaces SET status = 'hibernated'`).
|
||||
WithArgs(wsID).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
@ -33,6 +34,10 @@ type WorkspaceHandler struct {
|
||||
// registered; Registry.Run handles a nil receiver as a no-op so the
|
||||
// hot path stays a single nil-pointer compare.
|
||||
envMutators *provisionhook.Registry
|
||||
// stopFnOverride is set exclusively in tests to intercept provisioner.Stop
|
||||
// calls made by HibernateWorkspace without requiring a running Docker daemon.
|
||||
// Always nil in production; the real provisioner path is used when nil.
|
||||
stopFnOverride func(ctx context.Context, workspaceID string)
|
||||
}
|
||||
|
||||
func NewWorkspaceHandler(b *events.Broadcaster, p *provisioner.Provisioner, platformURL, configsDir string) *WorkspaceHandler {
|
||||
|
||||
@ -211,27 +211,68 @@ func (h *WorkspaceHandler) Hibernate(c *gin.Context) {
|
||||
// 'hibernated'. Called by the hibernation monitor when a workspace has had
|
||||
// active_tasks == 0 for longer than its configured hibernation_idle_minutes.
|
||||
// Hibernated workspaces auto-wake on the next incoming A2A message.
|
||||
//
|
||||
// TOCTOU safety (#819): the three-step pattern below is atomic at the DB level.
|
||||
//
|
||||
// 1. Atomic claim: a single UPDATE WHERE locks the row by transitioning
|
||||
// status → 'hibernating', gated on status IN ('online','degraded') AND
|
||||
// active_tasks = 0. If any concurrent caller (another goroutine, the
|
||||
// idle-timer, or a manual API call) already claimed the row, or if tasks
|
||||
// arrived since the caller decided to hibernate, rowsAffected == 0 and
|
||||
// this function returns immediately without stopping anything.
|
||||
//
|
||||
// 2. provisioner.Stop: safe to call now because status == 'hibernating';
|
||||
// the routing layer rejects new tasks for non-online/degraded workspaces,
|
||||
// so no new task can be dispatched between step 1 and step 2.
|
||||
//
|
||||
// 3. Final UPDATE to 'hibernated': records the completed hibernation.
|
||||
func (h *WorkspaceHandler) HibernateWorkspace(ctx context.Context, workspaceID string) {
|
||||
var wsName string
|
||||
var tier int
|
||||
err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT name, tier FROM workspaces WHERE id = $1 AND status IN ('online', 'degraded')`, workspaceID,
|
||||
).Scan(&wsName, &tier)
|
||||
// ── Step 1: Atomic claim ──────────────────────────────────────────────────
|
||||
// The UPDATE acts as a DB-level advisory lock: only one concurrent caller
|
||||
// can transition the row from online/degraded → hibernating. The
|
||||
// active_tasks = 0 predicate ensures we never interrupt a running task.
|
||||
result, err := db.DB.ExecContext(ctx, `
|
||||
UPDATE workspaces
|
||||
SET status = 'hibernating', updated_at = now()
|
||||
WHERE id = $1
|
||||
AND status IN ('online', 'degraded')
|
||||
AND active_tasks = 0`, workspaceID)
|
||||
if err != nil {
|
||||
// Already changed state (paused, removed, etc.) — nothing to do.
|
||||
log.Printf("Hibernate: atomic claim failed for %s: %v", workspaceID, err)
|
||||
return
|
||||
}
|
||||
rowsAffected, _ := result.RowsAffected()
|
||||
if rowsAffected == 0 {
|
||||
// Either already hibernating/hibernated/paused/removed, or active_tasks > 0 —
|
||||
// safe to abort without side-effects.
|
||||
return
|
||||
}
|
||||
|
||||
// Fetch name/tier for logging and event broadcast (after the claim, so we
|
||||
// can use a simple SELECT without a status guard).
|
||||
var wsName string
|
||||
var tier int
|
||||
if scanErr := db.DB.QueryRowContext(ctx,
|
||||
`SELECT name, tier FROM workspaces WHERE id = $1`, workspaceID,
|
||||
).Scan(&wsName, &tier); scanErr != nil {
|
||||
wsName = workspaceID // fallback for log messages
|
||||
}
|
||||
|
||||
// ── Step 2: Stop the container ────────────────────────────────────────────
|
||||
// Status is now 'hibernating'; the router rejects new task routing here, so
|
||||
// there is no race window between claiming the row and stopping the container.
|
||||
log.Printf("Hibernate: stopping container for %s (%s)", wsName, workspaceID)
|
||||
if h.provisioner != nil {
|
||||
if h.stopFnOverride != nil {
|
||||
h.stopFnOverride(ctx, workspaceID)
|
||||
} else if h.provisioner != nil {
|
||||
h.provisioner.Stop(ctx, workspaceID)
|
||||
}
|
||||
|
||||
_, err = db.DB.ExecContext(ctx,
|
||||
`UPDATE workspaces SET status = 'hibernated', url = '', updated_at = now() WHERE id = $1 AND status IN ('online', 'degraded')`,
|
||||
workspaceID)
|
||||
if err != nil {
|
||||
log.Printf("Hibernate: failed to update status for %s: %v", workspaceID, err)
|
||||
// ── Step 3: Mark fully hibernated ─────────────────────────────────────────
|
||||
if _, err = db.DB.ExecContext(ctx,
|
||||
`UPDATE workspaces SET status = 'hibernated', url = '', updated_at = now() WHERE id = $1`,
|
||||
workspaceID); err != nil {
|
||||
log.Printf("Hibernate: failed to mark hibernated for %s: %v", workspaceID, err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@ -1,14 +1,17 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
sqlmock "github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
@ -334,3 +337,195 @@ func TestResumeHandler_NilProvisionerReturns503(t *testing.T) {
|
||||
// Note: TestResumeHandler_ParentPausedBlocksResume requires a non-nil provisioner
|
||||
// (Resume checks provisioner before isParentPaused). This is covered in
|
||||
// handlers_additional_test.go's integration-style tests.
|
||||
|
||||
// ==================== HibernateWorkspace — TOCTOU fix (#819) ====================
|
||||
|
||||
// TestHibernateWorkspace_ActiveTasksNotHibernated verifies that a workspace
|
||||
// with active_tasks > 0 is NOT hibernated: the atomic UPDATE WHERE active_tasks=0
|
||||
// returns 0 rows, and the function returns without calling Stop or the final
|
||||
// status update.
|
||||
func TestHibernateWorkspace_ActiveTasksNotHibernated(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
var stopCalls int32
|
||||
handler.stopFnOverride = func(_ context.Context, _ string) {
|
||||
atomic.AddInt32(&stopCalls, 1)
|
||||
}
|
||||
|
||||
// The atomic claim UPDATE returns 0 rows because active_tasks > 0 fails the WHERE.
|
||||
mock.ExpectExec(`UPDATE workspaces`).
|
||||
WithArgs("ws-active").
|
||||
WillReturnResult(sqlmock.NewResult(0, 0)) // rowsAffected = 0
|
||||
|
||||
handler.HibernateWorkspace(context.Background(), "ws-active")
|
||||
|
||||
if got := atomic.LoadInt32(&stopCalls); got != 0 {
|
||||
t.Errorf("provisioner.Stop called %d times; want 0 (active_tasks > 0 must prevent hibernation)", got)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestHibernateWorkspace_AlreadyHibernatingNotHibernated verifies that a
|
||||
// workspace already in status 'hibernating' (claimed by a concurrent caller)
|
||||
// is skipped: the atomic UPDATE returns 0 rows because status no longer
|
||||
// matches IN ('online','degraded').
|
||||
func TestHibernateWorkspace_AlreadyHibernatingNotHibernated(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
var stopCalls int32
|
||||
handler.stopFnOverride = func(_ context.Context, _ string) {
|
||||
atomic.AddInt32(&stopCalls, 1)
|
||||
}
|
||||
|
||||
// Another goroutine already transitioned the workspace to 'hibernating',
|
||||
// so this UPDATE finds nothing matching the WHERE clause.
|
||||
mock.ExpectExec(`UPDATE workspaces`).
|
||||
WithArgs("ws-already").
|
||||
WillReturnResult(sqlmock.NewResult(0, 0))
|
||||
|
||||
handler.HibernateWorkspace(context.Background(), "ws-already")
|
||||
|
||||
if got := atomic.LoadInt32(&stopCalls); got != 0 {
|
||||
t.Errorf("provisioner.Stop called %d times; want 0 (concurrent claim should abort this call)", got)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestHibernateWorkspace_SuccessPath verifies the happy path: atomic claim
|
||||
// succeeds (rowsAffected=1), Stop is called exactly once, and the final
|
||||
// 'hibernated' UPDATE is executed.
|
||||
func TestHibernateWorkspace_SuccessPath(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
var stopCalls int32
|
||||
handler.stopFnOverride = func(_ context.Context, _ string) {
|
||||
atomic.AddInt32(&stopCalls, 1)
|
||||
}
|
||||
|
||||
// Step 1: atomic claim succeeds
|
||||
mock.ExpectExec(`UPDATE workspaces`).
|
||||
WithArgs("ws-ok").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1)) // rowsAffected = 1
|
||||
|
||||
// Name/tier fetch after claim
|
||||
mock.ExpectQuery(`SELECT name, tier FROM workspaces WHERE id`).
|
||||
WithArgs("ws-ok").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name", "tier"}).AddRow("My Agent", 1))
|
||||
|
||||
// Step 3: final hibernated UPDATE
|
||||
mock.ExpectExec(`UPDATE workspaces SET status = 'hibernated'`).
|
||||
WithArgs("ws-ok").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// broadcaster INSERT
|
||||
mock.ExpectExec(`INSERT INTO structure_events`).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
handler.HibernateWorkspace(context.Background(), "ws-ok")
|
||||
|
||||
if got := atomic.LoadInt32(&stopCalls); got != 1 {
|
||||
t.Errorf("provisioner.Stop called %d times; want exactly 1", got)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestHibernateWorkspace_ConcurrentOnlyOneStop verifies the core TOCTOU guarantee:
|
||||
// when two callers race to hibernate the same workspace, the DB atomicity ensures
|
||||
// only one proceeds (rowsAffected=1) and only one Stop() is issued.
|
||||
//
|
||||
// The real Postgres guarantee (only one UPDATE wins) is modelled here by running
|
||||
// both calls sequentially against the same mock, with FIFO expectations:
|
||||
// - First call wins → rowsAffected=1 → Stop is called
|
||||
// - Second call loses → rowsAffected=0 → Stop is NOT called
|
||||
//
|
||||
// This directly verifies the invariant "at most one Stop per workspace across
|
||||
// any number of concurrent hibernate attempts."
|
||||
func TestHibernateWorkspace_ConcurrentOnlyOneStop(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
var stopCalls int32
|
||||
handler.stopFnOverride = func(_ context.Context, _ string) {
|
||||
atomic.AddInt32(&stopCalls, 1)
|
||||
}
|
||||
|
||||
// ── Caller A wins the race ────────────────────────────────────────────────
|
||||
mock.ExpectExec(`UPDATE workspaces`).
|
||||
WithArgs("ws-race").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectQuery(`SELECT name, tier FROM workspaces WHERE id`).
|
||||
WithArgs("ws-race").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name", "tier"}).AddRow("Race Agent", 2))
|
||||
mock.ExpectExec(`UPDATE workspaces SET status = 'hibernated'`).
|
||||
WithArgs("ws-race").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec(`INSERT INTO structure_events`).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// ── Caller B loses — workspace is already 'hibernating' ───────────────────
|
||||
mock.ExpectExec(`UPDATE workspaces`).
|
||||
WithArgs("ws-race").
|
||||
WillReturnResult(sqlmock.NewResult(0, 0))
|
||||
|
||||
// Execute sequentially (sqlmock is not safe for concurrent goroutines);
|
||||
// the test models the serialized DB outcome that Postgres enforces.
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(1)
|
||||
go func() { defer wg.Done(); handler.HibernateWorkspace(context.Background(), "ws-race") }()
|
||||
wg.Wait()
|
||||
|
||||
wg.Add(1)
|
||||
go func() { defer wg.Done(); handler.HibernateWorkspace(context.Background(), "ws-race") }()
|
||||
wg.Wait()
|
||||
|
||||
if got := atomic.LoadInt32(&stopCalls); got != 1 {
|
||||
t.Errorf("provisioner.Stop called %d times; want exactly 1 across two hibernate attempts", got)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestHibernateWorkspace_DBErrorOnClaim verifies that a DB error on the
|
||||
// atomic claim UPDATE aborts the hibernation without calling Stop.
|
||||
func TestHibernateWorkspace_DBErrorOnClaim(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
var stopCalls int32
|
||||
handler.stopFnOverride = func(_ context.Context, _ string) {
|
||||
atomic.AddInt32(&stopCalls, 1)
|
||||
}
|
||||
|
||||
mock.ExpectExec(`UPDATE workspaces`).
|
||||
WithArgs("ws-dberr").
|
||||
WillReturnError(sql.ErrConnDone)
|
||||
|
||||
handler.HibernateWorkspace(context.Background(), "ws-dberr")
|
||||
|
||||
if got := atomic.LoadInt32(&stopCalls); got != 0 {
|
||||
t.Errorf("provisioner.Stop called %d times on DB error; want 0", got)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user