From 22ee2b0e73b3e8386f3c2e90d8e8f7b3b5ffa780 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Tue, 2 Jun 2026 01:46:27 +0000 Subject: [PATCH] fix(workspace): make Pause/Resume cascade opt-in via ?cascade=true POST /workspaces/:id/pause and /resume currently implicitly cascade to all descendants. This caused incident internal#722 where a single Pause call terminated 5 EC2-backed workspaces. Change default behavior to single-workspace scope. If descendants exist and cascade=true is not set, return 409 Conflict with a descendants list so the caller can decide. ?cascade=true preserves the old recursive behavior. - Pause: check c.Query(\"cascade\") before stopping descendants - Resume: mirror Pause's cascade contract - Update TestPause_WithDescendants to use ?cascade=true - Add 4 new tests covering 409 + 200 paths for both handlers Fixes #1991 Co-Authored-By: Claude Opus 4.7 --- .../handlers/handlers_additional_test.go | 2 +- .../internal/handlers/workspace_restart.go | 22 ++ .../handlers/workspace_restart_test.go | 191 ++++++++++++++++++ 3 files changed, 214 insertions(+), 1 deletion(-) diff --git a/workspace-server/internal/handlers/handlers_additional_test.go b/workspace-server/internal/handlers/handlers_additional_test.go index 902c06cf7..36b8f027f 100644 --- a/workspace-server/internal/handlers/handlers_additional_test.go +++ b/workspace-server/internal/handlers/handlers_additional_test.go @@ -1132,7 +1132,7 @@ func TestPause_WithDescendants(t *testing.T) { w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) c.Params = gin.Params{{Key: "id", Value: "ws-team"}} - c.Request = httptest.NewRequest("POST", "/workspaces/ws-team/pause", nil) + c.Request = httptest.NewRequest("POST", "/workspaces/ws-team/pause?cascade=true", nil) handler.Pause(c) diff --git a/workspace-server/internal/handlers/workspace_restart.go b/workspace-server/internal/handlers/workspace_restart.go index 3129386f2..6d4fb175c 100644 --- a/workspace-server/internal/handlers/workspace_restart.go +++ b/workspace-server/internal/handlers/workspace_restart.go @@ -905,6 +905,7 @@ func (h *WorkspaceHandler) Pause(c *gin.Context) { // Collect this workspace + all descendants to pause toPause := []struct{ id, name string }{{id, wsName}} + var descendantList []gin.H rows, err := db.DB.QueryContext(ctx, `WITH RECURSIVE descendants AS ( SELECT id, name FROM workspaces WHERE parent_id = $1 AND status NOT IN ('removed', 'paused') @@ -920,6 +921,7 @@ func (h *WorkspaceHandler) Pause(c *gin.Context) { var cid, cname string if rows.Scan(&cid, &cname) == nil { toPause = append(toPause, struct{ id, name string }{cid, cname}) + descendantList = append(descendantList, gin.H{"id": cid, "name": cname}) } } if err := rows.Err(); err != nil { @@ -927,6 +929,15 @@ func (h *WorkspaceHandler) Pause(c *gin.Context) { } } + // Default: single-workscope pause unless ?cascade=true + if c.Query("cascade") != "true" && len(descendantList) > 0 { + c.JSON(http.StatusConflict, gin.H{ + "error": "workspace has descendants — use ?cascade=true to pause all", + "descendants": descendantList, + }) + return + } + // Stop containers and mark all as paused. StopWorkspaceAuto routes // to whichever backend is wired (CP for SaaS, Docker for self-hosted) // — pre-2026-05-05 this site inlined `if h.provisioner != nil { Stop }`, @@ -994,6 +1005,7 @@ func (h *WorkspaceHandler) Resume(c *gin.Context) { tier int } toResume := []wsInfo{{id, wsName, dbRuntime, tier}} + var descendantList []gin.H rows, err := db.DB.QueryContext(ctx, `WITH RECURSIVE descendants AS ( SELECT id, name, tier, COALESCE(runtime, 'claude-code') AS runtime FROM workspaces WHERE parent_id = $1 AND status = 'paused' @@ -1009,6 +1021,7 @@ func (h *WorkspaceHandler) Resume(c *gin.Context) { var ws wsInfo if rows.Scan(&ws.id, &ws.name, &ws.tier, &ws.runtime) == nil { toResume = append(toResume, ws) + descendantList = append(descendantList, gin.H{"id": ws.id, "name": ws.name}) } } if err := rows.Err(); err != nil { @@ -1016,6 +1029,15 @@ func (h *WorkspaceHandler) Resume(c *gin.Context) { } } + // Default: single-workspace resume unless ?cascade=true + if c.Query("cascade") != "true" && len(descendantList) > 0 { + c.JSON(http.StatusConflict, gin.H{ + "error": "workspace has descendants — use ?cascade=true to resume all", + "descendants": descendantList, + }) + return + } + // Re-provision all for _, ws := range toResume { if _, err := db.DB.ExecContext(ctx, diff --git a/workspace-server/internal/handlers/workspace_restart_test.go b/workspace-server/internal/handlers/workspace_restart_test.go index 2437762ec..aeeb02b34 100644 --- a/workspace-server/internal/handlers/workspace_restart_test.go +++ b/workspace-server/internal/handlers/workspace_restart_test.go @@ -352,6 +352,93 @@ func TestPauseHandler_SuccessNoChildren(t *testing.T) { } } +func TestPauseHandler_DescendantsNoCascadeReturns409(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + broadcaster := newTestBroadcaster() + handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + + mock.ExpectQuery("SELECT status, name FROM workspaces WHERE id ="). + WithArgs("ws-pause-parent"). + WillReturnRows(sqlmock.NewRows([]string{"status", "name"}).AddRow("online", "Parent Agent")) + + mock.ExpectQuery("WITH RECURSIVE descendants"). + WithArgs("ws-pause-parent"). + WillReturnRows(sqlmock.NewRows([]string{"id", "name"}). + AddRow("ws-child-1", "Child 1"). + AddRow("ws-child-2", "Child 2")) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: "ws-pause-parent"}} + c.Request = httptest.NewRequest("POST", "/workspaces/ws-pause-parent/pause", nil) + + handler.Pause(c) + + if w.Code != http.StatusConflict { + t.Errorf("expected 409, got %d: %s", w.Code, w.Body.String()) + } + + var resp map[string]interface{} + json.Unmarshal(w.Body.Bytes(), &resp) + if _, ok := resp["error"].(string); !ok { + t.Errorf("expected error message, got %v", resp) + } + if descs, ok := resp["descendants"].([]interface{}); !ok || len(descs) != 2 { + t.Errorf("expected 2 descendants, got %v", resp["descendants"]) + } + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +func TestPauseHandler_DescendantsWithCascadeReturns200(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + broadcaster := newTestBroadcaster() + handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + + mock.ExpectQuery("SELECT status, name FROM workspaces WHERE id ="). + WithArgs("ws-pause-parent-cascade"). + WillReturnRows(sqlmock.NewRows([]string{"status", "name"}).AddRow("online", "Parent Agent")) + + mock.ExpectQuery("WITH RECURSIVE descendants"). + WithArgs("ws-pause-parent-cascade"). + WillReturnRows(sqlmock.NewRows([]string{"id", "name"}). + AddRow("ws-child-1", "Child 1"). + AddRow("ws-child-2", "Child 2")) + + for _, wsID := range []string{"ws-pause-parent-cascade", "ws-child-1", "ws-child-2"} { + mock.ExpectExec("UPDATE workspaces SET status ="). + WithArgs(models.StatusPaused, wsID). + WillReturnResult(sqlmock.NewResult(0, 1)) + mock.ExpectExec("INSERT INTO structure_events"). + WillReturnResult(sqlmock.NewResult(0, 1)) + } + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: "ws-pause-parent-cascade"}} + c.Request = httptest.NewRequest("POST", "/workspaces/ws-pause-parent-cascade/pause?cascade=true", nil) + + handler.Pause(c) + + if w.Code != http.StatusOK { + t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String()) + } + + var resp map[string]interface{} + json.Unmarshal(w.Body.Bytes(), &resp) + if count, ok := resp["paused_count"].(float64); !ok || count != 3 { + t.Errorf("expected paused_count 3, got %v", resp["paused_count"]) + } + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + // ==================== POST /workspaces/:id/resume — additional coverage ==================== func TestResumeHandler_NotPausedReturns404(t *testing.T) { @@ -439,6 +526,110 @@ func TestResumeHandler_NilProvisionerReturns503(t *testing.T) { // (Resume checks provisioner before isParentPaused). This is covered in // handlers_additional_test.go's integration-style tests. +func TestResumeHandler_DescendantsNoCascadeReturns409(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + broadcaster := newTestBroadcaster() + handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + handler.SetCPProvisioner(&fakeCPProv{}) + + mock.ExpectQuery("SELECT name, tier, COALESCE"). + WithArgs("ws-resume-parent"). + WillReturnRows(sqlmock.NewRows([]string{"name", "tier", "runtime"}). + AddRow("Parent Agent", 1, "claude-code")) + + // isParentPaused: no parent + mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id ="). + WithArgs("ws-resume-parent"). + WillReturnRows(sqlmock.NewRows([]string{"parent_id"})) + + mock.ExpectQuery("WITH RECURSIVE descendants"). + WithArgs("ws-resume-parent"). + WillReturnRows(sqlmock.NewRows([]string{"id", "name", "tier", "runtime"}). + AddRow("ws-child-1", "Child 1", 1, "claude-code"). + AddRow("ws-child-2", "Child 2", 1, "claude-code")) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: "ws-resume-parent"}} + c.Request = httptest.NewRequest("POST", "/workspaces/ws-resume-parent/resume", nil) + + handler.Resume(c) + + if w.Code != http.StatusConflict { + t.Errorf("expected 409, got %d: %s", w.Code, w.Body.String()) + } + + var resp map[string]interface{} + json.Unmarshal(w.Body.Bytes(), &resp) + if _, ok := resp["error"].(string); !ok { + t.Errorf("expected error message, got %v", resp) + } + if descs, ok := resp["descendants"].([]interface{}); !ok || len(descs) != 2 { + t.Errorf("expected 2 descendants, got %v", resp["descendants"]) + } + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +func TestResumeHandler_DescendantsWithCascadeReturns200(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + broadcaster := newTestBroadcaster() + handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + handler.SetCPProvisioner(&fakeCPProv{}) + + mock.ExpectQuery("SELECT name, tier, COALESCE"). + WithArgs("ws-resume-parent-cascade"). + WillReturnRows(sqlmock.NewRows([]string{"name", "tier", "runtime"}). + AddRow("Parent Agent", 1, "claude-code")) + + // isParentPaused: no parent + mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id ="). + WithArgs("ws-resume-parent-cascade"). + WillReturnRows(sqlmock.NewRows([]string{"parent_id"})) + + mock.ExpectQuery("WITH RECURSIVE descendants"). + WithArgs("ws-resume-parent-cascade"). + WillReturnRows(sqlmock.NewRows([]string{"id", "name", "tier", "runtime"}). + AddRow("ws-child-1", "Child 1", 1, "claude-code"). + AddRow("ws-child-2", "Child 2", 1, "claude-code")) + + for _, wsID := range []string{"ws-resume-parent-cascade", "ws-child-1", "ws-child-2"} { + mock.ExpectExec("UPDATE workspaces SET status ="). + WithArgs(models.StatusProvisioning, wsID). + WillReturnResult(sqlmock.NewResult(0, 1)) + mock.ExpectExec("INSERT INTO structure_events"). + WillReturnResult(sqlmock.NewResult(0, 1)) + mock.ExpectQuery("SELECT COALESCE"). + WithArgs(wsID). + WillReturnRows(sqlmock.NewRows([]string{"COALESCE"}).AddRow("{}")) + } + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: "ws-resume-parent-cascade"}} + c.Request = httptest.NewRequest("POST", "/workspaces/ws-resume-parent-cascade/resume?cascade=true", nil) + + handler.Resume(c) + + if w.Code != http.StatusOK { + t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String()) + } + + var resp map[string]interface{} + json.Unmarshal(w.Body.Bytes(), &resp) + if count, ok := resp["resumed_count"].(float64); !ok || count != 3 { + t.Errorf("expected resumed_count 3, got %v", resp["resumed_count"]) + } + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + // ==================== HibernateWorkspace — TOCTOU fix (#819) ==================== // TestHibernateWorkspace_ActiveTasksNotHibernated verifies that a workspace -- 2.52.0