From 572b0d9c0027b348c7b75cdf7fefbc27064d083d Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Sun, 7 Jun 2026 09:35:47 +0000 Subject: [PATCH] fix(restart): guard sync Restart UPDATE against removed/paused/hibernated (#306) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sync Restart UPDATE at :306 set status='provisioning' with WHERE id only — no status guard. A workspace removed (or paused/hibernated) between the initial SELECT and the UPDATE could be resurrected to provisioning before the async runRestartCycle guard at :810 fired. Fix: add AND status NOT IN ('removed', 'paused', 'hibernated') to the UPDATE, mirroring the async guard. Check RowsAffected; if 0 rows the workspace is terminal and we return 409 Conflict without broadcasting or provisioning. Scope: workspace_restart.go + workspace_restart_test.go only. Regression tests: - TestRestartHandler_RemovedWorkspaceBlocked: removed → 409, no resurrection - TestRestartHandler_OfflineWorkspaceRestartsNormally: offline → guard passes, proceeds to broadcast + provision --- .../internal/handlers/workspace_restart.go | 16 +++- .../handlers/workspace_restart_test.go | 84 +++++++++++++++++++ 2 files changed, 97 insertions(+), 3 deletions(-) diff --git a/workspace-server/internal/handlers/workspace_restart.go b/workspace-server/internal/handlers/workspace_restart.go index e27b44a5e..ef4f7afef 100644 --- a/workspace-server/internal/handlers/workspace_restart.go +++ b/workspace-server/internal/handlers/workspace_restart.go @@ -301,10 +301,20 @@ func (h *WorkspaceHandler) Restart(c *gin.Context) { // the Config tab which writes through to both the DB and the container). containerRuntime := h.restartRuntimeFromConfig(ctx, id, wsName, dbRuntime, body.ApplyTemplate) - // Reset to provisioning - if _, err := db.DB.ExecContext(ctx, - `UPDATE workspaces SET status = $1, url = '', updated_at = now() WHERE id = $2`, models.StatusProvisioning, id); err != nil { + // Reset to provisioning — but don't resurrect removed/paused/hibernated + // workspaces. Mirrors the async guard in runRestartCycle (:810) so sync + // and async behavior agree. + res, err := db.DB.ExecContext(ctx, + `UPDATE workspaces SET status = $1, url = '', updated_at = now() WHERE id = $2 AND status NOT IN ('removed', 'paused', 'hibernated')`, models.StatusProvisioning, id) + if err != nil { log.Printf("Restart: failed to set provisioning status for %s: %v", id, err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to update workspace status"}) + return + } + if rows, _ := res.RowsAffected(); rows == 0 { + log.Printf("Restart: workspace %s is removed, paused, or hibernated — skipping restart", id) + c.JSON(http.StatusConflict, gin.H{"error": "workspace is removed, paused, or hibernated and cannot be restarted"}) + return } h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceProvisioning), id, map[string]interface{}{ "name": wsName, diff --git a/workspace-server/internal/handlers/workspace_restart_test.go b/workspace-server/internal/handlers/workspace_restart_test.go index 2437762ec..6154a4438 100644 --- a/workspace-server/internal/handlers/workspace_restart_test.go +++ b/workspace-server/internal/handlers/workspace_restart_test.go @@ -256,6 +256,90 @@ func TestRestartHandler_NilProvisionerReturns503(t *testing.T) { } } +// TestRestartHandler_RemovedWorkspaceBlocked asserts that a removed workspace +// hit by manual Restart is NOT resurrected to provisioning: the UPDATE guard +// (AND status NOT IN ('removed', 'paused', 'hibernated')) returns 0 rows and +// the handler returns 409 before any broadcast or provision attempt. +func TestRestartHandler_RemovedWorkspaceBlocked(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + broadcaster := newTestBroadcaster() + handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + + mock.ExpectQuery("SELECT status, name, tier, COALESCE"). + WithArgs("ws-removed"). + WillReturnRows(sqlmock.NewRows([]string{"status", "name", "tier", "runtime"}). + AddRow("removed", "Removed Agent", 1, "claude-code")) + + // isParentPaused: no parent + mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id ="). + WithArgs("ws-removed"). + WillReturnRows(sqlmock.NewRows([]string{"parent_id"})) + + // Guarded UPDATE returns 0 rows because status='removed' is excluded + mock.ExpectExec("UPDATE workspaces SET status =.*AND status NOT IN"). + WithArgs(models.StatusProvisioning, "ws-removed"). + WillReturnResult(sqlmock.NewResult(0, 0)) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: "ws-removed"}} + c.Request = httptest.NewRequest("POST", "/workspaces/ws-removed/restart", nil) + + handler.Restart(c) + + if w.Code != http.StatusConflict { + t.Errorf("expected 409 for removed workspace, got %d: %s", w.Code, w.Body.String()) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// TestRestartHandler_OfflineWorkspaceRestartsNormally asserts that a +// legitimately-restartable workspace (status='offline') passes the UPDATE +// guard (1 row affected) and proceeds to broadcast + provision. +func TestRestartHandler_OfflineWorkspaceRestartsNormally(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + broadcaster := newTestBroadcaster() + handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + + mock.ExpectQuery("SELECT status, name, tier, COALESCE"). + WithArgs("ws-offline"). + WillReturnRows(sqlmock.NewRows([]string{"status", "name", "tier", "runtime"}). + AddRow("offline", "Offline Agent", 1, "claude-code")) + + // isParentPaused: no parent + mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id ="). + WithArgs("ws-offline"). + WillReturnRows(sqlmock.NewRows([]string{"parent_id"})) + + // Guarded UPDATE succeeds (1 row affected) + mock.ExpectExec("UPDATE workspaces SET status =.*AND status NOT IN"). + WithArgs(models.StatusProvisioning, "ws-offline"). + WillReturnResult(sqlmock.NewResult(0, 1)) + + // Broadcast fires + mock.ExpectExec("INSERT INTO structure_events"). + WillReturnResult(sqlmock.NewResult(0, 1)) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: "ws-offline"}} + c.Request = httptest.NewRequest("POST", "/workspaces/ws-offline/restart", nil) + + handler.Restart(c) + + // nil provisioner → 503, but we got past the UPDATE guard successfully + if w.Code != http.StatusServiceUnavailable { + t.Errorf("expected 503 (nil provisioner after successful guard), got %d: %s", w.Code, w.Body.String()) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + // ==================== POST /workspaces/:id/pause — additional coverage ==================== func TestPauseHandler_WorkspaceNotFoundReturns404(t *testing.T) { -- 2.52.0