From 61b7755c3c41aade1f42aa5168d3375fadd92089 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Mon, 4 May 2026 23:52:53 -0700 Subject: [PATCH] =?UTF-8?q?feat(handlers):=20migrate=20Pause=20loop=20to?= =?UTF-8?q?=20StopWorkspaceAuto=20=E2=80=94=20#2799=20Phase=203?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Last open #2799 site. Pause's per-workspace stop call now routes through StopWorkspaceAuto, removing the final inline if-cpProv-else (actually if-h.provisioner) dispatch from workspace_restart.go's restart/pause/resume code paths. Pre-2026-05-05 the Pause loop was: if h.provisioner != nil { h.provisioner.Stop(ctx, ws.id) } Same drift class as #2813 (team-collapse leak) + #2814 (workspace delete leak) — Docker-only stop silently no-ops on SaaS, leaving the EC2 running while the workspace row gets marked paused. Orphan sweeper would catch it eventually but the leak window is real. Pause-specific bookkeeping (mark paused, clear workspace keys, broadcast WORKSPACE_PAUSED) stays inline in the handler; only the "stop the running workload" step delegates. StopWorkspaceAuto's no-backend → no-op semantics match the pre-fix behavior on misconfigured deployments (the bookkeeping still runs). One new source-level pin: TestPauseHandler_UsesStopWorkspaceAuto — gates regression to the inline dispatch shape. This closes #2799 Phase 3. After this PR + #2847 (Phase 2 PR-B) land, workspace_restart.go has no remaining inline if-cpProv-else dispatch in any user-facing code path. The remaining direct backend calls inside the file are in stopForRestart and cpStopWithRetry — both internal helpers that ARE the dispatcher's underlying primitives, not new bypasses. Note: scope was originally tagged "Phase 3 needs PauseWorkspaceAuto verb" in the audit on PR #2843. On closer reading Pause's stop step is identical to Stop — only the bookkeeping is Pause-specific. Reusing StopWorkspaceAuto avoids unnecessary surface and keeps the dispatcher trio (provision/stop/restart) tight. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../handlers/workspace_provision_auto_test.go | 26 +++++++++++++++++++ .../internal/handlers/workspace_restart.go | 14 +++++++--- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/workspace-server/internal/handlers/workspace_provision_auto_test.go b/workspace-server/internal/handlers/workspace_provision_auto_test.go index f6fd41a8..1cf32994 100644 --- a/workspace-server/internal/handlers/workspace_provision_auto_test.go +++ b/workspace-server/internal/handlers/workspace_provision_auto_test.go @@ -894,6 +894,32 @@ func TestRunRestartCycle_UsesProvisionWorkspaceAutoSync(t *testing.T) { } } +// TestPauseHandler_UsesStopWorkspaceAuto — Phase 3 of #2799 source-level +// pin. Pause's per-workspace stop call must route through +// StopWorkspaceAuto so SaaS tenants terminate the EC2 instead of leaking +// it (same drift class as the team-collapse leak #2813 and the +// workspace-delete leak #2814 closed by PR #2824). +// +// Pause-specific bookkeeping (mark paused, clear keys, broadcast) +// stays in the Pause handler — only the "stop the running workload" +// step delegates to the dispatcher. This pin asserts the dispatcher +// is called from the Pause loop with `ws.id`. +func TestPauseHandler_UsesStopWorkspaceAuto(t *testing.T) { + wd, err := os.Getwd() + if err != nil { + t.Fatalf("getwd: %v", err) + } + src, err := os.ReadFile(filepath.Join(wd, "workspace_restart.go")) + if err != nil { + t.Fatalf("read workspace_restart.go: %v", err) + } + stripped := stripGoComments(src) + if !bytes.Contains(stripped, []byte("h.StopWorkspaceAuto(ctx, ws.id)")) { + t.Errorf("workspace_restart.go must call StopWorkspaceAuto from the Pause loop with `ws.id` — current code does not. " + + "Phase 3 of #2799 migrated this site; do not regress to the inline `if h.provisioner != nil { Stop }` dispatch.") + } +} + // stripGoComments removes // line comments and /* */ block comments // from Go source. Imperfect (doesn't handle comments-inside-strings) // but adequate for the source-level pin tests in this file — none of diff --git a/workspace-server/internal/handlers/workspace_restart.go b/workspace-server/internal/handlers/workspace_restart.go index 08bb02c3..3b3097c4 100644 --- a/workspace-server/internal/handlers/workspace_restart.go +++ b/workspace-server/internal/handlers/workspace_restart.go @@ -613,10 +613,18 @@ func (h *WorkspaceHandler) Pause(c *gin.Context) { } } - // Stop containers and mark all as paused + // Stop containers and mark all as paused. StopWorkspaceAuto routes + // to whichever backend is wired (CP for SaaS, Docker for self-hosted) + // — pre-2026-05-05 this site inlined `if h.provisioner != nil { Stop }`, + // which silently leaked EC2s on every SaaS Pause (same drift class as + // the team-collapse leak #2813 and the workspace-delete leak #2814, + // both closed by PR #2824). StopWorkspaceAuto returns nil on no-backend + // (no-op), so the Pause-specific bookkeeping (mark paused, clear keys, + // broadcast) still fires regardless of whether anything was actually + // stopped — matches the pre-fix behavior on misconfigured deployments. for _, ws := range toPause { - if h.provisioner != nil { - h.provisioner.Stop(ctx, ws.id) + if err := h.StopWorkspaceAuto(ctx, ws.id); err != nil { + log.Printf("Pause: stop %s failed: %v — orphan sweeper will reconcile", ws.id, err) } db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, url = '', updated_at = now() WHERE id = $2`, models.StatusPaused, ws.id)