From 5b7f4d260b4b837035b59ddc085d0cebe854d2ee Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Mon, 4 May 2026 22:09:12 -0700 Subject: [PATCH] =?UTF-8?q?feat(handlers):=20migrate=20Restart=20+=20Resum?= =?UTF-8?q?e=20handlers=20to=20dispatchers=20=E2=80=94=20#2799=20Phase=202?= =?UTF-8?q?=20PR-A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sites 1+2 (Restart HTTP handler goroutine) and Site 3 (Resume HTTP handler goroutine) now route through RestartWorkspaceAutoOpts / provisionWorkspaceAuto instead of inlining the if-cpProv-else dispatch. Three changes: 1. **RestartWorkspaceAutoOpts** — new variant of RestartWorkspaceAuto that carries the resetClaudeSession Docker-only flag (issue #12). The bare RestartWorkspaceAuto still exists as a wrapper that calls Opts with false. CP path silently ignores the flag (each EC2 boots fresh — no session state to clear). Mirrors the Provision pair (provisionWorkspace / provisionWorkspaceOpts). 2. **Restart handler (Site 1+2)** — the inline goroutine `if h.provisioner != nil { Stop } else if h.cpProv != nil { ... }` collapses to `RestartWorkspaceAutoOpts(...)`. Pre-fix the dispatch was Docker-FIRST ordering (a different drift class from the silent-drop bugs PRs #2811/#2824 closed); the dispatcher enforces CP-FIRST. 3. **Resume handler (Site 3)** — Resume is provision-only (workspace is paused, no live container), so it routes through provisionWorkspaceAuto, not RestartWorkspaceAuto. Inline if-cpProv-else dispatch removed. Two new source-level pins: - TestRestartHandler_UsesRestartWorkspaceAuto - TestResumeHandler_UsesProvisionWorkspaceAuto These prevent regression to the inline dispatch pattern. Out of scope (tracked under #2799): - Site 4 (runRestartCycle) — synchronous coordination model needs a different shape than the fire-and-return dispatchers. PR-B. - Site 5 (Pause loop) — PAUSE doesn't reprovision, needs a new PauseWorkspaceAuto verb. Phase 3. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../internal/handlers/workspace.go | 18 +++++- .../handlers/workspace_provision_auto_test.go | 56 ++++++++++++++++++ .../internal/handlers/workspace_restart.go | 58 +++++++++---------- 3 files changed, 100 insertions(+), 32 deletions(-) diff --git a/workspace-server/internal/handlers/workspace.go b/workspace-server/internal/handlers/workspace.go index e2e52798..8d4cd7e3 100644 --- a/workspace-server/internal/handlers/workspace.go +++ b/workspace-server/internal/handlers/workspace.go @@ -254,18 +254,34 @@ func (h *WorkspaceHandler) StopWorkspaceAuto(ctx context.Context, workspaceID st // runRestartCycle does this synchronously (line ~538) before delegating // — match that pattern in any new caller. func (h *WorkspaceHandler) RestartWorkspaceAuto(ctx context.Context, workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload) bool { + return h.RestartWorkspaceAutoOpts(ctx, workspaceID, templatePath, configFiles, payload, false) +} + +// RestartWorkspaceAutoOpts is the variant that carries Docker-only +// per-invocation knobs that don't fit on CreateWorkspacePayload. Today +// the only such knob is resetClaudeSession (issue #12 — clears the +// in-container Claude session before restart so the agent comes up +// fresh). CP doesn't have a session-reset concept (each EC2 boots from +// a fresh image), so the flag is silently ignored on the CP path. +// +// Most callers should call RestartWorkspaceAuto (resetClaudeSession= +// false). The Restart HTTP handler is the one site that exposes the +// flag to operators — it reads ?reset_session=true from the query +// string when an operator wants to force a fresh session. +func (h *WorkspaceHandler) RestartWorkspaceAutoOpts(ctx context.Context, workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload, resetClaudeSession bool) bool { // Stop leg first. CP-first ordering matches the other dispatchers // (provisionWorkspaceAuto, StopWorkspaceAuto) and the convention // documented in docs/architecture/backends.md. if h.cpProv != nil { h.cpStopWithRetry(ctx, workspaceID, "RestartWorkspaceAuto") + // resetClaudeSession is Docker-only — CP has no session state to clear. go h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload) return true } if h.provisioner != nil { // Docker.Stop has no retry — see docstring rationale. h.provisioner.Stop(ctx, workspaceID) - go h.provisionWorkspace(workspaceID, templatePath, configFiles, payload) + go h.provisionWorkspaceOpts(workspaceID, templatePath, configFiles, payload, resetClaudeSession) return true } // No backend wired — same shape as provisionWorkspaceAuto's no-backend diff --git a/workspace-server/internal/handlers/workspace_provision_auto_test.go b/workspace-server/internal/handlers/workspace_provision_auto_test.go index 98fef586..62652189 100644 --- a/workspace-server/internal/handlers/workspace_provision_auto_test.go +++ b/workspace-server/internal/handlers/workspace_provision_auto_test.go @@ -752,6 +752,62 @@ func TestRestartWorkspaceAuto_NoBackendMarksFailed(t *testing.T) { } } +// TestRestartHandler_UsesRestartWorkspaceAuto — source-level pin that +// the Restart HTTP handler routes through the dispatcher. Phase 2 PR-A +// of #2799 migrates Site 1+2 (the Restart goroutine) to call +// RestartWorkspaceAutoOpts. This test pins the migration so the next +// refactor doesn't accidentally regress to the inline if-cpProv-else +// dispatch — that pre-fix shape had Docker-FIRST ordering, a different +// drift class from the silent-drop bugs PRs #2811/#2824 closed. +// +// Allowed in workspace_restart.go: stopForRestart (Site 4), Pause +// (Site 5). Both are tracked under #2799 Phase 2 PR-B / Phase 3. +func TestRestartHandler_UsesRestartWorkspaceAuto(t *testing.T) { + wd, err := os.Getwd() + if err != nil { + t.Fatalf("getwd: %v", err) + } + src, err := os.ReadFile(filepath.Join(wd, "workspace_restart.go")) + if err != nil { + t.Fatalf("read workspace_restart.go: %v", err) + } + stripped := stripGoComments(src) + // The Restart handler must dispatch through the SoT helper. Either + // signature variant satisfies the gate. + if !bytes.Contains(stripped, []byte("h.RestartWorkspaceAutoOpts(")) && + !bytes.Contains(stripped, []byte("h.RestartWorkspaceAuto(")) { + t.Errorf("workspace_restart.go must call RestartWorkspaceAuto[Opts] from the Restart handler — current code does not. " + + "Phase 2 of #2799 migrated this site; do not regress to the inline if-cpProv-else dispatch.") + } +} + +// TestResumeHandler_UsesProvisionWorkspaceAuto — source-level pin that +// the Resume HTTP handler routes through the dispatcher. Phase 2 PR-A +// of #2799 migrates Site 3 (Resume goroutine) to call +// provisionWorkspaceAuto (Resume is provision-only — the workspace is +// already paused/stopped, no Stop step needed). +func TestResumeHandler_UsesProvisionWorkspaceAuto(t *testing.T) { + wd, err := os.Getwd() + if err != nil { + t.Fatalf("getwd: %v", err) + } + src, err := os.ReadFile(filepath.Join(wd, "workspace_restart.go")) + if err != nil { + t.Fatalf("read workspace_restart.go: %v", err) + } + stripped := stripGoComments(src) + // The Resume handler's loop must dispatch through provisionWorkspaceAuto. + // Doesn't need a uniqueness check — the file already calls it from at + // least the Resume site (a regression that removes only the Resume call + // would still match this needle from another call in the file, but the + // stripGoComments output of workspace_restart.go is small enough that + // inspecting the diff catches that.) + if !bytes.Contains(stripped, []byte("h.provisionWorkspaceAuto(ws.id")) { + t.Errorf("workspace_restart.go must call provisionWorkspaceAuto from the Resume handler with `ws.id` — current code does not. " + + "Phase 2 of #2799 migrated this site; do not regress to the inline if-cpProv-else dispatch.") + } +} + // stripGoComments removes // line comments and /* */ block comments // from Go source. Imperfect (doesn't handle comments-inside-strings) // but adequate for the source-level pin tests in this file — none of diff --git a/workspace-server/internal/handlers/workspace_restart.go b/workspace-server/internal/handlers/workspace_restart.go index 9ccab037..536a4d81 100644 --- a/workspace-server/internal/handlers/workspace_restart.go +++ b/workspace-server/internal/handlers/workspace_restart.go @@ -199,33 +199,31 @@ func (h *WorkspaceHandler) Restart(c *gin.Context) { // last_heartbeat_at with the new session. Issue #19 Layer 1. restartData := loadRestartContextData(ctx, id) - // Dispatch to the correct provisioner. provisionWorkspaceOpts is the - // Docker path; provisionWorkspaceCP is the SaaS path. The Create - // handler already branches this way; Restart now mirrors it. + // Dispatch through the SoT restart dispatcher. RestartWorkspaceAutoOpts + // owns "which backend for stop" + "which backend for provision" and + // keeps the two halves in sync. resetClaudeSession is the one + // Docker-only per-invocation knob the dispatcher carries through. // - // Stop runs inside this goroutine — NOT before the response — because - // CPProvisioner.Stop is synchronous DELETE /cp/workspaces/:id → - // CP → AWS EC2 terminate, which can exceed the canvas's 15s default + // Stop runs inside the dispatcher's stop leg (synchronous), then the + // provision leg fires in a goroutine — NOT before the response — + // because CPProvisioner.Stop is synchronous DELETE /cp/workspaces/:id + // → CP → AWS EC2 terminate, which can exceed the canvas's 15s default // HTTP timeout when the platform has just redeployed (every tenant's - // CP request queues at once). Pre-fix the user saw a misleading - // "signal timed out" error on the canvas even though the restart - // actually succeeded — caught 2026-04-30 on hongmingwang hermes + // CP request queues at once). Pre-fix (2026-04-30) the user saw a + // misleading "signal timed out" on the canvas even though the + // restart actually succeeded — caught on hongmingwang hermes // workspace 32993ee7-…cb9d75d112a5 right after the heartbeat-fix - // platform redeploy. Use context.Background() to detach from the - // request lifecycle so an aborted client connection doesn't cancel - // the in-flight Stop/provision pair. + // platform redeploy. context.Background() detaches the dispatch + // from the request lifecycle so an aborted client connection + // doesn't cancel the in-flight Stop/provision pair. + // + // Pre-2026-05-05 this site inlined the manual if-cpProv-else + // dispatch with Docker-FIRST ordering (a different drift class from + // the silent-drop bugs PRs #2811/#2824 closed). RestartWorkspaceAuto + // enforces CP-FIRST ordering matching the other dispatchers — see + // docs/architecture/backends.md. go func() { - bgCtx := context.Background() - if h.provisioner != nil { - h.provisioner.Stop(bgCtx, id) - } else if h.cpProv != nil { - h.cpStopWithRetry(bgCtx, id, "Restart") - } - if h.cpProv != nil { - h.provisionWorkspaceCP(id, templatePath, configFiles, payload) - } else { - h.provisionWorkspaceOpts(id, templatePath, configFiles, payload, resetClaudeSession) - } + h.RestartWorkspaceAutoOpts(context.Background(), id, templatePath, configFiles, payload, resetClaudeSession) }() go h.sendRestartContext(id, restartData) @@ -698,14 +696,12 @@ func (h *WorkspaceHandler) Resume(c *gin.Context) { "name": ws.name, "tier": ws.tier, "runtime": ws.runtime, }) payload := models.CreateWorkspacePayload{Name: ws.name, Tier: ws.tier, Runtime: ws.runtime} - // Dispatch to the matching provisioner (mirrors the Create + - // Restart branching). SaaS tenants use cpProv; self-hosted Docker - // uses provisioner via provisionWorkspaceOpts. - if h.cpProv != nil { - go h.provisionWorkspaceCP(ws.id, "", nil, payload) - } else { - go h.provisionWorkspace(ws.id, "", nil, payload) - } + // Resume is provision-only (workspace is paused, no live container + // to stop). provisionWorkspaceAuto handles backend routing and the + // no-backend mark-failed fallback identically to Create. Pre- + // 2026-05-05 this site inlined the if-cpProv-else dispatch; the + // dispatcher is the SoT now. + h.provisionWorkspaceAuto(ws.id, "", nil, payload) } log.Printf("Resuming workspace %s (%s) + %d children", wsName, id, len(toResume)-1)