feat(handlers): migrate Restart + Resume handlers to dispatchers — #2799 Phase 2 PR-A

Sites 1+2 (Restart HTTP handler goroutine) and Site 3 (Resume HTTP
handler goroutine) now route through RestartWorkspaceAutoOpts /
provisionWorkspaceAuto instead of inlining the if-cpProv-else dispatch.

Three changes:

1. **RestartWorkspaceAutoOpts** — new variant of RestartWorkspaceAuto
   that carries the resetClaudeSession Docker-only flag (issue #12).
   The bare RestartWorkspaceAuto still exists as a wrapper that calls
   Opts with false. CP path silently ignores the flag (each EC2 boots
   fresh — no session state to clear). Mirrors the Provision pair
   (provisionWorkspace / provisionWorkspaceOpts).

2. **Restart handler (Site 1+2)** — the inline goroutine
   `if h.provisioner != nil { Stop } else if h.cpProv != nil { ... }`
   collapses to `RestartWorkspaceAutoOpts(...)`. Pre-fix the dispatch
   was Docker-FIRST ordering (a different drift class from the
   silent-drop bugs PRs #2811/#2824 closed); the dispatcher enforces
   CP-FIRST.

3. **Resume handler (Site 3)** — Resume is provision-only (workspace
   is paused, no live container), so it routes through
   provisionWorkspaceAuto, not RestartWorkspaceAuto. Inline
   if-cpProv-else dispatch removed.

Two new source-level pins:
  - TestRestartHandler_UsesRestartWorkspaceAuto
  - TestResumeHandler_UsesProvisionWorkspaceAuto

These prevent regression to the inline dispatch pattern.

Out of scope (tracked under #2799):
  - Site 4 (runRestartCycle) — synchronous coordination model needs
    a different shape than the fire-and-return dispatchers. PR-B.
  - Site 5 (Pause loop) — PAUSE doesn't reprovision, needs a new
    PauseWorkspaceAuto verb. Phase 3.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hongming Wang 2026-05-04 22:09:12 -07:00
parent 789d705866
commit 5b7f4d260b
3 changed files with 100 additions and 32 deletions

View File

@ -254,18 +254,34 @@ func (h *WorkspaceHandler) StopWorkspaceAuto(ctx context.Context, workspaceID st
// runRestartCycle does this synchronously (line ~538) before delegating
// — match that pattern in any new caller.
func (h *WorkspaceHandler) RestartWorkspaceAuto(ctx context.Context, workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload) bool {
return h.RestartWorkspaceAutoOpts(ctx, workspaceID, templatePath, configFiles, payload, false)
}
// RestartWorkspaceAutoOpts is the variant that carries Docker-only
// per-invocation knobs that don't fit on CreateWorkspacePayload. Today
// the only such knob is resetClaudeSession (issue #12 — clears the
// in-container Claude session before restart so the agent comes up
// fresh). CP doesn't have a session-reset concept (each EC2 boots from
// a fresh image), so the flag is silently ignored on the CP path.
//
// Most callers should call RestartWorkspaceAuto (resetClaudeSession=
// false). The Restart HTTP handler is the one site that exposes the
// flag to operators — it reads ?reset_session=true from the query
// string when an operator wants to force a fresh session.
func (h *WorkspaceHandler) RestartWorkspaceAutoOpts(ctx context.Context, workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload, resetClaudeSession bool) bool {
// Stop leg first. CP-first ordering matches the other dispatchers
// (provisionWorkspaceAuto, StopWorkspaceAuto) and the convention
// documented in docs/architecture/backends.md.
if h.cpProv != nil {
h.cpStopWithRetry(ctx, workspaceID, "RestartWorkspaceAuto")
// resetClaudeSession is Docker-only — CP has no session state to clear.
go h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload)
return true
}
if h.provisioner != nil {
// Docker.Stop has no retry — see docstring rationale.
h.provisioner.Stop(ctx, workspaceID)
go h.provisionWorkspace(workspaceID, templatePath, configFiles, payload)
go h.provisionWorkspaceOpts(workspaceID, templatePath, configFiles, payload, resetClaudeSession)
return true
}
// No backend wired — same shape as provisionWorkspaceAuto's no-backend

View File

@ -752,6 +752,62 @@ func TestRestartWorkspaceAuto_NoBackendMarksFailed(t *testing.T) {
}
}
// TestRestartHandler_UsesRestartWorkspaceAuto — source-level pin that
// the Restart HTTP handler routes through the dispatcher. Phase 2 PR-A
// of #2799 migrates Site 1+2 (the Restart goroutine) to call
// RestartWorkspaceAutoOpts. This test pins the migration so the next
// refactor doesn't accidentally regress to the inline if-cpProv-else
// dispatch — that pre-fix shape had Docker-FIRST ordering, a different
// drift class from the silent-drop bugs PRs #2811/#2824 closed.
//
// Allowed in workspace_restart.go: stopForRestart (Site 4), Pause
// (Site 5). Both are tracked under #2799 Phase 2 PR-B / Phase 3.
func TestRestartHandler_UsesRestartWorkspaceAuto(t *testing.T) {
wd, err := os.Getwd()
if err != nil {
t.Fatalf("getwd: %v", err)
}
src, err := os.ReadFile(filepath.Join(wd, "workspace_restart.go"))
if err != nil {
t.Fatalf("read workspace_restart.go: %v", err)
}
stripped := stripGoComments(src)
// The Restart handler must dispatch through the SoT helper. Either
// signature variant satisfies the gate.
if !bytes.Contains(stripped, []byte("h.RestartWorkspaceAutoOpts(")) &&
!bytes.Contains(stripped, []byte("h.RestartWorkspaceAuto(")) {
t.Errorf("workspace_restart.go must call RestartWorkspaceAuto[Opts] from the Restart handler — current code does not. " +
"Phase 2 of #2799 migrated this site; do not regress to the inline if-cpProv-else dispatch.")
}
}
// TestResumeHandler_UsesProvisionWorkspaceAuto — source-level pin that
// the Resume HTTP handler routes through the dispatcher. Phase 2 PR-A
// of #2799 migrates Site 3 (Resume goroutine) to call
// provisionWorkspaceAuto (Resume is provision-only — the workspace is
// already paused/stopped, no Stop step needed).
func TestResumeHandler_UsesProvisionWorkspaceAuto(t *testing.T) {
wd, err := os.Getwd()
if err != nil {
t.Fatalf("getwd: %v", err)
}
src, err := os.ReadFile(filepath.Join(wd, "workspace_restart.go"))
if err != nil {
t.Fatalf("read workspace_restart.go: %v", err)
}
stripped := stripGoComments(src)
// The Resume handler's loop must dispatch through provisionWorkspaceAuto.
// Doesn't need a uniqueness check — the file already calls it from at
// least the Resume site (a regression that removes only the Resume call
// would still match this needle from another call in the file, but the
// stripGoComments output of workspace_restart.go is small enough that
// inspecting the diff catches that.)
if !bytes.Contains(stripped, []byte("h.provisionWorkspaceAuto(ws.id")) {
t.Errorf("workspace_restart.go must call provisionWorkspaceAuto from the Resume handler with `ws.id` — current code does not. " +
"Phase 2 of #2799 migrated this site; do not regress to the inline if-cpProv-else dispatch.")
}
}
// stripGoComments removes // line comments and /* */ block comments
// from Go source. Imperfect (doesn't handle comments-inside-strings)
// but adequate for the source-level pin tests in this file — none of

View File

@ -199,33 +199,31 @@ func (h *WorkspaceHandler) Restart(c *gin.Context) {
// last_heartbeat_at with the new session. Issue #19 Layer 1.
restartData := loadRestartContextData(ctx, id)
// Dispatch to the correct provisioner. provisionWorkspaceOpts is the
// Docker path; provisionWorkspaceCP is the SaaS path. The Create
// handler already branches this way; Restart now mirrors it.
// Dispatch through the SoT restart dispatcher. RestartWorkspaceAutoOpts
// owns "which backend for stop" + "which backend for provision" and
// keeps the two halves in sync. resetClaudeSession is the one
// Docker-only per-invocation knob the dispatcher carries through.
//
// Stop runs inside this goroutine — NOT before the response — because
// CPProvisioner.Stop is synchronous DELETE /cp/workspaces/:id →
// CP → AWS EC2 terminate, which can exceed the canvas's 15s default
// Stop runs inside the dispatcher's stop leg (synchronous), then the
// provision leg fires in a goroutine — NOT before the response —
// because CPProvisioner.Stop is synchronous DELETE /cp/workspaces/:id
// → CP → AWS EC2 terminate, which can exceed the canvas's 15s default
// HTTP timeout when the platform has just redeployed (every tenant's
// CP request queues at once). Pre-fix the user saw a misleading
// "signal timed out" error on the canvas even though the restart
// actually succeeded — caught 2026-04-30 on hongmingwang hermes
// CP request queues at once). Pre-fix (2026-04-30) the user saw a
// misleading "signal timed out" on the canvas even though the
// restart actually succeeded — caught on hongmingwang hermes
// workspace 32993ee7-…cb9d75d112a5 right after the heartbeat-fix
// platform redeploy. Use context.Background() to detach from the
// request lifecycle so an aborted client connection doesn't cancel
// the in-flight Stop/provision pair.
// platform redeploy. context.Background() detaches the dispatch
// from the request lifecycle so an aborted client connection
// doesn't cancel the in-flight Stop/provision pair.
//
// Pre-2026-05-05 this site inlined the manual if-cpProv-else
// dispatch with Docker-FIRST ordering (a different drift class from
// the silent-drop bugs PRs #2811/#2824 closed). RestartWorkspaceAuto
// enforces CP-FIRST ordering matching the other dispatchers — see
// docs/architecture/backends.md.
go func() {
bgCtx := context.Background()
if h.provisioner != nil {
h.provisioner.Stop(bgCtx, id)
} else if h.cpProv != nil {
h.cpStopWithRetry(bgCtx, id, "Restart")
}
if h.cpProv != nil {
h.provisionWorkspaceCP(id, templatePath, configFiles, payload)
} else {
h.provisionWorkspaceOpts(id, templatePath, configFiles, payload, resetClaudeSession)
}
h.RestartWorkspaceAutoOpts(context.Background(), id, templatePath, configFiles, payload, resetClaudeSession)
}()
go h.sendRestartContext(id, restartData)
@ -698,14 +696,12 @@ func (h *WorkspaceHandler) Resume(c *gin.Context) {
"name": ws.name, "tier": ws.tier, "runtime": ws.runtime,
})
payload := models.CreateWorkspacePayload{Name: ws.name, Tier: ws.tier, Runtime: ws.runtime}
// Dispatch to the matching provisioner (mirrors the Create +
// Restart branching). SaaS tenants use cpProv; self-hosted Docker
// uses provisioner via provisionWorkspaceOpts.
if h.cpProv != nil {
go h.provisionWorkspaceCP(ws.id, "", nil, payload)
} else {
go h.provisionWorkspace(ws.id, "", nil, payload)
}
// Resume is provision-only (workspace is paused, no live container
// to stop). provisionWorkspaceAuto handles backend routing and the
// no-backend mark-failed fallback identically to Create. Pre-
// 2026-05-05 this site inlined the if-cpProv-else dispatch; the
// dispatcher is the SoT now.
h.provisionWorkspaceAuto(ws.id, "", nil, payload)
}
log.Printf("Resuming workspace %s (%s) + %d children", wsName, id, len(toResume)-1)