forked from molecule-ai/molecule-core
refactor(handlers): extract dispatchers from workspace.go (#2800 partial)
workspace.go was 950 lines after the dispatcher work in PRs #2811 + #2824 + #2843 + #2846 + #2847 + #2848 + #2850. This extracts the 6 SoT dispatcher helpers into a new workspace_dispatchers.go so the file is the architectural unit it deserves to be (one place for "how do we route a workspace lifecycle verb to a backend?"). Moved (no body changes — pure cut + paste with imports): - HasProvisioner (gate accessor) - provisionWorkspaceAuto (async provision) - provisionWorkspaceAutoSync (sync provision, runRestartCycle's path) - StopWorkspaceAuto (stop dispatcher) - RestartWorkspaceAuto (restart wrapper) - RestartWorkspaceAutoOpts (restart with resetClaudeSession) workspace.go shrinks from 950 → 735 lines and now holds: - WorkspaceHandler struct + constructor - SetCPProvisioner / SetEnvMutators - Create / List / Get / scanWorkspaceRow - HTTP handler glue workspace_dispatchers.go is 255 lines and holds the dispatcher trio + sync variant + gate accessor + a header docblock summarizing the history (PRs that added each helper) and the source-level pin tests that gate against drift. Source-level pin tests updated: - TestNoCallSiteCallsDirectProvisionerExceptAuto: workspace_dispatchers.go added to allowlist (the dispatcher IS the place that calls per-backend bodies directly). - TestNoCallSiteCallsBareStop: same. - TestNoBareBothNilCheck / TestOrgImportGate_UsesHasProvisionerNotBareField: no change — they were source-pinning specific files, not all callers. Build clean, vet clean, full test suite passes (1742 / 0 in workspace, all Go test packages green). Out of scope (#2800 has more): - workspace_provision.go (869 lines) split into Docker + CP halves — files would still be 400+ each, marginal value. Defer until a third backend lands and the symmetry breaks. - Splitting Create / List / Get into per-handler files — they're short and tightly coupled to the struct; keep co-located. Closes #2800 partial. 
A follow-up issue will be filed if/when workspace.go or workspace_provision.go grows past 800 lines again. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
d21ac991c1
commit
024ef260db
@ -112,222 +112,6 @@ func (h *WorkspaceHandler) SetCPProvisioner(cp provisioner.CPProvisionerAPI) {
|
||||
h.cpProv = cp
|
||||
}
|
||||
|
||||
// HasProvisioner reports whether either backend (CP or local Docker) is
|
||||
// wired. Callers that gate prep-work on "do we have something that can
|
||||
// provision a container?" should use this rather than direct field access
|
||||
// to either provisioner — those individual checks miss the SaaS path
|
||||
// (cpProv set, provisioner nil) or the self-hosted path (provisioner set,
|
||||
// cpProv nil) symmetrically. Org-import + future bulk paths gate their
|
||||
// template/config/secret prep on this so the work isn't wasted on
|
||||
// deployments where no backend is available.
|
||||
func (h *WorkspaceHandler) HasProvisioner() bool {
|
||||
return h.cpProv != nil || h.provisioner != nil
|
||||
}
|
||||
|
||||
// provisionWorkspaceAuto picks the backend (CP for SaaS, local Docker
|
||||
// for self-hosted) and starts provisioning in a goroutine. Returns true
|
||||
// when a backend was kicked off, false when neither is wired.
|
||||
//
|
||||
// Single source of truth for "start provisioning a workspace" across
|
||||
// every caller (Create, OrgHandler.createWorkspaceTree, TeamHandler.Expand,
|
||||
// future paths). Centralized routing here means callers don't repeat
|
||||
// the "Docker vs CP" decision and can't drift on it.
|
||||
//
|
||||
// Self-marks-failed on the no-backend path: pre-2026-05-05 the false
|
||||
// return was silent, and any caller that forgot to handle it (TeamHandler
|
||||
// pre-#2367, OrgHandler.createWorkspaceTree pre-this-fix) silently
|
||||
// dropped workspaces — they sat in 'provisioning' for 10 min until the
|
||||
// sweeper marked them failed with the misleading "container started but
|
||||
// never called /registry/register" message. Marking failed inside Auto
|
||||
// closes that class: even if a future caller bypasses HasProvisioner
|
||||
// gating or ignores the bool return, the workspace ends in a clean
|
||||
// failed state with an actionable error message.
|
||||
//
|
||||
// Architectural principle: templates own runtime/config/prompts/files/
|
||||
// plugins; the platform owns where it runs. Anything that picks
|
||||
// between CP and local Docker belongs in this one helper. Anything
|
||||
// post-routing-but-pre-Start (mint secrets, render template, etc.)
|
||||
// lives in prepareProvisionContext (shared by both per-backend
|
||||
// goroutines).
|
||||
func (h *WorkspaceHandler) provisionWorkspaceAuto(workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload) bool {
|
||||
if h.cpProv != nil {
|
||||
go h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload)
|
||||
return true
|
||||
}
|
||||
if h.provisioner != nil {
|
||||
go h.provisionWorkspace(workspaceID, templatePath, configFiles, payload)
|
||||
return true
|
||||
}
|
||||
// No backend wired — mark failed so the workspace doesn't linger in
|
||||
// 'provisioning' for the full 10-minute sweep window. 10s is enough
|
||||
// for the broadcast + single UPDATE inside markProvisionFailed.
|
||||
log.Printf("provisionWorkspaceAuto: no provisioning backend wired for %s — marking failed (cpProv=nil, provisioner=nil)", workspaceID)
|
||||
failCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
h.markProvisionFailed(failCtx, workspaceID,
|
||||
"no provisioning backend available — workspace requires either a Docker daemon (self-hosted) or control-plane provisioner (SaaS)",
|
||||
nil)
|
||||
return false
|
||||
}
|
||||
|
||||
// provisionWorkspaceAutoSync is the synchronous variant of
|
||||
// provisionWorkspaceAuto — it BLOCKS in the current goroutine until the
|
||||
// per-backend provision body returns, instead of spawning a goroutine.
|
||||
//
|
||||
// Used by callers that need to coordinate stop+provision as a pair and
|
||||
// can't return until provision is done — today that's runRestartCycle
|
||||
// (auto-restart cycle's pending-flag loop relies on synchronous return
|
||||
// to know when it's safe to start the next cycle without racing the
|
||||
// in-flight provision goroutine on the next iteration's Stop call).
|
||||
//
|
||||
// Backend selection + no-backend fallback are identical to
|
||||
// provisionWorkspaceAuto. The only difference is the goroutine wrapper.
|
||||
// Keep these two helpers in sync — when one grows a new arm (third
|
||||
// backend, retry semantics), the other should too.
|
||||
func (h *WorkspaceHandler) provisionWorkspaceAutoSync(workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload) bool {
|
||||
if h.cpProv != nil {
|
||||
h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload)
|
||||
return true
|
||||
}
|
||||
if h.provisioner != nil {
|
||||
h.provisionWorkspace(workspaceID, templatePath, configFiles, payload)
|
||||
return true
|
||||
}
|
||||
log.Printf("provisionWorkspaceAutoSync: no provisioning backend wired for %s — marking failed (cpProv=nil, provisioner=nil)", workspaceID)
|
||||
failCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
h.markProvisionFailed(failCtx, workspaceID,
|
||||
"no provisioning backend available — workspace requires either a Docker daemon (self-hosted) or control-plane provisioner (SaaS)",
|
||||
nil)
|
||||
return false
|
||||
}
|
||||
|
||||
// StopWorkspaceAuto picks the backend (CP for SaaS, local Docker for
|
||||
// self-hosted) and stops the workspace synchronously. Returns nil when
|
||||
// neither backend is wired (a workspace nobody is running can't be
|
||||
// stopped — that's a no-op, not an error).
|
||||
//
|
||||
// Single source of truth for "stop a workspace" — symmetric with
|
||||
// provisionWorkspaceAuto. Pre-2026-05-05 the stop side had no Auto
|
||||
// dispatcher and every caller wrote `if h.provisioner != nil { Stop }`,
|
||||
// which silently leaked EC2s on SaaS:
|
||||
// - team.go:208 (Collapse) — issue #2813
|
||||
// - workspace_crud.go:432 (stopAndRemove during Delete) — issue #2814
|
||||
//
|
||||
// Both bugs reproduced for ~6 months. The pattern is the same drift
|
||||
// class as the org-import provision bug closed by PR #2811.
|
||||
//
|
||||
// Why CP wins when both are wired (matching provisionWorkspaceAuto):
|
||||
// production runs exactly one backend at a time — a SaaS tenant has
|
||||
// cpProv set + provisioner nil; a self-hosted operator has provisioner
|
||||
// set + cpProv nil. The "both set" case only arises in test fixtures,
|
||||
// and the CP-wins ordering matches how Auto picks for provisioning so
|
||||
// the test stubs stay on a single side.
|
||||
//
|
||||
// Volume cleanup (workspace_crud.go) stays Docker-only — CP-managed
|
||||
// workspaces have no volumes to clean. Callers that need that extra
|
||||
// step keep their `if h.provisioner != nil { RemoveVolume(...) }`
|
||||
// gate AFTER calling StopWorkspaceAuto. The abstraction here is "stop
|
||||
// the running workload," not "tear down all state."
|
||||
func (h *WorkspaceHandler) StopWorkspaceAuto(ctx context.Context, workspaceID string) error {
|
||||
if h.cpProv != nil {
|
||||
return h.cpProv.Stop(ctx, workspaceID)
|
||||
}
|
||||
if h.provisioner != nil {
|
||||
return h.provisioner.Stop(ctx, workspaceID)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// RestartWorkspaceAuto stops the running workload (with retry semantics
|
||||
// tuned for the restart hot path) then starts provisioning again, in a
|
||||
// detached goroutine. Returns true when a backend was kicked off, false
|
||||
// when neither is wired (caller owns the persist + mark-failed surface
|
||||
// in that case — symmetric with provisionWorkspaceAuto's bool return).
|
||||
//
|
||||
// Single source of truth for "restart a workspace" — third in the
|
||||
// dispatcher trio alongside provisionWorkspaceAuto and StopWorkspaceAuto.
|
||||
// Phase 1 of #2799 introduces this helper + migrates one caller; the
|
||||
// remaining workspace_restart.go sites (Restart HTTP handler goroutine,
|
||||
// Resume handler, Pause loop) follow in Phase 2/3 because they need
|
||||
// async-context reasoning beyond a fire-and-return dispatcher.
|
||||
//
|
||||
// Retry on the Stop leg is intentional and distinguishes this from
|
||||
// StopWorkspaceAuto:
|
||||
//
|
||||
// - StopWorkspaceAuto (Stop-on-delete contract): no retry, no-backend
|
||||
// is a silent no-op. Different verb, different stakes — a workspace
|
||||
// nobody is running can't be stopped.
|
||||
//
|
||||
// - RestartWorkspaceAuto: bounded exponential backoff on cpProv.Stop
|
||||
// via cpStopWithRetry. Restart's contract is "make the workspace
|
||||
// alive again" — refusing to reprovision when Stop fails strands
|
||||
// the user with a dead workspace and no recovery path other than
|
||||
// manual canvas intervention. Retry absorbs the transient CP/AWS
|
||||
// hiccups that cause most EC2-leak-adjacent incidents. On final
|
||||
// exhaustion, cpStopWithRetry logs LEAK-SUSPECT and proceeds with
|
||||
// reprovision regardless, bridging to the orphan reconciler.
|
||||
//
|
||||
// Docker provisioner.Stop has no retry — a local container that fails
|
||||
// to stop is a local infrastructure problem (OOM, resource pressure)
|
||||
// and retries won't help; the subsequent provision attempt will surface
|
||||
// the underlying daemon failure.
|
||||
//
|
||||
// Architectural note: this helper encapsulates the stop+reprovision
|
||||
// pair. The "which backend for stop" and "which backend for provision"
|
||||
// decisions live here and stay in sync (CP-stop pairs with CP-provision;
|
||||
// Docker-stop pairs with Docker-provision). Callers that need only the
|
||||
// stop half use StopWorkspaceAuto (delete path) or stopForRestart
|
||||
// (restart-path internal helper) directly.
|
||||
//
|
||||
// Payload requirements: caller MUST construct payload from the live
|
||||
// workspace row (name, runtime, tier, model, workspace_dir, etc.) so
|
||||
// the reprovision comes up with the workspace's actual configuration.
|
||||
// runRestartCycle does this synchronously (line ~538) before delegating
|
||||
// — match that pattern in any new caller.
|
||||
func (h *WorkspaceHandler) RestartWorkspaceAuto(ctx context.Context, workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload) bool {
|
||||
return h.RestartWorkspaceAutoOpts(ctx, workspaceID, templatePath, configFiles, payload, false)
|
||||
}
|
||||
|
||||
// RestartWorkspaceAutoOpts is the variant that carries Docker-only
|
||||
// per-invocation knobs that don't fit on CreateWorkspacePayload. Today
|
||||
// the only such knob is resetClaudeSession (issue #12 — clears the
|
||||
// in-container Claude session before restart so the agent comes up
|
||||
// fresh). CP doesn't have a session-reset concept (each EC2 boots from
|
||||
// a fresh image), so the flag is silently ignored on the CP path.
|
||||
//
|
||||
// Most callers should call RestartWorkspaceAuto (resetClaudeSession=
|
||||
// false). The Restart HTTP handler is the one site that exposes the
|
||||
// flag to operators — it reads ?reset_session=true from the query
|
||||
// string when an operator wants to force a fresh session.
|
||||
func (h *WorkspaceHandler) RestartWorkspaceAutoOpts(ctx context.Context, workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload, resetClaudeSession bool) bool {
|
||||
// Stop leg first. CP-first ordering matches the other dispatchers
|
||||
// (provisionWorkspaceAuto, StopWorkspaceAuto) and the convention
|
||||
// documented in docs/architecture/backends.md.
|
||||
if h.cpProv != nil {
|
||||
h.cpStopWithRetry(ctx, workspaceID, "RestartWorkspaceAuto")
|
||||
// resetClaudeSession is Docker-only — CP has no session state to clear.
|
||||
go h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload)
|
||||
return true
|
||||
}
|
||||
if h.provisioner != nil {
|
||||
// Docker.Stop has no retry — see docstring rationale.
|
||||
h.provisioner.Stop(ctx, workspaceID)
|
||||
go h.provisionWorkspaceOpts(workspaceID, templatePath, configFiles, payload, resetClaudeSession)
|
||||
return true
|
||||
}
|
||||
// No backend wired — same shape as provisionWorkspaceAuto's no-backend
|
||||
// arm. Mark the workspace failed so the user sees a meaningful state
|
||||
// rather than a hang. 10s context lets markProvisionFailed broadcast
|
||||
// + UPDATE; the original ctx may already be cancelled.
|
||||
log.Printf("RestartWorkspaceAuto: no provisioning backend wired for %s — marking failed (cpProv=nil, provisioner=nil)", workspaceID)
|
||||
failCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
h.markProvisionFailed(failCtx, workspaceID,
|
||||
"no provisioning backend available — workspace requires either a Docker daemon (self-hosted) or control-plane provisioner (SaaS)",
|
||||
nil)
|
||||
return false
|
||||
}
|
||||
|
||||
// SetEnvMutators wires a provisionhook.Registry into the handler. Plugins
|
||||
// living in separate repos register on the same Registry instance during
|
||||
|
||||
255
workspace-server/internal/handlers/workspace_dispatchers.go
Normal file
255
workspace-server/internal/handlers/workspace_dispatchers.go
Normal file
@ -0,0 +1,255 @@
|
||||
package handlers
|
||||
|
||||
// workspace_dispatchers.go — Single-source-of-truth dispatchers for the
|
||||
// workspace lifecycle verbs (Create / Stop / Restart). Each helper picks
|
||||
// the right backend (Docker for self-hosted, CP for SaaS) and either
|
||||
// runs the per-backend body in a goroutine or synchronously, depending
|
||||
// on caller need.
|
||||
//
|
||||
// The dispatchers are the architectural boundary between handler code
|
||||
// (HTTP / orchestration) and per-backend implementations
|
||||
// (workspace_provision.go for Docker + CP). Source-level pin tests in
|
||||
// workspace_provision_auto_test.go enforce that handlers route through
|
||||
// these helpers rather than calling the per-backend bodies directly —
|
||||
// see TestNoCallSiteCallsDirectProvisionerExceptAuto, TestNoCallSiteCallsBareStop,
|
||||
// TestNoBareBothNilCheck, TestOrgImportGate_UsesHasProvisionerNotBareField.
|
||||
//
|
||||
// Architectural docs: docs/architecture/backends.md.
|
||||
//
|
||||
// History:
|
||||
// - PR #2811 introduced provisionWorkspaceAuto + HasProvisioner gate
|
||||
// (closed the org-import SaaS-skip silent-drop bug class).
|
||||
// - PR #2824 added StopWorkspaceAuto (closed the team-collapse +
|
||||
// workspace-delete EC2-leak class — issues #2813, #2814).
|
||||
// - PR #2843 + #2846 + #2847 + #2848 added RestartWorkspaceAuto +
|
||||
// RestartWorkspaceAutoOpts + provisionWorkspaceAutoSync and
|
||||
// migrated the four workspace_restart.go dispatch sites.
|
||||
// - This file extracts the helpers from workspace.go so the dispatcher
|
||||
// trio + sync variant + gate accessor are visually co-located,
|
||||
// making it easier for the next contributor to find and add a new
|
||||
// lifecycle verb without inlining dispatch logic.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
|
||||
)
|
||||
|
||||
// HasProvisioner reports whether either backend (CP or local Docker) is
|
||||
// wired. Callers that gate prep-work on "do we have something that can
|
||||
// provision a container?" should use this rather than direct field access
|
||||
// to either provisioner — those individual checks miss the SaaS path
|
||||
// (cpProv set, provisioner nil) or the self-hosted path (provisioner set,
|
||||
// cpProv nil) symmetrically. Org-import + future bulk paths gate their
|
||||
// template/config/secret prep on this so the work isn't wasted on
|
||||
// deployments where no backend is available.
|
||||
func (h *WorkspaceHandler) HasProvisioner() bool {
|
||||
return h.cpProv != nil || h.provisioner != nil
|
||||
}
|
||||
|
||||
// provisionWorkspaceAuto picks the backend (CP for SaaS, local Docker
|
||||
// for self-hosted) and starts provisioning in a goroutine. Returns true
|
||||
// when a backend was kicked off, false when neither is wired.
|
||||
//
|
||||
// Single source of truth for "start provisioning a workspace" across
|
||||
// every caller (Create, OrgHandler.createWorkspaceTree, TeamHandler.Expand,
|
||||
// future paths). Centralized routing here means callers don't repeat
|
||||
// the "Docker vs CP" decision and can't drift on it.
|
||||
//
|
||||
// Self-marks-failed on the no-backend path: pre-2026-05-05 the false
|
||||
// return was silent, and any caller that forgot to handle it (TeamHandler
|
||||
// pre-#2367, OrgHandler.createWorkspaceTree pre-this-fix) silently
|
||||
// dropped workspaces — they sat in 'provisioning' for 10 min until the
|
||||
// sweeper marked them failed with the misleading "container started but
|
||||
// never called /registry/register" message. Marking failed inside Auto
|
||||
// closes that class: even if a future caller bypasses HasProvisioner
|
||||
// gating or ignores the bool return, the workspace ends in a clean
|
||||
// failed state with an actionable error message.
|
||||
//
|
||||
// Architectural principle: templates own runtime/config/prompts/files/
|
||||
// plugins; the platform owns where it runs. Anything that picks
|
||||
// between CP and local Docker belongs in this one helper. Anything
|
||||
// post-routing-but-pre-Start (mint secrets, render template, etc.)
|
||||
// lives in prepareProvisionContext (shared by both per-backend
|
||||
// goroutines).
|
||||
func (h *WorkspaceHandler) provisionWorkspaceAuto(workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload) bool {
|
||||
if h.cpProv != nil {
|
||||
go h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload)
|
||||
return true
|
||||
}
|
||||
if h.provisioner != nil {
|
||||
go h.provisionWorkspace(workspaceID, templatePath, configFiles, payload)
|
||||
return true
|
||||
}
|
||||
// No backend wired — mark failed so the workspace doesn't linger in
|
||||
// 'provisioning' for the full 10-minute sweep window. 10s is enough
|
||||
// for the broadcast + single UPDATE inside markProvisionFailed.
|
||||
log.Printf("provisionWorkspaceAuto: no provisioning backend wired for %s — marking failed (cpProv=nil, provisioner=nil)", workspaceID)
|
||||
failCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
h.markProvisionFailed(failCtx, workspaceID,
|
||||
"no provisioning backend available — workspace requires either a Docker daemon (self-hosted) or control-plane provisioner (SaaS)",
|
||||
nil)
|
||||
return false
|
||||
}
|
||||
|
||||
// provisionWorkspaceAutoSync is the synchronous variant of
|
||||
// provisionWorkspaceAuto — it BLOCKS in the current goroutine until the
|
||||
// per-backend provision body returns, instead of spawning a goroutine.
|
||||
//
|
||||
// Used by callers that need to coordinate stop+provision as a pair and
|
||||
// can't return until provision is done — today that's runRestartCycle
|
||||
// (auto-restart cycle's pending-flag loop relies on synchronous return
|
||||
// to know when it's safe to start the next cycle without racing the
|
||||
// in-flight provision goroutine on the next iteration's Stop call).
|
||||
//
|
||||
// Backend selection + no-backend fallback are identical to
|
||||
// provisionWorkspaceAuto. The only difference is the goroutine wrapper.
|
||||
// Keep these two helpers in sync — when one grows a new arm (third
|
||||
// backend, retry semantics), the other should too.
|
||||
func (h *WorkspaceHandler) provisionWorkspaceAutoSync(workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload) bool {
|
||||
if h.cpProv != nil {
|
||||
h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload)
|
||||
return true
|
||||
}
|
||||
if h.provisioner != nil {
|
||||
h.provisionWorkspace(workspaceID, templatePath, configFiles, payload)
|
||||
return true
|
||||
}
|
||||
log.Printf("provisionWorkspaceAutoSync: no provisioning backend wired for %s — marking failed (cpProv=nil, provisioner=nil)", workspaceID)
|
||||
failCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
h.markProvisionFailed(failCtx, workspaceID,
|
||||
"no provisioning backend available — workspace requires either a Docker daemon (self-hosted) or control-plane provisioner (SaaS)",
|
||||
nil)
|
||||
return false
|
||||
}
|
||||
|
||||
// StopWorkspaceAuto picks the backend (CP for SaaS, local Docker for
|
||||
// self-hosted) and stops the workspace synchronously. Returns nil when
|
||||
// neither backend is wired (a workspace nobody is running can't be
|
||||
// stopped — that's a no-op, not an error).
|
||||
//
|
||||
// Single source of truth for "stop a workspace" — symmetric with
|
||||
// provisionWorkspaceAuto. Pre-2026-05-05 the stop side had no Auto
|
||||
// dispatcher and every caller wrote `if h.provisioner != nil { Stop }`,
|
||||
// which silently leaked EC2s on SaaS:
|
||||
// - team.go:208 (Collapse) — issue #2813
|
||||
// - workspace_crud.go:432 (stopAndRemove during Delete) — issue #2814
|
||||
//
|
||||
// Both bugs reproduced for ~6 months. The pattern is the same drift
|
||||
// class as the org-import provision bug closed by PR #2811.
|
||||
//
|
||||
// Why CP wins when both are wired (matching provisionWorkspaceAuto):
|
||||
// production runs exactly one backend at a time — a SaaS tenant has
|
||||
// cpProv set + provisioner nil; a self-hosted operator has provisioner
|
||||
// set + cpProv nil. The "both set" case only arises in test fixtures,
|
||||
// and the CP-wins ordering matches how Auto picks for provisioning so
|
||||
// the test stubs stay on a single side.
|
||||
//
|
||||
// Volume cleanup (workspace_crud.go) stays Docker-only — CP-managed
|
||||
// workspaces have no volumes to clean. Callers that need that extra
|
||||
// step keep their `if h.provisioner != nil { RemoveVolume(...) }`
|
||||
// gate AFTER calling StopWorkspaceAuto. The abstraction here is "stop
|
||||
// the running workload," not "tear down all state."
|
||||
func (h *WorkspaceHandler) StopWorkspaceAuto(ctx context.Context, workspaceID string) error {
|
||||
if h.cpProv != nil {
|
||||
return h.cpProv.Stop(ctx, workspaceID)
|
||||
}
|
||||
if h.provisioner != nil {
|
||||
return h.provisioner.Stop(ctx, workspaceID)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// RestartWorkspaceAuto stops the running workload (with retry semantics
|
||||
// tuned for the restart hot path) then starts provisioning again, in a
|
||||
// detached goroutine. Returns true when a backend was kicked off, false
|
||||
// when neither is wired (caller owns the persist + mark-failed surface
|
||||
// in that case — symmetric with provisionWorkspaceAuto's bool return).
|
||||
//
|
||||
// Single source of truth for "restart a workspace" — third in the
|
||||
// dispatcher trio alongside provisionWorkspaceAuto and StopWorkspaceAuto.
|
||||
// Phase 1 of #2799 introduces this helper + migrates one caller; the
|
||||
// remaining workspace_restart.go sites (Restart HTTP handler goroutine,
|
||||
// Resume handler, Pause loop) follow in Phase 2/3 because they need
|
||||
// async-context reasoning beyond a fire-and-return dispatcher.
|
||||
//
|
||||
// Retry on the Stop leg is intentional and distinguishes this from
|
||||
// StopWorkspaceAuto:
|
||||
//
|
||||
// - StopWorkspaceAuto (Stop-on-delete contract): no retry, no-backend
|
||||
// is a silent no-op. Different verb, different stakes — a workspace
|
||||
// nobody is running can't be stopped.
|
||||
//
|
||||
// - RestartWorkspaceAuto: bounded exponential backoff on cpProv.Stop
|
||||
// via cpStopWithRetry. Restart's contract is "make the workspace
|
||||
// alive again" — refusing to reprovision when Stop fails strands
|
||||
// the user with a dead workspace and no recovery path other than
|
||||
// manual canvas intervention. Retry absorbs the transient CP/AWS
|
||||
// hiccups that cause most EC2-leak-adjacent incidents. On final
|
||||
// exhaustion, cpStopWithRetry logs LEAK-SUSPECT and proceeds with
|
||||
// reprovision regardless, bridging to the orphan reconciler.
|
||||
//
|
||||
// Docker provisioner.Stop has no retry — a local container that fails
|
||||
// to stop is a local infrastructure problem (OOM, resource pressure)
|
||||
// and retries won't help; the subsequent provision attempt will surface
|
||||
// the underlying daemon failure.
|
||||
//
|
||||
// Architectural note: this helper encapsulates the stop+reprovision
|
||||
// pair. The "which backend for stop" and "which backend for provision"
|
||||
// decisions live here and stay in sync (CP-stop pairs with CP-provision;
|
||||
// Docker-stop pairs with Docker-provision). Callers that need only the
|
||||
// stop half use StopWorkspaceAuto (delete path) or stopForRestart
|
||||
// (restart-path internal helper) directly.
|
||||
//
|
||||
// Payload requirements: caller MUST construct payload from the live
|
||||
// workspace row (name, runtime, tier, model, workspace_dir, etc.) so
|
||||
// the reprovision comes up with the workspace's actual configuration.
|
||||
// runRestartCycle does this synchronously (line ~538) before delegating
|
||||
// — match that pattern in any new caller.
|
||||
func (h *WorkspaceHandler) RestartWorkspaceAuto(ctx context.Context, workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload) bool {
|
||||
return h.RestartWorkspaceAutoOpts(ctx, workspaceID, templatePath, configFiles, payload, false)
|
||||
}
|
||||
|
||||
// RestartWorkspaceAutoOpts is the variant that carries Docker-only
|
||||
// per-invocation knobs that don't fit on CreateWorkspacePayload. Today
|
||||
// the only such knob is resetClaudeSession (issue #12 — clears the
|
||||
// in-container Claude session before restart so the agent comes up
|
||||
// fresh). CP doesn't have a session-reset concept (each EC2 boots from
|
||||
// a fresh image), so the flag is silently ignored on the CP path.
|
||||
//
|
||||
// Most callers should call RestartWorkspaceAuto (resetClaudeSession=
|
||||
// false). The Restart HTTP handler is the one site that exposes the
|
||||
// flag to operators — it reads ?reset_session=true from the query
|
||||
// string when an operator wants to force a fresh session.
|
||||
func (h *WorkspaceHandler) RestartWorkspaceAutoOpts(ctx context.Context, workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload, resetClaudeSession bool) bool {
|
||||
// Stop leg first. CP-first ordering matches the other dispatchers
|
||||
// (provisionWorkspaceAuto, StopWorkspaceAuto) and the convention
|
||||
// documented in docs/architecture/backends.md.
|
||||
if h.cpProv != nil {
|
||||
h.cpStopWithRetry(ctx, workspaceID, "RestartWorkspaceAuto")
|
||||
// resetClaudeSession is Docker-only — CP has no session state to clear.
|
||||
go h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload)
|
||||
return true
|
||||
}
|
||||
if h.provisioner != nil {
|
||||
// Docker.Stop has no retry — see docstring rationale.
|
||||
h.provisioner.Stop(ctx, workspaceID)
|
||||
go h.provisionWorkspaceOpts(workspaceID, templatePath, configFiles, payload, resetClaudeSession)
|
||||
return true
|
||||
}
|
||||
// No backend wired — same shape as provisionWorkspaceAuto's no-backend
|
||||
// arm. Mark the workspace failed so the user sees a meaningful state
|
||||
// rather than a hang. 10s context lets markProvisionFailed broadcast
|
||||
// + UPDATE; the original ctx may already be cancelled.
|
||||
log.Printf("RestartWorkspaceAuto: no provisioning backend wired for %s — marking failed (cpProv=nil, provisioner=nil)", workspaceID)
|
||||
failCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
h.markProvisionFailed(failCtx, workspaceID,
|
||||
"no provisioning backend available — workspace requires either a Docker daemon (self-hosted) or control-plane provisioner (SaaS)",
|
||||
nil)
|
||||
return false
|
||||
}
|
||||
@ -234,9 +234,14 @@ func TestNoCallSiteCallsDirectProvisionerExceptAuto(t *testing.T) {
|
||||
".provisionWorkspaceCP(",
|
||||
}
|
||||
allowedFiles := map[string]bool{
|
||||
// workspace.go DEFINES the methods + the Auto dispatcher; it's
|
||||
// allowed to reference them directly.
|
||||
// workspace.go holds the WorkspaceHandler struct + constructor.
|
||||
"workspace.go": true,
|
||||
// workspace_dispatchers.go IS the Auto dispatcher — calls the
|
||||
// per-backend bodies directly via `go h.provisionWorkspaceCP(...)`
|
||||
// / `go h.provisionWorkspace(...)`. The whole point of this gate
|
||||
// is "every OTHER caller routes through the dispatcher; the
|
||||
// dispatcher itself routes through the per-backend body".
|
||||
"workspace_dispatchers.go": true,
|
||||
// workspace_provision.go DEFINES the bodies of the direct
|
||||
// methods (and the Auto-internal call from CP-mode itself).
|
||||
"workspace_provision.go": true,
|
||||
@ -571,10 +576,11 @@ func TestNoCallSiteCallsBareStop(t *testing.T) {
|
||||
".cpProv.Stop(",
|
||||
}
|
||||
allowedFiles := map[string]bool{
|
||||
"workspace.go": true,
|
||||
"workspace_provision.go": true,
|
||||
"workspace_restart.go": true,
|
||||
"container_files.go": true,
|
||||
"workspace.go": true,
|
||||
"workspace_dispatchers.go": true,
|
||||
"workspace_provision.go": true,
|
||||
"workspace_restart.go": true,
|
||||
"container_files.go": true,
|
||||
}
|
||||
for _, entry := range entries {
|
||||
name := entry.Name()
|
||||
|
||||
Loading…
Reference in New Issue
Block a user