diff --git a/workspace-server/internal/handlers/workspace.go b/workspace-server/internal/handlers/workspace.go
index 8d4cd7e3..199c7d71 100644
--- a/workspace-server/internal/handlers/workspace.go
+++ b/workspace-server/internal/handlers/workspace.go
@@ -170,6 +170,38 @@ func (h *WorkspaceHandler) provisionWorkspaceAuto(workspaceID, templatePath stri
 	return false
 }
 
+// provisionWorkspaceAutoSync is the synchronous variant of
+// provisionWorkspaceAuto — it BLOCKS in the current goroutine until the
+// per-backend provision body returns, instead of spawning a goroutine.
+//
+// Used by callers that need to coordinate stop+provision as a pair and
+// can't return until provision is done — today that's runRestartCycle
+// (the auto-restart cycle's pending-flag loop relies on synchronous return
+// to know when it's safe to start the next cycle without racing the
+// in-flight provision goroutine on the next iteration's Stop call).
+//
+// Backend selection + no-backend fallback are identical to
+// provisionWorkspaceAuto. The only difference is the goroutine wrapper.
+// Keep these two helpers in sync — when one grows a new arm (third
+// backend, retry semantics), the other should too.
+func (h *WorkspaceHandler) provisionWorkspaceAutoSync(workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload) bool {
+	if h.cpProv != nil {
+		h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload)
+		return true
+	}
+	if h.provisioner != nil {
+		h.provisionWorkspace(workspaceID, templatePath, configFiles, payload)
+		return true
+	}
+	log.Printf("provisionWorkspaceAutoSync: no provisioning backend wired for %s — marking failed (cpProv=nil, provisioner=nil)", workspaceID)
+	failCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+	h.markProvisionFailed(failCtx, workspaceID,
+		"no provisioning backend available — workspace requires either a Docker daemon (self-hosted) or control-plane provisioner (SaaS)",
+		nil)
+	return false
+}
+
 // StopWorkspaceAuto picks the backend (CP for SaaS, local Docker for
 // self-hosted) and stops the workspace synchronously. Returns nil when
 // neither backend is wired (a workspace nobody is running can't be
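For readers comparing the two dispatchers: the hunk above shows only the sync variant, and the body of provisionWorkspaceAuto is not in view. Below is a minimal, self-contained sketch of the pattern the comment describes (identical backend selection; the goroutine wrapper as the sole difference). Every name in it is invented for illustration and nothing here is part of this patch.

package main

import "fmt"

// handler stands in for WorkspaceHandler; cp and local stand in for the
// cpProv and provisioner backends. All names here are illustrative.
type handler struct {
	cp    func(id string) // control-plane backend; nil when not wired
	local func(id string) // local Docker backend; nil when not wired
}

// pick mirrors the shared branch order: CP takes precedence when both
// backends are wired.
func (h *handler) pick() func(id string) {
	if h.cp != nil {
		return h.cp
	}
	if h.local != nil {
		return h.local
	}
	return nil
}

// provisionSync blocks until the chosen backend body returns.
func (h *handler) provisionSync(id string) bool {
	body := h.pick()
	if body == nil {
		fmt.Println("no backend wired for", id) // stand-in for markProvisionFailed
		return false
	}
	body(id)
	return true
}

// provisionAsync makes the same choice but returns immediately, running
// the body in a fresh goroutine. This is the whole difference the
// "keep these two helpers in sync" comment is guarding.
func (h *handler) provisionAsync(id string) bool {
	body := h.pick()
	if body == nil {
		fmt.Println("no backend wired for", id)
		return false
	}
	go body(id)
	return true
}

func main() {
	h := &handler{local: func(id string) { fmt.Println("provisioned", id) }}
	h.provisionSync("ws-demo") // prints before returning; async gives no such guarantee
}

Factoring the selection into one helper, as the sketch does, would be one way to discharge the keep-in-sync burden; the patch instead duplicates the branches so each helper stays flat.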
diff --git a/workspace-server/internal/handlers/workspace_provision_auto_test.go b/workspace-server/internal/handlers/workspace_provision_auto_test.go
index 62652189..f6fd41a8 100644
--- a/workspace-server/internal/handlers/workspace_provision_auto_test.go
+++ b/workspace-server/internal/handlers/workspace_provision_auto_test.go
@@ -808,6 +808,92 @@ func TestResumeHandler_UsesProvisionWorkspaceAuto(t *testing.T) {
 	}
 }
 
+// TestProvisionWorkspaceAutoSync_RoutesToCPWhenSet — sync variant of the
+// provision dispatcher used by runRestartCycle. CP path runs synchronously
+// (no goroutine wrapper). Verified via the same trackingCPProv stub as
+// the async tests; the absence of `go` semantics is the load-bearing
+// distinction we're pinning.
+func TestProvisionWorkspaceAutoSync_RoutesToCPWhenSet(t *testing.T) {
+	mock := setupTestDB(t)
+	mock.MatchExpectationsInOrder(false)
+	// provisionWorkspaceCP runs prepareProvisionContext synchronously, which
+	// hits secrets selects + the markProvisionFailed UPDATE when CP.Start
+	// returns an error. We allow these calls without strictly asserting
+	// counts — the goal here is to assert the routing branch was taken.
+	mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`).
+		WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}))
+	mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets`).
+		WithArgs(sqlmock.AnyArg()).
+		WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}))
+	mock.ExpectExec(`UPDATE workspaces SET status =`).
+		WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+
+	rec := &trackingCPProv{startErr: errors.New("simulated CP rejection")}
+	bcast := &concurrentSafeBroadcaster{}
+	h := NewWorkspaceHandler(bcast, nil, "http://localhost:8080", t.TempDir())
+	h.SetCPProvisioner(rec)
+
+	wsID := "ws-sync-routes-cp"
+	ok := h.provisionWorkspaceAutoSync(wsID, "", nil, models.CreateWorkspacePayload{
+		Name: "sync-test", Tier: 1, Runtime: "claude-code",
+	})
+	if !ok {
+		t.Fatalf("expected provisionWorkspaceAutoSync to return true with CP wired")
+	}
+	// Synchronous: the call returns AFTER cpProv.Start has been invoked.
+	// No deadline-poll loop needed.
+	got := rec.startedSnapshot()
+	if len(got) != 1 || got[0] != wsID {
+		t.Errorf("expected cpProv.Start invoked once with %q, got %v", wsID, got)
+	}
+}
+
+// TestProvisionWorkspaceAutoSync_NoBackendMarksFailed — sync variant
+// uses the same no-backend fallback as the async dispatcher: returns
+// false + marks failed. Pinning this so the two helpers stay
+// behaviorally identical except for the goroutine wrapper.
+func TestProvisionWorkspaceAutoSync_NoBackendMarksFailed(t *testing.T) {
+	mock := setupTestDB(t)
+	mock.MatchExpectationsInOrder(false)
+	mock.ExpectExec(`UPDATE workspaces SET status =`).
+		WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+
+	bcast := &concurrentSafeBroadcaster{}
+	h := NewWorkspaceHandler(bcast, nil, "http://localhost:8080", t.TempDir())
+
+	ok := h.provisionWorkspaceAutoSync("ws-sync-noback", "", nil, models.CreateWorkspacePayload{
+		Name: "sync-test", Tier: 1, Runtime: "claude-code",
+	})
+	if ok {
+		t.Fatalf("expected provisionWorkspaceAutoSync to return false with no backend wired")
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("expected markProvisionFailed UPDATE to fire on no-backend path: %v", err)
+	}
+}
+
+// TestRunRestartCycle_UsesProvisionWorkspaceAutoSync — source-level pin
+// that runRestartCycle (Site 4) routes through the sync dispatcher
+// instead of inlining the if-cpProv-else dispatch. Phase 2 PR-B of
+// #2799 migrated this site.
+func TestRunRestartCycle_UsesProvisionWorkspaceAutoSync(t *testing.T) {
+	wd, err := os.Getwd()
+	if err != nil {
+		t.Fatalf("getwd: %v", err)
+	}
+	src, err := os.ReadFile(filepath.Join(wd, "workspace_restart.go"))
+	if err != nil {
+		t.Fatalf("read workspace_restart.go: %v", err)
+	}
+	stripped := stripGoComments(src)
+	if !bytes.Contains(stripped, []byte("h.provisionWorkspaceAutoSync(workspaceID")) {
+		t.Errorf("workspace_restart.go must call provisionWorkspaceAutoSync from runRestartCycle — current code does not. " +
+			"Phase 2 PR-B of #2799 migrated this site; do not regress to the inline if-cpProv-else dispatch.")
+	}
+}
+
 // stripGoComments removes // line comments and /* */ block comments
 // from Go source. Imperfect (doesn't handle comments-inside-strings)
 // but adequate for the source-level pin tests in this file — none of
diff --git a/workspace-server/internal/handlers/workspace_restart.go b/workspace-server/internal/handlers/workspace_restart.go
index 536a4d81..08bb02c3 100644
--- a/workspace-server/internal/handlers/workspace_restart.go
+++ b/workspace-server/internal/handlers/workspace_restart.go
@@ -553,24 +553,22 @@ func (h *WorkspaceHandler) runRestartCycle(workspaceID string) {
 	restartData := loadRestartContextData(ctx, workspaceID)
 
 	// On auto-restart, do NOT re-apply templates — preserve existing config volume.
-	// SYNCHRONOUS provisionWorkspace: returns when the new container is up
-	// (or has failed). The outer loop relies on this to know when it's safe
-	// to start another restart cycle without racing this one's Stop call.
+	// provisionWorkspaceAutoSync is the SYNCHRONOUS dispatcher (mirrors
+	// provisionWorkspaceAuto but blocks instead of spawning a goroutine):
+	// returns when the new container is up (or has failed). The outer
+	// pending-flag loop in RestartByID relies on this to know when it's
+	// safe to start another restart cycle without racing this one's
+	// Stop call.
 	//
-	// Branch on which provisioner is wired — same dispatch as the other call
-	// sites in this package (workspace.go:431-433, workspace_restart.go:197+596).
-	// Pre-fix this only called the Docker variant, so on SaaS the auto-restart
-	// cycle would NPE inside provisionWorkspace's `h.provisioner.VolumeHasFile`
-	// call, get swallowed by coalesceRestart's recover()-without-re-raise (a
-	// platform-stability safeguard), and leave the workspace permanently
-	// stuck in status='provisioning' (the UPDATE above already ran). User-
-	// observable result before this fix on SaaS: dead workspace → manual
-	// canvas restart was the only recovery path.
-	if h.cpProv != nil {
-		h.provisionWorkspaceCP(workspaceID, "", nil, payload)
-	} else {
-		h.provisionWorkspace(workspaceID, "", nil, payload)
-	}
+	// Pre-2026-05-05 this site inlined the if-cpProv-else dispatch. On
+	// SaaS the cycle would NPE inside provisionWorkspace's
+	// `h.provisioner.VolumeHasFile` call, get swallowed by
+	// coalesceRestart's recover()-without-re-raise (a platform-stability
+	// safeguard), and leave the workspace permanently stuck in
+	// status='provisioning' (the UPDATE above already ran). User-
+	// observable result on SaaS pre-fix: dead workspace → manual canvas
+	// restart was the only recovery path.
+	h.provisionWorkspaceAutoSync(workspaceID, "", nil, payload)
 	// sendRestartContext is a one-way notification to the new container; safe
 	// to fire async — the next restart cycle won't depend on it completing.
 	go h.sendRestartContext(workspaceID, restartData)
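The comments in both files lean on the pending-flag loop in RestartByID, which this patch does not show. Below is a self-contained sketch of why the cycle body must be synchronous. The structure is hypothetical: only the names RestartByID and runRestartCycle come from the source, and the real loop may differ.

package main

import (
	"fmt"
	"sync"
)

// restarter coalesces restart requests: at most one cycle runs at a
// time, and requests arriving mid-cycle set a pending flag instead of
// spawning a second cycle. Illustrative only; not code from the patch.
type restarter struct {
	mu      sync.Mutex
	running bool
	pending bool
}

// RestartByID sketches the outer loop the diff's comments describe.
// cycle stands in for runRestartCycle (stop, then provision).
func (r *restarter) RestartByID(workspaceID string, cycle func(string)) {
	r.mu.Lock()
	if r.running {
		r.pending = true // coalesce: run one more cycle after the current one
		r.mu.Unlock()
		return
	}
	r.running = true
	r.mu.Unlock()

	for {
		// cycle must be SYNCHRONOUS. If provision ran in a goroutine and
		// cycle returned early, the next iteration's Stop could race the
		// still-in-flight provision and tear down the container it is
		// busy creating. That is the guarantee provisionWorkspaceAutoSync
		// exists to provide.
		cycle(workspaceID)

		r.mu.Lock()
		if !r.pending {
			r.running = false
			r.mu.Unlock()
			return
		}
		r.pending = false // consume the flag and loop for one more cycle
		r.mu.Unlock()
	}
}

func main() {
	r := &restarter{}
	r.RestartByID("ws-demo", func(id string) {
		fmt.Println("stop + provisionWorkspaceAutoSync for", id)
	})
}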