Merge pull request #2847 from Molecule-AI/feat/2799-phase2b-runrestart-cycle-1777960000

feat(handlers): provisionWorkspaceAutoSync + Site 4 migration — #2799 Phase 2 PR-B
This commit is contained in:
Hongming Wang 2026-05-05 06:53:18 +00:00 committed by GitHub
commit 21a7e7b0e7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 133 additions and 17 deletions

View File

@ -170,6 +170,38 @@ func (h *WorkspaceHandler) provisionWorkspaceAuto(workspaceID, templatePath stri
return false
}
// provisionWorkspaceAutoSync dispatches a provision to whichever backend
// is wired, exactly like provisionWorkspaceAuto, but runs the provision
// body inline and does not return until it finishes — no goroutine is
// spawned.
//
// Callers that must pair a stop with a provision and cannot return early
// depend on this blocking behavior. Today that caller is runRestartCycle:
// its pending-flag loop uses the synchronous return as the signal that the
// next cycle's Stop call won't race an in-flight provision goroutine.
//
// Backend selection and the no-backend failure path mirror
// provisionWorkspaceAuto; the goroutine wrapper is the only difference.
// If either helper gains a new arm (a third backend, retry semantics),
// mirror the change in the other.
func (h *WorkspaceHandler) provisionWorkspaceAutoSync(workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload) bool {
	switch {
	case h.cpProv != nil:
		// Control-plane backend (SaaS).
		h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload)
		return true
	case h.provisioner != nil:
		// Local Docker backend (self-hosted).
		h.provisionWorkspace(workspaceID, templatePath, configFiles, payload)
		return true
	}
	// Neither backend is wired: record the failure so the workspace does
	// not sit in a provisioning state forever, then report false.
	log.Printf("provisionWorkspaceAutoSync: no provisioning backend wired for %s — marking failed (cpProv=nil, provisioner=nil)", workspaceID)
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	h.markProvisionFailed(ctx, workspaceID,
		"no provisioning backend available — workspace requires either a Docker daemon (self-hosted) or control-plane provisioner (SaaS)",
		nil)
	return false
}
// StopWorkspaceAuto picks the backend (CP for SaaS, local Docker for
// self-hosted) and stops the workspace synchronously. Returns nil when
// neither backend is wired (a workspace nobody is running can't be

View File

@ -808,6 +808,92 @@ func TestResumeHandler_UsesProvisionWorkspaceAuto(t *testing.T) {
}
}
// TestProvisionWorkspaceAutoSync_RoutesToCPWhenSet — pins that the sync
// provision dispatcher (used by runRestartCycle) takes the CP branch when
// a control-plane provisioner is wired, and does so synchronously: the
// call must not return before cpProv.Start has run. Reuses the same
// trackingCPProv stub as the async routing tests; the lack of goroutine
// semantics is the property under test.
func TestProvisionWorkspaceAutoSync_RoutesToCPWhenSet(t *testing.T) {
	dbMock := setupTestDB(t)
	dbMock.MatchExpectationsInOrder(false)

	// provisionWorkspaceCP runs prepareProvisionContext synchronously, which
	// hits secrets selects + the markProvisionFailed UPDATE when CP.Start
	// returns an error. We allow these calls without strictly asserting
	// counts — the goal here is to assert the routing branch was taken.
	secretCols := []string{"key", "encrypted_value", "encryption_version"}
	dbMock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`).
		WillReturnRows(sqlmock.NewRows(secretCols))
	dbMock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets`).
		WithArgs(sqlmock.AnyArg()).
		WillReturnRows(sqlmock.NewRows(secretCols))
	dbMock.ExpectExec(`UPDATE workspaces SET status =`).
		WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
		WillReturnResult(sqlmock.NewResult(0, 1))

	cp := &trackingCPProv{startErr: errors.New("simulated CP rejection")}
	handler := NewWorkspaceHandler(&concurrentSafeBroadcaster{}, nil, "http://localhost:8080", t.TempDir())
	handler.SetCPProvisioner(cp)

	const wsID = "ws-sync-routes-cp"
	ok := handler.provisionWorkspaceAutoSync(wsID, "", nil, models.CreateWorkspacePayload{
		Name: "sync-test", Tier: 1, Runtime: "claude-code",
	})
	if !ok {
		t.Fatalf("expected provisionWorkspaceAutoSync to return true with CP wired")
	}

	// Synchronous contract: by the time the call returns, cpProv.Start has
	// already been invoked, so no deadline-poll loop is needed here.
	started := cp.startedSnapshot()
	if len(started) != 1 || started[0] != wsID {
		t.Errorf("expected cpProv.Start invoked once with %q, got %v", wsID, started)
	}
}
// TestProvisionWorkspaceAutoSync_NoBackendMarksFailed — pins that the
// sync dispatcher shares the async dispatcher's no-backend fallback:
// with neither CP nor Docker wired it returns false and issues the
// markProvisionFailed UPDATE, keeping the two helpers behaviorally
// identical apart from the goroutine wrapper.
func TestProvisionWorkspaceAutoSync_NoBackendMarksFailed(t *testing.T) {
	dbMock := setupTestDB(t)
	dbMock.MatchExpectationsInOrder(false)
	dbMock.ExpectExec(`UPDATE workspaces SET status =`).
		WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
		WillReturnResult(sqlmock.NewResult(0, 1))

	// No SetCPProvisioner / no Docker provisioner: both backends nil.
	handler := NewWorkspaceHandler(&concurrentSafeBroadcaster{}, nil, "http://localhost:8080", t.TempDir())
	if handler.provisionWorkspaceAutoSync("ws-sync-noback", "", nil, models.CreateWorkspacePayload{
		Name: "sync-test", Tier: 1, Runtime: "claude-code",
	}) {
		t.Fatalf("expected provisionWorkspaceAutoSync to return false with no backend wired")
	}
	if err := dbMock.ExpectationsWereMet(); err != nil {
		t.Errorf("expected markProvisionFailed UPDATE to fire on no-backend path: %v", err)
	}
}
// TestRunRestartCycle_UsesProvisionWorkspaceAutoSync — source-level pin
// that runRestartCycle (Site 4) dispatches through the sync helper rather
// than inlining the if-cpProv-else branch. This site was migrated in
// Phase 2 PR-B of #2799; the test guards against a regression.
func TestRunRestartCycle_UsesProvisionWorkspaceAutoSync(t *testing.T) {
	cwd, err := os.Getwd()
	if err != nil {
		t.Fatalf("getwd: %v", err)
	}
	raw, err := os.ReadFile(filepath.Join(cwd, "workspace_restart.go"))
	if err != nil {
		t.Fatalf("read workspace_restart.go: %v", err)
	}
	// Strip comments first so a commented-out call can't satisfy the pin.
	want := []byte("h.provisionWorkspaceAutoSync(workspaceID")
	if !bytes.Contains(stripGoComments(raw), want) {
		t.Errorf("workspace_restart.go must call provisionWorkspaceAutoSync from runRestartCycle — current code does not. " +
			"Phase 2 PR-B of #2799 migrated this site; do not regress to the inline if-cpProv-else dispatch.")
	}
}
// stripGoComments removes // line comments and /* */ block comments
// from Go source. Imperfect (doesn't handle comments-inside-strings)
// but adequate for the source-level pin tests in this file — none of

View File

@ -553,24 +553,22 @@ func (h *WorkspaceHandler) runRestartCycle(workspaceID string) {
restartData := loadRestartContextData(ctx, workspaceID)
// On auto-restart, do NOT re-apply templates — preserve existing config volume.
// SYNCHRONOUS provisionWorkspace: returns when the new container is up
// (or has failed). The outer loop relies on this to know when it's safe
// to start another restart cycle without racing this one's Stop call.
// provisionWorkspaceAutoSync is the SYNCHRONOUS dispatcher (mirrors
// provisionWorkspaceAuto but blocks instead of spawning a goroutine):
// returns when the new container is up (or has failed). The outer
// pending-flag loop in RestartByID relies on this to know when it's
// safe to start another restart cycle without racing this one's
// Stop call.
//
// Branch on which provisioner is wired — same dispatch as the other call
// sites in this package (workspace.go:431-433, workspace_restart.go:197+596).
// Pre-fix this only called the Docker variant, so on SaaS the auto-restart
// cycle would NPE inside provisionWorkspace's `h.provisioner.VolumeHasFile`
// call, get swallowed by coalesceRestart's recover()-without-re-raise (a
// platform-stability safeguard), and leave the workspace permanently
// stuck in status='provisioning' (the UPDATE above already ran). User-
// observable result before this fix on SaaS: dead workspace → manual
// canvas restart was the only recovery path.
if h.cpProv != nil {
h.provisionWorkspaceCP(workspaceID, "", nil, payload)
} else {
h.provisionWorkspace(workspaceID, "", nil, payload)
}
// Pre-2026-05-05 this site inlined the if-cpProv-else dispatch. On
// SaaS the cycle would NPE inside provisionWorkspace's
// `h.provisioner.VolumeHasFile` call, get swallowed by
// coalesceRestart's recover()-without-re-raise (a platform-stability
// safeguard), and leave the workspace permanently stuck in
// status='provisioning' (the UPDATE above already ran). User-
// observable result on SaaS pre-fix: dead workspace → manual canvas
// restart was the only recovery path.
h.provisionWorkspaceAutoSync(workspaceID, "", nil, payload)
// sendRestartContext is a one-way notification to the new container; safe
// to fire async — the next restart cycle won't depend on it completing.
go h.sendRestartContext(workspaceID, restartData)