diff --git a/.github/workflows/canary-staging.yml b/.github/workflows/canary-staging.yml
index c2a4d7f4..b040b196 100644
--- a/.github/workflows/canary-staging.yml
+++ b/.github/workflows/canary-staging.yml
@@ -302,7 +302,7 @@ jobs:
             -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
             -H "Authorization: Bearer $ADMIN_TOKEN" \
             -H "Content-Type: application/json" \
-            -d "{\"confirm\":\"$slug\"}" >/tmp/canary-cleanup.code 2>/dev/null
+            -d "{\"confirm\":\"$slug\"}" >/tmp/canary-cleanup.code
           set -e
           code=$(cat /tmp/canary-cleanup.code 2>/dev/null || echo "000")
           if [ "$code" = "200" ] || [ "$code" = "204" ]; then
diff --git a/.github/workflows/e2e-staging-canvas.yml b/.github/workflows/e2e-staging-canvas.yml
index 1c5b15ff..0bc152df 100644
--- a/.github/workflows/e2e-staging-canvas.yml
+++ b/.github/workflows/e2e-staging-canvas.yml
@@ -199,7 +199,7 @@ jobs:
             -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
             -H "Authorization: Bearer $ADMIN_TOKEN" \
             -H "Content-Type: application/json" \
-            -d "{\"confirm\":\"$slug\"}" >/tmp/canvas-cleanup.code 2>/dev/null
+            -d "{\"confirm\":\"$slug\"}" >/tmp/canvas-cleanup.code
           set -e
           code=$(cat /tmp/canvas-cleanup.code 2>/dev/null || echo "000")
           if [ "$code" = "200" ] || [ "$code" = "204" ]; then
diff --git a/.github/workflows/e2e-staging-external.yml b/.github/workflows/e2e-staging-external.yml
index 923423a1..acd9cef2 100644
--- a/.github/workflows/e2e-staging-external.yml
+++ b/.github/workflows/e2e-staging-external.yml
@@ -166,7 +166,7 @@ jobs:
             -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
             -H "Authorization: Bearer $ADMIN_TOKEN" \
             -H "Content-Type: application/json" \
-            -d "{\"confirm\":\"$slug\"}" >/tmp/external-cleanup.code 2>/dev/null
+            -d "{\"confirm\":\"$slug\"}" >/tmp/external-cleanup.code
           set -e
           code=$(cat /tmp/external-cleanup.code 2>/dev/null || echo "000")
           if [ "$code" = "200" ] || [ "$code" = "204" ]; then
diff --git a/.github/workflows/e2e-staging-saas.yml b/.github/workflows/e2e-staging-saas.yml
index aab10b96..bab88409 100644
--- a/.github/workflows/e2e-staging-saas.yml
+++ b/.github/workflows/e2e-staging-saas.yml
@@ -231,7 +231,7 @@ jobs:
             -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
             -H "Authorization: Bearer $ADMIN_TOKEN" \
             -H "Content-Type: application/json" \
-            -d "{\"confirm\":\"$slug\"}" >/tmp/saas-cleanup.code 2>/dev/null
+            -d "{\"confirm\":\"$slug\"}" >/tmp/saas-cleanup.code
           set -e
           code=$(cat /tmp/saas-cleanup.code 2>/dev/null || echo "000")
           if [ "$code" = "200" ] || [ "$code" = "204" ]; then
diff --git a/.github/workflows/e2e-staging-sanity.yml b/.github/workflows/e2e-staging-sanity.yml
index a89821a7..bedf4ed5 100644
--- a/.github/workflows/e2e-staging-sanity.yml
+++ b/.github/workflows/e2e-staging-sanity.yml
@@ -155,7 +155,7 @@ jobs:
             -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
             -H "Authorization: Bearer $ADMIN_TOKEN" \
             -H "Content-Type: application/json" \
-            -d "{\"confirm\":\"$slug\"}" >/tmp/sanity-cleanup.code 2>/dev/null
+            -d "{\"confirm\":\"$slug\"}" >/tmp/sanity-cleanup.code
           set -e
           code=$(cat /tmp/sanity-cleanup.code 2>/dev/null || echo "000")
           if [ "$code" = "200" ] || [ "$code" = "204" ]; then
diff --git a/.github/workflows/redeploy-tenants-on-main.yml b/.github/workflows/redeploy-tenants-on-main.yml
index 7d68a6d9..f862c487 100644
--- a/.github/workflows/redeploy-tenants-on-main.yml
+++ b/.github/workflows/redeploy-tenants-on-main.yml
@@ -200,8 +200,11 @@ jobs:
             -H "Authorization: Bearer $CP_ADMIN_API_TOKEN" \
             -H "Content-Type: application/json" \
             -X POST "$CP_URL/cp/admin/tenants/redeploy-fleet" \
-d "$BODY" >"$HTTP_CODE_FILE" 2>/dev/null + -d "$BODY" >"$HTTP_CODE_FILE" set -e + # Stderr from curl (e.g. dial errors with -sS) goes to the runner + # log so operators can see WHY a connection failed. Stdout is + # captured to $HTTP_CODE_FILE because that's where -w writes. HTTP_CODE=$(cat "$HTTP_CODE_FILE" 2>/dev/null || echo "000") [ -z "$HTTP_CODE" ] && HTTP_CODE="000" diff --git a/.github/workflows/redeploy-tenants-on-staging.yml b/.github/workflows/redeploy-tenants-on-staging.yml index 433df238..e0d69544 100644 --- a/.github/workflows/redeploy-tenants-on-staging.yml +++ b/.github/workflows/redeploy-tenants-on-staging.yml @@ -160,8 +160,10 @@ jobs: -H "Authorization: Bearer $CP_STAGING_ADMIN_API_TOKEN" \ -H "Content-Type: application/json" \ -X POST "$CP_URL/cp/admin/tenants/redeploy-fleet" \ - -d "$BODY" >"$HTTP_CODE_FILE" 2>/dev/null + -d "$BODY" >"$HTTP_CODE_FILE" set -e + # Stderr from curl (-sS shows dial errors etc.) goes to the + # runner log so operators can see WHY a connection failed. HTTP_CODE=$(cat "$HTTP_CODE_FILE" 2>/dev/null || echo "000") [ -z "$HTTP_CODE" ] && HTTP_CODE="000" diff --git a/.github/workflows/sweep-stale-e2e-orgs.yml b/.github/workflows/sweep-stale-e2e-orgs.yml index 1531f117..d477eee3 100644 --- a/.github/workflows/sweep-stale-e2e-orgs.yml +++ b/.github/workflows/sweep-stale-e2e-orgs.yml @@ -167,8 +167,9 @@ jobs: -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \ -H "Authorization: Bearer $ADMIN_TOKEN" \ -H "Content-Type: application/json" \ - -d "{\"confirm\":\"$slug\"}" >/tmp/del_code 2>/dev/null + -d "{\"confirm\":\"$slug\"}" >/tmp/del_code set -e + # Stderr from curl (-sS shows dial errors etc.) goes to runner log. http_code=$(cat /tmp/del_code 2>/dev/null || echo "000") if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then deleted=$((deleted+1)) diff --git a/workspace-server/internal/handlers/a2a_proxy_helpers.go b/workspace-server/internal/handlers/a2a_proxy_helpers.go index 355af23f..10e9efc6 100644 --- a/workspace-server/internal/handlers/a2a_proxy_helpers.go +++ b/workspace-server/internal/handlers/a2a_proxy_helpers.go @@ -163,7 +163,7 @@ func (h *WorkspaceHandler) maybeMarkContainerDead(ctx context.Context, workspace if wsRuntime == "external" { return false } - if h.provisioner == nil && h.cpProv == nil { + if !h.HasProvisioner() { return false } diff --git a/workspace-server/internal/handlers/org_import.go b/workspace-server/internal/handlers/org_import.go index 2e698f18..8b06a157 100644 --- a/workspace-server/internal/handlers/org_import.go +++ b/workspace-server/internal/handlers/org_import.go @@ -175,8 +175,18 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_ONLINE", id, map[string]interface{}{ "name": ws.Name, "external": true, }) - } else if h.provisioner != nil { - // Provision container + } else if h.workspace.HasProvisioner() { + // Provision container — either backend (CP for SaaS, local Docker + // for self-hosted) is fine. Pre-2026-05-05 this gate was + // `h.provisioner != nil`, which only checked the Docker pointer + // and silently dropped every workspace on a SaaS tenant: the prep + // block was skipped, no Auto call ever fired, and the row sat in + // 'provisioning' until the 600s sweeper marked it failed with the + // misleading "container started but never called /registry/register" + // (incident: hongming tenant org-import 2026-05-05 01:14, 7-of-7 + // claude-code workspaces stuck). 
+		// happens inside provisionWorkspaceAuto — this gate just decides
+		// whether to do prep at all.
 		payload := models.CreateWorkspacePayload{
 			Name: ws.Name, Tier: tier, Runtime: runtime, Model: model,
 			WorkspaceDir: ws.WorkspaceDir,
diff --git a/workspace-server/internal/handlers/workspace.go b/workspace-server/internal/handlers/workspace.go
index f6fef476..27051e23 100644
--- a/workspace-server/internal/handlers/workspace.go
+++ b/workspace-server/internal/handlers/workspace.go
@@ -112,21 +112,43 @@ func (h *WorkspaceHandler) SetCPProvisioner(cp provisioner.CPProvisionerAPI) {
 	h.cpProv = cp
 }

+// HasProvisioner reports whether either backend (CP or local Docker) is
+// wired. Callers that gate prep-work on "do we have something that can
+// provision a container?" should use this rather than direct field access
+// to either provisioner — those individual checks miss the SaaS path
+// (cpProv set, provisioner nil) or the self-hosted path (provisioner set,
+// cpProv nil) symmetrically. Org-import + future bulk paths gate their
+// template/config/secret prep on this so the work isn't wasted on
+// deployments where no backend is available.
+func (h *WorkspaceHandler) HasProvisioner() bool {
+	return h.cpProv != nil || h.provisioner != nil
+}
+
 // provisionWorkspaceAuto picks the backend (CP for SaaS, local Docker
 // for self-hosted) and starts provisioning in a goroutine. Returns true
-// when a backend was kicked off, false when neither is wired (caller
-// owns the persist-config + mark-failed surface in that case).
+// when a backend was kicked off, false when neither is wired.
 //
-// Centralized so every caller — Create, TeamHandler.Expand, future
-// paths — gets the same routing. Pre-2026-05-04 TeamHandler.Expand
-// hardcoded provisionWorkspace (Docker) and silently broke the
-// "deploy a team on SaaS" flow: child workspace rows were created with
-// no EC2 instance, the runtime never ran, and the 600s sweeper logged
-// the misleading "container started but never called /registry/register".
+// Single source of truth for "start provisioning a workspace" across
+// every caller (Create, OrgHandler.createWorkspaceTree, TeamHandler.Expand,
+// future paths). Centralized routing here means callers don't repeat
+// the "Docker vs CP" decision and can't drift on it.
+//
+// Self-marks-failed on the no-backend path: pre-2026-05-05 the false
+// return was silent, and any caller that forgot to handle it (TeamHandler
+// pre-#2367, OrgHandler.createWorkspaceTree pre-this-fix) silently
+// dropped workspaces — they sat in 'provisioning' for 10 min until the
+// sweeper marked them failed with the misleading "container started but
+// never called /registry/register" message. Marking failed inside Auto
+// closes that class: even if a future caller bypasses HasProvisioner
+// gating or ignores the bool return, the workspace ends in a clean
+// failed state with an actionable error message.
 //
 // Architectural principle: templates own runtime/config/prompts/files/
 // plugins; the platform owns where it runs. Anything that picks
-// between CP and local Docker belongs in this one helper.
+// between CP and local Docker belongs in this one helper. Anything
+// post-routing-but-pre-Start (mint secrets, render template, etc.)
+// lives in prepareProvisionContext (shared by both per-backend
+// goroutines).
 func (h *WorkspaceHandler) provisionWorkspaceAuto(workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload) bool {
 	if h.cpProv != nil {
 		go h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload)
@@ -136,6 +158,15 @@ func (h *WorkspaceHandler) provisionWorkspaceAuto(workspaceID, templatePath stri
 		go h.provisionWorkspace(workspaceID, templatePath, configFiles, payload)
 		return true
 	}
+	// No backend wired — mark failed so the workspace doesn't linger in
+	// 'provisioning' for the full 10-minute sweep window. 10s is enough
+	// for the broadcast + single UPDATE inside markProvisionFailed.
+	log.Printf("provisionWorkspaceAuto: no provisioning backend wired for %s — marking failed (cpProv=nil, provisioner=nil)", workspaceID)
+	failCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+	h.markProvisionFailed(failCtx, workspaceID,
+		"no provisioning backend available — workspace requires either a Docker daemon (self-hosted) or control-plane provisioner (SaaS)",
+		nil)
 	return false
 }

@@ -565,29 +596,22 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
 	}

 	// Auto-provision — pick backend: control plane (SaaS) or Docker (self-hosted).
-	// Routing is centralized in provisionWorkspaceAuto so every caller
-	// (Create, TeamHandler.Expand, future paths) gets the same backend
-	// selection. Pre-2026-05-04 the team-deploy path hardcoded the
-	// Docker route, so on a SaaS tenant 7-of-7 sub-agents were created
-	// as DB rows but had no EC2 — symptom: "container started but never
-	// called /registry/register" + diagnose returns "docker client not
-	// configured". Centralizing here closes that drift class.
+	// Routing AND the no-backend mark-failed path are both inside
+	// provisionWorkspaceAuto (single source of truth). The Create-specific
+	// extra is the workspace_config UPSERT below: when no backend is
+	// wired, Auto marks the row failed but doesn't persist the bare
+	// runtime/model/tier as JSON — the Config tab needs that to render
+	// even on failed workspaces, so Create owns this Create-only side
+	// effect rather than coupling Auto to a UI concern.
 	if !h.provisionWorkspaceAuto(id, templatePath, configFiles, payload) {
-		// No Docker available (SaaS tenant). Persist basic config as JSON
-		// so the Config tab shows the correct runtime/model/name. Then mark
-		// the workspace as failed with a clear message.
 		cfgJSON := fmt.Sprintf(`{"name":%q,"runtime":%q,"tier":%d,"template":%q}`, payload.Name, payload.Runtime, payload.Tier, payload.Template)
-		db.DB.ExecContext(ctx, `
+		if _, err := db.DB.ExecContext(ctx, `
 			INSERT INTO workspace_config (workspace_id, data)
 			VALUES ($1, $2::jsonb)
 			ON CONFLICT (workspace_id) DO UPDATE SET data = $2::jsonb
-		`, id, cfgJSON)
-		db.DB.ExecContext(ctx,
-			`UPDATE workspaces SET status = $1, last_sample_error = 'Docker not available — workspace containers require a Docker daemon or external provisioning.', updated_at = now() WHERE id = $2`, models.StatusFailed, id)
-		h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_PROVISION_FAILED", id, map[string]interface{}{
-			"error": "Docker not available on this platform instance",
-		})
-		log.Printf("Create: no Docker daemon — workspace %s config persisted, marked failed", id)
+		`, id, cfgJSON); err != nil {
+			log.Printf("Create: workspace_config persist failed for %s: %v", id, err)
+		}
 	}

 	c.JSON(http.StatusCreated, gin.H{
diff --git a/workspace-server/internal/handlers/workspace_provision_auto_test.go b/workspace-server/internal/handlers/workspace_provision_auto_test.go
index 9dd4fc51..9198e8f6 100644
--- a/workspace-server/internal/handlers/workspace_provision_auto_test.go
+++ b/workspace-server/internal/handlers/workspace_provision_auto_test.go
@@ -67,12 +67,27 @@ func (r *trackingCPProv) startedSnapshot() []string {
 	return out
 }

-// TestProvisionWorkspaceAuto_NoBackendReturnsFalse — when neither
-// cpProv nor provisioner is wired, the dispatcher returns false so the
-// caller knows it must own the persist + mark-failed path. Pre-fix,
-// TeamHandler had no equivalent fallback at all and silently dropped
-// children on the floor.
-func TestProvisionWorkspaceAuto_NoBackendReturnsFalse(t *testing.T) {
+// TestProvisionWorkspaceAuto_NoBackendMarksFailed — when neither cpProv
+// nor provisioner is wired, the dispatcher must:
+//  1. Return false (so the caller can do its own extra cleanup if
+//     needed — Create persists workspace_config for the Config tab).
+//  2. Mark the workspace failed via markProvisionFailed (defense in
+//     depth: if a future caller bypasses the bool return, the workspace
+//     still doesn't sit stuck in 'provisioning' for 10 min until the
+//     sweeper fires).
+//
+// Pre-2026-05-05 the false return was silent and TeamHandler /
+// OrgHandler.createWorkspaceTree dropped workspaces on the floor when
+// they ignored it. This test pins the new contract that Auto owns the
+// failed-mark on no-backend.
+func TestProvisionWorkspaceAuto_NoBackendMarksFailed(t *testing.T) {
+	mock := setupTestDB(t)
+	mock.MatchExpectationsInOrder(false)
+	// markProvisionFailed does a single UPDATE workspaces ... SET status='failed'.
+	mock.ExpectExec(`UPDATE workspaces SET status =`).
+		WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+
 	bcast := &concurrentSafeBroadcaster{}
 	h := NewWorkspaceHandler(bcast, nil, "http://localhost:8080", t.TempDir())
 	// Do NOT call SetCPProvisioner — both backends nil.
@@ -83,6 +98,9 @@ func TestProvisionWorkspaceAuto_NoBackendReturnsFalse(t *testing.T) {
 	if ok {
 		t.Fatalf("expected provisionWorkspaceAuto to return false with no backend wired")
 	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("expected markProvisionFailed UPDATE to fire on no-backend path: %v", err)
+	}
 }

 // TestProvisionWorkspaceAuto_RoutesToCPWhenSet — when cpProv is set
@@ -285,3 +303,132 @@ func TestOrgImport_UsesAutoNotDirectDockerPath(t *testing.T) {
 		t.Errorf("org_import.go must call h.workspace.provisionWorkspaceAuto for child provisioning — current code does not")
 	}
 }
+
+// TestHasProvisioner_TrueOnCPOnly — SaaS tenants run with cpProv set and
+// the local Docker provisioner nil. HasProvisioner must report true so
+// gate-y callers (org-import prep block) don't skip provisioning.
+//
+// Pre-2026-05-05 the org-import gate checked `h.provisioner != nil`
+// directly — false on SaaS — and the entire provisioning prep block was
+// skipped. The Auto call inside the block was unreachable; PR #2798's
+// "route through Auto" fix didn't help because the gate fired earlier.
+// Symptom: 7-workspace org-import on hongming sat in 'provisioning' for
+// the full 10-minute sweep window.
+func TestHasProvisioner_TrueOnCPOnly(t *testing.T) {
+	bcast := &concurrentSafeBroadcaster{}
+	h := NewWorkspaceHandler(bcast, nil, "http://localhost:8080", t.TempDir())
+	h.SetCPProvisioner(&trackingCPProv{})
+	if !h.HasProvisioner() {
+		t.Errorf("HasProvisioner() == false with cpProv wired (Docker nil) — every gate that uses this would skip provisioning on SaaS, reproducing the hongming 7-workspace stuck-in-provisioning incident from 2026-05-05")
+	}
+}
+
+// TestHasProvisioner_TrueOnDockerOnly — self-hosted operators run with
+// the local Docker provisioner wired and cpProv nil. HasProvisioner must
+// report true.
+func TestHasProvisioner_TrueOnDockerOnly(t *testing.T) {
+	bcast := &concurrentSafeBroadcaster{}
+	// NewWorkspaceHandler guards the typed-nil-interface trap (workspace.go
+	// docstring) — pass a real *Provisioner stub via the test fixture
+	// rather than a nil pointer cast to the interface.
+	h := NewWorkspaceHandler(bcast, &provisioner.Provisioner{}, "http://localhost:8080", t.TempDir())
+	if !h.HasProvisioner() {
+		t.Errorf("HasProvisioner() == false with Docker wired (cpProv nil) — would break self-hosted operators")
+	}
+}
+
+// TestHasProvisioner_FalseWhenNeitherWired — misconfigured deployment
+// with neither backend reachable. HasProvisioner must report false so
+// the org-import prep block is skipped (no point doing template/secret
+// prep work when nothing can run the resulting container).
+func TestHasProvisioner_FalseWhenNeitherWired(t *testing.T) {
+	bcast := &concurrentSafeBroadcaster{}
+	h := NewWorkspaceHandler(bcast, nil, "http://localhost:8080", t.TempDir())
+	if h.HasProvisioner() {
+		t.Errorf("HasProvisioner() == true with no backend wired — gate should short-circuit and not waste prep cycles")
+	}
+}
+
+// TestNoBareBothNilCheck — source-level pin: any code that wants to ask
+// "is no backend wired?" must use !HasProvisioner(), not the verbose
+// `h.provisioner == nil && h.cpProv == nil` shape. Two reasons:
+//
+//  1. Single source of truth — when a third backend lands (k8s,
+//     containerd, whatever), HasProvisioner gets the new field added in
+//     one place. Bare both-nil checks each need to be hunted down.
+//  2. Symmetry — easier to read `!h.HasProvisioner()` and know the
+//     intent than to mentally evaluate `nil && nil`.
+//
+// Allowed exception: workspace.go's HasProvisioner() definition itself.
+// Test files are also exempt — assertions on internal field state are
+// fine.
+func TestNoBareBothNilCheck(t *testing.T) {
+	wd, err := os.Getwd()
+	if err != nil {
+		t.Fatalf("getwd: %v", err)
+	}
+	entries, err := os.ReadDir(wd)
+	if err != nil {
+		t.Fatalf("readdir: %v", err)
+	}
+	bareShapes := []string{
+		"h.provisioner == nil && h.cpProv == nil",
+		"h.cpProv == nil && h.provisioner == nil",
+	}
+	for _, entry := range entries {
+		name := entry.Name()
+		if filepath.Ext(name) != ".go" {
+			continue
+		}
+		// Allow tests (legitimate field-state assertions).
+		if len(name) > len("_test.go") &&
+			name[len(name)-len("_test.go"):] == "_test.go" {
+			continue
+		}
+		// workspace.go houses HasProvisioner's definition + can reference
+		// the fields directly — but with the !HasProvisioner() refactor
+		// it shouldn't contain the bare both-nil shape any more.
+		src, err := os.ReadFile(filepath.Join(wd, name))
+		if err != nil {
+			t.Fatalf("read %s: %v", name, err)
+		}
+		for _, needle := range bareShapes {
+			if bytes.Contains(src, []byte(needle)) {
+				t.Errorf("%s contains bare `%s` — must use `!h.HasProvisioner()` for SSOT.", name, needle)
+			}
+		}
+	}
+}
+
+// TestOrgImportGate_UsesHasProvisionerNotBareField — source-level pin
+// for the org-import gate. Pre-fix the gate read `h.provisioner != nil`,
+// which checked only the Docker pointer and silently dropped every
+// SaaS workspace. The fix routes through HasProvisioner so both
+// backends count.
+//
+// Substring match because the failure shape is "wrong field" — a plain
+// text gate suffices, same rationale as TestTeamExpand_UsesAutoNotDirectDockerPath
+// above.
+func TestOrgImportGate_UsesHasProvisionerNotBareField(t *testing.T) {
+	wd, err := os.Getwd()
+	if err != nil {
+		t.Fatalf("getwd: %v", err)
+	}
+	src, err := os.ReadFile(filepath.Join(wd, "org_import.go"))
+	if err != nil {
+		t.Fatalf("read org_import.go: %v", err)
+	}
+	// The provisioning gate is the `else if ...` clause that follows the
+	// `if ws.External {` external-workspace branch. If org_import.go
+	// reintroduces a bare `h.provisioner` check there, every SaaS tenant
+	// silently drops org-imported workspaces again. Auto's nil check is
+	// the right routing layer; the gate just decides whether to do prep
+	// work at all, and HasProvisioner is the symmetric question.
+	if bytes.Contains(src, []byte("} else if h.provisioner != nil {")) {
" + + "Repro: 2026-05-05 hongming org-import incident — 7 claude-code workspaces stuck in 'provisioning' for 10 min because the gate skipped the entire block on SaaS, hiding the Auto call PR #2798 introduced.") + } + if !bytes.Contains(src, []byte("h.workspace.HasProvisioner()")) { + t.Errorf("org_import.go must call h.workspace.HasProvisioner() in the provisioning gate — current code does not") + } +} diff --git a/workspace-server/internal/handlers/workspace_restart.go b/workspace-server/internal/handlers/workspace_restart.go index eef2fc64..9ccab037 100644 --- a/workspace-server/internal/handlers/workspace_restart.go +++ b/workspace-server/internal/handlers/workspace_restart.go @@ -115,7 +115,7 @@ func (h *WorkspaceHandler) Restart(c *gin.Context) { // available — previously only `provisioner` was checked, which broke // restart entirely on every SaaS tenant (the workspace EC2 couldn't // be terminated + relaunched, the endpoint 503'd on every try). - if h.provisioner == nil && h.cpProv == nil { + if !h.HasProvisioner() { c.JSON(http.StatusServiceUnavailable, gin.H{"error": "provisioner not available"}) return } @@ -360,7 +360,7 @@ func (h *WorkspaceHandler) RestartByID(workspaceID string) { // reactive auto-restart on every SaaS tenant (where the local Docker // provisioner is intentionally nil). The runRestartCycle below now // branches on which one is set for the Stop call. - if h.provisioner == nil && h.cpProv == nil { + if !h.HasProvisioner() { return } coalesceRestart(workspaceID, func() { h.runRestartCycle(workspaceID) }) @@ -657,7 +657,7 @@ func (h *WorkspaceHandler) Resume(c *gin.Context) { // Accept either provisioner (Docker self-hosted OR CP SaaS). See the // same guard in Restart above for context — Resume previously 503'd // on every SaaS tenant. - if h.provisioner == nil && h.cpProv == nil { + if !h.HasProvisioner() { c.JSON(http.StatusServiceUnavailable, gin.H{"error": "provisioner not available"}) return }