fix(workspace-server): skip provision pipeline on Restart for runtime=external

POST /workspaces/:id/restart on a runtime=external workspace ran the full
re-provision pipeline (Stop → provisionWorkspace*), which calls
issueAndInjectToken → RevokeAllForWorkspace. For external workspaces
(operator-driven, no container/EC2) that silently destroyed the operator's
local bearer token on every "Restart" click in the canvas — the local
poller would then 401-spam against /activity until the operator manually
regenerated from the Tokens tab.

The auto-restart path (runRestartCycle, line 436) already short-circuits
runtime=external. This patch mirrors that in the manual handler so the
two paths agree. Instead of silently no-op'ing, the handler returns
200 OK with a clear message, so the canvas can tell the operator that
the fix is on their side.

Test coverage: TestRestartHandler_ExternalRuntimeNoOps asserts the
short-circuit fires *before* any DB write or provision call. sqlmock's
"unexpected query" failure mode would catch a regression that
re-introduced the token revoke or the status=provisioning UPDATE.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hongming Wang 2026-04-30 15:08:48 -07:00
parent c68ec23d3c
commit 36e263a07d
2 changed files with 76 additions and 0 deletions

View File

@ -89,6 +89,27 @@ func (h *WorkspaceHandler) Restart(c *gin.Context) {
return
}
// runtime=external: the workspace has no Docker container or EC2 — its
// lifecycle is operator-driven (a remote poller heartbeats from outside
// the platform). Pre-fix, this handler still ran the full re-provision
// pipeline, which calls issueAndInjectToken → RevokeAllForWorkspace.
// That silently destroyed the operator's local bearer token on every
// "Restart" click, leaving them with a 401-spamming poller and no
// platform-side recovery path short of regenerating from the canvas
// Tokens tab. Auto-restart already short-circuits external (see
// runRestartCycle below). Mirror that here so manual + automatic
// behavior agree, and surface a clear message instead of silently
// no-op'ing — the canvas can show the operator that the fix is on
// their side.
if dbRuntime == "external" {
c.JSON(http.StatusOK, gin.H{
"status": "noop",
"runtime": "external",
"message": "external workspaces are operator-driven — restart your local poller; platform has nothing to restart",
})
return
}
// SaaS mode: cpProv handles workspace EC2 lifecycle. Self-hosted mode:
// provisioner handles local Docker containers. At least one must be
// available — previously only `provisioner` was checked, which broke

View File

@ -124,6 +124,61 @@ func TestRestartHandler_AncestorPausedBlocksRestart(t *testing.T) {
}
}
// TestRestartHandler_ExternalRuntimeNoOps checks that a manual Restart on a
// runtime=external workspace answers 200 with a "noop" payload after only the
// workspace lookup and the parent lookup have been issued against the DB.
// Before the fix this path ran the full re-provision pipeline and silently
// revoked the operator's bearer token on every click.
func TestRestartHandler_ExternalRuntimeNoOps(t *testing.T) {
	dbMock := setupTestDB(t)
	setupTestRedis(t)
	h := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())

	// Workspace lookup: report an offline workspace whose runtime is external.
	workspaceRow := sqlmock.NewRows([]string{"status", "name", "tier", "runtime"}).
		AddRow("offline", "External Agent", 1, "external")
	dbMock.ExpectQuery("SELECT status, name, tier, COALESCE").
		WithArgs("ws-external").
		WillReturnRows(workspaceRow)

	// isParentPaused lookup: an empty result set means there is no parent.
	noParent := sqlmock.NewRows([]string{"parent_id"})
	dbMock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
		WithArgs("ws-external").
		WillReturnRows(noParent)

	// Nothing else is registered on purpose: past this point Restart must not
	// touch the DB or the provisioner, so a status UPDATE or the
	// RevokeAllForWorkspace DELETE would be an unexpected statement for
	// sqlmock.

	rec := httptest.NewRecorder()
	ginCtx, _ := gin.CreateTestContext(rec)
	ginCtx.Request = httptest.NewRequest("POST", "/workspaces/ws-external/restart", nil)
	ginCtx.Params = gin.Params{{Key: "id", Value: "ws-external"}}

	h.Restart(ginCtx)

	if rec.Code != http.StatusOK {
		t.Errorf("expected 200, got %d: %s", rec.Code, rec.Body.String())
	}

	var payload map[string]interface{}
	if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil {
		t.Fatalf("decode response: %v", err)
	}

	// A missing or non-string field yields "" from the assertion and therefore
	// fails the comparison below, same as a wrong value would.
	status, _ := payload["status"].(string)
	if status != "noop" {
		t.Errorf("expected status=noop, got %v", payload["status"])
	}

	runtime, _ := payload["runtime"].(string)
	if runtime != "external" {
		t.Errorf("expected runtime=external, got %v", payload["runtime"])
	}

	message, _ := payload["message"].(string)
	if !strings.Contains(message, "operator-driven") {
		t.Errorf("expected message about operator-driven, got %v", payload["message"])
	}

	if err := dbMock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet sqlmock expectations (or unexpected queries — token revoke or status update would trigger this): %v", err)
	}
}
func TestRestartHandler_NilProvisionerReturns503(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)