From b3ef8d5ad76e874fb8a9fe84304781d7557e561c Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Wed, 10 Jun 2026 23:32:28 +0000 Subject: [PATCH] =?UTF-8?q?fix(registry):=20heartbeat=20promotes=20provisi?= =?UTF-8?q?oning=20=E2=86=92=20online=20atomically=20(#2500)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The provisioning→online promotion previously lived only in Register's ON CONFLICT upsert and in a separate evaluateStatus query. If Register never succeeded (401 token-gate / 400 push-URL / unreachable PLATFORM_URL), the workspace stayed stuck 'provisioning' even though it was heartbeating (which proves reachability + valid token). Extend both heartbeat UPDATE branches with: status = CASE WHEN status = 'provisioning' THEN 'online' ELSE status END This makes the promotion atomic with the heartbeat write, closing the stuck-provisioning gap regardless of why Register didn't flip. Also updates heartbeatUpdateSQL in integration tests to mirror the new query shape, and adds two integration tests: - Heartbeat on 'provisioning' row → promotes to 'online' - Heartbeat on 'online' row → status unchanged Refs #2500 --- .../internal/handlers/registry.go | 2 + .../registry_auth_integration_test.go | 40 +++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/workspace-server/internal/handlers/registry.go b/workspace-server/internal/handlers/registry.go index c96fb6d7a..966698e39 100644 --- a/workspace-server/internal/handlers/registry.go +++ b/workspace-server/internal/handlers/registry.go @@ -702,6 +702,7 @@ func (h *RegistryHandler) Heartbeat(c *gin.Context) { uptime_seconds = $5, current_task = $6, monthly_spend = $7, + status = CASE WHEN status = 'provisioning' THEN 'online' ELSE status END, updated_at = now() WHERE id = $1 AND status != 'removed' `, payload.WorkspaceID, payload.ErrorRate, payload.SampleError, @@ -716,6 +717,7 @@ func (h *RegistryHandler) Heartbeat(c *gin.Context) { active_tasks = $4, uptime_seconds = $5, current_task = $6, + status = CASE WHEN status = 'provisioning' THEN 'online' ELSE status END, updated_at = now() WHERE id = $1 AND status != 'removed' `, payload.WorkspaceID, payload.ErrorRate, payload.SampleError, diff --git a/workspace-server/internal/handlers/registry_auth_integration_test.go b/workspace-server/internal/handlers/registry_auth_integration_test.go index d1f1398f4..17f45d74f 100644 --- a/workspace-server/internal/handlers/registry_auth_integration_test.go +++ b/workspace-server/internal/handlers/registry_auth_integration_test.go @@ -197,6 +197,7 @@ const registerUpsertSQL = ` const heartbeatUpdateSQL = ` UPDATE workspaces SET last_heartbeat_at = now(), + status = CASE WHEN status = 'provisioning' THEN 'online' ELSE status END, updated_at = now() WHERE id = $1 AND status != 'removed' ` @@ -285,6 +286,45 @@ func TestIntegration_RegistryRowState_HeartbeatUpdatesLiveWorkspace(t *testing.T } } +func TestIntegration_RegistryRowState_HeartbeatPromotesProvisioningToOnline(t *testing.T) { + conn := integrationAuthDB(t) + ctx := context.Background() + + id := insertWorkspace(t, conn, "provisioning-ws", "provisioning", "") + + if _, err := conn.ExecContext(ctx, heartbeatUpdateSQL, id); err != nil { + t.Fatalf("heartbeat update: %v", err) + } + + if got := statusOf(t, conn, id); got != "online" { + t.Fatalf("provisioning workspace not promoted to online by heartbeat: status=%q, want 'online'", got) + } + + var hb sql.NullTime + if err := conn.QueryRowContext(ctx, + `SELECT last_heartbeat_at FROM workspaces WHERE id = $1`, id).Scan(&hb); err != nil { + t.Fatalf("read last_heartbeat_at: %v", err) + } + if !hb.Valid { + t.Fatalf("provisioning workspace heartbeat did NOT bump last_heartbeat_at") + } +} + +func TestIntegration_RegistryRowState_HeartbeatProvisioningAlreadyOnlineUnchanged(t *testing.T) { + conn := integrationAuthDB(t) + ctx := context.Background() + + id := insertWorkspace(t, conn, "online-ws", "online", "") + + if _, err := conn.ExecContext(ctx, heartbeatUpdateSQL, id); err != nil { + t.Fatalf("heartbeat update: %v", err) + } + + if got := statusOf(t, conn, id); got != "online" { + t.Fatalf("online workspace status changed unexpectedly by heartbeat: status=%q, want 'online'", got) + } +} + // --------------------------------------------------------------------------- // 2 — wsauth.ValidateToken A↔B binding (the cross-tenant non-leak boundary). // -- 2.52.0