forked from molecule-ai/molecule-core
fix(sweeper): emit WORKSPACE_PROVISION_FAILED so canvas updates UI
The provision-timeout sweeper was emitting a new WORKSPACE_PROVISION_TIMEOUT event type, but the canvas event handler (canvas-events.ts:234) only has a case for WORKSPACE_PROVISION_FAILED — the sweep's event fell through silently. DB was being marked 'failed' but the UI stayed on 'starting' indefinitely until the user hard-refreshed.

Reusing the existing event name keeps the UI reaction uniform across both fail paths (runtime-crash via bootstrap-watcher and boot-timeout via sweeper). Operators who need to distinguish can read the `source` payload field — "bootstrap_watcher" vs "provision_timeout_sweep".

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
7158f8f01c
commit
ec52d155f4
@@ -127,9 +127,15 @@ func sweepStuckProvisioning(ctx context.Context, emitter ProvisionTimeoutEmitter
|
||||
continue
|
||||
}
|
||||
log.Printf("Provision-timeout sweep: %s stuck in provisioning > %s — marked failed", id, timeout)
|
||||
if emitErr := emitter.RecordAndBroadcast(ctx, "WORKSPACE_PROVISION_TIMEOUT", id, map[string]interface{}{
|
||||
"error": msg,
|
||||
"timeout_secs": timeoutSec,
|
||||
// Emit as WORKSPACE_PROVISION_FAILED, not _TIMEOUT, because the
|
||||
// canvas event handler only flips node state on the _FAILED case.
|
||||
// A separate event type was considered but the UI reaction is
|
||||
// identical either way — operators who need to distinguish can
|
||||
// tell from the `source` payload field.
|
||||
if emitErr := emitter.RecordAndBroadcast(ctx, "WORKSPACE_PROVISION_FAILED", id, map[string]interface{}{
|
||||
"error": msg,
|
||||
"timeout_secs": timeoutSec,
|
||||
"source": "provision_timeout_sweep",
|
||||
}); emitErr != nil {
|
||||
log.Printf("Provision-timeout sweep: broadcast failed for %s: %v", id, emitErr)
|
||||
}
|
||||
|
||||
@@ -58,8 +58,8 @@ func TestSweepStuckProvisioning_FlipsOverdue(t *testing.T) {
|
||||
if emit.count() != 1 {
|
||||
t.Fatalf("expected 1 event, got %d", emit.count())
|
||||
}
|
||||
if emit.events[0].Type != "WORKSPACE_PROVISION_TIMEOUT" {
|
||||
t.Errorf("event type = %q, want WORKSPACE_PROVISION_TIMEOUT", emit.events[0].Type)
|
||||
if emit.events[0].Type != "WORKSPACE_PROVISION_FAILED" {
|
||||
t.Errorf("event type = %q, want WORKSPACE_PROVISION_FAILED", emit.events[0].Type)
|
||||
}
|
||||
if emit.events[0].WorkspaceID != "ws-stuck" {
|
||||
t.Errorf("workspace id = %q, want ws-stuck", emit.events[0].WorkspaceID)
|
||||
@@ -72,7 +72,7 @@ func TestSweepStuckProvisioning_FlipsOverdue(t *testing.T) {
|
||||
// TestSweepStuckProvisioning_RaceSafe covers the case where UPDATE affects
|
||||
// 0 rows because the workspace flipped to online (or got restarted) between
|
||||
// the SELECT and the UPDATE. We should skip the event, not emit a false
|
||||
// WORKSPACE_PROVISION_TIMEOUT.
|
||||
// WORKSPACE_PROVISION_FAILED.
|
||||
func TestSweepStuckProvisioning_RaceSafe(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user