From f1b2b521c480ff85dc895736c2679e5dedb8f649 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Sat, 23 May 2026 06:42:23 +0000 Subject: [PATCH 1/8] fix(registry): check rows.Err() after iteration in background sweeps sweepOnlineWorkspaces and sweepStaleRemoteWorkspaces (healthsweep.go) and sweep (provisiontimeout.go) iterated sql.Rows without calling rows.Err() after the loop. A mid-stream Postgres error would silently truncate the candidate list, causing missed offline-marking or missed provision-timeout failures. Co-Authored-By: Claude Opus 4.7 --- workspace-server/internal/registry/healthsweep.go | 6 ++++++ workspace-server/internal/registry/provisiontimeout.go | 3 +++ 2 files changed, 9 insertions(+) diff --git a/workspace-server/internal/registry/healthsweep.go b/workspace-server/internal/registry/healthsweep.go index fdeef4f96..e290d823a 100644 --- a/workspace-server/internal/registry/healthsweep.go +++ b/workspace-server/internal/registry/healthsweep.go @@ -93,6 +93,9 @@ func sweepOnlineWorkspaces(ctx context.Context, checker ContainerChecker, onOffl ids = append(ids, id) } } + if err := rows.Err(); err != nil { + log.Printf("Health sweep: rows error: %v", err) + } for _, id := range ids { running, err := checker.IsRunning(ctx, id) @@ -159,6 +162,9 @@ func sweepStaleRemoteWorkspaces(ctx context.Context, onOffline OfflineHandler) { ids = append(ids, id) } } + if err := rows.Err(); err != nil { + log.Printf("Health sweep: rows error: %v", err) + } for _, id := range ids { // External workspaces flip to 'awaiting_agent' (re-registrable diff --git a/workspace-server/internal/registry/provisiontimeout.go b/workspace-server/internal/registry/provisiontimeout.go index 46b9e1577..f5c2638b1 100644 --- a/workspace-server/internal/registry/provisiontimeout.go +++ b/workspace-server/internal/registry/provisiontimeout.go @@ -166,6 +166,9 @@ func sweepStuckProvisioning(ctx context.Context, emitter ProvisionTimeoutEmitter ids = append(ids, c) } } + if err := rows.Err(); err != nil { + log.Printf("Provision-timeout sweep: rows error: %v", err) + } for _, c := range ids { timeout := provisioningTimeoutFor(c.runtime, lookup) -- 2.52.0 From 77e878966ffddded7168d12cdb10efc8b1a3c484 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Sat, 23 May 2026 06:52:43 +0000 Subject: [PATCH 2/8] fix(handlers): add missing rows.Err() checks in restart and discovery paths Adds rows.Err() after rows.Next() loops in three handlers: - restart_context.go: global_secrets + workspace_secrets queries - workspace_restart.go: Pause/Resume descendant CTE queries - discovery.go: queryPeerMaps peer listing Also switches restart_context.go from inline rows.Close() to defer rows.Close() for panic safety (matches pattern in healthsweep.go). These close the remaining gaps from PR #1704 and #1708. Co-Authored-By: Claude Opus 4.7 --- workspace-server/internal/handlers/discovery.go | 3 +++ workspace-server/internal/handlers/restart_context.go | 10 ++++++++-- .../internal/handlers/workspace_restart.go | 6 ++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/workspace-server/internal/handlers/discovery.go b/workspace-server/internal/handlers/discovery.go index 1c639e2ea..9269290e6 100644 --- a/workspace-server/internal/handlers/discovery.go +++ b/workspace-server/internal/handlers/discovery.go @@ -393,6 +393,9 @@ func queryPeerMaps(query string, args ...interface{}) ([]map[string]interface{}, result = append(result, peer) } + if err := rows.Err(); err != nil { + log.Printf("queryPeerMaps rows.Err: %v", err) + } return result, nil } diff --git a/workspace-server/internal/handlers/restart_context.go b/workspace-server/internal/handlers/restart_context.go index 39022e7cb..9ec759df6 100644 --- a/workspace-server/internal/handlers/restart_context.go +++ b/workspace-server/internal/handlers/restart_context.go @@ -133,24 +133,30 @@ func loadRestartContextData(ctx context.Context, workspaceID string) restartCont // message bus. keySet := map[string]struct{}{} if rows, err := db.DB.QueryContext(ctx, `SELECT key FROM global_secrets`); err == nil { + defer rows.Close() for rows.Next() { var k string if rows.Scan(&k) == nil { keySet[k] = struct{}{} } } - rows.Close() + if err := rows.Err(); err != nil { + log.Printf("loadRestartContextData: global_secrets rows.Err: %v", err) + } } if rows, err := db.DB.QueryContext(ctx, `SELECT key FROM workspace_secrets WHERE workspace_id = $1`, workspaceID, ); err == nil { + defer rows.Close() for rows.Next() { var k string if rows.Scan(&k) == nil { keySet[k] = struct{}{} } } - rows.Close() + if err := rows.Err(); err != nil { + log.Printf("loadRestartContextData: workspace_secrets rows.Err: %v", err) + } } for k := range keySet { d.EnvKeys = append(d.EnvKeys, k) diff --git a/workspace-server/internal/handlers/workspace_restart.go b/workspace-server/internal/handlers/workspace_restart.go index 6ac3dc506..8de60838d 100644 --- a/workspace-server/internal/handlers/workspace_restart.go +++ b/workspace-server/internal/handlers/workspace_restart.go @@ -858,6 +858,9 @@ func (h *WorkspaceHandler) Pause(c *gin.Context) { toPause = append(toPause, struct{ id, name string }{cid, cname}) } } + if err := rows.Err(); err != nil { + log.Printf("Pause: descendant query rows.Err: %v", err) + } } // Stop containers and mark all as paused. StopWorkspaceAuto routes @@ -939,6 +942,9 @@ func (h *WorkspaceHandler) Resume(c *gin.Context) { toResume = append(toResume, ws) } } + if err := rows.Err(); err != nil { + log.Printf("Resume: descendant query rows.Err: %v", err) + } } // Re-provision all -- 2.52.0 From 2f4a1b2e620db2ecaeb3474be849c4c05b4f92e8 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Sat, 23 May 2026 07:02:07 +0000 Subject: [PATCH 3/8] fix(channels): add missing rows.Err() checks in manager reload and broadcast loops Adds rows.Err() after three for rows.Next() loops in channels/manager.go: - pausePollersForDiscovery - Reload - BroadcastToWorkspace Without these, mid-stream query errors silently truncate the channel list, leaving stale pollers running or skipping broadcast targets. Co-Authored-By: Claude Opus 4.7 --- workspace-server/internal/channels/manager.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/workspace-server/internal/channels/manager.go b/workspace-server/internal/channels/manager.go index 63cfe9503..3f5863eb2 100644 --- a/workspace-server/internal/channels/manager.go +++ b/workspace-server/internal/channels/manager.go @@ -156,6 +156,9 @@ func (m *Manager) PausePollersForToken(workspaceID, botToken string) func() { } } } + if err := rows.Err(); err != nil { + log.Printf("Channels: pause-pollers rows.Err: %v", err) + } m.mu.Unlock() if len(pausedIDs) == 0 { @@ -216,6 +219,9 @@ func (m *Manager) Reload(ctx context.Context) { } desired[ch.ID] = ch } + if err := rows.Err(); err != nil { + log.Printf("Channels: reload rows.Err: %v", err) + } m.mu.Lock() defer m.mu.Unlock() @@ -473,6 +479,9 @@ func (m *Manager) BroadcastToWorkspaceChannels(ctx context.Context, workspaceID, } } } + if err := rows.Err(); err != nil { + log.Printf("Channels: broadcast rows.Err: %v", err) + } } // FetchWorkspaceChannelContext returns recent Slack channel messages formatted -- 2.52.0 From 92f4bfaa8d45ecf7fe1f8703e1e02cfcca57c630 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Sat, 23 May 2026 07:08:15 +0000 Subject: [PATCH 4/8] fix(channels): add missing rows.Err() checks in ListChannels and HandleTelegramWebhook Adds rows.Err() after two for rows.Next() loops in channels.go: - ListChannels - HandleTelegramWebhook Closes remaining gaps from PR #1708. Co-Authored-By: Claude Opus 4.7 --- workspace-server/internal/handlers/channels.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/workspace-server/internal/handlers/channels.go b/workspace-server/internal/handlers/channels.go index b14690740..6d9f0942f 100644 --- a/workspace-server/internal/handlers/channels.go +++ b/workspace-server/internal/handlers/channels.go @@ -104,6 +104,9 @@ func (h *ChannelHandler) List(c *gin.Context) { } result = append(result, entry) } + if err := rows.Err(); err != nil { + log.Printf("Channels: list rows.Err: %v", err) + } c.JSON(http.StatusOK, result) } @@ -514,6 +517,9 @@ func (h *ChannelHandler) Webhook(c *gin.Context) { candidates = append(candidates, row) } } + if err := rows.Err(); err != nil { + log.Printf("Channels: telegram webhook rows.Err: %v", err) + } if targetSlug != "" { // [slug] routing — match against config username (lowercased) -- 2.52.0 From 68685567988f85352f3ba477dd57f2f3a13f9ae0 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Sat, 23 May 2026 07:11:14 +0000 Subject: [PATCH 5/8] fix(mcp-tools): log scanPeers errors instead of silently dropping them toolListPeers ignored scanPeers return values for siblings, children, and parent queries. A mid-stream DB error would truncate the peer list without any observability. Now errors are logged with query context (sibling/children/parent) so operators can detect incomplete peer data. Co-Authored-By: Claude Opus 4.7 --- workspace-server/internal/handlers/mcp_tools.go | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/workspace-server/internal/handlers/mcp_tools.go b/workspace-server/internal/handlers/mcp_tools.go index a457b7d10..4dd863cf7 100644 --- a/workspace-server/internal/handlers/mcp_tools.go +++ b/workspace-server/internal/handlers/mcp_tools.go @@ -95,14 +95,18 @@ func (h *MCPHandler) toolListPeers(ctx context.Context, workspaceID string) (str cols+` FROM workspaces w WHERE w.parent_id = $1 AND w.id != $2 AND w.status != 'removed'`, parentID.String, workspaceID) if err == nil { - _ = scanPeers(rows) + if scanErr := scanPeers(rows); scanErr != nil { + log.Printf("MCP toolListPeers: sibling scan error: %v", scanErr) + } } } else { rows, err := h.database.QueryContext(ctx, cols+` FROM workspaces w WHERE w.parent_id IS NULL AND w.id != $1 AND w.status != 'removed'`, workspaceID) if err == nil { - _ = scanPeers(rows) + if scanErr := scanPeers(rows); scanErr != nil { + log.Printf("MCP toolListPeers: sibling scan error: %v", scanErr) + } } } @@ -112,7 +116,9 @@ func (h *MCPHandler) toolListPeers(ctx context.Context, workspaceID string) (str cols+` FROM workspaces w WHERE w.parent_id = $1 AND w.status != 'removed'`, workspaceID) if err == nil { - _ = scanPeers(rows) + if scanErr := scanPeers(rows); scanErr != nil { + log.Printf("MCP toolListPeers: children scan error: %v", scanErr) + } } } @@ -122,7 +128,9 @@ func (h *MCPHandler) toolListPeers(ctx context.Context, workspaceID string) (str cols+` FROM workspaces w WHERE w.id = $1 AND w.status != 'removed'`, parentID.String) if err == nil { - _ = scanPeers(rows) + if scanErr := scanPeers(rows); scanErr != nil { + log.Printf("MCP toolListPeers: parent scan error: %v", scanErr) + } } } -- 2.52.0 From f4308942a43b958401b8220deb71e7a812154926 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Sat, 23 May 2026 07:15:26 +0000 Subject: [PATCH 6/8] fix(workspace-crud): add missing descRows.Err() check in CascadeDelete The descendant CTE query loop in CascadeDelete did not check descRows.Err() after iteration, silently truncating descendant IDs on mid-stream errors. Also switched inline descRows.Close() to defer for panic safety. Co-Authored-By: Claude Opus 4.7 --- workspace-server/internal/handlers/workspace_crud.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/workspace-server/internal/handlers/workspace_crud.go b/workspace-server/internal/handlers/workspace_crud.go index e1a35793c..7185badd0 100644 --- a/workspace-server/internal/handlers/workspace_crud.go +++ b/workspace-server/internal/handlers/workspace_crud.go @@ -435,13 +435,16 @@ func (h *WorkspaceHandler) CascadeDelete(ctx context.Context, id string) ([]stri if err != nil { return nil, nil, fmt.Errorf("descendant query: %w", err) } + defer descRows.Close() for descRows.Next() { var descID string if descRows.Scan(&descID) == nil { descendantIDs = append(descendantIDs, descID) } } - descRows.Close() + if err := descRows.Err(); err != nil { + log.Printf("CascadeDelete descendant rows.Err: %v", err) + } allIDs := append([]string{id}, descendantIDs...) -- 2.52.0 From 0a0da872873d8b0c516c87be63298f51254db5af Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Sat, 23 May 2026 10:25:30 +0000 Subject: [PATCH 7/8] fix(handlers): return error on descRows.Err() in CascadeDelete Previously, if the descendant CTE query succeeded but row iteration failed (e.g. connection dropped mid-scan), the error was only logged and CascadeDelete continued with a potentially truncated descendant set. Now we return the error before any deletion or side-effects run. Also adds TestCascadeDelete_DescendantRowsError covering the mid-iteration failure path. Closes review_id=5564 (CR2 blocker). Co-Authored-By: Claude Opus 4.7 --- .../internal/handlers/workspace_crud.go | 2 +- .../internal/handlers/workspace_crud_test.go | 24 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/workspace-server/internal/handlers/workspace_crud.go b/workspace-server/internal/handlers/workspace_crud.go index 7185badd0..dc30c48d6 100644 --- a/workspace-server/internal/handlers/workspace_crud.go +++ b/workspace-server/internal/handlers/workspace_crud.go @@ -443,7 +443,7 @@ func (h *WorkspaceHandler) CascadeDelete(ctx context.Context, id string) ([]stri } } if err := descRows.Err(); err != nil { - log.Printf("CascadeDelete descendant rows.Err: %v", err) + return nil, nil, fmt.Errorf("CascadeDelete: failed iterating descendants: %w", err) } allIDs := append([]string{id}, descendantIDs...) diff --git a/workspace-server/internal/handlers/workspace_crud_test.go b/workspace-server/internal/handlers/workspace_crud_test.go index 2ef07d4c0..36603c6db 100644 --- a/workspace-server/internal/handlers/workspace_crud_test.go +++ b/workspace-server/internal/handlers/workspace_crud_test.go @@ -503,6 +503,30 @@ func TestCascadeDelete_DescendantQueryError(t *testing.T) { // sqlmock verifies all expected queries were executed } +func TestCascadeDelete_DescendantRowsError(t *testing.T) { + mock, _ := setupWorkspaceCrudTest(t) + wsID := "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa" + + // RowError(0, ...) causes rows.Next() to return false immediately and + // rows.Err() to return the error — exercising the mid-iteration path. + h := &WorkspaceHandler{} + rows := sqlmock.NewRows([]string{"id"}).RowError(0, sql.ErrConnDone) + mock.ExpectQuery(`WITH RECURSIVE descendants AS`). + WithArgs(wsID). + WillReturnRows(rows) + + deleted, stopErrs, err := h.CascadeDelete(context.Background(), wsID) + if err == nil { + t.Fatal("CascadeDelete returned nil error; want descendant rows error") + } + if deleted != nil { + t.Errorf("deleted = %v; want nil", deleted) + } + if stopErrs != nil { + t.Errorf("stopErrs = %v; want nil", stopErrs) + } +} + // Note: Full CascadeDelete testing requires mocking StopWorkspace, RemoveVolume, // and provisioner calls — covered in integration tests. Unit tests here focus on // the validation and pre-condition paths. -- 2.52.0 From c14436b8ac673a325916bb2c33f928d6ae993c13 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Sat, 23 May 2026 15:32:00 +0000 Subject: [PATCH 8/8] fix(tests): add AddRow before RowError in CascadeDelete rows-error test sqlmock RowError(0, ...) requires a real row at index 0 to be reachable. Without AddRow, rows.Next() returns false immediately but rows.Err() stays nil, so the test falls through to deletion code and panics on nil db/redis. Adding AddRow(\"desc-1\") ensures the error fires. Co-Authored-By: Claude Opus 4.7 --- workspace-server/internal/handlers/workspace_crud_test.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/workspace-server/internal/handlers/workspace_crud_test.go b/workspace-server/internal/handlers/workspace_crud_test.go index 36603c6db..624447752 100644 --- a/workspace-server/internal/handlers/workspace_crud_test.go +++ b/workspace-server/internal/handlers/workspace_crud_test.go @@ -507,10 +507,12 @@ func TestCascadeDelete_DescendantRowsError(t *testing.T) { mock, _ := setupWorkspaceCrudTest(t) wsID := "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa" - // RowError(0, ...) causes rows.Next() to return false immediately and - // rows.Err() to return the error — exercising the mid-iteration path. + // RowError(0, ...) requires a real row at index 0 to be reachable — + // sqlmock only invokes nextErr[N] when r.pos-1 == N and the row exists. + // AddRow ensures Next() attempts the first row, triggers the error, + // and rows.Err() returns the injected error. h := &WorkspaceHandler{} - rows := sqlmock.NewRows([]string{"id"}).RowError(0, sql.ErrConnDone) + rows := sqlmock.NewRows([]string{"id"}).AddRow("desc-1").RowError(0, sql.ErrConnDone) mock.ExpectQuery(`WITH RECURSIVE descendants AS`). WithArgs(wsID). WillReturnRows(rows) -- 2.52.0