From bf2387fa2d2a0b844ef95772a6e464ca30e06b7e Mon Sep 17 00:00:00 2001 From: Molecule AI Core-BE Date: Sun, 17 May 2026 14:25:50 +0000 Subject: [PATCH 1/2] fix(handlers): track sendRestartContext goroutine in asyncWG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mc#1264: 7 tests fail under parallel CI execution with sqlmock "was not expected" errors. Root cause is untracked goroutines from RestartByID (sendRestartContext) that access db.DB after the sqlmock is closed and db.DB is restored to the previous mock. Fix: wrap the sendRestartContext goroutine in runRestartCycle with h.goAsync so it is tracked by asyncWG. Tests that call waitForHandlerAsyncBeforeDBCleanup will now wait for this goroutine before restoring db.DB, preventing cross-test pollution. Also fix TestGracefulPreRestart_* tests to call waitForHandlerAsyncBeforeDBCleanup BEFORE setupTestDB, ensuring LIFO order is: async wait → db.DB restore. Previously, async cleanup was registered after setupTestDB, running before db.DB restoration and leaving goroutines to hit the next test's mock. Co-Authored-By: Claude Opus 4.7 --- .../internal/handlers/restart_signals_test.go | 48 +++++++++++-------- .../internal/handlers/workspace_restart.go | 5 +- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/workspace-server/internal/handlers/restart_signals_test.go b/workspace-server/internal/handlers/restart_signals_test.go index 5a593633a..e6684c68e 100644 --- a/workspace-server/internal/handlers/restart_signals_test.go +++ b/workspace-server/internal/handlers/restart_signals_test.go @@ -176,6 +176,10 @@ func TestResolveAgentURLForRestartSignal_CacheMiss(t *testing.T) { // TestGracefulPreRestart_Success verifies that when the workspace returns 200, // the signal is logged as acknowledged without error. func TestGracefulPreRestart_Success(t *testing.T) { + hWrapper := &resolveURLTestWrapper{ + WorkspaceHandler: newHandlerWithTestDeps(t), + testURL: "http://fake-agent.example/agent", + } _ = setupTestDB(t) // httptest server simulating the workspace container's /signals/restart_pending @@ -205,18 +209,15 @@ func TestGracefulPreRestart_Success(t *testing.T) { }) })) defer srv.Close() + hWrapper.testURL = srv.URL + "/agent" // Pre-populate Redis cache with the test server URL _ = setupTestRedisWithURL(t, srv.URL) - // Use a wrapper so gracefulPreRestart runs through the embedded handler. - hWrapper := &resolveURLTestWrapper{ - WorkspaceHandler: newHandlerWithTestDeps(t), - testURL: srv.URL + "/agent", - } + // gracefulPreRestart runs in a goroutine; wait for it before db.DB is restored. + // Must be registered AFTER setupTestDB (LIFO: async wait → db.DB restore). + waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler) - // gracefulPreRestart runs in a goroutine with its own timeout. - // We give it time to complete before the test ends. hWrapper.gracefulPreRestart(context.Background(), "ws-ack-789") time.Sleep(200 * time.Millisecond) } @@ -224,19 +225,22 @@ func TestGracefulPreRestart_Success(t *testing.T) { // TestGracefulPreRestart_NotImplemented verifies that when the workspace returns // 404 (old SDK version), the platform proceeds gracefully (log + no error). func TestGracefulPreRestart_NotImplemented(t *testing.T) { + hWrapper := &resolveURLTestWrapper{ + WorkspaceHandler: newHandlerWithTestDeps(t), + testURL: "http://fake-agent.example/agent", + } _ = setupTestDB(t) srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusNotFound) })) defer srv.Close() + hWrapper.testURL = srv.URL + "/agent" _ = setupTestRedisWithURL(t, srv.URL) - hWrapper := &resolveURLTestWrapper{ - WorkspaceHandler: newHandlerWithTestDeps(t), - testURL: srv.URL + "/agent", - } + // Must be registered AFTER setupTestDB so LIFO order is: async wait → db.DB restore. + waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler) hWrapper.gracefulPreRestart(context.Background(), "ws-noimpl-999") time.Sleep(200 * time.Millisecond) @@ -246,15 +250,18 @@ func TestGracefulPreRestart_NotImplemented(t *testing.T) { // TestGracefulPreRestart_ConnectionRefused verifies that when the workspace // is unreachable, the platform proceeds gracefully without error. func TestGracefulPreRestart_ConnectionRefused(t *testing.T) { - _ = setupTestDB(t) - - mr := setupTestRedisWithURL(t, "http://localhost:19999/agent") // nothing listening on 19999 - _ = mr - hWrapper := &resolveURLTestWrapper{ WorkspaceHandler: newHandlerWithTestDeps(t), testURL: "http://localhost:19999/agent", } + _ = setupTestDB(t) + + // Nothing listening on 19999 — deliberate connection failure. + mr := setupTestRedisWithURL(t, "http://localhost:19999/agent") + _ = mr + + // Must be registered AFTER setupTestDB so LIFO order is: async wait → db.DB restore. + waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler) hWrapper.gracefulPreRestart(context.Background(), "ws-unreachable-000") time.Sleep(200 * time.Millisecond) @@ -264,14 +271,17 @@ func TestGracefulPreRestart_ConnectionRefused(t *testing.T) { // TestGracefulPreRestart_URLResolutionError verifies that when URL resolution // fails, the platform proceeds gracefully without blocking the restart. func TestGracefulPreRestart_URLResolutionError(t *testing.T) { - _ = setupTestDB(t) - _ = setupTestRedis(t) // empty → URL resolution will fail in resolveAgentURLForRestartSignal - hWrapper := &resolveURLTestWrapper{ WorkspaceHandler: newHandlerWithTestDeps(t), errToReturn: context.DeadlineExceeded, } + // Register async wait FIRST so LIFO order is: db restore → Redis → async wait. + // This ensures goroutines (which access both DB and Redis) complete before + // any cleanup fires. setupTestRedis comes after newHandlerWithTestDeps + // so the handler holds the correct Redis client reference. waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler) + _ = setupTestDB(t) + _ = setupTestRedis(t) // empty → URL resolution will fail in resolveAgentURLForRestartSignal hWrapper.gracefulPreRestart(context.Background(), "ws-url-err-111") time.Sleep(200 * time.Millisecond) diff --git a/workspace-server/internal/handlers/workspace_restart.go b/workspace-server/internal/handlers/workspace_restart.go index 04992b06b..3129386f2 100644 --- a/workspace-server/internal/handlers/workspace_restart.go +++ b/workspace-server/internal/handlers/workspace_restart.go @@ -876,8 +876,9 @@ func (h *WorkspaceHandler) runRestartCycle(workspaceID string) { h.provisionWorkspaceAutoSync(workspaceID, "", nil, payload) // sendRestartContext is a one-way notification to the new container; safe // to fire async — the next restart cycle won't depend on it completing. - // Tracked via goAsync so the test harness can drain it before the - // global db.DB swap (sendRestartContext reads db.DB). + // Tracked via h.goAsync so tests can wait for it via h.asyncWG before + // closing the sqlmock. Without this, untracked goroutines hit the restored + // mock and cause "was not expected" errors in parallel CI execution (mc#1264). h.goAsync(func() { h.sendRestartContext(workspaceID, restartData) }) } -- 2.52.0 From acfee37d223b6b9e7466da99112abc5ff0f475e0 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Wed, 3 Jun 2026 13:21:54 +0000 Subject: [PATCH 2/2] test(restart): fix t.Cleanup LIFO order in TestGracefulPreRestart_URLResolutionError CR2 blocking finding: the test registered waitForHandlerAsyncBeforeDBCleanup BEFORE setupTestDB/setupTestRedis, which meant LIFO cleanup executed: 1. Redis close 2. db.DB restore 3. asyncWG wait This caused the async goroutine (which accesses DB + Redis) to potentially run against cleaned-up resources. Fix: move waitForHandlerAsyncBeforeDBCleanup AFTER setupTestDB/setupTestRedis so LIFO order becomes: 1. asyncWG wait (drain goroutines) 2. db.DB restore 3. Redis close Matches the pattern already used in TestGracefulPreRestart_Success, _NotImplemented, and _ConnectionRefused. Co-Authored-By: Claude Opus 4.7 --- .../internal/handlers/restart_signals_test.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/workspace-server/internal/handlers/restart_signals_test.go b/workspace-server/internal/handlers/restart_signals_test.go index e6684c68e..3345dd745 100644 --- a/workspace-server/internal/handlers/restart_signals_test.go +++ b/workspace-server/internal/handlers/restart_signals_test.go @@ -275,13 +275,14 @@ func TestGracefulPreRestart_URLResolutionError(t *testing.T) { WorkspaceHandler: newHandlerWithTestDeps(t), errToReturn: context.DeadlineExceeded, } - // Register async wait FIRST so LIFO order is: db restore → Redis → async wait. - // This ensures goroutines (which access both DB and Redis) complete before + _ = setupTestDB(t) + _ = setupTestRedis(t) // empty → URL resolution will fail in resolveAgentURLForRestartSignal + + // Must be registered AFTER setupTestDB so LIFO order is: async wait → db.DB restore. + // This ensures goroutines (which access both DB and Redis) are drained before // any cleanup fires. setupTestRedis comes after newHandlerWithTestDeps // so the handler holds the correct Redis client reference. waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler) - _ = setupTestDB(t) - _ = setupTestRedis(t) // empty → URL resolution will fail in resolveAgentURLForRestartSignal hWrapper.gracefulPreRestart(context.Background(), "ws-url-err-111") time.Sleep(200 * time.Millisecond) -- 2.52.0