From 02a3de7c0e941279d86efb94638ccc4157f9c502 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Wed, 27 May 2026 09:45:31 +0000 Subject: [PATCH] fix(workspace-server): replace time.After with time.NewTimer to prevent goroutine leaks Inside loops, time.After creates a new timer goroutine each iteration that cannot be GC'd until it fires. In long-running loops (supervisor restart backoff, Telegram polling, restart-context polling, CP stop retry) this leaks goroutines proportional to iteration count. Replace with time.NewTimer + timer.Stop() on ctx cancellation so the timer is cleaned up immediately when the goroutine exits. Affected files: - supervised/supervised.go (RunWithRecover backoff) - channels/telegram.go (429 retry + poll error sleep) - handlers/restart_context.go (online + heartbeat polling) - handlers/workspace_restart.go (cpStop retry backoff) Co-Authored-By: Claude Opus 4.7 --- workspace-server/internal/channels/telegram.go | 12 ++++++++---- .../internal/handlers/restart_context.go | 8 ++++++-- .../internal/handlers/workspace_restart.go | 4 +++- workspace-server/internal/supervised/supervised.go | 4 +++- 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/workspace-server/internal/channels/telegram.go b/workspace-server/internal/channels/telegram.go index 778afa5c0..3d323057c 100644 --- a/workspace-server/internal/channels/telegram.go +++ b/workspace-server/internal/channels/telegram.go @@ -482,12 +482,14 @@ func (t *TelegramAdapter) StartPolling(ctx context.Context, config map[string]in if apiErr.Code == 429 { retryAfter := time.Duration(apiErr.RetryAfter) * time.Second log.Printf("Channels: Telegram poll rate-limited, sleeping %s", retryAfter) + timer := time.NewTimer(retryAfter) select { case <-ctx.Done(): + timer.Stop() return nil - case <-time.After(retryAfter): - continue + case <-timer.C: } + continue } if apiErr.Code == 401 { invalidateBot(token) @@ -495,12 +497,14 @@ func (t *TelegramAdapter) StartPolling(ctx context.Context, config map[string]in } } log.Printf("Channels: Telegram poll error: %v", err) + timer := time.NewTimer(telegramPollInterval) select { case <-ctx.Done(): + timer.Stop() return nil - case <-time.After(telegramPollInterval): - continue + case <-timer.C: } + continue } for _, update := range updates { diff --git a/workspace-server/internal/handlers/restart_context.go b/workspace-server/internal/handlers/restart_context.go index d84141976..d0bfad617 100644 --- a/workspace-server/internal/handlers/restart_context.go +++ b/workspace-server/internal/handlers/restart_context.go @@ -177,10 +177,12 @@ func waitForWorkspaceOnline(ctx context.Context, workspaceID string, timeout tim ).Scan(&status); err == nil && status == "online" { return true } + timer := time.NewTimer(restartContextOnlinePollInterval) select { case <-ctx.Done(): + timer.Stop() return false - case <-time.After(restartContextOnlinePollInterval): + case <-timer.C: } } return false @@ -213,10 +215,12 @@ func waitForFreshHeartbeat(ctx context.Context, workspaceID string, restartStart lastHB.Valid && lastHB.Time.After(restartStartTs) { return true } + timer := time.NewTimer(restartContextOnlinePollInterval) select { case <-ctx.Done(): + timer.Stop() return false - case <-time.After(restartContextOnlinePollInterval): + case <-timer.C: } } return false diff --git a/workspace-server/internal/handlers/workspace_restart.go b/workspace-server/internal/handlers/workspace_restart.go index 5d9122648..8bae4ef61 100644 --- a/workspace-server/internal/handlers/workspace_restart.go +++ b/workspace-server/internal/handlers/workspace_restart.go @@ -763,12 +763,14 @@ func (h *WorkspaceHandler) cpStopWithRetryErr(ctx context.Context, workspaceID, } // Sleep with ctx awareness so a cancelled ctx exits early instead // of stalling the goroutine through the remaining backoff. + timer := time.NewTimer(delay) select { case <-ctx.Done(): + timer.Stop() log.Printf("%s: cpProv.Stop(%s) abandoned mid-retry: ctx cancelled (last_err=%v)", source, workspaceID, lastErr) return ctx.Err() - case <-time.After(delay): + case <-timer.C: } delay *= 2 } diff --git a/workspace-server/internal/supervised/supervised.go b/workspace-server/internal/supervised/supervised.go index 8de84258f..e9c6a40f3 100644 --- a/workspace-server/internal/supervised/supervised.go +++ b/workspace-server/internal/supervised/supervised.go @@ -60,10 +60,12 @@ func RunWithRecover(ctx context.Context, name string, fn func(context.Context)) } // Panic → back off and restart. + timer := time.NewTimer(backoff) select { case <-ctx.Done(): + timer.Stop() return - case <-time.After(backoff): + case <-timer.C: } if backoff < maxBackoff { backoff *= 2 -- 2.52.0