From b47a1b87b06f8da0b4b2e17badd8aa11be51ad30 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Sat, 25 Apr 2026 23:34:57 -0700 Subject: [PATCH] chore: refresh stale orphan-sweeper Stop-failure comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convergence-pass review noted the comment at orphan_sweeper.go:171 still describes the pre-cb126014 contract ("Stop returns nil even when container is gone, but a future change could surface real errors"). The future is now — Stop does surface real errors today. Tightened the comment to match the live contract: isContainerNotFound is treated as success, anything else returns the wrapped Docker error, sweeper retries on the next cycle. Pure comment change, no behavior diff. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../internal/registry/orphan_sweeper.go | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/workspace-server/internal/registry/orphan_sweeper.go b/workspace-server/internal/registry/orphan_sweeper.go index 293e5384..23faf28d 100644 --- a/workspace-server/internal/registry/orphan_sweeper.go +++ b/workspace-server/internal/registry/orphan_sweeper.go @@ -168,12 +168,15 @@ func sweepOnce(parent context.Context, reaper OrphanReaper) { for _, id := range orphanIDs { log.Printf("Orphan sweeper: stopping leaked container for removed workspace %s", id) if stopErr := reaper.Stop(ctx, id); stopErr != nil { - // Stop() itself returns nil even when container is - // gone, but a future change could surface real errors. - // Keep the volume around for the next sweep so we - // don't fall into the same Stop-failed-then-volume- - // in-use trap that motivated this sweeper. - log.Printf("Orphan sweeper: Stop failed for %s: %v — leaving volume", id, stopErr) + // Stop returns the wrapped Docker error (treating + // "container not found" as nil-success via + // isContainerNotFound), so a non-nil here means the + // container is genuinely still alive — daemon timeout, + // ctx cancellation, or a transient socket EOF. + // Skip RemoveVolume so we don't fall into the same + // Stop-failed-then-volume-in-use trap that motivated + // this sweeper. The next cycle (60s out) retries Stop. + log.Printf("Orphan sweeper: Stop failed for %s: %v — leaving volume for next cycle", id, stopErr) continue } if rmErr := reaper.RemoveVolume(ctx, id); rmErr != nil {