diff --git a/workspace-server/internal/handlers/workspace_crud.go b/workspace-server/internal/handlers/workspace_crud.go
index 1d4281838..b7e83a74a 100644
--- a/workspace-server/internal/handlers/workspace_crud.go
+++ b/workspace-server/internal/handlers/workspace_crud.go
@@ -6,6 +6,7 @@ package handlers
 
 import (
 	"database/sql"
+	"errors"
 	"fmt"
 	"log"
 	"net/http"
@@ -388,9 +389,24 @@ func (h *WorkspaceHandler) Delete(c *gin.Context) {
 	// Now stop containers + remove volumes for all descendants (any depth).
 	// Any concurrent heartbeat / registration / liveness-triggered restart
 	// will see status='removed' and bail out early.
+	//
+	// #1843: Stop() errors used to be silently swallowed. On the CP/EC2
+	// backend, Stop() calls the control plane's DELETE workspaces endpoint
+	// to terminate the EC2; if that errors (CP transient 5xx, network),
+	// the EC2 stays running with no DB row to track it — the
+	// "14 orphan workspace EC2s on a 0-customer account" scenario.
+	// Aggregate Stop failures and surface them as 500 so the client can
+	// retry. The retry replays Stop with the same instance_id (still
+	// readable from the row even after status='removed') — idempotent on
+	// the CP side. RemoveVolume errors stay log-and-continue: those are
+	// local cleanup of /var/data, not infra-leak class.
+	var stopErrs []error
 	for _, descID := range descendantIDs {
 		if h.provisioner != nil {
-			h.provisioner.Stop(ctx, descID)
+			if err := h.provisioner.Stop(ctx, descID); err != nil {
+				log.Printf("Delete descendant %s stop error: %v", descID, err)
+				stopErrs = append(stopErrs, fmt.Errorf("stop descendant %s: %w", descID, err))
+			}
 			if err := h.provisioner.RemoveVolume(ctx, descID); err != nil {
 				log.Printf("Delete descendant %s volume removal warning: %v", descID, err)
 			}
@@ -401,7 +417,10 @@ func (h *WorkspaceHandler) Delete(c *gin.Context) {
 
 	// Stop + remove volume for the workspace itself
 	if h.provisioner != nil {
-		h.provisioner.Stop(ctx, id)
+		if err := h.provisioner.Stop(ctx, id); err != nil {
+			log.Printf("Delete %s stop error: %v", id, err)
+			stopErrs = append(stopErrs, fmt.Errorf("stop %s: %w", id, err))
+		}
 		if err := h.provisioner.RemoveVolume(ctx, id); err != nil {
 			log.Printf("Delete %s volume removal warning: %v", id, err)
 		}
@@ -412,6 +431,21 @@ func (h *WorkspaceHandler) Delete(c *gin.Context) {
 		"cascade_deleted": len(descendantIDs),
 	})
 
+	// If any Stop call failed, surface 500 so the client retries. The DB
+	// row is already 'removed' (idempotent), and Stop's instance_id
+	// lookup tolerates that — the retry replays the terminate. This is
+	// the loud-fail-instead-of-silent-leak choice; users see a 500
+	// instead of an orphaned EC2.
+	if len(stopErrs) > 0 {
+		c.JSON(http.StatusInternalServerError, gin.H{
+			"error": fmt.Sprintf("workspace marked removed, but %d stop call(s) failed — please retry: %v",
+				len(stopErrs), errors.Join(stopErrs...)),
+			"removed_count": len(allIDs),
+			"stop_failures": len(stopErrs),
+		})
+		return
+	}
+
 	// Hard purge: cascade delete all FK data and remove the DB row entirely (#1087)
 	if c.Query("purge") == "true" {
 		purgeIDs := pq.Array(allIDs)