From f951f37a504e5cdc064e6ae1dfeb6b2ea9173c57 Mon Sep 17 00:00:00 2001 From: Molecule AI Fullstack Engineer Date: Fri, 15 May 2026 16:41:05 +0000 Subject: [PATCH] =?UTF-8?q?fix(provisioner):=20remove=20ContainerName=20tr?= =?UTF-8?q?uncation=20=E2=80=94=20fix=20Docker=20DNS=20collision=20(KI-010?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `ContainerName()`, `ConfigVolumeName()`, and `ClaudeSessionVolumeName()` truncated workspace IDs to 12 chars. Two workspaces whose UUIDs share the same first 12 characters collided on Docker container/volume names, causing DNS resolution failures and A2A routing errors. Changes: - provisioner.go: remove the `if len(id) > 12 { id = id[:12] }` guard from all three name functions. Full UUID (36 chars) + prefix is well within Docker's 63-char name limit (container: 39 chars, config volume: 48, session volume: 56). - orphan_sweeper.go: update comments to reflect full-UUID container names; the LIKE query becomes a de-facto exact match — orphan detection unchanged. - provisioner_test.go: update expected values for the no-truncation case; add TestContainerNameCollisionRegression (proves same-first-12-chars IDs now produce distinct names) and TestContainerNameDockerLengthLimit (proves all name forms stay under 63 chars). Co-Authored-By: Claude Opus 4.7 --- .../internal/provisioner/provisioner.go | 40 +++++++--------- .../internal/provisioner/provisioner_test.go | 48 ++++++++++++++++--- .../internal/registry/orphan_sweeper.go | 36 ++++++-------- 3 files changed, 75 insertions(+), 49 deletions(-) diff --git a/workspace-server/internal/provisioner/provisioner.go b/workspace-server/internal/provisioner/provisioner.go index 4c19c2046..999d82f1b 100644 --- a/workspace-server/internal/provisioner/provisioner.go +++ b/workspace-server/internal/provisioner/provisioner.go @@ -129,24 +129,23 @@ const ( ) // ConfigVolumeName returns the Docker named volume for a workspace's configs. +// The full workspace ID is used — Docker volume names are capped at 63 chars +// and a UUID (36 chars) plus the 9-char "-configs" suffix (ws- prefix = 3) +// is 48 chars total, well within the limit. Truncating to 12 chars caused +// Docker DNS collisions when two workspace IDs shared the same leading prefix. func ConfigVolumeName(workspaceID string) string { - id := workspaceID - if len(id) > 12 { - id = id[:12] - } - return fmt.Sprintf("ws-%s-configs", id) + return fmt.Sprintf("ws-%s-configs", workspaceID) } // ClaudeSessionVolumeName returns the Docker named volume for a workspace's // Claude Code session directory (/root/.claude/sessions). Separate from the // config volume so it can be discarded independently (via WORKSPACE_RESET_SESSION // or ?reset=true) without wiping the user's config. Issue #12. +// Full workspace ID used — 56 chars total (ws- + 36-char UUID + suffix) +// fits within Docker's 63-char name limit. Truncation removed to prevent +// Docker DNS collisions (KI-010). func ClaudeSessionVolumeName(workspaceID string) string { - id := workspaceID - if len(id) > 12 { - id = id[:12] - } - return fmt.Sprintf("ws-%s-claude-sessions", id) + return fmt.Sprintf("ws-%s-claude-sessions", workspaceID) } // Provisioner manages Docker containers for workspace agents. @@ -164,12 +163,11 @@ func New() (*Provisioner, error) { } // ContainerName returns the Docker container name for a workspace. +// Full workspace ID used — 39 chars total (ws- + 36-char UUID) fits within +// Docker's 63-char container name limit. Truncation removed to prevent Docker +// DNS collisions (KI-010). func ContainerName(workspaceID string) string { - id := workspaceID - if len(id) > 12 { - id = id[:12] - } - return fmt.Sprintf("ws-%s", id) + return fmt.Sprintf("ws-%s", workspaceID) } // containerNamePrefix is the shared prefix every workspace container @@ -196,12 +194,10 @@ func managedLabels() map[string]string { return map[string]string{LabelManaged: "true"} } -// ListWorkspaceContainerIDPrefixes returns the 12-char workspace ID -// prefixes of every running ws-* container the Docker daemon knows -// about. The 12-char form matches ContainerName's truncation, so the -// orphan sweeper can intersect this set against `SELECT -// substring(id::text, 1, 12) FROM workspaces WHERE status = 'removed'` -// without an extra round-trip per row. +// ListWorkspaceContainerIDPrefixes returns the full workspace IDs (UUID, minus +// the "ws-" prefix) of every running ws-* container the Docker daemon knows +// about. The orphan sweeper uses these as exact-match targets in its +// `id::text LIKE '%'` query against workspaces with status='removed'. // // Returns an empty slice on any Docker error (sweeper treats that as // "skip this round" — better than a partial scan that misses leaks). @@ -246,7 +242,7 @@ func (p *Provisioner) ListWorkspaceContainerIDPrefixes(ctx context.Context) ([]s return prefixes, nil } -// ListManagedContainerIDPrefixes returns the workspace ID prefix of every +// ListManagedContainerIDPrefixes returns the full workspace ID of every // container carrying the LabelManaged stamp. Distinct from // ListWorkspaceContainerIDPrefixes (name-filtered, may include sibling // platforms' containers on a shared Docker daemon): this method is the diff --git a/workspace-server/internal/provisioner/provisioner_test.go b/workspace-server/internal/provisioner/provisioner_test.go index 8d4a20f05..cb5bb711f 100644 --- a/workspace-server/internal/provisioner/provisioner_test.go +++ b/workspace-server/internal/provisioner/provisioner_test.go @@ -350,6 +350,7 @@ func TestTierEscalation(t *testing.T) { } // TestContainerName verifies the naming convention. +// Full workspace IDs are used — no truncation (KI-010). func TestContainerName(t *testing.T) { tests := []struct { id string @@ -357,7 +358,7 @@ func TestContainerName(t *testing.T) { }{ {"short", "ws-short"}, {"exactly12ch", "ws-exactly12ch"}, - {"longer-than-twelve-characters", "ws-longer-than-"}, + {"longer-than-twelve-characters", "ws-longer-than-twelve-characters"}, {"abc", "ws-abc"}, } @@ -370,6 +371,7 @@ func TestContainerName(t *testing.T) { } // TestConfigVolumeName verifies config volume naming. +// Full workspace IDs are used — no truncation (KI-010). func TestConfigVolumeName(t *testing.T) { tests := []struct { id string @@ -377,7 +379,7 @@ func TestConfigVolumeName(t *testing.T) { }{ {"short", "ws-short-configs"}, {"exactly12ch", "ws-exactly12ch-configs"}, - {"longer-than-twelve-characters", "ws-longer-than--configs"}, + {"longer-than-twelve-characters", "ws-longer-than-twelve-characters-configs"}, {"abc", "ws-abc-configs"}, } @@ -391,9 +393,8 @@ func TestConfigVolumeName(t *testing.T) { // ---------- #12 — claude-sessions volume naming ---------- -// TestClaudeSessionVolumeName_Deterministic: same ID → same volume name, and -// the name follows the ws--claude-sessions shape used everywhere -// else in the provisioner. +// TestClaudeSessionVolumeName_Deterministic: same ID → same volume name. +// Full workspace IDs are used — no truncation (KI-010). func TestClaudeSessionVolumeName_Deterministic(t *testing.T) { tests := []struct { id string @@ -401,7 +402,7 @@ func TestClaudeSessionVolumeName_Deterministic(t *testing.T) { }{ {"short", "ws-short-claude-sessions"}, {"exactly12ch", "ws-exactly12ch-claude-sessions"}, - {"longer-than-twelve-characters", "ws-longer-than--claude-sessions"}, + {"longer-than-twelve-characters", "ws-longer-than-twelve-characters-claude-sessions"}, {"abc", "ws-abc-claude-sessions"}, } for _, tt := range tests { @@ -426,6 +427,41 @@ func TestClaudeSessionVolumeName_DistinctFromConfig(t *testing.T) { } } +// TestContainerNameCollisionRegression verifies that two workspace IDs sharing +// the same first 12 characters produce distinct container names (KI-010). +// Previously, truncation to 12 chars caused Docker DNS collisions. +func TestContainerNameCollisionRegression(t *testing.T) { + id1 := "abcd1234-5678-90ab-cdef-1234567890ab" + id2 := "abcd1234-5678-90ab-cdef-9876543210cd" // same first 12 chars as id1 + if ContainerName(id1) == ContainerName(id2) { + t.Errorf("ContainerName collision: both IDs produced %q — Docker DNS would fail", ContainerName(id1)) + } + if ConfigVolumeName(id1) == ConfigVolumeName(id2) { + t.Errorf("ConfigVolumeName collision: both IDs produced %q", ConfigVolumeName(id1)) + } + if ClaudeSessionVolumeName(id1) == ClaudeSessionVolumeName(id2) { + t.Errorf("ClaudeSessionVolumeName collision: both IDs produced %q", ClaudeSessionVolumeName(id1)) + } +} + +// TestContainerNameDockerLengthLimit verifies that a full UUID workspace ID +// (36 chars) plus the "ws-" prefix stays within Docker's 63-char name limit. +func TestContainerNameDockerLengthLimit(t *testing.T) { + fullUUID := "abcd1234-5678-90ab-cdef-1234567890ab" // 36 chars + got := ContainerName(fullUUID) + if len(got) > 63 { + t.Errorf("ContainerName length %d exceeds Docker 63-char limit: %q", len(got), got) + } + gotVol := ConfigVolumeName(fullUUID) + if len(gotVol) > 63 { + t.Errorf("ConfigVolumeName length %d exceeds Docker 63-char limit: %q", len(gotVol), gotVol) + } + gotSession := ClaudeSessionVolumeName(fullUUID) + if len(gotSession) > 63 { + t.Errorf("ClaudeSessionVolumeName length %d exceeds Docker 63-char limit: %q", len(gotSession), gotSession) + } +} + // TestWorkspaceConfig_ResetClaudeSessionFieldPresent is a compile-time check // that the ResetClaudeSession knob exists on WorkspaceConfig so handlers can // plumb ?reset=true through to the provisioner without a struct tag dance. diff --git a/workspace-server/internal/registry/orphan_sweeper.go b/workspace-server/internal/registry/orphan_sweeper.go index 6e4110cbc..edfca7fe8 100644 --- a/workspace-server/internal/registry/orphan_sweeper.go +++ b/workspace-server/internal/registry/orphan_sweeper.go @@ -45,12 +45,10 @@ type OrphanReaper interface { RemoveVolume(ctx context.Context, workspaceID string) error } -// isLikelyWorkspaceID accepts strings shaped like a UUID prefix — -// hex chars and `-` only. Workspace IDs are full UUIDs and the -// container-name truncation keeps the hex prefix intact, so any -// container name that doesn't match this is by definition not one -// of ours and should be skipped. Also doubles as a SQL LIKE -// wildcard guard (rejects `_` and `%`). +// isLikelyWorkspaceID accepts strings shaped like a UUID — hex chars and `-` only. +// Container names now carry the full UUID (ws-), so any container whose +// extracted ID doesn't match the UUID alphabet is not ours and should be skipped. +// Also doubles as a SQL LIKE wildcard guard (rejects `_` and `%`). func isLikelyWorkspaceID(s string) bool { if s == "" { return false @@ -137,22 +135,18 @@ func sweepRemovedRows(ctx context.Context, reaper OrphanReaper) { return } - // Resolve each prefix to a full workspace_id whose status is - // 'removed'. The platform's workspace IDs are full UUIDs but - // container names are truncated to 12 chars — an UPPER BOUND - // of one match per prefix is guaranteed by the DB (UUID v4 - // collisions in the first 12 chars across active rows are - // statistically negligible). Use a single IN-style query so - // the cost is one round-trip regardless of leak count. + // Resolve each container ID to a full workspace_id whose status is + // 'removed'. Container names now carry the full UUID (ws-), + // so the LIKE pattern matches exactly one row. Use a single + // IN-style query so the cost is one round-trip regardless of leak count. // - // Defence: drop any prefix whose contents fall outside the - // hex-and-dash UUID alphabet. Workspace IDs are UUIDs, so - // container names follow ws-<12 hex chars>. Anything else is - // either a non-workspace container that slipped past the - // substring-match Docker filter (workspace-runner, etc.) or a - // malformed entry — neither should be turned into a LIKE - // pattern. Also blocks SQL LIKE wildcards (`_` and `%`) from - // reaching the query, even though Docker's container-name + // Defence: drop any ID whose contents fall outside the hex-and-dash + // UUID alphabet. Workspace IDs are full UUIDs, so container names + // follow ws-<36-char-uuid>. Anything else is either a non-workspace + // container that slipped past the substring-match Docker filter + // (workspace-runner, etc.) or a malformed entry — neither should be + // turned into a LIKE pattern. Also blocks SQL LIKE wildcards (`_` and + // `%`) from reaching the query, even though Docker's container-name // validation would already have rejected them upstream. likes := make([]string, 0, len(prefixes)) for _, p := range prefixes { -- 2.52.0