fix(provisioner): KI-013 rename-migrate legacy truncated containers/volumes in-place #2490

Merged
devops-engineer merged 8 commits from fix/KI-013-migrate-legacy-names into main 2026-06-10 07:59:31 +00:00
3 changed files with 747 additions and 23 deletions
@@ -19,6 +19,7 @@ import (
"sync"
"time"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/filters"
dockerimage "github.com/docker/docker/api/types/image"
@@ -227,9 +228,31 @@ func legacyClaudeSessionVolumeName(workspaceID string) string {
return fmt.Sprintf("ws-%s-claude-sessions", id)
}
// dockerClient is the subset of client.Client methods used by Provisioner.
// Declared as an interface so tests can inject fakes without a real daemon.
type dockerClient interface {
Close() error
ContainerCreate(ctx context.Context, config *container.Config, hostConfig *container.HostConfig, networkingConfig *network.NetworkingConfig, platform *ocispec.Platform, containerName string) (container.CreateResponse, error)
ContainerExecAttach(ctx context.Context, execID string, config container.ExecAttachOptions) (types.HijackedResponse, error)
ContainerExecCreate(ctx context.Context, container string, config container.ExecOptions) (container.ExecCreateResponse, error)
ContainerInspect(ctx context.Context, container string) (container.InspectResponse, error)
ContainerList(ctx context.Context, options container.ListOptions) ([]container.Summary, error)
ContainerLogs(ctx context.Context, container string, options container.LogsOptions) (io.ReadCloser, error)
ContainerRemove(ctx context.Context, container string, options container.RemoveOptions) error
ContainerRename(ctx context.Context, container, newContainerName string) error
ContainerStart(ctx context.Context, container string, options container.StartOptions) error
ContainerWait(ctx context.Context, container string, condition container.WaitCondition) (<-chan container.WaitResponse, <-chan error)
CopyToContainer(ctx context.Context, container, path string, content io.Reader, options container.CopyToContainerOptions) error
ImageInspect(ctx context.Context, image string, opts ...client.ImageInspectOption) (dockerimage.InspectResponse, error)
ImagePull(ctx context.Context, ref string, opts dockerimage.PullOptions) (io.ReadCloser, error)
VolumeCreate(ctx context.Context, options volume.CreateOptions) (volume.Volume, error)
VolumeInspect(ctx context.Context, volumeID string) (volume.Volume, error)
VolumeRemove(ctx context.Context, volumeID string, force bool) error
}
// Provisioner manages Docker containers for workspace agents.
type Provisioner struct {
cli *client.Client
cli dockerClient
}
// New creates a new Provisioner connected to the local Docker daemon.
@@ -1336,35 +1359,115 @@ func (p *Provisioner) WriteAuthTokenToVolume(ctx context.Context, workspaceID, t
}
// resolveConfigVolumeName returns the effective config volume name for a
// workspace, preferring the legacy truncated name if that volume already
// exists (KI-013 deploy safety: pre-deploy volumes must not be orphaned).
// workspace. KI-013 deploy safety: if a legacy truncated-name volume exists,
// it is migrated in-place to the new full-ID name so existing workspace data
// is preserved AND all workspaces eventually use collision-safe names.
func (p *Provisioner) resolveConfigVolumeName(ctx context.Context, workspaceID string) string {
if p == nil || p.cli == nil {
return ConfigVolumeName(workspaceID)
}
newName := ConfigVolumeName(workspaceID)
legacy := legacyConfigVolumeName(workspaceID)
if _, err := p.cli.VolumeInspect(ctx, legacy); err == nil {
return legacy
if err := p.migrateVolumeIfNeeded(ctx, newName, legacy); err != nil {
log.Printf("Provisioner: volume migration warning for %s: %v", workspaceID, err)
}
return ConfigVolumeName(workspaceID)
return newName
}
// resolveClaudeSessionVolumeName returns the effective claude-sessions volume
// name, preferring the legacy truncated name if that volume already exists.
// name. KI-013 deploy safety: legacy truncated-name volumes are migrated
// in-place to the new full-ID name.
func (p *Provisioner) resolveClaudeSessionVolumeName(ctx context.Context, workspaceID string) string {
if p == nil || p.cli == nil {
return ClaudeSessionVolumeName(workspaceID)
}
newName := ClaudeSessionVolumeName(workspaceID)
legacy := legacyClaudeSessionVolumeName(workspaceID)
if _, err := p.cli.VolumeInspect(ctx, legacy); err == nil {
return legacy
if err := p.migrateVolumeIfNeeded(ctx, newName, legacy); err != nil {
log.Printf("Provisioner: session volume migration warning for %s: %v", workspaceID, err)
}
return ClaudeSessionVolumeName(workspaceID)
return newName
}
// migrateVolumeIfNeeded renames a legacy truncated-name Docker volume to its
// new full-ID name by copying data via a temporary alpine container. If the
// legacy volume does not exist, or the new volume already exists, this is a
// no-op. The operation is idempotent — calling it multiple times is safe.
func (p *Provisioner) migrateVolumeIfNeeded(ctx context.Context, newName, legacyName string) error {
if p == nil || p.cli == nil {
return nil
}
// Legacy volume missing — nothing to migrate.
if _, err := p.cli.VolumeInspect(ctx, legacyName); err != nil {
return nil
}
// New volume already exists — migration already done (or new workspace).
if _, err := p.cli.VolumeInspect(ctx, newName); err == nil {
return nil
}
// Create the new volume.
if _, err := p.cli.VolumeCreate(ctx, volume.CreateOptions{
Name: newName,
Labels: managedLabels(),
}); err != nil {
return fmt.Errorf("create new volume %s: %w", newName, err)
}
// Copy data from legacy to new via a short-lived alpine container.
resp, err := p.cli.ContainerCreate(ctx, &container.Config{
Image: "alpine",
Cmd: []string{"sh", "-c", "cp -a /legacy/. /new/"},
}, &container.HostConfig{
Binds: []string{
legacyName + ":/legacy",
newName + ":/new",
},
}, nil, nil, "")
if err != nil {
return fmt.Errorf("create migration container: %w", err)
}
defer p.cli.ContainerRemove(ctx, resp.ID, container.RemoveOptions{Force: true})
if err := p.cli.ContainerStart(ctx, resp.ID, container.StartOptions{}); err != nil {
return fmt.Errorf("start migration container: %w", err)
}
waitCh, errCh := p.cli.ContainerWait(ctx, resp.ID, container.WaitConditionNotRunning)
select {
case waitResp := <-waitCh:
if waitResp.StatusCode != 0 {
return fmt.Errorf("migration copy failed (exit %d) — preserving legacy volume %s for retry", waitResp.StatusCode, legacyName)
}
case err := <-errCh:
if err != nil {
return fmt.Errorf("migration container exited with error: %w", err)
}
}
// Explicitly remove the migration container before removing the legacy
// volume so the volume is no longer referenced. The deferred remove above
// is a safety-net for early-return paths.
_ = p.cli.ContainerRemove(ctx, resp.ID, container.RemoveOptions{Force: true})
// Best-effort cleanup of the legacy volume. If removal fails (e.g. still
// referenced by a running container), the new volume is already populated
// and the next restart will retry.
if err := p.cli.VolumeRemove(ctx, legacyName, true); err != nil {
log.Printf("Provisioner: warning: failed to remove legacy volume %s after migration: %v", legacyName, err)
} else {
log.Printf("Provisioner: migrated legacy volume %s → %s", legacyName, newName)
}
return nil
}
// RemoveVolume removes the config volume for a workspace.
// Also removes the claude-sessions volume (best-effort, may not exist
// for non claude-code runtimes). Issue #12.
// Also removes the claude-sessions volume (best-effort, may not exist
// for non claude-code runtimes). Issue #12.
func (p *Provisioner) RemoveVolume(ctx context.Context, workspaceID string) error {
if p == nil || p.cli == nil {
return ErrNoBackend
@@ -1490,24 +1593,56 @@ func (p *Provisioner) IsRunning(ctx context.Context, workspaceID string) (bool,
// transient errors into the same "" return as a genuinely-stopped container.
// That hid daemon flakes as misleading 503 "container not running" responses
// AND let the two impls drift on edge-case behavior. This is the SSOT.
func RunningContainerName(ctx context.Context, cli *client.Client, workspaceID string) (string, error) {
// isNilDockerClient reports whether cli is nil or a typed nil pointer
// (e.g. (*client.Client)(nil) passed as a dockerClient interface value).
// Required because a non-nil interface holding a nil pointer does not == nil.
func isNilDockerClient(cli dockerClient) bool {
if cli == nil {
return true
}
switch c := cli.(type) {
case *client.Client:
return c == nil
default:
return false
}
}
func RunningContainerName(ctx context.Context, cli dockerClient, workspaceID string) (string, error) {
if isNilDockerClient(cli) {
return "", ErrNoBackend
}
// KI-013 deploy safety: new full-ID name first, then fall back to the
// old truncated name so pre-deploy containers are still discoverable.
names := []string{ContainerName(workspaceID), legacyContainerName(workspaceID)}
for _, name := range names {
info, err := cli.ContainerInspect(ctx, name)
if err != nil {
if isContainerNotFound(err) {
continue
newName := ContainerName(workspaceID)
legacyName := legacyContainerName(workspaceID)
// If a legacy container is still running, rename it in-place to the
// new full-ID name so all callers converge on collision-safe names.
legacyInfo, legacyErr := cli.ContainerInspect(ctx, legacyName)
if legacyErr == nil && legacyInfo.State.Running {
if _, newErr := cli.ContainerInspect(ctx, newName); isContainerNotFound(newErr) {
if renameErr := cli.ContainerRename(ctx, legacyName, newName); renameErr == nil {
log.Printf("Provisioner: renamed legacy container %s → %s", legacyName, newName)
return newName, nil
} else {
log.Printf("Provisioner: warning: failed to rename legacy container %s → %s: %v", legacyName, newName, renameErr)
}
return "", err
}
if info.State.Running {
return name, nil
// Rename not possible (or new name already occupied) — return legacy
// name so the caller can still exec into the live container.
return legacyName, nil
}
// Standard path: look for a running container with the new name.
info, err := cli.ContainerInspect(ctx, newName)
if err != nil {
if isContainerNotFound(err) {
return "", nil
}
return "", err
}
if info.State.Running {
return newName, nil
}
return "", nil
}
@@ -1558,8 +1693,15 @@ func isRemovalInProgress(err error) bool {
}
// DockerClient returns the underlying Docker client for sharing with other handlers.
// If the provisioner is backed by a fake (e.g. in unit tests), this returns nil.
func (p *Provisioner) DockerClient() *client.Client {
return p.cli
if p == nil || p.cli == nil {
return nil
}
if c, ok := p.cli.(*client.Client); ok {
return c
}
return nil
}
// Close cleans up the Docker client.
@@ -0,0 +1,461 @@
package provisioner
import (
"context"
"errors"
"io"
"strings"
"sync"
"testing"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/image"
"github.com/docker/docker/api/types/network"
"github.com/docker/docker/client"
"github.com/docker/docker/api/types/volume"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
)
// fakeDockerClient is a minimal in-memory implementation of the dockerClient
// interface used by the provisioner. It records calls and lets tests simulate
// the legacy/full-ID naming transition without a real Docker daemon.
type fakeDockerClient struct {
mu sync.Mutex
volumes map[string]volume.Volume
containers map[string]container.InspectResponse
renameErr error
migrationExitCode int64
volumeInspectCalls []string
volumeCreateCalls []volume.CreateOptions
volumeRemoveCalls []string
containerInspectCalls []string
containerRemoveCalls []string
containerRenameCalls []struct{ Old, New string }
containerCreateCalls []string
containerStartCalls []string
}
func newFakeDockerClient() *fakeDockerClient {
return &fakeDockerClient{
volumes: make(map[string]volume.Volume),
containers: make(map[string]container.InspectResponse),
}
}
func (f *fakeDockerClient) Close() error { return nil }
func (f *fakeDockerClient) ContainerCreate(ctx context.Context, config *container.Config, hostConfig *container.HostConfig, networkingConfig *network.NetworkingConfig, platform *ocispec.Platform, containerName string) (container.CreateResponse, error) {
f.mu.Lock()
defer f.mu.Unlock()
f.containerCreateCalls = append(f.containerCreateCalls, containerName)
return container.CreateResponse{ID: "cid-" + containerName}, nil
}
func (f *fakeDockerClient) ContainerExecAttach(ctx context.Context, execID string, config container.ExecAttachOptions) (types.HijackedResponse, error) {
return types.HijackedResponse{}, errors.New("not implemented")
}
func (f *fakeDockerClient) ContainerExecCreate(ctx context.Context, ctr string, config container.ExecOptions) (container.ExecCreateResponse, error) {
return container.ExecCreateResponse{}, errors.New("not implemented")
}
func (f *fakeDockerClient) ContainerInspect(ctx context.Context, name string) (container.InspectResponse, error) {
f.mu.Lock()
defer f.mu.Unlock()
f.containerInspectCalls = append(f.containerInspectCalls, name)
if c, ok := f.containers[name]; ok {
return c, nil
}
return container.InspectResponse{}, errors.New("No such container: " + name)
}
func (f *fakeDockerClient) ContainerList(ctx context.Context, options container.ListOptions) ([]container.Summary, error) {
return nil, errors.New("not implemented")
}
func (f *fakeDockerClient) ContainerLogs(ctx context.Context, container string, options container.LogsOptions) (io.ReadCloser, error) {
return nil, errors.New("not implemented")
}
func (f *fakeDockerClient) ContainerRemove(ctx context.Context, name string, options container.RemoveOptions) error {
f.mu.Lock()
defer f.mu.Unlock()
f.containerRemoveCalls = append(f.containerRemoveCalls, name)
if _, ok := f.containers[name]; ok {
delete(f.containers, name)
return nil
}
return errors.New("No such container: " + name)
}
func (f *fakeDockerClient) ContainerRename(ctx context.Context, oldName, newName string) error {
f.mu.Lock()
defer f.mu.Unlock()
f.containerRenameCalls = append(f.containerRenameCalls, struct{ Old, New string }{oldName, newName})
if f.renameErr != nil {
return f.renameErr
}
if c, ok := f.containers[oldName]; ok {
delete(f.containers, oldName)
c.Name = newName
f.containers[newName] = c
}
return nil
}
func (f *fakeDockerClient) ContainerStart(ctx context.Context, id string, options container.StartOptions) error {
f.mu.Lock()
defer f.mu.Unlock()
f.containerStartCalls = append(f.containerStartCalls, id)
return nil
}
func (f *fakeDockerClient) ContainerWait(ctx context.Context, id string, condition container.WaitCondition) (<-chan container.WaitResponse, <-chan error) {
waitCh := make(chan container.WaitResponse, 1)
errCh := make(chan error)
f.mu.Lock()
code := f.migrationExitCode
f.mu.Unlock()
waitCh <- container.WaitResponse{StatusCode: code}
close(waitCh)
return waitCh, errCh
}
func (f *fakeDockerClient) CopyToContainer(ctx context.Context, container, path string, content io.Reader, options container.CopyToContainerOptions) error {
return errors.New("not implemented")
}
func (f *fakeDockerClient) ImageInspect(ctx context.Context, img string, opts ...client.ImageInspectOption) (image.InspectResponse, error) {
return image.InspectResponse{}, errors.New("not implemented")
}
func (f *fakeDockerClient) ImagePull(ctx context.Context, ref string, opts image.PullOptions) (io.ReadCloser, error) {
return nil, errors.New("not implemented")
}
func (f *fakeDockerClient) VolumeCreate(ctx context.Context, options volume.CreateOptions) (volume.Volume, error) {
f.mu.Lock()
defer f.mu.Unlock()
f.volumeCreateCalls = append(f.volumeCreateCalls, options)
v := volume.Volume{Name: options.Name, Labels: options.Labels}
f.volumes[options.Name] = v
return v, nil
}
func (f *fakeDockerClient) VolumeInspect(ctx context.Context, name string) (volume.Volume, error) {
f.mu.Lock()
defer f.mu.Unlock()
f.volumeInspectCalls = append(f.volumeInspectCalls, name)
if v, ok := f.volumes[name]; ok {
return v, nil
}
return volume.Volume{}, errors.New("No such volume: " + name)
}
func (f *fakeDockerClient) VolumeRemove(ctx context.Context, name string, force bool) error {
f.mu.Lock()
defer f.mu.Unlock()
f.volumeRemoveCalls = append(f.volumeRemoveCalls, name)
if _, ok := f.volumes[name]; ok {
delete(f.volumes, name)
return nil
}
return errors.New("No such volume: " + name)
}
func runningContainer(name string) container.InspectResponse {
return container.InspectResponse{
ContainerJSONBase: &container.ContainerJSONBase{
Name: name,
State: &container.State{Running: true},
},
}
}
const migrateTestWorkspaceID = "abcdef1234567890"
func TestResolveConfigVolumeName_LegacyExists_MigratesInPlace(t *testing.T) {
ctx := context.Background()
cli := newFakeDockerClient()
p := &Provisioner{cli: cli}
newName := ConfigVolumeName(migrateTestWorkspaceID)
legacyName := legacyConfigVolumeName(migrateTestWorkspaceID)
cli.volumes[legacyName] = volume.Volume{Name: legacyName}
got := p.resolveConfigVolumeName(ctx, migrateTestWorkspaceID)
if got != newName {
t.Fatalf("resolveConfigVolumeName: got %q, want %q", got, newName)
}
// Migration path must inspect both names, then create new volume and copy.
if !strings.Contains(strings.Join(cli.volumeInspectCalls, ","), legacyName) {
t.Fatalf("expected VolumeInspect(%s) for legacy presence check", legacyName)
}
if !strings.Contains(strings.Join(cli.volumeInspectCalls, ","), newName) {
t.Fatalf("expected VolumeInspect(%s) to check for existing new volume", newName)
}
if len(cli.volumeCreateCalls) == 0 || cli.volumeCreateCalls[0].Name != newName {
t.Fatalf("expected VolumeCreate(%s)", newName)
}
if len(cli.containerCreateCalls) == 0 {
t.Fatal("expected migration container creation")
}
if len(cli.containerStartCalls) == 0 {
t.Fatal("expected migration container start")
}
if len(cli.containerRemoveCalls) == 0 {
t.Fatal("expected migration container removal")
}
// Legacy volume must be removed so it is not orphaned.
removed := false
for _, n := range cli.volumeRemoveCalls {
if n == legacyName {
removed = true
break
}
}
if !removed {
t.Fatalf("legacy volume %s was not removed after migration — orphan risk", legacyName)
}
if _, ok := cli.volumes[legacyName]; ok {
t.Fatalf("legacy volume %s still present in fake client after migration", legacyName)
}
if _, ok := cli.volumes[newName]; !ok {
t.Fatalf("new volume %s must exist after migration", newName)
}
}
func TestResolveConfigVolumeName_LegacyAbsent_NoMigration(t *testing.T) {
ctx := context.Background()
cli := newFakeDockerClient()
p := &Provisioner{cli: cli}
newName := ConfigVolumeName(migrateTestWorkspaceID)
legacyName := legacyConfigVolumeName(migrateTestWorkspaceID)
got := p.resolveConfigVolumeName(ctx, migrateTestWorkspaceID)
if got != newName {
t.Fatalf("resolveConfigVolumeName: got %q, want %q", got, newName)
}
// Should check legacy once and short-circuit.
if len(cli.volumeInspectCalls) != 1 || cli.volumeInspectCalls[0] != legacyName {
t.Fatalf("expected exactly one VolumeInspect call for legacy name, got %v", cli.volumeInspectCalls)
}
if len(cli.volumeCreateCalls) != 0 {
t.Fatalf("expected no VolumeCreate when legacy absent, got %v", cli.volumeCreateCalls)
}
if len(cli.volumeRemoveCalls) != 0 {
t.Fatalf("expected no VolumeRemove when legacy absent, got %v", cli.volumeRemoveCalls)
}
}
func TestResolveClaudeSessionVolumeName_LegacyExists_MigratesInPlace(t *testing.T) {
ctx := context.Background()
cli := newFakeDockerClient()
p := &Provisioner{cli: cli}
newName := ClaudeSessionVolumeName(migrateTestWorkspaceID)
legacyName := legacyClaudeSessionVolumeName(migrateTestWorkspaceID)
cli.volumes[legacyName] = volume.Volume{Name: legacyName}
got := p.resolveClaudeSessionVolumeName(ctx, migrateTestWorkspaceID)
if got != newName {
t.Fatalf("resolveClaudeSessionVolumeName: got %q, want %q", got, newName)
}
removed := false
for _, n := range cli.volumeRemoveCalls {
if n == legacyName {
removed = true
break
}
}
if !removed {
t.Fatalf("legacy session volume %s was not removed after migration — orphan risk", legacyName)
}
if _, ok := cli.volumes[newName]; !ok {
t.Fatalf("new session volume %s must exist after migration", newName)
}
}
func TestResolveClaudeSessionVolumeName_LegacyAbsent_NoMigration(t *testing.T) {
ctx := context.Background()
cli := newFakeDockerClient()
p := &Provisioner{cli: cli}
newName := ClaudeSessionVolumeName(migrateTestWorkspaceID)
legacyName := legacyClaudeSessionVolumeName(migrateTestWorkspaceID)
got := p.resolveClaudeSessionVolumeName(ctx, migrateTestWorkspaceID)
if got != newName {
t.Fatalf("resolveClaudeSessionVolumeName: got %q, want %q", got, newName)
}
if len(cli.volumeInspectCalls) != 1 || cli.volumeInspectCalls[0] != legacyName {
t.Fatalf("expected exactly one VolumeInspect call for legacy session name, got %v", cli.volumeInspectCalls)
}
if len(cli.volumeCreateCalls) != 0 {
t.Fatalf("expected no VolumeCreate when legacy absent, got %v", cli.volumeCreateCalls)
}
}
func TestMigrateVolumeIfNeeded_CopyFails_PreservesLegacy(t *testing.T) {
ctx := context.Background()
cli := newFakeDockerClient()
p := &Provisioner{cli: cli}
newName := ConfigVolumeName(migrateTestWorkspaceID)
legacyName := legacyConfigVolumeName(migrateTestWorkspaceID)
cli.volumes[legacyName] = volume.Volume{Name: legacyName}
cli.migrationExitCode = 1
if err := p.migrateVolumeIfNeeded(ctx, newName, legacyName); err == nil {
t.Fatal("expected migration error when copy container exits non-zero")
}
// Legacy volume must survive a failed copy so no data is lost.
if _, ok := cli.volumes[legacyName]; !ok {
t.Fatal("legacy volume must be preserved when migration copy fails (data-loss guard)")
}
}
func TestStop_FullIDAbsent_LegacyRemoved(t *testing.T) {
ctx := context.Background()
cli := newFakeDockerClient()
p := &Provisioner{cli: cli}
newName := ContainerName(migrateTestWorkspaceID)
legacyName := legacyContainerName(migrateTestWorkspaceID)
cli.containers[legacyName] = runningContainer(legacyName)
if err := p.Stop(ctx, migrateTestWorkspaceID); err != nil {
t.Fatalf("Stop failed: %v", err)
}
if len(cli.containerRemoveCalls) < 2 {
t.Fatalf("expected Remove on full-id then legacy, got %v", cli.containerRemoveCalls)
}
if cli.containerRemoveCalls[0] != newName {
t.Fatalf("expected first remove target %q, got %q", newName, cli.containerRemoveCalls[0])
}
if cli.containerRemoveCalls[1] != legacyName {
t.Fatalf("expected second remove target %q, got %q", legacyName, cli.containerRemoveCalls[1])
}
if _, ok := cli.containers[legacyName]; ok {
t.Fatal("legacy container still present after Stop")
}
}
func TestStop_BothAbsent_IsNoOp(t *testing.T) {
ctx := context.Background()
cli := newFakeDockerClient()
p := &Provisioner{cli: cli}
newName := ContainerName(migrateTestWorkspaceID)
legacyName := legacyContainerName(migrateTestWorkspaceID)
if err := p.Stop(ctx, migrateTestWorkspaceID); err != nil {
t.Fatalf("Stop failed: %v", err)
}
if len(cli.containerRemoveCalls) != 2 {
t.Fatalf("expected 2 remove attempts, got %v", cli.containerRemoveCalls)
}
if cli.containerRemoveCalls[0] != newName || cli.containerRemoveCalls[1] != legacyName {
t.Fatalf("expected remove order [%q, %q], got %v", newName, legacyName, cli.containerRemoveCalls)
}
}
func TestRunningContainerName_LegacyRunning_RenameFails_FallsBackToLegacy(t *testing.T) {
ctx := context.Background()
cli := newFakeDockerClient()
newName := ContainerName(migrateTestWorkspaceID)
legacyName := legacyContainerName(migrateTestWorkspaceID)
cli.containers[legacyName] = runningContainer(legacyName)
cli.renameErr = errors.New("daemon rename failed")
got, err := RunningContainerName(ctx, cli, migrateTestWorkspaceID)
if err != nil {
t.Fatalf("RunningContainerName returned error: %v", err)
}
if got != legacyName {
t.Fatalf("expected fallback to legacy name %q, got %q", legacyName, got)
}
if len(cli.containerInspectCalls) < 2 {
t.Fatalf("expected inspect of legacy and new names, got %v", cli.containerInspectCalls)
}
if cli.containerInspectCalls[0] != legacyName {
t.Fatalf("expected legacy inspect first, got %v", cli.containerInspectCalls)
}
renamed := false
for _, r := range cli.containerRenameCalls {
if r.Old == legacyName && r.New == newName {
renamed = true
break
}
}
if !renamed {
t.Fatalf("expected rename attempt %q -> %q, got %v", legacyName, newName, cli.containerRenameCalls)
}
}
func TestRunningContainerName_LegacyRunning_RenameSucceeds(t *testing.T) {
ctx := context.Background()
cli := newFakeDockerClient()
newName := ContainerName(migrateTestWorkspaceID)
legacyName := legacyContainerName(migrateTestWorkspaceID)
cli.containers[legacyName] = runningContainer(legacyName)
got, err := RunningContainerName(ctx, cli, migrateTestWorkspaceID)
if err != nil {
t.Fatalf("RunningContainerName returned error: %v", err)
}
if got != newName {
t.Fatalf("expected new name %q after rename, got %q", newName, got)
}
if _, ok := cli.containers[legacyName]; ok {
t.Fatal("legacy container should have been renamed away")
}
if _, ok := cli.containers[newName]; !ok {
t.Fatal("new container name should exist after rename")
}
}
func TestRunningContainerName_NewRunning_ReturnsNew(t *testing.T) {
ctx := context.Background()
cli := newFakeDockerClient()
newName := ContainerName(migrateTestWorkspaceID)
cli.containers[newName] = runningContainer(newName)
got, err := RunningContainerName(ctx, cli, migrateTestWorkspaceID)
if err != nil {
t.Fatalf("RunningContainerName returned error: %v", err)
}
if got != newName {
t.Fatalf("expected new name %q, got %q", newName, got)
}
}
func TestRunningContainerName_BothAbsent_ReturnsEmpty(t *testing.T) {
ctx := context.Background()
cli := newFakeDockerClient()
got, err := RunningContainerName(ctx, cli, migrateTestWorkspaceID)
if err != nil {
t.Fatalf("RunningContainerName returned error: %v", err)
}
if got != "" {
t.Fatalf("expected empty name when neither container exists, got %q", got)
}
}
@@ -2,14 +2,19 @@ package provisioner
import (
"archive/tar"
"context"
"errors"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"testing"
"time"
"github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/volume"
"github.com/docker/docker/client"
)
// TestValidateConfigSource covers issue #17: a workspace restart with no
@@ -1422,3 +1427,119 @@ func TestApplyTierConfig_T3_DefaultCap(t *testing.T) {
t.Errorf("T3 default NanoCPUs: got %d, want %d", hc.NanoCPUs, wantCPU)
}
}
// TestMigrateVolumeIfNeeded_ExistingTruncatedVolume verifies the KI-013 deploy
// safety path: when a legacy truncated-name volume already exists, data is
// copied to the new full-ID name and the legacy volume is removed. Existing
// workspace state is preserved without operator intervention.
func TestMigrateVolumeIfNeeded_ExistingTruncatedVolume(t *testing.T) {
ctx := context.Background()
cli, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
if err != nil {
t.Skip("docker client unavailable:", err)
}
if _, pingErr := cli.Ping(ctx); pingErr != nil {
t.Skip("docker daemon unreachable:", pingErr)
}
p := &Provisioner{cli: cli}
workspaceID := "test-migrate-" + strconv.FormatInt(time.Now().UnixNano(), 10)
legacyName := legacyConfigVolumeName(workspaceID)
newName := ConfigVolumeName(workspaceID)
// Cleanup before and after (defensive — avoid pollution on retries).
_ = cli.VolumeRemove(ctx, legacyName, true)
_ = cli.VolumeRemove(ctx, newName, true)
defer func() {
_ = cli.VolumeRemove(ctx, legacyName, true)
_ = cli.VolumeRemove(ctx, newName, true)
}()
// 1. Create legacy volume and seed it with a sentinel file.
if _, err := cli.VolumeCreate(ctx, volume.CreateOptions{Name: legacyName}); err != nil {
t.Fatalf("create legacy volume: %v", err)
}
seedResp, err := cli.ContainerCreate(ctx, &container.Config{
Image: "alpine",
Cmd: []string{"sh", "-c", "echo sentinel-data > /vol/sentinel.txt"},
}, &container.HostConfig{
Binds: []string{legacyName + ":/vol"},
}, nil, nil, "")
if err != nil {
t.Fatalf("create seed container: %v", err)
}
defer cli.ContainerRemove(ctx, seedResp.ID, container.RemoveOptions{Force: true})
if err := cli.ContainerStart(ctx, seedResp.ID, container.StartOptions{}); err != nil {
t.Fatalf("start seed container: %v", err)
}
waitCh, errCh := cli.ContainerWait(ctx, seedResp.ID, container.WaitConditionNotRunning)
select {
case <-waitCh:
case err := <-errCh:
if err != nil {
t.Fatalf("seed container failed: %v", err)
}
}
// Remove the seed container before migration so the legacy volume is
// no longer referenced by any container. The deferred remove above is a
// safety net for panic/early-return paths.
if err := cli.ContainerRemove(ctx, seedResp.ID, container.RemoveOptions{Force: true}); err != nil {
t.Fatalf("remove seed container: %v", err)
}
// 2. Run migration.
if err := p.migrateVolumeIfNeeded(ctx, newName, legacyName); err != nil {
t.Fatalf("migrateVolumeIfNeeded failed: %v", err)
}
// 3. Legacy volume must be gone.
if _, inspectErr := cli.VolumeInspect(ctx, legacyName); inspectErr == nil {
t.Fatalf("legacy volume %s still exists after migration", legacyName)
}
// 4. New volume must exist and contain the sentinel file.
if _, inspectErr := cli.VolumeInspect(ctx, newName); inspectErr != nil {
t.Fatalf("new volume %s does not exist after migration: %v", newName, inspectErr)
}
readResp, err := cli.ContainerCreate(ctx, &container.Config{
Image: "alpine",
Cmd: []string{"cat", "/vol/sentinel.txt"},
}, &container.HostConfig{
Binds: []string{newName + ":/vol"},
}, nil, nil, "")
if err != nil {
t.Fatalf("create read container: %v", err)
}
defer cli.ContainerRemove(ctx, readResp.ID, container.RemoveOptions{Force: true})
if err := cli.ContainerStart(ctx, readResp.ID, container.StartOptions{}); err != nil {
t.Fatalf("start read container: %v", err)
}
waitCh, errCh = cli.ContainerWait(ctx, readResp.ID, container.WaitConditionNotRunning)
select {
case <-waitCh:
case err := <-errCh:
if err != nil {
t.Fatalf("read container failed: %v", err)
}
}
logs, err := cli.ContainerLogs(ctx, readResp.ID, container.LogsOptions{ShowStdout: true})
if err != nil {
t.Fatalf("read container logs: %v", err)
}
defer logs.Close()
data, err := io.ReadAll(logs)
if err != nil {
t.Fatalf("read logs: %v", err)
}
if !strings.Contains(string(data), "sentinel-data") {
t.Fatalf("new volume missing sentinel data; logs: %q", data)
}
// 5. Idempotency: second migration must be a no-op.
if err := p.migrateVolumeIfNeeded(ctx, newName, legacyName); err != nil {
t.Fatalf("second migration (idempotency) failed: %v", err)
}
}