fix(provisioner): KI-013 rename-migrate legacy truncated containers/volumes in-place #2490
@@ -19,6 +19,7 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/docker/docker/api/types"
|
||||
"github.com/docker/docker/api/types/container"
|
||||
"github.com/docker/docker/api/types/filters"
|
||||
dockerimage "github.com/docker/docker/api/types/image"
|
||||
@@ -227,9 +228,31 @@ func legacyClaudeSessionVolumeName(workspaceID string) string {
|
||||
return fmt.Sprintf("ws-%s-claude-sessions", id)
|
||||
}
|
||||
|
||||
// dockerClient is the subset of client.Client methods used by Provisioner.
|
||||
// Declared as an interface so tests can inject fakes without a real daemon.
|
||||
type dockerClient interface {
|
||||
Close() error
|
||||
ContainerCreate(ctx context.Context, config *container.Config, hostConfig *container.HostConfig, networkingConfig *network.NetworkingConfig, platform *ocispec.Platform, containerName string) (container.CreateResponse, error)
|
||||
ContainerExecAttach(ctx context.Context, execID string, config container.ExecAttachOptions) (types.HijackedResponse, error)
|
||||
ContainerExecCreate(ctx context.Context, container string, config container.ExecOptions) (container.ExecCreateResponse, error)
|
||||
ContainerInspect(ctx context.Context, container string) (container.InspectResponse, error)
|
||||
ContainerList(ctx context.Context, options container.ListOptions) ([]container.Summary, error)
|
||||
ContainerLogs(ctx context.Context, container string, options container.LogsOptions) (io.ReadCloser, error)
|
||||
ContainerRemove(ctx context.Context, container string, options container.RemoveOptions) error
|
||||
ContainerRename(ctx context.Context, container, newContainerName string) error
|
||||
ContainerStart(ctx context.Context, container string, options container.StartOptions) error
|
||||
ContainerWait(ctx context.Context, container string, condition container.WaitCondition) (<-chan container.WaitResponse, <-chan error)
|
||||
CopyToContainer(ctx context.Context, container, path string, content io.Reader, options container.CopyToContainerOptions) error
|
||||
ImageInspect(ctx context.Context, image string, opts ...client.ImageInspectOption) (dockerimage.InspectResponse, error)
|
||||
ImagePull(ctx context.Context, ref string, opts dockerimage.PullOptions) (io.ReadCloser, error)
|
||||
VolumeCreate(ctx context.Context, options volume.CreateOptions) (volume.Volume, error)
|
||||
VolumeInspect(ctx context.Context, volumeID string) (volume.Volume, error)
|
||||
VolumeRemove(ctx context.Context, volumeID string, force bool) error
|
||||
}
|
||||
|
||||
// Provisioner manages Docker containers for workspace agents.
|
||||
type Provisioner struct {
|
||||
cli *client.Client
|
||||
cli dockerClient
|
||||
}
|
||||
|
||||
// New creates a new Provisioner connected to the local Docker daemon.
|
||||
@@ -1336,35 +1359,115 @@ func (p *Provisioner) WriteAuthTokenToVolume(ctx context.Context, workspaceID, t
|
||||
}
|
||||
|
||||
// resolveConfigVolumeName returns the effective config volume name for a
|
||||
// workspace, preferring the legacy truncated name if that volume already
|
||||
// exists (KI-013 deploy safety: pre-deploy volumes must not be orphaned).
|
||||
// workspace. KI-013 deploy safety: if a legacy truncated-name volume exists,
|
||||
// it is migrated in-place to the new full-ID name so existing workspace data
|
||||
// is preserved AND all workspaces eventually use collision-safe names.
|
||||
func (p *Provisioner) resolveConfigVolumeName(ctx context.Context, workspaceID string) string {
|
||||
if p == nil || p.cli == nil {
|
||||
return ConfigVolumeName(workspaceID)
|
||||
}
|
||||
newName := ConfigVolumeName(workspaceID)
|
||||
legacy := legacyConfigVolumeName(workspaceID)
|
||||
if _, err := p.cli.VolumeInspect(ctx, legacy); err == nil {
|
||||
return legacy
|
||||
if err := p.migrateVolumeIfNeeded(ctx, newName, legacy); err != nil {
|
||||
log.Printf("Provisioner: volume migration warning for %s: %v", workspaceID, err)
|
||||
}
|
||||
return ConfigVolumeName(workspaceID)
|
||||
return newName
|
||||
}
|
||||
|
||||
// resolveClaudeSessionVolumeName returns the effective claude-sessions volume
|
||||
// name, preferring the legacy truncated name if that volume already exists.
|
||||
// name. KI-013 deploy safety: legacy truncated-name volumes are migrated
|
||||
// in-place to the new full-ID name.
|
||||
func (p *Provisioner) resolveClaudeSessionVolumeName(ctx context.Context, workspaceID string) string {
|
||||
if p == nil || p.cli == nil {
|
||||
return ClaudeSessionVolumeName(workspaceID)
|
||||
}
|
||||
newName := ClaudeSessionVolumeName(workspaceID)
|
||||
legacy := legacyClaudeSessionVolumeName(workspaceID)
|
||||
if _, err := p.cli.VolumeInspect(ctx, legacy); err == nil {
|
||||
return legacy
|
||||
if err := p.migrateVolumeIfNeeded(ctx, newName, legacy); err != nil {
|
||||
log.Printf("Provisioner: session volume migration warning for %s: %v", workspaceID, err)
|
||||
}
|
||||
return ClaudeSessionVolumeName(workspaceID)
|
||||
return newName
|
||||
}
|
||||
|
||||
// migrateVolumeIfNeeded renames a legacy truncated-name Docker volume to its
|
||||
// new full-ID name by copying data via a temporary alpine container. If the
|
||||
// legacy volume does not exist, or the new volume already exists, this is a
|
||||
// no-op. The operation is idempotent — calling it multiple times is safe.
|
||||
func (p *Provisioner) migrateVolumeIfNeeded(ctx context.Context, newName, legacyName string) error {
|
||||
if p == nil || p.cli == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Legacy volume missing — nothing to migrate.
|
||||
if _, err := p.cli.VolumeInspect(ctx, legacyName); err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// New volume already exists — migration already done (or new workspace).
|
||||
if _, err := p.cli.VolumeInspect(ctx, newName); err == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Create the new volume.
|
||||
if _, err := p.cli.VolumeCreate(ctx, volume.CreateOptions{
|
||||
Name: newName,
|
||||
Labels: managedLabels(),
|
||||
}); err != nil {
|
||||
return fmt.Errorf("create new volume %s: %w", newName, err)
|
||||
}
|
||||
|
||||
// Copy data from legacy to new via a short-lived alpine container.
|
||||
resp, err := p.cli.ContainerCreate(ctx, &container.Config{
|
||||
Image: "alpine",
|
||||
Cmd: []string{"sh", "-c", "cp -a /legacy/. /new/"},
|
||||
}, &container.HostConfig{
|
||||
Binds: []string{
|
||||
legacyName + ":/legacy",
|
||||
newName + ":/new",
|
||||
},
|
||||
}, nil, nil, "")
|
||||
if err != nil {
|
||||
return fmt.Errorf("create migration container: %w", err)
|
||||
}
|
||||
defer p.cli.ContainerRemove(ctx, resp.ID, container.RemoveOptions{Force: true})
|
||||
|
||||
if err := p.cli.ContainerStart(ctx, resp.ID, container.StartOptions{}); err != nil {
|
||||
return fmt.Errorf("start migration container: %w", err)
|
||||
}
|
||||
|
||||
waitCh, errCh := p.cli.ContainerWait(ctx, resp.ID, container.WaitConditionNotRunning)
|
||||
select {
|
||||
case waitResp := <-waitCh:
|
||||
if waitResp.StatusCode != 0 {
|
||||
return fmt.Errorf("migration copy failed (exit %d) — preserving legacy volume %s for retry", waitResp.StatusCode, legacyName)
|
||||
}
|
||||
case err := <-errCh:
|
||||
if err != nil {
|
||||
return fmt.Errorf("migration container exited with error: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Explicitly remove the migration container before removing the legacy
|
||||
// volume so the volume is no longer referenced. The deferred remove above
|
||||
// is a safety-net for early-return paths.
|
||||
_ = p.cli.ContainerRemove(ctx, resp.ID, container.RemoveOptions{Force: true})
|
||||
|
||||
// Best-effort cleanup of the legacy volume. If removal fails (e.g. still
|
||||
// referenced by a running container), the new volume is already populated
|
||||
// and the next restart will retry.
|
||||
if err := p.cli.VolumeRemove(ctx, legacyName, true); err != nil {
|
||||
log.Printf("Provisioner: warning: failed to remove legacy volume %s after migration: %v", legacyName, err)
|
||||
} else {
|
||||
log.Printf("Provisioner: migrated legacy volume %s → %s", legacyName, newName)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// RemoveVolume removes the config volume for a workspace.
|
||||
// Also removes the claude-sessions volume (best-effort, may not exist
|
||||
// for non claude-code runtimes). Issue #12.
|
||||
// Also removes the claude-sessions volume (best-effort, may not exist
|
||||
// for non claude-code runtimes). Issue #12.
|
||||
func (p *Provisioner) RemoveVolume(ctx context.Context, workspaceID string) error {
|
||||
if p == nil || p.cli == nil {
|
||||
return ErrNoBackend
|
||||
@@ -1490,24 +1593,56 @@ func (p *Provisioner) IsRunning(ctx context.Context, workspaceID string) (bool,
|
||||
// transient errors into the same "" return as a genuinely-stopped container.
|
||||
// That hid daemon flakes as misleading 503 "container not running" responses
|
||||
// AND let the two impls drift on edge-case behavior. This is the SSOT.
|
||||
func RunningContainerName(ctx context.Context, cli *client.Client, workspaceID string) (string, error) {
|
||||
// isNilDockerClient reports whether cli is nil or a typed nil pointer
|
||||
// (e.g. (*client.Client)(nil) passed as a dockerClient interface value).
|
||||
// Required because a non-nil interface holding a nil pointer does not == nil.
|
||||
func isNilDockerClient(cli dockerClient) bool {
|
||||
if cli == nil {
|
||||
return true
|
||||
}
|
||||
switch c := cli.(type) {
|
||||
case *client.Client:
|
||||
return c == nil
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func RunningContainerName(ctx context.Context, cli dockerClient, workspaceID string) (string, error) {
|
||||
if isNilDockerClient(cli) {
|
||||
return "", ErrNoBackend
|
||||
}
|
||||
// KI-013 deploy safety: new full-ID name first, then fall back to the
|
||||
// old truncated name so pre-deploy containers are still discoverable.
|
||||
names := []string{ContainerName(workspaceID), legacyContainerName(workspaceID)}
|
||||
for _, name := range names {
|
||||
info, err := cli.ContainerInspect(ctx, name)
|
||||
if err != nil {
|
||||
if isContainerNotFound(err) {
|
||||
continue
|
||||
|
||||
newName := ContainerName(workspaceID)
|
||||
legacyName := legacyContainerName(workspaceID)
|
||||
|
||||
// If a legacy container is still running, rename it in-place to the
|
||||
// new full-ID name so all callers converge on collision-safe names.
|
||||
legacyInfo, legacyErr := cli.ContainerInspect(ctx, legacyName)
|
||||
if legacyErr == nil && legacyInfo.State.Running {
|
||||
if _, newErr := cli.ContainerInspect(ctx, newName); isContainerNotFound(newErr) {
|
||||
if renameErr := cli.ContainerRename(ctx, legacyName, newName); renameErr == nil {
|
||||
log.Printf("Provisioner: renamed legacy container %s → %s", legacyName, newName)
|
||||
return newName, nil
|
||||
} else {
|
||||
log.Printf("Provisioner: warning: failed to rename legacy container %s → %s: %v", legacyName, newName, renameErr)
|
||||
}
|
||||
return "", err
|
||||
}
|
||||
if info.State.Running {
|
||||
return name, nil
|
||||
// Rename not possible (or new name already occupied) — return legacy
|
||||
// name so the caller can still exec into the live container.
|
||||
return legacyName, nil
|
||||
}
|
||||
|
||||
// Standard path: look for a running container with the new name.
|
||||
info, err := cli.ContainerInspect(ctx, newName)
|
||||
if err != nil {
|
||||
if isContainerNotFound(err) {
|
||||
return "", nil
|
||||
}
|
||||
return "", err
|
||||
}
|
||||
if info.State.Running {
|
||||
return newName, nil
|
||||
}
|
||||
return "", nil
|
||||
}
|
||||
@@ -1558,8 +1693,15 @@ func isRemovalInProgress(err error) bool {
|
||||
}
|
||||
|
||||
// DockerClient returns the underlying Docker client for sharing with other handlers.
|
||||
// If the provisioner is backed by a fake (e.g. in unit tests), this returns nil.
|
||||
func (p *Provisioner) DockerClient() *client.Client {
|
||||
return p.cli
|
||||
if p == nil || p.cli == nil {
|
||||
return nil
|
||||
}
|
||||
if c, ok := p.cli.(*client.Client); ok {
|
||||
return c
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close cleans up the Docker client.
|
||||
|
||||
@@ -0,0 +1,461 @@
|
||||
package provisioner
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
"github.com/docker/docker/api/types"
|
||||
"github.com/docker/docker/api/types/container"
|
||||
"github.com/docker/docker/api/types/image"
|
||||
"github.com/docker/docker/api/types/network"
|
||||
"github.com/docker/docker/client"
|
||||
"github.com/docker/docker/api/types/volume"
|
||||
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
|
||||
)
|
||||
|
||||
// fakeDockerClient is a minimal in-memory implementation of the dockerClient
|
||||
// interface used by the provisioner. It records calls and lets tests simulate
|
||||
// the legacy/full-ID naming transition without a real Docker daemon.
|
||||
type fakeDockerClient struct {
|
||||
mu sync.Mutex
|
||||
|
||||
volumes map[string]volume.Volume
|
||||
containers map[string]container.InspectResponse
|
||||
|
||||
renameErr error
|
||||
migrationExitCode int64
|
||||
|
||||
volumeInspectCalls []string
|
||||
volumeCreateCalls []volume.CreateOptions
|
||||
volumeRemoveCalls []string
|
||||
containerInspectCalls []string
|
||||
containerRemoveCalls []string
|
||||
containerRenameCalls []struct{ Old, New string }
|
||||
containerCreateCalls []string
|
||||
containerStartCalls []string
|
||||
}
|
||||
|
||||
func newFakeDockerClient() *fakeDockerClient {
|
||||
return &fakeDockerClient{
|
||||
volumes: make(map[string]volume.Volume),
|
||||
containers: make(map[string]container.InspectResponse),
|
||||
}
|
||||
}
|
||||
|
||||
func (f *fakeDockerClient) Close() error { return nil }
|
||||
|
||||
func (f *fakeDockerClient) ContainerCreate(ctx context.Context, config *container.Config, hostConfig *container.HostConfig, networkingConfig *network.NetworkingConfig, platform *ocispec.Platform, containerName string) (container.CreateResponse, error) {
|
||||
f.mu.Lock()
|
||||
defer f.mu.Unlock()
|
||||
f.containerCreateCalls = append(f.containerCreateCalls, containerName)
|
||||
return container.CreateResponse{ID: "cid-" + containerName}, nil
|
||||
}
|
||||
|
||||
func (f *fakeDockerClient) ContainerExecAttach(ctx context.Context, execID string, config container.ExecAttachOptions) (types.HijackedResponse, error) {
|
||||
return types.HijackedResponse{}, errors.New("not implemented")
|
||||
}
|
||||
|
||||
func (f *fakeDockerClient) ContainerExecCreate(ctx context.Context, ctr string, config container.ExecOptions) (container.ExecCreateResponse, error) {
|
||||
return container.ExecCreateResponse{}, errors.New("not implemented")
|
||||
}
|
||||
|
||||
func (f *fakeDockerClient) ContainerInspect(ctx context.Context, name string) (container.InspectResponse, error) {
|
||||
f.mu.Lock()
|
||||
defer f.mu.Unlock()
|
||||
f.containerInspectCalls = append(f.containerInspectCalls, name)
|
||||
if c, ok := f.containers[name]; ok {
|
||||
return c, nil
|
||||
}
|
||||
return container.InspectResponse{}, errors.New("No such container: " + name)
|
||||
}
|
||||
|
||||
func (f *fakeDockerClient) ContainerList(ctx context.Context, options container.ListOptions) ([]container.Summary, error) {
|
||||
return nil, errors.New("not implemented")
|
||||
}
|
||||
|
||||
func (f *fakeDockerClient) ContainerLogs(ctx context.Context, container string, options container.LogsOptions) (io.ReadCloser, error) {
|
||||
return nil, errors.New("not implemented")
|
||||
}
|
||||
|
||||
func (f *fakeDockerClient) ContainerRemove(ctx context.Context, name string, options container.RemoveOptions) error {
|
||||
f.mu.Lock()
|
||||
defer f.mu.Unlock()
|
||||
f.containerRemoveCalls = append(f.containerRemoveCalls, name)
|
||||
if _, ok := f.containers[name]; ok {
|
||||
delete(f.containers, name)
|
||||
return nil
|
||||
}
|
||||
return errors.New("No such container: " + name)
|
||||
}
|
||||
|
||||
func (f *fakeDockerClient) ContainerRename(ctx context.Context, oldName, newName string) error {
|
||||
f.mu.Lock()
|
||||
defer f.mu.Unlock()
|
||||
f.containerRenameCalls = append(f.containerRenameCalls, struct{ Old, New string }{oldName, newName})
|
||||
if f.renameErr != nil {
|
||||
return f.renameErr
|
||||
}
|
||||
if c, ok := f.containers[oldName]; ok {
|
||||
delete(f.containers, oldName)
|
||||
c.Name = newName
|
||||
f.containers[newName] = c
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *fakeDockerClient) ContainerStart(ctx context.Context, id string, options container.StartOptions) error {
|
||||
f.mu.Lock()
|
||||
defer f.mu.Unlock()
|
||||
f.containerStartCalls = append(f.containerStartCalls, id)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *fakeDockerClient) ContainerWait(ctx context.Context, id string, condition container.WaitCondition) (<-chan container.WaitResponse, <-chan error) {
|
||||
waitCh := make(chan container.WaitResponse, 1)
|
||||
errCh := make(chan error)
|
||||
f.mu.Lock()
|
||||
code := f.migrationExitCode
|
||||
f.mu.Unlock()
|
||||
waitCh <- container.WaitResponse{StatusCode: code}
|
||||
close(waitCh)
|
||||
return waitCh, errCh
|
||||
}
|
||||
|
||||
func (f *fakeDockerClient) CopyToContainer(ctx context.Context, container, path string, content io.Reader, options container.CopyToContainerOptions) error {
|
||||
return errors.New("not implemented")
|
||||
}
|
||||
|
||||
func (f *fakeDockerClient) ImageInspect(ctx context.Context, img string, opts ...client.ImageInspectOption) (image.InspectResponse, error) {
|
||||
return image.InspectResponse{}, errors.New("not implemented")
|
||||
}
|
||||
|
||||
func (f *fakeDockerClient) ImagePull(ctx context.Context, ref string, opts image.PullOptions) (io.ReadCloser, error) {
|
||||
return nil, errors.New("not implemented")
|
||||
}
|
||||
|
||||
func (f *fakeDockerClient) VolumeCreate(ctx context.Context, options volume.CreateOptions) (volume.Volume, error) {
|
||||
f.mu.Lock()
|
||||
defer f.mu.Unlock()
|
||||
f.volumeCreateCalls = append(f.volumeCreateCalls, options)
|
||||
v := volume.Volume{Name: options.Name, Labels: options.Labels}
|
||||
f.volumes[options.Name] = v
|
||||
return v, nil
|
||||
}
|
||||
|
||||
func (f *fakeDockerClient) VolumeInspect(ctx context.Context, name string) (volume.Volume, error) {
|
||||
f.mu.Lock()
|
||||
defer f.mu.Unlock()
|
||||
f.volumeInspectCalls = append(f.volumeInspectCalls, name)
|
||||
if v, ok := f.volumes[name]; ok {
|
||||
return v, nil
|
||||
}
|
||||
return volume.Volume{}, errors.New("No such volume: " + name)
|
||||
}
|
||||
|
||||
func (f *fakeDockerClient) VolumeRemove(ctx context.Context, name string, force bool) error {
|
||||
f.mu.Lock()
|
||||
defer f.mu.Unlock()
|
||||
f.volumeRemoveCalls = append(f.volumeRemoveCalls, name)
|
||||
if _, ok := f.volumes[name]; ok {
|
||||
delete(f.volumes, name)
|
||||
return nil
|
||||
}
|
||||
return errors.New("No such volume: " + name)
|
||||
}
|
||||
|
||||
func runningContainer(name string) container.InspectResponse {
|
||||
return container.InspectResponse{
|
||||
ContainerJSONBase: &container.ContainerJSONBase{
|
||||
Name: name,
|
||||
State: &container.State{Running: true},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
const migrateTestWorkspaceID = "abcdef1234567890"
|
||||
|
||||
func TestResolveConfigVolumeName_LegacyExists_MigratesInPlace(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cli := newFakeDockerClient()
|
||||
p := &Provisioner{cli: cli}
|
||||
|
||||
newName := ConfigVolumeName(migrateTestWorkspaceID)
|
||||
legacyName := legacyConfigVolumeName(migrateTestWorkspaceID)
|
||||
cli.volumes[legacyName] = volume.Volume{Name: legacyName}
|
||||
|
||||
got := p.resolveConfigVolumeName(ctx, migrateTestWorkspaceID)
|
||||
if got != newName {
|
||||
t.Fatalf("resolveConfigVolumeName: got %q, want %q", got, newName)
|
||||
}
|
||||
|
||||
// Migration path must inspect both names, then create new volume and copy.
|
||||
if !strings.Contains(strings.Join(cli.volumeInspectCalls, ","), legacyName) {
|
||||
t.Fatalf("expected VolumeInspect(%s) for legacy presence check", legacyName)
|
||||
}
|
||||
if !strings.Contains(strings.Join(cli.volumeInspectCalls, ","), newName) {
|
||||
t.Fatalf("expected VolumeInspect(%s) to check for existing new volume", newName)
|
||||
}
|
||||
if len(cli.volumeCreateCalls) == 0 || cli.volumeCreateCalls[0].Name != newName {
|
||||
t.Fatalf("expected VolumeCreate(%s)", newName)
|
||||
}
|
||||
if len(cli.containerCreateCalls) == 0 {
|
||||
t.Fatal("expected migration container creation")
|
||||
}
|
||||
if len(cli.containerStartCalls) == 0 {
|
||||
t.Fatal("expected migration container start")
|
||||
}
|
||||
if len(cli.containerRemoveCalls) == 0 {
|
||||
t.Fatal("expected migration container removal")
|
||||
}
|
||||
|
||||
// Legacy volume must be removed so it is not orphaned.
|
||||
removed := false
|
||||
for _, n := range cli.volumeRemoveCalls {
|
||||
if n == legacyName {
|
||||
removed = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !removed {
|
||||
t.Fatalf("legacy volume %s was not removed after migration — orphan risk", legacyName)
|
||||
}
|
||||
if _, ok := cli.volumes[legacyName]; ok {
|
||||
t.Fatalf("legacy volume %s still present in fake client after migration", legacyName)
|
||||
}
|
||||
if _, ok := cli.volumes[newName]; !ok {
|
||||
t.Fatalf("new volume %s must exist after migration", newName)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveConfigVolumeName_LegacyAbsent_NoMigration(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cli := newFakeDockerClient()
|
||||
p := &Provisioner{cli: cli}
|
||||
|
||||
newName := ConfigVolumeName(migrateTestWorkspaceID)
|
||||
legacyName := legacyConfigVolumeName(migrateTestWorkspaceID)
|
||||
|
||||
got := p.resolveConfigVolumeName(ctx, migrateTestWorkspaceID)
|
||||
if got != newName {
|
||||
t.Fatalf("resolveConfigVolumeName: got %q, want %q", got, newName)
|
||||
}
|
||||
|
||||
// Should check legacy once and short-circuit.
|
||||
if len(cli.volumeInspectCalls) != 1 || cli.volumeInspectCalls[0] != legacyName {
|
||||
t.Fatalf("expected exactly one VolumeInspect call for legacy name, got %v", cli.volumeInspectCalls)
|
||||
}
|
||||
if len(cli.volumeCreateCalls) != 0 {
|
||||
t.Fatalf("expected no VolumeCreate when legacy absent, got %v", cli.volumeCreateCalls)
|
||||
}
|
||||
if len(cli.volumeRemoveCalls) != 0 {
|
||||
t.Fatalf("expected no VolumeRemove when legacy absent, got %v", cli.volumeRemoveCalls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveClaudeSessionVolumeName_LegacyExists_MigratesInPlace(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cli := newFakeDockerClient()
|
||||
p := &Provisioner{cli: cli}
|
||||
|
||||
newName := ClaudeSessionVolumeName(migrateTestWorkspaceID)
|
||||
legacyName := legacyClaudeSessionVolumeName(migrateTestWorkspaceID)
|
||||
cli.volumes[legacyName] = volume.Volume{Name: legacyName}
|
||||
|
||||
got := p.resolveClaudeSessionVolumeName(ctx, migrateTestWorkspaceID)
|
||||
if got != newName {
|
||||
t.Fatalf("resolveClaudeSessionVolumeName: got %q, want %q", got, newName)
|
||||
}
|
||||
|
||||
removed := false
|
||||
for _, n := range cli.volumeRemoveCalls {
|
||||
if n == legacyName {
|
||||
removed = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !removed {
|
||||
t.Fatalf("legacy session volume %s was not removed after migration — orphan risk", legacyName)
|
||||
}
|
||||
if _, ok := cli.volumes[newName]; !ok {
|
||||
t.Fatalf("new session volume %s must exist after migration", newName)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveClaudeSessionVolumeName_LegacyAbsent_NoMigration(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cli := newFakeDockerClient()
|
||||
p := &Provisioner{cli: cli}
|
||||
|
||||
newName := ClaudeSessionVolumeName(migrateTestWorkspaceID)
|
||||
legacyName := legacyClaudeSessionVolumeName(migrateTestWorkspaceID)
|
||||
|
||||
got := p.resolveClaudeSessionVolumeName(ctx, migrateTestWorkspaceID)
|
||||
if got != newName {
|
||||
t.Fatalf("resolveClaudeSessionVolumeName: got %q, want %q", got, newName)
|
||||
}
|
||||
if len(cli.volumeInspectCalls) != 1 || cli.volumeInspectCalls[0] != legacyName {
|
||||
t.Fatalf("expected exactly one VolumeInspect call for legacy session name, got %v", cli.volumeInspectCalls)
|
||||
}
|
||||
if len(cli.volumeCreateCalls) != 0 {
|
||||
t.Fatalf("expected no VolumeCreate when legacy absent, got %v", cli.volumeCreateCalls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMigrateVolumeIfNeeded_CopyFails_PreservesLegacy(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cli := newFakeDockerClient()
|
||||
p := &Provisioner{cli: cli}
|
||||
|
||||
newName := ConfigVolumeName(migrateTestWorkspaceID)
|
||||
legacyName := legacyConfigVolumeName(migrateTestWorkspaceID)
|
||||
cli.volumes[legacyName] = volume.Volume{Name: legacyName}
|
||||
cli.migrationExitCode = 1
|
||||
|
||||
if err := p.migrateVolumeIfNeeded(ctx, newName, legacyName); err == nil {
|
||||
t.Fatal("expected migration error when copy container exits non-zero")
|
||||
}
|
||||
|
||||
// Legacy volume must survive a failed copy so no data is lost.
|
||||
if _, ok := cli.volumes[legacyName]; !ok {
|
||||
t.Fatal("legacy volume must be preserved when migration copy fails (data-loss guard)")
|
||||
}
|
||||
}
|
||||
|
||||
func TestStop_FullIDAbsent_LegacyRemoved(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cli := newFakeDockerClient()
|
||||
p := &Provisioner{cli: cli}
|
||||
|
||||
newName := ContainerName(migrateTestWorkspaceID)
|
||||
legacyName := legacyContainerName(migrateTestWorkspaceID)
|
||||
cli.containers[legacyName] = runningContainer(legacyName)
|
||||
|
||||
if err := p.Stop(ctx, migrateTestWorkspaceID); err != nil {
|
||||
t.Fatalf("Stop failed: %v", err)
|
||||
}
|
||||
|
||||
if len(cli.containerRemoveCalls) < 2 {
|
||||
t.Fatalf("expected Remove on full-id then legacy, got %v", cli.containerRemoveCalls)
|
||||
}
|
||||
if cli.containerRemoveCalls[0] != newName {
|
||||
t.Fatalf("expected first remove target %q, got %q", newName, cli.containerRemoveCalls[0])
|
||||
}
|
||||
if cli.containerRemoveCalls[1] != legacyName {
|
||||
t.Fatalf("expected second remove target %q, got %q", legacyName, cli.containerRemoveCalls[1])
|
||||
}
|
||||
if _, ok := cli.containers[legacyName]; ok {
|
||||
t.Fatal("legacy container still present after Stop")
|
||||
}
|
||||
}
|
||||
|
||||
func TestStop_BothAbsent_IsNoOp(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cli := newFakeDockerClient()
|
||||
p := &Provisioner{cli: cli}
|
||||
|
||||
newName := ContainerName(migrateTestWorkspaceID)
|
||||
legacyName := legacyContainerName(migrateTestWorkspaceID)
|
||||
|
||||
if err := p.Stop(ctx, migrateTestWorkspaceID); err != nil {
|
||||
t.Fatalf("Stop failed: %v", err)
|
||||
}
|
||||
|
||||
if len(cli.containerRemoveCalls) != 2 {
|
||||
t.Fatalf("expected 2 remove attempts, got %v", cli.containerRemoveCalls)
|
||||
}
|
||||
if cli.containerRemoveCalls[0] != newName || cli.containerRemoveCalls[1] != legacyName {
|
||||
t.Fatalf("expected remove order [%q, %q], got %v", newName, legacyName, cli.containerRemoveCalls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunningContainerName_LegacyRunning_RenameFails_FallsBackToLegacy(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cli := newFakeDockerClient()
|
||||
|
||||
newName := ContainerName(migrateTestWorkspaceID)
|
||||
legacyName := legacyContainerName(migrateTestWorkspaceID)
|
||||
cli.containers[legacyName] = runningContainer(legacyName)
|
||||
cli.renameErr = errors.New("daemon rename failed")
|
||||
|
||||
got, err := RunningContainerName(ctx, cli, migrateTestWorkspaceID)
|
||||
if err != nil {
|
||||
t.Fatalf("RunningContainerName returned error: %v", err)
|
||||
}
|
||||
if got != legacyName {
|
||||
t.Fatalf("expected fallback to legacy name %q, got %q", legacyName, got)
|
||||
}
|
||||
|
||||
if len(cli.containerInspectCalls) < 2 {
|
||||
t.Fatalf("expected inspect of legacy and new names, got %v", cli.containerInspectCalls)
|
||||
}
|
||||
if cli.containerInspectCalls[0] != legacyName {
|
||||
t.Fatalf("expected legacy inspect first, got %v", cli.containerInspectCalls)
|
||||
}
|
||||
|
||||
renamed := false
|
||||
for _, r := range cli.containerRenameCalls {
|
||||
if r.Old == legacyName && r.New == newName {
|
||||
renamed = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !renamed {
|
||||
t.Fatalf("expected rename attempt %q -> %q, got %v", legacyName, newName, cli.containerRenameCalls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunningContainerName_LegacyRunning_RenameSucceeds(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cli := newFakeDockerClient()
|
||||
|
||||
newName := ContainerName(migrateTestWorkspaceID)
|
||||
legacyName := legacyContainerName(migrateTestWorkspaceID)
|
||||
cli.containers[legacyName] = runningContainer(legacyName)
|
||||
|
||||
got, err := RunningContainerName(ctx, cli, migrateTestWorkspaceID)
|
||||
if err != nil {
|
||||
t.Fatalf("RunningContainerName returned error: %v", err)
|
||||
}
|
||||
if got != newName {
|
||||
t.Fatalf("expected new name %q after rename, got %q", newName, got)
|
||||
}
|
||||
if _, ok := cli.containers[legacyName]; ok {
|
||||
t.Fatal("legacy container should have been renamed away")
|
||||
}
|
||||
if _, ok := cli.containers[newName]; !ok {
|
||||
t.Fatal("new container name should exist after rename")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunningContainerName_NewRunning_ReturnsNew(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cli := newFakeDockerClient()
|
||||
|
||||
newName := ContainerName(migrateTestWorkspaceID)
|
||||
cli.containers[newName] = runningContainer(newName)
|
||||
|
||||
got, err := RunningContainerName(ctx, cli, migrateTestWorkspaceID)
|
||||
if err != nil {
|
||||
t.Fatalf("RunningContainerName returned error: %v", err)
|
||||
}
|
||||
if got != newName {
|
||||
t.Fatalf("expected new name %q, got %q", newName, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunningContainerName_BothAbsent_ReturnsEmpty(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
cli := newFakeDockerClient()
|
||||
|
||||
got, err := RunningContainerName(ctx, cli, migrateTestWorkspaceID)
|
||||
if err != nil {
|
||||
t.Fatalf("RunningContainerName returned error: %v", err)
|
||||
}
|
||||
if got != "" {
|
||||
t.Fatalf("expected empty name when neither container exists, got %q", got)
|
||||
}
|
||||
}
|
||||
@@ -2,14 +2,19 @@ package provisioner
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/docker/docker/api/types/container"
|
||||
"github.com/docker/docker/api/types/volume"
|
||||
"github.com/docker/docker/client"
|
||||
)
|
||||
|
||||
// TestValidateConfigSource covers issue #17: a workspace restart with no
|
||||
@@ -1422,3 +1427,119 @@ func TestApplyTierConfig_T3_DefaultCap(t *testing.T) {
|
||||
t.Errorf("T3 default NanoCPUs: got %d, want %d", hc.NanoCPUs, wantCPU)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMigrateVolumeIfNeeded_ExistingTruncatedVolume verifies the KI-013 deploy
|
||||
// safety path: when a legacy truncated-name volume already exists, data is
|
||||
// copied to the new full-ID name and the legacy volume is removed. Existing
|
||||
// workspace state is preserved without operator intervention.
|
||||
func TestMigrateVolumeIfNeeded_ExistingTruncatedVolume(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
cli, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
|
||||
if err != nil {
|
||||
t.Skip("docker client unavailable:", err)
|
||||
}
|
||||
if _, pingErr := cli.Ping(ctx); pingErr != nil {
|
||||
t.Skip("docker daemon unreachable:", pingErr)
|
||||
}
|
||||
|
||||
p := &Provisioner{cli: cli}
|
||||
workspaceID := "test-migrate-" + strconv.FormatInt(time.Now().UnixNano(), 10)
|
||||
legacyName := legacyConfigVolumeName(workspaceID)
|
||||
newName := ConfigVolumeName(workspaceID)
|
||||
|
||||
// Cleanup before and after (defensive — avoid pollution on retries).
|
||||
_ = cli.VolumeRemove(ctx, legacyName, true)
|
||||
_ = cli.VolumeRemove(ctx, newName, true)
|
||||
defer func() {
|
||||
_ = cli.VolumeRemove(ctx, legacyName, true)
|
||||
_ = cli.VolumeRemove(ctx, newName, true)
|
||||
}()
|
||||
|
||||
// 1. Create legacy volume and seed it with a sentinel file.
|
||||
if _, err := cli.VolumeCreate(ctx, volume.CreateOptions{Name: legacyName}); err != nil {
|
||||
t.Fatalf("create legacy volume: %v", err)
|
||||
}
|
||||
seedResp, err := cli.ContainerCreate(ctx, &container.Config{
|
||||
Image: "alpine",
|
||||
Cmd: []string{"sh", "-c", "echo sentinel-data > /vol/sentinel.txt"},
|
||||
}, &container.HostConfig{
|
||||
Binds: []string{legacyName + ":/vol"},
|
||||
}, nil, nil, "")
|
||||
if err != nil {
|
||||
t.Fatalf("create seed container: %v", err)
|
||||
}
|
||||
defer cli.ContainerRemove(ctx, seedResp.ID, container.RemoveOptions{Force: true})
|
||||
if err := cli.ContainerStart(ctx, seedResp.ID, container.StartOptions{}); err != nil {
|
||||
t.Fatalf("start seed container: %v", err)
|
||||
}
|
||||
waitCh, errCh := cli.ContainerWait(ctx, seedResp.ID, container.WaitConditionNotRunning)
|
||||
select {
|
||||
case <-waitCh:
|
||||
case err := <-errCh:
|
||||
if err != nil {
|
||||
t.Fatalf("seed container failed: %v", err)
|
||||
}
|
||||
}
|
||||
// Remove the seed container before migration so the legacy volume is
|
||||
// no longer referenced by any container. The deferred remove above is a
|
||||
// safety net for panic/early-return paths.
|
||||
if err := cli.ContainerRemove(ctx, seedResp.ID, container.RemoveOptions{Force: true}); err != nil {
|
||||
t.Fatalf("remove seed container: %v", err)
|
||||
}
|
||||
|
||||
// 2. Run migration.
|
||||
if err := p.migrateVolumeIfNeeded(ctx, newName, legacyName); err != nil {
|
||||
t.Fatalf("migrateVolumeIfNeeded failed: %v", err)
|
||||
}
|
||||
|
||||
// 3. Legacy volume must be gone.
|
||||
if _, inspectErr := cli.VolumeInspect(ctx, legacyName); inspectErr == nil {
|
||||
t.Fatalf("legacy volume %s still exists after migration", legacyName)
|
||||
}
|
||||
|
||||
// 4. New volume must exist and contain the sentinel file.
|
||||
if _, inspectErr := cli.VolumeInspect(ctx, newName); inspectErr != nil {
|
||||
t.Fatalf("new volume %s does not exist after migration: %v", newName, inspectErr)
|
||||
}
|
||||
|
||||
readResp, err := cli.ContainerCreate(ctx, &container.Config{
|
||||
Image: "alpine",
|
||||
Cmd: []string{"cat", "/vol/sentinel.txt"},
|
||||
}, &container.HostConfig{
|
||||
Binds: []string{newName + ":/vol"},
|
||||
}, nil, nil, "")
|
||||
if err != nil {
|
||||
t.Fatalf("create read container: %v", err)
|
||||
}
|
||||
defer cli.ContainerRemove(ctx, readResp.ID, container.RemoveOptions{Force: true})
|
||||
if err := cli.ContainerStart(ctx, readResp.ID, container.StartOptions{}); err != nil {
|
||||
t.Fatalf("start read container: %v", err)
|
||||
}
|
||||
waitCh, errCh = cli.ContainerWait(ctx, readResp.ID, container.WaitConditionNotRunning)
|
||||
select {
|
||||
case <-waitCh:
|
||||
case err := <-errCh:
|
||||
if err != nil {
|
||||
t.Fatalf("read container failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
logs, err := cli.ContainerLogs(ctx, readResp.ID, container.LogsOptions{ShowStdout: true})
|
||||
if err != nil {
|
||||
t.Fatalf("read container logs: %v", err)
|
||||
}
|
||||
defer logs.Close()
|
||||
data, err := io.ReadAll(logs)
|
||||
if err != nil {
|
||||
t.Fatalf("read logs: %v", err)
|
||||
}
|
||||
if !strings.Contains(string(data), "sentinel-data") {
|
||||
t.Fatalf("new volume missing sentinel data; logs: %q", data)
|
||||
}
|
||||
|
||||
// 5. Idempotency: second migration must be a no-op.
|
||||
if err := p.migrateVolumeIfNeeded(ctx, newName, legacyName); err != nil {
|
||||
t.Fatalf("second migration (idempotency) failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user