molecule-core/workspace-server/internal/provisioner/provisioner.go
Hongming Wang 0de67cd379 feat(platform/admin): /admin/workspace-images/refresh + Docker SDK + GHCR auth
The production-side end of the runtime CD chain. Operators (or the post-
publish CI workflow) hit this after a runtime release to pull the latest
workspace-template-* images from GHCR and recreate any running ws-* containers
so they adopt the new image. Without this, a freshly-published runtime sat in
the registry while containers kept the old image until they naturally cycled.

Implementation notes:
- Uses Docker SDK ImagePull rather than shelling out to docker CLI — the
  alpine platform container has no docker CLI installed.
- ghcrAuthHeader() reads GHCR_USER + GHCR_TOKEN env, builds the base64-
  encoded JSON payload Docker engine expects in PullOptions.RegistryAuth.
  Both empty → public/cached images only; both set → private GHCR pulls.
- Container matching uses ContainerInspect (NOT ContainerList) because
  ContainerList returns the resolved digest in .Image, not the human tag.
  Inspect surfaces .Config.Image which is what we need.
- Provisioner.DefaultImagePlatform() exported so admin handler picks the
  same Apple-Silicon-needs-amd64 platform as the provisioner — single
  source of truth for the multi-arch override.

Local-dev companion: scripts/refresh-workspace-images.sh runs on the
host and inherits the host's docker keychain auth — alternate path for
when GHCR_USER/TOKEN aren't set in the platform env.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
2026-04-26 10:17:21 -07:00

1115 lines
44 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Package provisioner manages Docker container lifecycle for workspace agents.
package provisioner
import (
"archive/tar"
"bytes"
"context"
"errors"
"fmt"
"io"
"log"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"time"
"github.com/docker/docker/api/types/container"
dockerimage "github.com/docker/docker/api/types/image"
"github.com/docker/docker/api/types/network"
"github.com/docker/docker/api/types/volume"
"github.com/docker/docker/client"
"github.com/docker/go-connections/nat"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
)
// ErrNoBackend is returned by lifecycle methods (Stop, IsRunning) when
// the receiver is zero-valued — i.e. no Docker daemon connection has
// been wired up. The orphan sweeper and the contract-test scaffolding
// both speculatively call these methods on a possibly-nil receiver;
// returning a typed error rather than panicking lets callers reason
// about the case explicitly (compare with errors.Is).
// See docs/architecture/backends.md drift-risk #6.
var ErrNoBackend = errors.New("provisioner: no backend configured (zero-valued receiver)")
// RuntimeImages maps runtime names to their Docker image refs on GHCR.
// Each standalone template repo publishes its image via the reusable
// publish-template-image workflow in molecule-ci on every main merge.
// The provisioner pulls these on demand (see ensureImageLocal) — no
// pre-build step on the tenant host.
//
// Legacy local-build path (`docker build -t workspace-template:<runtime>`
// via scripts/build-images.sh) is still supported for development:
// when a bare `workspace-template:<runtime>` image is present locally,
// Docker's image resolver matches it before any pull is attempted. Set
// the env var WORKSPACE_IMAGE_LOCAL_OVERRIDE=1 (enforced by callers) to
// short-circuit pulls entirely if needed.
//
// Runtimes absent from this map fall back to DefaultImage (see the
// NOTE below); keys must match WorkspaceConfig.Runtime values.
var RuntimeImages = map[string]string{
"langgraph": "ghcr.io/molecule-ai/workspace-template-langgraph:latest",
"claude-code": "ghcr.io/molecule-ai/workspace-template-claude-code:latest",
"openclaw": "ghcr.io/molecule-ai/workspace-template-openclaw:latest",
"deepagents": "ghcr.io/molecule-ai/workspace-template-deepagents:latest",
"crewai": "ghcr.io/molecule-ai/workspace-template-crewai:latest",
"autogen": "ghcr.io/molecule-ai/workspace-template-autogen:latest",
"hermes": "ghcr.io/molecule-ai/workspace-template-hermes:latest", // Hermes (Nous Research) — real hermes-agent behind A2A bridge
"gemini-cli": "ghcr.io/molecule-ai/workspace-template-gemini-cli:latest", // Google Gemini CLI
}
const (
// DefaultImage is the fallback workspace Docker image (langgraph is the most common runtime).
DefaultImage = "ghcr.io/molecule-ai/workspace-template-langgraph:latest"
// NOTE: Every runtime MUST have an entry in RuntimeImages above. If a runtime is missing,
// it falls back to DefaultImage which may have wrong deps. Add new runtimes to both
// RuntimeImages AND create the standalone template repo.
// DefaultNetwork is the Docker network workspaces join.
DefaultNetwork = "molecule-monorepo-net"
// DefaultPort is the port the A2A server listens on inside the container.
DefaultPort = "8000"
// ProvisionTimeout is how long to wait for first heartbeat before marking as failed.
ProvisionTimeout = 3 * time.Minute
)
// WorkspaceConfig holds the parameters needed to provision a workspace container.
type WorkspaceConfig struct {
WorkspaceID string // Stable workspace identifier; truncated to 12 chars for container/volume names
TemplatePath string // Host path to template dir to copy from (e.g. claude-code-default/)
ConfigFiles map[string][]byte // Generated config files to write into /configs volume
PluginsPath string // Host path to plugins directory (mounted at /plugins)
WorkspacePath string // Host path to bind-mount as /workspace (if empty, uses Docker named volume)
Tier int // Isolation tier (1 sandboxed … 4 full host); see ApplyTierConfig
Runtime string // "langgraph" (default) or "claude-code", "codex", "ollama", "custom"
EnvVars map[string]string // Additional env vars (API keys, etc.)
PlatformURL string // Platform base URL, injected as both PLATFORM_URL and MOLECULE_URL
AwarenessURL string // Awareness service URL; injected only when AwarenessNamespace is also set
AwarenessNamespace string // Awareness namespace; injected only when AwarenessURL is also set
WorkspaceAccess string // #65: "none" (default), "read_only", or "read_write"
ResetClaudeSession bool // #12: if true, discard the claude-sessions volume before start (fresh session dir)
}
// Workspace-access constants for #65. Matches the CHECK constraint on
// the workspaces.workspace_access column (migration 019).
const (
WorkspaceAccessNone = "none"
WorkspaceAccessReadOnly = "read_only"
WorkspaceAccessReadWrite = "read_write"
)
// ConfigVolumeName returns the Docker named volume for a workspace's configs.
// Workspace IDs longer than 12 characters are truncated, matching the
// shortening used by ContainerName.
func ConfigVolumeName(workspaceID string) string {
	short := workspaceID
	if len(short) > 12 {
		short = short[:12]
	}
	return "ws-" + short + "-configs"
}
// ClaudeSessionVolumeName returns the Docker named volume for a workspace's
// Claude Code session directory (/root/.claude/sessions). Kept separate from
// the config volume so it can be discarded independently (via
// WORKSPACE_RESET_SESSION or ?reset=true) without wiping the user's config.
// Issue #12. Uses the same 12-character ID truncation as ContainerName.
func ClaudeSessionVolumeName(workspaceID string) string {
	short := workspaceID
	if len(short) > 12 {
		short = short[:12]
	}
	return "ws-" + short + "-claude-sessions"
}
// Provisioner manages Docker containers for workspace agents.
// The zero value carries no daemon connection; use New to construct a
// connected instance. Stop guards against a nil receiver/client by
// returning ErrNoBackend.
type Provisioner struct {
cli *client.Client // Docker Engine API client (nil on a zero-valued Provisioner)
}
// New creates a new Provisioner connected to the local Docker daemon.
// The client picks up the standard DOCKER_* environment variables and
// negotiates the API version with the daemon.
func New() (*Provisioner, error) {
	dockerClient, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
	if err != nil {
		return nil, fmt.Errorf("failed to connect to Docker: %w", err)
	}
	return &Provisioner{cli: dockerClient}, nil
}
// ContainerName returns the Docker container name for a workspace.
// Workspace IDs longer than 12 characters are truncated so names stay
// short and consistent with the volume-naming helpers.
func ContainerName(workspaceID string) string {
	short := workspaceID
	if len(short) > 12 {
		short = short[:12]
	}
	return "ws-" + short
}
// InternalURL returns the Docker-internal URL for a workspace container.
func InternalURL(workspaceID string) string {
return fmt.Sprintf("http://%s:%s", ContainerName(workspaceID), DefaultPort)
}
// Start provisions and starts a workspace container and returns the
// host-reachable base URL for it (falling back to the Docker-internal
// URL when the ephemeral host-port binding cannot be resolved).
//
// High-level sequence:
//  1. ensure the config (and, for claude-code, session) volumes exist
//  2. assemble env, mounts, port bindings and tier config
//  3. best-effort image pull from GHCR when the image is missing locally
//  4. force-remove any stale same-named container, then create + start
//  5. copy template/config files into /configs
//  6. resolve the host-mapped port via ContainerInspect (3 attempts)
func (p *Provisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, error) {
name := ContainerName(cfg.WorkspaceID)
configVolume := ConfigVolumeName(cfg.WorkspaceID)
// Create named volume for configs (idempotent — no-op if already exists)
_, err := p.cli.VolumeCreate(ctx, volume.CreateOptions{
Name: configVolume,
})
if err != nil {
return "", fmt.Errorf("failed to create config volume %s: %w", configVolume, err)
}
log.Printf("Provisioner: config volume %s ready", configVolume)
env := buildContainerEnv(cfg)
// Select image based on runtime (each adapter has its own pre-built image)
image := DefaultImage
if cfg.Runtime != "" {
if img, ok := RuntimeImages[cfg.Runtime]; ok {
image = img
}
}
containerCfg := &container.Config{
Image: image,
Env: env,
ExposedPorts: nat.PortSet{
nat.Port(DefaultPort + "/tcp"): {},
},
}
// Host config with volume mounts. #65: workspace_access controls whether
// a bind-mount is read-only (:ro) or read-write. Default "none" implies
// isolated volume; "read_only"/"read_write" require WorkspacePath set
// (validated at the handler layer before we get here).
workspaceMount := buildWorkspaceMount(cfg)
log.Printf("Provisioner: workspace mount = %q (access=%q)", workspaceMount, cfg.WorkspaceAccess)
// Mount configs as read-write named volume (agent and Files API need to write)
// Plugins are installed per-workspace into /configs/plugins/ via the platform API.
// No global /plugins mount — each workspace owns its plugin set.
configMount := fmt.Sprintf("%s:/configs", configVolume)
binds := []string{
configMount,
workspaceMount,
}
// #12: Preserve Claude Code session directory across restarts.
// The claude-code SDK stores conversations in /root/.claude/sessions/
// and Postgres keeps current_session_id. Without a persistent volume,
// restarts drop the session file and the SDK dies with
// "No conversation found with session ID: <uuid>".
//
// Only mount for runtime=claude-code (other runtimes don't use the path).
// Opt-out: ResetClaudeSession or env WORKSPACE_RESET_SESSION=1 → we
// remove the existing volume before recreating it, so the agent
// boots with a clean session dir.
if cfg.Runtime == "claude-code" {
claudeSessionsVolume := ClaudeSessionVolumeName(cfg.WorkspaceID)
resetEnv, _ := strconv.ParseBool(cfg.EnvVars["WORKSPACE_RESET_SESSION"])
if cfg.ResetClaudeSession || resetEnv {
// Best-effort removal: the volume may not exist yet (first start).
if rmErr := p.cli.VolumeRemove(ctx, claudeSessionsVolume, true); rmErr != nil {
log.Printf("Provisioner: claude-sessions volume reset warning for %s: %v", claudeSessionsVolume, rmErr)
} else {
log.Printf("Provisioner: claude-sessions volume %s reset (fresh session)", claudeSessionsVolume)
}
}
if _, cvErr := p.cli.VolumeCreate(ctx, volume.CreateOptions{Name: claudeSessionsVolume}); cvErr != nil {
return "", fmt.Errorf("failed to create claude-sessions volume %s: %w", claudeSessionsVolume, cvErr)
}
binds = append(binds, fmt.Sprintf("%s:/root/.claude/sessions", claudeSessionsVolume))
log.Printf("Provisioner: claude-sessions volume %s mounted at /root/.claude/sessions", claudeSessionsVolume)
}
hostCfg := &container.HostConfig{
Binds: binds,
RestartPolicy: container.RestartPolicy{Name: "unless-stopped"},
PortBindings: nat.PortMap{
nat.Port(DefaultPort + "/tcp"): []nat.PortBinding{
{HostIP: "127.0.0.1", HostPort: ""}, // Ephemeral host port
},
},
}
// Apply tier-based container configuration
ApplyTierConfig(hostCfg, cfg, configMount, name)
// Network config — join molecule-monorepo-net with container name as alias
networkCfg := &network.NetworkingConfig{
EndpointsConfig: map[string]*network.EndpointSettings{
DefaultNetwork: {
Aliases: []string{name},
},
},
}
// Ensure no stale container exists with the same name (race with restart policy)
_ = p.cli.ContainerRemove(ctx, name, container.RemoveOptions{Force: true})
// Resolve the target image platform once so the pull and the
// container-create use the same value. On an Apple Silicon dev
// laptop the GHCR workspace-template-* images only ship a
// linux/amd64 manifest today; without an explicit platform the
// daemon asks for linux/arm64/v8 and ImagePull returns
// "no matching manifest for linux/arm64/v8 in the manifest list
// entries". Forcing linux/amd64 lets Docker Desktop run them
// under QEMU emulation (slow but functional — unblocks local
// dev + Canvas smoke-testing on M-series Macs). See issue #1875.
imgPlatformStr := defaultImagePlatform()
imgPlatform := parseOCIPlatform(imgPlatformStr)
// Log image resolution for debugging stale-image issues, and pull from
// GHCR on miss so tenant hosts don't need a pre-build step anymore.
// The pull is best-effort: if it fails (network, auth, rate limit) the
// subsequent ContainerCreate still surfaces the actionable error below.
imgInspect, _, imgErr := p.cli.ImageInspectWithRaw(ctx, image)
if imgErr == nil {
// NOTE(review): the [:19] slices below assume the inspect ID
// ("sha256:…") and Created timestamp are at least 19 bytes long —
// true for engine responses today, but would panic on shorter strings.
log.Printf("Provisioner: creating %s from image %s (ID: %s, created: %s)",
name, image, imgInspect.ID[:19], imgInspect.Created[:19])
} else {
if imgPlatformStr != "" {
log.Printf("Provisioner: image %s not present locally (%v) — attempting pull (platform=%s)", image, imgErr, imgPlatformStr)
} else {
log.Printf("Provisioner: image %s not present locally (%v) — attempting pull", image, imgErr)
}
if perr := pullImageAndDrain(ctx, p.cli, image, imgPlatformStr); perr != nil {
log.Printf("Provisioner: image pull for %s failed: %v (falling through to create)", image, perr)
} else {
log.Printf("Provisioner: pulled %s", image)
}
}
// Create and start container. If the image still isn't available,
// Docker returns a generic "No such image" error that's opaque to
// operators — wrap it with the resolved tag and the exact pull
// command so last_sample_error surfaces something actionable. Issue #117.
resp, err := p.cli.ContainerCreate(ctx, containerCfg, hostCfg, networkCfg, imgPlatform, name)
if err != nil {
if isImageNotFoundErr(err) {
return "", fmt.Errorf(
"docker image %q not found after pull attempt — verify GHCR visibility for %s and that the tenant has internet access (underlying error: %w)",
image, runtimeTagFromImage(image), err,
)
}
return "", fmt.Errorf("failed to create container: %w", err)
}
if err := p.cli.ContainerStart(ctx, resp.ID, container.StartOptions{}); err != nil {
// Clean up created container on start failure
_ = p.cli.ContainerRemove(ctx, resp.ID, container.RemoveOptions{Force: true})
return "", fmt.Errorf("failed to start container: %w", err)
}
// Verify the started container uses the expected image
// (same [:19] length assumption as the inspect log above).
if startedInfo, siErr := p.cli.ContainerInspect(ctx, resp.ID); siErr == nil {
log.Printf("Provisioner: started container %s (image: %s)", name, startedInfo.Image[:19])
}
// Volume ownership is fixed by the entrypoint (starts as root, chowns
// /configs and /workspace, then drops to agent via gosu). No per-start
// chown needed here.
// Copy template files into /configs if TemplatePath is set
if cfg.TemplatePath != "" {
if err := p.CopyTemplateToContainer(ctx, resp.ID, cfg.TemplatePath); err != nil {
log.Printf("Provisioner: warning — failed to copy template to container %s: %v", name, err)
}
}
// Write generated config files into /configs if ConfigFiles is set
if len(cfg.ConfigFiles) > 0 {
if err := p.WriteFilesToContainer(ctx, resp.ID, cfg.ConfigFiles); err != nil {
log.Printf("Provisioner: warning — failed to write config files to container %s: %v", name, err)
}
}
// Resolve the host-mapped port. Retry inspect up to 3 times if Docker hasn't
// bound the ephemeral port yet (rare race under heavy load).
hostURL := InternalURL(cfg.WorkspaceID) // fallback to Docker-internal
for attempt := 0; attempt < 3; attempt++ {
info, inspectErr := p.cli.ContainerInspect(ctx, resp.ID)
if inspectErr != nil {
break
}
portBindings := info.NetworkSettings.Ports[nat.Port(DefaultPort+"/tcp")]
if len(portBindings) > 0 {
hostPort := portBindings[0].HostPort
hostIP := portBindings[0].HostIP
if hostIP == "" {
hostIP = "127.0.0.1"
}
hostURL = fmt.Sprintf("http://%s:%s", hostIP, hostPort)
break
}
if attempt < 2 {
time.Sleep(500 * time.Millisecond) // wait for Docker to bind the port
}
}
log.Printf("Provisioner: started container %s for workspace %s at %s (internal: %s)", name, cfg.WorkspaceID, hostURL, InternalURL(cfg.WorkspaceID))
return hostURL, nil
}
// buildWorkspaceMount returns the Docker volume spec for /workspace (#65).
//
// Selection matrix:
//
//	cfg.WorkspacePath | cfg.WorkspaceAccess     | mount
//	------------------+-------------------------+--------------------------------
//	""                | "" / "none"             | <named-volume>:/workspace (isolated, current default)
//	"<host-dir>"      | "" / "read_write"       | <host-dir>:/workspace (current PM behaviour)
//	"<host-dir>"      | "read_only"             | <host-dir>:/workspace:ro (research agents get read access without write risk)
//	""                | "read_only"/"read_write"| <named-volume>:/workspace (degraded — access requires a mount; validated at handler layer)
//
// Kept pure + side-effect-free so it's unit-testable.
func buildWorkspaceMount(cfg WorkspaceConfig) string {
	// Isolated named volume: used when no host path is configured, and
	// when access is explicitly "none" even though a path is set.
	if cfg.WorkspacePath == "" || cfg.WorkspaceAccess == WorkspaceAccessNone {
		return fmt.Sprintf("ws-%s-workspace:/workspace", cfg.WorkspaceID)
	}
	// Host bind mount; ":ro" only for read-only mode, otherwise the
	// implicit Docker default (read-write) applies.
	if cfg.WorkspaceAccess == WorkspaceAccessReadOnly {
		return cfg.WorkspacePath + ":/workspace:ro"
	}
	return cfg.WorkspacePath + ":/workspace"
}
// ValidateWorkspaceAccess checks that a (access, path) pair is consistent.
// Returns a clear error on mismatch so the handler layer can reject bad
// payloads with a 400 before provisioning.
//
//   - read_only / read_write with empty path → error (needs a host dir)
//   - unknown access value → error
//   - none / "" → always valid
func ValidateWorkspaceAccess(access, workspacePath string) error {
	if access == "" || access == WorkspaceAccessNone {
		return nil
	}
	if access == WorkspaceAccessReadOnly || access == WorkspaceAccessReadWrite {
		if workspacePath == "" {
			return fmt.Errorf("workspace_access=%q requires workspace_dir to be set", access)
		}
		return nil
	}
	return fmt.Errorf("workspace_access=%q — must be 'none', 'read_only', or 'read_write'", access)
}
// buildContainerEnv assembles the initial environment variables injected
// into every workspace container.
//
//   - PLATFORM_URL: canonical env var the workspace runtime reads for
//     heartbeat / register / A2A proxy.
//   - MOLECULE_URL: canonical env var the Molecule AI MCP client reads
//     (mcp-server/src/index.ts). Injected at provision time so
//     mcp__molecule__* tools called FROM inside the agent container
//     reach the host platform instead of localhost:8080 (which is the
//     container itself). Fixes #67.
//   - PYTHONPATH=/app: makes ADAPTER_MODULE imports resolve regardless of
//     runtime cwd. Standalone workspace-template repos COPY adapter.py to
//     /app and set ENV ADAPTER_MODULE=adapter, but molecule-runtime is a
//     pip console_script entry point so cwd isn't on sys.path automatically.
//     Setting it from the provisioner fixes every adapter image without
//     needing to PR each standalone template repo; per-template Dockerfile
//     ENV can still be overridden since docker -e wins at runtime.
//
// AWARENESS_* vars are injected only when both namespace and URL are set;
// cfg.EnvVars entries are appended after the baseline set. Extracted from
// Start() so it's unit-testable without standing up a Docker daemon.
func buildContainerEnv(cfg WorkspaceConfig) []string {
	vars := []string{
		"WORKSPACE_ID=" + cfg.WorkspaceID,
		"WORKSPACE_CONFIG_PATH=/configs",
		"PLATFORM_URL=" + cfg.PlatformURL,
		"MOLECULE_URL=" + cfg.PlatformURL,
		"TIER=" + strconv.Itoa(cfg.Tier),
		"PLUGINS_DIR=/plugins",
		"PYTHONPATH=/app",
	}
	if cfg.AwarenessNamespace != "" && cfg.AwarenessURL != "" {
		vars = append(vars,
			"AWARENESS_NAMESPACE="+cfg.AwarenessNamespace,
			"AWARENESS_URL="+cfg.AwarenessURL,
		)
	}
	for key, value := range cfg.EnvVars {
		vars = append(vars, key+"="+value)
	}
	return vars
}
// Per-tier resource defaults. Configurable via TIERn_MEMORY_MB and
// TIERn_CPU_SHARES env vars (n in {2,3,4}). CPU shares follow the convention
// 1024 shares == 1 CPU; internally translated to NanoCPUs for a hard cap.
//
// Defaults reflect the tier sizing agreed in issue #14:
//   - T2: 512 MiB, 1024 shares (1 CPU) — unchanged historical default
//   - T3: 2048 MiB, 2048 shares (2 CPU) — new cap (previously uncapped)
//   - T4: 4096 MiB, 4096 shares (4 CPU) — new cap (previously uncapped)
const (
	defaultTier2MemoryMB  = 512
	defaultTier2CPUShares = 1024
	defaultTier3MemoryMB  = 2048
	defaultTier3CPUShares = 2048
	defaultTier4MemoryMB  = 4096
	defaultTier4CPUShares = 4096
)

// tierEnvOverride reads the TIER<tier>_<suffix> env var and returns its
// value when it parses as a positive integer; ok reports whether an
// override was found. Empty, malformed, or non-positive values are
// ignored so bad operator input silently falls back to the default.
func tierEnvOverride(tier int, suffix string) (val int64, ok bool) {
	raw := os.Getenv(fmt.Sprintf("TIER%d_%s", tier, suffix))
	if raw == "" {
		return 0, false
	}
	n, err := strconv.ParseInt(raw, 10, 64)
	if err != nil || n <= 0 {
		return 0, false
	}
	return n, true
}

// getTierMemoryMB returns the memory cap (MiB) for the given tier, reading
// the TIERn_MEMORY_MB env var with fallback to the hardcoded default.
// Returns 0 for tiers with no cap (e.g. tier 1).
func getTierMemoryMB(tier int) int64 {
	var fallback int64
	switch tier {
	case 2:
		fallback = defaultTier2MemoryMB
	case 3:
		fallback = defaultTier3MemoryMB
	case 4:
		fallback = defaultTier4MemoryMB
	default:
		return 0
	}
	if v, ok := tierEnvOverride(tier, "MEMORY_MB"); ok {
		return v
	}
	return fallback
}

// getTierCPUShares returns the CPU allocation (shares, where 1024 == 1 CPU)
// for the given tier, reading the TIERn_CPU_SHARES env var with fallback to
// the hardcoded default. Returns 0 for tiers with no cap.
func getTierCPUShares(tier int) int64 {
	var fallback int64
	switch tier {
	case 2:
		fallback = defaultTier2CPUShares
	case 3:
		fallback = defaultTier3CPUShares
	case 4:
		fallback = defaultTier4CPUShares
	default:
		return 0
	}
	if v, ok := tierEnvOverride(tier, "CPU_SHARES"); ok {
		return v
	}
	return fallback
}
// applyTierResources writes Memory + NanoCPUs to hostCfg from the tier's
// configured limits (env override or default). A zero limit means "no cap"
// and leaves the corresponding field untouched. Returns the resolved
// values for logging.
func applyTierResources(hostCfg *container.HostConfig, tier int) (memMB, cpuShares int64) {
	memMB = getTierMemoryMB(tier)
	cpuShares = getTierCPUShares(tier)
	if memMB > 0 {
		hostCfg.Resources.Memory = memMB << 20 // MiB -> bytes
	}
	if cpuShares > 0 {
		// shares -> NanoCPUs: 1024 shares == 1 CPU == 1e9 NanoCPUs
		hostCfg.Resources.NanoCPUs = cpuShares * 1_000_000_000 / 1024
	}
	return memMB, cpuShares
}
// ApplyTierConfig configures a HostConfig based on the workspace tier.
// Extracted from Start() so it can be tested independently.
//
//   - Tier 1 (Sandboxed): readonly rootfs, tmpfs /tmp, strip /workspace mount
//   - Tier 2 (Standard): resource limits (default 512 MiB, 1 CPU)
//   - Tier 3 (Privileged): privileged + host PID, Docker network, capped resources
//   - Tier 4 (Full access): privileged, host PID, host network, Docker socket, capped resources
//
// Per-tier memory/CPU caps are overridable via TIERn_MEMORY_MB /
// TIERn_CPU_SHARES env vars (n in {2,3,4}).
//
// Unknown/zero tiers fall through to the Tier 2 path (safe resource-limited container).
func ApplyTierConfig(hostCfg *container.HostConfig, cfg WorkspaceConfig, configMount, name string) {
	switch cfg.Tier {
	case 1:
		// Sandboxed: only the config volume stays mounted (plugins live
		// under /configs/plugins/). Root filesystem is read-only, with a
		// small tmpfs at /tmp because the agent needs scratch space.
		hostCfg.Binds = []string{configMount}
		hostCfg.ReadonlyRootfs = true
		hostCfg.Tmpfs = map[string]string{"/tmp": "rw,noexec,nosuid,size=64m"}
		log.Printf("Provisioner: T1 sandboxed mode for %s (readonly, no /workspace)", name)
	case 3:
		// Privileged + host PID, but deliberately NOT host network: keeping
		// the Docker network preserves name-based container-to-container
		// reachability and avoids port collisions between concurrent T3
		// containers.
		hostCfg.Privileged = true
		hostCfg.PidMode = "host"
		mem, cpu := applyTierResources(hostCfg, 3)
		log.Printf("Provisioner: T3 privileged mode for %s (privileged, host PID, %dm memory, %d CPU shares)", name, mem, cpu)
	case 4:
		// Everything from T3 plus host network and the Docker socket — for
		// workspaces that manage other containers or reach host services
		// directly.
		hostCfg.Privileged = true
		hostCfg.PidMode = "host"
		hostCfg.NetworkMode = "host"
		hostCfg.Binds = append(hostCfg.Binds, "/var/run/docker.sock:/var/run/docker.sock")
		mem, cpu := applyTierResources(hostCfg, 4)
		log.Printf("Provisioner: T4 full-host mode for %s (privileged, host PID, host network, docker socket, %dm memory, %d CPU shares)", name, mem, cpu)
	default:
		// Tier 2 (Standard) and anything unrecognized: plain container with
		// resource caps and no extra privileges — the safe default.
		mem, cpu := applyTierResources(hostCfg, 2)
		log.Printf("Provisioner: T2 standard mode for %s (%dm memory, %d CPU shares)", name, mem, cpu)
	}
}
// CopyTemplateToContainer copies files from a host directory into /configs in the container.
//
// The directory tree is packed into an in-memory tar archive and shipped
// via the Docker CopyToContainer API. Shell/Python/Markdown files get
// their CRLF line endings normalized to LF on the way in.
//
// Bug fix: the tar header's Size must be finalized BEFORE WriteHeader is
// called — tar.Writer serializes the size at WriteHeader time. The
// previous code wrote the header carrying the on-disk size and only then
// stripped CRLFs and updated header.Size, so any file that actually
// contained CRLFs produced a short write and tw.Close() failed with
// "tar: missed writing N bytes". Data is now read and normalized first.
func (p *Provisioner) CopyTemplateToContainer(ctx context.Context, containerID, templatePath string) error {
	var buf bytes.Buffer
	tw := tar.NewWriter(&buf)
	err := filepath.Walk(templatePath, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		rel, err := filepath.Rel(templatePath, path)
		if err != nil {
			return err
		}
		if rel == "." {
			return nil
		}
		header, err := tar.FileInfoHeader(info, "")
		if err != nil {
			return err
		}
		header.Name = rel
		var data []byte
		if !info.IsDir() {
			data, err = os.ReadFile(path)
			if err != nil {
				return err
			}
			// Strip CRLF from shell scripts and Python files. Windows
			// git checkout introduces \r\n even with .gitattributes eol=lf;
			// Linux containers choke on \r in shebangs and Python path args.
			// This is the single fix point — every file that enters a
			// container passes through CopyTemplateToContainer.
			ext := filepath.Ext(path)
			if ext == ".sh" || ext == ".py" || ext == ".md" {
				data = bytes.ReplaceAll(data, []byte("\r\n"), []byte("\n"))
			}
			// Size must reflect the (possibly shrunk) payload before the
			// header is serialized below.
			header.Size = int64(len(data))
		}
		if err := tw.WriteHeader(header); err != nil {
			return err
		}
		if !info.IsDir() {
			if _, err := tw.Write(data); err != nil {
				return err
			}
		}
		return nil
	})
	if err != nil {
		return fmt.Errorf("failed to create tar from %s: %w", templatePath, err)
	}
	if err := tw.Close(); err != nil {
		return fmt.Errorf("failed to close tar writer: %w", err)
	}
	return p.cli.CopyToContainer(ctx, containerID, "/configs", &buf, container.CopyToContainerOptions{})
}
// WriteFilesToContainer writes in-memory files into /configs in the container.
// Files are packed into an in-memory tar archive with mode 0644 (dirs 0755)
// and extracted by the engine under /configs.
//
// NOTE(review): only a file's immediate parent directory gets an explicit
// tar entry; deeper ancestors rely on the engine creating missing parents
// during extraction — confirm if nested paths beyond one level are used.
func (p *Provisioner) WriteFilesToContainer(ctx context.Context, containerID string, files map[string][]byte) error {
	var archive bytes.Buffer
	tw := tar.NewWriter(&archive)
	seenDirs := make(map[string]bool)
	for path, contents := range files {
		// Emit a directory entry for the parent once per distinct dir.
		if parent := filepath.Dir(path); parent != "." && !seenDirs[parent] {
			dirHeader := &tar.Header{
				Typeflag: tar.TypeDir,
				Name:     parent + "/",
				Mode:     0755,
			}
			if err := tw.WriteHeader(dirHeader); err != nil {
				return fmt.Errorf("failed to write tar dir header for %s: %w", parent, err)
			}
			seenDirs[parent] = true
		}
		fileHeader := &tar.Header{
			Name: path,
			Mode: 0644,
			Size: int64(len(contents)),
		}
		if err := tw.WriteHeader(fileHeader); err != nil {
			return fmt.Errorf("failed to write tar header for %s: %w", path, err)
		}
		if _, err := tw.Write(contents); err != nil {
			return fmt.Errorf("failed to write tar data for %s: %w", path, err)
		}
	}
	if err := tw.Close(); err != nil {
		return fmt.Errorf("failed to close tar writer: %w", err)
	}
	return p.cli.CopyToContainer(ctx, containerID, "/configs", &archive, container.CopyToContainerOptions{})
}
// CopyToContainer exposes CopyToContainer from the Docker client for use by other packages.
// content is streamed to the engine as-is; per the Docker API it is
// expected to be a tar archive, extracted under dstPath in the container.
func (p *Provisioner) CopyToContainer(ctx context.Context, containerID, dstPath string, content io.Reader) error {
return p.cli.CopyToContainer(ctx, containerID, dstPath, content, container.CopyToContainerOptions{})
}
// ExecRead runs "cat <filePath>" in an existing container and returns the output.
// Used to read config files from a running container before stopping it.
//
// NOTE(review): the exec's exit status is not checked — if cat fails
// (missing file), the result is simply empty output.
func (p *Provisioner) ExecRead(ctx context.Context, containerName, filePath string) ([]byte, error) {
	created, err := p.cli.ContainerExecCreate(ctx, containerName, container.ExecOptions{
		Cmd:          []string{"cat", filePath},
		AttachStdout: true,
	})
	if err != nil {
		return nil, err
	}
	stream, err := p.cli.ContainerExecAttach(ctx, created.ID, container.ExecAttachOptions{})
	if err != nil {
		return nil, err
	}
	defer stream.Close()
	raw, err := io.ReadAll(stream.Reader)
	if err != nil {
		return nil, err
	}
	// The attach stream is Docker-multiplexed: each frame carries an
	// 8-byte header (payload length big-endian in bytes 4-7) followed
	// by the payload. Reassemble just the payloads, stopping at any
	// truncated trailing frame.
	var payload []byte
	for off := 0; off+8 <= len(raw); {
		frameLen := int(raw[off+4])<<24 | int(raw[off+5])<<16 | int(raw[off+6])<<8 | int(raw[off+7])
		if off+8+frameLen > len(raw) {
			break
		}
		payload = append(payload, raw[off+8:off+8+frameLen]...)
		off += 8 + frameLen
	}
	return payload, nil
}
// ReadFromVolume reads a file from a Docker named volume using a throwaway container.
// Used as a fallback when ExecRead fails (container already stopped).
//
// Sequence: create a short-lived alpine container with the volume mounted
// read-only at /vol, run "cat /vol/<filePath>", wait for it to exit, then
// read its stdout back via the container logs and strip the stream headers.
// The throwaway container is force-removed via defer regardless of outcome.
//
// NOTE(review): the container's exit status is not checked — a failed cat
// (missing file) yields empty output rather than an error. Also assumes an
// "alpine" image is available locally; no pull is attempted here.
func (p *Provisioner) ReadFromVolume(ctx context.Context, volumeName, filePath string) ([]byte, error) {
resp, err := p.cli.ContainerCreate(ctx, &container.Config{
Image: "alpine",
Cmd: []string{"cat", "/vol/" + filePath},
}, &container.HostConfig{
Binds: []string{volumeName + ":/vol:ro"},
}, nil, nil, "")
if err != nil {
return nil, err
}
defer p.cli.ContainerRemove(ctx, resp.ID, container.RemoveOptions{Force: true})
if err := p.cli.ContainerStart(ctx, resp.ID, container.StartOptions{}); err != nil {
return nil, err
}
// Block until the container stops (cat is near-instant); a nil error
// on errCh falls through to reading the logs.
waitCh, errCh := p.cli.ContainerWait(ctx, resp.ID, container.WaitConditionNotRunning)
select {
case <-waitCh:
case err := <-errCh:
if err != nil {
return nil, err
}
}
reader, err := p.cli.ContainerLogs(ctx, resp.ID, container.LogsOptions{ShowStdout: true})
if err != nil {
return nil, err
}
defer reader.Close()
data, err := io.ReadAll(reader)
if err != nil {
return nil, err
}
// Strip Docker multiplexed stream headers: 8-byte frame header with the
// big-endian payload length in bytes 4-7, then the payload.
var clean []byte
for len(data) >= 8 {
size := int(data[4])<<24 | int(data[5])<<16 | int(data[6])<<8 | int(data[7])
if 8+size > len(data) {
break
}
clean = append(clean, data[8:8+size]...)
data = data[8+size:]
}
return clean, nil
}
// WriteAuthTokenToVolume writes the workspace auth token into the config volume
// BEFORE the container starts, eliminating the token-injection race window where
// a restarted container could read a stale token from /configs/.auth_token before
// WriteFilesToContainer writes the new one. Issue #1877.
//
// Uses a throwaway alpine container to write directly to the named volume,
// bypassing the container lifecycle entirely.
//
// Fixes:
//   - $TOKEN is now double-quoted: the token travels via the TOKEN env var
//     and the previous unquoted expansion was subject to shell word
//     splitting and glob expansion, corrupting any token containing
//     whitespace or glob metacharacters.
//   - The container's exit status is now checked, so a failed write
//     (e.g. full disk) surfaces as an error instead of a success log.
func (p *Provisioner) WriteAuthTokenToVolume(ctx context.Context, workspaceID, token string) error {
	volName := ConfigVolumeName(workspaceID)
	resp, err := p.cli.ContainerCreate(ctx, &container.Config{
		Image: "alpine",
		Cmd:   []string{"sh", "-c", `mkdir -p /vol && printf '%s' "$TOKEN" > /vol/.auth_token && chmod 0600 /vol/.auth_token`},
		Env:   []string{"TOKEN=" + token},
	}, &container.HostConfig{
		Binds: []string{volName + ":/vol"},
	}, nil, nil, "")
	if err != nil {
		return fmt.Errorf("failed to create token-write container: %w", err)
	}
	defer p.cli.ContainerRemove(ctx, resp.ID, container.RemoveOptions{Force: true})
	if err := p.cli.ContainerStart(ctx, resp.ID, container.StartOptions{}); err != nil {
		return fmt.Errorf("failed to start token-write container: %w", err)
	}
	waitCh, errCh := p.cli.ContainerWait(ctx, resp.ID, container.WaitConditionNotRunning)
	select {
	case status := <-waitCh:
		if status.StatusCode != 0 {
			return fmt.Errorf("token-write container exited with status %d", status.StatusCode)
		}
	case writeErr := <-errCh:
		if writeErr != nil {
			return fmt.Errorf("token-write container exited with error: %w", writeErr)
		}
	}
	log.Printf("Provisioner: wrote auth token to volume %s/.auth_token", volName)
	return nil
}
// execInContainer runs a command inside a running container as root.
// Best-effort: errors are logged, never returned, so callers are not
// failed by a missing or already-stopped container.
func (p *Provisioner) execInContainer(ctx context.Context, containerID string, cmd []string) {
	created, err := p.cli.ContainerExecCreate(ctx, containerID, container.ExecOptions{Cmd: cmd, User: "root"})
	if err != nil {
		log.Printf("Provisioner: exec create failed: %v", err)
		return
	}
	if startErr := p.cli.ContainerExecStart(ctx, created.ID, container.ExecStartOptions{}); startErr != nil {
		log.Printf("Provisioner: exec start failed: %v", startErr)
	}
}
// RemoveVolume deletes a workspace's config volume, returning an error if
// that removal fails. It then best-effort removes the companion
// claude-sessions volume, which may legitimately not exist for
// non claude-code runtimes — that failure is only logged. Issue #12.
func (p *Provisioner) RemoveVolume(ctx context.Context, workspaceID string) error {
	cfgVol := ConfigVolumeName(workspaceID)
	if err := p.cli.VolumeRemove(ctx, cfgVol, true); err != nil {
		return fmt.Errorf("failed to remove volume %s: %w", cfgVol, err)
	}
	log.Printf("Provisioner: removed config volume %s", cfgVol)

	sessVol := ClaudeSessionVolumeName(workspaceID)
	if err := p.cli.VolumeRemove(ctx, sessVol, true); err != nil {
		log.Printf("Provisioner: claude-sessions volume cleanup warning for %s: %v", sessVol, err)
		return nil
	}
	log.Printf("Provisioner: removed claude-sessions volume %s", sessVol)
	return nil
}
// Stop tears down a workspace container.
//
// It deliberately force-removes in a single call instead of stop-then-remove:
// with Docker's `unless-stopped` restart policy, stopping first opens a race
// where the daemon respawns the container before the remove lands, leaving a
// zombie that re-registers via heartbeat after deletion. Force-remove kills
// and deletes atomically, bypassing the restart policy.
//
// A missing container is treated as already stopped (logged, not an error).
func (p *Provisioner) Stop(ctx context.Context, workspaceID string) error {
	if p == nil || p.cli == nil {
		return ErrNoBackend
	}
	name := ContainerName(workspaceID)
	removeErr := p.cli.ContainerRemove(ctx, name, container.RemoveOptions{Force: true})
	if removeErr != nil {
		// Most likely the container is already gone — harmless.
		log.Printf("Provisioner: force-remove warning for %s: %v", name, removeErr)
	}
	log.Printf("Provisioner: stopped and removed container %s", name)
	return nil
}
// IsRunning reports whether a workspace container is currently running.
//
// It is deliberately conservative on transient Docker errors: any inspect
// failure OTHER than NotFound returns (true, err). Rationale: the only
// caller that acts destructively on `running=false` is
// a2a_proxy.maybeMarkContainerDead, which tears down + re-provisions the
// workspace. A daemon hiccup (timeout, EOF on the daemon socket, context
// deadline) is not evidence the container died — only that the daemon is
// momentarily busy. The old behaviour collapsed every error into
// "container doesn't exist", which triggered a restart cascade on
// 2026-04-16: 6 containers saw simultaneous A2A forward failures during a
// batch delegation, the reactive IsRunning calls all hit the loaded daemon,
// timed out, and flipped every container to "dead" in parallel.
//
// NotFound (container legitimately deleted) is the sole case where
// running=false is safe to act on. Every other error path reports "alive"
// plus the error for metrics/logging; a genuine crash still surfaces via
// exec heartbeat or TTL, both of which have narrower false-positive windows
// than a daemon-inspect RPC.
func (p *Provisioner) IsRunning(ctx context.Context, workspaceID string) (bool, error) {
	if p == nil || p.cli == nil {
		return false, ErrNoBackend
	}
	info, err := p.cli.ContainerInspect(ctx, ContainerName(workspaceID))
	switch {
	case err == nil:
		return info.State.Running, nil
	case isContainerNotFound(err):
		// Container genuinely deleted — the only safe "not running".
		return false, nil
	default:
		// Transient daemon error: stay "alive" to avoid the destructive
		// restart path, but propagate the error for observability.
		return true, err
	}
}
// isContainerNotFound returns true when the Docker client indicates the
// named container genuinely does not exist, versus a transient daemon
// error (timeout, socket EOF, context cancellation).
//
// docker/docker v28 uses multiple distinct NotFound shapes depending on
// transport:
// - the typed errdefs.ErrNotFound
// - a wrapped error whose message contains "No such container"
//
// Rather than import errdefs (which would add a transitive dep), we
// match on the error string. String-matching is the exact approach the
// Docker CLI itself uses internally — see the "No such container" check
// in docker/cli — and is stable across daemon versions.
func isContainerNotFound(err error) bool {
if err == nil {
return false
}
s := err.Error()
return strings.Contains(s, "No such container") ||
strings.Contains(s, "not found")
}
// DockerClient exposes the underlying Docker client so other handlers can
// share a single connection to the daemon.
func (p *Provisioner) DockerClient() *client.Client { return p.cli }
// Close releases the underlying Docker client connection.
func (p *Provisioner) Close() error { return p.cli.Close() }
// ValidateConfigSource is a pure check that ensures at least one static
// source of /configs/config.yaml is available before a container starts.
//
// Inputs mirror the fields on WorkspaceConfig:
// - templatePath: host dir expected to contain config.yaml (copied into /configs)
// - configFiles: in-memory files written into /configs at start time
//
// Returns nil if either source will place config.yaml into /configs.
// When both sources are empty, returns ErrNoConfigSource so callers can
// fall through to a volume probe (VolumeHasFile) before giving up.
//
// Used by the platform's provision flow to catch the rogue-restart-loop
// case (#17): a workspace whose config volume was wiped and whose
// auto-restart path passes empty template+configFiles would otherwise
// boot into a FileNotFoundError crash loop under Docker's
// `unless-stopped` restart policy.
func ValidateConfigSource(templatePath string, configFiles map[string][]byte) error {
if templatePath != "" {
// Stat the template's config.yaml; an empty/stale template dir
// without config.yaml is as broken as no template at all.
info, err := os.Stat(filepath.Join(templatePath, "config.yaml"))
if err == nil && !info.IsDir() {
return nil
}
}
if configFiles != nil {
if data, ok := configFiles["config.yaml"]; ok && len(data) > 0 {
return nil
}
}
return ErrNoConfigSource
}
// ErrNoConfigSource is returned by ValidateConfigSource when neither the
// template path nor the in-memory config files supply a config.yaml.
var ErrNoConfigSource = fmt.Errorf("no config.yaml source: template path missing config.yaml and configFiles empty")
// VolumeHasFile reports whether the workspace's config volume already
// contains relPath (relative to /configs). The auto-restart path uses this
// to confirm a previously-provisioned volume is still populated before
// reusing it — if the volume was wiped we must regenerate config or fail
// cleanly rather than loop on FileNotFoundError.
//
// Implementation: run a throwaway alpine `test -f` container with the
// volume mounted read-only. Returns (false, nil) when the file is absent
// and (false, err) only on Docker-level failures.
func (p *Provisioner) VolumeHasFile(ctx context.Context, workspaceID, relPath string) (bool, error) {
	volName := ConfigVolumeName(workspaceID)
	// Probe for the volume first — binding a missing volume would
	// auto-create it as a side effect.
	if _, inspectErr := p.cli.VolumeInspect(ctx, volName); inspectErr != nil {
		return false, nil
	}
	created, err := p.cli.ContainerCreate(ctx,
		&container.Config{
			Image: "alpine",
			Cmd:   []string{"test", "-f", "/vol/" + relPath},
		},
		&container.HostConfig{Binds: []string{volName + ":/vol:ro"}},
		nil, nil, "")
	if err != nil {
		return false, err
	}
	defer p.cli.ContainerRemove(ctx, created.ID, container.RemoveOptions{Force: true})
	if startErr := p.cli.ContainerStart(ctx, created.ID, container.StartOptions{}); startErr != nil {
		return false, startErr
	}
	waitCh, errCh := p.cli.ContainerWait(ctx, created.ID, container.WaitConditionNotRunning)
	select {
	case result := <-waitCh:
		// `test -f` exits 0 iff the file exists.
		return result.StatusCode == 0, nil
	case waitErr := <-errCh:
		return false, waitErr
	case <-ctx.Done():
		return false, ctx.Err()
	}
}
// isImageNotFoundErr classifies a Docker client error as "image not
// available locally." The daemon wraps this message in a generic
// SystemError type without exposing a typed sentinel, so we fall back
// to substring match on the known messages emitted by moby. Used by
// Start() to rewrite opaque ContainerCreate failures into actionable
// "run build-all.sh" hints. Issue #117.
func isImageNotFoundErr(err error) bool {
if err == nil {
return false
}
m := strings.ToLower(err.Error())
return strings.Contains(m, "no such image") ||
strings.Contains(m, "not found") && strings.Contains(m, "image")
}
// runtimeTagFromImage extracts the runtime name from a workspace-template
// image reference for user-facing error hints. It understands both the
// legacy local tag (`workspace-template:<runtime>`) and the current GHCR
// form (`ghcr.io/molecule-ai/workspace-template-<runtime>:<tag>`), falling
// back to the bare tag (or the whole string) when the shape is unknown.
func runtimeTagFromImage(image string) string {
	const (
		legacyPrefix = "workspace-template:"
		ghcrInfix    = "workspace-template-"
	)
	// Legacy local form: everything after the colon is the runtime.
	if strings.HasPrefix(image, legacyPrefix) {
		return strings.TrimPrefix(image, legacyPrefix)
	}
	// GHCR form: take what follows "workspace-template-" and drop any
	// trailing ":<tag>".
	if idx := strings.Index(image, ghcrInfix); idx >= 0 {
		runtimeName := image[idx+len(ghcrInfix):]
		if colon := strings.IndexByte(runtimeName, ':'); colon >= 0 {
			runtimeName = runtimeName[:colon]
		}
		return runtimeName
	}
	// Unknown shape: return the tag after the last colon when present,
	// otherwise the full reference.
	if colon := strings.LastIndexByte(image, ':'); colon >= 0 && colon < len(image)-1 {
		return image[colon+1:]
	}
	return image
}
// dockerImageClient is the subset of the Docker client API used by
// pullImageAndDrain. Declared as a consumer-side interface so tests can
// inject a fake without spinning up a daemon; *client.Client satisfies it.
type dockerImageClient interface {
	// ImagePull starts an asynchronous pull of ref; the returned stream
	// must be drained to completion for the pull to finish.
	ImagePull(ctx context.Context, ref string, opts dockerimage.PullOptions) (io.ReadCloser, error)
}
// pullImageAndDrain pulls ref from its registry and consumes the progress
// stream to completion. The Docker engine pull API is asynchronous — the
// returned ReadCloser MUST be fully read for the pull to finish; returning
// early would leave the daemon mid-pull. The progress payload itself is
// discarded: operators read container logs for boot diagnostics, not pull
// chatter.
//
// `platform` is "os/arch" (e.g. "linux/amd64") when the host must pull a
// non-native manifest, or "" to let the daemon pick its default. See
// defaultImagePlatform for when that matters.
func pullImageAndDrain(ctx context.Context, cli dockerImageClient, ref, platform string) error {
	stream, err := cli.ImagePull(ctx, ref, dockerimage.PullOptions{Platform: platform})
	if err != nil {
		return fmt.Errorf("ImagePull: %w", err)
	}
	defer stream.Close()
	if _, copyErr := io.Copy(io.Discard, stream); copyErr != nil {
		return fmt.Errorf("drain pull stream: %w", copyErr)
	}
	return nil
}
// defaultImagePlatform picks the Docker image platform string used for
// `ImagePull` + `ContainerCreate` on the workspace-template-* images.
//
// Empty result means "use the daemon default" — the common case on
// linux/amd64 hosts (CI, SaaS EC2, Linux dev machines). On Apple Silicon
// the GHCR workspace-template-* images ship a single linux/amd64
// manifest today, so the daemon's native linux/arm64/v8 request misses
// with "no matching manifest". Forcing linux/amd64 pulls the amd64
// manifest and lets Docker Desktop run it under QEMU emulation. Slow
// (25× native) but functional — unblocks local dev on M-series Macs.
//
// Override via MOLECULE_IMAGE_PLATFORM — set to the empty string to
// disable the auto-force, or to a specific value ("linux/amd64",
// "linux/arm64") to pin. SaaS production should leave this unset.
//
// Tracked in issue #1875; remove this fallback once the template repos
// publish multi-arch manifests.
func defaultImagePlatform() string {
	// LookupEnv (not Getenv) so an explicitly-empty env var disables the
	// Apple-Silicon auto-force rather than being indistinguishable from unset.
	if v, ok := os.LookupEnv("MOLECULE_IMAGE_PLATFORM"); ok {
		return v
	}
	if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
		return "linux/amd64"
	}
	return ""
}

// DefaultImagePlatform is the exported alias used by the admin
// workspace-images handler so its ImagePull picks the same platform as
// the provisioner's. Avoids duplicating the Apple-Silicon-needs-amd64
// logic and keeps both call sites in sync if Docker manifest support
// changes (e.g., when the templates start shipping multi-arch).
func DefaultImagePlatform() string { return defaultImagePlatform() }
// parseOCIPlatform converts "linux/amd64" into the *ocispec.Platform shape
// expected by `ContainerCreate`'s platform argument. An empty string — or
// any value missing either side of the "/" — yields nil, which is how the
// Docker SDK signals "no preference".
func parseOCIPlatform(s string) *ocispec.Platform {
	if s == "" {
		return nil
	}
	osPart, archPart, ok := strings.Cut(s, "/")
	if !ok || osPart == "" || archPart == "" {
		return nil
	}
	return &ocispec.Platform{OS: osPart, Architecture: archPart}
}