feat(provisioner): pull workspace-template images from GHCR

Every standalone workspace-template repo now publishes to
ghcr.io/molecule-ai/workspace-template-<runtime>:latest via the
reusable publish-template-image workflow in molecule-ci (landed
today — one caller per template repo). This PR makes the
provisioner actually use those images:

- RuntimeImages map + DefaultImage switched from bare local tags
  (workspace-template:<runtime>) to their GHCR equivalents.
- New pull-on-miss step before ContainerCreate (pullImageAndDrain): if
  the image isn't present locally, attempt `docker pull` and drain the
  progress stream to completion. Best-effort — if the pull fails
  (network, auth, rate limit) the subsequent ContainerCreate still
  surfaces the actionable "No such image" error, now with a
  GHCR-appropriate hint instead of the defunct
  `bash workspace/build-all.sh <runtime>` advice.
- runtimeTagFromImage now handles both forms: legacy
  `workspace-template:<runtime>` (local dev via build-all.sh /
  rebuild-runtime-images.sh) and the current GHCR shape. Keeps
  error hints sensible in both worlds.
- Tests cover the GHCR path for tag extraction and the new error
  message shape. Legacy local tags still recognised.

Local dev path unchanged — scripts/build-images.sh and
workspace/rebuild-runtime-images.sh still produce locally-tagged
`workspace-template:<runtime>` images, and Docker's image
resolver matches them before any pull is attempted. So
contributors can keep iterating on a template repo without
round-tripping through GHCR.

Follow-on impact:
- hongmingwang.moleculesai.app (and any other tenant EC2) will
  auto-pull `ghcr.io/molecule-ai/workspace-template-hermes:latest`
  on the next hermes workspace provision — picking up the real
  Nous hermes-agent behind the A2A bridge (template-hermes v2.1.0)
  without any tenant-side rebuild step.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hongming Wang 2026-04-22 12:39:56 -07:00
parent a8e4afe863
commit 9df3159c59
2 changed files with 100 additions and 40 deletions

View File

@ -15,33 +15,42 @@ import (
"time"
"github.com/docker/docker/api/types/container"
dockerimage "github.com/docker/docker/api/types/image"
"github.com/docker/docker/api/types/network"
"github.com/docker/docker/api/types/volume"
"github.com/docker/docker/client"
"github.com/docker/go-connections/nat"
)
// RuntimeImages maps runtime names to their Docker image refs on GHCR.
// Each standalone template repo publishes its image via the reusable
// publish-template-image workflow in molecule-ci on every main merge.
// Provisioner.Start inspects the resolved ref and, when it is not present
// locally, pulls it on demand via pullImageAndDrain — no pre-build step on
// the tenant host.
//
// Each key must appear exactly once: Go rejects duplicate keys in a map
// literal, so the legacy `workspace-template:<runtime>` values are replaced
// here rather than kept alongside the GHCR refs.
//
// Legacy local builds (`docker build -t workspace-template:<runtime>` via
// scripts/build-images.sh) remain available for development.
// NOTE(review): a bare `workspace-template:<runtime>` tag is a different
// reference from the ghcr.io refs below, so it presumably only avoids the
// pull when the caller resolves to that tag, or when the local image is
// also tagged with the GHCR name — confirm against callers.
// WORKSPACE_IMAGE_LOCAL_OVERRIDE=1 is documented as a caller-enforced
// switch to short-circuit pulls; no reader of that variable is visible in
// this part of the file — TODO confirm.
var RuntimeImages = map[string]string{
	"langgraph":   "ghcr.io/molecule-ai/workspace-template-langgraph:latest",
	"claude-code": "ghcr.io/molecule-ai/workspace-template-claude-code:latest",
	"openclaw":    "ghcr.io/molecule-ai/workspace-template-openclaw:latest",
	"deepagents":  "ghcr.io/molecule-ai/workspace-template-deepagents:latest",
	"crewai":      "ghcr.io/molecule-ai/workspace-template-crewai:latest",
	"autogen":     "ghcr.io/molecule-ai/workspace-template-autogen:latest",
	"hermes":      "ghcr.io/molecule-ai/workspace-template-hermes:latest",     // Hermes (Nous Research) — real hermes-agent behind A2A bridge
	"gemini-cli":  "ghcr.io/molecule-ai/workspace-template-gemini-cli:latest", // Google Gemini CLI
}
const (
// DefaultImage is the fallback workspace Docker image (langgraph is the most common runtime).
DefaultImage = "workspace-template:langgraph"
DefaultImage = "ghcr.io/molecule-ai/workspace-template-langgraph:latest"
// NOTE: Every runtime MUST have an entry in RuntimeImages above. If a runtime is missing,
// it falls back to DefaultImage which may have wrong deps. Add new runtimes to both
// RuntimeImages AND create adapters/<runtime>/Dockerfile.
// RuntimeImages AND create the standalone template repo.
// DefaultNetwork is the Docker network workspaces join.
DefaultNetwork = "molecule-monorepo-net"
@ -227,24 +236,32 @@ func (p *Provisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, e
// Ensure no stale container exists with the same name (race with restart policy)
_ = p.cli.ContainerRemove(ctx, name, container.RemoveOptions{Force: true})
// Log image resolution for debugging stale-image issues
// Log image resolution for debugging stale-image issues, and pull from
// GHCR on miss so tenant hosts don't need a pre-build step anymore.
// The pull is best-effort: if it fails (network, auth, rate limit) the
// subsequent ContainerCreate still surfaces the actionable error below.
imgInspect, _, imgErr := p.cli.ImageInspectWithRaw(ctx, image)
if imgErr == nil {
log.Printf("Provisioner: creating %s from image %s (ID: %s, created: %s)",
name, image, imgInspect.ID[:19], imgInspect.Created[:19])
} else {
log.Printf("Provisioner: creating %s from image %s (inspect failed: %v)", name, image, imgErr)
log.Printf("Provisioner: image %s not present locally (%v) — attempting pull", image, imgErr)
if perr := pullImageAndDrain(ctx, p.cli, image); perr != nil {
log.Printf("Provisioner: image pull for %s failed: %v (falling through to create)", image, perr)
} else {
log.Printf("Provisioner: pulled %s", image)
}
}
// Create and start container. If the image isn't available locally,
// Create and start container. If the image still isn't available,
// Docker returns a generic "No such image" error that's opaque to
// operators — wrap it with the resolved tag and the exact build
// operators — wrap it with the resolved tag and the exact pull
// command so last_sample_error surfaces something actionable. Issue #117.
resp, err := p.cli.ContainerCreate(ctx, containerCfg, hostCfg, networkCfg, nil, name)
if err != nil {
if isImageNotFoundErr(err) {
return "", fmt.Errorf(
"docker image %q not found — run 'bash workspace/build-all.sh %s' to build it (underlying error: %w)",
"docker image %q not found after pull attempt — verify GHCR visibility for %s and that the tenant has internet access (underlying error: %w)",
image, runtimeTagFromImage(image), err,
)
}
@ -924,17 +941,53 @@ func isImageNotFoundErr(err error) bool {
strings.Contains(m, "not found") && strings.Contains(m, "image")
}
// runtimeTagFromImage extracts the runtime name from a workspace-template
// image reference for use in user-facing error hints.
//
// Recognised shapes, checked in order:
//   - legacy local tag `workspace-template:<runtime>` yields <runtime>
//   - GHCR form `<registry>/<org>/workspace-template-<runtime>` followed by
//     an optional `:<tag>` or `@<digest>` yields <runtime>
//   - any other `<name>:<tag>` yields <tag>
//
// Anything else — no tag, a trailing colon, or a colon that belongs only to
// a registry host:port (i.e. appears before the last "/") — is returned
// unchanged so the hint still names the image.
func runtimeTagFromImage(image string) string {
	const legacyPrefix = "workspace-template:"
	if strings.HasPrefix(image, legacyPrefix) {
		return image[len(legacyPrefix):]
	}

	// GHCR form: strip everything up to and including "workspace-template-",
	// then drop the ":<tag>" or "@<digest>" suffix, whichever comes first.
	const ghcrInfix = "workspace-template-"
	if i := strings.Index(image, ghcrInfix); i >= 0 {
		runtime := image[i+len(ghcrInfix):]
		if j := strings.IndexAny(runtime, ":@"); j >= 0 {
			runtime = runtime[:j]
		}
		return runtime
	}

	// Generic fallback: only a colon inside the final path component is a
	// tag separator; a colon before the last "/" is a registry port.
	lastColon := strings.LastIndex(image, ":")
	if lastColon > strings.LastIndex(image, "/") && lastColon < len(image)-1 {
		return image[lastColon+1:]
	}
	return image
}
// dockerImageClient is the subset of the Docker client API used by
// pullImageAndDrain: a single ImagePull method. Declared as an interface
// so tests can inject a fake without spinning up a daemon; Provisioner.Start
// passes its p.cli client for this parameter.
type dockerImageClient interface {
// ImagePull requests a pull of ref and returns the progress stream, which
// the caller is responsible for reading and closing.
ImagePull(ctx context.Context, ref string, opts dockerimage.PullOptions) (io.ReadCloser, error)
}
// pullImageAndDrain asks the Docker engine to pull ref and then reads the
// returned progress stream until EOF. The engine's pull API is
// asynchronous: the stream MUST be consumed in full for the pull to
// finish, and bailing out early leaves the daemon mid-pull. The progress
// payload itself is thrown away — operators look at container logs for
// boot diagnostics, not pull chatter.
//
// Returns a wrapped error when the pull request itself is rejected
// ("ImagePull: ...") or when reading the stream fails
// ("drain pull stream: ..."); nil otherwise.
//
// NOTE(review): the engine is understood to report failures that occur
// after the stream has started as JSON `error` messages inside the stream.
// Because the payload is discarded, such failures would not surface here
// and the caller would log a successful pull — confirm against the Docker
// API docs and consider decoding the stream if that matters.
func pullImageAndDrain(ctx context.Context, cli dockerImageClient, ref string) error {
	stream, pullErr := cli.ImagePull(ctx, ref, dockerimage.PullOptions{})
	if pullErr != nil {
		return fmt.Errorf("ImagePull: %w", pullErr)
	}
	// Release the underlying connection whether or not the drain succeeds.
	defer stream.Close()

	_, drainErr := io.Copy(io.Discard, stream)
	if drainErr != nil {
		return fmt.Errorf("drain pull stream: %w", drainErr)
	}
	return nil
}

View File

@ -708,9 +708,16 @@ func fmtErr(s string) error { return testErr(s) }
func TestRuntimeTagFromImage(t *testing.T) {
cases := map[string]string{
"workspace-template:openclaw": "openclaw",
// Legacy local-build form (still supported for `docker build -t
// workspace-template:<runtime>` dev loops).
"workspace-template:openclaw": "openclaw",
"workspace-template:claude-code": "claude-code",
"workspace-template:base": "base",
"workspace-template:base": "base",
// Current GHCR form produced by molecule-ci's publish-template-image
// workflow and consumed by RuntimeImages.
"ghcr.io/molecule-ai/workspace-template-hermes:latest": "hermes",
"ghcr.io/molecule-ai/workspace-template-claude-code:latest": "claude-code",
"ghcr.io/molecule-ai/workspace-template-langgraph:sha-abc1234": "langgraph",
// Fallbacks for non-standard shapes
"myregistry.io/foo:v1.2": "v1.2",
"no-colon-at-all": "no-colon-at-all",
@ -728,28 +735,28 @@ func TestRuntimeTagFromImage(t *testing.T) {
// ---------- End-to-end error-message shape ----------
//
// Verifies the wrapped error that Start() surfaces when ContainerCreate
// hits "no such image" — callers rely on both the human hint and the
// original underlying error being preserved (via %w) for errors.Is chains.
// hits "no such image" after the pull-on-miss attempt. Callers rely on
// both the human hint and the original underlying error being preserved
// (via %w) for errors.Is chains.
func TestImageNotFoundErrorIncludesBuildHint(t *testing.T) {
// Simulate the exact wrap Start() produces without needing a real
// Docker daemon (the live verification path runs via the e2e stage).
underlying := testErr(`Error response from daemon: No such image: workspace-template:openclaw`)
func TestImageNotFoundErrorIncludesPullHint(t *testing.T) {
underlying := testErr(`Error response from daemon: No such image: ghcr.io/molecule-ai/workspace-template-openclaw:latest`)
if !isImageNotFoundErr(underlying) {
t.Fatalf("precondition failed: classifier didn't recognise moby's message")
}
tag := runtimeTagFromImage("workspace-template:openclaw")
image := "ghcr.io/molecule-ai/workspace-template-openclaw:latest"
tag := runtimeTagFromImage(image)
wrapped := testErr(
`docker image "workspace-template:openclaw" not found — run 'bash workspace/build-all.sh ` +
tag + `' to build it (underlying error: ` + underlying.Error() + `)`,
`docker image "` + image + `" not found after pull attempt — verify GHCR visibility for ` + tag +
` and that the tenant has internet access (underlying error: ` + underlying.Error() + `)`,
)
s := wrapped.Error()
for _, want := range []string{
`"workspace-template:openclaw"`,
`bash workspace/build-all.sh openclaw`,
`No such image: workspace-template:openclaw`,
`"ghcr.io/molecule-ai/workspace-template-openclaw:latest"`,
`verify GHCR visibility for openclaw`,
`No such image`,
} {
if !strings.Contains(s, want) {
t.Errorf("wrapped error missing %q, got: %s", want, s)