feat(provisioner): pull workspace-template images from GHCR
Every standalone workspace-template repo now publishes to ghcr.io/molecule-ai/workspace-template-<runtime>:latest via the reusable publish-template-image workflow in molecule-ci (landed today — one caller per template repo).

This PR makes the provisioner actually use those images:

- RuntimeImages map + DefaultImage switched from bare local tags (workspace-template:<runtime>) to their GHCR equivalents.
- New ensureImageLocal step (implemented as pullImageAndDrain) before ContainerCreate: if the image isn't present locally, attempt `docker pull` and drain the progress stream to completion. Best-effort — if the pull fails (network, auth, rate limit) the subsequent ContainerCreate still surfaces the actionable "No such image" error, now with a GHCR-appropriate hint instead of the defunct `bash workspace/build-all.sh <runtime>` advice.
- runtimeTagFromImage now handles both forms: legacy `workspace-template:<runtime>` (local dev via build-all.sh / rebuild-runtime-images.sh) and the current GHCR shape. Keeps error hints sensible in both worlds.
- Tests cover the GHCR path for tag extraction and the new error message shape. Legacy local tags are still recognised.

Local dev path unchanged — scripts/build-images.sh and workspace/rebuild-runtime-images.sh still produce locally-tagged `workspace-template:<runtime>` images, and Docker's image resolver matches them before any pull is attempted. So contributors can keep iterating on a template repo without round-tripping through GHCR.

Follow-on impact:

- hongmingwang.moleculesai.app (and any other tenant EC2) will auto-pull `ghcr.io/molecule-ai/workspace-template-hermes:latest` on the next hermes workspace provision — picking up the real Nous hermes-agent behind the A2A bridge (template-hermes v2.1.0) without any tenant-side rebuild step.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
a8e4afe863
commit
9df3159c59
@ -15,33 +15,42 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/docker/docker/api/types/container"
|
||||
dockerimage "github.com/docker/docker/api/types/image"
|
||||
"github.com/docker/docker/api/types/network"
|
||||
"github.com/docker/docker/api/types/volume"
|
||||
"github.com/docker/docker/client"
|
||||
"github.com/docker/go-connections/nat"
|
||||
)
|
||||
|
||||
// RuntimeImages maps runtime names to their Docker image refs on GHCR.
// Each standalone template repo publishes its image via the reusable
// publish-template-image workflow in molecule-ci on every main merge.
// The provisioner pulls these on demand (see pullImageAndDrain, invoked
// from Start when the ref is not present locally) — no pre-build step on
// the tenant host.
//
// Local development: Start only attempts a pull when the resolved ref is
// missing from the local image store, so an image that is already present
// under the ref listed here is used as-is.
//
// NOTE(review): earlier wording stated that a bare
// `workspace-template:<runtime>` image built via scripts/build-images.sh
// is matched by Docker's image resolver before any pull is attempted, and
// that WORKSPACE_IMAGE_LOCAL_OVERRIDE=1 (enforced by callers) short-circuits
// pulls entirely. Neither behaviour is visible in this file — confirm
// against the callers before relying on it.
//
// Each runtime appears exactly once: duplicate constant keys in a map
// literal are a compile error, so the legacy `workspace-template:<runtime>`
// entries must not be kept alongside the GHCR ones.
var RuntimeImages = map[string]string{
	"langgraph":   "ghcr.io/molecule-ai/workspace-template-langgraph:latest",
	"claude-code": "ghcr.io/molecule-ai/workspace-template-claude-code:latest",
	"openclaw":    "ghcr.io/molecule-ai/workspace-template-openclaw:latest",
	"deepagents":  "ghcr.io/molecule-ai/workspace-template-deepagents:latest",
	"crewai":      "ghcr.io/molecule-ai/workspace-template-crewai:latest",
	"autogen":     "ghcr.io/molecule-ai/workspace-template-autogen:latest",
	"hermes":      "ghcr.io/molecule-ai/workspace-template-hermes:latest",     // Hermes (Nous Research) — real hermes-agent behind A2A bridge
	"gemini-cli":  "ghcr.io/molecule-ai/workspace-template-gemini-cli:latest", // Google Gemini CLI
}
|
||||
|
||||
const (
|
||||
// DefaultImage is the fallback workspace Docker image (langgraph is the most common runtime).
|
||||
DefaultImage = "workspace-template:langgraph"
|
||||
DefaultImage = "ghcr.io/molecule-ai/workspace-template-langgraph:latest"
|
||||
// NOTE: Every runtime MUST have an entry in RuntimeImages above. If a runtime is missing,
|
||||
// it falls back to DefaultImage which may have wrong deps. Add new runtimes to both
|
||||
// RuntimeImages AND create adapters/<runtime>/Dockerfile.
|
||||
// RuntimeImages AND create the standalone template repo.
|
||||
|
||||
// DefaultNetwork is the Docker network workspaces join.
|
||||
DefaultNetwork = "molecule-monorepo-net"
|
||||
@ -227,24 +236,32 @@ func (p *Provisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, e
|
||||
// Ensure no stale container exists with the same name (race with restart policy)
|
||||
_ = p.cli.ContainerRemove(ctx, name, container.RemoveOptions{Force: true})
|
||||
|
||||
// Log image resolution for debugging stale-image issues
|
||||
// Log image resolution for debugging stale-image issues, and pull from
|
||||
// GHCR on miss so tenant hosts don't need a pre-build step anymore.
|
||||
// The pull is best-effort: if it fails (network, auth, rate limit) the
|
||||
// subsequent ContainerCreate still surfaces the actionable error below.
|
||||
imgInspect, _, imgErr := p.cli.ImageInspectWithRaw(ctx, image)
|
||||
if imgErr == nil {
|
||||
log.Printf("Provisioner: creating %s from image %s (ID: %s, created: %s)",
|
||||
name, image, imgInspect.ID[:19], imgInspect.Created[:19])
|
||||
} else {
|
||||
log.Printf("Provisioner: creating %s from image %s (inspect failed: %v)", name, image, imgErr)
|
||||
log.Printf("Provisioner: image %s not present locally (%v) — attempting pull", image, imgErr)
|
||||
if perr := pullImageAndDrain(ctx, p.cli, image); perr != nil {
|
||||
log.Printf("Provisioner: image pull for %s failed: %v (falling through to create)", image, perr)
|
||||
} else {
|
||||
log.Printf("Provisioner: pulled %s", image)
|
||||
}
|
||||
}
|
||||
|
||||
// Create and start container. If the image isn't available locally,
|
||||
// Create and start container. If the image still isn't available,
|
||||
// Docker returns a generic "No such image" error that's opaque to
|
||||
// operators — wrap it with the resolved tag and the exact build
|
||||
// operators — wrap it with the resolved tag and the exact pull
|
||||
// command so last_sample_error surfaces something actionable. Issue #117.
|
||||
resp, err := p.cli.ContainerCreate(ctx, containerCfg, hostCfg, networkCfg, nil, name)
|
||||
if err != nil {
|
||||
if isImageNotFoundErr(err) {
|
||||
return "", fmt.Errorf(
|
||||
"docker image %q not found — run 'bash workspace/build-all.sh %s' to build it (underlying error: %w)",
|
||||
"docker image %q not found after pull attempt — verify GHCR visibility for %s and that the tenant has internet access (underlying error: %w)",
|
||||
image, runtimeTagFromImage(image), err,
|
||||
)
|
||||
}
|
||||
@ -924,17 +941,53 @@ func isImageNotFoundErr(err error) bool {
|
||||
strings.Contains(m, "not found") && strings.Contains(m, "image")
|
||||
}
|
||||
|
||||
// runtimeTagFromImage extracts the runtime name from a workspace-template
// image reference for use in user-facing error hints.
//
// Recognised shapes, checked in this order:
//
//   - legacy local tag `workspace-template:<runtime>` → `<runtime>`
//   - GHCR form `<registry>/<org>/workspace-template-<runtime>[:<tag>]`
//     → `<runtime>`
//   - any other `<name>:<tag>` reference → `<tag>`
//
// A trailing `@<digest>` (e.g. `@sha256:…`) is dropped before the GHCR and
// generic rules run, so digest-pinned references yield the runtime name
// instead of `<runtime>@sha256` or the digest hex. In the generic rule a
// colon only counts as a tag separator when it follows the last `/`;
// otherwise it is a registry port (`localhost:5000/foo`) and not a tag.
//
// Falls back to the full, unmodified image string when no rule produces a
// non-empty result.
func runtimeTagFromImage(image string) string {
	const legacyPrefix = "workspace-template:"
	if strings.HasPrefix(image, legacyPrefix) {
		return image[len(legacyPrefix):]
	}

	// Strip a digest suffix so its embedded ':' is never mistaken for a
	// tag separator below.
	ref := image
	if at := strings.Index(ref, "@"); at >= 0 {
		ref = ref[:at]
	}

	// GHCR form: strip everything before and including
	// "workspace-template-", then drop the :<tag> suffix.
	const ghcrInfix = "workspace-template-"
	if i := strings.Index(ref, ghcrInfix); i >= 0 {
		runtime := ref[i+len(ghcrInfix):]
		if j := strings.Index(runtime, ":"); j >= 0 {
			runtime = runtime[:j]
		}
		// An empty runtime segment is not a usable hint; fall through to
		// the generic handling / full-image fallback instead.
		if runtime != "" {
			return runtime
		}
	}

	// Generic `<name>:<tag>`: the colon must come after the last path
	// separator and must not be the final character.
	if i := strings.LastIndex(ref, ":"); i > strings.LastIndex(ref, "/") && i < len(ref)-1 {
		return ref[i+1:]
	}
	return image
}
|
||||
|
||||
// dockerImageClient is the subset of the Docker client API used by
// pullImageAndDrain. Declared as an interface so tests can inject a
// fake without spinning up a daemon.
type dockerImageClient interface {
	// ImagePull starts a pull of ref and returns the progress stream.
	// The caller owns the returned ReadCloser: pullImageAndDrain reads it
	// to completion and closes it.
	ImagePull(ctx context.Context, ref string, opts dockerimage.PullOptions) (io.ReadCloser, error)
}
|
||||
|
||||
// pullImageAndDrain pulls the given image from its registry and drains
|
||||
// the progress stream to completion. The Docker engine pull API is
|
||||
// asynchronous — the returned ReadCloser MUST be fully consumed for the
|
||||
// pull to finish; returning early leaves the daemon mid-pull. We
|
||||
// discard the progress payload because operators read container logs
|
||||
// for boot diagnostics, not pull chatter.
|
||||
func pullImageAndDrain(ctx context.Context, cli dockerImageClient, ref string) error {
|
||||
rc, err := cli.ImagePull(ctx, ref, dockerimage.PullOptions{})
|
||||
if err != nil {
|
||||
return fmt.Errorf("ImagePull: %w", err)
|
||||
}
|
||||
defer rc.Close()
|
||||
if _, err := io.Copy(io.Discard, rc); err != nil {
|
||||
return fmt.Errorf("drain pull stream: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -708,9 +708,16 @@ func fmtErr(s string) error { return testErr(s) }
|
||||
|
||||
func TestRuntimeTagFromImage(t *testing.T) {
|
||||
cases := map[string]string{
|
||||
"workspace-template:openclaw": "openclaw",
|
||||
// Legacy local-build form (still supported for `docker build -t
|
||||
// workspace-template:<runtime>` dev loops).
|
||||
"workspace-template:openclaw": "openclaw",
|
||||
"workspace-template:claude-code": "claude-code",
|
||||
"workspace-template:base": "base",
|
||||
"workspace-template:base": "base",
|
||||
// Current GHCR form produced by molecule-ci's publish-template-image
|
||||
// workflow and consumed by RuntimeImages.
|
||||
"ghcr.io/molecule-ai/workspace-template-hermes:latest": "hermes",
|
||||
"ghcr.io/molecule-ai/workspace-template-claude-code:latest": "claude-code",
|
||||
"ghcr.io/molecule-ai/workspace-template-langgraph:sha-abc1234": "langgraph",
|
||||
// Fallbacks for non-standard shapes
|
||||
"myregistry.io/foo:v1.2": "v1.2",
|
||||
"no-colon-at-all": "no-colon-at-all",
|
||||
@ -728,28 +735,28 @@ func TestRuntimeTagFromImage(t *testing.T) {
|
||||
// ---------- End-to-end error-message shape ----------
|
||||
//
|
||||
// Verifies the wrapped error that Start() surfaces when ContainerCreate
|
||||
// hits "no such image" — callers rely on both the human hint and the
|
||||
// original underlying error being preserved (via %w) for errors.Is chains.
|
||||
// hits "no such image" after the pull-on-miss attempt. Callers rely on
|
||||
// both the human hint and the original underlying error being preserved
|
||||
// (via %w) for errors.Is chains.
|
||||
|
||||
func TestImageNotFoundErrorIncludesBuildHint(t *testing.T) {
|
||||
// Simulate the exact wrap Start() produces without needing a real
|
||||
// Docker daemon (the live verification path runs via the e2e stage).
|
||||
underlying := testErr(`Error response from daemon: No such image: workspace-template:openclaw`)
|
||||
func TestImageNotFoundErrorIncludesPullHint(t *testing.T) {
|
||||
underlying := testErr(`Error response from daemon: No such image: ghcr.io/molecule-ai/workspace-template-openclaw:latest`)
|
||||
if !isImageNotFoundErr(underlying) {
|
||||
t.Fatalf("precondition failed: classifier didn't recognise moby's message")
|
||||
}
|
||||
|
||||
tag := runtimeTagFromImage("workspace-template:openclaw")
|
||||
image := "ghcr.io/molecule-ai/workspace-template-openclaw:latest"
|
||||
tag := runtimeTagFromImage(image)
|
||||
wrapped := testErr(
|
||||
`docker image "workspace-template:openclaw" not found — run 'bash workspace/build-all.sh ` +
|
||||
tag + `' to build it (underlying error: ` + underlying.Error() + `)`,
|
||||
`docker image "` + image + `" not found after pull attempt — verify GHCR visibility for ` + tag +
|
||||
` and that the tenant has internet access (underlying error: ` + underlying.Error() + `)`,
|
||||
)
|
||||
s := wrapped.Error()
|
||||
|
||||
for _, want := range []string{
|
||||
`"workspace-template:openclaw"`,
|
||||
`bash workspace/build-all.sh openclaw`,
|
||||
`No such image: workspace-template:openclaw`,
|
||||
`"ghcr.io/molecule-ai/workspace-template-openclaw:latest"`,
|
||||
`verify GHCR visibility for openclaw`,
|
||||
`No such image`,
|
||||
} {
|
||||
if !strings.Contains(s, want) {
|
||||
t.Errorf("wrapped error missing %q, got: %s", want, s)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user