diff --git a/workspace-server/internal/handlers/admin_workspace_images.go b/workspace-server/internal/handlers/admin_workspace_images.go index 78d347d1..68bc50f1 100644 --- a/workspace-server/internal/handlers/admin_workspace_images.go +++ b/workspace-server/internal/handlers/admin_workspace_images.go @@ -56,10 +56,17 @@ type RefreshResult struct { Recreated []string `json:"recreated"` } -// TemplateImageRef returns the canonical GHCR ref for a runtime's template -// image. Single source of truth shared with imagewatch. +// TemplateImageRef returns the canonical image ref for a runtime's template, +// using the configured registry (provisioner.RegistryPrefix()) and the +// moving `:latest` tag. Single source of truth shared with imagewatch. +// +// Defaults to ghcr.io/molecule-ai/workspace-template-<runtime>:latest +// (upstream OSS). When MOLECULE_IMAGE_REGISTRY is set in the environment +// (typically the AWS ECR mirror in production), this returns the prefixed +// equivalent so admin operations and image-watch checks hit the same +// registry the provisioner pulls from. func TemplateImageRef(runtime string) string { - return fmt.Sprintf("ghcr.io/molecule-ai/workspace-template-%s:latest", runtime) + return fmt.Sprintf("%s/workspace-template-%s:latest", provisioner.RegistryPrefix(), runtime) } // ghcrAuthHeader returns the base64-encoded JSON auth payload Docker's diff --git a/workspace-server/internal/provisioner/provisioner.go b/workspace-server/internal/provisioner/provisioner.go index 0b90c899..0a9797ad 100644 --- a/workspace-server/internal/provisioner/provisioner.go +++ b/workspace-server/internal/provisioner/provisioner.go @@ -35,36 +35,37 @@ import ( // drift-risk #6. var ErrNoBackend = errors.New("provisioner: no backend configured (zero-valued receiver)") -// RuntimeImages maps runtime names to their Docker image refs on GHCR. +// RuntimeImages maps runtime names to their Docker image refs. 
// Each standalone template repo publishes its image via the reusable // publish-template-image workflow in molecule-ci on every main merge. // The provisioner pulls these on demand (see ensureImageLocal) — no // pre-build step on the tenant host. // +// The registry prefix is determined by RegistryPrefix() in registry.go; +// defaults to ghcr.io/molecule-ai (upstream OSS) and is overridden via the +// MOLECULE_IMAGE_REGISTRY env var in production tenants that mirror to +// AWS ECR or another registry. The map is computed at package init and +// captures whatever prefix was active then; later env changes are not reflected. +// // Legacy local-build path (`docker build -t workspace-template:` // via scripts/build-images.sh) is still supported for development: // when a bare `workspace-template:` image is present locally, // Docker's image resolver matches it before any pull is attempted. Set // the env var WORKSPACE_IMAGE_LOCAL_OVERRIDE=1 (enforced by callers) to // short-circuit pulls entirely if needed. -var RuntimeImages = map[string]string{ - "langgraph": "ghcr.io/molecule-ai/workspace-template-langgraph:latest", - "claude-code": "ghcr.io/molecule-ai/workspace-template-claude-code:latest", - "openclaw": "ghcr.io/molecule-ai/workspace-template-openclaw:latest", - "deepagents": "ghcr.io/molecule-ai/workspace-template-deepagents:latest", - "crewai": "ghcr.io/molecule-ai/workspace-template-crewai:latest", - "autogen": "ghcr.io/molecule-ai/workspace-template-autogen:latest", - "hermes": "ghcr.io/molecule-ai/workspace-template-hermes:latest", // Hermes (Nous Research) — real hermes-agent behind A2A bridge - "gemini-cli": "ghcr.io/molecule-ai/workspace-template-gemini-cli:latest", // Google Gemini CLI -} +var RuntimeImages = computeRuntimeImages() + +// DefaultImage is the fallback workspace Docker image (langgraph is the +// most common runtime). Computed at package init via RegistryPrefix() so the prefix +// override applies to the fallback path too. 
+// +// NOTE: Every runtime MUST have an entry in knownRuntimes (registry.go). +// If a runtime is missing, it falls back to DefaultImage which may have +// wrong deps. Add new runtimes to knownRuntimes AND create the standalone +// template repo. +var DefaultImage = RuntimeImage(defaultRuntime) const ( - // DefaultImage is the fallback workspace Docker image (langgraph is the most common runtime). - DefaultImage = "ghcr.io/molecule-ai/workspace-template-langgraph:latest" - // NOTE: Every runtime MUST have an entry in RuntimeImages above. If a runtime is missing, - // it falls back to DefaultImage which may have wrong deps. Add new runtimes to both - // RuntimeImages AND create the standalone template repo. - // DefaultNetwork is the Docker network workspaces join. DefaultNetwork = "molecule-monorepo-net" diff --git a/workspace-server/internal/provisioner/registry.go b/workspace-server/internal/provisioner/registry.go new file mode 100644 index 00000000..209411a4 --- /dev/null +++ b/workspace-server/internal/provisioner/registry.go @@ -0,0 +1,95 @@ +package provisioner + +import ( + "fmt" + "os" +) + +// defaultRegistryPrefix is the upstream OSS face for all workspace template +// images. Self-hosted Molecule deployments without the MOLECULE_IMAGE_REGISTRY +// override pull from here. +const defaultRegistryPrefix = "ghcr.io/molecule-ai" + +// knownRuntimes is the canonical list of workspace template runtimes shipped +// in main. Any runtime added here MUST also have a standalone template repo +// (Molecule-AI/molecule-ai-workspace-template-<runtime>) and an entry in the +// publish-template-image workflow that builds it. +// +// Order matters for deterministic test snapshots; keep alphabetical. +var knownRuntimes = []string{ + "autogen", + "claude-code", + "codex", + "crewai", + "deepagents", + "gemini-cli", + "hermes", + "langgraph", + "openclaw", +} + +// defaultRuntime is the fallback when a workspace's config doesn't specify a +// runtime. 
Picked because LangGraph is the most common in our org templates +// and has the smallest "first impression" cold-start surface. +const defaultRuntime = "langgraph" + +// RegistryPrefix returns the registry prefix all workspace-template image +// references should use. Defaults to ghcr.io/molecule-ai (the upstream OSS +// face) and is overridden by the MOLECULE_IMAGE_REGISTRY env var in +// production tenants where we mirror images to a private registry. +// +// The override is set at deploy time (Railway env, EC2 user-data) — never +// from user-supplied input — so the value is trusted by the time it reaches +// this code. Validation is deliberately minimal: an operator-supplied +// prefix that points at a registry the EC2 can't authenticate to will fail +// loudly at docker-pull time, which is the right blast radius. +// +// Example values: +// +// (unset) → ghcr.io/molecule-ai (OSS default) +// "123456789012.dkr.ecr.us-east-2.amazonaws.com/molecule-ai" → AWS ECR mirror +// "git.moleculesai.app/molecule-ai" → self-hosted Gitea Container Registry (future) +// +// Auth is registry-specific and configured outside this function: +// - GHCR: GHCR_USER/GHCR_TOKEN env vars consumed by ghcrAuthHeader() +// - ECR: docker credential helper (amazon-ecr-credential-helper) configured +// in EC2 user-data; ~/.docker/config.json has credHelpers entry; the +// daemon resolves auth automatically on every pull. +func RegistryPrefix() string { + if v := os.Getenv("MOLECULE_IMAGE_REGISTRY"); v != "" { + return v + } + return defaultRegistryPrefix +} + +// RuntimeImage returns the canonical image reference for the given runtime, +// using the current RegistryPrefix() and the moving `:latest` tag. +// +// For SHA-pinned references (production thin-AMI launches), the +// runtime_image_pins lookup in handlers/runtime_image_pin.go strips the +// `:latest` suffix and appends an immutable `@sha256:<digest>` from the DB. 
+// That code path naturally inherits any RegistryPrefix() change because it +// reads from RuntimeImages[runtime] and only re-formats the tag suffix. +// +// Returns the empty string for runtimes not in knownRuntimes; callers should fall through +// to DefaultImage in that case (matching legacy behavior). +func RuntimeImage(runtime string) string { + for _, r := range knownRuntimes { + if r == runtime { + return fmt.Sprintf("%s/workspace-template-%s:latest", RegistryPrefix(), runtime) + } + } + return "" +} + +// computeRuntimeImages returns the {runtime: image-ref} map evaluated against +// the current RegistryPrefix(). Called at package init to populate the +// exported RuntimeImages var. Tests that flip MOLECULE_IMAGE_REGISTRY between +// expected values use this helper to rebuild the map mid-run. +func computeRuntimeImages() map[string]string { + out := make(map[string]string, len(knownRuntimes)) + for _, r := range knownRuntimes { + out[r] = RuntimeImage(r) + } + return out +} diff --git a/workspace-server/internal/provisioner/registry_test.go b/workspace-server/internal/provisioner/registry_test.go new file mode 100644 index 00000000..885a6b99 --- /dev/null +++ b/workspace-server/internal/provisioner/registry_test.go @@ -0,0 +1,140 @@ +package provisioner + +import ( + "strings" + "testing" +) + +// TestRegistryPrefix_DefaultsToGHCR pins the OSS-default behavior. If a future +// refactor accidentally drops the default, OSS users self-hosting Molecule +// would silently lose image pulls — this test should fail loudly instead. 
+func TestRegistryPrefix_DefaultsToGHCR(t *testing.T) { + t.Setenv("MOLECULE_IMAGE_REGISTRY", "") + got := RegistryPrefix() + want := "ghcr.io/molecule-ai" + if got != want { + t.Fatalf("RegistryPrefix() = %q, want %q (default must remain GHCR for OSS users)", got, want) + } +} + +// TestRegistryPrefix_RespectsEnv verifies the override path used in +// production tenants where MOLECULE_IMAGE_REGISTRY points at a private +// mirror (AWS ECR, self-hosted Harbor, etc.). +func TestRegistryPrefix_RespectsEnv(t *testing.T) { + t.Setenv("MOLECULE_IMAGE_REGISTRY", "123456789012.dkr.ecr.us-east-2.amazonaws.com/molecule-ai") + got := RegistryPrefix() + want := "123456789012.dkr.ecr.us-east-2.amazonaws.com/molecule-ai" + if got != want { + t.Fatalf("RegistryPrefix() = %q, want %q (env override path is the production cutover mechanism)", got, want) + } +} + +// TestRegistryPrefix_EmptyEnvFallsBackToDefault — guard against an operator +// setting MOLECULE_IMAGE_REGISTRY="" by mistake (e.g. unset deploy variable +// becomes empty string, not literally absent). We treat "" as "use default" +// so a misconfigured env doesn't mean an empty registry prefix. +func TestRegistryPrefix_EmptyEnvFallsBackToDefault(t *testing.T) { + t.Setenv("MOLECULE_IMAGE_REGISTRY", "") + if RegistryPrefix() != defaultRegistryPrefix { + t.Fatalf("empty MOLECULE_IMAGE_REGISTRY should fall back to %q, got %q", defaultRegistryPrefix, RegistryPrefix()) + } +} + +// TestRuntimeImage_AllKnownRuntimes — every runtime in the canonical list +// must produce a properly-formatted image ref. If the ref format changes, or +// a runtime is added without updating the pinned count, this catches it. 
+func TestRuntimeImage_AllKnownRuntimes(t *testing.T) { + t.Setenv("MOLECULE_IMAGE_REGISTRY", "") + for _, r := range knownRuntimes { + got := RuntimeImage(r) + want := "ghcr.io/molecule-ai/workspace-template-" + r + ":latest" + if got != want { + t.Errorf("RuntimeImage(%q) = %q, want %q", r, got, want) + } + } + // Pin the count so adding a runtime requires explicit test acknowledgement (update the literal 9 and the list in the message). + if len(knownRuntimes) != 9 { + t.Errorf("knownRuntimes length = %d, want 9 (autogen, claude-code, codex, crewai, deepagents, gemini-cli, hermes, langgraph, openclaw)", len(knownRuntimes)) + } +} + +// TestRuntimeImage_UnknownRuntime — defensive: callers must fall back to +// DefaultImage when a runtime is unknown, never silently use the wrong +// prefix. Returning "" enforces an explicit fallback at every call site. +func TestRuntimeImage_UnknownRuntime(t *testing.T) { + for _, name := range []string{"", "nonexistent", "WORKSPACE-TEMPLATE-FAKE", "../../../etc/passwd"} { + if got := RuntimeImage(name); got != "" { + t.Errorf("RuntimeImage(%q) = %q, want empty string for unknown runtime", name, got) + } + } +} + +// TestRuntimeImage_RegistryOverrideAppliesToAllRuntimes — the override +// flips ALL runtimes consistently. If a refactor accidentally hardcoded +// the prefix in some runtimes but not others (the failure mode that +// triggered this whole rollout), this test catches it. 
+func TestRuntimeImage_RegistryOverrideAppliesToAllRuntimes(t *testing.T) { + const ecr = "999999999999.dkr.ecr.us-east-2.amazonaws.com/molecule-ai" + t.Setenv("MOLECULE_IMAGE_REGISTRY", ecr) + + for _, r := range knownRuntimes { + got := RuntimeImage(r) + if !strings.HasPrefix(got, ecr+"/workspace-template-") { + t.Errorf("RuntimeImage(%q) = %q, must start with override prefix %q", r, got, ecr) + } + if !strings.HasSuffix(got, ":latest") { + t.Errorf("RuntimeImage(%q) = %q, must keep :latest tag suffix", r, got) + } + } +} + +// TestComputeRuntimeImages_AllRuntimesPresent — the map must contain every +// known runtime with a non-empty ref. Drift between knownRuntimes and computeRuntimeImages would +// silently break the runtime → image lookup that provisioner.Start uses. +func TestComputeRuntimeImages_AllRuntimesPresent(t *testing.T) { + t.Setenv("MOLECULE_IMAGE_REGISTRY", "") + m := computeRuntimeImages() + if len(m) != len(knownRuntimes) { + t.Fatalf("computeRuntimeImages() has %d entries, want %d (one per knownRuntime)", len(m), len(knownRuntimes)) + } + for _, r := range knownRuntimes { + img, ok := m[r] + if !ok { + t.Errorf("computeRuntimeImages() missing runtime %q", r) + continue + } + if img == "" { + t.Errorf("computeRuntimeImages()[%q] is empty", r) + } + } +} + +// TestComputeRuntimeImages_ReflectsCurrentEnv — calling computeRuntimeImages +// after env change rebuilds the map with new prefix. Tests + ops procedures +// that flip the env in-process rely on this. 
+func TestComputeRuntimeImages_ReflectsCurrentEnv(t *testing.T) { + t.Setenv("MOLECULE_IMAGE_REGISTRY", "") + defaultMap := computeRuntimeImages() + if !strings.HasPrefix(defaultMap["claude-code"], "ghcr.io/molecule-ai/") { + t.Fatalf("default map should be GHCR-prefixed, got %q", defaultMap["claude-code"]) + } + + const mirror = "registry.example.com/molecule-ai" + t.Setenv("MOLECULE_IMAGE_REGISTRY", mirror) + mirrorMap := computeRuntimeImages() + if !strings.HasPrefix(mirrorMap["claude-code"], mirror+"/") { + t.Fatalf("mirror-prefixed map should start with %q, got %q", mirror, mirrorMap["claude-code"]) + } +} + +// TestKnownRuntimes_AlphabeticalOrder — pin the order so test snapshots +// (and human readers diffing the file) see deterministic output. Adding a +// new runtime out of alphabetical order will fail this test, which is the +// nudge to keep the file readable. The comparison is strict (>=), so duplicate entries fail too. +func TestKnownRuntimes_AlphabeticalOrder(t *testing.T) { + for i := 1; i < len(knownRuntimes); i++ { + if knownRuntimes[i-1] >= knownRuntimes[i] { + t.Errorf("knownRuntimes not alphabetical: %q comes before %q", knownRuntimes[i-1], knownRuntimes[i]) + } + } +}