From d61d9af761b57ae3b0003e89009541679a04d27e Mon Sep 17 00:00:00 2001 From: devops-engineer Date: Fri, 5 Jun 2026 23:57:56 -0700 Subject: [PATCH] fix(workspace-server): derive image-refresh runtime allowlist from providers SSOT (google-adk drift) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #578. The tenant image-refresh endpoint (POST /admin/workspace-images/refresh) hardcoded AllRuntimes = {claude-code, codex, hermes, openclaw}, missing google-adk. Controlplane already accepts google-adk for pin-promote/ redeploy, so a google-adk pin was accepted CP-side then rejected 400 ("unknown runtime") at the tenant — google-adk image fixes never deployed. Instead of just appending google-adk (which would drift again), AllRuntimes is now DERIVED at package init from providers.LoadManifest().Runtimes — the same providers.yaml `runtimes:` SSOT (mirrored from CP's providers.yaml) the rest of the platform routes against. The CP pin-promote allowlist and the tenant refresh allowlist are now provably the same set. A static imageRefreshFallbackRuntimes (now including google-adk) is used only if the embedded manifest fails to load, preserving availability; a drift guard test pins it to the SSOT. Tests: - TestAllRuntimes_IncludesGoogleADK — google-adk is accepted (regression). - TestAllRuntimes_MatchesProvidersSSOT — derived list == providers SSOT keys (drift guard so CP/tenant can't diverge again). - TestImageRefreshFallbackMatchesSSOT — fallback pinned to SSOT. - TestRefresh_RejectsUnknownRuntime — guard intact; 400 body advertises google-adk in known_runtimes. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../handlers/admin_workspace_images.go | 55 +++++++- .../handlers/admin_workspace_images_test.go | 120 ++++++++++++++++++ 2 files changed, 170 insertions(+), 5 deletions(-) diff --git a/workspace-server/internal/handlers/admin_workspace_images.go b/workspace-server/internal/handlers/admin_workspace_images.go index c408f149c..2fa74bae4 100644 --- a/workspace-server/internal/handlers/admin_workspace_images.go +++ b/workspace-server/internal/handlers/admin_workspace_images.go @@ -9,6 +9,7 @@ import ( "log" "net/http" "os" + "sort" "strings" "time" @@ -18,6 +19,7 @@ import ( dockerclient "github.com/docker/docker/client" "github.com/gin-gonic/gin" + "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers" "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/provisioner" ) @@ -41,10 +43,53 @@ func NewWorkspaceImageService(docker *dockerclient.Client) *WorkspaceImageServic return &WorkspaceImageService{docker: docker} } -// AllRuntimes is the canonical list mirroring docs/workspace-runtime-package.md. -// Update both when a new template is added. -var AllRuntimes = []string{ - "claude-code", "codex", "hermes", "openclaw", +// AllRuntimes is the canonical set of workspace runtimes this tenant will +// pull/recreate template images for. It is DERIVED from the same providers +// manifest SSOT (internal/providers/providers.yaml `runtimes:` block, mirrored +// from CP's providers.yaml) that the rest of the platform routes against — +// NOT a second hand-maintained list. +// +// Why derive instead of hardcode (controlplane#578): the old hardcoded slice +// here ({claude-code, codex, hermes, openclaw}) silently DRIFTED from CP, which +// already accepts `google-adk` for pin-promote/redeploy. A google-adk pin would +// be accepted CP-side, then this tenant's POST /admin/workspace-images/refresh +// ?runtime=google-adk rejected it 400 ("unknown runtime"), so google-adk image +// fixes never deployed. Deriving from the manifest makes the tenant allowlist +// and the CP allowlist provably the same set — they can't drift again. +// +// imageRefreshFallbackRuntimes is used ONLY if the embedded providers manifest +// fails to load (which would be a build/CI failure caught by the providers +// package's own tests, never a healthy prod). It preserves the historical +// behavior — plus google-adk — so a manifest regression can never take the +// refresh endpoint fully offline. Kept in lockstep with the providers.yaml +// `runtimes:` keys; the drift guard in admin_workspace_images_test.go asserts +// the two match. +var imageRefreshFallbackRuntimes = []string{ + "claude-code", "codex", "google-adk", "hermes", "openclaw", +} + +// AllRuntimes is computed once at package init from the providers SSOT. +var AllRuntimes = loadImageRefreshRuntimes() + +// loadImageRefreshRuntimes returns the sorted runtime names declared in the +// providers manifest, falling back to imageRefreshFallbackRuntimes if the +// manifest can't be loaded. +func loadImageRefreshRuntimes() []string { + m, err := providers.LoadManifest() + if err != nil || len(m.Runtimes) == 0 { + if err != nil { + log.Printf("workspace-images: providers.LoadManifest failed (%v); falling back to static runtime allowlist", err) + } + out := append([]string(nil), imageRefreshFallbackRuntimes...) + sort.Strings(out) + return out + } + out := make([]string, 0, len(m.Runtimes)) + for rt := range m.Runtimes { + out = append(out, rt) + } + sort.Strings(out) + return out } // RefreshResult is the per-call outcome surfaced to HTTP callers AND logged @@ -197,7 +242,7 @@ func (s *WorkspaceImageService) Refresh(ctx context.Context, runtimes []string, // AdminWorkspaceImagesHandler serves POST /admin/workspace-images/refresh. // -// ?runtime=claude-code (optional; default = all 8 templates) +// ?runtime=claude-code (optional; default = all runtimes in AllRuntimes) // &recreate=true|false (default true; false = pull only) // // Returns JSON {pulled: [...], failed: [...], recreated: [...]} diff --git a/workspace-server/internal/handlers/admin_workspace_images_test.go b/workspace-server/internal/handlers/admin_workspace_images_test.go index 411cba5a4..6c663db46 100644 --- a/workspace-server/internal/handlers/admin_workspace_images_test.go +++ b/workspace-server/internal/handlers/admin_workspace_images_test.go @@ -3,7 +3,14 @@ package handlers import ( "encoding/base64" "encoding/json" + "net/http" + "net/http/httptest" + "sort" "testing" + + "github.com/gin-gonic/gin" + + "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers" ) func TestGHCRAuthHeader_NoEnvReturnsEmpty(t *testing.T) { @@ -92,6 +99,119 @@ func TestGHCRAuthHeader_RespectsRegistryEnv(t *testing.T) { } } +// runtimeListContains is a tiny membership helper for the runtime-allowlist tests. +func runtimeListContains(s []string, v string) bool { + for _, x := range s { + if x == v { + return true + } + } + return false +} + +// TestAllRuntimes_IncludesGoogleADK is the direct regression for +// controlplane#578: a google-adk pin promote/redeploy is accepted CP-side, so +// the tenant image-refresh allowlist MUST also accept google-adk or the image +// fix never deploys (tenant returned 400 "unknown runtime"). google-adk lives +// in the providers SSOT, so the derived AllRuntimes must contain it. +func TestAllRuntimes_IncludesGoogleADK(t *testing.T) { + if !runtimeListContains(AllRuntimes, "google-adk") { + t.Fatalf("AllRuntimes must include google-adk (controlplane#578 drift); got %v", AllRuntimes) + } +} + +// TestAllRuntimes_MatchesProvidersSSOT is the drift guard. AllRuntimes is +// derived from providers.LoadManifest().Runtimes — assert it equals exactly the +// runtime keys the providers manifest (mirrored from CP's providers.yaml) +// declares. If CP adds/removes a runtime, this test fails RED until the tenant +// re-derives, so the tenant image-refresh allowlist can never silently drift +// from the CP pin-promote allowlist again. +func TestAllRuntimes_MatchesProvidersSSOT(t *testing.T) { + m, err := providers.LoadManifest() + if err != nil { + t.Fatalf("providers.LoadManifest: %v", err) + } + want := make([]string, 0, len(m.Runtimes)) + for rt := range m.Runtimes { + want = append(want, rt) + } + sort.Strings(want) + + got := append([]string(nil), AllRuntimes...) + sort.Strings(got) + + if len(got) != len(want) { + t.Fatalf("AllRuntimes drift: got %v, want %v (providers SSOT)", got, want) + } + for i := range want { + if got[i] != want[i] { + t.Fatalf("AllRuntimes drift at %d: got %v, want %v (providers SSOT)", i, got, want) + } + } +} + +// TestImageRefreshFallbackMatchesSSOT pins the static fallback (used only when +// the embedded manifest fails to load) to the providers SSOT. If a runtime is +// added to providers.yaml but not to imageRefreshFallbackRuntimes, this fails +// RED — so a manifest-load failure can't silently drop a supported runtime. +func TestImageRefreshFallbackMatchesSSOT(t *testing.T) { + m, err := providers.LoadManifest() + if err != nil { + t.Fatalf("providers.LoadManifest: %v", err) + } + want := make([]string, 0, len(m.Runtimes)) + for rt := range m.Runtimes { + want = append(want, rt) + } + sort.Strings(want) + + got := append([]string(nil), imageRefreshFallbackRuntimes...) + sort.Strings(got) + + if len(got) != len(want) { + t.Fatalf("fallback drift: got %v, want %v (providers SSOT)", got, want) + } + for i := range want { + if got[i] != want[i] { + t.Fatalf("fallback drift at %d: got %v, want %v (providers SSOT)", i, got, want) + } + } +} + +// TestRefresh_RejectsUnknownRuntime asserts a genuinely unknown runtime still +// 400s (the guard isn't removed) AND that the 400 body lists google-adk in +// known_runtimes (proving the allowlist now advertises it). This exercises the +// gin handler's reject branch, which runs entirely before any Docker call. +func TestRefresh_RejectsUnknownRuntime(t *testing.T) { + gin.SetMode(gin.TestMode) + + // nil docker client is safe: the unknown-runtime branch returns 400 + // before svc.Refresh (which is the only path that touches Docker). + h := &AdminWorkspaceImagesHandler{svc: &WorkspaceImageService{}} + + r := gin.New() + r.POST("/admin/workspace-images/refresh", h.Refresh) + + req := httptest.NewRequest(http.MethodPost, "/admin/workspace-images/refresh?runtime=not-a-real-runtime", nil) + rec := httptest.NewRecorder() + r.ServeHTTP(rec, req) + + if rec.Code != http.StatusBadRequest { + t.Fatalf("unknown runtime: got status %d, want 400; body=%s", rec.Code, rec.Body.String()) + } + + var body struct { + Error string `json:"error"` + KnownRuntimes []string `json:"known_runtimes"` + } + if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil { + t.Fatalf("decode 400 body: %v (raw=%s)", err, rec.Body.String()) + } + if !runtimeListContains(body.KnownRuntimes, "google-adk") { + t.Errorf("400 known_runtimes must advertise google-adk (controlplane#578); got %v", body.KnownRuntimes) + } +} + func TestGHCRAuthHeader_TrimsWhitespace(t *testing.T) { t.Setenv("MOLECULE_IMAGE_REGISTRY", "") // .env lines often have trailing newlines or accidental spaces. Without -- 2.52.0