fix(canvas): use /compute/metadata SSOT endpoint in ContainerConfigTab (#2489) #2546

Merged
agent-researcher merged 3 commits from feat/2489-ssot-compute-metadata into main 2026-06-10 17:30:09 +00:00
6 changed files with 267 additions and 42 deletions
@@ -10,15 +10,22 @@ import type { WorkspaceCompute } from "@/store/socket";
// Cloud-provider + instance-type metadata (core#2489).
//
// SSOT lives in the workspace-server (workspace_compute.go's allowlist + defaults)
// and is fetched at runtime from GET /workspaces/:id/compute-options, so the UI
// can never offer a (provider, instance-type) the PATCH validation then rejects
// with a 400. The constants below are ONLY a minimal offline fallback used until
// the fetch resolves (or if it fails) — they mirror the server SSOT but are not
// the source of truth. When the fetch succeeds, its data replaces them entirely.
// and is fetched at runtime from GET /compute/metadata (public, workspace-
// independent endpoint — the data is platform constraints, not org secrets), so
// the UI can never offer a (provider, instance-type) the PATCH validation then
// rejects with a 400. The constants below are ONLY a minimal offline fallback
// used until the fetch resolves (or if it fails) — they mirror the server SSOT
// but are not the source of truth. When the fetch succeeds, its data replaces
// them entirely.
//
// Response shape (workspace-server):
// { providers: [{ id: "aws", label: "AWS (default)", default_instance: "t3.medium",
// instances: ["t3.medium", ...] }, ...] }
/**
 * Flat, UI-facing view of the compute metadata: the provider ids plus three
 * lookup tables, each keyed by provider id.
 */
interface ComputeOptions {
  /** Provider ids offered by the cloud-provider selector. */
  providers: string[];
  /** Instance types available for each provider id. */
  instanceTypes: Record<string, string[]>;
  /** Default instance type for each provider id. */
  defaults: Record<string, string>;
  /** Display label for each provider id. */
  labels: Record<string, string>;
}
const FALLBACK_COMPUTE_OPTIONS: ComputeOptions = {
@@ -29,6 +36,7 @@ const FALLBACK_COMPUTE_OPTIONS: ComputeOptions = {
gcp: ["e2-small", "e2-medium", "e2-standard-2", "e2-standard-4", "e2-standard-8"],
},
defaults: { aws: "t3.medium", hetzner: "cpx31", gcp: "e2-standard-2" },
labels: { aws: "AWS (default)", gcp: "GCP", hetzner: "Hetzner" },
};
// Coerce an optional provider id to one of the three ids the form handles:
// "gcp" and "hetzner" pass through unchanged; every other value (including
// undefined and the empty string) resolves to "aws".
const normalizeProvider = (p?: string): string => {
  switch (p) {
    case "gcp":
    case "hetzner":
      return p;
    default:
      return "aws";
  }
};
@@ -37,15 +45,6 @@ const instanceTypesForProvider = (opts: ComputeOptions, p?: string): string[] =>
/**
 * Resolve the instance type to preselect for a provider.
 *
 * Lookup order: the provider's entry in `opts.defaults`, then the first entry
 * of the provider's own `opts.instanceTypes` list, then "t3.medium". The middle
 * step matters when the options carry sizes for a provider but no default for
 * it: dropping straight to "t3.medium" would hand an AWS size to a non-AWS
 * provider.
 *
 * @param opts Compute options to read from.
 * @param p Provider id; normalized with `normalizeProvider` before lookup.
 * @returns The instance type to select for that provider.
 */
const defaultInstanceForProvider = (opts: ComputeOptions, p?: string): string => {
  const provider = normalizeProvider(p);
  return opts.defaults[provider] ?? opts.instanceTypes[provider]?.[0] ?? "t3.medium";
};
// Human labels for the cloud-provider selector. The option VALUES come from the
// fetched SSOT (opts.providers); this only supplies display text + the default tag.
// Display text for the known cloud-provider ids; the "(default)" suffix is
// part of the AWS label itself.
const CLOUD_PROVIDER_LABELS: Record<string, string> = {
  aws: "AWS (default)",
  gcp: "GCP",
  hetzner: "Hetzner",
};

/**
 * Label to show for a cloud-provider option value.
 *
 * Only the table's own keys are consulted. A bare `CLOUD_PROVIDER_LABELS[v]`
 * also finds members inherited from Object.prototype ("constructor",
 * "toString", ...), which are functions rather than labels.
 *
 * @param v Provider id used as the option value.
 * @returns The mapped label, or `v` itself when no label is defined.
 */
const cloudProviderOptionLabel = (v: string): string => {
  if (!Object.prototype.hasOwnProperty.call(CLOUD_PROVIDER_LABELS, v)) return v;
  return CLOUD_PROVIDER_LABELS[v] ?? v;
};
// Runtime ids known to this tab (presumably the runtime selector's options —
// the consumer is outside this view).
const RUNTIME_OPTIONS = [
  "claude-code",
  "codex",
  "hermes",
  "openclaw",
  "kimi",
  "kimi-cli",
  "external",
];

// Display resolutions as "WIDTHxHEIGHT" strings.
const RESOLUTIONS = [
  "1280x720",
  "1440x900",
  "1920x1080",
  "2560x1440",
];

// Default root size in GB; by its name it applies to headless workspaces
// (confirm against the consumer).
const DEFAULT_HEADLESS_ROOT_GB = 30;
@@ -103,9 +102,9 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
const [error, setError] = useState<string | null>(null);
const [success, setSuccess] = useState(false);
// core#2489: provider + instance-type dropdowns are populated from the
// workspace-server SSOT (GET /workspaces/:id/compute-options) so they can't
// drift from what the PATCH validation accepts. Start from the offline fallback
// and replace it once the fetch resolves; on fetch error we keep the fallback
// workspace-server SSOT (GET /compute/metadata) so they can't drift from
// what the PATCH validation accepts. Start from the offline fallback and
// replace it once the fetch resolves; on fetch error we keep the fallback
// (the dropdowns still work, just from the in-bundle mirror).
const [computeOptions, setComputeOptions] = useState<ComputeOptions>(FALLBACK_COMPUTE_OPTIONS);
@@ -119,15 +118,36 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
let cancelled = false;
(async () => {
try {
const opts = await api.get<Partial<ComputeOptions>>(`/workspaces/${workspaceId}/compute-options`);
// /compute/metadata is a public, workspace-independent endpoint (the data
// is platform constraints, not org secrets) — no need to refetch on
// workspaceId change; one fetch per tab mount is enough.
const resp = await api.get<{
providers?: Array<{ id: string; label?: string; default_instance?: string; instances?: string[] }>;
}>("/compute/metadata");
if (cancelled) return;
// Defensive: only adopt a well-formed payload; otherwise keep the fallback.
if (opts && Array.isArray(opts.providers) && opts.providers.length > 0 && opts.instanceTypes && opts.defaults) {
setComputeOptions({
providers: opts.providers,
instanceTypes: opts.instanceTypes,
defaults: opts.defaults,
});
// Map the server's per-provider object shape into the flat internal
// ComputeOptions shape the helpers + selectors consume.
if (resp && Array.isArray(resp.providers) && resp.providers.length > 0) {
const providers: string[] = [];
const instanceTypes: Record<string, string[]> = {};
const defaults: Record<string, string> = {};
const labels: Record<string, string> = {};
for (const p of resp.providers) {
if (!p || typeof p.id !== "string" || !p.id) continue;
providers.push(p.id);
if (Array.isArray(p.instances) && p.instances.length > 0) instanceTypes[p.id] = p.instances;
if (typeof p.default_instance === "string" && p.default_instance) defaults[p.id] = p.default_instance;
if (typeof p.label === "string" && p.label) labels[p.id] = p.label;
}
if (providers.length > 0) {
setComputeOptions({
providers,
instanceTypes: Object.keys(instanceTypes).length > 0 ? instanceTypes : FALLBACK_COMPUTE_OPTIONS.instanceTypes,
defaults: Object.keys(defaults).length > 0 ? defaults : FALLBACK_COMPUTE_OPTIONS.defaults,
labels: Object.keys(labels).length > 0 ? labels : FALLBACK_COMPUTE_OPTIONS.labels,
});
}
}
} catch {
// Fetch failed (offline / older server) — keep FALLBACK_COMPUTE_OPTIONS.
@@ -137,7 +157,7 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
return () => {
cancelled = true;
};
}, [workspaceId]);
}, []);
const workspaceAccess = formatAccess(data.workspaceAccess);
const maxConcurrentTasks = data.maxConcurrentTasks ? String(data.maxConcurrentTasks) : "platform-managed";
@@ -254,7 +274,7 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
label="Cloud provider"
value={normalizeProvider(form.provider)}
options={computeOptions.providers}
optionLabel={cloudProviderOptionLabel}
optionLabel={(v) => computeOptions.labels[v] ?? v}
// Switching cloud resets the instance type to the new provider's
// default (an AWS t3.* is invalid on Hetzner, etc.) — also keeps the
// instance-type dropdown below in sync with the provider's sizes.
@@ -41,11 +41,11 @@ afterEach(() => {
beforeEach(() => {
apiPatch.mockReset();
apiGet.mockReset();
// Default: compute-options fetch rejects → component keeps its in-bundle
// Default: /compute/metadata fetch rejects → component keeps its in-bundle
// fallback SSOT. Existing assertions (t3.medium / cpx31 / provider list) are
// satisfied by the fallback, which mirrors the server. Individual tests that
// exercise the fetch path override this with mockResolvedValueOnce.
apiGet.mockRejectedValue(new Error("no compute-options in this test"));
apiGet.mockRejectedValue(new Error("no /compute/metadata in this test"));
restartWorkspace.mockReset();
updateNodeData.mockReset();
});
@@ -367,18 +367,19 @@ describe("ContainerConfigTab", () => {
});
// core#2489: the provider + instance-type dropdowns are populated from the
// workspace-server SSOT (GET /workspaces/:id/compute-options), so the UI can't
// offer an option the backend then rejects. This proves the fetch drives the
// workspace-server SSOT (GET /compute/metadata), so the UI can't offer an
// option the backend then rejects. This proves the fetch drives the
// dropdowns: a server-only instance type appears once the fetch resolves.
it("populates instance-type options from the compute-options SSOT endpoint", async () => {
it("populates instance-type options from the /compute/metadata SSOT endpoint", async () => {
apiGet.mockResolvedValueOnce({
providers: ["aws", "hetzner", "gcp"],
instanceTypes: {
aws: ["t3.medium", "t3.large", "z9.future"], // z9.future is server-only
hetzner: ["cpx31"],
gcp: ["e2-standard-2"],
},
defaults: { aws: "t3.medium", hetzner: "cpx31", gcp: "e2-standard-2" },
providers: [
// Real server response shape: { id, label, default_instance, instances }.
// The "z9.future" instance is server-only — the in-bundle fallback doesn't
// list it; once the fetch resolves, it appears in the dropdown.
{ id: "aws", label: "AWS (default)", default_instance: "t3.medium", instances: ["t3.medium", "t3.large", "z9.future"] },
{ id: "hetzner", label: "Hetzner", default_instance: "cpx31", instances: ["cpx31"] },
{ id: "gcp", label: "GCP", default_instance: "e2-standard-2", instances: ["e2-standard-2"] },
],
});
render(
@@ -397,7 +398,7 @@ describe("ContainerConfigTab", () => {
/>,
);
await waitFor(() => expect(apiGet).toHaveBeenCalledWith("/workspaces/ws-opts/compute-options"));
await waitFor(() => expect(apiGet).toHaveBeenCalledWith("/compute/metadata"));
// The server-only instance type appears in the dropdown after the fetch.
await waitFor(() =>
expect(
@@ -406,9 +407,9 @@ describe("ContainerConfigTab", () => {
);
});
// core#2489: if the compute-options fetch fails, the dropdowns must stay usable
// via the in-bundle fallback (no crash, no empty selector).
it("falls back to the in-bundle option set when the compute-options fetch fails", async () => {
// core#2489: if the /compute/metadata fetch fails, the dropdowns must stay
// usable via the in-bundle fallback (no crash, no empty selector).
it("falls back to the in-bundle option set when the /compute/metadata fetch fails", async () => {
apiGet.mockRejectedValueOnce(new Error("network down"));
render(
@@ -221,6 +221,37 @@ func validateWorkspaceDisplayDimensions(width, height int) error {
return nil
}
// computeProviderMetadata is one provider entry in the GET /compute/metadata
// response. The JSON tags are the wire contract read by clients.
type computeProviderMetadata struct {
// ID is the provider identifier, e.g. "aws".
ID string `json:"id"`
// Label is the display text for the provider, e.g. "AWS (default)".
Label string `json:"label"`
// DefaultInstance is the instance type used as the provider's default.
DefaultInstance string `json:"default_instance"`
// Instances lists the instance types offered for the provider.
Instances []string `json:"instances"`
}
// computeMetadataResponse is the JSON body returned by ComputeMetadata: a
// single "providers" array of per-provider entries.
type computeMetadataResponse struct {
// Providers holds one entry per provider, in the order the handler builds them.
Providers []computeProviderMetadata `json:"providers"`
}
// ComputeMetadata is the gin handler for GET /compute/metadata. It answers 200
// with every cloud provider and, for each, its display label, default instance
// type and instance-type list, so clients such as the canvas
// ContainerConfigTab can render provider/instance selectors from server data.
//
// The endpoint is public (no auth): the payload is platform constraints, not
// org secrets.
func ComputeMetadata(c *gin.Context) {
	aws := computeProviderMetadata{
		ID:              "aws",
		Label:           "AWS (default)",
		DefaultInstance: "t3.medium",
		Instances: []string{
			"t3.medium", "t3.large", "t3.xlarge", "t3.2xlarge", "m6i.large", "m6i.xlarge", "c6i.xlarge",
		},
	}
	gcp := computeProviderMetadata{
		ID:              "gcp",
		Label:           "GCP",
		DefaultInstance: "e2-standard-2",
		Instances: []string{
			"e2-small", "e2-medium", "e2-standard-2", "e2-standard-4", "e2-standard-8",
		},
	}
	hetzner := computeProviderMetadata{
		ID:              "hetzner",
		Label:           "Hetzner",
		DefaultInstance: "cpx31",
		Instances: []string{
			"cpx11", "cpx21", "cpx31", "cpx41", "cpx51", "cax11", "cax21", "cax31", "cax41",
		},
	}

	// Fixed aws, gcp, hetzner order keeps tests and UI dropdowns stable.
	c.JSON(200, computeMetadataResponse{
		Providers: []computeProviderMetadata{aws, gcp, hetzner},
	})
}
func workspaceComputeIsZero(compute models.WorkspaceCompute) bool {
return compute.InstanceType == "" &&
compute.Volume.RootGB == 0 &&
@@ -798,3 +798,45 @@ func TestWorkspaceDisplaySession_NonDisplayWorkspaceDoesNotProxy(t *testing.T) {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestComputeMetadata_ReturnsProviderAllowlist invokes the handler directly on
// a gin test context and pins its response: HTTP 200 and exactly three
// providers in aws, gcp, hetzner order, each with the expected label, default
// instance and number of instance types.
func TestComputeMetadata_ReturnsProviderAllowlist(t *testing.T) {
	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Request = httptest.NewRequest("GET", "/compute/metadata", nil)

	ComputeMetadata(ctx)

	if rec.Code != http.StatusOK {
		t.Fatalf("expected status 200, got %d: %s", rec.Code, rec.Body.String())
	}

	var resp computeMetadataResponse
	if err := json.Unmarshal(rec.Body.Bytes(), &resp); err != nil {
		t.Fatalf("failed to parse response: %v", err)
	}
	// Fatal on a count mismatch so the positional indexing below cannot panic.
	if n := len(resp.Providers); n != 3 {
		t.Fatalf("expected 3 providers, got %d", n)
	}

	type expectation struct {
		id, label, defaultInstance string
		instanceCount              int
	}
	expected := []expectation{
		{"aws", "AWS (default)", "t3.medium", 7},
		{"gcp", "GCP", "e2-standard-2", 5},
		{"hetzner", "Hetzner", "cpx31", 9},
	}
	for i := range expected {
		exp, got := expected[i], resp.Providers[i]
		if got.ID != exp.id {
			t.Errorf("providers[%d].id = %q, want %q", i, got.ID, exp.id)
		}
		if got.Label != exp.label {
			t.Errorf("providers[%d].label = %q, want %q", i, got.Label, exp.label)
		}
		if got.DefaultInstance != exp.defaultInstance {
			t.Errorf("providers[%d].default_instance = %q, want %q", i, got.DefaultInstance, exp.defaultInstance)
		}
		if count := len(got.Instances); count != exp.instanceCount {
			t.Errorf("providers[%d].instances len = %d, want %d", i, count, exp.instanceCount)
		}
	}
}
@@ -0,0 +1,124 @@
package router
import (
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/handlers"
"github.com/gin-gonic/gin"
)
// compute_metadata_route_test.go — issue #2489 SSOT endpoint.
//
// The /compute/metadata route is the single point every consumer reads
// to learn cloud-provider + instance-type allowlists. Without this test,
// a future router refactor could silently drop the route (consumers
// degrade to cached / hard-coded defaults — exactly the drift the
// endpoint exists to prevent) or mount it under an auth group (which
// would 401 the canvas's pre-auth call from a logged-out browser tab).
//
// The contract being pinned:
// 1. The route is registered and reachable.
// 2. The route is PUBLIC — no AdminAuth, no WorkspaceAuth.
// 3. The wire shape matches the canvas's expectation (same JSON keys).
// 4. The in-tree Go consumer (handlers.workspaceComputeInstanceAllowlist)
// AGREES with the endpoint's value.
// buildComputeMetadataEngine returns a gin engine in test mode with only
// GET /compute/metadata registered. The route is mounted here by the helper
// itself on a bare gin.New() engine; it is not obtained from the router's
// Setup function.
func buildComputeMetadataEngine(t *testing.T) *gin.Engine {
	t.Helper()
	gin.SetMode(gin.TestMode)

	engine := gin.New()
	engine.GET("/compute/metadata", handlers.ComputeMetadata)
	return engine
}
// TestComputeMetadata_Public_Returns200 sends an unauthenticated GET to
// /compute/metadata on the helper-built engine and requires a 200 response.
func TestComputeMetadata_Public_Returns200(t *testing.T) {
	engine := buildComputeMetadataEngine(t)
	rec := httptest.NewRecorder()

	engine.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/compute/metadata", nil))

	if status := rec.Code; status != http.StatusOK {
		t.Fatalf("status: want 200, got %d (body=%s)", status, rec.Body.String())
	}
}
// TestComputeMetadata_ReturnsExpectedShape decodes the endpoint's body into a
// locally declared struct (so the JSON key names are pinned independently of
// the handler's own types) and checks the three providers, in order, for id,
// label, default instance and instance count.
func TestComputeMetadata_ReturnsExpectedShape(t *testing.T) {
	engine := buildComputeMetadataEngine(t)
	rec := httptest.NewRecorder()
	engine.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/compute/metadata", nil))

	type wireProvider struct {
		ID              string   `json:"id"`
		Label           string   `json:"label"`
		DefaultInstance string   `json:"default_instance"`
		Instances       []string `json:"instances"`
	}
	var body struct {
		Providers []wireProvider `json:"providers"`
	}
	if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
		t.Fatalf("unmarshal response: %v (body=%s)", err, rec.Body.String())
	}
	// Fatal on a count mismatch so the positional indexing below cannot panic.
	if n := len(body.Providers); n != 3 {
		t.Fatalf("expected 3 providers, got %d", n)
	}

	type expectation struct {
		id, label, defaultInstance string
		instanceCount              int
	}
	expected := []expectation{
		{"aws", "AWS (default)", "t3.medium", 7},
		{"gcp", "GCP", "e2-standard-2", 5},
		{"hetzner", "Hetzner", "cpx31", 9},
	}
	for i := range expected {
		exp, got := expected[i], body.Providers[i]
		if got.ID != exp.id {
			t.Errorf("providers[%d].id = %q, want %q", i, got.ID, exp.id)
		}
		if got.Label != exp.label {
			t.Errorf("providers[%d].label = %q, want %q", i, got.Label, exp.label)
		}
		if got.DefaultInstance != exp.defaultInstance {
			t.Errorf("providers[%d].default_instance = %q, want %q", i, got.DefaultInstance, exp.defaultInstance)
		}
		if count := len(got.Instances); count != exp.instanceCount {
			t.Errorf("providers[%d].instances len = %d, want %d", i, count, exp.instanceCount)
		}
	}
}
// TestComputeMetadata_AgreesWithInTreeAllowlist checks the internal
// consistency of the endpoint's payload.
//
// It does NOT compare against the PATCH validator's allowlist: this file's
// header comment names that allowlist handlers.workspaceComputeInstanceAllowlist,
// an unexported identifier that package router cannot reference. What is
// verified here, from the wire payload alone:
//   - the request succeeds with 200 (a failed request must not pass vacuously);
//   - at least one provider is returned;
//   - every provider has a non-empty, unique id and a non-empty instance list;
//   - every provider's default_instance is one of its own instances, so a
//     client that preselects the default never submits a size the same
//     payload does not list.
func TestComputeMetadata_AgreesWithInTreeAllowlist(t *testing.T) {
	r := buildComputeMetadataEngine(t)
	req := httptest.NewRequest(http.MethodGet, "/compute/metadata", nil)
	w := httptest.NewRecorder()
	r.ServeHTTP(w, req)

	if w.Code != http.StatusOK {
		t.Fatalf("status: want 200, got %d (body=%s)", w.Code, w.Body.String())
	}

	var got struct {
		Providers []struct {
			ID              string   `json:"id"`
			DefaultInstance string   `json:"default_instance"`
			Instances       []string `json:"instances"`
		} `json:"providers"`
	}
	if err := json.Unmarshal(w.Body.Bytes(), &got); err != nil {
		t.Fatalf("unmarshal response: %v (body=%s)", err, w.Body.String())
	}
	// Without this guard the loop below would run zero times and the test
	// would pass on an empty payload.
	if len(got.Providers) == 0 {
		t.Fatalf("response lists no providers (body=%s)", w.Body.String())
	}

	seen := make(map[string]bool, len(got.Providers))
	for i, p := range got.Providers {
		if p.ID == "" {
			t.Errorf("providers[%d] has empty id", i)
			continue
		}
		if seen[p.ID] {
			t.Errorf("provider %q is listed more than once", p.ID)
		}
		seen[p.ID] = true

		if len(p.Instances) == 0 {
			t.Errorf("provider %q has empty instances", p.ID)
			continue
		}

		defaultListed := false
		for _, inst := range p.Instances {
			if inst == p.DefaultInstance {
				defaultListed = true
				break
			}
		}
		if !defaultListed {
			t.Errorf("provider %q default_instance %q is not in its instances %v", p.ID, p.DefaultInstance, p.Instances)
		}
	}
}
@@ -132,6 +132,13 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
c.JSON(200, uploads.DefaultUploadLimits())
})
// Compute metadata — public, no auth. SSOT for cloud-provider +
// instance-type allowlists so the canvas ContainerConfigTab (and any
// other client) renders selectors from the same source the PATCH
// validator uses. Prevents drift where the UI offers an instance the
// backend rejects (#2489).
r.GET("/compute/metadata", handlers.ComputeMetadata)
// /admin/liveness — per-subsystem last-tick timestamps. Operators read this
// to catch stuck-but-not-crashed goroutines (the failure mode that caused
// the 12h scheduler outage of 2026-04-14, issue #85). Any subsystem whose