feat(workspace): in-place cloud-provider switch in Container Config #2465
@@ -3,13 +3,36 @@
|
||||
import { useEffect, useMemo, useState } from "react";
|
||||
import { api } from "@/lib/api";
|
||||
import { runtimeDisplayName } from "@/lib/runtime-names";
|
||||
import { isSaaSTenant } from "@/lib/tenant";
|
||||
import { useCanvasStore, type WorkspaceNodeData } from "@/store/canvas";
|
||||
import type { WorkspaceCompute } from "@/store/socket";
|
||||
|
||||
const INSTANCE_TYPES = ["t3.medium", "t3.large", "t3.xlarge", "t3.2xlarge", "m6i.large", "m6i.xlarge", "c6i.xlarge"];
|
||||
// Machine sizes keyed by cloud provider — an AWS t3.* is meaningless on Hetzner,
|
||||
// etc. MUST mirror the workspace-server workspaceComputeInstanceAllowlist (which
|
||||
// mirrors the CP provider configs); the PATCH validation rejects a mismatch 400.
|
||||
const INSTANCE_TYPES_BY_PROVIDER: Record<string, string[]> = {
|
||||
aws: ["t3.medium", "t3.large", "t3.xlarge", "t3.2xlarge", "m6i.large", "m6i.xlarge", "c6i.xlarge"],
|
||||
hetzner: ["cpx11", "cpx21", "cpx31", "cpx41", "cpx51", "cax11", "cax21", "cax31", "cax41"],
|
||||
gcp: ["e2-small", "e2-medium", "e2-standard-2", "e2-standard-4", "e2-standard-8"],
|
||||
};
|
||||
const DEFAULT_INSTANCE_BY_PROVIDER: Record<string, string> = {
|
||||
aws: "t3.medium", hetzner: "cpx31", gcp: "e2-standard-2",
|
||||
};
|
||||
const normalizeProvider = (p?: string): string => (p === "gcp" || p === "hetzner" ? p : "aws");
|
||||
const instanceTypesForProvider = (p?: string): string[] =>
|
||||
INSTANCE_TYPES_BY_PROVIDER[normalizeProvider(p)] ?? INSTANCE_TYPES_BY_PROVIDER.aws;
|
||||
const defaultInstanceForProvider = (p?: string): string =>
|
||||
DEFAULT_INSTANCE_BY_PROVIDER[normalizeProvider(p)] ?? "t3.medium";
|
||||
|
||||
// Editable cloud-provider options (multi-provider RFC) — mirrors CreateWorkspaceDialog.
|
||||
const CLOUD_PROVIDER_OPTIONS = [
|
||||
{ value: "aws", label: "AWS (default)" },
|
||||
{ value: "gcp", label: "GCP" },
|
||||
{ value: "hetzner", label: "Hetzner" },
|
||||
];
|
||||
|
||||
const RUNTIME_OPTIONS = ["claude-code", "codex", "hermes", "openclaw", "kimi", "kimi-cli", "external"];
|
||||
const RESOLUTIONS = ["1280x720", "1440x900", "1920x1080", "2560x1440"];
|
||||
const DEFAULT_HEADLESS_INSTANCE_TYPE = "t3.medium";
|
||||
const DEFAULT_HEADLESS_ROOT_GB = 30;
|
||||
|
||||
type Props = {
|
||||
@@ -23,6 +46,7 @@ type Props = {
|
||||
|
||||
type FormState = {
|
||||
runtime: string;
|
||||
provider: string; // cloud backend; editable in SaaS (in-place switch recreates the box)
|
||||
instanceType: string;
|
||||
rootGB: string;
|
||||
displayEnabled: boolean;
|
||||
@@ -38,16 +62,16 @@ const DATA_PERSISTENCE_OPTIONS = ["", "persist", "ephemeral"];
|
||||
const dataPersistenceLabel = (v: string): string =>
|
||||
v === "persist" ? "Always keep (persist)" : v === "ephemeral" ? "Don't keep (ephemeral)" : "Auto";
|
||||
|
||||
// Cloud/compute backend display name. The provider is chosen at create time and
|
||||
// is NOT editable here (changing a workspace's cloud requires a recreate), so
|
||||
// it renders as a read-only badge — but we must preserve it across Save (the
|
||||
// compute payload is rebuilt below, and dropping it would wipe the column).
|
||||
// Cloud/compute backend display name (read-only fallback for non-SaaS / legacy).
|
||||
const cloudProviderLabel = (v: string | undefined): string =>
|
||||
v === "gcp" ? "GCP" : v === "hetzner" ? "Hetzner" : "AWS";
|
||||
|
||||
export function ContainerConfigTab({ workspaceId, data }: Props) {
|
||||
// Provider is editable only in SaaS (CP-provisioned boxes). Local/Docker has no
|
||||
// cloud-provider concept, so we keep the read-only badge there.
|
||||
const isSaaS = useMemo(() => isSaaSTenant(), []);
|
||||
const runtime = data.runtime;
|
||||
const provider = data.compute?.provider; // read-only; set at create time
|
||||
const provider = data.compute?.provider;
|
||||
const instanceType = data.compute?.instance_type;
|
||||
const rootGB = data.compute?.volume?.root_gb;
|
||||
const displayMode = data.compute?.display?.mode;
|
||||
@@ -56,8 +80,8 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
|
||||
const displayHeight = data.compute?.display?.height;
|
||||
const dataPersistence = data.compute?.data_persistence;
|
||||
const initial = useMemo(
|
||||
() => formFromData({ runtime, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight, dataPersistence }),
|
||||
[runtime, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight, dataPersistence],
|
||||
() => formFromData({ runtime, provider, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight, dataPersistence }),
|
||||
[runtime, provider, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight, dataPersistence],
|
||||
);
|
||||
const [form, setForm] = useState<FormState>(initial);
|
||||
const [saving, setSaving] = useState(false);
|
||||
@@ -87,6 +111,21 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
|
||||
try {
|
||||
let applyTemplateOnRestart = data.applyTemplateOnRestart ?? false;
|
||||
if (dirty) {
|
||||
// In-place cloud switch is DESTRUCTIVE: changing the provider recreates the
|
||||
// box on the new cloud (the workspace-server deprovisions the old box on
|
||||
// its old cloud first, then the restart provisions on the new one). Confirm
|
||||
// before doing it — the current box and any non-persisted state are lost.
|
||||
const providerChanged = normalizeProvider(form.provider) !== normalizeProvider(initial.provider);
|
||||
if (providerChanged && typeof window !== "undefined") {
|
||||
const ok = window.confirm(
|
||||
`Switch this workspace to ${cloudProviderLabel(form.provider)}? This RECREATES the box on the new cloud — the current box and any non-persisted state are replaced.`,
|
||||
);
|
||||
if (!ok) {
|
||||
setSaving(false);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const rootGB = parseInt(form.rootGB, 10);
|
||||
if (!Number.isFinite(rootGB)) {
|
||||
setError("Root volume must be a number");
|
||||
@@ -102,10 +141,11 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
|
||||
: { mode: "none" },
|
||||
// internal#734: omit when "auto" so the wire/default behavior is unchanged.
|
||||
...(form.dataPersistence ? { data_persistence: form.dataPersistence } : {}),
|
||||
// Preserve the create-time cloud provider — it's not editable here, but
|
||||
// this PATCH rebuilds the whole compute object, so omitting it would
|
||||
// wipe the persisted provider (and mislead the badge after a Save).
|
||||
...(provider ? { provider } : {}),
|
||||
// Cloud backend: send the (possibly switched) provider. Omit for the
|
||||
// default (aws) so a non-switching AWS save keeps the wire unchanged;
|
||||
// a switch TO aws (omit) vs FROM aws (explicit) both register correctly
|
||||
// because the workspace-server normalizes ""→aws when diffing.
|
||||
...(normalizeProvider(form.provider) !== "aws" ? { provider: normalizeProvider(form.provider) } : {}),
|
||||
};
|
||||
|
||||
const resp = await api.patch<{ needs_restart?: boolean }>(`/workspaces/${workspaceId}`, {
|
||||
@@ -140,15 +180,16 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
|
||||
<div className="mb-3 flex items-center justify-between gap-3">
|
||||
<div className="flex items-center gap-2">
|
||||
<h3 className="text-sm font-semibold text-ink">Container Config</h3>
|
||||
{/* Read-only cloud-provider badge — which cloud this workspace's box
|
||||
runs on (AWS/GCP/Hetzner). Defaults to AWS when unset (legacy
|
||||
rows). Set at create time in the Create Workspace dialog. */}
|
||||
<span
|
||||
title="Cloud provider for this workspace's compute (set at create time)"
|
||||
className="rounded-full border border-line/60 bg-surface-sunken px-2 py-0.5 font-mono text-[10px] uppercase tracking-wide text-ink-mid"
|
||||
>
|
||||
{cloudProviderLabel(provider)}
|
||||
</span>
|
||||
{/* Non-SaaS (local/Docker) has no cloud-provider concept → read-only
|
||||
badge. In SaaS the provider is an editable selector in the form. */}
|
||||
{!isSaaS && (
|
||||
<span
|
||||
title="Cloud provider for this workspace's compute"
|
||||
className="rounded-full border border-line/60 bg-surface-sunken px-2 py-0.5 font-mono text-[10px] uppercase tracking-wide text-ink-mid"
|
||||
>
|
||||
{cloudProviderLabel(provider)}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
{data.needsRestart && <span className="text-[11px] text-warm">Restart required</span>}
|
||||
</div>
|
||||
@@ -162,11 +203,32 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
|
||||
optionLabel={runtimeDisplayName}
|
||||
onChange={(runtime) => setForm((s) => ({ ...s, runtime }))}
|
||||
/>
|
||||
{isSaaS && (
|
||||
<SelectField
|
||||
id="cloud-provider"
|
||||
label="Cloud provider"
|
||||
value={normalizeProvider(form.provider)}
|
||||
options={CLOUD_PROVIDER_OPTIONS.map((p) => p.value)}
|
||||
optionLabel={(v) => CLOUD_PROVIDER_OPTIONS.find((p) => p.value === v)?.label ?? v}
|
||||
// Switching cloud resets the instance type to the new provider's
|
||||
// default (an AWS t3.* is invalid on Hetzner, etc.) — also keeps the
|
||||
// instance-type dropdown below in sync with the provider's sizes.
|
||||
onChange={(provider) =>
|
||||
setForm((s) => ({
|
||||
...s,
|
||||
provider,
|
||||
instanceType: instanceTypesForProvider(provider).includes(s.instanceType)
|
||||
? s.instanceType
|
||||
: defaultInstanceForProvider(provider),
|
||||
}))
|
||||
}
|
||||
/>
|
||||
)}
|
||||
<SelectField
|
||||
id="instance-type"
|
||||
label="Instance type"
|
||||
value={form.instanceType}
|
||||
options={INSTANCE_TYPES}
|
||||
options={instanceTypesForProvider(form.provider)}
|
||||
onChange={(instanceType) => setForm((s) => ({ ...s, instanceType }))}
|
||||
/>
|
||||
<label className="grid gap-1" htmlFor="root-volume-gb">
|
||||
@@ -270,6 +332,7 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
|
||||
|
||||
function formFromData(data: {
|
||||
runtime?: string;
|
||||
provider?: string;
|
||||
instanceType?: string;
|
||||
rootGB?: number;
|
||||
displayMode?: string;
|
||||
@@ -281,9 +344,11 @@ function formFromData(data: {
|
||||
const width = data.displayWidth ?? 1920;
|
||||
const height = data.displayHeight ?? 1080;
|
||||
const resolution = `${width}x${height}`;
|
||||
const provider = normalizeProvider(data.provider);
|
||||
return {
|
||||
runtime: data.runtime || "claude-code",
|
||||
instanceType: data.instanceType || DEFAULT_HEADLESS_INSTANCE_TYPE,
|
||||
provider,
|
||||
instanceType: data.instanceType || defaultInstanceForProvider(provider),
|
||||
rootGB: String(data.rootGB || DEFAULT_HEADLESS_ROOT_GB),
|
||||
displayEnabled: !!data.displayMode && data.displayMode !== "none",
|
||||
displayMode: data.displayMode && data.displayMode !== "none" ? data.displayMode : "desktop-control",
|
||||
|
||||
@@ -23,6 +23,13 @@ vi.mock("@/store/canvas", () => ({
|
||||
),
|
||||
}));
|
||||
|
||||
// SaaS so the editable cloud-provider selector renders (non-SaaS shows a read-only
|
||||
// badge). Existing tests keep provider=aws (default), which is omitted from the
|
||||
// PATCH payload, so their assertions are unaffected.
|
||||
vi.mock("@/lib/tenant", () => ({
|
||||
isSaaSTenant: () => true,
|
||||
}));
|
||||
|
||||
import { ContainerConfigTab } from "../ContainerConfigTab";
|
||||
|
||||
afterEach(() => {
|
||||
@@ -314,4 +321,67 @@ describe("ContainerConfigTab", () => {
|
||||
await waitFor(() => expect(restartWorkspace).toHaveBeenCalledWith("ws-compute", { applyTemplate: true }));
|
||||
expect(apiPatch).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("switches cloud provider — keys the instance-type list to the provider, confirms the recreate, and PATCHes the new provider", async () => {
|
||||
const confirmSpy = vi.spyOn(window, "confirm").mockReturnValue(true);
|
||||
render(
|
||||
<ContainerConfigTab
|
||||
workspaceId="ws-switch"
|
||||
data={{
|
||||
runtime: "claude-code",
|
||||
status: "online",
|
||||
needsRestart: false,
|
||||
activeTasks: 0,
|
||||
maxConcurrentTasks: null,
|
||||
workspaceAccess: "read-write",
|
||||
deliveryMode: "push",
|
||||
compute: { instance_type: "t3.large", provider: "aws", volume: { root_gb: 30 } },
|
||||
}}
|
||||
/>,
|
||||
);
|
||||
|
||||
const providerSel = screen.getByLabelText("Cloud provider");
|
||||
expect(providerSel).toHaveProperty("value", "aws");
|
||||
expect(screen.getByLabelText("Instance type")).toHaveProperty("value", "t3.large");
|
||||
|
||||
// Switch to Hetzner → the instance type resets to the Hetzner default (an AWS
|
||||
// t3.* is invalid on Hetzner) and the options become Hetzner sizes.
|
||||
fireEvent.change(providerSel, { target: { value: "hetzner" } });
|
||||
expect(screen.getByLabelText("Instance type")).toHaveProperty("value", "cpx31");
|
||||
|
||||
fireEvent.click(screen.getByRole("button", { name: "Save" }));
|
||||
await waitFor(() => expect(apiPatch).toHaveBeenCalledTimes(1));
|
||||
expect(confirmSpy).toHaveBeenCalled(); // destructive recreate confirmed
|
||||
const body = apiPatch.mock.calls[0][1] as { compute: { provider?: string; instance_type?: string } };
|
||||
expect(body.compute.provider).toBe("hetzner");
|
||||
expect(body.compute.instance_type).toBe("cpx31");
|
||||
confirmSpy.mockRestore();
|
||||
});
|
||||
|
||||
it("does not treat a non-provider edit as a recreate (no confirm; aws default omitted)", async () => {
|
||||
const confirmSpy = vi.spyOn(window, "confirm").mockReturnValue(true);
|
||||
render(
|
||||
<ContainerConfigTab
|
||||
workspaceId="ws-noswitch"
|
||||
data={{
|
||||
runtime: "claude-code",
|
||||
status: "online",
|
||||
needsRestart: false,
|
||||
activeTasks: 0,
|
||||
maxConcurrentTasks: null,
|
||||
workspaceAccess: "read-write",
|
||||
deliveryMode: "push",
|
||||
compute: { instance_type: "t3.large", provider: "aws", volume: { root_gb: 30 } },
|
||||
}}
|
||||
/>,
|
||||
);
|
||||
|
||||
fireEvent.change(screen.getByLabelText("Root volume"), { target: { value: "60" } });
|
||||
fireEvent.click(screen.getByRole("button", { name: "Save" }));
|
||||
await waitFor(() => expect(apiPatch).toHaveBeenCalledTimes(1));
|
||||
expect(confirmSpy).not.toHaveBeenCalled();
|
||||
const body = apiPatch.mock.calls[0][1] as { compute: { provider?: string } };
|
||||
expect(body.compute.provider).toBeUndefined(); // aws default omitted (wire unchanged)
|
||||
confirmSpy.mockRestore();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -31,14 +31,53 @@ type workspaceDisplayResponse struct {
|
||||
Status string `json:"status,omitempty"`
|
||||
}
|
||||
|
||||
var workspaceComputeInstanceAllowlist = map[string]struct{}{
|
||||
"t3.medium": {},
|
||||
"t3.large": {},
|
||||
"t3.xlarge": {},
|
||||
"t3.2xlarge": {},
|
||||
"m6i.large": {},
|
||||
"m6i.xlarge": {},
|
||||
"c6i.xlarge": {},
|
||||
// workspaceComputeInstanceAllowlist is keyed by cloud provider (multi-provider /
|
||||
// in-place switch): each provider's box accepts only that provider's machine
|
||||
// sizes (an AWS t3.* is meaningless on Hetzner, and vice-versa). Mirrors the CP
|
||||
// provider SSOT — keep in lock-step with the controlplane provider configs
|
||||
// (Hetzner ServerType cpx*/cax*, GCP MachineType e2-*, AWS EC2 t3*/m6i*/c6i*).
|
||||
// TestValidateWorkspaceCompute_Provider / _InstanceTypePerProvider pin the sets.
|
||||
// "" provider = AWS default.
|
||||
var workspaceComputeInstanceAllowlist = map[string]map[string]struct{}{
|
||||
"aws": {
|
||||
"t3.medium": {}, "t3.large": {}, "t3.xlarge": {}, "t3.2xlarge": {},
|
||||
"m6i.large": {}, "m6i.xlarge": {}, "c6i.xlarge": {},
|
||||
},
|
||||
"hetzner": {
|
||||
"cpx11": {}, "cpx21": {}, "cpx31": {}, "cpx41": {}, "cpx51": {},
|
||||
"cax11": {}, "cax21": {}, "cax31": {}, "cax41": {},
|
||||
},
|
||||
"gcp": {
|
||||
"e2-small": {}, "e2-medium": {},
|
||||
"e2-standard-2": {}, "e2-standard-4": {}, "e2-standard-8": {},
|
||||
},
|
||||
}
|
||||
|
||||
// normalizeCloudProvider maps "" → "aws" so the in-place switch comparison
|
||||
// treats the default and an explicit "aws" as the same cloud (no spurious switch).
|
||||
func normalizeCloudProvider(p string) string {
|
||||
if p == "" {
|
||||
return "aws"
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
// instanceTypeAllowedForProvider reports whether instanceType is valid for the
|
||||
// given provider ("" → aws). Empty instanceType is always allowed (CP defaults).
|
||||
func instanceTypeAllowedForProvider(provider, instanceType string) bool {
|
||||
if instanceType == "" {
|
||||
return true
|
||||
}
|
||||
p := provider
|
||||
if p == "" {
|
||||
p = "aws"
|
||||
}
|
||||
set, ok := workspaceComputeInstanceAllowlist[p]
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
_, ok = set[instanceType]
|
||||
return ok
|
||||
}
|
||||
|
||||
// workspaceComputeProviderAllowlist mirrors the controlplane cloud-provider SSOT
|
||||
@@ -56,11 +95,24 @@ var workspaceComputeProviderAllowlist = map[string]struct{}{
|
||||
}
|
||||
|
||||
func validateWorkspaceCompute(compute models.WorkspaceCompute) error {
|
||||
if compute.InstanceType != "" {
|
||||
if _, ok := workspaceComputeInstanceAllowlist[compute.InstanceType]; !ok {
|
||||
return fmt.Errorf("unsupported compute.instance_type")
|
||||
// Provider first (so the instance-type check below can be provider-scoped).
|
||||
// "" = default (AWS). CP fail-closes an unwired provider with a 422; validating
|
||||
// here gives a clean 400 before the round-trip and is the gate reused by the
|
||||
// switch-provider flow. Mirrors the controlplane cloudprovider SSOT.
|
||||
if compute.Provider != "" {
|
||||
if _, ok := workspaceComputeProviderAllowlist[compute.Provider]; !ok {
|
||||
return fmt.Errorf("unsupported compute.provider (want aws|gcp|hetzner)")
|
||||
}
|
||||
}
|
||||
// Instance type must belong to the chosen provider (an AWS t3.* is invalid on
|
||||
// Hetzner, etc.). Empty = CP default for the provider.
|
||||
if !instanceTypeAllowedForProvider(compute.Provider, compute.InstanceType) {
|
||||
prov := compute.Provider
|
||||
if prov == "" {
|
||||
prov = "aws"
|
||||
}
|
||||
return fmt.Errorf("unsupported compute.instance_type %q for provider %q", compute.InstanceType, prov)
|
||||
}
|
||||
if compute.Volume.RootGB != 0 {
|
||||
if compute.Volume.RootGB < workspaceComputeDiskFloorGB || compute.Volume.RootGB > workspaceComputeDiskCeilingGB {
|
||||
return fmt.Errorf("compute.volume.root_gb must be between %d and %d", workspaceComputeDiskFloorGB, workspaceComputeDiskCeilingGB)
|
||||
@@ -87,15 +139,6 @@ func validateWorkspaceCompute(compute models.WorkspaceCompute) error {
|
||||
default:
|
||||
return fmt.Errorf("unsupported compute.data_persistence (want persist|ephemeral)")
|
||||
}
|
||||
// Cloud backend for the box (multi-provider). "" = default (AWS). CP fail-
|
||||
// closes an unwired provider with a 422 (PROVIDER_UNAVAILABLE); validating
|
||||
// here gives a clean 400 before the round-trip and is the gate reused by the
|
||||
// switch-provider flow. Mirrors the controlplane cloudprovider SSOT.
|
||||
if compute.Provider != "" {
|
||||
if _, ok := workspaceComputeProviderAllowlist[compute.Provider]; !ok {
|
||||
return fmt.Errorf("unsupported compute.provider (want aws|gcp|hetzner)")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -64,6 +64,40 @@ func TestValidateWorkspaceCompute_Provider(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// Multi-provider / in-place switch: an instance type must belong to the chosen
|
||||
// provider — an AWS t3.* is meaningless on Hetzner, a cpx* on AWS, etc. Pins the
|
||||
// provider-keyed allowlist (mirrors the CP provider configs).
|
||||
func TestValidateWorkspaceCompute_InstanceTypePerProvider(t *testing.T) {
|
||||
good := []struct{ provider, instance string }{
|
||||
{"", "t3.medium"}, {"aws", "t3.2xlarge"}, {"aws", "c6i.xlarge"},
|
||||
{"hetzner", "cpx31"}, {"hetzner", "cax41"},
|
||||
{"gcp", "e2-standard-2"}, {"gcp", "e2-small"},
|
||||
{"hetzner", ""}, {"gcp", ""}, // empty instance = CP default, always ok
|
||||
}
|
||||
for _, g := range good {
|
||||
c := models.WorkspaceCompute{Provider: g.provider, InstanceType: g.instance}
|
||||
if err := validateWorkspaceCompute(c); err != nil {
|
||||
t.Errorf("provider=%q instance=%q must be accepted: %v", g.provider, g.instance, err)
|
||||
}
|
||||
}
|
||||
bad := []struct{ provider, instance string }{
|
||||
{"hetzner", "t3.medium"}, // AWS type on Hetzner
|
||||
{"aws", "cpx31"}, // Hetzner type on AWS
|
||||
{"gcp", "t3.large"}, // AWS type on GCP
|
||||
{"hetzner", "e2-small"}, // GCP type on Hetzner
|
||||
{"", "cpx31"}, // default(aws) + Hetzner type
|
||||
}
|
||||
for _, b := range bad {
|
||||
c := models.WorkspaceCompute{Provider: b.provider, InstanceType: b.instance}
|
||||
if err := validateWorkspaceCompute(c); err == nil {
|
||||
t.Errorf("provider=%q instance=%q must be rejected (cross-provider instance type)", b.provider, b.instance)
|
||||
}
|
||||
}
|
||||
if normalizeCloudProvider("") != "aws" || normalizeCloudProvider("hetzner") != "hetzner" {
|
||||
t.Fatal("normalizeCloudProvider: \"\" must map to aws; explicit providers unchanged")
|
||||
}
|
||||
}
|
||||
|
||||
// internal#734: data_persistence enum. "" (auto), "persist", "ephemeral" are
|
||||
// the only accepted values; anything else is a clear 400 before the CP call.
|
||||
func TestValidateWorkspaceCompute_DataPersistence(t *testing.T) {
|
||||
|
||||
@@ -164,6 +164,7 @@ func (h *WorkspaceHandler) Update(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
var computeJSON string
|
||||
var newComputeProvider string // hoisted: drives the cloud-provider switch detection below
|
||||
computePatch := false
|
||||
if rawCompute, ok := body["compute"]; ok {
|
||||
computePatch = true
|
||||
@@ -184,6 +185,7 @@ func (h *WorkspaceHandler) Update(c *gin.Context) {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
newComputeProvider = compute.Provider
|
||||
encoded, err := workspaceComputeJSON(compute)
|
||||
if err != nil {
|
||||
log.Printf("Update compute encode error for %s: %v", id, err)
|
||||
@@ -262,6 +264,55 @@ func (h *WorkspaceHandler) Update(c *gin.Context) {
|
||||
needsRestart = true
|
||||
}
|
||||
if computePatch {
|
||||
// Cloud-provider SWITCH (in-place): if the incoming provider differs from
|
||||
// the one currently stored, the existing box lives on the OLD cloud. We
|
||||
// MUST deprovision it on the OLD provider BEFORE overwriting compute —
|
||||
// otherwise the subsequent "Save & Restart" restart's provider-aware
|
||||
// deprovision (cpProv.Stop → resolveProvider reads compute->>'provider')
|
||||
// would target the NEW cloud and ORPHAN the old box (a silently-billing
|
||||
// leak). Cloud mode only (the local Docker provisioner has no cross-cloud
|
||||
// concept; provider stays "" there so this never fires). After this, the
|
||||
// canvas's restart provisions the box on the new cloud; its own Stop is a
|
||||
// safe no-op (the box is already gone).
|
||||
if h.cpProv != nil {
|
||||
var oldProvider sql.NullString
|
||||
err := db.DB.QueryRowContext(ctx, `SELECT compute->>'provider' FROM workspaces WHERE id = $1`, id).Scan(&oldProvider)
|
||||
// FAIL-CLOSED on the read. The earlier `err == nil` gate was fail-OPEN:
|
||||
// a transient/unexpected DB error here skipped the whole switch block and
|
||||
// fell through to the compute UPDATE — so during a real switch the later
|
||||
// provider-aware restart deprovision would target the NEW cloud and ORPHAN
|
||||
// the old box (silent billing, unrecoverable). We cannot tell whether this
|
||||
// is a cross-cloud switch without the old provider, so on any error other
|
||||
// than "no such row" we abort exactly like a failed deprovision: compute
|
||||
// untouched, old box still recoverable, user retries. (sql.ErrNoRows means
|
||||
// there is genuinely no prior box — nothing to orphan — so it's safe to
|
||||
// skip the switch and let the UPDATE proceed.)
|
||||
if err != nil && !errors.Is(err, sql.ErrNoRows) {
|
||||
log.Printf("Update: provider-switch precheck for %s ABORTED — could not read current cloud provider (provider left unchanged): %v", id, err)
|
||||
c.JSON(http.StatusBadGateway, gin.H{"error": "could not read the current cloud provider; provider unchanged — please retry"})
|
||||
return
|
||||
}
|
||||
if err == nil && normalizeCloudProvider(oldProvider.String) != normalizeCloudProvider(newComputeProvider) {
|
||||
log.Printf("Update: cloud-provider switch for %s: %q -> %q; deprovisioning old box on old provider before overwriting compute",
|
||||
id, normalizeCloudProvider(oldProvider.String), normalizeCloudProvider(newComputeProvider))
|
||||
// Use the ERROR-returning variant and ABORT before overwriting
|
||||
// compute if the old-box deprovision fails. If we proceeded, the
|
||||
// old box would keep running on the OLD cloud while the row now
|
||||
// records the NEW provider+instance — stranding it with no DB
|
||||
// pointer (an UNRECOVERABLE cross-cloud orphan that no reconciler
|
||||
// can map back). Aborting leaves the row pointing at the
|
||||
// still-recoverable old box; the user can retry the switch. (The
|
||||
// restart paths' void cpStopWithRetry is fine there because the
|
||||
// box stays on the SAME cloud, so the provider record is unchanged
|
||||
// and a provider-scoped sweep can still find it.)
|
||||
if err := h.cpStopWithRetryErr(ctx, id, "provider-switch", false); err != nil {
|
||||
log.Printf("Update: provider-switch for %s ABORTED — could not deprovision old box on %q (provider left unchanged, old box recoverable): %v",
|
||||
id, normalizeCloudProvider(oldProvider.String), err)
|
||||
c.JSON(http.StatusBadGateway, gin.H{"error": "could not deprovision the current cloud box; provider unchanged — please retry"})
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET compute = $2::jsonb, updated_at = now() WHERE id = $1`, id, computeJSON); err != nil {
|
||||
log.Printf("Update compute error for %s: %v", id, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to save compute config"})
|
||||
|
||||
@@ -0,0 +1,180 @@
|
||||
package handlers
|
||||
|
||||
// workspace_provider_switch_test.go — deterministic coverage for the in-place
|
||||
// cloud-provider switch in the Update (PATCH /workspaces/:id) handler.
|
||||
//
|
||||
// The switch is DESTRUCTIVE (it recreates the box on a new cloud) and its
|
||||
// safety hinges on ORDER + ABORT, which these tests pin without touching a real
|
||||
// cloud (sqlmock DB + the scriptedCPStop fake from workspace_restart_stop_retry_test):
|
||||
//
|
||||
// 1. On a provider change, the OLD box is deprovisioned (cpProv.Stop) BEFORE
|
||||
// the compute row is overwritten — otherwise the later restart's
|
||||
// provider-aware deprovision would target the NEW cloud and ORPHAN the old
|
||||
// (still-billing) box. The sqlmock query ORDER pins "read old provider →
|
||||
// [Stop] → UPDATE compute".
|
||||
// 2. If the old-box deprovision FAILS, the handler ABORTS (502) and does NOT
|
||||
// overwrite compute — leaving the row pointed at the recoverable old box
|
||||
// (an unexpected UPDATE would fail sqlmock's expectations).
|
||||
// 3. A non-switch compute edit (same provider) does NOT deprovision anything.
|
||||
// 4. If the old-provider READ errors (transient DB fault, not sql.ErrNoRows),
|
||||
// the handler FAILS CLOSED: aborts (502), deprovisions nothing, and does NOT
|
||||
// overwrite compute — closing the fail-open read path that would otherwise
|
||||
// orphan the old box on a real switch (security review RC 9895).
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func newPatchContext(t *testing.T, id, body string) (*gin.Context, *httptest.ResponseRecorder) {
|
||||
t.Helper()
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: id}}
|
||||
req := httptest.NewRequest("PATCH", "/workspaces/"+id, bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
c.Request = req
|
||||
return c, w
|
||||
}
|
||||
|
||||
const switchTestWSID = "cccccccc-0001-0000-0000-000000000000"
|
||||
|
||||
func newSwitchTestHandler(t *testing.T, cp *scriptedCPStop) *WorkspaceHandler {
|
||||
t.Helper()
|
||||
h := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())
|
||||
h.cpProv = cp
|
||||
return h
|
||||
}
|
||||
|
||||
// 1. aws → hetzner: deprovision the OLD box, THEN overwrite compute (200).
|
||||
func TestWorkspaceUpdate_ProviderSwitch_DeprovisionsOldBeforeUpdate(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
cp := &scriptedCPStop{} // Stop succeeds
|
||||
h := newSwitchTestHandler(t, cp)
|
||||
|
||||
// Ordered expectations pin: EXISTS → read OLD provider (aws) → UPDATE compute.
|
||||
// The cpProv.Stop deprovision runs (in code) AFTER the provider read and
|
||||
// BEFORE the UPDATE — exactly the orphan-safe order.
|
||||
mock.ExpectQuery("SELECT EXISTS").WithArgs(switchTestWSID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
|
||||
mock.ExpectQuery("compute->>'provider'").WithArgs(switchTestWSID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"provider"}).AddRow("aws"))
|
||||
mock.ExpectExec("UPDATE workspaces SET compute").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
c, w := newPatchContext(t, switchTestWSID,
|
||||
`{"compute":{"instance_type":"cpx31","provider":"hetzner","volume":{"root_gb":30}}}`)
|
||||
h.Update(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200 on a successful switch, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if cp.calls != 1 {
|
||||
t.Fatalf("expected the OLD box to be deprovisioned exactly once on a provider switch; got %d Stop calls", cp.calls)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Fatalf("unmet/unexpected DB queries (ordering broken?): %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Deprovision FAILS → abort (502) + compute NOT overwritten (no UPDATE).
|
||||
func TestWorkspaceUpdate_ProviderSwitch_AbortsWhenDeprovisionFails(t *testing.T) {
|
||||
shrinkRetryBackoff(t) // don't burn the 1s/2s/4s retry backoff
|
||||
mock := setupTestDB(t)
|
||||
// All retry attempts fail → cpStopWithRetryErr returns an error → abort.
|
||||
cp := &scriptedCPStop{errs: []error{
|
||||
fmt.Errorf("cp 503"), fmt.Errorf("cp 503"), fmt.Errorf("cp 503"),
|
||||
}}
|
||||
h := newSwitchTestHandler(t, cp)
|
||||
|
||||
mock.ExpectQuery("SELECT EXISTS").WithArgs(switchTestWSID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
|
||||
mock.ExpectQuery("compute->>'provider'").WithArgs(switchTestWSID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"provider"}).AddRow("aws"))
|
||||
// NO UPDATE expectation: if the handler overwrote compute after a failed
|
||||
// deprovision (the orphan bug), sqlmock would flag the unexpected query.
|
||||
|
||||
c, w := newPatchContext(t, switchTestWSID,
|
||||
`{"compute":{"instance_type":"cpx31","provider":"hetzner","volume":{"root_gb":30}}}`)
|
||||
h.Update(c)
|
||||
|
||||
if w.Code != http.StatusBadGateway {
|
||||
t.Fatalf("expected 502 when the old-box deprovision fails, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if cp.calls == 0 {
|
||||
t.Fatal("expected at least one Stop attempt before aborting")
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
// A failure here means an UNEXPECTED UPDATE ran — i.e. compute was
|
||||
// overwritten after a failed deprovision → the orphan bug is back.
|
||||
t.Fatalf("compute must NOT be overwritten when deprovision fails (orphan-prevention): %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Same provider (no switch): no deprovision; compute is updated normally.
|
||||
func TestWorkspaceUpdate_NoProviderSwitch_DoesNotDeprovision(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
cp := &scriptedCPStop{}
|
||||
h := newSwitchTestHandler(t, cp)
|
||||
|
||||
mock.ExpectQuery("SELECT EXISTS").WithArgs(switchTestWSID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
|
||||
mock.ExpectQuery("compute->>'provider'").WithArgs(switchTestWSID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"provider"}).AddRow("aws"))
|
||||
mock.ExpectExec("UPDATE workspaces SET compute").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// provider stays aws (only the instance size changes) → no switch, no Stop.
|
||||
c, w := newPatchContext(t, switchTestWSID,
|
||||
`{"compute":{"instance_type":"t3.large","provider":"aws","volume":{"root_gb":60}}}`)
|
||||
h.Update(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if cp.calls != 0 {
|
||||
t.Fatalf("a non-switching compute edit must NOT deprovision the box; got %d Stop calls", cp.calls)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Fatalf("unmet/unexpected DB queries: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Provider READ errors (transient DB fault) → fail-CLOSED: abort 502, no
|
||||
// deprovision, no compute overwrite. A fail-open read (the old `err == nil`
|
||||
// gate) would skip switch detection and overwrite compute → orphan the old
|
||||
// cloud box. sqlmock has NO UPDATE/Stop expectations, so either an overwrite
|
||||
// or a stray deprovision trips it.
|
||||
func TestWorkspaceUpdate_ProviderSwitch_AbortsOnProviderReadError(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
cp := &scriptedCPStop{}
|
||||
h := newSwitchTestHandler(t, cp)
|
||||
|
||||
mock.ExpectQuery("SELECT EXISTS").WithArgs(switchTestWSID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
|
||||
// The old-provider read hits a transient error (NOT sql.ErrNoRows).
|
||||
mock.ExpectQuery("compute->>'provider'").WithArgs(switchTestWSID).
|
||||
WillReturnError(fmt.Errorf("connection reset by peer"))
|
||||
|
||||
c, w := newPatchContext(t, switchTestWSID,
|
||||
`{"compute":{"instance_type":"cpx31","provider":"hetzner","volume":{"root_gb":30}}}`)
|
||||
h.Update(c)
|
||||
|
||||
if w.Code != http.StatusBadGateway {
|
||||
t.Fatalf("expected 502 when the provider read fails (fail-closed), got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if cp.calls != 0 {
|
||||
t.Fatalf("must NOT deprovision when the current provider can't be read; got %d Stop calls", cp.calls)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
// An unexpected UPDATE here = compute was overwritten despite an unreadable
|
||||
// provider → the fail-open orphan path is back.
|
||||
t.Fatalf("compute must NOT be overwritten on a provider read error (fail-closed): %v", err)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user