From 9ce8d9744836bdfd2748f24705c6ae6d376460f1 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Thu, 23 Apr 2026 15:28:02 -0700 Subject: [PATCH] =?UTF-8?q?test:=20regression=20guard=20for=20#1738=20?= =?UTF-8?q?=E2=80=94=20cp-provisioner=20uses=20real=20instance=5Fid?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pins the fix-invariants from PR #1738 (merged 2026-04-23) against regression. Pre-fix, `CPProvisioner.Stop` and `IsRunning` both passed the workspace UUID as the `instance_id` query param: url := fmt.Sprintf("%s/cp/workspaces/%s?instance_id=%s", baseURL, workspaceID, workspaceID) ^ should be the real i-* ID AWS rejected downstream with InvalidInstanceID.Malformed, orphaned the EC2, and the next provision hit InvalidGroup.Duplicate on the leftover SG — full Save & Restart cascade failure. ## Tests added - **TestStop_UsesRealInstanceIDNotWorkspaceUUID**: stub resolveInstanceID to return an i-* ID, assert the CP request's instance_id query param carries that i-* value (not the workspace UUID). - **TestStop_NoInstanceIDSkipsCPCall**: empty DB lookup → no CP call at all (idempotent). Guards against re-introducing the "call CP with '' and let AWS reject" footgun. - **TestIsRunning_UsesRealInstanceIDNotWorkspaceUUID**: mirror for the /cp/workspaces/:id/status path — same bug shape. All 3 pass on current staging (which has the fix). Reverting either Stop or IsRunning to the pre-#1738 shape causes these to fail loud. Extends molecule-core#1902's regression suite. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../cp_provisioner_instance_id_test.go | 127 ++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 workspace-server/internal/provisioner/cp_provisioner_instance_id_test.go diff --git a/workspace-server/internal/provisioner/cp_provisioner_instance_id_test.go b/workspace-server/internal/provisioner/cp_provisioner_instance_id_test.go new file mode 100644 index 00000000..bb77c934 --- /dev/null +++ b/workspace-server/internal/provisioner/cp_provisioner_instance_id_test.go @@ -0,0 +1,127 @@ +package provisioner + +// Regression tests for PR #1738 (merged 2026-04-23) — CPProvisioner.Stop + +// IsRunning must look up the real EC2 instance_id (i-*) from the DB +// before calling the control plane, NOT pass the workspace UUID verbatim. +// +// Original bug: +// url := fmt.Sprintf("%s/cp/workspaces/%s?instance_id=%s", +// baseURL, workspaceID, workspaceID) +// ^^^^^^^^^^^^^^ +// sends UUID as instance_id +// +// AWS then rejects with InvalidInstanceID.Malformed, the next provision +// hits InvalidGroup.Duplicate on the leftover SG, and Save & Restart +// cascades into a full failure. Production incident 2026-04-22 on +// hongmingwang workspace a8af9d79 + recurrent on every SaaS workspace +// secret update that triggers a restart. +// +// These tests pin two invariants of the fix: +// 1. Stop + IsRunning query resolveInstanceID(ctx, workspaceID) BEFORE +// hitting CP, and use the returned i-* ID (not the workspace UUID) +// in the instance_id query param. +// 2. Empty instance_id → no CP call (idempotent no-op). + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" +) + +// TestStop_UsesRealInstanceIDNotWorkspaceUUID is the load-bearing +// regression guard for #1738. If someone reverts the resolveInstanceID +// lookup and ships the `workspaceID, workspaceID` version back, this +// test fails immediately. +func TestStop_UsesRealInstanceIDNotWorkspaceUUID(t *testing.T) { + primeInstanceIDLookup(t, map[string]string{ + "ws-cd5c9906-bfd7-4e2a-8c0b-9f1e2d3a4b5c": "i-0a1b2c3d4e5f67890", + }) + + var sawInstance string + var sawPath string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + sawInstance = r.URL.Query().Get("instance_id") + sawPath = r.URL.Path + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + p := &CPProvisioner{ + baseURL: srv.URL, + orgID: "org-1", + sharedSecret: "s3cret", + adminToken: "tok-xyz", + httpClient: srv.Client(), + } + if err := p.Stop(context.Background(), "ws-cd5c9906-bfd7-4e2a-8c0b-9f1e2d3a4b5c"); err != nil { + t.Fatalf("Stop: %v", err) + } + + // Load-bearing assertion: the AWS-facing instance_id must be the + // i-* ID from the DB, NEVER the workspace UUID. + if sawInstance != "i-0a1b2c3d4e5f67890" { + t.Errorf("#1738 REGRESSION: instance_id query = %q, want i-0a1b2c3d4e5f67890. "+ + "CP would forward this to AWS TerminateInstances — a UUID triggers "+ + "InvalidInstanceID.Malformed and orphans the EC2. See PR #1738.", sawInstance) + } + + // Sanity: path still carries the workspace UUID (that's how CP looks + // up the row). Only the instance_id query param changed. + if sawPath != "/cp/workspaces/ws-cd5c9906-bfd7-4e2a-8c0b-9f1e2d3a4b5c" { + t.Errorf("path = %q, want /cp/workspaces/ws-cd5c9906-bfd7-4e2a-8c0b-9f1e2d3a4b5c", sawPath) + } +} + +// TestStop_NoInstanceIDSkipsCPCall — when the workspace has no EC2 on +// file (never provisioned, already deprovisioned, or external runtime), +// Stop must be a no-op. Calling CP with empty instance_id triggers the +// exact AWS error the fix was meant to prevent. +func TestStop_NoInstanceIDSkipsCPCall(t *testing.T) { + primeInstanceIDLookup(t, map[string]string{}) // empty map → "" for everything + + called := false + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + called = true + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + p := &CPProvisioner{baseURL: srv.URL, orgID: "org-1", httpClient: srv.Client()} + if err := p.Stop(context.Background(), "ws-never-provisioned"); err != nil { + t.Errorf("Stop with no instance_id should be no-op, got err: %v", err) + } + if called { + t.Error("#1738 REGRESSION: Stop hit CP with empty instance_id — would trigger " + + "InvalidInstanceID.Malformed downstream. Fix must short-circuit on empty lookup.") + } +} + +// TestIsRunning_UsesRealInstanceIDNotWorkspaceUUID mirrors the Stop test +// for IsRunning's GET /cp/workspaces/:id/status?instance_id=... path. +// Same class of bug, same acceptance criterion. +func TestIsRunning_UsesRealInstanceIDNotWorkspaceUUID(t *testing.T) { + primeInstanceIDLookup(t, map[string]string{ + "ws-abc": "i-deadbeef", + }) + + var sawInstance string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + sawInstance = r.URL.Query().Get("instance_id") + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"state":"running"}`)) + })) + defer srv.Close() + + p := &CPProvisioner{baseURL: srv.URL, orgID: "org-1", httpClient: srv.Client()} + running, err := p.IsRunning(context.Background(), "ws-abc") + if err != nil { + t.Fatalf("IsRunning: %v", err) + } + if !running { + t.Errorf("expected running=true") + } + if sawInstance != "i-deadbeef" { + t.Errorf("#1738 REGRESSION: IsRunning sent instance_id=%q, want i-deadbeef", sawInstance) + } +}