From b4a0f8b74d28844b7da5e50d9c5e5c4ecb8732b8 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Sat, 23 May 2026 07:47:50 +0000 Subject: [PATCH 1/2] fix(provisioner): check io.ReadAll + json.Unmarshal errors in CP client Start() silently ignored io.ReadAll and json.Unmarshal errors when reading the control plane provision response. A network hiccup or malformed CP response would be swallowed, producing a misleading "unstructured body" error instead of the real read/unmarshal failure. Stop() also ignored io.ReadAll on the error path. Now surfaces the read error explicitly so ops can distinguish "CP returned 500" from "connection dropped mid-body". Co-Authored-By: Claude Opus 4.7 --- .../internal/provisioner/cp_provisioner.go | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/workspace-server/internal/provisioner/cp_provisioner.go b/workspace-server/internal/provisioner/cp_provisioner.go index 269816909..8a3e056b5 100644 --- a/workspace-server/internal/provisioner/cp_provisioner.go +++ b/workspace-server/internal/provisioner/cp_provisioner.go @@ -241,9 +241,14 @@ func (p *CPProvisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, // Cap body read at 64 KiB — the CP only ever returns small JSON // responses; an unbounded read could be weaponized into log-flood // DoS by a compromised upstream. - respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 64<<10)) + respBody, readErr := io.ReadAll(io.LimitReader(resp.Body, 64<<10)) + if readErr != nil { + return "", fmt.Errorf("cp provisioner: read response body: %w", readErr) + } var result cpProvisionResponse - json.Unmarshal(respBody, &result) + if err := json.Unmarshal(respBody, &result); err != nil { + log.Printf("CP provisioner: unmarshal response for workspace %s: %v", cfg.WorkspaceID, err) + } if resp.StatusCode != http.StatusCreated { // Prefer the structured {"error":"..."} field. Do NOT fall back @@ -409,7 +414,11 @@ func (p *CPProvisioner) Stop(ctx context.Context, workspaceID string) error { // Read a bounded slice of the body so the error message gives ops // enough to triage without risking a multi-MB log line on a // pathological response. 512 bytes covers any sane error envelope. - body, _ := io.ReadAll(io.LimitReader(resp.Body, 512)) + body, readErr := io.ReadAll(io.LimitReader(resp.Body, 512)) + if readErr != nil { + return fmt.Errorf("cp provisioner: stop %s: unexpected %d (read body failed: %w)", + workspaceID, resp.StatusCode, readErr) + } return fmt.Errorf("cp provisioner: stop %s: unexpected %d: %s", workspaceID, resp.StatusCode, strings.TrimSpace(string(body))) } -- 2.52.0 From 156a9623bee5d1b753e16b2523a9f79d280f31a8 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Sat, 23 May 2026 10:19:07 +0000 Subject: [PATCH 2/2] fix(provisioner): surface json.Unmarshal error on malformed 201 response CR2 from review #5552: Start() previously logged (or ignored) json.Unmarshal errors but still treated malformed 201 Created CP provision responses as success, returning an empty instance_id with nil error. Change the logic so that on HTTP 201, a json.Unmarshal failure returns an error instead of proceeding with an empty instance ID. Non-201 paths keep the existing byte-count fallback behaviour. Added TestStart_Malformed201SurfacesError covering the malformed-201 case. Co-Authored-By: Claude Opus 4.7 --- .../internal/provisioner/cp_provisioner.go | 8 +++++--- .../provisioner/cp_provisioner_test.go | 20 +++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/workspace-server/internal/provisioner/cp_provisioner.go b/workspace-server/internal/provisioner/cp_provisioner.go index 8a3e056b5..ef1f8595c 100644 --- a/workspace-server/internal/provisioner/cp_provisioner.go +++ b/workspace-server/internal/provisioner/cp_provisioner.go @@ -246,9 +246,7 @@ func (p *CPProvisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, return "", fmt.Errorf("cp provisioner: read response body: %w", readErr) } var result cpProvisionResponse - if err := json.Unmarshal(respBody, &result); err != nil { - log.Printf("CP provisioner: unmarshal response for workspace %s: %v", cfg.WorkspaceID, err) - } + unmarshalErr := json.Unmarshal(respBody, &result) if resp.StatusCode != http.StatusCreated { // Prefer the structured {"error":"..."} field. Do NOT fall back @@ -262,6 +260,10 @@ func (p *CPProvisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, return "", fmt.Errorf("cp provisioner: provision failed (%d): %s", resp.StatusCode, errMsg) } + if unmarshalErr != nil { + return "", fmt.Errorf("cp provisioner: decode 201 response: %w", unmarshalErr) + } + log.Printf("CP provisioner: workspace %s → EC2 instance %s (%s)", cfg.WorkspaceID, result.InstanceID, result.State) provlog.Event("provision.ec2_started", map[string]any{ "workspace_id": cfg.WorkspaceID, diff --git a/workspace-server/internal/provisioner/cp_provisioner_test.go b/workspace-server/internal/provisioner/cp_provisioner_test.go index 44e27660a..23c747882 100644 --- a/workspace-server/internal/provisioner/cp_provisioner_test.go +++ b/workspace-server/internal/provisioner/cp_provisioner_test.go @@ -442,6 +442,26 @@ func TestStart_SymlinkTemplatePathError(t *testing.T) { } } +// TestStart_Malformed201SurfacesError — when CP returns 201 Created with +// unparseable JSON, Start must return an error instead of silently +// returning an empty instance_id. CR2 blocker from review #5552. +func TestStart_Malformed201SurfacesError(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusCreated) + _, _ = io.WriteString(w, `{"instance_id": broken-json`) + })) + defer srv.Close() + + p := &CPProvisioner{baseURL: srv.URL, orgID: "org-1", httpClient: srv.Client()} + _, err := p.Start(context.Background(), WorkspaceConfig{WorkspaceID: "ws-1", Runtime: "py"}) + if err == nil { + t.Fatal("expected error on malformed 201, got nil") + } + if !strings.Contains(err.Error(), "decode 201 response") { + t.Errorf("error should mention decode 201 response, got %q", err.Error()) + } +} + // TestStop_SendsBothAuthHeaders — verify #118/#130 compliance on the // teardown path. Any call to /cp/workspaces/:id must carry both the // platform-wide shared secret AND the per-tenant admin token, or the -- 2.52.0