From daf52daa1d8cecd661611a6d3157c5659415823f Mon Sep 17 00:00:00 2001 From: Molecule AI Backend Engineer Date: Fri, 17 Apr 2026 06:39:47 +0000 Subject: [PATCH] fix(platform): address security review findings on CF Artifacts (#641) Four findings from the security audit on PR #641: FIX 1 (MEDIUM): import_url scheme validation - Reject non-HTTPS import URLs with 400 before forwarding to CF API. Prevents SSRF via http://, git://, ssh://, file:// etc. FIX 2 (MEDIUM): CF 5xx error leakage - Add cfErrMessage() helper: returns "upstream service error" for CF 5xx responses and non-CF errors, passes through 4xx messages. - Applied at all four CF-error response sites (Create, Get, Fork, Token). FIX 3 (LOW): repo name validation - Add package-level repoNameRE = ^[a-zA-Z0-9][a-zA-Z0-9_-]{0,62}$ - Validate in Create and Fork handlers when caller supplies an explicit name. Auto-generated names ("molecule-ws-") are always safe and skip validation. FIX 4 (LOW): response body size limit in CF client - Wrap resp.Body with io.LimitReader(1 MB) before json.NewDecoder in do(). Prevents memory exhaustion from a runaway/malicious CF response. Tests: 16 new tests covering all four fixes (cfErrMessage 4xx/5xx/non-API, import_url non-HTTPS cases, invalid repo names in Create and Fork). Co-Authored-By: Claude Sonnet 4.6 --- platform/internal/artifacts/client.go | 5 +- platform/internal/handlers/artifacts.go | 40 ++++- platform/internal/handlers/artifacts_test.go | 168 +++++++++++++++++++ 3 files changed, 207 insertions(+), 6 deletions(-) diff --git a/platform/internal/artifacts/client.go b/platform/internal/artifacts/client.go index 37efef7b..18510e00 100644 --- a/platform/internal/artifacts/client.go +++ b/platform/internal/artifacts/client.go @@ -174,7 +174,8 @@ func (c *Client) do(ctx context.Context, method, path string, body, out interfac } defer resp.Body.Close() - // Decode the Cloudflare v4 envelope. + // Decode the Cloudflare v4 envelope. Cap at 1 MiB to prevent a + // malicious or runaway upstream response from exhausting memory. var envelope struct { Result json.RawMessage `json:"result"` Success bool `json:"success"` @@ -183,7 +184,7 @@ func (c *Client) do(ctx context.Context, method, path string, body, out interfac Message string `json:"message"` } `json:"errors"` } - if err := json.NewDecoder(resp.Body).Decode(&envelope); err != nil { + if err := json.NewDecoder(io.LimitReader(resp.Body, 1<<20)).Decode(&envelope); err != nil { // Non-JSON body (network error page, etc.) return &APIError{StatusCode: resp.StatusCode, Message: fmt.Sprintf("non-JSON body (status %d)", resp.StatusCode)} } diff --git a/platform/internal/handlers/artifacts.go b/platform/internal/handlers/artifacts.go index dd125b62..2dec903f 100644 --- a/platform/internal/handlers/artifacts.go +++ b/platform/internal/handlers/artifacts.go @@ -24,6 +24,7 @@ import ( "log" "net/http" "os" + "regexp" "strings" "time" @@ -32,6 +33,21 @@ import ( "github.com/gin-gonic/gin" ) +// repoNameRE validates CF Artifacts repo names: start with alphanumeric, +// then up to 62 alphanumeric/hyphen/underscore chars (63 total max). +var repoNameRE = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9_-]{0,62}$`) + +// cfErrMessage returns a safe error message for CF API errors. +// For CF 5xx errors (or non-CF errors), returns a generic "upstream service error" +// to avoid leaking internal CF error details to clients. +func cfErrMessage(err error) string { + apiErr, ok := err.(*artifacts.APIError) + if !ok || apiErr.StatusCode >= 500 { + return "upstream service error" + } + return apiErr.Message +} + // ArtifactsHandler holds a pre-built CF Artifacts client. // The client is nil when CF_ARTIFACTS_API_TOKEN / CF_ARTIFACTS_NAMESPACE are unset. type ArtifactsHandler struct { @@ -138,11 +154,22 @@ func (h *ArtifactsHandler) Create(c *gin.Context) { } } + // Validate explicit repo names; auto-generated names are always safe. + if req.Name != "" && !repoNameRE.MatchString(req.Name) { + c.JSON(http.StatusBadRequest, gin.H{"error": "repo name must match ^[a-zA-Z0-9][a-zA-Z0-9_-]{0,62}$"}) + return + } + var ( repo *artifacts.Repo err error ) if req.ImportURL != "" { + // Fix 1: require HTTPS for import URLs to prevent SSRF via non-HTTPS schemes. + if !strings.HasPrefix(req.ImportURL, "https://") { + c.JSON(http.StatusBadRequest, gin.H{"error": "import_url must use https://"}) + return + } repo, err = h.client.ImportRepo(ctx, repoName, artifacts.ImportRepoRequest{ URL: req.ImportURL, Branch: req.ImportBranch, @@ -158,7 +185,7 @@ func (h *ArtifactsHandler) Create(c *gin.Context) { } if err != nil { log.Printf("artifacts: CreateRepo/ImportRepo failed for workspace %s: %v", workspaceID, err) - c.JSON(cfErrToHTTP(err), gin.H{"error": err.Error()}) + c.JSON(cfErrToHTTP(err), gin.H{"error": cfErrMessage(err)}) return } @@ -222,7 +249,7 @@ func (h *ArtifactsHandler) Get(c *gin.Context) { c.JSON(http.StatusOK, gin.H{ "artifact": row, "cf_status": "unavailable", - "cf_error": err.Error(), + "cf_error": cfErrMessage(err), }) return } @@ -279,6 +306,11 @@ func (h *ArtifactsHandler) Fork(c *gin.Context) { return } + if req.Name != "" && !repoNameRE.MatchString(req.Name) { + c.JSON(http.StatusBadRequest, gin.H{"error": "repo name must match ^[a-zA-Z0-9][a-zA-Z0-9_-]{0,62}$"}) + return + } + result, err := h.client.ForkRepo(ctx, cfRepoName, artifacts.ForkRepoRequest{ Name: req.Name, Description: req.Description, @@ -287,7 +319,7 @@ func (h *ArtifactsHandler) Fork(c *gin.Context) { }) if err != nil { log.Printf("artifacts: ForkRepo failed for workspace %s: %v", workspaceID, err) - c.JSON(cfErrToHTTP(err), gin.H{"error": err.Error()}) + c.JSON(cfErrToHTTP(err), gin.H{"error": cfErrMessage(err)}) return } @@ -363,7 +395,7 @@ func (h *ArtifactsHandler) Token(c *gin.Context) { }) if err != nil { log.Printf("artifacts: CreateToken failed for workspace %s: %v", workspaceID, err) - c.JSON(cfErrToHTTP(err), gin.H{"error": err.Error()}) + c.JSON(cfErrToHTTP(err), gin.H{"error": cfErrMessage(err)}) return } diff --git a/platform/internal/handlers/artifacts_test.go b/platform/internal/handlers/artifacts_test.go index 71dea2e7..283dea0b 100644 --- a/platform/internal/handlers/artifacts_test.go +++ b/platform/internal/handlers/artifacts_test.go @@ -4,6 +4,7 @@ import ( "bytes" "database/sql" "encoding/json" + "fmt" "net/http" "net/http/httptest" "testing" @@ -803,6 +804,173 @@ func TestCfErrToHTTP(t *testing.T) { } } +// ============================= Security fix tests ============================ + +// TestCfErrMessage_5xxReturnsGeneric verifies that CF 5xx errors return a +// generic message instead of leaking CF internals. +func TestCfErrMessage_5xxReturnsGeneric(t *testing.T) { + err := &artifacts.APIError{StatusCode: http.StatusInternalServerError, Message: "internal CF detail"} + got := cfErrMessage(err) + if got != "upstream service error" { + t.Errorf("cfErrMessage(500) = %q, want %q", got, "upstream service error") + } +} + +// TestCfErrMessage_502ReturnsGeneric verifies that CF 502 (bad gateway) is also masked. +func TestCfErrMessage_502ReturnsGeneric(t *testing.T) { + err := &artifacts.APIError{StatusCode: http.StatusBadGateway, Message: "gateway detail"} + got := cfErrMessage(err) + if got != "upstream service error" { + t.Errorf("cfErrMessage(502) = %q, want %q", got, "upstream service error") + } +} + +// TestCfErrMessage_4xxPassesThrough verifies that CF 4xx messages are surfaced. +func TestCfErrMessage_4xxPassesThrough(t *testing.T) { + msg := "repo name already taken" + err := &artifacts.APIError{StatusCode: http.StatusConflict, Message: msg} + got := cfErrMessage(err) + if got != msg { + t.Errorf("cfErrMessage(409) = %q, want %q", got, msg) + } +} + +// TestCfErrMessage_NonAPIErrorReturnsGeneric verifies that non-CF errors return generic message. +func TestCfErrMessage_NonAPIErrorReturnsGeneric(t *testing.T) { + err := fmt.Errorf("some network error") + got := cfErrMessage(err) + if got != "upstream service error" { + t.Errorf("cfErrMessage(non-API) = %q, want %q", got, "upstream service error") + } +} + +// TestArtifactsCreate_ImportURLNonHTTPS verifies that a non-HTTPS import_url +// is rejected with 400. +func TestArtifactsCreate_ImportURLNonHTTPS(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + + mock.ExpectQuery(`SELECT EXISTS`). + WithArgs("ws-badurl"). + WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false)) + + h := newArtifactsHandlerWithClient( + artifacts.NewWithBaseURL("tok", "test-ns", "http://unused"), + "test-ns", + ) + + cases := []string{ + "http://github.com/org/repo.git", + "git://github.com/org/repo.git", + "ssh://git@github.com/org/repo.git", + "file:///etc/passwd", + } + for _, url := range cases { + t.Run(url, func(t *testing.T) { + // Re-register the EXISTS probe expectation for each sub-test case. + mock.ExpectQuery(`SELECT EXISTS`). + WithArgs("ws-badurl"). + WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false)) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: "ws-badurl"}} + body, _ := json.Marshal(map[string]interface{}{ + "name": "my-repo", + "import_url": url, + }) + c.Request = httptest.NewRequest("POST", "/workspaces/ws-badurl/artifacts", + bytes.NewBuffer(body)) + c.Request.Header.Set("Content-Type", "application/json") + + h.Create(c) + + if w.Code != http.StatusBadRequest { + t.Errorf("import_url=%q: expected 400, got %d: %s", url, w.Code, w.Body.String()) + } + }) + } +} + +// TestArtifactsCreate_InvalidRepoName verifies that invalid repo names return 400. +func TestArtifactsCreate_InvalidRepoName(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + + h := newArtifactsHandlerWithClient( + artifacts.NewWithBaseURL("tok", "test-ns", "http://unused"), + "test-ns", + ) + + invalidNames := []string{ + "-starts-with-dash", + "_starts-with-underscore", + "has spaces", + "has/slash", + "has.dot", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // 64 chars + } + for _, name := range invalidNames { + t.Run(name, func(t *testing.T) { + mock.ExpectQuery(`SELECT EXISTS`). + WithArgs("ws-badname"). + WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false)) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: "ws-badname"}} + body, _ := json.Marshal(map[string]interface{}{"name": name}) + c.Request = httptest.NewRequest("POST", "/workspaces/ws-badname/artifacts", + bytes.NewBuffer(body)) + c.Request.Header.Set("Content-Type", "application/json") + + h.Create(c) + + if w.Code != http.StatusBadRequest { + t.Errorf("name=%q: expected 400, got %d: %s", name, w.Code, w.Body.String()) + } + }) + } +} + +// TestArtifactsFork_InvalidRepoName verifies that invalid fork names return 400. +func TestArtifactsFork_InvalidRepoName(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + + h := newArtifactsHandlerWithClient( + artifacts.NewWithBaseURL("tok", "test-ns", "http://unused"), + "test-ns", + ) + + invalidNames := []string{ + "-bad-start", + "has spaces", + "../traversal", + } + for _, name := range invalidNames { + t.Run(name, func(t *testing.T) { + mock.ExpectQuery(`SELECT cf_repo_name FROM workspace_artifacts WHERE workspace_id`). + WithArgs("ws-forknm"). + WillReturnRows(sqlmock.NewRows([]string{"cf_repo_name"}).AddRow("src")) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: "ws-forknm"}} + body, _ := json.Marshal(map[string]interface{}{"name": name}) + c.Request = httptest.NewRequest("POST", "/workspaces/ws-forknm/artifacts/fork", + bytes.NewBuffer(body)) + c.Request.Header.Set("Content-Type", "application/json") + + h.Fork(c) + + if w.Code != http.StatusBadRequest { + t.Errorf("fork name=%q: expected 400, got %d: %s", name, w.Code, w.Body.String()) + } + }) + } +} + // containsCredentials is a test helper that checks whether a URL has embedded // user:password@ credentials (should never appear in a stored remote URL). func containsCredentials(u string) bool {