fix(platform): address security review findings on CF Artifacts (#641)

Four findings from the security audit on PR #641:

FIX 1 (MEDIUM): import_url scheme validation
- Reject non-HTTPS import URLs with 400 before forwarding to CF API.
  Prevents SSRF via http://, git://, ssh://, file:// etc.

FIX 2 (MEDIUM): CF 5xx error leakage
- Add cfErrMessage() helper: returns "upstream service error" for CF 5xx
  responses and non-CF errors, passes through 4xx messages.
- Applied at all four CF-error response sites (Create, Get, Fork, Token).

FIX 3 (LOW): repo name validation
- Add package-level repoNameRE = ^[a-zA-Z0-9][a-zA-Z0-9_-]{0,62}$
- Validate in Create and Fork handlers when caller supplies an explicit name.
  Auto-generated names ("molecule-ws-<id>") are always safe and skip validation.

FIX 4 (LOW): response body size limit in CF client
- Wrap resp.Body with io.LimitReader(1 MB) before json.NewDecoder in do().
  Prevents memory exhaustion from a runaway/malicious CF response.

Tests: 16 new tests covering all four fixes (cfErrMessage 4xx/5xx/non-API,
import_url non-HTTPS cases, invalid repo names in Create and Fork).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Molecule AI Backend Engineer 2026-04-17 06:39:47 +00:00
parent 3bcb2b21a5
commit daf52daa1d
3 changed files with 207 additions and 6 deletions

View File

@ -174,7 +174,8 @@ func (c *Client) do(ctx context.Context, method, path string, body, out interfac
}
defer resp.Body.Close()
// Decode the Cloudflare v4 envelope.
// Decode the Cloudflare v4 envelope. Cap at 1 MiB to prevent a
// malicious or runaway upstream response from exhausting memory.
var envelope struct {
Result json.RawMessage `json:"result"`
Success bool `json:"success"`
@ -183,7 +184,7 @@ func (c *Client) do(ctx context.Context, method, path string, body, out interfac
Message string `json:"message"`
} `json:"errors"`
}
if err := json.NewDecoder(resp.Body).Decode(&envelope); err != nil {
if err := json.NewDecoder(io.LimitReader(resp.Body, 1<<20)).Decode(&envelope); err != nil {
// Non-JSON body (network error page, etc.)
return &APIError{StatusCode: resp.StatusCode, Message: fmt.Sprintf("non-JSON body (status %d)", resp.StatusCode)}
}

View File

@ -24,6 +24,7 @@ import (
"log"
"net/http"
"os"
"regexp"
"strings"
"time"
@ -32,6 +33,21 @@ import (
"github.com/gin-gonic/gin"
)
// repoNameRE validates CF Artifacts repo names: start with alphanumeric,
// then up to 62 alphanumeric/hyphen/underscore chars (63 total max).
var repoNameRE = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9_-]{0,62}$`)
// cfErrMessage returns a safe error message for CF API errors.
// For CF 5xx errors (or non-CF errors), returns a generic "upstream service error"
// to avoid leaking internal CF error details to clients.
func cfErrMessage(err error) string {
apiErr, ok := err.(*artifacts.APIError)
if !ok || apiErr.StatusCode >= 500 {
return "upstream service error"
}
return apiErr.Message
}
// ArtifactsHandler holds a pre-built CF Artifacts client.
// The client is nil when CF_ARTIFACTS_API_TOKEN / CF_ARTIFACTS_NAMESPACE are unset.
type ArtifactsHandler struct {
@ -138,11 +154,22 @@ func (h *ArtifactsHandler) Create(c *gin.Context) {
}
}
// Validate explicit repo names; auto-generated names are always safe.
if req.Name != "" && !repoNameRE.MatchString(req.Name) {
c.JSON(http.StatusBadRequest, gin.H{"error": "repo name must match ^[a-zA-Z0-9][a-zA-Z0-9_-]{0,62}$"})
return
}
var (
repo *artifacts.Repo
err error
)
if req.ImportURL != "" {
// Fix 1: require HTTPS for import URLs to prevent SSRF via non-HTTPS schemes.
if !strings.HasPrefix(req.ImportURL, "https://") {
c.JSON(http.StatusBadRequest, gin.H{"error": "import_url must use https://"})
return
}
repo, err = h.client.ImportRepo(ctx, repoName, artifacts.ImportRepoRequest{
URL: req.ImportURL,
Branch: req.ImportBranch,
@ -158,7 +185,7 @@ func (h *ArtifactsHandler) Create(c *gin.Context) {
}
if err != nil {
log.Printf("artifacts: CreateRepo/ImportRepo failed for workspace %s: %v", workspaceID, err)
c.JSON(cfErrToHTTP(err), gin.H{"error": err.Error()})
c.JSON(cfErrToHTTP(err), gin.H{"error": cfErrMessage(err)})
return
}
@ -222,7 +249,7 @@ func (h *ArtifactsHandler) Get(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{
"artifact": row,
"cf_status": "unavailable",
"cf_error": err.Error(),
"cf_error": cfErrMessage(err),
})
return
}
@ -279,6 +306,11 @@ func (h *ArtifactsHandler) Fork(c *gin.Context) {
return
}
if req.Name != "" && !repoNameRE.MatchString(req.Name) {
c.JSON(http.StatusBadRequest, gin.H{"error": "repo name must match ^[a-zA-Z0-9][a-zA-Z0-9_-]{0,62}$"})
return
}
result, err := h.client.ForkRepo(ctx, cfRepoName, artifacts.ForkRepoRequest{
Name: req.Name,
Description: req.Description,
@ -287,7 +319,7 @@ func (h *ArtifactsHandler) Fork(c *gin.Context) {
})
if err != nil {
log.Printf("artifacts: ForkRepo failed for workspace %s: %v", workspaceID, err)
c.JSON(cfErrToHTTP(err), gin.H{"error": err.Error()})
c.JSON(cfErrToHTTP(err), gin.H{"error": cfErrMessage(err)})
return
}
@ -363,7 +395,7 @@ func (h *ArtifactsHandler) Token(c *gin.Context) {
})
if err != nil {
log.Printf("artifacts: CreateToken failed for workspace %s: %v", workspaceID, err)
c.JSON(cfErrToHTTP(err), gin.H{"error": err.Error()})
c.JSON(cfErrToHTTP(err), gin.H{"error": cfErrMessage(err)})
return
}

View File

@ -4,6 +4,7 @@ import (
"bytes"
"database/sql"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"testing"
@ -803,6 +804,173 @@ func TestCfErrToHTTP(t *testing.T) {
}
}
// ============================= Security fix tests ============================
// TestCfErrMessage_5xxReturnsGeneric verifies that CF 5xx errors return a
// generic message instead of leaking CF internals.
func TestCfErrMessage_5xxReturnsGeneric(t *testing.T) {
err := &artifacts.APIError{StatusCode: http.StatusInternalServerError, Message: "internal CF detail"}
got := cfErrMessage(err)
if got != "upstream service error" {
t.Errorf("cfErrMessage(500) = %q, want %q", got, "upstream service error")
}
}
// TestCfErrMessage_502ReturnsGeneric verifies that CF 502 (bad gateway) is also masked.
func TestCfErrMessage_502ReturnsGeneric(t *testing.T) {
err := &artifacts.APIError{StatusCode: http.StatusBadGateway, Message: "gateway detail"}
got := cfErrMessage(err)
if got != "upstream service error" {
t.Errorf("cfErrMessage(502) = %q, want %q", got, "upstream service error")
}
}
// TestCfErrMessage_4xxPassesThrough verifies that CF 4xx messages are surfaced.
func TestCfErrMessage_4xxPassesThrough(t *testing.T) {
msg := "repo name already taken"
err := &artifacts.APIError{StatusCode: http.StatusConflict, Message: msg}
got := cfErrMessage(err)
if got != msg {
t.Errorf("cfErrMessage(409) = %q, want %q", got, msg)
}
}
// TestCfErrMessage_NonAPIErrorReturnsGeneric verifies that non-CF errors return generic message.
func TestCfErrMessage_NonAPIErrorReturnsGeneric(t *testing.T) {
err := fmt.Errorf("some network error")
got := cfErrMessage(err)
if got != "upstream service error" {
t.Errorf("cfErrMessage(non-API) = %q, want %q", got, "upstream service error")
}
}
// TestArtifactsCreate_ImportURLNonHTTPS verifies that a non-HTTPS import_url
// is rejected with 400.
func TestArtifactsCreate_ImportURLNonHTTPS(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
mock.ExpectQuery(`SELECT EXISTS`).
WithArgs("ws-badurl").
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
h := newArtifactsHandlerWithClient(
artifacts.NewWithBaseURL("tok", "test-ns", "http://unused"),
"test-ns",
)
cases := []string{
"http://github.com/org/repo.git",
"git://github.com/org/repo.git",
"ssh://git@github.com/org/repo.git",
"file:///etc/passwd",
}
for _, url := range cases {
t.Run(url, func(t *testing.T) {
// Re-register the EXISTS probe expectation for each sub-test case.
mock.ExpectQuery(`SELECT EXISTS`).
WithArgs("ws-badurl").
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "ws-badurl"}}
body, _ := json.Marshal(map[string]interface{}{
"name": "my-repo",
"import_url": url,
})
c.Request = httptest.NewRequest("POST", "/workspaces/ws-badurl/artifacts",
bytes.NewBuffer(body))
c.Request.Header.Set("Content-Type", "application/json")
h.Create(c)
if w.Code != http.StatusBadRequest {
t.Errorf("import_url=%q: expected 400, got %d: %s", url, w.Code, w.Body.String())
}
})
}
}
// TestArtifactsCreate_InvalidRepoName verifies that invalid repo names return 400.
func TestArtifactsCreate_InvalidRepoName(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
h := newArtifactsHandlerWithClient(
artifacts.NewWithBaseURL("tok", "test-ns", "http://unused"),
"test-ns",
)
invalidNames := []string{
"-starts-with-dash",
"_starts-with-underscore",
"has spaces",
"has/slash",
"has.dot",
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // 64 chars
}
for _, name := range invalidNames {
t.Run(name, func(t *testing.T) {
mock.ExpectQuery(`SELECT EXISTS`).
WithArgs("ws-badname").
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "ws-badname"}}
body, _ := json.Marshal(map[string]interface{}{"name": name})
c.Request = httptest.NewRequest("POST", "/workspaces/ws-badname/artifacts",
bytes.NewBuffer(body))
c.Request.Header.Set("Content-Type", "application/json")
h.Create(c)
if w.Code != http.StatusBadRequest {
t.Errorf("name=%q: expected 400, got %d: %s", name, w.Code, w.Body.String())
}
})
}
}
// TestArtifactsFork_InvalidRepoName verifies that invalid fork names return 400.
func TestArtifactsFork_InvalidRepoName(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
h := newArtifactsHandlerWithClient(
artifacts.NewWithBaseURL("tok", "test-ns", "http://unused"),
"test-ns",
)
invalidNames := []string{
"-bad-start",
"has spaces",
"../traversal",
}
for _, name := range invalidNames {
t.Run(name, func(t *testing.T) {
mock.ExpectQuery(`SELECT cf_repo_name FROM workspace_artifacts WHERE workspace_id`).
WithArgs("ws-forknm").
WillReturnRows(sqlmock.NewRows([]string{"cf_repo_name"}).AddRow("src"))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "ws-forknm"}}
body, _ := json.Marshal(map[string]interface{}{"name": name})
c.Request = httptest.NewRequest("POST", "/workspaces/ws-forknm/artifacts/fork",
bytes.NewBuffer(body))
c.Request.Header.Set("Content-Type", "application/json")
h.Fork(c)
if w.Code != http.StatusBadRequest {
t.Errorf("fork name=%q: expected 400, got %d: %s", name, w.Code, w.Body.String())
}
})
}
}
// containsCredentials is a test helper that checks whether a URL has embedded
// user:password@ credentials (should never appear in a stored remote URL).
func containsCredentials(u string) bool {