molecule-core/workspace-server/internal/handlers/ssrf.go
Hongming Wang 2df644f528 fix(handlers): unblock Platform (Go) CI — sqlmock budget-check + test loopback
Fixes 14 of the 18 failing tests that have been reddening Platform (Go)
CI on main since the 2026-04-18 open-source restructure + 2026-04-21
SSRF-backport. Reduces handlers package failure count 18 → 4
(remaining 4 are unrelated schema/behavior drift — see follow-ups).

Three root causes fixed:

  1. httptest.NewServer binds to 127.0.0.1; isSafeURL rejects loopback.
     Tests that stub workspace URLs via httptest therefore 502'd at
     the SSRF guard before reaching the handler logic they wanted to
     exercise.
     Fix: add `testAllowLoopback` var to ssrf.go + `allowLoopbackForTest(t)`
     helper in handlers_test.go. Only 127.0.0.0/8 and ::1 are relaxed;
     169.254 metadata, RFC-1918, TEST-NET, CGNAT, and link-local
     protections remain active. Flag is paired with t.Cleanup and is
     never touched by production code.

  2. ProxyA2A's checkWorkspaceBudget query (SELECT budget_limit, COALESCE
     (monthly_spend, 0) FROM workspaces WHERE id = $1) was added with the
     restructure but the a2a_proxy_test.go sqlmock expectations never
     caught up, producing "call to Query ... was not expected" on every
     ProxyA2A-exercising test.
     Fix: `expectBudgetCheck(mock, workspaceID)` helper that registers
     an empty-rows expectation (checkWorkspaceBudget fails-open on
     sql.ErrNoRows, so an empty result = "no budget limit"). Added to
     each of the 8 affected TestProxyA2A_* tests in the correct
     position relative to access-control + activity-log expectations.

  3. TestAdminMemories_Import_Success + _RedactsSecretsBeforeDedup
     mocked a 5-arg INSERT when the handler actually issues a 4-arg
     INSERT (workspace_id, content, scope, namespace) unless the
     payload carries a created_at override. Removed the spurious 5th
     AnyArg from both tests; _PreservesCreatedAt is untouched since it
     legitimately uses the 5-arg form.

Also: TestResolveAgentURL_CacheHit and _CacheMissDBHit used bogus
`cached.example` / `dbhit.example` hostnames that fail DNS resolution
inside isSafeURL (which happens BEFORE the loopback check). Swapped to
`127.0.0.1` variants preserving test intent (they never hit the network).

Remaining 4 failures — out of scope for this PR, tracked separately:
  - TestGitHubToken_NoTokenProvider (handler behavior drift — 500 vs 404)
  - TestWorkspaceList + TestWorkspaceList_WithData (Scan arg count —
    workspaces table gained a column, mock not updated)
  - TestRegister_ProvisionerURLPreserved (request body shape drift)

Closes the 4 wrong-target PRs (#1710, #1718, #1719, #1664) that all
tried to silence the symptom by disabling golangci-lint — which has
`continue-on-error: true` in ci.yml and was never the actual blocker.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 19:40:06 -07:00

177 lines
5.5 KiB
Go

package handlers
import (
"fmt"
"net"
"net/url"
"path/filepath"
"strings"
)
// isSafeURL reports whether rawURL is an acceptable target for an outbound
// A2A request, guarding against SSRF (CWE-918). It returns nil when the URL
// may be called and a descriptive error otherwise. Workspace URLs come from
// DB/Redis caches, so callers validate before making any outbound HTTP call.
//
// Checks, in order:
//   - the URL must parse and use the http or https scheme;
//   - the hostname must be non-empty;
//   - an IP-literal host must not be loopback (unless testAllowLoopback is
//     set), unspecified, link-local or interface-local multicast, and must
//     not be rejected by isPrivateOrMetadataIP;
//   - any other host is resolved with net.LookupHost and every returned
//     address must pass the same checks.
//
// SaaS relaxation: per isPrivateOrMetadataIP, RFC-1918 private ranges and
// IPv6 ULA fd00::/8 are accepted when saasMode() is true. Metadata
// endpoints, loopback, link-local, and TEST-NET stay blocked in every mode.
//
// IPv6 zone identifiers ("fe80::1%eth0") are stripped before an address is
// classified, and a resolved address that still cannot be parsed is
// rejected. net.ParseIP does not accept zones, so without this a
// zone-scoped loopback or link-local literal would skip every check.
//
// NOTE(review): the DNS lookup here is separate from the one the HTTP client
// performs later, so a hostname could resolve differently at dial time —
// confirm whether callers need to pin the validated address.
func isSafeURL(rawURL string) error {
	u, err := url.Parse(rawURL)
	if err != nil {
		return fmt.Errorf("invalid URL: %w", err)
	}
	if u.Scheme != "http" && u.Scheme != "https" {
		return fmt.Errorf("forbidden scheme: %s (only http/https allowed)", u.Scheme)
	}
	host := u.Hostname()
	if host == "" {
		return fmt.Errorf("empty hostname")
	}

	// parseAddr parses an IP literal, ignoring any IPv6 zone suffix so that
	// scoped addresses are classified by their address part.
	parseAddr := func(s string) net.IP {
		if i := strings.IndexByte(s, '%'); i >= 0 {
			s = s[:i]
		}
		return net.ParseIP(s)
	}
	// localScope covers the address classes rejected in every mode; only
	// loopback can be relaxed, and only by the test-only flag.
	localScope := func(ip net.IP) bool {
		return (ip.IsLoopback() && !testAllowLoopback) ||
			ip.IsUnspecified() ||
			ip.IsLinkLocalUnicast() ||
			ip.IsLinkLocalMulticast() ||
			ip.IsInterfaceLocalMulticast()
	}

	// IP-literal host: classify directly, no DNS involved.
	if ip := parseAddr(host); ip != nil {
		if localScope(ip) {
			return fmt.Errorf("forbidden loopback/unspecified/link-local IP: %s", ip)
		}
		if isPrivateOrMetadataIP(ip) {
			return fmt.Errorf("forbidden private/metadata IP: %s", ip)
		}
		return nil
	}

	// Named host: every address it resolves to must be acceptable.
	addrs, err := net.LookupHost(host)
	if err != nil {
		return fmt.Errorf("DNS resolution blocked for hostname: %s (%w)", host, err)
	}
	if len(addrs) == 0 {
		return fmt.Errorf("DNS returned no addresses for: %s", host)
	}
	for _, addr := range addrs {
		ip := parseAddr(addr)
		if ip == nil {
			// Fail closed: an address we cannot classify is not known-safe.
			return fmt.Errorf("hostname %s resolves to unparseable address: %s", host, addr)
		}
		if localScope(ip) {
			return fmt.Errorf("hostname %s resolves to forbidden link-local/loopback IP: %s", host, ip)
		}
		if isPrivateOrMetadataIP(ip) {
			return fmt.Errorf("hostname %s resolves to forbidden IP: %s", host, ip)
		}
	}
	return nil
}
// testAllowLoopback is a test-only switch that defaults to false. While it
// is true, the loopback checks in isSafeURL and isPrivateOrMetadataIP are
// skipped, so 127.0.0.0/8 and ::1 are accepted. That lets unit tests point
// workspace URLs at httptest.NewServer backends, which bind to loopback.
// Tests flip it through allowLoopbackForTest(t); production code paths
// never set it.
//
// Only loopback is relaxed. The 169.254 metadata, RFC-1918, TEST-NET,
// CGNAT, and link-local guards do not consult this flag.
//
// NOTE(review): this is a plain package-level bool with no synchronization —
// presumably tests that set it do not run in parallel; confirm.
var testAllowLoopback bool
// isPrivateOrMetadataIP reports whether ip falls in a range that A2A
// requests must not reach. A true result means "block".
//
// Blocked regardless of mode:
//   - 169.254.0.0/16 link-local (cloud metadata endpoints)
//   - 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24 (TEST-NET, RFC-5737)
//   - 100.64.0.0/10 (carrier-grade NAT)
//   - IPv6 loopback ::1, unless testAllowLoopback is set
//   - IPv6 link-local fe80::/10
//   - the fc00::/8 half of the IPv6 ULA block fc00::/7
//
// Blocked only when saasMode() is false:
//   - 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16 (RFC-1918)
//   - fd00::/8 (the locally-assigned half of fc00::/7)
//
// Rationale: SaaS tenants run workspaces on sibling EC2s in the same VPC
// and register them by VPC-private IP. The control plane provisions these
// instances, so intra-VPC routing is trusted. On self-hosted / single-
// container deployments the relaxation is off and every private range
// stays blocked.
func isPrivateOrMetadataIP(ip net.IP) bool {
	relaxed := saasMode()

	v4 := ip.To4()
	if v4 == nil {
		// Genuine IPv6 address (To4 yields nil only when there is no
		// 4-byte form).
		switch {
		case ip.IsLoopback() && !testAllowLoopback:
			// ::1 — repeated here as defense-in-depth; tests may opt out.
			return true
		case ip.IsLinkLocalUnicast():
			// fe80::/10 is never allowed.
			return true
		case ulaV6.Contains(ip):
			// All of fc00::/7 counts as private; SaaS mode lets only the
			// fd00::/8 half through.
			return !(relaxed && fd00V6.Contains(ip))
		default:
			return false
		}
	}

	// IPv4 from here on.

	// Metadata link-local range: never allowed.
	if metadataV4.Contains(v4) {
		return true
	}
	// TEST-NET / documentation ranges: never allowed.
	for i := range docRangesV4 {
		if docRangesV4[i].Contains(v4) {
			return true
		}
	}
	// Carrier-grade NAT: never allowed.
	if cgnatV4.Contains(v4) {
		return true
	}
	// RFC-1918 private space: blocked in strict mode, allowed in SaaS mode.
	for i := range privateV4 {
		if privateV4[i].Contains(v4) {
			return !relaxed
		}
	}
	return false
}
// CIDR tables consulted by isPrivateOrMetadataIP. Each is parsed once at
// package initialization by mustCIDR, which panics on a malformed literal.
var (
	// IPv4 ranges rejected in every mode.
	metadataV4  = mustCIDR("169.254.0.0/16") // link-local, cloud metadata endpoints
	cgnatV4     = mustCIDR("100.64.0.0/10")  // carrier-grade NAT
	docRangesV4 = []net.IPNet{
		// TEST-NET documentation ranges (RFC-5737).
		mustCIDR("192.0.2.0/24"),
		mustCIDR("198.51.100.0/24"),
		mustCIDR("203.0.113.0/24"),
	}

	// RFC-1918 private IPv4 ranges; whether they are rejected depends on
	// saasMode().
	privateV4 = []net.IPNet{
		mustCIDR("10.0.0.0/8"),
		mustCIDR("172.16.0.0/12"),
		mustCIDR("192.168.0.0/16"),
	}

	// IPv6 unique-local space, plus the fd00::/8 half that SaaS mode permits.
	ulaV6  = mustCIDR("fc00::/7")
	fd00V6 = mustCIDR("fd00::/8")
)
// mustCIDR parses s as CIDR notation and returns the resulting network by
// value. It panics with an "ssrf: bad CIDR" message when s does not parse,
// so it is only suitable for literals fixed at compile time.
func mustCIDR(s string) net.IPNet {
	_, network, parseErr := net.ParseCIDR(s)
	if parseErr == nil {
		return *network
	}
	panic("ssrf: bad CIDR " + s + ": " + parseErr.Error())
}
// validateRelPath checks that filePath is relative and stays inside the
// destination it will be joined to. It returns an error when the cleaned
// path is absolute or contains a ".." path element, and nil otherwise.
// Used by copyFilesToContainer and deleteViaEphemeral as a defence-in-depth
// measure.
//
// Only whole ".." elements count as traversal. File names that merely
// contain two dots ("notes..txt", "..hidden") are valid relative paths and
// are accepted.
func validateRelPath(filePath string) error {
	clean := filepath.Clean(filePath)
	if filepath.IsAbs(clean) {
		return fmt.Errorf("path traversal or absolute path not allowed: %s", filePath)
	}
	// Clean has already collapsed every "x/.." pair, so a ".." element that
	// survives means the path climbs above its starting directory.
	for _, elem := range strings.Split(clean, string(filepath.Separator)) {
		if elem == ".." {
			return fmt.Errorf("path traversal or absolute path not allowed: %s", filePath)
		}
	}
	return nil
}