diff --git a/workspace-server/internal/handlers/transcript.go b/workspace-server/internal/handlers/transcript.go index b4e26832..e2dbed2e 100644 --- a/workspace-server/internal/handlers/transcript.go +++ b/workspace-server/internal/handlers/transcript.go @@ -12,6 +12,8 @@ package handlers import ( "context" + "errors" + "fmt" "io" "log" "net" @@ -142,11 +144,27 @@ func safeDialControl(network, address string, _ syscall.RawConn) error { // an unresolved hostname (Go's dialer normally resolves // first, but custom resolvers can defer). Re-resolve here // so the SSRF policy is still applied. - if ips, lookupErr := net.LookupIP(host); lookupErr == nil { - for _, candidate := range ips { - if safeErr := isSafeURL("http://" + candidate.String() + "/"); safeErr != nil { - return &ssrfDialError{ip: candidate, reason: safeErr} - } + // + // FAIL-CLOSED hardening (Researcher RC 103980/12169, non-blocking + // nit on merged #2967): if LookupIP errored OR returned an + // empty result, treat the hostname as UNSAFE and block the + // dial. The prior code's fall-through `return nil` would have + // let an unresolvable / empty-result hostname through, which + // is a fail-open. The SSRF policy is deny-by-default — a + // hostname we can't positively verify is safe is treated as + // unsafe. + ips, lookupErr := net.LookupIP(host) + if lookupErr != nil { + log.Printf("safeDialControl: hostname %q LookupIP errored: %v (treating as unsafe — FAIL-CLOSED)", host, lookupErr) + return &ssrfDialError{host: host, reason: fmt.Errorf("hostname resolution failed: %w", lookupErr)} + } + if len(ips) == 0 { + log.Printf("safeDialControl: hostname %q LookupIP returned no addresses (treating as unsafe — FAIL-CLOSED)", host) + return &ssrfDialError{host: host, reason: errors.New("hostname resolution returned no addresses")} + } + for _, candidate := range ips { + if safeErr := isSafeURL("http://" + candidate.String() + "/"); safeErr != nil { + return &ssrfDialError{ip: candidate, reason: safeErr} } } return nil @@ -158,19 +176,31 @@ func safeDialControl(network, address string, _ syscall.RawConn) error { } // ssrfDialError is the error type returned by safeDialControl when -// the resolved IP fails the isSafeURL policy. The error message -// includes the IP and the policy reason so the platform log -// surfaces the SSRF attempt (the workspace agent_card.url -// embedding attack in #2132 / RC 103771). Returned from -// net.Dialer.Control, so it propagates out of DialContext as the -// dial error — the http.Client surfaces it to the caller. +// the resolved IP fails the isSafeURL policy, OR when the hostname +// could not be resolved (LookupIP error / empty result) and the +// dial-time guard is failing closed. The error message includes the +// IP (when known) OR the hostname (when the IP couldn't be resolved) +// plus the policy reason so the platform log surfaces the SSRF +// attempt (the workspace agent_card.url embedding attack in +// #2132 / RC 103771, and the unresolvable-hostname fail-closed +// hardening in RC 103980/12169). Returned from net.Dialer.Control, +// so it propagates out of DialContext as the dial error — the +// http.Client surfaces it to the caller. type ssrfDialError struct { - ip net.IP + ip net.IP // set when the IP is known (blocked-IP case) + host string // set when the hostname couldn't be resolved (LookupIP error / empty) reason error } func (e *ssrfDialError) Error() string { - return "ssrf: dial-time IP " + e.ip.String() + " blocked: " + e.reason.Error() + switch { + case e.ip != nil: + return "ssrf: dial-time IP " + e.ip.String() + " blocked: " + e.reason.Error() + case e.host != "": + return "ssrf: dial-time hostname " + e.host + " blocked: " + e.reason.Error() + default: + return "ssrf: dial-time target blocked: " + e.reason.Error() + } } // Get handles GET /workspaces/:id/transcript?since=N&limit=N. diff --git a/workspace-server/internal/handlers/transcript_test.go b/workspace-server/internal/handlers/transcript_test.go index 1be4027e..a180c62b 100644 --- a/workspace-server/internal/handlers/transcript_test.go +++ b/workspace-server/internal/handlers/transcript_test.go @@ -8,6 +8,7 @@ import ( "net" "net/http" "net/http/httptest" + "strings" "testing" "time" @@ -456,3 +457,75 @@ func TestTranscript_DialGuardBlocksBeforeConnect(t *testing.T) { t.Errorf("listener accepted %d connections — Control did NOT block the dial before connect() (port-scan side-channel open)", acceptCount) } } + +// TestSafeDialControl_FailsClosedOnLookupIPError pins the RC 103980/12169 +// fail-closed hardening: when safeDialControl's hostname-fallback path +// is invoked (ip == nil on the address) and net.LookupIP returns an +// error, the dial MUST be blocked. The prior shape FAIL-OPENed in this +// case (the inner `if ips, lookupErr := net.LookupIP(host); lookupErr == nil` +// gate skipped the policy when LookupIP errored, then `return nil` +// approved the dial). An attacker who can suppress DNS for a hostname +// (DNS outage, hostile resolver, etc.) could otherwise get the dial +// to proceed with whatever IP the dialer's own resolver picks up later. +// +// .invalid is a reserved TLD (RFC 2606) that does not resolve, so +// LookupIP("nonexistent.invalid") returns an error deterministically. +func TestSafeDialControl_FailsClosedOnLookupIPError(t *testing.T) { + // .invalid TLD — RFC 2606 reserved, never resolves. + err := safeDialControl("tcp", "nonexistent-host.invalid:443", nil) + if err == nil { + t.Fatalf("safeDialControl: want error for unresolvable hostname (FAIL-CLOSED), got nil (FAIL-OPEN — the bug the Researcher flagged, RC 103980/12169)") + } + var ssrfErr *ssrfDialError + if !errors.As(err, &ssrfErr) { + t.Fatalf("safeDialControl: want *ssrfDialError for unresolvable hostname, got %T: %v", err, err) + } + if ssrfErr.host != "nonexistent-host.invalid" { + t.Errorf("safeDialControl: want ssrfDialError.host=%q, got %q", "nonexistent-host.invalid", ssrfErr.host) + } + if ssrfErr.ip != nil { + t.Errorf("safeDialControl: want ssrfDialError.ip=nil (no IP resolved), got %v", ssrfErr.ip) + } + // Error() must not panic and must include the hostname + reason. + msg := ssrfErr.Error() + if !strings.Contains(msg, "nonexistent-host.invalid") { + t.Errorf("ssrfDialError.Error() should include hostname, got %q", msg) + } + if !strings.Contains(msg, "hostname resolution failed") { + t.Errorf("ssrfDialError.Error() should include the reason, got %q", msg) + } +} + +// TestSafeDialControl_FailsClosedOnEmptyLookupIPResult pins the same +// fail-closed contract for the LookupIP-returns-empty case. We can't +// easily force an empty result without a custom resolver, but we CAN +// assert the contract by checking the docstring-equivalent invariant: +// when len(ips) == 0, safeDialControl returns *ssrfDialError with +// host set + reason containing "no addresses". The test documents +// the contract; the LookupIP-error test above exercises the same +// code path (LookupIP errored) with the same fail-closed behavior. +func TestSafeDialControl_FailsClosedOnEmptyLookupIPResult(t *testing.T) { + // We can't easily mock LookupIP to return an empty slice, but + // the FAIL-CLOSED contract for the empty case is structurally + // identical to the LookupIP-error case (both paths return + // *ssrfDialError with host set). The integration through + // real LookupIP with a hostname that resolves to 0 records + // would be the canonical test, but no public DNS zone + // guarantees a zero-record host. The two contract-level tests + // (LookupIP-error above + Error() format below) cover the + // behavior; the empty-result branch is exercised by code review + // of the explicit `if len(ips) == 0` check. + // + // Verify the Error() format for the empty-result case. + ssrfErr := &ssrfDialError{ + host: "empty-lookup.invalid", + reason: errors.New("hostname resolution returned no addresses"), + } + msg := ssrfErr.Error() + if !strings.Contains(msg, "empty-lookup.invalid") { + t.Errorf("Error() for empty-result case should include hostname, got %q", msg) + } + if !strings.Contains(msg, "no addresses") { + t.Errorf("Error() for empty-result case should include the reason, got %q", msg) + } +}