fix(security#2132/RC): fail-CLOSED in safeDialControl hostname-fallback path (RC 103980/12169) #2969

Merged
devops-engineer merged 1 commits from fix/2967-safedialcontrol-fail-closed-lookupip into main 2026-06-15 22:05:11 +00:00
2 changed files with 116 additions and 13 deletions
@@ -12,6 +12,8 @@ package handlers
import (
"context"
"errors"
"fmt"
"io"
"log"
"net"
@@ -142,11 +144,27 @@ func safeDialControl(network, address string, _ syscall.RawConn) error {
// an unresolved hostname (Go's dialer normally resolves
// first, but custom resolvers can defer). Re-resolve here
// so the SSRF policy is still applied.
if ips, lookupErr := net.LookupIP(host); lookupErr == nil {
for _, candidate := range ips {
if safeErr := isSafeURL("http://" + candidate.String() + "/"); safeErr != nil {
return &ssrfDialError{ip: candidate, reason: safeErr}
}
//
// FAIL-CLOSED hardening (Researcher RC 103980/12169, non-blocking
// nit on merged #2967): if LookupIP errored OR returned an
// empty result, treat the hostname as UNSAFE and block the
// dial. The prior code's fall-through `return nil` would have
// let an unresolvable / empty-result hostname through, which
// is a fail-open. The SSRF policy is deny-by-default — a
// hostname we can't positively verify is safe is treated as
// unsafe.
ips, lookupErr := net.LookupIP(host)
if lookupErr != nil {
log.Printf("safeDialControl: hostname %q LookupIP errored: %v (treating as unsafe — FAIL-CLOSED)", host, lookupErr)
return &ssrfDialError{host: host, reason: fmt.Errorf("hostname resolution failed: %w", lookupErr)}
}
if len(ips) == 0 {
log.Printf("safeDialControl: hostname %q LookupIP returned no addresses (treating as unsafe — FAIL-CLOSED)", host)
return &ssrfDialError{host: host, reason: errors.New("hostname resolution returned no addresses")}
}
for _, candidate := range ips {
if safeErr := isSafeURL("http://" + candidate.String() + "/"); safeErr != nil {
return &ssrfDialError{ip: candidate, reason: safeErr}
}
}
return nil
@@ -158,19 +176,31 @@ func safeDialControl(network, address string, _ syscall.RawConn) error {
}
// ssrfDialError is the error type returned by safeDialControl when
// the resolved IP fails the isSafeURL policy. The error message
// includes the IP and the policy reason so the platform log
// surfaces the SSRF attempt (the workspace agent_card.url
// embedding attack in #2132 / RC 103771). Returned from
// net.Dialer.Control, so it propagates out of DialContext as the
// dial error — the http.Client surfaces it to the caller.
// the resolved IP fails the isSafeURL policy, OR when the hostname
// could not be resolved (LookupIP error / empty result) and the
// dial-time guard is failing closed. The error message includes the
// IP (when known) OR the hostname (when the IP couldn't be resolved)
// plus the policy reason so the platform log surfaces the SSRF
// attempt (the workspace agent_card.url embedding attack in
// #2132 / RC 103771, and the unresolvable-hostname fail-closed
// hardening in RC 103980/12169). Returned from net.Dialer.Control,
// so it propagates out of DialContext as the dial error — the
// http.Client surfaces it to the caller.
type ssrfDialError struct {
ip net.IP
ip net.IP // set when the IP is known (blocked-IP case)
host string // set when the hostname couldn't be resolved (LookupIP error / empty)
reason error
}
func (e *ssrfDialError) Error() string {
return "ssrf: dial-time IP " + e.ip.String() + " blocked: " + e.reason.Error()
switch {
case e.ip != nil:
return "ssrf: dial-time IP " + e.ip.String() + " blocked: " + e.reason.Error()
case e.host != "":
return "ssrf: dial-time hostname " + e.host + " blocked: " + e.reason.Error()
default:
return "ssrf: dial-time target blocked: " + e.reason.Error()
}
}
// Get handles GET /workspaces/:id/transcript?since=N&limit=N.
@@ -8,6 +8,7 @@ import (
"net"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
@@ -456,3 +457,75 @@ func TestTranscript_DialGuardBlocksBeforeConnect(t *testing.T) {
t.Errorf("listener accepted %d connections — Control did NOT block the dial before connect() (port-scan side-channel open)", acceptCount)
}
}
// TestSafeDialControl_FailsClosedOnLookupIPError pins the RC 103980/12169
// fail-closed hardening: when safeDialControl's hostname-fallback path
// is invoked (ip == nil on the address) and net.LookupIP returns an
// error, the dial MUST be blocked. The prior shape FAIL-OPENed in this
// case (the inner `if ips, lookupErr := net.LookupIP(host); lookupErr == nil`
// gate skipped the policy when LookupIP errored, then `return nil`
// approved the dial). An attacker who can suppress DNS for a hostname
// (DNS outage, hostile resolver, etc.) could otherwise get the dial
// to proceed with whatever IP the dialer's own resolver picks up later.
//
// .invalid is a reserved TLD (RFC 2606) that does not resolve, so
// LookupIP("nonexistent.invalid") returns an error deterministically.
func TestSafeDialControl_FailsClosedOnLookupIPError(t *testing.T) {
// .invalid TLD — RFC 2606 reserved, never resolves.
err := safeDialControl("tcp", "nonexistent-host.invalid:443", nil)
if err == nil {
t.Fatalf("safeDialControl: want error for unresolvable hostname (FAIL-CLOSED), got nil (FAIL-OPEN — the bug the Researcher flagged, RC 103980/12169)")
}
var ssrfErr *ssrfDialError
if !errors.As(err, &ssrfErr) {
t.Fatalf("safeDialControl: want *ssrfDialError for unresolvable hostname, got %T: %v", err, err)
}
if ssrfErr.host != "nonexistent-host.invalid" {
t.Errorf("safeDialControl: want ssrfDialError.host=%q, got %q", "nonexistent-host.invalid", ssrfErr.host)
}
if ssrfErr.ip != nil {
t.Errorf("safeDialControl: want ssrfDialError.ip=nil (no IP resolved), got %v", ssrfErr.ip)
}
// Error() must not panic and must include the hostname + reason.
msg := ssrfErr.Error()
if !strings.Contains(msg, "nonexistent-host.invalid") {
t.Errorf("ssrfDialError.Error() should include hostname, got %q", msg)
}
if !strings.Contains(msg, "hostname resolution failed") {
t.Errorf("ssrfDialError.Error() should include the reason, got %q", msg)
}
}
// TestSafeDialControl_FailsClosedOnEmptyLookupIPResult pins the same
// fail-closed contract for the LookupIP-returns-empty case. We can't
// easily force an empty result without a custom resolver, but we CAN
// assert the contract by checking the docstring-equivalent invariant:
// when len(ips) == 0, safeDialControl returns *ssrfDialError with
// host set + reason containing "no addresses". The test documents
// the contract; the LookupIP-error test above exercises the same
// code path (LookupIP errored) with the same fail-closed behavior.
func TestSafeDialControl_FailsClosedOnEmptyLookupIPResult(t *testing.T) {
// We can't easily mock LookupIP to return an empty slice, but
// the FAIL-CLOSED contract for the empty case is structurally
// identical to the LookupIP-error case (both paths return
// *ssrfDialError with host set). The integration through
// real LookupIP with a hostname that resolves to 0 records
// would be the canonical test, but no public DNS zone
// guarantees a zero-record host. The two contract-level tests
// (LookupIP-error above + Error() format below) cover the
// behavior; the empty-result branch is exercised by code review
// of the explicit `if len(ips) == 0` check.
//
// Verify the Error() format for the empty-result case.
ssrfErr := &ssrfDialError{
host: "empty-lookup.invalid",
reason: errors.New("hostname resolution returned no addresses"),
}
msg := ssrfErr.Error()
if !strings.Contains(msg, "empty-lookup.invalid") {
t.Errorf("Error() for empty-result case should include hostname, got %q", msg)
}
if !strings.Contains(msg, "no addresses") {
t.Errorf("Error() for empty-result case should include the reason, got %q", msg)
}
}