fix(registry): allow pending-DNS platform tunnel URL at register (#36 register half) #2425
@@ -261,9 +261,28 @@ func validateAgentURL(rawURL string) error {
|
||||
// the agent won't be reachable anyway, so blocking on DNS failure is safe.
|
||||
ips, lookupErr := net.LookupIP(hostname)
|
||||
if lookupErr != nil {
|
||||
// DNS lookup failed — block the URL rather than allow a potentially-
|
||||
// unreachable or intentionally-unresolvable hostname through. The
|
||||
// platform has no use for a workspace it cannot reach.
|
||||
// #36/#2421: a freshly-provisioned CROSS-CLOUD workspace advertises its
|
||||
// per-workspace Cloudflare tunnel hostname (ws-<id>.<appDomain>). That DNS
|
||||
// record is eventually-consistent, and a FAST-booting box (a Hetzner cpx
|
||||
// reports "workspace ready after ~1s") registers BEFORE the record
|
||||
// propagates → the lookup fails → 400 → and the runtime does not retry a
|
||||
// 4xx → agent_card never lands and the agent never comes online. AWS boots
|
||||
// slowly enough to miss the race, which is why only the fast cloud broke.
|
||||
//
|
||||
// Such a hostname is NOT an SSRF vector: it lives under the platform's own
|
||||
// domain (only the platform can create records there, so it can't be
|
||||
// pointed at 169.254/127/private space by an attacker), and it resolves to
|
||||
// nothing right now. So in SaaS mode allow a platform-tunnel hostname
|
||||
// through while its DNS settles; everything else stays blocked. The
|
||||
// unconditional metadata/loopback blocks above still apply once it
|
||||
// resolves. (Restores the pre-#1130 "let an unresolvable platform URL
|
||||
// through" behaviour, scoped to the trusted tunnel domain.)
|
||||
if saasMode() && isPlatformTunnelHostname(hostname) {
|
||||
log.Printf("Registry validateAgentURL: allowing not-yet-resolvable platform tunnel hostname %q (DNS still propagating)", hostname)
|
||||
return nil
|
||||
}
|
||||
// DNS lookup failed for a non-platform hostname — block it. The platform
|
||||
// has no use for a workspace it cannot reach.
|
||||
return fmt.Errorf("hostname %q cannot be resolved (DNS error): %w", hostname, lookupErr)
|
||||
}
|
||||
for _, ip := range ips {
|
||||
@@ -274,6 +293,24 @@ func validateAgentURL(rawURL string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// isPlatformTunnelHostname reports whether h is a platform-provisioned per-
// workspace Cloudflare tunnel hostname — `ws-<id>.<appDomain>` under the
// platform's OWN domain. Only the platform controls DNS there, so a not-yet-
// resolvable such hostname is a pending-DNS tunnel (DNS propagation race), never
// an attacker-controlled SSRF URL.
//
// The domain defaults to moleculesai.app (covers prod `*.moleculesai.app` and
// staging `*.staging.moleculesai.app`) and is overridable via
// MOLECULE_APP_DOMAIN for other deployments.
//
// Matching is done on normalised names: DNS names are case-insensitive and may
// be written with a trailing root dot, so both h and the configured domain are
// lower-cased and stripped of that dot before comparison. Leading/trailing dots
// and surrounding whitespace in MOLECULE_APP_DOMAIN are tolerated so that a
// value such as ".example.com" does not silently disable the check.
func isPlatformTunnelHostname(h string) bool {
	// Fold case and drop a single trailing root dot ("host.example.").
	host := strings.ToLower(strings.TrimSuffix(h, "."))
	if !strings.HasPrefix(host, "ws-") {
		return false
	}

	domain := strings.ToLower(strings.Trim(strings.TrimSpace(os.Getenv("MOLECULE_APP_DOMAIN")), "."))
	if domain == "" {
		// Unset (or effectively empty) override: fall back to the built-in domain.
		domain = "moleculesai.app"
	}

	// The leading "." forces a label boundary, so a lookalike such as
	// "ws-x.fakemoleculesai.app" does not match "moleculesai.app".
	return strings.HasSuffix(host, "."+domain)
}
|
||||
|
||||
// Register handles POST /registry/register
|
||||
// Upserts workspace, sets Redis TTL, broadcasts WORKSPACE_ONLINE.
|
||||
func (h *RegistryHandler) Register(c *gin.Context) {
|
||||
|
||||
@@ -882,6 +882,42 @@ func TestValidateAgentURL_SaaSMode_AllowsRFC1918(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestValidateAgentURL_PendingPlatformTunnel (#36/#2421): a freshly-provisioned
|
||||
// cross-cloud workspace advertises its per-workspace tunnel hostname
|
||||
// (ws-<id>.<appDomain>) whose DNS has not propagated yet when a FAST box (Hetzner
|
||||
// ~1s boot) registers. validateAgentURL must allow such a platform-tunnel
|
||||
// hostname through in SaaS mode instead of 400 (which the runtime never retries
|
||||
// → agent_card never lands). Non-platform unresolvable hostnames stay blocked.
|
||||
func TestValidateAgentURL_PendingPlatformTunnel(t *testing.T) {
|
||||
for _, tc := range []struct {
|
||||
h string
|
||||
want bool
|
||||
}{
|
||||
{"ws-abc123.moleculesai.app", true},
|
||||
{"ws-abc123.staging.moleculesai.app", true},
|
||||
{"ws-abc123.evil.com", false}, // not under the platform domain
|
||||
{"api.moleculesai.app", false}, // no ws- prefix
|
||||
{"ws-x.fakemoleculesai.app", false}, // lookalike domain, not a subdomain
|
||||
} {
|
||||
if got := isPlatformTunnelHostname(tc.h); got != tc.want {
|
||||
t.Errorf("isPlatformTunnelHostname(%q)=%v want %v", tc.h, got, tc.want)
|
||||
}
|
||||
}
|
||||
t.Setenv("MOLECULE_ORG_ID", "")
|
||||
t.Setenv("MOLECULE_DEPLOY_MODE", "saas")
|
||||
// A platform tunnel hostname is allowed — whether or not its DNS has
|
||||
// propagated (a resolved record is a public Cloudflare IP = allowed; an
|
||||
// unresolved one is allowed by the pending-tunnel branch).
|
||||
if err := validateAgentURL("https://ws-deadbeef0001.staging.moleculesai.app/a2a"); err != nil {
|
||||
t.Errorf("SaaS: pending platform tunnel must be allowed, got %v", err)
|
||||
}
|
||||
// A NON-platform unresolvable hostname stays blocked even in SaaS
|
||||
// (.invalid never resolves — RFC 2606).
|
||||
if err := validateAgentURL("https://ws-x.attacker.invalid/a2a"); err == nil {
|
||||
t.Error("SaaS: non-platform unresolvable hostname must stay blocked")
|
||||
}
|
||||
}
|
||||
|
||||
// TestValidateAgentURL_SaaSMode_StillBlocksMetadataEtAl verifies that even in
|
||||
// SaaS mode the always-blocked ranges (metadata, loopback, TEST-NET, CGNAT,
|
||||
// non-fd00 ULA) stay blocked.
|
||||
|
||||
Reference in New Issue
Block a user