fix(transcript): validate workspace URL to prevent SSRF (#272)

`TranscriptHandler.Get` previously proxied `agent_card->>'url'` directly
to the outbound HTTP client with no validation. Since `agent_card` is
attacker-writable via /registry/register, a workspace-token holder
could point it at cloud metadata (169.254.169.254), link-local ranges,
or non-http schemes and pivot the platform container against internal
services (IMDS, Redis, Postgres, other containers on the Docker net).

Four required fixes per reviewer:

1. `validateWorkspaceURL(u *url.URL)` — runs before `httpClient.Do`:
   - scheme must be http/https (rejects file://, gopher://, ftp://)
   - cloud metadata hostname blocklist (GCP + Azure + plain "metadata")
   - IMDS IP blocklist (169.254.169.254)
   - IPv4/IPv6 link-local blocklist (169.254/16, fe80::/10, multicast)
   - IPv6 unique-local fd00::/8 blocklist
   - loopback + docker.internal still allowed for local dev

2. Query-param allowlist — `target.RawQuery = c.Request.URL.RawQuery`
   forwarded everything verbatim, letting a caller smuggle params the
   upstream transcript endpoint didn't intend to expose. Replaced with
   an allowlist of `since` and `limit`.

3. Sanitized error string — `fmt.Sprintf("workspace unreachable: %v", err)`
   leaked the actual internal host/IP via `net.OpError`. Now logs the
   real error server-side and returns a plain "workspace unreachable"
   to the caller.

4. 10 new regression test cases:
   - `TestTranscript_Rejects{CloudMetadataIP,NonHTTPScheme,MetadataHostname,LinkLocalIPv6}`
     exercise the handler end-to-end with each attack URL and assert
     400 before the HTTP client fires.
   - `TestValidateWorkspaceURL` table-drives the validator across
     localhost/public/docker-internal (allowed) + IMDS/GCP/Azure/file/
     gopher/link-local/multicast (rejected).
   - `TestTranscript_ProxyPropagatesAllowlistedQueryParams` asserts
     `secret=leak&cmd=rm` is stripped while `since=42&limit=7` pass
     through.

Also fixed a pre-existing test bug: `seedWorkspace` was issuing a real
SQL Exec against sqlmock with no expectation set, so the prior test
helpers silently failed in CI. Replaced with `expectWorkspaceURLLookup`
which programs the mock correctly. All 11 tests now pass.

Closes #272

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
airenostars 2026-04-15 17:46:55 -07:00
parent 853734aa4e
commit 36ed90d807
2 changed files with 227 additions and 29 deletions

View File

@ -14,8 +14,11 @@ import (
"context"
"fmt"
"io"
"log"
"net"
"net/http"
"net/url"
"strings"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
@ -55,16 +58,32 @@ func (h *TranscriptHandler) Get(c *gin.Context) {
return
}
// No bearer minting needed — workspace /transcript trusts the internal
// Docker network (same model as POST / for A2A). Phase 30 remote work-
// spaces will need an auth story; tracked as follow-up.
// workspaceURL comes from agent_card which is attacker-writable via
// /registry/register — treat it as untrusted and validate before the
// outbound HTTP call to prevent SSRF (issue #272).
target, err := url.Parse(workspaceURL)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "invalid workspace URL"})
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace URL"})
return
}
if err := validateWorkspaceURL(target); err != nil {
log.Printf("transcript: workspace %s URL rejected: %v", workspaceID, err)
c.JSON(http.StatusBadRequest, gin.H{"error": "workspace URL not allowed"})
return
}
target.Path = "/transcript"
target.RawQuery = c.Request.URL.RawQuery
// Don't forward the raw query string — an attacker-controlled caller
// could smuggle params the upstream endpoint didn't intend to expose.
// Allowlist the two params the transcript endpoint actually uses.
q := url.Values{}
if since := c.Query("since"); since != "" {
q.Set("since", since)
}
if limit := c.Query("limit"); limit != "" {
q.Set("limit", limit)
}
target.RawQuery = q.Encode()
reqCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
defer cancel()
@ -76,7 +95,11 @@ func (h *TranscriptHandler) Get(c *gin.Context) {
resp, err := h.httpClient.Do(req)
if err != nil {
c.JSON(http.StatusBadGateway, gin.H{"error": fmt.Sprintf("workspace unreachable: %v", err)})
// Log the real error server-side (includes the target URL), but
// don't leak it to the caller — that would reveal internal host
// names / IPs reachable from the platform.
log.Printf("transcript: workspace %s unreachable: %v", workspaceID, err)
c.JSON(http.StatusBadGateway, gin.H{"error": "workspace unreachable"})
return
}
defer resp.Body.Close()
@ -89,3 +112,58 @@ func (h *TranscriptHandler) Get(c *gin.Context) {
}
c.Data(resp.StatusCode, resp.Header.Get("Content-Type"), body)
}
// validateWorkspaceURL enforces that the agent_card URL is safe to
// proxy to. agent_card is attacker-writable via /registry/register so
// any workspace-token holder could otherwise point the URL at cloud
// metadata (169.254.169.254), the Docker host, or other internal
// services reachable from the platform container.
//
// Policy:
// - scheme must be http or https (no file://, gopher://, ftp://, etc.)
// - host must be present
// - block cloud metadata endpoints (IMDS, GCP, Azure)
// - block link-local IPs (169.254/16 IPv4, fe80::/10 IPv6)
// - loopback is allowed — local dev runs workspaces on 127.0.0.1
// - Docker internal hostnames (host.docker.internal, *.molecule-monorepo-net)
// are allowed; the whole threat model assumes the platform already
// trusts peers on that network
func validateWorkspaceURL(u *url.URL) error {
if u.Scheme != "http" && u.Scheme != "https" {
return fmt.Errorf("unsupported scheme %q", u.Scheme)
}
host := u.Hostname()
if host == "" {
return fmt.Errorf("empty host")
}
// Hostname blocklist (pre-IP-parse — these are usually resolved by
// the HTTP stack, not by us).
lower := strings.ToLower(host)
for _, banned := range []string{
"metadata.google.internal",
"metadata.azure.com",
"metadata",
} {
if lower == banned {
return fmt.Errorf("metadata hostname blocked: %s", host)
}
}
// IP-literal checks.
if ip := net.ParseIP(host); ip != nil {
// IMDS / cloud metadata.
if ip.String() == "169.254.169.254" {
return fmt.Errorf("cloud metadata endpoint blocked")
}
// Link-local: IPv4 169.254.0.0/16, IPv6 fe80::/10.
if ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() {
return fmt.Errorf("link-local address blocked: %s", host)
}
// IPv6 unique local fd00::/8 — used by some IMDS implementations.
if ip.To4() == nil && len(ip) == net.IPv6len && ip[0] == 0xfd {
return fmt.Errorf("IPv6 unique-local address blocked: %s", host)
}
}
return nil
}

View File

@ -1,37 +1,44 @@
package handlers
import (
"database/sql"
"encoding/json"
"net/http"
"net/http/httptest"
"net/url"
"testing"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/DATA-DOG/go-sqlmock"
"github.com/gin-gonic/gin"
)
// helper: register a workspace row + return its ID
func seedWorkspace(t *testing.T, agentURL string) string {
t.Helper()
// urlParse is a tiny wrapper so table-driven tests can keep their lines short.
func urlParse(s string) (*url.URL, error) { return url.Parse(s) }
// expectWorkspaceURLLookup programs the sqlmock to answer the SELECT that
// TranscriptHandler.Get issues for `agent_card->>'url'`. Tests call this
// instead of inserting real rows (we use sqlmock — there's no DB).
//
// Returns the workspace ID as the handler's :id path param.
func expectWorkspaceURLLookup(mock sqlmock.Sqlmock, agentURL string) string {
id := "11111111-2222-3333-4444-555555555555"
_, err := db.DB.Exec(
`INSERT INTO workspaces (id, name, agent_card, status) VALUES ($1, 'transcript-test', $2, 'online')
ON CONFLICT (id) DO UPDATE SET agent_card = EXCLUDED.agent_card`,
id, []byte(`{"url":"`+agentURL+`"}`),
)
if err != nil {
t.Fatalf("seed workspace: %v", err)
}
mock.ExpectQuery("SELECT agent_card->>'url' FROM workspaces WHERE id = \\$1").
WithArgs(id).
WillReturnRows(sqlmock.NewRows([]string{"url"}).AddRow(agentURL))
return id
}
// ==================== GET /workspaces/:id/transcript ====================
func TestTranscript_WorkspaceNotFound(t *testing.T) {
setupTestDB(t)
mock := setupTestDB(t)
setupTestRedis(t)
h := NewTranscriptHandler()
mock.ExpectQuery("SELECT agent_card->>'url' FROM workspaces WHERE id = \\$1").
WithArgs("00000000-0000-0000-0000-000000000000").
WillReturnError(sql.ErrNoRows)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "00000000-0000-0000-0000-000000000000"}}
@ -43,7 +50,7 @@ func TestTranscript_WorkspaceNotFound(t *testing.T) {
}
func TestTranscript_ProxyForwardsAndReturnsBody(t *testing.T) {
setupTestDB(t)
mock := setupTestDB(t)
setupTestRedis(t)
h := NewTranscriptHandler()
@ -56,7 +63,7 @@ func TestTranscript_ProxyForwardsAndReturnsBody(t *testing.T) {
}))
defer stub.Close()
wsID := seedWorkspace(t, stub.URL)
wsID := expectWorkspaceURLLookup(mock,stub.URL)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@ -82,8 +89,8 @@ func TestTranscript_ProxyForwardsAndReturnsBody(t *testing.T) {
}
}
func TestTranscript_ProxyPropagatesQueryString(t *testing.T) {
setupTestDB(t)
func TestTranscript_ProxyPropagatesAllowlistedQueryParams(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
h := NewTranscriptHandler()
@ -94,23 +101,136 @@ func TestTranscript_ProxyPropagatesQueryString(t *testing.T) {
}))
defer stub.Close()
wsID := seedWorkspace(t, stub.URL)
wsID := expectWorkspaceURLLookup(mock,stub.URL)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: wsID}}
c.Request = httptest.NewRequest("GET", "/workspaces/"+wsID+"/transcript?since=42&limit=7", nil)
c.Request = httptest.NewRequest("GET", "/workspaces/"+wsID+"/transcript?since=42&limit=7&secret=leak&cmd=rm", nil)
h.Get(c)
if gotQuery != "since=42&limit=7" {
t.Errorf("expected query forwarded, got %q", gotQuery)
// url.Values.Encode() sorts alphabetically — limit before since.
// Crucially: secret + cmd are dropped (not in the allowlist).
if gotQuery != "limit=7&since=42" {
t.Errorf("expected only allowlisted since/limit forwarded, got %q", gotQuery)
}
}
// SSRF regression tests — see issue #272. agent_card->>'url' is attacker-
// writable via /registry/register so validateWorkspaceURL must reject
// link-local / cloud-metadata / non-http(s) targets before the outbound
// HTTP call fires.
func TestTranscript_RejectsCloudMetadataIP(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
h := NewTranscriptHandler()
wsID := expectWorkspaceURLLookup(mock,"http://169.254.169.254/")
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: wsID}}
c.Request = httptest.NewRequest("GET", "/workspaces/"+wsID+"/transcript", nil)
h.Get(c)
if w.Code != http.StatusBadRequest {
t.Errorf("expected 400 for IMDS target, got %d: %s", w.Code, w.Body.String())
}
}
func TestTranscript_RejectsNonHTTPScheme(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
h := NewTranscriptHandler()
wsID := expectWorkspaceURLLookup(mock,"file:///etc/passwd")
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: wsID}}
c.Request = httptest.NewRequest("GET", "/workspaces/"+wsID+"/transcript", nil)
h.Get(c)
if w.Code != http.StatusBadRequest {
t.Errorf("expected 400 for file:// scheme, got %d: %s", w.Code, w.Body.String())
}
}
func TestTranscript_RejectsMetadataHostname(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
h := NewTranscriptHandler()
wsID := expectWorkspaceURLLookup(mock,"http://metadata.google.internal/computeMetadata/v1/")
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: wsID}}
c.Request = httptest.NewRequest("GET", "/workspaces/"+wsID+"/transcript", nil)
h.Get(c)
if w.Code != http.StatusBadRequest {
t.Errorf("expected 400 for metadata hostname, got %d: %s", w.Code, w.Body.String())
}
}
func TestTranscript_RejectsLinkLocalIPv6(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
h := NewTranscriptHandler()
wsID := expectWorkspaceURLLookup(mock,"http://[fe80::1]/")
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: wsID}}
c.Request = httptest.NewRequest("GET", "/workspaces/"+wsID+"/transcript", nil)
h.Get(c)
if w.Code != http.StatusBadRequest {
t.Errorf("expected 400 for link-local IPv6, got %d: %s", w.Code, w.Body.String())
}
}
// validateWorkspaceURL unit tests — pure function, no DB/Redis needed.
func TestValidateWorkspaceURL(t *testing.T) {
cases := []struct {
name string
raw string
wantErr bool
}{
{"http localhost allowed (dev)", "http://127.0.0.1:8000", false},
{"https public allowed", "https://agent.example.com", false},
{"docker internal allowed", "http://host.docker.internal:8000", false},
{"IMDS IP rejected", "http://169.254.169.254", true},
{"GCP metadata hostname rejected", "http://metadata.google.internal", true},
{"Azure metadata rejected", "http://metadata.azure.com", true},
{"file scheme rejected", "file:///etc/passwd", true},
{"gopher rejected", "gopher://internal:70/", true},
{"IPv6 link-local rejected", "http://[fe80::1]", true},
{"IPv4 link-local multicast rejected", "http://224.0.0.1", true},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
u, parseErr := urlParse(tc.raw)
if parseErr != nil && !tc.wantErr {
t.Fatalf("parse error: %v", parseErr)
}
if parseErr != nil {
return // unparseable URLs are rejected upstream; not this function's job
}
err := validateWorkspaceURL(u)
if tc.wantErr && err == nil {
t.Errorf("expected error for %q, got nil", tc.raw)
}
if !tc.wantErr && err != nil {
t.Errorf("expected OK for %q, got %v", tc.raw, err)
}
})
}
}
func TestTranscript_UnreachableWorkspaceReturns502(t *testing.T) {
setupTestDB(t)
mock := setupTestDB(t)
setupTestRedis(t)
h := NewTranscriptHandler()
wsID := seedWorkspace(t, "http://127.0.0.1:1") // refused
wsID := expectWorkspaceURLLookup(mock,"http://127.0.0.1:1") // refused
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)