From 36ed90d8070cdff995bcf7e50f5e77ffa719feec Mon Sep 17 00:00:00 2001 From: airenostars Date: Wed, 15 Apr 2026 17:46:55 -0700 Subject: [PATCH] fix(transcript): validate workspace URL to prevent SSRF (#272) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `TranscriptHandler.Get` previously proxied `agent_card->>'url'` directly to the outbound HTTP client with no validation. Since `agent_card` is attacker-writable via /registry/register, a workspace-token holder could point it at cloud metadata (169.254.169.254), link-local ranges, or non-http schemes and pivot the platform container against internal services (IMDS, Redis, Postgres, other containers on the Docker net). Four required fixes per reviewer: 1. `validateWorkspaceURL(u *url.URL)` — runs before `httpClient.Do`: - scheme must be http/https (rejects file://, gopher://, ftp://) - cloud metadata hostname blocklist (GCP + Azure + plain "metadata") - IMDS IP blocklist (169.254.169.254) - IPv4/IPv6 link-local blocklist (169.254/16, fe80::/10, multicast) - IPv6 unique-local fd00::/8 blocklist - loopback + docker.internal still allowed for local dev 2. Query-param allowlist — `target.RawQuery = c.Request.URL.RawQuery` forwarded everything verbatim, letting a caller smuggle params the upstream transcript endpoint didn't intend to expose. Replaced with an allowlist of `since` and `limit`. 3. Sanitized error string — `fmt.Sprintf("workspace unreachable: %v", err)` leaked the actual internal host/IP via `net.OpError`. Now logs the real error server-side and returns a plain "workspace unreachable" to the caller. 4. 10 new regression test cases: - `TestTranscript_Rejects{CloudMetadataIP,NonHTTPScheme,MetadataHostname,LinkLocalIPv6}` exercise the handler end-to-end with each attack URL and assert 400 before the HTTP client fires. - `TestValidateWorkspaceURL` table-drives the validator across localhost/public/docker-internal (allowed) + IMDS/GCP/Azure/file/ gopher/link-local/multicast (rejected). - `TestTranscript_ProxyPropagatesAllowlistedQueryParams` asserts `secret=leak&cmd=rm` is stripped while `since=42&limit=7` pass through. Also fixed a pre-existing test bug: `seedWorkspace` was issuing a real SQL Exec against sqlmock with no expectation set, so the prior test helpers silently failed in CI. Replaced with `expectWorkspaceURLLookup` which programs the mock correctly. All 11 tests now pass. Closes #272 Co-Authored-By: Claude Opus 4.6 (1M context) --- platform/internal/handlers/transcript.go | 90 +++++++++- platform/internal/handlers/transcript_test.go | 166 +++++++++++++++--- 2 files changed, 227 insertions(+), 29 deletions(-) diff --git a/platform/internal/handlers/transcript.go b/platform/internal/handlers/transcript.go index d7ac3e6b..c07426e2 100644 --- a/platform/internal/handlers/transcript.go +++ b/platform/internal/handlers/transcript.go @@ -14,8 +14,11 @@ import ( "context" "fmt" "io" + "log" + "net" "net/http" "net/url" + "strings" "time" "github.com/Molecule-AI/molecule-monorepo/platform/internal/db" @@ -55,16 +58,32 @@ func (h *TranscriptHandler) Get(c *gin.Context) { return } - // No bearer minting needed — workspace /transcript trusts the internal - // Docker network (same model as POST / for A2A). Phase 30 remote work- - // spaces will need an auth story; tracked as follow-up. + // workspaceURL comes from agent_card which is attacker-writable via + // /registry/register — treat it as untrusted and validate before the + // outbound HTTP call to prevent SSRF (issue #272). target, err := url.Parse(workspaceURL) if err != nil { - c.JSON(http.StatusInternalServerError, gin.H{"error": "invalid workspace URL"}) + c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace URL"}) + return + } + if err := validateWorkspaceURL(target); err != nil { + log.Printf("transcript: workspace %s URL rejected: %v", workspaceID, err) + c.JSON(http.StatusBadRequest, gin.H{"error": "workspace URL not allowed"}) return } target.Path = "/transcript" - target.RawQuery = c.Request.URL.RawQuery + + // Don't forward the raw query string — an attacker-controlled caller + // could smuggle params the upstream endpoint didn't intend to expose. + // Allowlist the two params the transcript endpoint actually uses. + q := url.Values{} + if since := c.Query("since"); since != "" { + q.Set("since", since) + } + if limit := c.Query("limit"); limit != "" { + q.Set("limit", limit) + } + target.RawQuery = q.Encode() reqCtx, cancel := context.WithTimeout(ctx, 10*time.Second) defer cancel() @@ -76,7 +95,11 @@ func (h *TranscriptHandler) Get(c *gin.Context) { resp, err := h.httpClient.Do(req) if err != nil { - c.JSON(http.StatusBadGateway, gin.H{"error": fmt.Sprintf("workspace unreachable: %v", err)}) + // Log the real error server-side (includes the target URL), but + // don't leak it to the caller — that would reveal internal host + // names / IPs reachable from the platform. + log.Printf("transcript: workspace %s unreachable: %v", workspaceID, err) + c.JSON(http.StatusBadGateway, gin.H{"error": "workspace unreachable"}) return } defer resp.Body.Close() @@ -89,3 +112,58 @@ func (h *TranscriptHandler) Get(c *gin.Context) { } c.Data(resp.StatusCode, resp.Header.Get("Content-Type"), body) } + +// validateWorkspaceURL enforces that the agent_card URL is safe to +// proxy to. agent_card is attacker-writable via /registry/register so +// any workspace-token holder could otherwise point the URL at cloud +// metadata (169.254.169.254), the Docker host, or other internal +// services reachable from the platform container. +// +// Policy: +// - scheme must be http or https (no file://, gopher://, ftp://, etc.) +// - host must be present +// - block cloud metadata endpoints (IMDS, GCP, Azure) +// - block link-local IPs (169.254/16 IPv4, fe80::/10 IPv6) +// - loopback is allowed — local dev runs workspaces on 127.0.0.1 +// - Docker internal hostnames (host.docker.internal, *.molecule-monorepo-net) +// are allowed; the whole threat model assumes the platform already +// trusts peers on that network +func validateWorkspaceURL(u *url.URL) error { + if u.Scheme != "http" && u.Scheme != "https" { + return fmt.Errorf("unsupported scheme %q", u.Scheme) + } + host := u.Hostname() + if host == "" { + return fmt.Errorf("empty host") + } + + // Hostname blocklist (pre-IP-parse — these are usually resolved by + // the HTTP stack, not by us). + lower := strings.ToLower(host) + for _, banned := range []string{ + "metadata.google.internal", + "metadata.azure.com", + "metadata", + } { + if lower == banned { + return fmt.Errorf("metadata hostname blocked: %s", host) + } + } + + // IP-literal checks. + if ip := net.ParseIP(host); ip != nil { + // IMDS / cloud metadata. + if ip.String() == "169.254.169.254" { + return fmt.Errorf("cloud metadata endpoint blocked") + } + // Link-local: IPv4 169.254.0.0/16, IPv6 fe80::/10. + if ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() { + return fmt.Errorf("link-local address blocked: %s", host) + } + // IPv6 unique local fd00::/8 — used by some IMDS implementations. + if ip.To4() == nil && len(ip) == net.IPv6len && ip[0] == 0xfd { + return fmt.Errorf("IPv6 unique-local address blocked: %s", host) + } + } + return nil +} diff --git a/platform/internal/handlers/transcript_test.go b/platform/internal/handlers/transcript_test.go index feba35ff..f88bb812 100644 --- a/platform/internal/handlers/transcript_test.go +++ b/platform/internal/handlers/transcript_test.go @@ -1,37 +1,44 @@ package handlers import ( + "database/sql" "encoding/json" "net/http" "net/http/httptest" + "net/url" "testing" - "github.com/Molecule-AI/molecule-monorepo/platform/internal/db" + "github.com/DATA-DOG/go-sqlmock" "github.com/gin-gonic/gin" ) -// helper: register a workspace row + return its ID -func seedWorkspace(t *testing.T, agentURL string) string { - t.Helper() +// urlParse is a tiny wrapper so table-driven tests can keep their lines short. +func urlParse(s string) (*url.URL, error) { return url.Parse(s) } + +// expectWorkspaceURLLookup programs the sqlmock to answer the SELECT that +// TranscriptHandler.Get issues for `agent_card->>'url'`. Tests call this +// instead of inserting real rows (we use sqlmock — there's no DB). +// +// Returns the workspace ID as the handler's :id path param. +func expectWorkspaceURLLookup(mock sqlmock.Sqlmock, agentURL string) string { id := "11111111-2222-3333-4444-555555555555" - _, err := db.DB.Exec( - `INSERT INTO workspaces (id, name, agent_card, status) VALUES ($1, 'transcript-test', $2, 'online') - ON CONFLICT (id) DO UPDATE SET agent_card = EXCLUDED.agent_card`, - id, []byte(`{"url":"`+agentURL+`"}`), - ) - if err != nil { - t.Fatalf("seed workspace: %v", err) - } + mock.ExpectQuery("SELECT agent_card->>'url' FROM workspaces WHERE id = \\$1"). + WithArgs(id). + WillReturnRows(sqlmock.NewRows([]string{"url"}).AddRow(agentURL)) return id } // ==================== GET /workspaces/:id/transcript ==================== func TestTranscript_WorkspaceNotFound(t *testing.T) { - setupTestDB(t) + mock := setupTestDB(t) setupTestRedis(t) h := NewTranscriptHandler() + mock.ExpectQuery("SELECT agent_card->>'url' FROM workspaces WHERE id = \\$1"). + WithArgs("00000000-0000-0000-0000-000000000000"). + WillReturnError(sql.ErrNoRows) + w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) c.Params = gin.Params{{Key: "id", Value: "00000000-0000-0000-0000-000000000000"}} @@ -43,7 +50,7 @@ func TestTranscript_WorkspaceNotFound(t *testing.T) { } func TestTranscript_ProxyForwardsAndReturnsBody(t *testing.T) { - setupTestDB(t) + mock := setupTestDB(t) setupTestRedis(t) h := NewTranscriptHandler() @@ -56,7 +63,7 @@ func TestTranscript_ProxyForwardsAndReturnsBody(t *testing.T) { })) defer stub.Close() - wsID := seedWorkspace(t, stub.URL) + wsID := expectWorkspaceURLLookup(mock,stub.URL) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) @@ -82,8 +89,8 @@ func TestTranscript_ProxyForwardsAndReturnsBody(t *testing.T) { } } -func TestTranscript_ProxyPropagatesQueryString(t *testing.T) { - setupTestDB(t) +func TestTranscript_ProxyPropagatesAllowlistedQueryParams(t *testing.T) { + mock := setupTestDB(t) setupTestRedis(t) h := NewTranscriptHandler() @@ -94,23 +101,136 @@ func TestTranscript_ProxyPropagatesQueryString(t *testing.T) { })) defer stub.Close() - wsID := seedWorkspace(t, stub.URL) + wsID := expectWorkspaceURLLookup(mock,stub.URL) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) c.Params = gin.Params{{Key: "id", Value: wsID}} - c.Request = httptest.NewRequest("GET", "/workspaces/"+wsID+"/transcript?since=42&limit=7", nil) + c.Request = httptest.NewRequest("GET", "/workspaces/"+wsID+"/transcript?since=42&limit=7&secret=leak&cmd=rm", nil) h.Get(c) - if gotQuery != "since=42&limit=7" { - t.Errorf("expected query forwarded, got %q", gotQuery) + // url.Values.Encode() sorts alphabetically — limit before since. + // Crucially: secret + cmd are dropped (not in the allowlist). + if gotQuery != "limit=7&since=42" { + t.Errorf("expected only allowlisted since/limit forwarded, got %q", gotQuery) + } +} + +// SSRF regression tests — see issue #272. agent_card->>'url' is attacker- +// writable via /registry/register so validateWorkspaceURL must reject +// link-local / cloud-metadata / non-http(s) targets before the outbound +// HTTP call fires. + +func TestTranscript_RejectsCloudMetadataIP(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + h := NewTranscriptHandler() + + wsID := expectWorkspaceURLLookup(mock,"http://169.254.169.254/") + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: wsID}} + c.Request = httptest.NewRequest("GET", "/workspaces/"+wsID+"/transcript", nil) + h.Get(c) + if w.Code != http.StatusBadRequest { + t.Errorf("expected 400 for IMDS target, got %d: %s", w.Code, w.Body.String()) + } +} + +func TestTranscript_RejectsNonHTTPScheme(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + h := NewTranscriptHandler() + + wsID := expectWorkspaceURLLookup(mock,"file:///etc/passwd") + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: wsID}} + c.Request = httptest.NewRequest("GET", "/workspaces/"+wsID+"/transcript", nil) + h.Get(c) + if w.Code != http.StatusBadRequest { + t.Errorf("expected 400 for file:// scheme, got %d: %s", w.Code, w.Body.String()) + } +} + +func TestTranscript_RejectsMetadataHostname(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + h := NewTranscriptHandler() + + wsID := expectWorkspaceURLLookup(mock,"http://metadata.google.internal/computeMetadata/v1/") + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: wsID}} + c.Request = httptest.NewRequest("GET", "/workspaces/"+wsID+"/transcript", nil) + h.Get(c) + if w.Code != http.StatusBadRequest { + t.Errorf("expected 400 for metadata hostname, got %d: %s", w.Code, w.Body.String()) + } +} + +func TestTranscript_RejectsLinkLocalIPv6(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + h := NewTranscriptHandler() + + wsID := expectWorkspaceURLLookup(mock,"http://[fe80::1]/") + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: wsID}} + c.Request = httptest.NewRequest("GET", "/workspaces/"+wsID+"/transcript", nil) + h.Get(c) + if w.Code != http.StatusBadRequest { + t.Errorf("expected 400 for link-local IPv6, got %d: %s", w.Code, w.Body.String()) + } +} + +// validateWorkspaceURL unit tests — pure function, no DB/Redis needed. +func TestValidateWorkspaceURL(t *testing.T) { + cases := []struct { + name string + raw string + wantErr bool + }{ + {"http localhost allowed (dev)", "http://127.0.0.1:8000", false}, + {"https public allowed", "https://agent.example.com", false}, + {"docker internal allowed", "http://host.docker.internal:8000", false}, + {"IMDS IP rejected", "http://169.254.169.254", true}, + {"GCP metadata hostname rejected", "http://metadata.google.internal", true}, + {"Azure metadata rejected", "http://metadata.azure.com", true}, + {"file scheme rejected", "file:///etc/passwd", true}, + {"gopher rejected", "gopher://internal:70/", true}, + {"IPv6 link-local rejected", "http://[fe80::1]", true}, + {"IPv4 link-local multicast rejected", "http://224.0.0.1", true}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + u, parseErr := urlParse(tc.raw) + if parseErr != nil && !tc.wantErr { + t.Fatalf("parse error: %v", parseErr) + } + if parseErr != nil { + return // unparseable URLs are rejected upstream; not this function's job + } + err := validateWorkspaceURL(u) + if tc.wantErr && err == nil { + t.Errorf("expected error for %q, got nil", tc.raw) + } + if !tc.wantErr && err != nil { + t.Errorf("expected OK for %q, got %v", tc.raw, err) + } + }) } } func TestTranscript_UnreachableWorkspaceReturns502(t *testing.T) { - setupTestDB(t) + mock := setupTestDB(t) setupTestRedis(t) h := NewTranscriptHandler() - wsID := seedWorkspace(t, "http://127.0.0.1:1") // refused + wsID := expectWorkspaceURLLookup(mock,"http://127.0.0.1:1") // refused w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w)