molecule-core/platform/internal/handlers/transcript.go
airenostars 36ed90d807 fix(transcript): validate workspace URL to prevent SSRF (#272)
`TranscriptHandler.Get` previously proxied `agent_card->>'url'` directly
to the outbound HTTP client with no validation. Since `agent_card` is
attacker-writable via /registry/register, a workspace-token holder
could point it at cloud metadata (169.254.169.254), link-local ranges,
or non-http schemes and pivot the platform container against internal
services (IMDS, Redis, Postgres, other containers on the Docker net).

Four required fixes per reviewer:

1. `validateWorkspaceURL(u *url.URL)` — runs before `httpClient.Do`:
   - scheme must be http/https (rejects file://, gopher://, ftp://)
   - cloud metadata hostname blocklist (GCP + Azure + plain "metadata")
   - IMDS IP blocklist (169.254.169.254)
   - IPv4/IPv6 link-local blocklist (169.254/16, fe80::/10, multicast)
   - IPv6 unique-local fd00::/8 blocklist
   - loopback + docker.internal still allowed for local dev

2. Query-param allowlist — `target.RawQuery = c.Request.URL.RawQuery`
   forwarded everything verbatim, letting a caller smuggle params the
   upstream transcript endpoint didn't intend to expose. Replaced with
   an allowlist of `since` and `limit`.

3. Sanitized error string — `fmt.Sprintf("workspace unreachable: %v", err)`
   leaked the actual internal host/IP via `net.OpError`. Now logs the
   real error server-side and returns a plain "workspace unreachable"
   to the caller.

4. 10 new regression test cases:
   - `TestTranscript_Rejects{CloudMetadataIP,NonHTTPScheme,MetadataHostname,LinkLocalIPv6}`
     exercise the handler end-to-end with each attack URL and assert
     400 before the HTTP client fires.
   - `TestValidateWorkspaceURL` table-drives the validator across
     localhost/public/docker-internal (allowed) + IMDS/GCP/Azure/file/
     gopher/link-local/multicast (rejected).
   - `TestTranscript_ProxyPropagatesAllowlistedQueryParams` asserts
     `secret=leak&cmd=rm` is stripped while `since=42&limit=7` pass
     through.

Also fixed a pre-existing test bug: `seedWorkspace` was issuing a real
SQL Exec against sqlmock with no expectation set, so the prior test
helpers silently failed in CI. Replaced with `expectWorkspaceURLLookup`
which programs the mock correctly. All 11 tests now pass.

Closes #272

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-15 17:46:55 -07:00

170 lines
5.6 KiB
Go

// Package handlers — transcript proxy.
//
// GET /workspaces/:id/transcript proxies to the workspace's own
// /transcript endpoint, which surfaces the live agent session log
// (claude-code reads ~/.claude/projects/<cwd>/<session>.jsonl). Other
// runtimes return supported:false.
//
// Why this lives in the platform: docker exec works for local dev but
// not for remote (Phase 30) workspaces on Fly Machines. The platform's
// network proxy is the only path that scales to both.
package handlers
import (
"context"
"fmt"
"io"
"log"
"net"
"net/http"
"net/url"
"strings"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/gin-gonic/gin"
)
// TranscriptHandler proxies /workspaces/:id/transcript to the workspace agent.
type TranscriptHandler struct {
	// httpClient performs the outbound GET to the workspace's /transcript
	// endpoint.
	httpClient *http.Client
}

// NewTranscriptHandler returns a TranscriptHandler whose HTTP client has a
// 15-second overall timeout and refuses to follow redirects.
//
// Redirects are refused because the target URL is validated (and its path
// pinned to /transcript) only once, before the request is sent. With Go's
// default policy of following up to 10 redirects, a workspace that passes
// validation could answer with a 3xx pointing at a blocked address or at
// an arbitrary path on an allowed host, and the client would fetch it.
// Any redirect therefore fails the request; workspaces must be registered
// with their final URL.
func NewTranscriptHandler() *TranscriptHandler {
	return &TranscriptHandler{
		httpClient: &http.Client{
			Timeout: 15 * time.Second,
			// Returning an error here makes Do fail with the previous
			// response's body already closed, so callers see an ordinary
			// request error and nothing leaks.
			CheckRedirect: func(*http.Request, []*http.Request) error {
				return fmt.Errorf("transcript proxy does not follow redirects")
			},
		},
	}
}
// Get handles GET /workspaces/:id/transcript?since=N&limit=N.
//
// It looks up the workspace's registered URL, validates it, issues a GET
// to <workspace>/transcript carrying only the allowlisted since/limit
// query parameters, and relays the upstream status code, Content-Type and
// body to the caller. This function sets no headers on the outbound
// request. At most 1 MiB of the upstream body is read; anything beyond
// that is dropped without an error, so an oversized response reaches the
// caller truncated.
//
// Responses produced by the handler itself:
//   - 404: the workspace lookup failed (no such row, or any other query error)
//   - 503: the workspace has no URL on file (NULL or empty)
//   - 400: the stored URL does not parse or is rejected by validateWorkspaceURL
//   - 502: the outbound request failed
//   - 500: the request could not be built or the upstream body could not be read
func (h *TranscriptHandler) Get(c *gin.Context) {
	workspaceID := c.Param("id")
	ctx := c.Request.Context()

	// Scan into *string rather than string: the ->> expression yields SQL
	// NULL when the card has no url (or there is no card at all), and
	// scanning NULL into a plain string is an error — which would report
	// an unregistered workspace as "not found" instead of "not registered".
	var storedURL *string
	if err := db.DB.QueryRowContext(ctx,
		`SELECT agent_card->>'url' FROM workspaces WHERE id = $1`,
		workspaceID,
	).Scan(&storedURL); err != nil {
		c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
		return
	}
	if storedURL == nil || *storedURL == "" {
		c.JSON(http.StatusServiceUnavailable, gin.H{"error": "workspace not registered (no URL on file)"})
		return
	}
	workspaceURL := *storedURL

	// workspaceURL comes from agent_card which is attacker-writable via
	// /registry/register — treat it as untrusted and validate before the
	// outbound HTTP call to prevent SSRF (issue #272).
	target, err := url.Parse(workspaceURL)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace URL"})
		return
	}
	if err := validateWorkspaceURL(target); err != nil {
		log.Printf("transcript: workspace %s URL rejected: %v", workspaceID, err)
		c.JSON(http.StatusBadRequest, gin.H{"error": "workspace URL not allowed"})
		return
	}

	// Whatever path the registered URL carried is discarded; the proxy
	// only ever talks to /transcript.
	target.Path = "/transcript"

	// Don't forward the raw query string — an attacker-controlled caller
	// could smuggle params the upstream endpoint didn't intend to expose.
	// Allowlist the two params the transcript endpoint actually uses.
	q := url.Values{}
	if since := c.Query("since"); since != "" {
		q.Set("since", since)
	}
	if limit := c.Query("limit"); limit != "" {
		q.Set("limit", limit)
	}
	target.RawQuery = q.Encode()

	// Bound the upstream call independently of the caller's context.
	reqCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
	defer cancel()

	req, err := http.NewRequestWithContext(reqCtx, "GET", target.String(), nil)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to build request"})
		return
	}

	resp, err := h.httpClient.Do(req)
	if err != nil {
		// Log the real error server-side (includes the target URL), but
		// don't leak it to the caller — that would reveal internal host
		// names / IPs reachable from the platform.
		log.Printf("transcript: workspace %s unreachable: %v", workspaceID, err)
		c.JSON(http.StatusBadGateway, gin.H{"error": "workspace unreachable"})
		return
	}
	defer resp.Body.Close()

	// Cap at 1 MB so a giant transcript doesn't melt the canvas.
	body, err := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
	if err != nil {
		// Same policy as above: details go to the server log only.
		log.Printf("transcript: workspace %s response read failed: %v", workspaceID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to read workspace response"})
		return
	}
	c.Data(resp.StatusCode, resp.Header.Get("Content-Type"), body)
}
// validateWorkspaceURL enforces that the agent_card URL is safe to
// proxy to. agent_card is attacker-writable via /registry/register so
// any workspace-token holder could otherwise point the URL at cloud
// metadata (169.254.169.254), the Docker host, or other internal
// services reachable from the platform container.
//
// Policy:
//   - scheme must be http or https (no file://, gopher://, ftp://, etc.)
//   - host must be present
//   - block cloud metadata endpoints (IMDS, GCP, Azure)
//   - block link-local IPs (169.254/16 IPv4, fe80::/10 IPv6, and
//     link-local multicast)
//   - block IPv6 unique-local addresses (fc00::/7)
//   - loopback is allowed — local dev runs workspaces on 127.0.0.1
//   - Docker internal hostnames (host.docker.internal, *.molecule-monorepo-net)
//     are allowed; the whole threat model assumes the platform already
//     trusts peers on that network
//
// The host is normalized before matching: one trailing dot (the
// fully-qualified form of a name) is removed, and an IPv6 zone suffix
// ("%eth0") is dropped before IP parsing. Without this,
// "metadata.google.internal." or "fe80::1%eth0" would slip past checks
// that "metadata.google.internal" and "fe80::1" fail.
//
// Only the literal host string is inspected. Hostnames are not resolved
// here, so a name that resolves to a blocked address is not caught by
// this function.
//
// It returns nil when the URL is acceptable and a descriptive error
// otherwise; the error text is intended for server-side logs.
func validateWorkspaceURL(u *url.URL) error {
	if u.Scheme != "http" && u.Scheme != "https" {
		return fmt.Errorf("unsupported scheme %q", u.Scheme)
	}

	host := u.Hostname()
	name := strings.ToLower(strings.TrimSuffix(host, "."))
	if name == "" {
		return fmt.Errorf("empty host")
	}

	// Hostname blocklist (pre-IP-parse — these are usually resolved by
	// the HTTP stack, not by us).
	switch name {
	case "metadata.google.internal", "metadata.azure.com", "metadata":
		return fmt.Errorf("metadata hostname blocked: %s", host)
	}

	// net.ParseIP rejects zoned literals outright, which would skip every
	// IP check below, so cut the zone off first.
	literal := name
	if i := strings.IndexByte(literal, '%'); i >= 0 {
		literal = literal[:i]
	}
	ip := net.ParseIP(literal)
	if ip == nil {
		// Not an IP literal: an ordinary hostname that passed the blocklist.
		return nil
	}

	// IMDS / cloud metadata. String() renders IPv4-mapped IPv6
	// (::ffff:169.254.169.254) in dotted form, so that spelling matches too.
	if ip.String() == "169.254.169.254" {
		return fmt.Errorf("cloud metadata endpoint blocked")
	}
	// Link-local: IPv4 169.254.0.0/16, IPv6 fe80::/10.
	if ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() {
		return fmt.Errorf("link-local address blocked: %s", host)
	}
	// IPv6 unique local fc00::/7 (RFC 4193): the top seven bits are
	// 1111110, i.e. a first byte of 0xfc or 0xfd.
	if ip16 := ip.To16(); ip.To4() == nil && ip16 != nil && ip16[0]&0xfe == 0xfc {
		return fmt.Errorf("IPv6 unique-local address blocked: %s", host)
	}
	return nil
}