Merge pull request #2315 from Molecule-AI/auto/issue-2312-pr-c-platform-forward

feat(chat_files): rewrite Upload as HTTP-forward (RFC #2312, PR-C)
This commit is contained in:
Hongming Wang 2026-04-29 15:30:19 -07:00 committed by GitHub
commit 593f2bd2be
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 521 additions and 409 deletions

135
tests/e2e/test_chat_upload_e2e.sh Executable file
View File

@ -0,0 +1,135 @@
#!/usr/bin/env bash
# E2E for the v2 chat upload path (RFC #2312):
#
# POST /workspaces/:id/chat/uploads
# └─▶ platform Go workspace-server (proxies)
# └─▶ workspace's own /internal/chat/uploads/ingest
# └─▶ writes to /workspace/.molecule/chat-uploads
#
# The same script runs against ANY environment because the architecture
# is now uniform — local docker-compose, staging tenant, production
# health-probe — all hit the same call site with the same expected
# behavior. This is the design goal RFC #2312 set: "test local will
# pretty much match production."
#
# Required env:
# BASE default http://localhost:8080
# override to https://<id>.<tenant>.staging...
# WORKSPACE_RUNTIME default langgraph (any internal runtime)
#
# Exit codes:
# 0 upload + read-back round-trip succeeded
# 1 setup failed (couldn't create workspace, never came online, etc.)
# 2 upload returned non-2xx
# 3 upload succeeded but the file isn't readable via download
set -uo pipefail
BASE="${BASE:-http://localhost:8080}"
RUNTIME="${WORKSPACE_RUNTIME:-langgraph}"
PARENT=""
PARENT_TOK=""
# shellcheck disable=SC1091
source "$(dirname "$0")/_lib.sh"
# Best-effort teardown: remove the workspace this run created (if any)
# while preserving the script's original exit status. `set +e` keeps a
# failed DELETE from clobbering that status.
cleanup() {
  local rc=$?
  set +e
  [ -z "$PARENT" ] || curl -sS -X DELETE \
    "$BASE/workspaces/$PARENT?confirm=true&purge=true" \
    ${PARENT_TOK:+-H "Authorization: Bearer $PARENT_TOK"} >/dev/null 2>&1
  exit $rc
}
trap cleanup EXIT INT TERM
# ─── 1. Create workspace ───────────────────────────────────────────────
echo "[1/5] POST /workspaces (runtime=$RUNTIME)..."
# Create a throwaway tier-2 workspace. The inline python3 pulls "id" out
# of the JSON response; any parse failure yields an empty string, which
# the guard below treats as a setup failure (exit 1).
P_RESP=$(curl -sS -X POST "$BASE/workspaces" \
-H "Content-Type: application/json" \
-d "{\"name\":\"e2e-chat-upload\",\"runtime\":\"$RUNTIME\",\"tier\":2}")
PARENT=$(echo "$P_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null)
[ -n "$PARENT" ] || { echo " ✗ workspace create failed: $P_RESP"; exit 1; }
echo " ✓ workspace=$PARENT"
# ─── 2. Wait for online ────────────────────────────────────────────────
echo "[2/5] waiting for workspace online (up to 5min)..."
# Poll every 5s for up to 60 attempts (~5 min). The status field may be
# top-level or nested under "workspace"; the python one-liner handles both.
for i in $(seq 1 60); do
S=$(curl -sS "$BASE/workspaces/$PARENT" 2>/dev/null \
| python3 -c "import sys,json; d=json.load(sys.stdin); w=d.get('workspace') if isinstance(d.get('workspace'),dict) else d; print(w.get('status') or '')" 2>/dev/null)
# Log only on attempts 1, 7, 13, ... (~every 30s) to keep output short.
[ $((i % 6)) -eq 1 ] && echo " attempt $i: status=$S"
[ "$S" = "online" ] && break
sleep 5
done
[ "$S" = "online" ] || { echo " ✗ workspace never online (last=$S)"; exit 1; }
echo " ✓ online"
# Mint a workspace bearer for the test (the auth needed to call
# /workspaces/:id/chat/uploads, which is wsAuth-gated).
PARENT_TOK=$(e2e_mint_test_token "$PARENT") || {
echo " ✗ couldn't mint test token (MOLECULE_ENV=production?)"
exit 1
}
# ─── 3. Upload a fixture ───────────────────────────────────────────────
echo "[3/5] POST /workspaces/$PARENT/chat/uploads ..."
FIXTURE=$(mktemp)
# Timestamped content makes each run's round-trip assertion unique.
echo "e2e fixture content $(date +%s)" > "$FIXTURE"
EXPECTED=$(cat "$FIXTURE")
# -w appends "HTTP_CODE=<status>" on its own line after the body so one
# curl call yields both; the grep/sed pair below splits them back apart.
UPLOAD=$(curl -sS -X POST "$BASE/workspaces/$PARENT/chat/uploads" \
-H "Authorization: Bearer $PARENT_TOK" \
-F "files=@$FIXTURE;filename=greeting.txt;type=text/plain" \
-w "\nHTTP_CODE=%{http_code}\n")
CODE=$(echo "$UPLOAD" | grep -oE 'HTTP_CODE=[0-9]+' | cut -d= -f2)
# sed deletes from the marker line to EOF, leaving only the JSON body.
BODY=$(echo "$UPLOAD" | sed '/^HTTP_CODE=/,$d')
echo " status=$CODE"
echo " body=$(echo "$BODY" | head -c 300)"
if [ "$CODE" != "200" ]; then
echo " ✗ upload returned $CODE"
rm -f "$FIXTURE"
exit 2
fi
# Pull the first file's uri/name/size out of the response; empty strings
# on parse failure are caught by the guards below (exit 2).
URI=$(echo "$BODY" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d['files'][0]['uri'])" 2>/dev/null)
NAME=$(echo "$BODY" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d['files'][0]['name'])" 2>/dev/null)
SIZE=$(echo "$BODY" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d['files'][0]['size'])" 2>/dev/null)
[ -n "$URI" ] || { echo " ✗ no URI in response"; rm -f "$FIXTURE"; exit 2; }
[ "$NAME" = "greeting.txt" ] || { echo " ✗ name mismatch: $NAME"; rm -f "$FIXTURE"; exit 2; }
# tr strips the leading padding BSD wc emits, so the compare works on
# macOS and Linux alike.
[ "$SIZE" = "$(wc -c <"$FIXTURE" | tr -d ' ')" ] || { echo " ✗ size mismatch: $SIZE"; rm -f "$FIXTURE"; exit 2; }
echo " ✓ uri=$URI"
echo " ✓ name=$NAME size=$SIZE"
# Extract the absolute path inside the workspace (strip workspace: scheme).
PATH_IN_WS="${URI#workspace:}"
# ─── 4. Read it back via /chat/download ────────────────────────────────
echo "[4/5] GET /workspaces/$PARENT/chat/download?path=$PATH_IN_WS"
# -G + --data-urlencode percent-encodes the path query parameter, so a
# stored name containing '&', '=', '%', '+', or spaces can't corrupt the
# request URL (the previous raw interpolation assumed a safe charset).
DOWNLOADED=$(curl -sS -G "$BASE/workspaces/$PARENT/chat/download" \
  --data-urlencode "path=$PATH_IN_WS" \
  -H "Authorization: Bearer $PARENT_TOK")
if [ "$DOWNLOADED" != "$EXPECTED" ]; then
  echo " ✗ content mismatch"
  echo " expected: $EXPECTED"
  echo " got: $DOWNLOADED"
  rm -f "$FIXTURE"
  exit 3
fi
echo " ✓ round-trip content matches"
# ─── 5. Auth: bare upload without bearer is rejected ───────────────────
echo "[5/5] POST without bearer must be 401..."
# Only the status code matters here; the body is discarded.
NA_CODE=$(curl -sS -o /dev/null -w "%{http_code}" -X POST \
  "$BASE/workspaces/$PARENT/chat/uploads" -F "files=@$FIXTURE")
case "$NA_CODE" in
  401)
    echo " ✓ 401 without bearer"
    ;;
  *)
    echo " ✗ expected 401 without bearer, got $NA_CODE"
    rm -f "$FIXTURE"
    exit 2
    ;;
esac
rm -f "$FIXTURE"
echo ""
echo "✓ chat upload v2 (RFC #2312) end-to-end passed against $BASE"

View File

@ -1,6 +1,18 @@
package handlers
// chat_files.go — file upload/download for workspace chat.
// chat_files.go — file upload (HTTP-forward) + download (Docker-exec)
// for workspace chat.
//
// Upload is the v2 architecture (RFC #2312): the platform proxies the
// multipart request straight to the workspace's own /internal/chat/
// uploads/ingest endpoint. The workspace agent then writes to local
// /workspace/.molecule/chat-uploads. Same code path on local Docker
// and SaaS — the v1 docker-exec path was structurally broken in SaaS
// because workspace-server's local Docker client has no visibility
// into EC2-hosted workspaces (#2308 root cause).
//
// Download still uses the v1 docker-cp path; migrating it lives in the
// next PR in this stack so each surface is reviewable in isolation.
//
// Split from templates.go because these endpoints have a different
// security model (no /configs write, no template fallback) and a
@ -10,62 +22,79 @@ package handlers
import (
"archive/tar"
"bytes"
"context"
"crypto/rand"
"database/sql"
"encoding/hex"
"errors"
"fmt"
"io"
"log"
"mime"
"mime/multipart"
"net/http"
"path/filepath"
"regexp"
"strings"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/docker/docker/api/types/container"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/wsauth"
"github.com/gin-gonic/gin"
)
// ChatFilesHandler serves file upload + download for chat. It
// composes the existing TemplatesHandler's Docker plumbing
// (findContainer, execInContainer, copyFilesToContainer) rather than
// duplicating them, so a bug fix in the Docker layer propagates to
// both endpoints.
// ChatFilesHandler serves file upload + download for chat. Holds a
// reference to TemplatesHandler so the (still docker-exec) Download
// path keeps using the shared findContainer/CopyFromContainer helpers
// without duplicating them. Upload no longer reaches into Docker.
type ChatFilesHandler struct {
templates *TemplatesHandler
// httpClient is broken out so tests can swap in an httptest.Server
// transport. Prod uses a default with a generous Timeout to cover
// the 50 MB worst case on a slow EC2 link without leaving a
// connection hanging forever on a sick workspace.
httpClient *http.Client
}
// NewChatFilesHandler builds a ChatFilesHandler around the shared
// TemplatesHandler (used by the still-docker-exec Download path) and a
// production HTTP client for the Upload forward hop.
//
// NOTE: the diff rendering left both the pre-change one-line return and
// the new multi-line return in this span — two return statements, which
// is invalid Go. This is the reconstructed post-change version.
func NewChatFilesHandler(t *TemplatesHandler) *ChatFilesHandler {
	return &ChatFilesHandler{
		templates: t,
		httpClient: &http.Client{
			// 50 MB total body cap / ~1 MB/s slow-network floor → ~60s.
			// Doubled for headroom on the legitimate-but-slow case.
			Timeout: 120 * time.Second,
		},
	}
}
// chatUploadMaxBytes caps the full multipart request body so a
// malicious / runaway client can't OOM the server. 50 MB covers most
// documents + a handful of images per message; larger artefacts
// should go through git/S3 rather than chat.
// malicious / runaway client can't OOM the proxy hop. 50 MB matches
// the workspace-side limit; anything larger is rejected at the
// network boundary before forwarding.
const chatUploadMaxBytes = 50 * 1024 * 1024
// chatUploadMaxFileBytes caps individual files in a multi-file upload.
// Keeping the per-file cap below the total lets a user send, say, a
// 5 MB PDF + 10 screenshots without tripping the batch limit on any
// single attachment.
const chatUploadMaxFileBytes = 25 * 1024 * 1024
// chatUploadDir is the in-container path where user-uploaded chat
// attachments land. Under /workspace so the file persists with the
// workspace volume and is readable by the agent without any extra
// plumbing — the agent just reads from the URI path we return.
// attachments land. Kept here for documentation parity with the
// workspace-side handler — the platform no longer writes files
// directly, but the URI scheme returned in responses still uses this
// path, so any consumer parsing those URIs has the constant to
// reference.
const chatUploadDir = "/workspace/.molecule/chat-uploads"
// unsafeFilenameChars matches anything outside the conservative
// {alnum, dot, underscore, dash} set. Filenames get rewritten
// character-class at a time, so embedded paths, control chars,
// newlines, quotes, and shell metachars never reach the filesystem.
var unsafeFilenameChars = regexp.MustCompile(`[^a-zA-Z0-9._\-]`)
// urlPathEscape percent-encodes every byte that is not in the RFC 3986
// unreserved set (ALPHA / DIGIT / "-" / "." / "_" / "~"). It is stricter
// than net/url.PathEscape, which leaves "/" alone because "/" is legal
// inside URL paths; filenames should never contain "/" in the first
// place, so escaping it here is defence-in-depth against an agent that
// writes a path-like name.
//
// Used by Download's Content-Disposition header.
func urlPathEscape(s string) string {
	var out strings.Builder
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9',
			c == '-', c == '.', c == '_', c == '~':
			out.WriteByte(c)
		default:
			fmt.Fprintf(&out, "%%%02X", c)
		}
	}
	return out.String()
}
// contentDispositionAttachment produces a safe `attachment; filename=...`
// header. Quotes, CR, and LF in the filename are escaped per RFC 6266 /
@ -99,60 +128,23 @@ func contentDispositionAttachment(name string) string {
asciiSafe, urlPathEscape(name))
}
// urlPathEscape percent-encodes every byte outside the RFC 3986
// unreserved set — stricter than net/url.PathEscape (which leaves
// "/" unescaped because it's legal in URL paths). Filenames must
// never contain "/" anyway, so escaping it is defence-in-depth
// against an agent that writes a path-like name.
func urlPathEscape(s string) string {
const unreserved = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"
var b strings.Builder
for _, c := range []byte(s) {
if strings.IndexByte(unreserved, c) >= 0 {
b.WriteByte(c)
} else {
fmt.Fprintf(&b, "%%%02X", c)
}
}
return b.String()
}
// sanitizeFilename reduces an arbitrary client-supplied filename to a
// conservative {alnum, dot, underscore, dash} name: the path is stripped
// to its base, spaces become underscores, every remaining unsafe byte is
// rewritten, over-long names are truncated to 100 bytes while keeping a
// reasonable (≤16-byte) extension, and degenerate results ("", ".", "..")
// fall back to "file".
func sanitizeFilename(in string) string {
	name := unsafeFilenameChars.ReplaceAllString(
		strings.ReplaceAll(filepath.Base(in), " ", "_"), "_")
	if len(name) > 100 {
		ext := filepath.Ext(name)
		if len(ext) > 16 {
			ext = ""
		}
		name = name[:100-len(ext)] + ext
	}
	switch name {
	case "", ".", "..":
		return "file"
	}
	return name
}
// ChatUploadedFile is the per-file response returned from POST
// /workspaces/:id/chat/uploads. Clients include this payload (or a
// trimmed subset) in their outgoing A2A `message/send` parts.
type ChatUploadedFile struct {
	// URI uses a custom "workspace:" scheme so clients can resolve it
	// against the streaming Download endpoint regardless of where the
	// canvas itself is hosted. The path component is always absolute
	// within the workspace container.
	URI string `json:"uri"`
	// Name is the filename as stored (after server-side sanitization),
	// not necessarily the name the client originally sent.
	Name string `json:"name"`
	// MimeType is the declared or extension-inferred content type;
	// omitted from the JSON when unknown.
	MimeType string `json:"mimeType,omitempty"`
	// Size is the stored file's length in bytes.
	Size int64 `json:"size"`
}
// Upload handles POST /workspaces/:id/chat/uploads.
// Accepts multipart/form-data with one or more `files` fields, stages
// each under /workspace/.molecule/chat-uploads with a UUID prefix,
// and returns the list of URIs for the caller to attach to an A2A
// message.
//
// Streams the multipart body straight to the workspace's own
// /internal/chat/uploads/ingest endpoint with the platform_inbound_secret
// (RFC #2312, migration 044) in the Authorization header. The workspace
// validates and writes to its local /workspace/.molecule/chat-uploads;
// the response (containing one ChatUploadedFile per upload) is streamed
// back unchanged.
//
// Why streaming, not parse-then-re-encode:
// - Eliminates the 50 MB intermediate buffer on the platform.
// - Per-file size + path-safety enforcement is the workspace's job;
// duplicating it here just creates two places to keep in sync.
// - The error responses from the workspace (413 with the offending
// filename, 400 on missing files field, etc.) propagate through
// unchanged, so the user sees the same shapes regardless of where
// the failure originated.
func (h *ChatFilesHandler) Upload(c *gin.Context) {
workspaceID := c.Param("id")
if err := validateWorkspaceID(workspaceID); err != nil {
@ -160,188 +152,96 @@ func (h *ChatFilesHandler) Upload(c *gin.Context) {
return
}
// Hard cap the request body BEFORE ParseMultipartForm — otherwise
// a client could chunk-upload past the cap before Go notices.
// Hard cap the request body BEFORE forwarding. http.MaxBytesReader
// enforces lazily as the body is read; a malicious client cannot
// chunk-upload past the cap, the wrapped reader returns an error
// when the cap is exceeded and the workspace receives a truncated
// stream that fails its own multipart parser.
c.Request.Body = http.MaxBytesReader(c.Writer, c.Request.Body, chatUploadMaxBytes)
if err := c.Request.ParseMultipartForm(chatUploadMaxBytes); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": "failed to parse multipart form"})
return
}
form := c.Request.MultipartForm
var headers []*multipart.FileHeader
if form != nil && form.File != nil {
headers = form.File["files"]
}
if len(headers) == 0 {
c.JSON(http.StatusBadRequest, gin.H{"error": "expected at least one 'files' field"})
return
}
ctx := c.Request.Context()
// External workspaces have no container — the container-find-then-tar-exec
// flow below doesn't apply. Surface a clear 4xx instead of the misleading
// "container not running" 503 (#2308). File ingest for external workspaces
// is on the v0.2 roadmap (likely via the artifacts table or the channel-
// plugin's inbox/ pattern); flagging unsupported is the v0.1 contract.
var wsRuntime string
// Resolve workspace URL + inbound secret. Both must be present;
// either one missing means the workspace was provisioned before
// migration 044 or the row got into a bad state. Surface as 503
// rather than silently failing — operators should notice.
var wsURL string
if err := db.DB.QueryRowContext(ctx,
`SELECT COALESCE(runtime, '') FROM workspaces WHERE id = $1`, workspaceID,
).Scan(&wsRuntime); err != nil && err != sql.ErrNoRows {
log.Printf("chat_files Upload: runtime lookup failed for %s: %v", workspaceID, err)
}
if wsRuntime == "external" {
c.JSON(http.StatusUnprocessableEntity, gin.H{
"error": "file upload not supported for external workspaces",
"detail": "External workspaces have no container to receive uploads. " +
"Use POST /admin/secrets to share data, or attach via your external agent's own input channel. " +
"Native external-workspace upload tracked at https://github.com/Molecule-AI/molecule-core/issues/2308.",
"runtime": "external",
})
`SELECT COALESCE(url, '') FROM workspaces WHERE id = $1`, workspaceID,
).Scan(&wsURL); err != nil {
log.Printf("chat_files Upload: workspace lookup failed for %s: %v", workspaceID, err)
c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
return
}
containerName := h.templates.findContainer(ctx, workspaceID)
if containerName == "" {
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "workspace container not running"})
if wsURL == "" {
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "workspace url not registered yet"})
return
}
// Trust note: workspaces.url passes validateAgentURL at /registry/
// register write time, blocking SSRF-shaped URLs. We rely on that
// upstream gate rather than re-validating here. Tracked at #2316
// for follow-up: forward-time re-validation as defense-in-depth.
// Build the archive in memory. Files are byte-preserving through
// Go's string<->[]byte (the tar helper takes map[string]string but
// the conversion is a literal copy, not a UTF-8 reinterpretation).
archive := map[string]string{}
uploaded := make([]ChatUploadedFile, 0, len(headers))
for _, fh := range headers {
if fh.Size > chatUploadMaxFileBytes {
c.JSON(http.StatusRequestEntityTooLarge, gin.H{
"error": fmt.Sprintf("%s exceeds per-file limit (%d MB)", fh.Filename, chatUploadMaxFileBytes/(1024*1024)),
secret, err := wsauth.ReadPlatformInboundSecret(ctx, db.DB, workspaceID)
if err != nil {
if errors.Is(err, wsauth.ErrNoInboundSecret) {
// Workspace predates migration 044 OR the provisioner's
// secret-mint hop failed. Both are operational issues,
// not user errors. Log loudly so ops can backfill.
log.Printf("chat_files Upload: no platform_inbound_secret for %s — workspace needs reprovision (#2312)", workspaceID)
c.JSON(http.StatusServiceUnavailable, gin.H{
"error": "workspace not yet enrolled in v2 upload (RFC #2312)",
"detail": "Reprovisioning the workspace will mint the platform_inbound_secret it's missing.",
})
return
}
f, err := fh.Open()
if err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": "failed to read upload"})
return
}
// LimitReader guards against a truthful-but-lying Size header:
// if the multipart stream carries more bytes than declared, we
// stop at the cap instead of growing the buffer.
data, err := io.ReadAll(io.LimitReader(f, chatUploadMaxFileBytes+1))
f.Close()
if err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": "failed to read upload"})
return
}
if int64(len(data)) > chatUploadMaxFileBytes {
c.JSON(http.StatusRequestEntityTooLarge, gin.H{
"error": fmt.Sprintf("%s exceeds per-file limit (%d MB)", fh.Filename, chatUploadMaxFileBytes/(1024*1024)),
})
return
}
name := sanitizeFilename(fh.Filename)
// 16-byte (UUID-equivalent) random prefix. Within a single
// batch we also check for collisions — birthday on 128 bits
// is astronomical, but a bad PRNG or single re-used draw
// would silently overwrite a sibling upload with its own
// content and return two URIs pointing at one file.
var stored string
for attempt := 0; attempt < 4; attempt++ {
idBytes := make([]byte, 16)
if _, err := rand.Read(idBytes); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to allocate upload ID"})
return
}
candidate := hex.EncodeToString(idBytes) + "-" + name
if _, taken := archive[candidate]; !taken {
stored = candidate
break
}
}
if stored == "" {
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to allocate unique upload ID"})
return
}
archive[stored] = string(data)
mt := fh.Header.Get("Content-Type")
if mt == "" {
mt = mime.TypeByExtension(filepath.Ext(name))
}
uploaded = append(uploaded, ChatUploadedFile{
URI: "workspace:" + chatUploadDir + "/" + stored,
Name: name,
MimeType: mt,
Size: int64(len(data)),
})
}
// mkdir -p is idempotent; we fire it every upload instead of
// caching state here so container restarts don't surprise us.
_, _ = h.templates.execInContainer(ctx, containerName, []string{"mkdir", "-p", chatUploadDir})
// Defence in depth: pre-remove each target path before extracting
// the tar. An agent with write access to /workspace could in
// theory race-create a symlink at <chatUploadDir>/<stored-name>
// pointing at a sensitive in-container path (its own /etc/*,
// mounted secrets). Docker's tar extraction on some drivers
// follows pre-existing symlinks at the destination. `rm -f` the
// exact stored-name closes that window — the UUID prefix on the
// name makes a successful race effectively impossible, but this
// guard costs nothing and documents the intent.
rmArgs := []string{"rm", "-f", "--"}
for stored := range archive {
rmArgs = append(rmArgs, chatUploadDir+"/"+stored)
}
_, _ = h.templates.execInContainer(ctx, containerName, rmArgs)
if err := h.copyFlatToContainer(ctx, containerName, chatUploadDir, archive); err != nil {
log.Printf("Chat upload copy failed for %s: %v", workspaceID, err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to stage files in workspace"})
log.Printf("chat_files Upload: read platform_inbound_secret failed for %s: %v", workspaceID, err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to read workspace secret"})
return
}
c.JSON(http.StatusOK, gin.H{"files": uploaded})
}
// Build the forward request. Body is the (capped) reader from the
// inbound request — Go's http.Client streams it directly to the
// workspace, no intermediate buffering on the platform.
forwardURL := strings.TrimRight(wsURL, "/") + "/internal/chat/uploads/ingest"
req, err := http.NewRequestWithContext(ctx, http.MethodPost, forwardURL, c.Request.Body)
if err != nil {
log.Printf("chat_files Upload: build request failed for %s: %v", workspaceID, err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to construct forward request"})
return
}
// Forward the multipart Content-Type (with boundary) verbatim;
// without it the workspace's parser cannot find part boundaries.
if ct := c.Request.Header.Get("Content-Type"); ct != "" {
req.Header.Set("Content-Type", ct)
}
req.Header.Set("Authorization", "Bearer "+secret)
// Pass through Content-Length so the workspace can short-circuit
// the total-body cap before parsing. ContentLength on the request
// struct also lets Go's transport know whether to stream or send
// chunked-encoded.
if c.Request.ContentLength > 0 {
req.ContentLength = c.Request.ContentLength
}
// copyFlatToContainer extracts one tar of flat files into destPath
// inside the container. Unlike the shared copyFilesToContainer helper
// (which prepends destPath into tar entry names — correct for its
// callers whose files relative-live inside a nested tree), this
// helper writes tar entries with ONLY the flat filename so Docker's
// extraction at destPath lands them directly in destPath, not at
// destPath/destPath/... as the shared helper would.
// Filenames are validated to contain no path separator so nothing
// can escape destPath via an embedded "../" or a leading "/".
func (h *ChatFilesHandler) copyFlatToContainer(ctx context.Context, containerName, destPath string, files map[string]string) error {
if h.templates.docker == nil {
return fmt.Errorf("docker not available")
resp, err := h.httpClient.Do(req)
if err != nil {
log.Printf("chat_files Upload: forward to %s failed: %v", forwardURL, err)
c.JSON(http.StatusBadGateway, gin.H{"error": "workspace unreachable"})
return
}
var buf bytes.Buffer
tw := tar.NewWriter(&buf)
for name, content := range files {
if strings.ContainsAny(name, "/\\") || name == ".." || name == "." || name == "" {
return fmt.Errorf("unsafe flat filename: %q", name)
}
data := []byte(content)
if err := tw.WriteHeader(&tar.Header{
Name: name, // relative — Docker resolves against destPath
Mode: 0644,
Size: int64(len(data)),
Typeflag: tar.TypeReg,
}); err != nil {
return fmt.Errorf("tar header %q: %w", name, err)
}
if _, err := tw.Write(data); err != nil {
return fmt.Errorf("tar write %q: %w", name, err)
}
defer resp.Body.Close()
// Stream response back. Copy headers we know are safe + the body.
if ct := resp.Header.Get("Content-Type"); ct != "" {
c.Header("Content-Type", ct)
}
if err := tw.Close(); err != nil {
return fmt.Errorf("tar close: %w", err)
c.Status(resp.StatusCode)
if _, err := io.Copy(c.Writer, resp.Body); err != nil {
// Mid-stream failure — too late to write a JSON error, just
// log so ops can correlate with the workspace's logs.
log.Printf("chat_files Upload: stream response back failed for %s: %v", workspaceID, err)
}
return h.templates.docker.CopyToContainer(ctx, containerName, destPath, &buf, container.CopyToContainerOptions{})
}
// Download handles GET /workspaces/:id/chat/download?path=<abs path>.
@ -349,6 +249,12 @@ func (h *ChatFilesHandler) copyFlatToContainer(ctx context.Context, containerNam
// Content-Type and attachment Content-Disposition. Binary-safe —
// unlike the existing JSON ReadFile endpoint which carries content
// as a string (lossy for non-UTF-8 bytes).
//
// TODO(#2312, follow-up PR): migrate Download to the same HTTP-forward
// pattern as Upload. For now keeping the docker-cp path so this PR is
// reviewable as a single-surface change. SaaS download is broken
// today the same way SaaS upload was broken before this PR — the next
// PR closes that gap.
func (h *ChatFilesHandler) Download(c *gin.Context) {
workspaceID := c.Param("id")
if err := validateWorkspaceID(workspaceID); err != nil {

View File

@ -1,99 +0,0 @@
package handlers
import (
"bytes"
"encoding/json"
"mime/multipart"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/DATA-DOG/go-sqlmock"
"github.com/gin-gonic/gin"
)
// TestChatFilesUpload_ExternalRuntime_Returns422 pins the contract that
// upload to a runtime="external" workspace returns a clear 422 with the
// "external workspaces don't support file upload" message instead of the
// misleading 503 "container not running" the v0.1 surface returned (#2308).
//
// Without this branch, an operator pasting a screenshot into the canvas chat
// for an external CEO workspace got a `503 {"error":"workspace container
// not running"}` — accurate from the upload handler's POV (no container
// exists for external workspaces) but misleading because it implies the
// container has crashed. The 422 with structured detail tells the operator
// what's actually happening + points at the v0.2 follow-up issue.
func TestChatFilesUpload_ExternalRuntime_Returns422(t *testing.T) {
mock := setupTestDB(t)
const wsID = "00000000-0000-0000-0000-000000000001"
// Runtime lookup returns "external" — should trigger the early return.
mock.ExpectQuery("SELECT COALESCE\\(runtime, ''\\) FROM workspaces").
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("external"))
// Construct a multipart upload body — handler must reject BEFORE
// touching docker, so a nil templates field is intentional. If the
// runtime check is removed in a future change, this test crashes on
// nil deref of h.templates instead of silently passing.
body, contentType := buildMultipartUpload(t, "screenshot.png", []byte("fake-png-bytes"))
// Use a nil-templates handler — proves runtime check happens BEFORE
// any docker plumbing. The handler is constructed in production via
// NewChatFilesHandler(templates) but the runtime branch should never
// reach into templates for external workspaces.
h := &ChatFilesHandler{templates: nil}
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: wsID}}
c.Request = httptest.NewRequest("POST", "/workspaces/"+wsID+"/chat/files", bytes.NewReader(body))
c.Request.Header.Set("Content-Type", contentType)
h.Upload(c)
if w.Code != http.StatusUnprocessableEntity {
t.Fatalf("expected 422, got %d: %s", w.Code, w.Body.String())
}
var resp map[string]string
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("failed to parse response: %v", err)
}
if !strings.Contains(resp["error"], "external workspaces") {
t.Errorf("expected error to mention external workspaces, got: %q", resp["error"])
}
if resp["runtime"] != "external" {
t.Errorf("expected runtime=external in response, got %q", resp["runtime"])
}
// Spot-check that the error points at issue #2308 so operators reading
// it know where to track v0.2 file-ingest.
if !strings.Contains(resp["detail"], "2308") {
t.Errorf("expected detail to reference issue #2308, got: %q", resp["detail"])
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// buildMultipartUpload returns a body + content-type pair suitable for
// posting a single file through the chat upload handler.
func buildMultipartUpload(t *testing.T, filename string, contents []byte) ([]byte, string) {
t.Helper()
var buf bytes.Buffer
w := multipart.NewWriter(&buf)
part, err := w.CreateFormFile("files", filename)
if err != nil {
t.Fatalf("CreateFormFile: %v", err)
}
if _, err := part.Write(contents); err != nil {
t.Fatalf("part.Write: %v", err)
}
if err := w.Close(); err != nil {
t.Fatalf("multipart Close: %v", err)
}
return buf.Bytes(), w.FormDataContentType()
}

View File

@ -1,67 +1,91 @@
package handlers
// Unit tests for chat_files.go. The Docker-touching paths (Upload
// actually copying into a container, Download actually streaming tar)
// are exercised via integration tests — docker-in-docker is out of
// scope for the unit suite. These tests cover the validation + error
// surfaces that a caller can reach without a running container.
// Unit tests for chat_files.go.
//
// Upload (HTTP-forward, RFC #2312 PR-C): exercised against an httptest
// mock workspace + sqlmock-backed db.DB. The platform-side handler is
// now a streaming proxy; assertions focus on:
// * input validation (400 on bad workspace id)
// * resolution failures (404 missing row, 503 missing secret/url)
// * forward shape (Authorization, Content-Type, body)
// * pass-through of the workspace's status + body
//
// Path-safety + sanitization that lived on the platform pre-#2312 is
// now the workspace-side handler's concern; covered in the Python
// suite (workspace/tests/test_internal_chat_uploads.py).
import (
"bytes"
"database/sql"
"io"
"mime/multipart"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/DATA-DOG/go-sqlmock"
"github.com/gin-gonic/gin"
)
func TestSanitizeFilename(t *testing.T) {
cases := []struct {
in, want string
}{
{"report.pdf", "report.pdf"},
{"my file.pdf", "my_file.pdf"},
{"../../etc/passwd", "passwd"},
{"weird;$name`.txt", "weird__name_.txt"},
{"", "file"},
{".", "file"},
{"..", "file"},
}
for _, tc := range cases {
got := sanitizeFilename(tc.in)
if got != tc.want {
t.Errorf("sanitizeFilename(%q) = %q, want %q", tc.in, got, tc.want)
}
}
// makeUploadRequest builds a gin context for POST /workspaces/:id/chat/uploads
// carrying the given multipart body. The recorder is returned alongside the
// context so callers can assert status + body after invoking h.Upload(c).
func makeUploadRequest(t *testing.T, workspaceID string, body *bytes.Buffer, contentType string) (*gin.Context, *httptest.ResponseRecorder) {
	t.Helper()
	req := httptest.NewRequest("POST", "/workspaces/"+workspaceID+"/chat/uploads", body)
	req.Header.Set("Content-Type", contentType)
	rec := httptest.NewRecorder()
	ctx, _ := gin.CreateTestContext(rec)
	ctx.Params = gin.Params{{Key: "id", Value: workspaceID}}
	ctx.Request = req
	return ctx, rec
}
func TestSanitizeFilename_LongNamePreservesExtension(t *testing.T) {
// 120-char base + .pdf — the helper should truncate the base but
// keep the extension intact so content-type inference still works.
longBase := strings.Repeat("a", 120)
got := sanitizeFilename(longBase + ".pdf")
if len(got) > 100 {
t.Errorf("filename not truncated: len=%d", len(got))
}
if !strings.HasSuffix(got, ".pdf") {
t.Errorf("extension stripped: %q", got)
// uploadFixture builds a minimal multipart/form-data body with a single
// `files` part. The exact bytes don't matter for proxy tests — only that
// the workspace receives the same boundary + headers we sent.
func uploadFixture(t *testing.T) (*bytes.Buffer, string) {
t.Helper()
var buf bytes.Buffer
mw := multipart.NewWriter(&buf)
fw, err := mw.CreateFormFile("files", "fixture.txt")
if err != nil {
t.Fatalf("CreateFormFile: %v", err)
}
_, _ = fw.Write([]byte("fixture-payload"))
mw.Close()
return &buf, mw.FormDataContentType()
}
// expectURL stubs the SELECT that resolves the workspace's url.
func expectURL(mock sqlmock.Sqlmock, workspaceID, url string) {
	rows := sqlmock.NewRows([]string{"url"}).AddRow(url)
	mock.ExpectQuery(`SELECT COALESCE\(url, ''\) FROM workspaces WHERE id = \$1`).
		WithArgs(workspaceID).
		WillReturnRows(rows)
}
// expectURLMissing stubs the SELECT to return sql.ErrNoRows, i.e. no
// workspace row exists for the given id.
func expectURLMissing(mock sqlmock.Sqlmock, workspaceID string) {
	q := mock.ExpectQuery(`SELECT COALESCE\(url, ''\) FROM workspaces WHERE id = \$1`)
	q.WithArgs(workspaceID)
	q.WillReturnError(sql.ErrNoRows)
}
// expectInboundSecret stubs the SELECT performed by ReadPlatformInboundSecret.
// secret may be a string or nil (to simulate a NULL column); `any` is the
// idiomatic spelling of interface{} and is type-identical to it.
func expectInboundSecret(mock sqlmock.Sqlmock, workspaceID string, secret any) {
	mock.ExpectQuery(`SELECT platform_inbound_secret FROM workspaces WHERE id = \$1`).
		WithArgs(workspaceID).
		WillReturnRows(sqlmock.NewRows([]string{"platform_inbound_secret"}).AddRow(secret))
}
func TestChatUpload_InvalidWorkspaceID(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
tmplh := NewTemplatesHandler(t.TempDir(), nil)
h := NewChatFilesHandler(tmplh)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "not-a-uuid"}}
c.Request = httptest.NewRequest("POST", "/workspaces/not-a-uuid/chat/uploads", nil)
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
c, w := makeUploadRequest(t, "not-a-uuid", &bytes.Buffer{}, "")
h.Upload(c)
if w.Code != http.StatusBadRequest {
@ -69,33 +93,180 @@ func TestChatUpload_InvalidWorkspaceID(t *testing.T) {
}
}
func TestChatUpload_MissingFiles(t *testing.T) {
setupTestDB(t)
func TestChatUpload_WorkspaceNotInDB(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
tmplh := NewTemplatesHandler(t.TempDir(), nil)
h := NewChatFilesHandler(tmplh)
// Multipart body with no `files` field — only a text field.
var buf bytes.Buffer
mw := multipart.NewWriter(&buf)
_ = mw.WriteField("other", "value")
mw.Close()
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "00000000-0000-0000-0000-000000000001"}}
req := httptest.NewRequest("POST", "/workspaces/00000000-0000-0000-0000-000000000001/chat/uploads", &buf)
req.Header.Set("Content-Type", mw.FormDataContentType())
c.Request = req
wsID := "00000000-0000-0000-0000-000000000099"
expectURLMissing(mock, wsID)
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
body, ct := uploadFixture(t)
c, w := makeUploadRequest(t, wsID, body, ct)
h.Upload(c)
if w.Code != http.StatusBadRequest {
t.Errorf("expected 400 when files field missing, got %d: %s", w.Code, w.Body.String())
// QueryRow returning sql.ErrNoRows surfaces as 404. The validate-id
// step already passed; this is the next layer.
if w.Code != http.StatusNotFound {
t.Errorf("expected 404 when workspace row missing, got %d: %s", w.Code, w.Body.String())
}
if !strings.Contains(w.Body.String(), "files") {
t.Errorf("expected error to mention files field: %s", w.Body.String())
}
// TestChatUpload_NoInboundSecret covers the legacy-row case: the
// workspace reported a URL but platform_inbound_secret is NULL.
func TestChatUpload_NoInboundSecret(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	id := "00000000-0000-0000-0000-000000000041"
	expectURL(mock, id, "http://127.0.0.1:1")
	expectInboundSecret(mock, id, nil) // NULL column
	h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
	payload, contentType := uploadFixture(t)
	c, w := makeUploadRequest(t, id, payload, contentType)
	h.Upload(c)
	// Must be a 503 whose detail steers ops to reprovision — not a 200,
	// and not a bearer-less forward that would surface to the user as an
	// unactionable 401.
	if w.Code != http.StatusServiceUnavailable {
		t.Errorf("expected 503 when platform_inbound_secret missing, got %d: %s", w.Code, w.Body.String())
	}
	if !strings.Contains(w.Body.String(), "RFC #2312") {
		t.Errorf("expected detail to reference RFC #2312, got: %s", w.Body.String())
	}
}
// TestChatUpload_NoURL covers the mid-boot window: the workspace row is
// registered but no URL has been reported yet.
func TestChatUpload_NoURL(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	id := "00000000-0000-0000-0000-000000000042"
	expectURL(mock, id, "")
	h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
	payload, contentType := uploadFixture(t)
	c, w := makeUploadRequest(t, id, payload, contentType)
	h.Upload(c)
	if w.Code != http.StatusServiceUnavailable {
		t.Errorf("expected 503 when workspace url empty, got %d: %s", w.Code, w.Body.String())
	}
}
// captured snapshots everything the forwarder sent to the workspace so
// we can assert auth + body + content-type forwarded correctly.
type captured struct {
	authorization string // Authorization header value received by the workspace
	contentType   string // Content-Type header, including the multipart boundary
	method        string // HTTP method of the forwarded request
	path          string // URL path hit on the workspace server
	body          []byte // raw forwarded request body bytes
}
// newCapturingWorkspace starts an httptest server that records the
// request it receives into the returned *captured, then replies with the
// given status and JSON response body. The server is shut down via
// t.Cleanup. The local was renamed from `cap`, which shadowed the
// builtin cap().
func newCapturingWorkspace(t *testing.T, status int, response string) (*httptest.Server, *captured) {
	t.Helper()
	rec := &captured{}
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		rec.authorization = r.Header.Get("Authorization")
		rec.contentType = r.Header.Get("Content-Type")
		rec.method = r.Method
		rec.path = r.URL.Path
		// Best-effort read: a nil body on error simply fails the caller's
		// body assertion with a clear message.
		body, _ := io.ReadAll(r.Body)
		rec.body = body
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(status)
		_, _ = w.Write([]byte(response))
	}))
	t.Cleanup(srv.Close)
	return srv, rec
}
// TestChatUpload_ForwardsToWorkspace_HappyPath asserts the full proxy
// contract in one pass: method, path, bearer secret, multipart
// content-type, body bytes, and the workspace response streamed back
// unchanged.
func TestChatUpload_ForwardsToWorkspace_HappyPath(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	srv, got := newCapturingWorkspace(t, http.StatusOK, `{"files":[{"uri":"workspace:/workspace/.molecule/chat-uploads/abc-fixture.txt","name":"fixture.txt","size":15}]}`)
	id := "00000000-0000-0000-0000-000000000043"
	expectURL(mock, id, srv.URL)
	expectInboundSecret(mock, id, "super-secret-123")
	h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
	payload, contentType := uploadFixture(t)
	c, w := makeUploadRequest(t, id, payload, contentType)
	h.Upload(c)
	if w.Code != http.StatusOK {
		t.Fatalf("expected 200 from happy forward, got %d: %s", w.Code, w.Body.String())
	}
	if got.method != "POST" {
		t.Errorf("expected POST, got %s", got.method)
	}
	if got.path != "/internal/chat/uploads/ingest" {
		t.Errorf("expected /internal/chat/uploads/ingest, got %s", got.path)
	}
	if got.authorization != "Bearer super-secret-123" {
		t.Errorf("expected secret in Authorization header, got %q", got.authorization)
	}
	if !strings.HasPrefix(got.contentType, "multipart/form-data") {
		t.Errorf("expected multipart Content-Type forwarded, got %q", got.contentType)
	}
	// The forwarded body must carry the multipart-encoded fixture content.
	if !bytes.Contains(got.body, []byte("fixture-payload")) {
		t.Errorf("expected body to contain fixture payload, got %d bytes", len(got.body))
	}
	// And the workspace's JSON must come back to the client verbatim.
	if !strings.Contains(w.Body.String(), "fixture.txt") {
		t.Errorf("expected workspace response forwarded back, got: %s", w.Body.String())
	}
}
// TestChatUpload_ForwardsErrorStatusUnchanged asserts that an upstream
// error status and body pass through the proxy verbatim instead of being
// remapped to a platform 500.
func TestChatUpload_ForwardsErrorStatusUnchanged(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	// The workspace answers 413 in its standard "exceeds per-file limit" shape.
	srv, _ := newCapturingWorkspace(t, http.StatusRequestEntityTooLarge, `{"error":"big.bin exceeds per-file limit (25 MB)"}`)
	id := "00000000-0000-0000-0000-000000000044"
	expectURL(mock, id, srv.URL)
	expectInboundSecret(mock, id, "tok")
	h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
	payload, contentType := uploadFixture(t)
	c, w := makeUploadRequest(t, id, payload, contentType)
	h.Upload(c)
	if w.Code != http.StatusRequestEntityTooLarge {
		t.Errorf("expected 413 propagated unchanged, got %d", w.Code)
	}
	if !strings.Contains(w.Body.String(), "exceeds per-file limit") {
		t.Errorf("expected workspace's 413 body verbatim, got: %s", w.Body.String())
	}
}
// TestChatUpload_WorkspaceUnreachable maps a connect-refused upstream to
// 502 Bad Gateway: the platform itself is healthy, the workspace is not
// — so 500 would be the wrong signal.
func TestChatUpload_WorkspaceUnreachable(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	id := "00000000-0000-0000-0000-000000000045"
	// Port 1 on loopback has no listener, so the dial is refused.
	expectURL(mock, id, "http://127.0.0.1:1")
	expectInboundSecret(mock, id, "tok")
	h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
	payload, contentType := uploadFixture(t)
	c, w := makeUploadRequest(t, id, payload, contentType)
	h.Upload(c)
	if w.Code != http.StatusBadGateway {
		t.Errorf("expected 502 on workspace unreachable, got %d: %s", w.Code, w.Body.String())
	}
}
@ -103,8 +274,7 @@ func TestChatDownload_InvalidPath(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
tmplh := NewTemplatesHandler(t.TempDir(), nil)
h := NewChatFilesHandler(tmplh)
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
cases := []struct {
name, path, wantSubstr string