From 6eaacf175b1f307ad3e87e7f422364f5e93be147 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Sun, 26 Apr 2026 19:47:31 -0700 Subject: [PATCH] fix(notify): review-flagged Critical + Required findings on PR #2130 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two Critical bugs caught in code review of the agent→user attachments PR: 1. **Empty-URI attachments slipped past validation.** Gin's go-playground/validator does NOT iterate slice elements without `dive` — verified zero `dive` usage anywhere in workspace-server — so the inner `binding:"required"` tags on NotifyAttachment.URI/Name were never enforced. `attachments: [{"uri":"","name":""}]` would pass validation, broadcast empty-URI chips that render blank in canvas, AND persist them in activity_logs for every page reload to re-render. Added explicit per-element validation in Notify (returns 400 with `attachment[i]: uri and name are required`) plus defence-in-depth in the canvas filter (rejects empty strings, not just non-strings). 3-case regression test pins the rejection. 2. **Hardcoded application/octet-stream stripped real mime types.** `_upload_chat_files` always passed octet-stream as the multipart Content-Type. chat_files.go:Upload reads `fh.Header.Get("Content-Type")` FIRST and only falls back to extension-sniffing when the header is empty, so every agent-attached file lost its real type forever — broke the canvas's MIME-based icon/preview logic. Now sniff via `mimetypes.guess_type(path)` and only fall back to octet-stream when sniffing returns None. Plus three Required nits: - `sqlmockArgMatcher` was misleading — the closure always returned true after capture, identical to `sqlmock.AnyArg()` semantics, but named like a custom matcher. Renamed to `sqlmockCaptureArg(*string)` so the intent (capture for post-call inspection, not validate via driver-callback) is unambiguous. - Test asserted notify call by `await_args_list[1]` index — fragile to any future _upload_chat_files refactor that adds a pre-flight POST. Now filter call list by URL suffix `/notify` and assert exactly one match. - Added `TestNotify_RejectsAttachmentWithEmptyURIOrName` (3 cases) covering empty-uri, empty-name, both-empty so the Critical fix stays defended. Deferred to follow-up: - ORDER BY tiebreaker for same-millisecond notifies — pre-existing risk, not regression. - Streaming multipart upload — bounded by the platform's 50MB total cap so RAM ceiling is fixed; switch to streaming if cap rises. - Symlink rejection — agent UID can already read whatever its filesystem perms allow via the shell tool; rejecting symlinks doesn't materially shrink the attack surface. Co-Authored-By: Claude Opus 4.7 (1M context) --- canvas/src/store/canvas-events.ts | 11 ++- .../internal/handlers/activity.go | 16 ++++ .../internal/handlers/activity_test.go | 82 +++++++++++++++---- workspace/a2a_tools.py | 14 +++- workspace/tests/test_a2a_tools_impl.py | 11 ++- 5 files changed, 113 insertions(+), 21 deletions(-) diff --git a/canvas/src/store/canvas-events.ts b/canvas/src/store/canvas-events.ts index 58516d9e..30807530 100644 --- a/canvas/src/store/canvas-events.ts +++ b/canvas/src/store/canvas-events.ts @@ -403,7 +403,16 @@ export function handleCanvasEvent( const rawAttachments = msg.payload.attachments; const attachments = Array.isArray(rawAttachments) ? (rawAttachments as Array<{ uri?: unknown; name?: unknown; mimeType?: unknown; size?: unknown }>) - .filter((a) => typeof a?.uri === "string" && typeof a?.name === "string") + // Reject empty strings as well as non-strings — server-side + // gin validation does NOT enforce binding:"required" on + // slice-element struct fields without `dive` (which the + // notify handler does not use), so a malformed broadcast + // could carry uri:"" or name:"". Defence-in-depth: drop + // those here so the chat doesn't render a blank/broken chip. + .filter((a) => + typeof a?.uri === "string" && a.uri.length > 0 && + typeof a?.name === "string" && a.name.length > 0, + ) .map((a) => ({ uri: a.uri as string, name: a.name as string, diff --git a/workspace-server/internal/handlers/activity.go b/workspace-server/internal/handlers/activity.go index 971d559a..c8dba3ea 100644 --- a/workspace-server/internal/handlers/activity.go +++ b/workspace-server/internal/handlers/activity.go @@ -289,6 +289,22 @@ func (h *ActivityHandler) Notify(c *gin.Context) { return } + // Per-element attachment validation: gin's go-playground/validator + // does NOT iterate slice elements without `dive`, so the inner + // `binding:"required"` tags on NotifyAttachment.URI/Name don't + // actually run. Without this loop, attachments: [{"uri":"","name":""}] + // would slip through, broadcast empty-URI chips that render + // blank/broken in the canvas, and persist them in activity_logs + // for every page reload to re-render. Validate explicitly. + for i, a := range body.Attachments { + if a.URI == "" || a.Name == "" { + c.JSON(http.StatusBadRequest, gin.H{ + "error": fmt.Sprintf("attachment[%d]: uri and name are required", i), + }) + return + } + } + // Verify workspace exists var wsName string err := db.DB.QueryRowContext(c.Request.Context(), diff --git a/workspace-server/internal/handlers/activity_test.go b/workspace-server/internal/handlers/activity_test.go index e3d8cda8..6cc4038f 100644 --- a/workspace-server/internal/handlers/activity_test.go +++ b/workspace-server/internal/handlers/activity_test.go @@ -280,20 +280,15 @@ func TestNotify_WithAttachments_PersistsFilePartsForReload(t *testing.T) { WithArgs("ws-attach"). WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("DD")) - // Capture the JSONB arg via a custom matcher so we can assert on - // the persisted shape (must include parts[].kind=file so reload - // reconstructs download chips). + // Capture the JSONB arg so we can assert on the persisted shape + // AFTER the call (must include parts[].kind=file so reload + // reconstructs download chips). Use AnyArg() for the binding + // gate — the substring asserts below are what actually validate + // the shape; a custom matcher that always returned true would + // be misleading about which step does the gating. var capturedRespJSON string - respMatcher := sqlmockArgMatcher(func(v driver.Value) bool { - s, ok := v.(string) - if !ok { - return false - } - capturedRespJSON = s - return true - }) mock.ExpectExec(`INSERT INTO activity_logs`). - WithArgs("ws-attach", sqlmock.AnyArg(), respMatcher). + WithArgs("ws-attach", sqlmock.AnyArg(), sqlmockCaptureArg(&capturedRespJSON)). WillReturnResult(sqlmock.NewResult(1, 1)) broadcaster := newTestBroadcaster() @@ -333,12 +328,65 @@ func TestNotify_WithAttachments_PersistsFilePartsForReload(t *testing.T) { } } -// sqlmockArgMatcher adapts a closure into the sqlmock.Argument interface -// so tests can capture/inspect the actual driver value sent into a -// prepared statement. Returns true to match. -type sqlmockArgMatcher func(driver.Value) bool +func TestNotify_RejectsAttachmentWithEmptyURIOrName(t *testing.T) { + // Critical regression guard. gin's go-playground/validator does NOT + // iterate slice elements without `dive`, so `binding:"required"` on + // NotifyAttachment.URI/Name would silently fail to enforce on + // `attachments: [{"uri":"","name":""}]`. Without this explicit + // per-element check, the platform broadcasts empty-URI chips that + // render blank in the canvas AND get persisted in activity_logs + // for every page reload to re-render. Pre-fix: passed validation. + cases := []struct { + name string + body string + }{ + {"empty uri", `{"message":"hi","attachments":[{"uri":"","name":"file.zip"}]}`}, + {"empty name", `{"message":"hi","attachments":[{"uri":"workspace:/x","name":""}]}`}, + {"both empty", `{"message":"hi","attachments":[{"uri":"","name":""}]}`}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + mockDB, _, _ := sqlmock.New() + defer mockDB.Close() + db.DB = mockDB + // No DB expectations — handler must reject with 400 BEFORE + // reaching SELECT/INSERT. sqlmock will fail "expectations not met" + // only if the handler unexpectedly queries. -func (m sqlmockArgMatcher) Match(v driver.Value) bool { return m(v) } + broadcaster := newTestBroadcaster() + handler := NewActivityHandler(broadcaster) + gin.SetMode(gin.TestMode) + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: "ws-x"}} + c.Request = httptest.NewRequest("POST", "/workspaces/ws-x/notify", strings.NewReader(tc.body)) + c.Request.Header.Set("Content-Type", "application/json") + handler.Notify(c) + + if w.Code != http.StatusBadRequest { + t.Errorf("expected 400 for %s, got %d: %s", tc.name, w.Code, w.Body.String()) + } + }) + } +} + +// sqlmockCaptureArg returns an sqlmock.Argument that always matches AND +// writes the string-coerced driver value into `dst`. Lets a test +// inspect the actual JSON bytes written to a JSONB column without +// pretending to enforce shape — that's what the downstream substring +// asserts in the test body do. +func sqlmockCaptureArg(dst *string) sqlmock.Argument { + return sqlmockArgFn(func(v driver.Value) bool { + if s, ok := v.(string); ok { + *dst = s + } + return true + }) +} + +type sqlmockArgFn func(driver.Value) bool + +func (f sqlmockArgFn) Match(v driver.Value) bool { return f(v) } func TestNotify_DBFailure_StillBroadcastsAnd200(t *testing.T) { // Persistence is best-effort — a DB hiccup must NOT block the diff --git a/workspace/a2a_tools.py b/workspace/a2a_tools.py index 313085ed..622960e0 100644 --- a/workspace/a2a_tools.py +++ b/workspace/a2a_tools.py @@ -5,6 +5,7 @@ Imports shared client functions and constants from a2a_client. import hashlib import json +import mimetypes import os import uuid @@ -313,7 +314,18 @@ async def _upload_chat_files(client: httpx.AsyncClient, paths: list[str]) -> tup data = fh.read() except OSError as e: return [], f"Error reading {p}: {e}" - files_payload.append(("files", (os.path.basename(p), data, "application/octet-stream"))) + # Sniff mime from filename so the canvas can pick the right + # icon / preview / inline-image renderer. Pre-fix this was + # hardcoded application/octet-stream and chat_files.go's + # Upload trusts whatever Content-Type the multipart part + # carries — `mt := fh.Header.Get("Content-Type")` only falls + # back to extension-sniffing when the header is empty. So a + # hardcoded octet-stream meant every attachment lost its + # real type forever, breaking the canvas chip's icon logic. + mime_type, _ = mimetypes.guess_type(p) + if not mime_type: + mime_type = "application/octet-stream" + files_payload.append(("files", (os.path.basename(p), data, mime_type))) try: resp = await client.post( f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/chat/uploads", diff --git a/workspace/tests/test_a2a_tools_impl.py b/workspace/tests/test_a2a_tools_impl.py index e8cb045b..e9e95a5e 100644 --- a/workspace/tests/test_a2a_tools_impl.py +++ b/workspace/tests/test_a2a_tools_impl.py @@ -400,8 +400,15 @@ class TestToolSendMessageToUser: assert "1 attachment" in result # Verify the notify call carried attachment metadata, not bytes. - notify_call = mc.post.await_args_list[1] - notify_body = notify_call.kwargs.get("json") or {} + # Locate the call by URL suffix, not by index — a future refactor + # in _upload_chat_files that adds a pre-flight call would silently + # shift the array index and the assert would target the wrong call. + notify_calls = [ + c for c in mc.post.await_args_list + if c.args and isinstance(c.args[0], str) and c.args[0].endswith("/notify") + ] + assert len(notify_calls) == 1, f"expected 1 notify POST, got {len(notify_calls)}" + notify_body = notify_calls[0].kwargs.get("json") or {} assert notify_body.get("message") == "Done — see attached." assert len(notify_body.get("attachments", [])) == 1 att = notify_body["attachments"][0]