From 02a8841402bd8cf87293acd92bb889491efd3209 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Sat, 2 May 2026 00:58:05 -0700 Subject: [PATCH] fix(a2a): send v1 file Part shape; tolerate v1 server-side MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Image-only chats surface "Error: message contained no text content" because canvas posts v0 `{kind:"file", file:{uri,name,mimeType}}` shapes that the workspace runtime's a2a-sdk v1 protobuf parser silently drops: v1 `Part` has fields `[text, raw, url, data, metadata, filename, media_type]` and `ignore_unknown_fields=True` discards `kind`+`file`, producing a fully-empty Part. With no text and no extracted file attachments, the executor's "no text content" guard fires. Three coordinated changes close the gap: 1. canvas/ChatTab.tsx — outbound file parts now carry the v1 flat shape `{url, filename, mediaType}` so the v1 protobuf parser populates Part fields instead of dropping them. 2. workspace/executor_helpers.py — extract_attached_files learns the v1 detection branch (non-empty `part.url` + `filename` + `media_type`) alongside the existing v0 RootModel and flat-file shapes. Defends every runtime that mounts the OSS wheel against the same drop, including any pre-fix client still on the wire. 3. canvas/message-parser.ts — extractFilesFromTask tolerates the v1 shape on incoming agent responses too, so file chips render in chat history regardless of which Part shape the runtime emits. Test pins: - workspace/tests/test_executor_helpers.py: + v1 protobuf shape extraction + empty-Part defense (v0→v1 silent-drop fall-through returns []) - canvas message-parser test: + v1 protobuf flat parts + filename fallback to URL basename for v1 --- canvas/src/components/tabs/ChatTab.tsx | 34 ++++++++--- .../chat/__tests__/message-parser.test.ts | 35 +++++++++++ .../components/tabs/chat/message-parser.ts | 56 ++++++++++++----- workspace/executor_helpers.py | 61 +++++++++++++++---- workspace/tests/test_executor_helpers.py | 60 ++++++++++++++++++ 5 files changed, 209 insertions(+), 37 deletions(-) diff --git a/canvas/src/components/tabs/ChatTab.tsx b/canvas/src/components/tabs/ChatTab.tsx index e0a4b7c7..e234e731 100644 --- a/canvas/src/components/tabs/ChatTab.tsx +++ b/canvas/src/components/tabs/ChatTab.tsx @@ -32,10 +32,23 @@ interface A2AFileRef { bytes?: string; size?: number; } +// A2A Part shape — covers both v0 (Pydantic discriminated union with +// `kind: "text" | "file"`) and v1 (a2a-sdk protobuf with flat fields: +// `text` for text parts; `url` + `filename` + `mediaType` for file +// parts; no `kind` discriminator at all). Outbound we now send v1 for +// file parts because the v1 protobuf parser drops the v0 keys via +// `ignore_unknown_fields=True`, surfacing as the user-visible +// "Error: message contained no text content" on image-only chats +// (2026-05-01 hongming incident). Text parts stay accident-compatible +// across v0/v1 because the field name is `text` in both. interface A2APart { - kind: string; + kind?: string; text?: string; file?: A2AFileRef; + // v1 file-part fields (flat — no nested `file` object): + url?: string; + filename?: string; + mediaType?: string; } interface A2AResponse { result?: { @@ -502,17 +515,22 @@ function MyChatPanel({ workspaceId, data }: Props) { // A2A parts: text part (if any) + file parts (per attachment). The // agent sees both in a single turn, matching the A2A spec shape. + // + // File parts use the v1 protobuf shape (flat `url`/`filename`/ + // `mediaType`) because the workspace runtime's v1 parser drops + // the legacy v0 `{kind:"file", file:{...}}` shape via + // `ignore_unknown_fields=True`. Sending v0 → empty Part → + // empty attachments → "Error: message contained no text content" + // on image-only chats (2026-05-01 hongming). Text parts keep the + // shared `{kind:"text", text}` shape because `text` is a field + // in both v0 and v1. const parts: A2APart[] = []; if (text) parts.push({ kind: "text", text }); for (const att of uploaded) { parts.push({ - kind: "file", - file: { - name: att.name, - mimeType: att.mimeType, - uri: att.uri, - size: att.size, - }, + url: att.uri, + filename: att.name, + mediaType: att.mimeType, }); } diff --git a/canvas/src/components/tabs/chat/__tests__/message-parser.test.ts b/canvas/src/components/tabs/chat/__tests__/message-parser.test.ts index 238f5bb6..6b408436 100644 --- a/canvas/src/components/tabs/chat/__tests__/message-parser.test.ts +++ b/canvas/src/components/tabs/chat/__tests__/message-parser.test.ts @@ -263,6 +263,41 @@ describe("extractFilesFromTask", () => { expect(files[0]).toMatchObject({ name: "out.txt", uri: "workspace:/workspace/out.txt" }); }); + // a2a-sdk v1 protobuf flattens file parts: no `kind`, no nested `file`, + // top-level `url` + `filename` + `mediaType` instead. Every workspace + // runtime since the SDK migration emits this shape, so the canvas + // chat parser must surface them or chips silently disappear from + // agent replies. Pinning here so a parser refactor can't regress + // back to v0-only and lose the new wire format. + it("pulls v1 protobuf file parts (flat url/filename/mediaType, no kind)", () => { + const task = { + parts: [ + { kind: "text", text: "here's the screenshot" }, + { + url: "workspace:/screenshots/run-42.png", + filename: "run-42.png", + mediaType: "image/png", + }, + ], + }; + const files = extractFilesFromTask(task); + expect(files).toEqual([ + { + name: "run-42.png", + uri: "workspace:/screenshots/run-42.png", + mimeType: "image/png", + size: undefined, + }, + ]); + }); + + it("recovers a filename from the URI on v1 file parts when filename is absent", () => { + const task = { + parts: [{ url: "workspace:/workspace/out/graph.png" }], + }; + expect(extractFilesFromTask(task)[0].name).toBe("graph.png"); + }); + it("hydrates a notify-with-attachments response_body — both text caption AND file chips", () => { // Pins the exact wire shape the platform's Notify handler persists // when send_message_to_user passes attachments (activity.go writes diff --git a/canvas/src/components/tabs/chat/message-parser.ts b/canvas/src/components/tabs/chat/message-parser.ts index 54fa3a64..5c8cc6b6 100644 --- a/canvas/src/components/tabs/chat/message-parser.ts +++ b/canvas/src/components/tabs/chat/message-parser.ts @@ -40,27 +40,51 @@ export interface ParsedFilePart { } /** Extract file parts from an A2A response. Walks parts[] + artifacts[]. - * Per the A2A spec a file part looks like: - * { kind: "file", file: { name, mimeType, uri | bytes } } - * We only surface parts that carry a `uri` — inline bytes would - * require a different renderer (data URL) and are out of scope for - * MVP. Names fall back to the URI's basename when absent. */ + * + * Tolerates both A2A protocol generations: + * - v0 (Pydantic): `{ kind: "file", file: { name, mimeType, uri } }` + * - v1 (protobuf): `{ url, filename, mediaType }` — flat, no `kind` + * and no nested `file` object (the v1 Part's content oneof is + * `{text, raw, url, data}`; file metadata sits at top level). + * + * Without v1 tolerance, agents that emit the v1 shape (every workspace + * runtime since the SDK migration) silently drop file parts in chat — + * the agent says "I sent the file" but the user never sees the chip. + * + * We only surface parts that carry a URL — inline bytes would require + * a different renderer (data URL) and are out of scope for MVP. Names + * fall back to the URL's basename when absent. */ export function extractFilesFromTask(task: Record): ParsedFilePart[] { const out: ParsedFilePart[] = []; const pushFromParts = (parts: unknown) => { if (!Array.isArray(parts)) return; for (const raw of parts as Array>) { - if (raw.kind !== "file" && raw.type !== "file") continue; - const file = (raw.file ?? raw) as Record; - const uri = typeof file.uri === "string" ? file.uri : ""; - if (!uri) continue; - const name = (typeof file.name === "string" && file.name) || basename(uri); - out.push({ - name, - uri, - mimeType: typeof file.mimeType === "string" ? file.mimeType : undefined, - size: typeof file.size === "number" ? file.size : undefined, - }); + const isV0File = raw.kind === "file" || raw.type === "file"; + const v1Url = typeof raw.url === "string" ? raw.url : ""; + if (!isV0File && !v1Url) continue; + + let uri = ""; + let name = ""; + let mimeType: string | undefined; + let size: number | undefined; + + if (isV0File) { + const file = (raw.file ?? raw) as Record; + uri = typeof file.uri === "string" ? file.uri : ""; + if (!uri) continue; + name = (typeof file.name === "string" && file.name) || basename(uri); + mimeType = typeof file.mimeType === "string" ? file.mimeType : undefined; + size = typeof file.size === "number" ? file.size : undefined; + } else { + // v1 flat shape: url + filename + mediaType (camelCase from + // protobuf JSON serialization of media_type). + uri = v1Url; + const v1Name = typeof raw.filename === "string" ? raw.filename : ""; + name = v1Name || basename(uri); + mimeType = typeof raw.mediaType === "string" ? raw.mediaType : undefined; + } + + out.push({ name, uri, mimeType, size }); } }; try { diff --git a/workspace/executor_helpers.py b/workspace/executor_helpers.py index e6d335e2..d3f9d00a 100644 --- a/workspace/executor_helpers.py +++ b/workspace/executor_helpers.py @@ -844,26 +844,61 @@ def resolve_attachment_uri(uri: str) -> str | None: def extract_attached_files(message: Any) -> list[dict[str, str]]: """Pull ``{name, mime_type, path}`` dicts out of an A2A message. - Handles the discriminated-union shape ``part.root.file`` that a2a-sdk - produces via Pydantic RootModel, and the flatter ``part.file`` shape - hand-built callers sometimes emit. Non-file parts and files with - unresolvable URIs are skipped — the caller sees an empty list rather - than a mix of valid and broken entries. + Tolerates three Part shapes seen in the wild: + + 1. a2a-sdk v0 Pydantic RootModel — ``part.root.kind == 'file'`` with + ``part.root.file.{uri,name,mimeType}``. + 2. a2a-sdk v0 flatter shape — ``part.kind == 'file'`` with + ``part.file.{uri,name,mimeType}`` (some hand-built callers). + 3. a2a-sdk v1 protobuf — ``part.url`` non-empty with + ``part.filename`` + ``part.media_type``. The v1 ``Part`` proto + has no ``kind`` field at all (the discriminator is now a oneof + ``content`` of {text, raw, url, data}). Without this branch a v1 + file part — which is what a v1 server constructs from any caller + that JSON-encodes the v1 shape — silently parses to an empty + Part on the v0→v1 transition because protobuf json_format with + ``ignore_unknown_fields=True`` drops the legacy ``kind`` and + ``file`` keys, surfacing as the user-visible + "Error: message contained no text content" on image-only chats + (2026-05-01 hongming incident). + + Non-file parts and files with unresolvable URIs are skipped — the + caller sees an empty list rather than a mix of valid and broken + entries. """ if message is None: return [] parts = getattr(message, "parts", None) or [] out: list[dict[str, str]] = [] for part in parts: + uri = "" + name = "" + mime = "" + root = getattr(part, "root", part) - if getattr(root, "kind", None) != "file": - continue - f = getattr(root, "file", None) - if f is None: - continue - uri = getattr(f, "uri", "") or "" - name = getattr(f, "name", "") or "" - mime = getattr(f, "mimeType", None) or getattr(f, "mime_type", None) or "" + if getattr(root, "kind", None) == "file": + f = getattr(root, "file", None) + if f is None: + continue + uri = getattr(f, "uri", "") or "" + name = getattr(f, "name", "") or "" + mime = getattr(f, "mimeType", None) or getattr(f, "mime_type", None) or "" + else: + # v1 protobuf Part has no `kind`; detect by a non-empty + # `url` (the file/url-of-bytes oneof slot). Fall back to + # `media_type` then `mimeType` for the camelCase Pydantic + # variant some adapters still hand us. + v1_url = getattr(part, "url", "") or "" + if not v1_url: + continue + uri = v1_url + name = getattr(part, "filename", "") or "" + mime = ( + getattr(part, "media_type", None) + or getattr(part, "mediaType", None) + or "" + ) + path = resolve_attachment_uri(uri) if not path or not os.path.isfile(path): logger.warning("skipping attached file with unresolvable uri=%r", uri) diff --git a/workspace/tests/test_executor_helpers.py b/workspace/tests/test_executor_helpers.py index 195d8dda..09c4ab2b 100644 --- a/workspace/tests/test_executor_helpers.py +++ b/workspace/tests/test_executor_helpers.py @@ -767,6 +767,66 @@ def test_extract_attached_files_accepts_both_shapes(tmp_path, monkeypatch): assert {f["name"] for f in out} == {"a.txt", "b.txt"} +def test_extract_attached_files_accepts_v1_protobuf_part(tmp_path, monkeypatch): + """a2a-sdk v1 protobuf ``Part`` has fields + ``[text, raw, url, data, metadata, filename, media_type]`` — no + ``kind`` field at all (the discriminator is now a oneof + ``content`` of {text, raw, url, data}). Without v1-shape tolerance, + every file part on the v0→v1 transition silently parses to an + empty Part and surfaces as the user-visible + "Error: message contained no text content" on image-only chats + (2026-05-01 hongming incident). + + This pins the v1 detection: a non-empty ``url`` plus ``filename`` + + ``media_type`` is treated as a file part regardless of the + missing ``kind``. The conftest stub ``Part`` mirrors v1's flat + field shape (kwargs become attributes) so extracting via getattr + sees the same surface the real protobuf does.""" + from types import SimpleNamespace + from executor_helpers import extract_attached_files + + img = tmp_path / "screenshot.png" + img.write_bytes(b"\x89PNG\r\n\x1a\n") + monkeypatch.setattr("executor_helpers.WORKSPACE_MOUNT", str(tmp_path)) + + # v1 protobuf surface: flat Part with url/filename/media_type, no kind. + v1_part = SimpleNamespace( + url=f"workspace:{img}", + filename="screenshot.png", + media_type="image/png", + ) + msg = SimpleNamespace(parts=[v1_part]) + out = extract_attached_files(msg) + assert len(out) == 1 + assert out[0]["name"] == "screenshot.png" + assert out[0]["mime_type"] == "image/png" + assert out[0]["path"] == str(img) + + +def test_extract_attached_files_empty_v1_part_returns_empty(tmp_path, monkeypatch): + """Documents the v0→v1 silent-drop failure mode this fix defends + against. When canvas pre-fix sends ``{kind:"file", file:{...}}`` + and the a2a-sdk v1 protobuf parser receives it with + ``ignore_unknown_fields=True``, both legacy keys silently drop — + the resulting Part has every field empty. The helper must NOT + raise and must return ``[]`` — empty, not crashy. + + The real fix is shipping the canvas v1 shape; this test pins the + runtime's defense so a template stuck on an old wheel against a + new canvas still fails closed (empty attachments + agent + proceeds) rather than mid-turn.""" + from types import SimpleNamespace + from executor_helpers import extract_attached_files + + monkeypatch.setattr("executor_helpers.WORKSPACE_MOUNT", str(tmp_path)) + # Empty Part — no kind, no url, no filename, no media_type. This is + # the all-empty proto state json_format leaves behind on the v0→v1 + # silent-drop. The helper must skip it without raising. + empty_v1_part = SimpleNamespace() + msg = SimpleNamespace(parts=[empty_v1_part]) + assert extract_attached_files(msg) == [] + + def test_build_user_content_with_files_no_attachments_is_string(): """Zero attachments → plain string so models without multi-modal support (most non-vision LLMs) see the same payload shape they always