fix(a2a): send v1 file Part shape; tolerate v1 server-side

Image-only chats surface "Error: message contained no text content"
because canvas posts v0 `{kind:"file", file:{uri,name,mimeType}}` shapes
that the workspace runtime's a2a-sdk v1 protobuf parser silently drops:
v1 `Part` has fields `[text, raw, url, data, metadata, filename,
media_type]` and `ignore_unknown_fields=True` discards `kind`+`file`,
producing a fully-empty Part. With no text and no extracted file
attachments, the executor's "no text content" guard fires.

Three coordinated changes close the gap:

1. canvas/ChatTab.tsx — outbound file parts now carry the v1 flat
   shape `{url, filename, mediaType}` so the v1 protobuf parser
   populates Part fields instead of dropping them.
2. workspace/executor_helpers.py — extract_attached_files learns the
   v1 detection branch (non-empty `part.url` + `filename` +
   `media_type`) alongside the existing v0 RootModel and flat-file
   shapes. Defends every runtime that mounts the OSS wheel against
   the same drop, including any pre-fix client still on the wire.
3. canvas/message-parser.ts — extractFilesFromTask tolerates the v1
   shape on incoming agent responses too, so file chips render in
   chat history regardless of which Part shape the runtime emits.

Test pins:
- workspace/tests/test_executor_helpers.py:
  + v1 protobuf shape extraction
  + empty-Part defense (v0→v1 silent-drop fall-through returns [])
- canvas message-parser test:
  + v1 protobuf flat parts
  + filename fallback to URL basename for v1
This commit is contained in:
Hongming Wang 2026-05-02 00:58:05 -07:00
parent b36eed97f6
commit 02a8841402
5 changed files with 209 additions and 37 deletions

View File

@ -32,10 +32,23 @@ interface A2AFileRef {
bytes?: string;
size?: number;
}
// A2A Part shape — covers both v0 (Pydantic discriminated union with
// `kind: "text" | "file"`) and v1 (a2a-sdk protobuf with flat fields:
// `text` for text parts; `url` + `filename` + `mediaType` for file
// parts; no `kind` discriminator at all). Outbound we now send v1 for
// file parts because the v1 protobuf parser drops the v0 keys via
// `ignore_unknown_fields=True`, surfacing as the user-visible
// "Error: message contained no text content" on image-only chats
// (2026-05-01 hongming incident). Text parts stay accident-compatible
// across v0/v1 because the field name is `text` in both.
interface A2APart {
kind: string;
kind?: string;
text?: string;
file?: A2AFileRef;
// v1 file-part fields (flat — no nested `file` object):
url?: string;
filename?: string;
mediaType?: string;
}
interface A2AResponse {
result?: {
@ -502,17 +515,22 @@ function MyChatPanel({ workspaceId, data }: Props) {
// A2A parts: text part (if any) + file parts (per attachment). The
// agent sees both in a single turn, matching the A2A spec shape.
//
// File parts use the v1 protobuf shape (flat `url`/`filename`/
// `mediaType`) because the workspace runtime's v1 parser drops
// the legacy v0 `{kind:"file", file:{...}}` shape via
// `ignore_unknown_fields=True`. Sending v0 → empty Part →
// empty attachments → "Error: message contained no text content"
// on image-only chats (2026-05-01 hongming). Text parts keep the
// shared `{kind:"text", text}` shape because `text` is a field
// in both v0 and v1.
const parts: A2APart[] = [];
if (text) parts.push({ kind: "text", text });
for (const att of uploaded) {
parts.push({
kind: "file",
file: {
name: att.name,
mimeType: att.mimeType,
uri: att.uri,
size: att.size,
},
url: att.uri,
filename: att.name,
mediaType: att.mimeType,
});
}

View File

@ -263,6 +263,41 @@ describe("extractFilesFromTask", () => {
expect(files[0]).toMatchObject({ name: "out.txt", uri: "workspace:/workspace/out.txt" });
});
// a2a-sdk v1 protobuf flattens file parts: no `kind`, no nested `file`,
// top-level `url` + `filename` + `mediaType` instead. Every workspace
// runtime since the SDK migration emits this shape, so the canvas
// chat parser must surface them or chips silently disappear from
// agent replies. Pinning here so a parser refactor can't regress
// back to v0-only and lose the new wire format.
it("pulls v1 protobuf file parts (flat url/filename/mediaType, no kind)", () => {
const task = {
parts: [
{ kind: "text", text: "here's the screenshot" },
{
url: "workspace:/screenshots/run-42.png",
filename: "run-42.png",
mediaType: "image/png",
},
],
};
const files = extractFilesFromTask(task);
expect(files).toEqual([
{
name: "run-42.png",
uri: "workspace:/screenshots/run-42.png",
mimeType: "image/png",
size: undefined,
},
]);
});
it("recovers a filename from the URI on v1 file parts when filename is absent", () => {
const task = {
parts: [{ url: "workspace:/workspace/out/graph.png" }],
};
expect(extractFilesFromTask(task)[0].name).toBe("graph.png");
});
it("hydrates a notify-with-attachments response_body — both text caption AND file chips", () => {
// Pins the exact wire shape the platform's Notify handler persists
// when send_message_to_user passes attachments (activity.go writes

View File

@ -40,27 +40,51 @@ export interface ParsedFilePart {
}
/** Extract file parts from an A2A response. Walks parts[] + artifacts[].
* Per the A2A spec a file part looks like:
* { kind: "file", file: { name, mimeType, uri | bytes } }
* We only surface parts that carry a `uri` inline bytes would
* require a different renderer (data URL) and are out of scope for
* MVP. Names fall back to the URI's basename when absent. */
*
* Tolerates both A2A protocol generations:
* - v0 (Pydantic): `{ kind: "file", file: { name, mimeType, uri } }`
* - v1 (protobuf): `{ url, filename, mediaType }` flat, no `kind`
* and no nested `file` object (the v1 Part's content oneof is
* `{text, raw, url, data}`; file metadata sits at top level).
*
* Without v1 tolerance, agents that emit the v1 shape (every workspace
* runtime since the SDK migration) silently drop file parts in chat
* the agent says "I sent the file" but the user never sees the chip.
*
* We only surface parts that carry a URL inline bytes would require
* a different renderer (data URL) and are out of scope for MVP. Names
* fall back to the URL's basename when absent. */
export function extractFilesFromTask(task: Record<string, unknown>): ParsedFilePart[] {
const out: ParsedFilePart[] = [];
const pushFromParts = (parts: unknown) => {
if (!Array.isArray(parts)) return;
for (const raw of parts as Array<Record<string, unknown>>) {
if (raw.kind !== "file" && raw.type !== "file") continue;
const file = (raw.file ?? raw) as Record<string, unknown>;
const uri = typeof file.uri === "string" ? file.uri : "";
if (!uri) continue;
const name = (typeof file.name === "string" && file.name) || basename(uri);
out.push({
name,
uri,
mimeType: typeof file.mimeType === "string" ? file.mimeType : undefined,
size: typeof file.size === "number" ? file.size : undefined,
});
const isV0File = raw.kind === "file" || raw.type === "file";
const v1Url = typeof raw.url === "string" ? raw.url : "";
if (!isV0File && !v1Url) continue;
let uri = "";
let name = "";
let mimeType: string | undefined;
let size: number | undefined;
if (isV0File) {
const file = (raw.file ?? raw) as Record<string, unknown>;
uri = typeof file.uri === "string" ? file.uri : "";
if (!uri) continue;
name = (typeof file.name === "string" && file.name) || basename(uri);
mimeType = typeof file.mimeType === "string" ? file.mimeType : undefined;
size = typeof file.size === "number" ? file.size : undefined;
} else {
// v1 flat shape: url + filename + mediaType (camelCase from
// protobuf JSON serialization of media_type).
uri = v1Url;
const v1Name = typeof raw.filename === "string" ? raw.filename : "";
name = v1Name || basename(uri);
mimeType = typeof raw.mediaType === "string" ? raw.mediaType : undefined;
}
out.push({ name, uri, mimeType, size });
}
};
try {

View File

@ -844,26 +844,61 @@ def resolve_attachment_uri(uri: str) -> str | None:
def extract_attached_files(message: Any) -> list[dict[str, str]]:
"""Pull ``{name, mime_type, path}`` dicts out of an A2A message.
Handles the discriminated-union shape ``part.root.file`` that a2a-sdk
produces via Pydantic RootModel, and the flatter ``part.file`` shape
hand-built callers sometimes emit. Non-file parts and files with
unresolvable URIs are skipped the caller sees an empty list rather
than a mix of valid and broken entries.
Tolerates three Part shapes seen in the wild:
1. a2a-sdk v0 Pydantic RootModel ``part.root.kind == 'file'`` with
``part.root.file.{uri,name,mimeType}``.
2. a2a-sdk v0 flatter shape ``part.kind == 'file'`` with
``part.file.{uri,name,mimeType}`` (some hand-built callers).
3. a2a-sdk v1 protobuf ``part.url`` non-empty with
``part.filename`` + ``part.media_type``. The v1 ``Part`` proto
has no ``kind`` field at all (the discriminator is now a oneof
``content`` of {text, raw, url, data}). Without this branch a v1
file part which is what a v1 server constructs from any caller
that JSON-encodes the v1 shape silently parses to an empty
Part on the v0v1 transition because protobuf json_format with
``ignore_unknown_fields=True`` drops the legacy ``kind`` and
``file`` keys, surfacing as the user-visible
"Error: message contained no text content" on image-only chats
(2026-05-01 hongming incident).
Non-file parts and files with unresolvable URIs are skipped the
caller sees an empty list rather than a mix of valid and broken
entries.
"""
if message is None:
return []
parts = getattr(message, "parts", None) or []
out: list[dict[str, str]] = []
for part in parts:
uri = ""
name = ""
mime = ""
root = getattr(part, "root", part)
if getattr(root, "kind", None) != "file":
continue
f = getattr(root, "file", None)
if f is None:
continue
uri = getattr(f, "uri", "") or ""
name = getattr(f, "name", "") or ""
mime = getattr(f, "mimeType", None) or getattr(f, "mime_type", None) or ""
if getattr(root, "kind", None) == "file":
f = getattr(root, "file", None)
if f is None:
continue
uri = getattr(f, "uri", "") or ""
name = getattr(f, "name", "") or ""
mime = getattr(f, "mimeType", None) or getattr(f, "mime_type", None) or ""
else:
# v1 protobuf Part has no `kind`; detect by a non-empty
# `url` (the file/url-of-bytes oneof slot). Fall back to
# `media_type` then `mimeType` for the camelCase Pydantic
# variant some adapters still hand us.
v1_url = getattr(part, "url", "") or ""
if not v1_url:
continue
uri = v1_url
name = getattr(part, "filename", "") or ""
mime = (
getattr(part, "media_type", None)
or getattr(part, "mediaType", None)
or ""
)
path = resolve_attachment_uri(uri)
if not path or not os.path.isfile(path):
logger.warning("skipping attached file with unresolvable uri=%r", uri)

View File

@ -767,6 +767,66 @@ def test_extract_attached_files_accepts_both_shapes(tmp_path, monkeypatch):
assert {f["name"] for f in out} == {"a.txt", "b.txt"}
def test_extract_attached_files_accepts_v1_protobuf_part(tmp_path, monkeypatch):
"""a2a-sdk v1 protobuf ``Part`` has fields
``[text, raw, url, data, metadata, filename, media_type]`` no
``kind`` field at all (the discriminator is now a oneof
``content`` of {text, raw, url, data}). Without v1-shape tolerance,
every file part on the v0v1 transition silently parses to an
empty Part and surfaces as the user-visible
"Error: message contained no text content" on image-only chats
(2026-05-01 hongming incident).
This pins the v1 detection: a non-empty ``url`` plus ``filename``
+ ``media_type`` is treated as a file part regardless of the
missing ``kind``. The conftest stub ``Part`` mirrors v1's flat
field shape (kwargs become attributes) so extracting via getattr
sees the same surface the real protobuf does."""
from types import SimpleNamespace
from executor_helpers import extract_attached_files
img = tmp_path / "screenshot.png"
img.write_bytes(b"\x89PNG\r\n\x1a\n")
monkeypatch.setattr("executor_helpers.WORKSPACE_MOUNT", str(tmp_path))
# v1 protobuf surface: flat Part with url/filename/media_type, no kind.
v1_part = SimpleNamespace(
url=f"workspace:{img}",
filename="screenshot.png",
media_type="image/png",
)
msg = SimpleNamespace(parts=[v1_part])
out = extract_attached_files(msg)
assert len(out) == 1
assert out[0]["name"] == "screenshot.png"
assert out[0]["mime_type"] == "image/png"
assert out[0]["path"] == str(img)
def test_extract_attached_files_empty_v1_part_returns_empty(tmp_path, monkeypatch):
"""Documents the v0→v1 silent-drop failure mode this fix defends
against. When canvas pre-fix sends ``{kind:"file", file:{...}}``
and the a2a-sdk v1 protobuf parser receives it with
``ignore_unknown_fields=True``, both legacy keys silently drop
the resulting Part has every field empty. The helper must NOT
raise and must return ``[]`` empty, not crashy.
The real fix is shipping the canvas v1 shape; this test pins the
runtime's defense so a template stuck on an old wheel against a
new canvas still fails closed (empty attachments + agent
proceeds) rather than mid-turn."""
from types import SimpleNamespace
from executor_helpers import extract_attached_files
monkeypatch.setattr("executor_helpers.WORKSPACE_MOUNT", str(tmp_path))
# Empty Part — no kind, no url, no filename, no media_type. This is
# the all-empty proto state json_format leaves behind on the v0→v1
# silent-drop. The helper must skip it without raising.
empty_v1_part = SimpleNamespace()
msg = SimpleNamespace(parts=[empty_v1_part])
assert extract_attached_files(msg) == []
def test_build_user_content_with_files_no_attachments_is_string():
"""Zero attachments → plain string so models without multi-modal
support (most non-vision LLMs) see the same payload shape they always