fix(a2a): send v1 file Part shape; tolerate v1 server-side
Image-only chats surface "Error: message contained no text content"
because canvas posts v0 `{kind:"file", file:{uri,name,mimeType}}` shapes
that the workspace runtime's a2a-sdk v1 protobuf parser silently drops:
v1 `Part` has fields `[text, raw, url, data, metadata, filename,
media_type]` and `ignore_unknown_fields=True` discards `kind`+`file`,
producing a fully-empty Part. With no text and no extracted file
attachments, the executor's "no text content" guard fires.
Three coordinated changes close the gap:
1. canvas/ChatTab.tsx — outbound file parts now carry the v1 flat
shape `{url, filename, mediaType}` so the v1 protobuf parser
populates Part fields instead of dropping them.
2. workspace/executor_helpers.py — extract_attached_files learns the
v1 detection branch (non-empty `part.url` + `filename` +
`media_type`) alongside the existing v0 RootModel and flat-file
shapes. Defends every runtime that mounts the OSS wheel against
the same drop, including any pre-fix client still on the wire.
3. canvas/message-parser.ts — extractFilesFromTask tolerates the v1
shape on incoming agent responses too, so file chips render in
chat history regardless of which Part shape the runtime emits.
Test pins:
- workspace/tests/test_executor_helpers.py:
+ v1 protobuf shape extraction
+ empty-Part defense (v0→v1 silent-drop fall-through returns [])
- canvas message-parser test:
+ v1 protobuf flat parts
+ filename fallback to URL basename for v1
This commit is contained in:
parent
b36eed97f6
commit
02a8841402
@ -32,10 +32,23 @@ interface A2AFileRef {
|
||||
bytes?: string;
|
||||
size?: number;
|
||||
}
|
||||
// A2A Part shape — covers both v0 (Pydantic discriminated union with
|
||||
// `kind: "text" | "file"`) and v1 (a2a-sdk protobuf with flat fields:
|
||||
// `text` for text parts; `url` + `filename` + `mediaType` for file
|
||||
// parts; no `kind` discriminator at all). Outbound we now send v1 for
|
||||
// file parts because the v1 protobuf parser drops the v0 keys via
|
||||
// `ignore_unknown_fields=True`, surfacing as the user-visible
|
||||
// "Error: message contained no text content" on image-only chats
|
||||
// (2026-05-01 hongming incident). Text parts stay accident-compatible
|
||||
// across v0/v1 because the field name is `text` in both.
|
||||
interface A2APart {
|
||||
kind: string;
|
||||
kind?: string;
|
||||
text?: string;
|
||||
file?: A2AFileRef;
|
||||
// v1 file-part fields (flat — no nested `file` object):
|
||||
url?: string;
|
||||
filename?: string;
|
||||
mediaType?: string;
|
||||
}
|
||||
interface A2AResponse {
|
||||
result?: {
|
||||
@ -502,17 +515,22 @@ function MyChatPanel({ workspaceId, data }: Props) {
|
||||
|
||||
// A2A parts: text part (if any) + file parts (per attachment). The
|
||||
// agent sees both in a single turn, matching the A2A spec shape.
|
||||
//
|
||||
// File parts use the v1 protobuf shape (flat `url`/`filename`/
|
||||
// `mediaType`) because the workspace runtime's v1 parser drops
|
||||
// the legacy v0 `{kind:"file", file:{...}}` shape via
|
||||
// `ignore_unknown_fields=True`. Sending v0 → empty Part →
|
||||
// empty attachments → "Error: message contained no text content"
|
||||
// on image-only chats (2026-05-01 hongming). Text parts keep the
|
||||
// shared `{kind:"text", text}` shape because `text` is a field
|
||||
// in both v0 and v1.
|
||||
const parts: A2APart[] = [];
|
||||
if (text) parts.push({ kind: "text", text });
|
||||
for (const att of uploaded) {
|
||||
parts.push({
|
||||
kind: "file",
|
||||
file: {
|
||||
name: att.name,
|
||||
mimeType: att.mimeType,
|
||||
uri: att.uri,
|
||||
size: att.size,
|
||||
},
|
||||
url: att.uri,
|
||||
filename: att.name,
|
||||
mediaType: att.mimeType,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@ -263,6 +263,41 @@ describe("extractFilesFromTask", () => {
|
||||
expect(files[0]).toMatchObject({ name: "out.txt", uri: "workspace:/workspace/out.txt" });
|
||||
});
|
||||
|
||||
// a2a-sdk v1 protobuf flattens file parts: no `kind`, no nested `file`,
|
||||
// top-level `url` + `filename` + `mediaType` instead. Every workspace
|
||||
// runtime since the SDK migration emits this shape, so the canvas
|
||||
// chat parser must surface them or chips silently disappear from
|
||||
// agent replies. Pinning here so a parser refactor can't regress
|
||||
// back to v0-only and lose the new wire format.
|
||||
it("pulls v1 protobuf file parts (flat url/filename/mediaType, no kind)", () => {
|
||||
const task = {
|
||||
parts: [
|
||||
{ kind: "text", text: "here's the screenshot" },
|
||||
{
|
||||
url: "workspace:/screenshots/run-42.png",
|
||||
filename: "run-42.png",
|
||||
mediaType: "image/png",
|
||||
},
|
||||
],
|
||||
};
|
||||
const files = extractFilesFromTask(task);
|
||||
expect(files).toEqual([
|
||||
{
|
||||
name: "run-42.png",
|
||||
uri: "workspace:/screenshots/run-42.png",
|
||||
mimeType: "image/png",
|
||||
size: undefined,
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it("recovers a filename from the URI on v1 file parts when filename is absent", () => {
|
||||
const task = {
|
||||
parts: [{ url: "workspace:/workspace/out/graph.png" }],
|
||||
};
|
||||
expect(extractFilesFromTask(task)[0].name).toBe("graph.png");
|
||||
});
|
||||
|
||||
it("hydrates a notify-with-attachments response_body — both text caption AND file chips", () => {
|
||||
// Pins the exact wire shape the platform's Notify handler persists
|
||||
// when send_message_to_user passes attachments (activity.go writes
|
||||
|
||||
@ -40,27 +40,51 @@ export interface ParsedFilePart {
|
||||
}
|
||||
|
||||
/** Extract file parts from an A2A response. Walks parts[] + artifacts[].
|
||||
* Per the A2A spec a file part looks like:
|
||||
* { kind: "file", file: { name, mimeType, uri | bytes } }
|
||||
* We only surface parts that carry a `uri` — inline bytes would
|
||||
* require a different renderer (data URL) and are out of scope for
|
||||
* MVP. Names fall back to the URI's basename when absent. */
|
||||
*
|
||||
* Tolerates both A2A protocol generations:
|
||||
* - v0 (Pydantic): `{ kind: "file", file: { name, mimeType, uri } }`
|
||||
* - v1 (protobuf): `{ url, filename, mediaType }` — flat, no `kind`
|
||||
* and no nested `file` object (the v1 Part's content oneof is
|
||||
* `{text, raw, url, data}`; file metadata sits at top level).
|
||||
*
|
||||
* Without v1 tolerance, agents that emit the v1 shape (every workspace
|
||||
* runtime since the SDK migration) silently drop file parts in chat —
|
||||
* the agent says "I sent the file" but the user never sees the chip.
|
||||
*
|
||||
* We only surface parts that carry a URL — inline bytes would require
|
||||
* a different renderer (data URL) and are out of scope for MVP. Names
|
||||
* fall back to the URL's basename when absent. */
|
||||
export function extractFilesFromTask(task: Record<string, unknown>): ParsedFilePart[] {
|
||||
const out: ParsedFilePart[] = [];
|
||||
const pushFromParts = (parts: unknown) => {
|
||||
if (!Array.isArray(parts)) return;
|
||||
for (const raw of parts as Array<Record<string, unknown>>) {
|
||||
if (raw.kind !== "file" && raw.type !== "file") continue;
|
||||
const file = (raw.file ?? raw) as Record<string, unknown>;
|
||||
const uri = typeof file.uri === "string" ? file.uri : "";
|
||||
if (!uri) continue;
|
||||
const name = (typeof file.name === "string" && file.name) || basename(uri);
|
||||
out.push({
|
||||
name,
|
||||
uri,
|
||||
mimeType: typeof file.mimeType === "string" ? file.mimeType : undefined,
|
||||
size: typeof file.size === "number" ? file.size : undefined,
|
||||
});
|
||||
const isV0File = raw.kind === "file" || raw.type === "file";
|
||||
const v1Url = typeof raw.url === "string" ? raw.url : "";
|
||||
if (!isV0File && !v1Url) continue;
|
||||
|
||||
let uri = "";
|
||||
let name = "";
|
||||
let mimeType: string | undefined;
|
||||
let size: number | undefined;
|
||||
|
||||
if (isV0File) {
|
||||
const file = (raw.file ?? raw) as Record<string, unknown>;
|
||||
uri = typeof file.uri === "string" ? file.uri : "";
|
||||
if (!uri) continue;
|
||||
name = (typeof file.name === "string" && file.name) || basename(uri);
|
||||
mimeType = typeof file.mimeType === "string" ? file.mimeType : undefined;
|
||||
size = typeof file.size === "number" ? file.size : undefined;
|
||||
} else {
|
||||
// v1 flat shape: url + filename + mediaType (camelCase from
|
||||
// protobuf JSON serialization of media_type).
|
||||
uri = v1Url;
|
||||
const v1Name = typeof raw.filename === "string" ? raw.filename : "";
|
||||
name = v1Name || basename(uri);
|
||||
mimeType = typeof raw.mediaType === "string" ? raw.mediaType : undefined;
|
||||
}
|
||||
|
||||
out.push({ name, uri, mimeType, size });
|
||||
}
|
||||
};
|
||||
try {
|
||||
|
||||
@ -844,26 +844,61 @@ def resolve_attachment_uri(uri: str) -> str | None:
|
||||
def extract_attached_files(message: Any) -> list[dict[str, str]]:
|
||||
"""Pull ``{name, mime_type, path}`` dicts out of an A2A message.
|
||||
|
||||
Handles the discriminated-union shape ``part.root.file`` that a2a-sdk
|
||||
produces via Pydantic RootModel, and the flatter ``part.file`` shape
|
||||
hand-built callers sometimes emit. Non-file parts and files with
|
||||
unresolvable URIs are skipped — the caller sees an empty list rather
|
||||
than a mix of valid and broken entries.
|
||||
Tolerates three Part shapes seen in the wild:
|
||||
|
||||
1. a2a-sdk v0 Pydantic RootModel — ``part.root.kind == 'file'`` with
|
||||
``part.root.file.{uri,name,mimeType}``.
|
||||
2. a2a-sdk v0 flatter shape — ``part.kind == 'file'`` with
|
||||
``part.file.{uri,name,mimeType}`` (some hand-built callers).
|
||||
3. a2a-sdk v1 protobuf — ``part.url`` non-empty with
|
||||
``part.filename`` + ``part.media_type``. The v1 ``Part`` proto
|
||||
has no ``kind`` field at all (the discriminator is now a oneof
|
||||
``content`` of {text, raw, url, data}). Without this branch a v1
|
||||
file part — which is what a v1 server constructs from any caller
|
||||
that JSON-encodes the v1 shape — silently parses to an empty
|
||||
Part on the v0→v1 transition because protobuf json_format with
|
||||
``ignore_unknown_fields=True`` drops the legacy ``kind`` and
|
||||
``file`` keys, surfacing as the user-visible
|
||||
"Error: message contained no text content" on image-only chats
|
||||
(2026-05-01 hongming incident).
|
||||
|
||||
Non-file parts and files with unresolvable URIs are skipped — the
|
||||
caller sees an empty list rather than a mix of valid and broken
|
||||
entries.
|
||||
"""
|
||||
if message is None:
|
||||
return []
|
||||
parts = getattr(message, "parts", None) or []
|
||||
out: list[dict[str, str]] = []
|
||||
for part in parts:
|
||||
uri = ""
|
||||
name = ""
|
||||
mime = ""
|
||||
|
||||
root = getattr(part, "root", part)
|
||||
if getattr(root, "kind", None) != "file":
|
||||
continue
|
||||
f = getattr(root, "file", None)
|
||||
if f is None:
|
||||
continue
|
||||
uri = getattr(f, "uri", "") or ""
|
||||
name = getattr(f, "name", "") or ""
|
||||
mime = getattr(f, "mimeType", None) or getattr(f, "mime_type", None) or ""
|
||||
if getattr(root, "kind", None) == "file":
|
||||
f = getattr(root, "file", None)
|
||||
if f is None:
|
||||
continue
|
||||
uri = getattr(f, "uri", "") or ""
|
||||
name = getattr(f, "name", "") or ""
|
||||
mime = getattr(f, "mimeType", None) or getattr(f, "mime_type", None) or ""
|
||||
else:
|
||||
# v1 protobuf Part has no `kind`; detect by a non-empty
|
||||
# `url` (the file/url-of-bytes oneof slot). Fall back to
|
||||
# `media_type` then `mimeType` for the camelCase Pydantic
|
||||
# variant some adapters still hand us.
|
||||
v1_url = getattr(part, "url", "") or ""
|
||||
if not v1_url:
|
||||
continue
|
||||
uri = v1_url
|
||||
name = getattr(part, "filename", "") or ""
|
||||
mime = (
|
||||
getattr(part, "media_type", None)
|
||||
or getattr(part, "mediaType", None)
|
||||
or ""
|
||||
)
|
||||
|
||||
path = resolve_attachment_uri(uri)
|
||||
if not path or not os.path.isfile(path):
|
||||
logger.warning("skipping attached file with unresolvable uri=%r", uri)
|
||||
|
||||
@ -767,6 +767,66 @@ def test_extract_attached_files_accepts_both_shapes(tmp_path, monkeypatch):
|
||||
assert {f["name"] for f in out} == {"a.txt", "b.txt"}
|
||||
|
||||
|
||||
def test_extract_attached_files_accepts_v1_protobuf_part(tmp_path, monkeypatch):
|
||||
"""a2a-sdk v1 protobuf ``Part`` has fields
|
||||
``[text, raw, url, data, metadata, filename, media_type]`` — no
|
||||
``kind`` field at all (the discriminator is now a oneof
|
||||
``content`` of {text, raw, url, data}). Without v1-shape tolerance,
|
||||
every file part on the v0→v1 transition silently parses to an
|
||||
empty Part and surfaces as the user-visible
|
||||
"Error: message contained no text content" on image-only chats
|
||||
(2026-05-01 hongming incident).
|
||||
|
||||
This pins the v1 detection: a non-empty ``url`` plus ``filename``
|
||||
+ ``media_type`` is treated as a file part regardless of the
|
||||
missing ``kind``. The conftest stub ``Part`` mirrors v1's flat
|
||||
field shape (kwargs become attributes) so extracting via getattr
|
||||
sees the same surface the real protobuf does."""
|
||||
from types import SimpleNamespace
|
||||
from executor_helpers import extract_attached_files
|
||||
|
||||
img = tmp_path / "screenshot.png"
|
||||
img.write_bytes(b"\x89PNG\r\n\x1a\n")
|
||||
monkeypatch.setattr("executor_helpers.WORKSPACE_MOUNT", str(tmp_path))
|
||||
|
||||
# v1 protobuf surface: flat Part with url/filename/media_type, no kind.
|
||||
v1_part = SimpleNamespace(
|
||||
url=f"workspace:{img}",
|
||||
filename="screenshot.png",
|
||||
media_type="image/png",
|
||||
)
|
||||
msg = SimpleNamespace(parts=[v1_part])
|
||||
out = extract_attached_files(msg)
|
||||
assert len(out) == 1
|
||||
assert out[0]["name"] == "screenshot.png"
|
||||
assert out[0]["mime_type"] == "image/png"
|
||||
assert out[0]["path"] == str(img)
|
||||
|
||||
|
||||
def test_extract_attached_files_empty_v1_part_returns_empty(tmp_path, monkeypatch):
|
||||
"""Documents the v0→v1 silent-drop failure mode this fix defends
|
||||
against. When canvas pre-fix sends ``{kind:"file", file:{...}}``
|
||||
and the a2a-sdk v1 protobuf parser receives it with
|
||||
``ignore_unknown_fields=True``, both legacy keys silently drop —
|
||||
the resulting Part has every field empty. The helper must NOT
|
||||
raise and must return ``[]`` — empty, not crashy.
|
||||
|
||||
The real fix is shipping the canvas v1 shape; this test pins the
|
||||
runtime's defense so a template stuck on an old wheel against a
|
||||
new canvas still fails closed (empty attachments + agent
|
||||
proceeds) rather than mid-turn."""
|
||||
from types import SimpleNamespace
|
||||
from executor_helpers import extract_attached_files
|
||||
|
||||
monkeypatch.setattr("executor_helpers.WORKSPACE_MOUNT", str(tmp_path))
|
||||
# Empty Part — no kind, no url, no filename, no media_type. This is
|
||||
# the all-empty proto state json_format leaves behind on the v0→v1
|
||||
# silent-drop. The helper must skip it without raising.
|
||||
empty_v1_part = SimpleNamespace()
|
||||
msg = SimpleNamespace(parts=[empty_v1_part])
|
||||
assert extract_attached_files(msg) == []
|
||||
|
||||
|
||||
def test_build_user_content_with_files_no_attachments_is_string():
|
||||
"""Zero attachments → plain string so models without multi-modal
|
||||
support (most non-vision LLMs) see the same payload shape they always
|
||||
|
||||
Loading…
Reference in New Issue
Block a user