diff --git a/workspace/a2a_mcp_server.py b/workspace/a2a_mcp_server.py index cdfe2e2d..a681e5a5 100644 --- a/workspace/a2a_mcp_server.py +++ b/workspace/a2a_mcp_server.py @@ -113,17 +113,44 @@ TOOLS = [ }, { "name": "send_message_to_user", - "description": "Send a message directly to the user's canvas chat — pushed instantly via WebSocket. Use this to: (1) acknowledge a task immediately ('Got it, I'll start working on this'), (2) send interim progress updates while doing long work, (3) deliver follow-up results after delegation completes, (4) attach files (zip, pdf, csv, image) for the user to download. The message appears in the user's chat as if you're proactively reaching out.", + "description": "Send a message directly to the user's canvas chat — pushed instantly via WebSocket. Use this to: (1) acknowledge a task immediately ('Got it, I'll start working on this'), (2) send interim progress updates while doing long work, (3) deliver follow-up results after delegation completes, (4) attach files (zip, pdf, csv, image) for the user to download via the `attachments` field (NEVER paste file URLs in `message`). The message appears in the user's chat as if you're proactively reaching out.", "inputSchema": { "type": "object", "properties": { "message": { "type": "string", - "description": "The message to send to the user. Required even when sending attachments — set to a short caption like 'Here's the build:' or 'Done — see attached.'", + # The "no URLs in message text" rule is the single biggest + # cause of bad chat UX: agents drop catbox.moe / file:// + # / temporary upload-host links into the prose, the + # canvas renders them as plain markdown links the user + # can't preview, and SaaS deployments often can't even + # reach those external hosts. Every download MUST go + # through the structured `attachments` field below. + "description": ( + "Caption text for the chat bubble. Required even when sending " + "attachments — set to a short label like 'Here's the build:' " + "or 'Done — see attached.'\n\n" + "DO NOT paste file URLs, download links, or container paths in " + "this string. Files MUST go through the `attachments` field, " + "which renders as a clickable download chip and works on SaaS " + "deployments where external file-host URLs (catbox.moe, file://, " + "etc.) are unreachable from the user's browser." + ), }, "attachments": { "type": "array", - "description": "Optional list of absolute file paths inside this container to attach. Each renders as a clickable download chip in the user's chat. Use this whenever you'd otherwise paste a path in the message text — paths render as plain text the user can't click. Examples: ['/tmp/build-output.zip'] or ['/workspace/report.pdf', '/workspace/data.csv']. Files are uploaded through the platform's chat-uploads endpoint (25 MB per file cap).", + "description": ( + "REQUIRED for any file delivery. Pass absolute file paths inside " + "THIS container (e.g. ['/tmp/build.zip', '/workspace/report.pdf']) " + "— the platform uploads each file and returns a download chip " + "with the file's icon + name + size in the user's chat. The chip " + "works in SaaS deployments because the URL is platform-served, " + "not an external host.\n\n" + "USE THIS instead of: pasting URLs in `message`, base64-encoding " + "in the body, or telling the user to look at a path on disk. " + "If the file isn't already on disk, write it first (Bash, Write " + "tool, etc.) then pass its path here. 25 MB per file cap." + ), "items": {"type": "string"}, }, }, diff --git a/workspace/tests/test_a2a_mcp_server.py b/workspace/tests/test_a2a_mcp_server.py index a23df15c..8969abcb 100644 --- a/workspace/tests/test_a2a_mcp_server.py +++ b/workspace/tests/test_a2a_mcp_server.py @@ -74,3 +74,67 @@ async def test_handle_tool_call_missing_args_defaults(): # No workspace_id or task in arguments — defaults to "" result = await handle_tool_call("delegate_task", {}) assert result == "ok" + + +# --------------------------------------------------------------------------- +# Tool description steering — load-bearing prompts that train the LLM to +# use structured fields instead of pasting URLs in chat (task #118). +# +# Pin specific phrases so a future doc edit that softens or drops them +# fails this test. Production symptom of regression: agent pastes +# https://files.catbox.moe/... in the message body, canvas renders it as +# a plain text link the user can't click on a SaaS deployment where the +# external host is unreachable. +# --------------------------------------------------------------------------- + + +def _send_message_to_user_tool() -> dict: + from a2a_mcp_server import TOOLS + matches = [t for t in TOOLS if t["name"] == "send_message_to_user"] + assert len(matches) == 1, "send_message_to_user not found in TOOLS" + return matches[0] + + +def test_send_message_to_user_top_description_warns_against_pasting_urls(): + desc = _send_message_to_user_tool()["description"] + # Combined: "NEVER paste file URLs in `message`" inside the tool-level + # description. Without this the LLM frequently pastes URLs into the + # message body and the canvas renders a plain markdown link. + assert "NEVER paste file URLs" in desc, ( + "send_message_to_user top description must explicitly forbid pasting " + "file URLs in `message`. Pre-#118 the description omitted this rule " + "and agents routinely shipped catbox.moe / file:// links in chat." + ) + + +def test_message_param_description_says_DO_NOT_paste_URLs(): + desc = _send_message_to_user_tool()["inputSchema"]["properties"]["message"]["description"] + # Caps lock matters — claude-code/hermes both responded better to the + # all-caps version in informal testing during #118 prep. If a future + # edit lowercases it, we lose that prompt-engineering signal. + assert "DO NOT paste file URLs" in desc, ( + "`message` param description must include the all-caps DO NOT rule" + ) + # SaaS reachability is the WHY — operators have asked for that + # rationale to be explicit because external file hosts work in + # self-hosted dev but break under SaaS where the user's browser + # can't reach the agent's outbound network. + assert "SaaS deployments" in desc, ( + "`message` param description must explain the SaaS reachability " + "rationale, not just the rule" + ) + + +def test_attachments_param_description_emphasizes_REQUIRED(): + desc = _send_message_to_user_tool()["inputSchema"]["properties"]["attachments"]["description"] + assert "REQUIRED for any file delivery" in desc, ( + "`attachments` description must lead with REQUIRED so the LLM picks " + "this field instead of putting paths in `message`" + ) + # Spell out the alternatives the agent should NOT use, so the LLM has + # an explicit list of bad patterns to avoid (instead of relying on it + # to infer). + for forbidden in ("pasting URLs", "base64-encoding", "telling the user to look at a path"): + assert forbidden in desc, ( + f"`attachments` description must call out {forbidden!r} as a wrong alternative" + )