From 166c7f77afef3560899b80fb44c142de7536a70c Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Sat, 25 Apr 2026 08:28:55 -0700 Subject: [PATCH] feat(chat): stream per-tool progress into MyChat live feed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two halves of the same UX win โ€” the user wants to see what Claude is doing while a chat reply is in flight instead of staring at "0s" for minutes. Workspace side (claude_sdk_executor.py): - The executor's _run_query message loop already iterated the SDK stream for AssistantMessage.TextBlock content. Now also detects ToolUseBlock / ServerToolUseBlock entries (by class name, since the conftest stub doesn't define them) and fires-and-forgets a POST /workspaces/:id/activity row of type agent_log per tool use. - _summarize_tool_use maps the common tools (Read, Write, Edit, Bash, Glob, Grep, WebFetch, WebSearch, Task, TodoWrite) to a one-line summary with the file path / pattern / command, falling back to "๐Ÿ›  (โ€ฆ)" for anything else. Truncated at 200 chars. - Posts directly to /workspaces/:id/activity rather than going through a2a_tools.report_activity, which would also push a /registry/heartbeat current_task and double-log as a TASK_UPDATED line in the same chat feed. - All failures swallowed silently โ€” telemetry must not break the conversation. Canvas side (ChatTab.tsx): - The existing ACTIVITY_LOGGED handler streams a2a_send / a2a_receive / task_update events into a sliding-window activityLog state. Two issues fixed: 1. No `msg.workspace_id === workspaceId` filter โ€” a sibling workspace's a2a_send was leaking into the wrong chat panel as "โ†’ Delegating to X...". Added an early return. 2. No agent_log render branch. Added one that renders the summary verbatim (the workspace already prefixed its own emoji icon, so no double-icon). - Existing 8-line sliding window keeps the UI scoped; older progress lines naturally roll off as new ones arrive. Result: when DD is delegating to Visual Designer + reading config files + running Bash to lint, the spinner area shows: ๐Ÿ“„ Read /configs/system-prompt.md โšก Bash: pnpm test โ†’ Delegating to Visual Designer... โ† Visual Designer responded (47s) instead of bare "0s ยท Processing with Claude Code..." for minutes. 63 Python tests + 58 canvas chat tests pass; tsc clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- canvas/src/components/tabs/ChatTab.tsx | 17 ++++- workspace/claude_sdk_executor.py | 94 ++++++++++++++++++++++++++ 2 files changed, 110 insertions(+), 1 deletion(-) diff --git a/canvas/src/components/tabs/ChatTab.tsx b/canvas/src/components/tabs/ChatTab.tsx index 9b6dd1cd..ce1f3bed 100644 --- a/canvas/src/components/tabs/ChatTab.tsx +++ b/canvas/src/components/tabs/ChatTab.tsx @@ -372,12 +372,21 @@ function MyChatPanel({ workspaceId, data }: Props) { try { const msg = JSON.parse(event.data); if (msg.event === "ACTIVITY_LOGGED") { + // Filter to events for THIS workspace. The platform's + // BroadcastOnly fires to every connected client, and + // without this guard a sibling workspace's a2a_send would + // surface as "โ†’ Delegating to X..." inside the wrong + // chat panel. (workspace_id on the WS envelope is the + // workspace whose activity_log row we just wrote.) + if (msg.workspace_id !== workspaceId) return; + const p = msg.payload || {}; const type = p.activity_type as string; const method = (p.method as string) || ""; const status = (p.status as string) || ""; const targetId = (p.target_id as string) || ""; const durationMs = p.duration_ms as number | undefined; + const summary = (p.summary as string) || ""; let line = ""; if (type === "a2a_receive" && method === "message/send") { @@ -408,8 +417,14 @@ function MyChatPanel({ workspaceId, data }: Props) { const targetName = resolveWorkspaceName(targetId); line = `โ†’ Delegating to ${targetName}...`; } else if (type === "task_update") { - const summary = (p.summary as string) || ""; if (summary) line = `โŸณ ${summary}`; + } else if (type === "agent_log") { + // Per-tool-use telemetry from claude_sdk_executor's + // _report_tool_use. The summary already carries an icon + // + human-readable args (๐Ÿ“„ Read /path, โšก Bash: โ€ฆ) + // so we render it verbatim. No icon prefix here โ€” the + // emoji at the start of summary is the visual marker. + if (summary) line = summary; } if (line) { diff --git a/workspace/claude_sdk_executor.py b/workspace/claude_sdk_executor.py index 1c65c346..d0b53b07 100644 --- a/workspace/claude_sdk_executor.py +++ b/workspace/claude_sdk_executor.py @@ -138,6 +138,92 @@ def _reset_sdk_wedge_for_test() -> None: _sdk_wedged_reason = None +# Per-tool-use summarizers. Reads the most-useful argument from each +# tool's input dict so the canvas progress feed shows +# `๐Ÿ›  Read /tmp/foo` instead of the bare tool name. Anything not in the +# table falls through to a generic "๐Ÿ›  (โ€ฆ)" line. Order keys by +# tool frequency so a future contributor can see the high-traffic +# tools first. +_TOOL_USE_SUMMARIZERS: "dict[str, callable[[dict], str]]" = { + "Read": lambda i: f"๐Ÿ“„ Read {i.get('file_path', '?')}", + "Write": lambda i: f"โœ๏ธ Write {i.get('file_path', '?')}", + "Edit": lambda i: f"โœ๏ธ Edit {i.get('file_path', '?')}", + "Bash": lambda i: f"โšก Bash: {(i.get('command') or '')[:80]}", + "Glob": lambda i: f"๐Ÿ” Glob {i.get('pattern', '?')}", + "Grep": lambda i: f"๐Ÿ” Grep {i.get('pattern', '?')}", + "WebFetch": lambda i: f"๐ŸŒ WebFetch {i.get('url', '?')}", + "WebSearch": lambda i: f"๐ŸŒ WebSearch {i.get('query', '?')}", + "Task": lambda i: f"๐Ÿค– Task: {(i.get('description') or '')[:60]}", + "TodoWrite": lambda _i: "๐Ÿ“ TodoWrite", +} + + +def _summarize_tool_use(tool_name: str, tool_input: dict) -> str: + summarizer = _TOOL_USE_SUMMARIZERS.get(tool_name) + if summarizer: + try: + return summarizer(tool_input or {})[:200] + except Exception: + pass + # Generic fallback. Truncated so a tool with a giant input dict + # doesn't write a 10kB activity row per call. + return f"๐Ÿ›  {tool_name}(โ€ฆ)"[:200] + + +async def _report_tool_use(block: Any) -> None: + """Fire-and-forget agent_log activity row per tool the SDK invoked, + so the canvas's MyChat live-progress feed can render each step + Claude is doing instead of staring at a single spinner. + + Posts directly to /workspaces/:id/activity rather than through + a2a_tools.report_activity โ€” that helper also pushes a current_task + heartbeat which would duplicate as a TASK_UPDATED line in the + chat feed. The workspace card's current_task is already set + once per turn by the executor's set_current_task(brief_summary) + call, so the per-tool telemetry stays a chat-only signal. + + Best-effort โ€” any failure (network blip, platform unreachable, the + block didn't have the attrs we expected) is swallowed silently. + The tool will still execute regardless; only the progress + telemetry is lost. Deliberately does NOT raise โ€” a malformed + block must not abort the message-stream iteration in + `_run_query`. + """ + try: + # Lazy imports to keep this helper non-essential โ€” the + # executor must still run when the workspace's network/auth + # plumbing isn't fully set up (e.g. unit tests). + import httpx + from a2a_client import PLATFORM_URL, WORKSPACE_ID + from platform_auth import auth_headers + except Exception: + return + try: + tool_name = getattr(block, "name", "") or "" + tool_input = getattr(block, "input", {}) or {} + if not tool_name: + return + summary = _summarize_tool_use(tool_name, tool_input) + # 5s budget โ€” long enough to absorb a single platform GC + # pause, short enough that a wedged platform doesn't slow + # the tool-iteration cadence beyond noticeable. + async with httpx.AsyncClient(timeout=5.0) as client: + await client.post( + f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/activity", + json={ + "activity_type": "agent_log", + "source_id": WORKSPACE_ID, + "summary": summary, + "status": "ok", + "method": tool_name, + }, + headers=auth_headers(), + ) + except Exception: + # Telemetry failures must not break the conversation. + return + + # Substring patterns that classify an exception as the specific # claude_agent_sdk init-timeout wedge (vs. a rate-limit, transient # subprocess crash, etc.). Match is case-insensitive on the formatted @@ -408,6 +494,14 @@ class ClaudeSDKExecutor(AgentExecutor): for block in message.content: if isinstance(block, sdk.TextBlock): assistant_chunks.append(block.text) + else: + # ToolUseBlock / ServerToolUseBlock are present + # on the real SDK but not on the conftest stub โ€” + # check by class name to avoid an isinstance() + # against a class the stub doesn't define. + cls = type(block).__name__ + if cls in ("ToolUseBlock", "ServerToolUseBlock"): + await _report_tool_use(block) elif isinstance(message, sdk.ResultMessage): sid = getattr(message, "session_id", None) if sid: