From 166c7f77afef3560899b80fb44c142de7536a70c Mon Sep 17 00:00:00 2001
From: Hongming Wang <hongmingwangrabbit@gmail.com>
Date: Sat, 25 Apr 2026 08:28:55 -0700
Subject: [PATCH] feat(chat): stream per-tool progress into MyChat live feed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two halves of the same UX win — the user wants to see what Claude is
doing while a chat reply is in flight instead of staring at "0s" for
minutes.

Workspace side (claude_sdk_executor.py):
  - The executor's _run_query message loop already iterated the SDK
    stream for AssistantMessage.TextBlock content. Now also detects
    ToolUseBlock / ServerToolUseBlock entries (by class name, since
    the conftest stub doesn't define them) and fires-and-forgets a
    POST /workspaces/:id/activity row of type agent_log per tool use.
  - _summarize_tool_use maps the common tools (Read, Write, Edit,
    Bash, Glob, Grep, WebFetch, WebSearch, Task, TodoWrite) to a
    one-line summary with the file path / pattern / command, falling
    back to "🛠 <tool>(…)" for anything else. Truncated at 200 chars.
  - Posts directly to /workspaces/:id/activity rather than going
    through a2a_tools.report_activity, which would also push a
    /registry/heartbeat current_task and double-log as a TASK_UPDATED
    line in the same chat feed.
  - All failures swallowed silently — telemetry must not break
    the conversation.

Canvas side (ChatTab.tsx):
  - The existing ACTIVITY_LOGGED handler streams a2a_send /
    a2a_receive / task_update events into a sliding-window
    activityLog state. Two issues fixed:
      1. No `msg.workspace_id === workspaceId` filter — a sibling
         workspace's a2a_send was leaking into the wrong chat
         panel as "→ Delegating to X...". Added an early return.
      2. No agent_log render branch. Added one that renders the
         summary verbatim (the workspace already prefixed its
         own emoji icon, so no double-icon).
  - Existing 8-line sliding window keeps the UI scoped; older
    progress lines naturally roll off as new ones arrive.

Result: when DD is delegating to Visual Designer + reading
config files + running Bash to lint, the spinner area shows:
  📄 Read /configs/system-prompt.md
  ⚡ Bash: pnpm test
  → Delegating to Visual Designer...
  ← Visual Designer responded (47s)

instead of bare "0s · Processing with Claude Code..." for minutes.

63 Python tests + 58 canvas chat tests pass; tsc clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 canvas/src/components/tabs/ChatTab.tsx | 17 ++++-
 workspace/claude_sdk_executor.py       | 94 ++++++++++++++++++++++++++
 2 files changed, 110 insertions(+), 1 deletion(-)
diff --git a/canvas/src/components/tabs/ChatTab.tsx b/canvas/src/components/tabs/ChatTab.tsx
index 9b6dd1cd..ce1f3bed 100644
--- a/canvas/src/components/tabs/ChatTab.tsx
+++ b/canvas/src/components/tabs/ChatTab.tsx
@@ -372,12 +372,21 @@ function MyChatPanel({ workspaceId, data }: Props) {
       try {
         const msg = JSON.parse(event.data);
         if (msg.event === "ACTIVITY_LOGGED") {
+          // Filter to events for THIS workspace. The platform's
+          // BroadcastOnly fires to every connected client, and
+          // without this guard a sibling workspace's a2a_send would
+          // surface as "→ Delegating to X..." inside the wrong
+          // chat panel. (workspace_id on the WS envelope is the
+          // workspace whose activity_log row we just wrote.)
+          if (msg.workspace_id !== workspaceId) return;
+
           const p = msg.payload || {};
           const type = p.activity_type as string;
           const method = (p.method as string) || "";
           const status = (p.status as string) || "";
           const targetId = (p.target_id as string) || "";
           const durationMs = p.duration_ms as number | undefined;
+          const summary = (p.summary as string) || "";
 
           let line = "";
           if (type === "a2a_receive" && method === "message/send") {
@@ -408,8 +417,14 @@ function MyChatPanel({ workspaceId, data }: Props) {
             const targetName = resolveWorkspaceName(targetId);
             line = `→ Delegating to ${targetName}...`;
           } else if (type === "task_update") {
-            const summary = (p.summary as string) || "";
             if (summary) line = `⟳ ${summary}`;
+          } else if (type === "agent_log") {
+            // Per-tool-use telemetry from claude_sdk_executor's
+            // _report_tool_use. The summary already carries an icon
+            // + human-readable args (📄 Read /path, ⚡ Bash: …)
+            // so we render it verbatim. No icon prefix here — the
+            // emoji at the start of summary is the visual marker.
+            if (summary) line = summary;
           }
 
           if (line) {
diff --git a/workspace/claude_sdk_executor.py b/workspace/claude_sdk_executor.py
index 1c65c346..d0b53b07 100644
--- a/workspace/claude_sdk_executor.py
+++ b/workspace/claude_sdk_executor.py
@@ -138,6 +138,92 @@ def _reset_sdk_wedge_for_test() -> None:
     _sdk_wedged_reason = None
 
 
+# Per-tool-use summarizers. Reads the most-useful argument from each
+# tool's input dict so the canvas progress feed shows
+# `🛠 Read /tmp/foo` instead of the bare tool name. Anything not in the
+# table falls through to a generic "🛠 <tool>(…)" line. Order keys by
+# tool frequency so a future contributor can see the high-traffic
+# tools first.
+_TOOL_USE_SUMMARIZERS: "dict[str, callable[[dict], str]]" = {
+    "Read":  lambda i: f"📄 Read {i.get('file_path', '?')}",
+    "Write": lambda i: f"✍️  Write {i.get('file_path', '?')}",
+    "Edit":  lambda i: f"✏️  Edit {i.get('file_path', '?')}",
+    "Bash":  lambda i: f"⚡ Bash: {(i.get('command') or '')[:80]}",
+    "Glob":  lambda i: f"🔍 Glob {i.get('pattern', '?')}",
+    "Grep":  lambda i: f"🔍 Grep {i.get('pattern', '?')}",
+    "WebFetch": lambda i: f"🌐 WebFetch {i.get('url', '?')}",
+    "WebSearch": lambda i: f"🌐 WebSearch {i.get('query', '?')}",
+    "Task":  lambda i: f"🤖 Task: {(i.get('description') or '')[:60]}",
+    "TodoWrite": lambda _i: "📝 TodoWrite",
+}
+
+
+def _summarize_tool_use(tool_name: str, tool_input: dict) -> str:
+    summarizer = _TOOL_USE_SUMMARIZERS.get(tool_name)
+    if summarizer:
+        try:
+            return summarizer(tool_input or {})[:200]
+        except Exception:
+            pass
+    # Generic fallback. Truncated so a tool with a giant input dict
+    # doesn't write a 10kB activity row per call.
+    return f"🛠 {tool_name}(…)"[:200]
+
+
+async def _report_tool_use(block: Any) -> None:
+    """Fire-and-forget agent_log activity row per tool the SDK invoked,
+    so the canvas's MyChat live-progress feed can render each step
+    Claude is doing instead of staring at a single spinner.
+
+    Posts directly to /workspaces/:id/activity rather than through
+    a2a_tools.report_activity — that helper also pushes a current_task
+    heartbeat which would duplicate as a TASK_UPDATED line in the
+    chat feed. The workspace card's current_task is already set
+    once per turn by the executor's set_current_task(brief_summary)
+    call, so the per-tool telemetry stays a chat-only signal.
+
+    Best-effort — any failure (network blip, platform unreachable, the
+    block didn't have the attrs we expected) is swallowed silently.
+    The tool will still execute regardless; only the progress
+    telemetry is lost. Deliberately does NOT raise — a malformed
+    block must not abort the message-stream iteration in
+    `_run_query`.
+    """
+    try:
+        # Lazy imports to keep this helper non-essential — the
+        # executor must still run when the workspace's network/auth
+        # plumbing isn't fully set up (e.g. unit tests).
+        import httpx
+        from a2a_client import PLATFORM_URL, WORKSPACE_ID
+        from platform_auth import auth_headers
+    except Exception:
+        return
+    try:
+        tool_name = getattr(block, "name", "") or ""
+        tool_input = getattr(block, "input", {}) or {}
+        if not tool_name:
+            return
+        summary = _summarize_tool_use(tool_name, tool_input)
+        # 5s budget — long enough to absorb a single platform GC
+        # pause, short enough that a wedged platform doesn't slow
+        # the tool-iteration cadence beyond noticeable.
+        async with httpx.AsyncClient(timeout=5.0) as client:
+            await client.post(
+                f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/activity",
+                json={
+                    "activity_type": "agent_log",
+                    "source_id": WORKSPACE_ID,
+                    "summary": summary,
+                    "status": "ok",
+                    "method": tool_name,
+                },
+                headers=auth_headers(),
+            )
+    except Exception:
+        # Telemetry failures must not break the conversation.
+        return
+
+
 # Substring patterns that classify an exception as the specific
 # claude_agent_sdk init-timeout wedge (vs. a rate-limit, transient
 # subprocess crash, etc.). Match is case-insensitive on the formatted
@@ -408,6 +494,14 @@ class ClaudeSDKExecutor(AgentExecutor):
                     for block in message.content:
                         if isinstance(block, sdk.TextBlock):
                             assistant_chunks.append(block.text)
+                        else:
+                            # ToolUseBlock / ServerToolUseBlock are present
+                            # on the real SDK but not on the conftest stub —
+                            # check by class name to avoid an isinstance()
+                            # against a class the stub doesn't define.
+                            cls = type(block).__name__
+                            if cls in ("ToolUseBlock", "ServerToolUseBlock"):
+                                await _report_tool_use(block)
                 elif isinstance(message, sdk.ResultMessage):
                     sid = getattr(message, "session_id", None)
                     if sid: