From ddf6720498ac1bc4def5716fbd1c2839e26c131c Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Tue, 28 Apr 2026 20:42:15 -0700 Subject: [PATCH 1/2] chore(registry): snapshot tests + CLI-block alignment for #2240 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two follow-ups from the #2240 code review: 1. Snapshot tests for the rendered tool-instruction blocks. The structural tests added in #2240 guarantee tool NAMES are present; these new tests pin the SHAPE — bullet ordering, heading style, footer placement — so a future contributor who reorders fields in `_render_section` or rewrites a `when_to_use` paragraph sees the diff in CI rather than shipping a silently-different system prompt. Golden files live under workspace/tests/snapshots/. 2. CLI-block alignment test + corrected source-of-truth comment. `_A2A_INSTRUCTIONS_CLI` is a separate hand-maintained surface for ollama and other non-MCP runtimes — the registry can't auto-generate it because the CLI subprocess interface uses different command shapes (`peers` vs `list_peers`, etc.). A new `_CLI_A2A_COMMAND_KEYWORDS` mapping declares the registry-tool → CLI-keyword correspondence (or explicit `None` for tools not exposed via subprocess). Two tests enforce coverage: - every a2a tool in the registry is keyed in the mapping - every non-None subcommand keyword literally appears in `_A2A_INSTRUCTIONS_CLI` Caught one real gap: `send_message_to_user` is in the registry but has no CLI subcommand. Mapped to `None` with an explanatory comment. The "no other source of truth" claim in registry.py's docstring was wrong post-#2240 (the CLI block survived) — corrected to describe the two surfaces explicitly and point at the alignment tests as the gate. Co-Authored-By: Claude Opus 4.7 (1M context) --- workspace/executor_helpers.py | 37 ++++++ workspace/platform_tools/registry.py | 31 +++-- .../tests/snapshots/a2a_instructions_cli.txt | 10 ++ .../tests/snapshots/a2a_instructions_mcp.txt | 28 +++++ .../tests/snapshots/hma_instructions.txt | 12 ++ workspace/tests/test_platform_tools.py | 119 ++++++++++++++++++ 6 files changed, 229 insertions(+), 8 deletions(-) create mode 100644 workspace/tests/snapshots/a2a_instructions_cli.txt create mode 100644 workspace/tests/snapshots/a2a_instructions_mcp.txt create mode 100644 workspace/tests/snapshots/hma_instructions.txt diff --git a/workspace/executor_helpers.py b/workspace/executor_helpers.py index 757061b1..19b5769f 100644 --- a/workspace/executor_helpers.py +++ b/workspace/executor_helpers.py @@ -298,6 +298,43 @@ You can delegate tasks to other workspaces using the a2a command: For quick questions, use sync delegate. For long tasks, use --async + status. Only delegate to peers listed by the peers command (access control enforced).""" +# Maps every a2a-section registry tool to the substring that MUST appear +# in `_A2A_INSTRUCTIONS_CLI` for CLI-runtime agents to discover it. The +# CLI subprocess interface uses different command-shape names than the +# MCP tool names (e.g. `peers` vs `list_peers`), so this is NOT a +# generated mapping — it's a hand-maintained alignment table. +# +# `None` declares "this MCP tool is intentionally NOT exposed via the +# CLI subprocess interface" — make the decision explicit so adding a +# new registry tool fails the alignment test until the mapping is +# updated. test_platform_tools.py asserts both directions: +# +# 1. every a2a tool in the registry is keyed here (no silent omission) +# 2. every non-None substring actually appears in `_A2A_INSTRUCTIONS_CLI` +# +# Why hand-maintained: the registry is the source of truth for +# MCP-capable runtimes, but the CLI subprocess interface in +# `molecule_runtime.a2a_cli` is a separate surface with its own command +# vocabulary. Auto-generating CLI command lines from JSON-schema specs +# would lose the human-readable invocation syntax (`delegate ` +# vs. `--workspace_id=... --task=...`). The mapping + test gives us +# alignment without forcing a uniform shape. +_CLI_A2A_COMMAND_KEYWORDS: dict[str, str | None] = { + "list_peers": "peers", + "delegate_task": "delegate ", # trailing space disambiguates from "--async" line + "delegate_task_async": "delegate --async", + "check_task_status": "status", + "get_workspace_info": "info", + # `send_message_to_user` is not exposed via the CLI subprocess + # interface today — it requires a structured `attachments` field + # that wouldn't survive a positional-arg shell invocation cleanly. + # CLI-runtime agents fall back to printing results to stdout (which + # the runtime forwards to the user) instead. If the a2a_cli ever + # grows a `say` or `message` subcommand, change `None` to that + # keyword and the alignment test will start passing. + "send_message_to_user": None, +} + def _render_section(heading: str, specs, footer: str = "") -> str: """Render a section: heading, per-tool bullet, per-tool when_to_use, footer.""" diff --git a/workspace/platform_tools/registry.py b/workspace/platform_tools/registry.py index 3a3558cc..d0f12cb0 100644 --- a/workspace/platform_tools/registry.py +++ b/workspace/platform_tools/registry.py @@ -13,20 +13,35 @@ runtime format: wrappers using `name=` from the spec; the wrapper body just calls spec.impl. - - executor_helpers.get_a2a_instructions() / get_hma_instructions() - GENERATE the system-prompt doc string from `TOOLS` — no - hand-maintained instruction text. + - executor_helpers.get_a2a_instructions(mcp=True) / + get_hma_instructions() GENERATE the system-prompt doc string from + `TOOLS` — no hand-maintained instruction text for MCP-capable + runtimes. -Adding a new tool: append a ToolSpec to `TOOLS` below. Every adapter -picks it up. Structural alignment tests (workspace/tests/test_platform_tools.py) -fail if any side drifts from the registry. + - executor_helpers._A2A_INSTRUCTIONS_CLI is a SEPARATE hand-maintained + block for CLI subprocess runtimes (ollama and any other adapter + that drives a2a via `python3 -m molecule_runtime.a2a_cli ...`). It + uses different command-shape names than the registry tool names + (e.g. `peers` vs `list_peers`), so it cannot be auto-generated + from JSON-schema specs without losing the readable invocation + syntax. Its tool-coverage alignment with the registry is enforced + by the `_CLI_A2A_COMMAND_KEYWORDS` mapping in executor_helpers.py + and the alignment tests in test_platform_tools.py — adding a new + a2a tool here will fail those tests until the mapping is updated. + +Adding a new tool: append a ToolSpec to `TOOLS` below, then update +`_CLI_A2A_COMMAND_KEYWORDS` in executor_helpers.py (set the value to +the CLI subcommand keyword, or to `None` if the tool isn't exposed via +the CLI subprocess interface). The structural alignment tests in +workspace/tests/test_platform_tools.py fail otherwise. Renaming a tool: change `name` here. Search workspace/ for the old literal in case any non-adapter consumer (tests, plugin code) hard-coded it; update those manually. The grep is the audit, the test is the gate. -Removing a tool: delete the entry. Adapters stop registering it -automatically; doc generators stop mentioning it. +Removing a tool: delete the entry AND its `_CLI_A2A_COMMAND_KEYWORDS` +key. Adapters stop registering it automatically; doc generators stop +mentioning it. """ from __future__ import annotations diff --git a/workspace/tests/snapshots/a2a_instructions_cli.txt b/workspace/tests/snapshots/a2a_instructions_cli.txt new file mode 100644 index 00000000..6264027c --- /dev/null +++ b/workspace/tests/snapshots/a2a_instructions_cli.txt @@ -0,0 +1,10 @@ +## Inter-Agent Communication +You can delegate tasks to other workspaces using the a2a command: + python3 -m molecule_runtime.a2a_cli peers # List available peers + python3 -m molecule_runtime.a2a_cli delegate # Sync: wait for response + python3 -m molecule_runtime.a2a_cli delegate --async # Async: return task_id + python3 -m molecule_runtime.a2a_cli status # Check async task + python3 -m molecule_runtime.a2a_cli info # Your workspace info + +For quick questions, use sync delegate. For long tasks, use --async + status. +Only delegate to peers listed by the peers command (access control enforced). \ No newline at end of file diff --git a/workspace/tests/snapshots/a2a_instructions_mcp.txt b/workspace/tests/snapshots/a2a_instructions_mcp.txt new file mode 100644 index 00000000..62a2b95d --- /dev/null +++ b/workspace/tests/snapshots/a2a_instructions_mcp.txt @@ -0,0 +1,28 @@ +## Inter-Agent Communication + +- **delegate_task**: Delegate a task to a peer workspace via A2A and WAIT for the response (synchronous). +- **delegate_task_async**: Send a task to a peer and return immediately with a task_id (non-blocking). +- **check_task_status**: Poll the status of a task started with delegate_task_async; returns result when done. +- **list_peers**: List the workspaces this agent can communicate with — name, ID, status, role for each. +- **get_workspace_info**: Get this workspace's own info — ID, name, role, tier, parent, status. +- **send_message_to_user**: Send a message directly to the user's canvas chat — pushed instantly via WebSocket. Use this to: (1) acknowledge a task immediately ('Got it, I'll start working on this'), (2) send interim progress updates while doing long work, (3) deliver follow-up results after delegation completes, (4) attach files (zip, pdf, csv, image) for the user to download via the `attachments` field (NEVER paste file URLs in `message`). The message appears in the user's chat as if you're proactively reaching out. + +### delegate_task +Use for QUICK questions and small sub-tasks where you can afford to wait inline. Returns the peer's response text directly. For longer-running work (research, multi-minute jobs) use delegate_task_async + check_task_status instead so you don't hold this workspace busy waiting. + +### delegate_task_async +Use for long-running work where you want to keep doing other things while the peer processes. Poll with check_task_status to retrieve the result. The platform's A2A queue handles delivery + retries; the peer works independently. + +### check_task_status +Statuses: pending/in_progress (peer still working — wait), queued (peer is busy with a prior task — DO NOT retry, the platform stitches the response when it finishes), completed (result available), failed (real error — fall back to a different peer or handle it yourself). + +### list_peers +Call this first when you need to delegate but don't know the target's ID. Access control is enforced — you only see siblings, parent, and direct children. + +### get_workspace_info +Use to introspect your own identity (e.g. before reporting back to the user, or to determine whether you're a tier-0 root that can write GLOBAL memory). + +### send_message_to_user +Use proactively across the lifecycle of a task — early to acknowledge, mid-flight to update, late to deliver. Never paste file URLs in the message body — always pass absolute paths in `attachments` so the platform serves them as download chips (works on SaaS where external file hosts are unreachable). + +Always use list_peers first to discover available workspace IDs. Access control is enforced — you can only reach siblings and parent/children. If a delegation returns a DELEGATION FAILED message, do NOT forward the raw error to the user. Instead: (1) try a different peer, (2) handle the task yourself, or (3) tell the user which peer is unavailable and provide your own best answer. diff --git a/workspace/tests/snapshots/hma_instructions.txt b/workspace/tests/snapshots/hma_instructions.txt new file mode 100644 index 00000000..8aecc814 --- /dev/null +++ b/workspace/tests/snapshots/hma_instructions.txt @@ -0,0 +1,12 @@ +## Hierarchical Memory (HMA) + +- **commit_memory**: Save a fact to persistent memory; survives across sessions and restarts. +- **recall_memory**: Search persistent memory; returns matching LOCAL + TEAM + GLOBAL rows. + +### commit_memory +Scopes: LOCAL (private to you, default), TEAM (shared with parent + siblings), GLOBAL (entire org — only tier-0 root workspaces can write). Commit decisions, learned facts, and completed-task summaries so future sessions and teammates can recall them. + +### recall_memory +Call at the start of new work and when picking up something you may have done before. Empty query returns ALL accessible memories — cheap and avoids missing rows that don't match a narrow keyword. Memory is automatically recalled at session start; use this to refresh mid-session. + +Memory is automatically recalled at the start of each new session. Use commit_memory proactively during work so future sessions and teammates can recall what you learned. diff --git a/workspace/tests/test_platform_tools.py b/workspace/tests/test_platform_tools.py index 6c375f0f..13a71acf 100644 --- a/workspace/tests/test_platform_tools.py +++ b/workspace/tests/test_platform_tools.py @@ -121,3 +121,122 @@ def test_old_pre_rename_names_not_present_in_docs(): f"pre-rename name {stale!r} leaked into docs — registry " f"is the source of truth, not the doc generator." ) + + +# --------------------------------------------------------------------------- +# Snapshot / golden-file tests +# +# `_render_section` produces the LLM-visible system-prompt block. The +# structural tests above guarantee tool NAMES are present; these tests +# pin the SHAPE — bullet ordering, heading style, footer placement — +# so a future contributor who reorders fields in `_render_section` or +# rewrites a `when_to_use` paragraph sees the diff in CI. +# +# To regenerate after an intentional registry edit: +# cd workspace && WORKSPACE_ID=test-snapshot PLATFORM_URL=http://localhost \ +# python3 -c "from executor_helpers import get_a2a_instructions, get_hma_instructions; \ +# open('tests/snapshots/a2a_instructions_mcp.txt','w').write(get_a2a_instructions(mcp=True)); \ +# open('tests/snapshots/a2a_instructions_cli.txt','w').write(get_a2a_instructions(mcp=False)); \ +# open('tests/snapshots/hma_instructions.txt','w').write(get_hma_instructions())" +# --------------------------------------------------------------------------- + +from pathlib import Path + +_SNAPSHOTS = Path(__file__).parent / "snapshots" + + +def _read_snapshot(name: str) -> str: + return (_SNAPSHOTS / name).read_text(encoding="utf-8") + + +def test_a2a_mcp_instructions_match_snapshot(): + """Pin the rendered MCP-variant A2A doc string against the golden file.""" + from executor_helpers import get_a2a_instructions + + actual = get_a2a_instructions(mcp=True) + expected = _read_snapshot("a2a_instructions_mcp.txt") + assert actual == expected, ( + "get_a2a_instructions(mcp=True) drifted from snapshot. If the change " + "is intentional, regenerate with the command in the test-file header." + ) + + +def test_a2a_cli_instructions_match_snapshot(): + """Pin the rendered CLI-variant A2A doc string against the golden file.""" + from executor_helpers import get_a2a_instructions + + actual = get_a2a_instructions(mcp=False) + expected = _read_snapshot("a2a_instructions_cli.txt") + assert actual == expected, ( + "get_a2a_instructions(mcp=False) drifted from snapshot. If the change " + "is intentional, regenerate with the command in the test-file header." + ) + + +def test_hma_instructions_match_snapshot(): + """Pin the rendered HMA persistent-memory doc string against the golden file.""" + from executor_helpers import get_hma_instructions + + actual = get_hma_instructions() + expected = _read_snapshot("hma_instructions.txt") + assert actual == expected, ( + "get_hma_instructions() drifted from snapshot. If the change is " + "intentional, regenerate with the command in the test-file header." + ) + + +# --------------------------------------------------------------------------- +# CLI-block alignment tests +# +# Registry is the source of truth for MCP-capable runtimes; the CLI +# subprocess block (`_A2A_INSTRUCTIONS_CLI`) is a SEPARATE hand-maintained +# surface for ollama and other non-MCP adapters. The two diverged +# silently in the past — `send_message_to_user` was added to the +# registry but the CLI block was never updated. These tests close that +# gap by requiring a deliberate decision (subcommand keyword OR +# explicit `None`) for every a2a tool. +# --------------------------------------------------------------------------- + + +def test_cli_keyword_mapping_covers_every_a2a_tool(): + """Every a2a-section registry tool must have an entry in + `_CLI_A2A_COMMAND_KEYWORDS` — either a subcommand keyword or an + explicit `None`. Adding a new a2a tool without updating the + mapping fails this test, forcing the contributor to decide + whether the CLI subprocess interface should expose it. + """ + from executor_helpers import _CLI_A2A_COMMAND_KEYWORDS + + a2a_names = {t.name for t in a2a_tools()} + keyed_names = set(_CLI_A2A_COMMAND_KEYWORDS.keys()) + + missing = a2a_names - keyed_names + extra = keyed_names - a2a_names + assert not missing, ( + f"a2a tools missing from _CLI_A2A_COMMAND_KEYWORDS: {missing}. " + f"Add a key for each — set value to the CLI subcommand keyword " + f"or None if the tool isn't exposed via the subprocess interface." + ) + assert not extra, ( + f"_CLI_A2A_COMMAND_KEYWORDS has keys for tools no longer in the " + f"registry: {extra}. Remove them." + ) + + +def test_cli_keyword_substrings_appear_in_cli_block(): + """Every non-None subcommand keyword in `_CLI_A2A_COMMAND_KEYWORDS` + must literally appear in `_A2A_INSTRUCTIONS_CLI`. If a CLI + subcommand is mapped here but missing from the doc block, agents + on CLI-only runtimes don't see the invocation syntax. + """ + from executor_helpers import _A2A_INSTRUCTIONS_CLI, _CLI_A2A_COMMAND_KEYWORDS + + for tool_name, keyword in _CLI_A2A_COMMAND_KEYWORDS.items(): + if keyword is None: + continue + assert keyword in _A2A_INSTRUCTIONS_CLI, ( + f"_CLI_A2A_COMMAND_KEYWORDS[{tool_name!r}] = {keyword!r} but " + f"that substring is missing from _A2A_INSTRUCTIONS_CLI. Either " + f"add the subcommand to the CLI doc block or change the " + f"mapping value to None." + ) From 9bc3d6e352758c1fb73b8f5eb35b4c588ec63abc Mon Sep 17 00:00:00 2001 From: hongmingwang-moleculeai Date: Tue, 28 Apr 2026 20:45:53 -0700 Subject: [PATCH 2/2] Potential fix for pull request finding 'Unused global variable' Co-authored-by: Copilot Autofix powered by AI <223894421+github-code-quality[bot]@users.noreply.github.com> --- workspace/executor_helpers.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/workspace/executor_helpers.py b/workspace/executor_helpers.py index 19b5769f..ad98ab33 100644 --- a/workspace/executor_helpers.py +++ b/workspace/executor_helpers.py @@ -336,6 +336,24 @@ _CLI_A2A_COMMAND_KEYWORDS: dict[str, str | None] = { } +def _validate_cli_a2a_command_keywords() -> None: + """Keep CLI instruction text aligned with command keyword mapping.""" + missing = [ + (tool_name, keyword) + for tool_name, keyword in _CLI_A2A_COMMAND_KEYWORDS.items() + if keyword is not None and keyword not in _A2A_INSTRUCTIONS_CLI + ] + if missing: + details = ", ".join(f"{tool_name}={keyword!r}" for tool_name, keyword in missing) + raise ValueError( + "CLI A2A command mapping is out of sync with _A2A_INSTRUCTIONS_CLI: " + f"{details}" + ) + + +_validate_cli_a2a_command_keywords() + + def _render_section(heading: str, specs, footer: str = "") -> str: """Render a section: heading, per-tool bullet, per-tool when_to_use, footer.""" parts = [heading, ""]