Establishes workspace/platform_tools/registry.py as THE place tool
naming and docs live. Every consumer reads from it; nothing duplicates
the source. Closes the architectural gap behind the doc/tool drift
discussion of 2026-04-28: adding hundreds of future runtime SDK adapters
should not require touching tool names anywhere except the registry.
What the registry owns
ToolSpec dataclass with: name, short (one-line description), when_to_use
(multi-paragraph agent-facing usage guidance), input_schema (JSON Schema),
impl (the actual coroutine in a2a_tools.py), section ('a2a' | 'memory').
TOOLS list with 8 entries — delegate_task, delegate_task_async,
check_task_status, list_peers, get_workspace_info, send_message_to_user,
commit_memory, recall_memory.
What now reads from the registry
- workspace/a2a_mcp_server.py
The hardcoded TOOLS list (167 lines of hand-maintained dicts) is
gone. Replaced with a 6-line list comprehension over the registry.
MCP description = spec.short. inputSchema = spec.input_schema.
- workspace/executor_helpers.py
get_a2a_instructions(mcp=True) and get_hma_instructions() now
GENERATE the agent-facing system-prompt text from the registry.
Heading + per-tool bullet (spec.short) + per-tool when_to_use +
a section-specific footer. No more hand-maintained instruction
blocks that drift from reality.
- workspace/builtin_tools/delegation.py
Renamed delegate_to_workspace -> delegate_task_async to match
registry. check_delegation_status -> check_task_status. Added
sync delegate_task @tool wrapping a2a_tools.tool_delegate_task
(was missing for LangChain runtimes — CP review Issue 3).
- workspace/builtin_tools/memory.py
Renamed search_memory -> recall_memory to match registry.
- workspace/adapter_base.py, workspace/main.py
Bundle all 7 core tools (was 6) into all_tools / base_tools.
- workspace/coordinator.py, shared_runtime.py, policies/routing.py
Updated system-prompt-text references to use the registry names.
Structural alignment tests
workspace/tests/test_platform_tools.py — 9 tests pin every
registry-to-adapter mapping:
- registry names are unique
- a2a + memory partition is complete (no orphans)
- by_name lookup works
- MCP server registers exactly the registry's tool set
- MCP description equals registry.short for every tool
- MCP inputSchema equals registry.input_schema for every tool
- get_a2a_instructions text contains every a2a tool name
- get_hma_instructions text contains every memory tool name
- pre-rename names (delegate_to_workspace, search_memory,
check_delegation_status) cannot leak back
Adding a future tool means adding one ToolSpec; the test failure
list tells the author exactly which adapter to update.
Adapter pattern for future SDK support
When (e.g.) AutoGen or Pydantic AI gets adapters, the only work
needed for tool surfacing is "wrap registry.TOOLS in your SDK's
tool format." Names, descriptions, schemas, impl come from the
registry — adapter author writes zero strings.
Why this needed to ship now
PR #2237 (already in staging) injected MCP-world docs as the
default system-prompt content. Without the registry, those docs
said "delegate_task" while LangChain runtimes only had
"delegate_to_workspace", so workers saw docs for tools that didn't
exist (CP review Issues 1 and 3). PR #2239 was a tactical rename;
this PR is the structural fix that prevents the same class of
drift from recurring as new adapters ship.
PR #2239 was closed in favor of this — same renames, plus the
registry, plus structural tests. Single coherent change.
Tests: 1232 pass, 2 xfailed (pre-existing). 9 new in
test_platform_tools.py; 4 alignment tests in test_prompt.py from
#2237 still pass; original test_executor_helpers tests adapted to
the registry-driven world.
Refs: CP review Issues 1, 2, 3, 5; project memory
project_runtime_native_pluggable.md (platform owns A2A);
project memory feedback_doc_tool_alignment.md (this is the structural
fix for the tactical lesson).
472 lines
15 KiB
Python
472 lines
15 KiB
Python
"""Tests for prompt.py — system prompt construction."""
|
|
|
|
from pathlib import Path
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from skill_loader.loader import LoadedSkill, SkillMetadata
|
|
from prompt import build_system_prompt, get_peer_capabilities
|
|
|
|
|
|
def test_build_system_prompt_with_prompt_files(tmp_path):
    """Listed prompt files are read and joined in the order given."""
    file_contents = {
        "SOUL.md": "You are a helpful agent.",
        "TOOLS.md": "You have these tools.",
    }
    for filename, text in file_contents.items():
        (tmp_path / filename).write_text(text)

    prompt = build_system_prompt(
        config_path=str(tmp_path),
        workspace_id="ws-1",
        loaded_skills=[],
        peers=[],
        prompt_files=list(file_contents),
    )

    for text in file_contents.values():
        assert text in prompt
    # The SOUL.md text must come earlier in the prompt than the TOOLS.md text.
    soul_at = prompt.index("helpful agent")
    tools_at = prompt.index("these tools")
    assert soul_at < tools_at
|
|
|
|
|
|
def test_build_system_prompt_default_fallback(tmp_path):
    """system-prompt.md is used when no prompt_files argument is supplied."""
    default_text = "Default system prompt content."
    (tmp_path / "system-prompt.md").write_text(default_text)

    prompt = build_system_prompt(
        config_path=str(tmp_path),
        workspace_id="ws-1",
        loaded_skills=[],
        peers=[],
    )

    assert default_text in prompt
|
|
|
|
|
|
def test_build_system_prompt_auto_includes_memory_snapshot(tmp_path):
    """MEMORY.md and USER.md are included without being listed, after the base prompt."""
    ordered_files = [
        ("system-prompt.md", "Base prompt."),
        ("MEMORY.md", "Known workspace facts."),
        ("USER.md", "User prefers concise answers."),
    ]
    for filename, text in ordered_files:
        (tmp_path / filename).write_text(text)

    prompt = build_system_prompt(
        config_path=str(tmp_path),
        workspace_id="ws-1",
        loaded_skills=[],
        peers=[],
    )

    expected_texts = [text for _, text in ordered_files]
    for text in expected_texts:
        assert text in prompt

    # Expected order: base prompt, then MEMORY.md, then USER.md.
    base_at, memory_at, user_at = (prompt.index(text) for text in expected_texts)
    assert base_at < memory_at
    assert memory_at < user_at
|
|
|
|
|
|
def test_build_system_prompt_deduplicates_explicit_memory_files(tmp_path):
    """A snapshot file that is also listed explicitly is rendered only once."""
    memory_text = "Known workspace facts."
    user_text = "User prefers concise answers."
    (tmp_path / "system-prompt.md").write_text("Base prompt.")
    (tmp_path / "MEMORY.md").write_text(memory_text)
    (tmp_path / "USER.md").write_text(user_text)

    prompt = build_system_prompt(
        config_path=str(tmp_path),
        workspace_id="ws-1",
        loaded_skills=[],
        peers=[],
        prompt_files=["system-prompt.md", "MEMORY.md"],
    )

    # MEMORY.md is listed explicitly; USER.md is not. Each must occur exactly once.
    assert prompt.count(memory_text) == 1
    assert prompt.count(user_text) == 1
|
|
|
|
|
|
def test_build_system_prompt_missing_file(tmp_path):
    """A prompt file that does not exist is skipped instead of raising."""
    build_kwargs = {
        "config_path": str(tmp_path),
        "workspace_id": "ws-1",
        "loaded_skills": [],
        "peers": [],
        "prompt_files": ["nonexistent.md"],
    }

    prompt = build_system_prompt(**build_kwargs)

    # The delegation failure section must still be rendered.
    assert "Handling delegation failures" in prompt
|
|
|
|
|
|
def test_plugin_rules_injection(tmp_path):
    """Each plugin rule shows up in the prompt along with '## Platform Rules'."""
    (tmp_path / "system-prompt.md").write_text("Base prompt.")
    rules = ["Always be concise.", "Never reveal secrets."]

    prompt = build_system_prompt(
        config_path=str(tmp_path),
        workspace_id="ws-1",
        loaded_skills=[],
        peers=[],
        plugin_rules=rules,
    )

    assert "## Platform Rules" in prompt
    for rule in rules:
        assert rule in prompt
|
|
|
|
|
|
def test_plugin_prompts_injection(tmp_path):
    """Each plugin prompt shows up in the prompt along with '## Platform Guidelines'."""
    (tmp_path / "system-prompt.md").write_text("Base prompt.")
    guideline = "Use markdown formatting."

    prompt = build_system_prompt(
        config_path=str(tmp_path),
        workspace_id="ws-1",
        loaded_skills=[],
        peers=[],
        plugin_prompts=[guideline],
    )

    assert "## Platform Guidelines" in prompt
    assert guideline in prompt
|
|
|
|
|
|
def test_skills_listing(tmp_path):
    """Each loaded skill is rendered with its name, description, and instructions."""
    (tmp_path / "system-prompt.md").write_text("Base.")

    # A fully populated skill.
    seo_skill = LoadedSkill(
        metadata=SkillMetadata(
            id="seo",
            name="SEO Optimization",
            description="Optimize content for search engines.",
            tags=["seo"],
            examples=["Optimize this blog post"],
        ),
        instructions="1. Analyze keywords\n2. Optimize headings",
    )
    # A skill with an empty description and no tags/examples supplied.
    writing_skill = LoadedSkill(
        metadata=SkillMetadata(
            id="writing",
            name="Creative Writing",
            description="",
        ),
        instructions="Write creatively.",
    )

    prompt = build_system_prompt(
        config_path=str(tmp_path),
        workspace_id="ws-1",
        loaded_skills=[seo_skill, writing_skill],
        peers=[],
    )

    expected_fragments = (
        "## Your Skills",
        "### SEO Optimization",
        "Optimize content for search engines.",
        "1. Analyze keywords",
        "### Creative Writing",
        "Write creatively.",
    )
    for fragment in expected_fragments:
        assert fragment in prompt
|
|
|
|
|
|
def test_peer_capabilities_format(tmp_path):
    """Each peer is rendered with its name, id, and status, plus skills when it has a card."""
    (tmp_path / "system-prompt.md").write_text("Base.")

    echo_peer = {
        "id": "peer-1",
        "name": "Echo Agent",
        "status": "online",
        "agent_card": {
            "name": "Echo Agent",
            "skills": [
                {"name": "echo", "id": "echo"},
                {"name": "repeat", "id": "repeat"},
            ],
        },
    }
    silent_peer = {
        "id": "peer-2",
        "name": "Silent Agent",
        "status": "offline",
        "agent_card": None,
    }

    prompt = build_system_prompt(
        config_path=str(tmp_path),
        workspace_id="ws-1",
        loaded_skills=[],
        peers=[echo_peer, silent_peer],
    )

    assert "## Your Peers" in prompt
    assert "**Echo Agent** (id: `peer-1`, status: online)" in prompt
    assert "Skills: echo, repeat" in prompt
    assert "delegate_task_async" in prompt
    # peer-2 has no agent_card but DOES have a DB name + status — must
    # still render so coordinators can delegate to freshly-created peers
    # whose A2A discovery hasn't populated a card yet (regression of the
    # 2026-04-27 Design Director discovery bug).
    assert "**Silent Agent** (id: `peer-2`, status: offline)" in prompt
|
|
|
|
|
|
def test_peer_with_json_string_agent_card(tmp_path):
    """A peer whose agent_card is a JSON-encoded string still renders its skills."""
    import json

    (tmp_path / "system-prompt.md").write_text("Base.")

    # The card is serialized to a string rather than passed as a dict.
    encoded_card = json.dumps({
        "name": "JSON Peer",
        "skills": [{"name": "parse"}],
    })
    json_peer = {
        "id": "peer-3",
        "name": "JSON Peer",
        "status": "online",
        "agent_card": encoded_card,
    }

    prompt = build_system_prompt(
        config_path=str(tmp_path),
        workspace_id="ws-1",
        loaded_skills=[],
        peers=[json_peer],
    )

    assert "**JSON Peer** (id: `peer-3`, status: online)" in prompt
    assert "Skills: parse" in prompt
|
|
|
|
|
|
def test_delegation_failure_section_always_present(tmp_path):
    """The delegation-failure guidance is rendered even for a minimal prompt."""
    (tmp_path / "system-prompt.md").write_text("Base.")

    prompt = build_system_prompt(
        config_path=str(tmp_path),
        workspace_id="ws-1",
        loaded_skills=[],
        peers=[],
    )

    for fragment in ("## Handling delegation failures", "Retry transient failures"):
        assert fragment in prompt
|
|
|
|
|
|
def test_parent_context_injection(tmp_path):
    """parent_context entries are rendered under '## Parent Context', one heading per file."""
    (tmp_path / "system-prompt.md").write_text("Base.")

    shared_files = [
        {"path": "guidelines.md", "content": "Always use type hints."},
        {"path": "architecture.md", "content": "We use hexagonal architecture."},
    ]

    prompt = build_system_prompt(
        config_path=str(tmp_path),
        workspace_id="ws-1",
        loaded_skills=[],
        peers=[],
        parent_context=shared_files,
    )

    expected_fragments = (
        "## Parent Context",
        "shared by your parent workspace",
        "### guidelines.md",
        "Always use type hints.",
        "### architecture.md",
        "We use hexagonal architecture.",
    )
    for fragment in expected_fragments:
        assert fragment in prompt
|
|
|
|
|
|
def test_parent_context_empty(tmp_path):
    """An empty parent_context list produces no '## Parent Context' section."""
    (tmp_path / "system-prompt.md").write_text("Base.")
    no_shared_files = []

    prompt = build_system_prompt(
        config_path=str(tmp_path),
        workspace_id="ws-1",
        loaded_skills=[],
        peers=[],
        parent_context=no_shared_files,
    )

    assert "## Parent Context" not in prompt
|
|
|
|
|
|
def test_parent_context_none(tmp_path):
    """Passing parent_context=None produces no '## Parent Context' section."""
    (tmp_path / "system-prompt.md").write_text("Base.")

    build_kwargs = {
        "config_path": str(tmp_path),
        "workspace_id": "ws-1",
        "loaded_skills": [],
        "peers": [],
        "parent_context": None,
    }
    prompt = build_system_prompt(**build_kwargs)

    assert "## Parent Context" not in prompt
|
|
|
|
|
|
def test_parent_context_skips_empty_content(tmp_path):
    """Parent-context entries whose content is empty or only whitespace are omitted."""
    (tmp_path / "system-prompt.md").write_text("Base.")

    shared_files = [
        {"path": "empty.md", "content": ""},
        {"path": "whitespace.md", "content": " \n "},
        {"path": "real.md", "content": "Real content here."},
    ]

    prompt = build_system_prompt(
        config_path=str(tmp_path),
        workspace_id="ws-1",
        loaded_skills=[],
        peers=[],
        parent_context=shared_files,
    )

    # The section exists because one entry carries real content.
    assert "## Parent Context" in prompt
    for skipped_heading in ("### empty.md", "### whitespace.md"):
        assert skipped_heading not in prompt
    assert "### real.md" in prompt
    assert "Real content here." in prompt
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# get_peer_capabilities() tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.mark.asyncio
async def test_get_peer_capabilities_success():
    """A 200 response yields the decoded JSON peer list."""
    expected_peers = [
        {"id": "peer-1", "name": "Alpha"},
        {"id": "peer-2", "name": "Beta"},
    ]

    response = MagicMock()
    response.status_code = 200
    response.json.return_value = expected_peers

    # Stand-in for the async client: entering the context yields the mock itself.
    client = AsyncMock()
    client.__aenter__ = AsyncMock(return_value=client)
    client.__aexit__ = AsyncMock(return_value=False)
    client.get = AsyncMock(return_value=response)

    # httpx is imported lazily inside get_peer_capabilities(), so patch at module level
    with patch("httpx.AsyncClient", return_value=client):
        fetched = await get_peer_capabilities("http://platform:8080", "ws-abc")

    assert fetched == expected_peers
    client.get.assert_called_once_with(
        "http://platform:8080/registry/ws-abc/peers",
        headers={"X-Workspace-ID": "ws-abc"},
    )
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_get_peer_capabilities_non_200():
    """A non-200 response (here 404) yields an empty list."""
    response = MagicMock()
    response.status_code = 404

    # Stand-in for the async client: entering the context yields the mock itself.
    client = AsyncMock()
    client.__aenter__ = AsyncMock(return_value=client)
    client.__aexit__ = AsyncMock(return_value=False)
    client.get = AsyncMock(return_value=response)

    with patch("httpx.AsyncClient", return_value=client):
        fetched = await get_peer_capabilities("http://platform:8080", "ws-abc")

    assert fetched == []
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_get_peer_capabilities_exception():
    """An exception raised by the client's get() yields an empty list."""
    # Stand-in for the async client whose GET always fails.
    client = AsyncMock()
    client.__aenter__ = AsyncMock(return_value=client)
    client.__aexit__ = AsyncMock(return_value=False)
    client.get = AsyncMock(side_effect=Exception("Network unreachable"))

    with patch("httpx.AsyncClient", return_value=client):
        fetched = await get_peer_capabilities("http://platform:8080", "ws-abc")

    assert fetched == []
|
|
|
|
|
|
# Regression tests for the A2A + HMA tool-instruction injection. Pre-fix,
|
|
# get_a2a_instructions() and get_hma_instructions() were defined in
|
|
# executor_helpers.py but never called from build_system_prompt — workers
|
|
# saw the platform's delegate_task / commit_memory tools registered but
|
|
# had no documentation telling them how to use them.
|
|
|
|
def test_a2a_instructions_injected_default_mcp(tmp_path):
    """With default arguments the prompt carries the A2A section and its MCP tool names."""
    (tmp_path / "system-prompt.md").write_text("Base.")

    prompt = build_system_prompt(
        config_path=str(tmp_path),
        workspace_id="ws-1",
        loaded_skills=[],
        peers=[],
    )

    expected_fragments = (
        "## Inter-Agent Communication",
        "delegate_task",
        "list_peers",
        "send_message_to_user",
    )
    for fragment in expected_fragments:
        assert fragment in prompt
|
|
|
|
|
|
def test_a2a_instructions_cli_variant_when_disabled(tmp_path):
    """With a2a_mcp=False the A2A section references the CLI module, not MCP tools."""
    (tmp_path / "system-prompt.md").write_text("Base.")

    build_kwargs = {
        "config_path": str(tmp_path),
        "workspace_id": "ws-1",
        "loaded_skills": [],
        "peers": [],
        "a2a_mcp": False,
    }
    prompt = build_system_prompt(**build_kwargs)

    assert "## Inter-Agent Communication" in prompt
    assert "molecule_runtime.a2a_cli" in prompt
    # MCP-only details must NOT leak into the CLI variant.
    assert "send_message_to_user" not in prompt
|
|
|
|
|
|
def test_hma_instructions_injected(tmp_path):
    """The prompt carries the HMA memory section and both memory tool names."""
    (tmp_path / "system-prompt.md").write_text("Base.")

    prompt = build_system_prompt(
        config_path=str(tmp_path),
        workspace_id="ws-1",
        loaded_skills=[],
        peers=[],
    )

    assert "## Hierarchical Memory (HMA)" in prompt
    for tool_name in ("commit_memory", "recall_memory"):
        assert tool_name in prompt
|
|
|
|
|
|
def test_tool_instructions_precede_peer_section(tmp_path):
    """The A2A section is rendered before the peer list, whose IDs the A2A tools take."""
    (tmp_path / "system-prompt.md").write_text("Base.")

    worker_peer = {"id": "p1", "name": "Worker", "status": "active", "agent_card": None}
    prompt = build_system_prompt(
        config_path=str(tmp_path),
        workspace_id="ws-1",
        loaded_skills=[],
        peers=[worker_peer],
    )

    # str.index raises ValueError if either heading is missing, failing the test.
    a2a_at = prompt.index("## Inter-Agent Communication")
    peers_at = prompt.index("## Your Peers")
    assert a2a_at < peers_at, "A2A instructions must come before the peer list"
|