From 203a4f0f91319c53d3a8e2c672118438020672f2 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Mon, 27 Apr 2026 11:15:06 -0700 Subject: [PATCH 1/4] fix(runtime): resolve a2a_mcp_server.py path from wheel install location MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DEFAULT_MCP_SERVER_PATH was hardcoded to /app/a2a_mcp_server.py, which was correct under the pre-#87 monolithic-template Docker layout where the workspace/ tree was COPY'd into /app/. After the universal-runtime refactor (#87, #117), workspace modules ship inside the molecule-ai-workspace-runtime wheel under site-packages/molecule_runtime/, while /app/ now holds only template-specific files (adapter.py + the runtime-native executor for that template). Net effect: in every workspace built since the wheel cutover, Claude Code SDK's mcp_servers={"a2a": {"command": python, "args": ["/app/a2a_mcp_server.py"]}} pointed at a missing file. The subprocess launch failed silently, the SDK registered zero MCP tools, and the agent's list_peers / delegate_task / a2a_send_message / a2a_send_signal all disappeared. Symptom observed today: Design Director said "I tried to reach the perf auditor via the inter-agent MCP tools (list_peers, delegate_task) but those tools didn't resolve in this environment" and fell back to running the audit itself with WebFetch. Why this slipped through E2E: the priority-runtimes harness sends a single message and verifies a reply — it does not exercise inter-agent delegation, so the missing MCP tools are invisible at that layer. Fix: resolve the path relative to executor_helpers.py via __file__, which tracks wherever the wheel is installed (site-packages today, anywhere else tomorrow). The A2A_MCP_SERVER_PATH env override is preserved for tests / non-default layouts. Regression test: assert os.path.exists(DEFAULT_MCP_SERVER_PATH) so any future move of a2a_mcp_server.py out of the package directory fails at unit-test time instead of silently disabling delegation in production. Co-Authored-By: Claude Opus 4.7 (1M context) --- workspace/executor_helpers.py | 11 ++++++++++- workspace/tests/test_executor_helpers.py | 13 +++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/workspace/executor_helpers.py b/workspace/executor_helpers.py index 35c8e7ca..95e73857 100644 --- a/workspace/executor_helpers.py +++ b/workspace/executor_helpers.py @@ -48,7 +48,16 @@ logger = logging.getLogger(__name__) WORKSPACE_MOUNT = "/workspace" CONFIG_MOUNT = "/configs" -DEFAULT_MCP_SERVER_PATH = "/app/a2a_mcp_server.py" +# Resolved relative to this module so it tracks the wheel install +# location. The hardcoded "/app/a2a_mcp_server.py" was correct under +# the pre-#87 monolithic-template layout, but post-universal-runtime +# the file ships inside the molecule-ai-workspace-runtime wheel at +# site-packages/molecule_runtime/, while /app/ now holds only +# template-specific modules (adapter.py + the runtime-native executor). +# Stale path → Claude Code SDK silently fails to spawn the MCP +# subprocess → list_peers / delegate_task / a2a_send_message all +# disappear from the agent's toolset. +DEFAULT_MCP_SERVER_PATH = str(Path(__file__).parent / "a2a_mcp_server.py") DEFAULT_DELEGATION_RESULTS_FILE = "/tmp/delegation_results.jsonl" PLATFORM_HTTP_TIMEOUT_S = 5.0 MEMORY_RECALL_LIMIT = 10 diff --git a/workspace/tests/test_executor_helpers.py b/workspace/tests/test_executor_helpers.py index d9dd35fa..688f6044 100644 --- a/workspace/tests/test_executor_helpers.py +++ b/workspace/tests/test_executor_helpers.py @@ -23,6 +23,7 @@ Covers 100% of the public surface: from __future__ import annotations import json +import os from pathlib import Path from types import SimpleNamespace from unittest.mock import AsyncMock, MagicMock, patch @@ -88,6 +89,18 @@ def test_get_mcp_server_path_default(monkeypatch): assert get_mcp_server_path() == DEFAULT_MCP_SERVER_PATH +def test_get_mcp_server_path_default_resolves_to_existing_file(): + # Locks in the wheel-relative resolution: if a future refactor moves + # a2a_mcp_server.py out of the package directory or breaks the + # __file__-based lookup, Claude Code SDK silently fails to spawn the + # MCP subprocess and inter-agent tools (list_peers, delegate_task) + # vanish at runtime. This assertion catches that at unit-test time. + assert os.path.exists(DEFAULT_MCP_SERVER_PATH), ( + f"DEFAULT_MCP_SERVER_PATH points at a missing file: " + f"{DEFAULT_MCP_SERVER_PATH}" + ) + + def test_get_mcp_server_path_env_override(monkeypatch): monkeypatch.setenv("A2A_MCP_SERVER_PATH", "/custom/mcp.py") assert get_mcp_server_path() == "/custom/mcp.py" From 28fc7a8cbd669ef4702af712581efe9c5ddb289c Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Mon, 27 Apr 2026 11:22:00 -0700 Subject: [PATCH 2/4] fix(runtime): replace remaining /app/ legacy paths in agent prompts + docstrings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Comprehensive sweep follow-up to the MCP server path fix. Audited every /app/ reference in the runtime source against the live claude-code template image and confirmed the actual /app/ contents post-#87 are ONLY: __init__.py, adapter.py, claude_sdk_executor.py, requirements.txt — every other workspace module ships in the wheel under site-packages/molecule_runtime/. Two more leaks found: 1. executor_helpers.py:_A2A_INSTRUCTIONS_CLI — inter-agent system prompt for non-MCP runtimes (Ollama, custom) had 5 lines telling the model `python3 /app/a2a_cli.py X`. Models copy these examples verbatim, so every CLI-runtime delegation would fail at the shell layer (no such file). Replaced with `python3 -m molecule_runtime.a2a_cli` form, which works regardless of where the wheel is installed. 2. molecule_ai_status.py docstring — usage examples invoked `python3 /app/molecule_ai_status.py` and claimed a `molecule-monorepo-status` shell alias. Both broken in current templates: the file's at site-packages, and `which molecule-monorepo-status` errors (the legacy symlink only existed in the dev-only workspace/Dockerfile base image, not in the standalone template Dockerfiles that ship to production). Updated docstring + the __main__ usage banner + the stderr error prefix to use the same `python3 -m molecule_runtime.X` form. Plugins audited and clean: WORKSPACE_PLUGINS_DIR=/configs/plugins, SHARED_PLUGINS_DIR=$PLUGINS_DIR fallback /plugins. No /app/ assumptions. Regression test: `test_a2a_cli_instructions_use_module_invocation_not_legacy_app_path` asserts the legacy /app/a2a_cli.py path can't drift back into the CLI system prompt and that the canonical module form is present. The legacy workspace/Dockerfile + workspace/entrypoint.sh + workspace/scripts/ still contain /app/-shaped paths but are dev-only base-image scaffolding (per workspace/build-all.sh's own header comment) — not shipped to the standalone template images. Out of scope here; can be cleaned up in a separate dead-code pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- workspace/executor_helpers.py | 10 +++++----- workspace/molecule_ai_status.py | 14 +++++--------- workspace/tests/test_executor_helpers.py | 20 +++++++++++++++++++- 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/workspace/executor_helpers.py b/workspace/executor_helpers.py index 95e73857..dc40301e 100644 --- a/workspace/executor_helpers.py +++ b/workspace/executor_helpers.py @@ -299,11 +299,11 @@ Instead: (1) try delegating to a different peer, (2) handle the task yourself, o _A2A_INSTRUCTIONS_CLI = """## Inter-Agent Communication You can delegate tasks to other workspaces using the a2a command: - python3 /app/a2a_cli.py peers # List available peers - python3 /app/a2a_cli.py delegate # Sync: wait for response - python3 /app/a2a_cli.py delegate --async # Async: return task_id - python3 /app/a2a_cli.py status # Check async task - python3 /app/a2a_cli.py info # Your workspace info + python3 -m molecule_runtime.a2a_cli peers # List available peers + python3 -m molecule_runtime.a2a_cli delegate # Sync: wait for response + python3 -m molecule_runtime.a2a_cli delegate --async # Async: return task_id + python3 -m molecule_runtime.a2a_cli status # Check async task + python3 -m molecule_runtime.a2a_cli info # Your workspace info For quick questions, use sync delegate. For long tasks, use --async + status. Only delegate to peers listed by the peers command (access control enforced).""" diff --git a/workspace/molecule_ai_status.py b/workspace/molecule_ai_status.py index 6787c38e..fa22ba9c 100644 --- a/workspace/molecule_ai_status.py +++ b/workspace/molecule_ai_status.py @@ -4,14 +4,10 @@ Usage (from any script, cron job, or shell inside the container): # Set current task (shows on canvas card) - python3 /app/molecule_ai_status.py "Running weekly SEO audit..." + python3 -m molecule_runtime.molecule_ai_status "Running weekly SEO audit..." # Clear task (removes banner from canvas) - python3 /app/molecule_ai_status.py "" - - # Or use the shell alias: - molecule-monorepo-status "Analyzing competitor data..." - molecule-monorepo-status "" + python3 -m molecule_runtime.molecule_ai_status "" The status appears as an amber banner on the workspace card in the canvas, visible to the project owner in real-time. @@ -63,13 +59,13 @@ def set_status(task: str): timeout=5.0, ) except Exception as e: - print(f"molecule-monorepo-status: failed to update: {e}", file=sys.stderr) + print(f"molecule_ai_status: failed to update: {e}", file=sys.stderr) if __name__ == "__main__": # pragma: no cover if len(sys.argv) < 2: - print("Usage: molecule-monorepo-status 'task description'") - print(" molecule-monorepo-status '' # clear") + print("Usage: python3 -m molecule_runtime.molecule_ai_status 'task description'") + print(" python3 -m molecule_runtime.molecule_ai_status '' # clear") sys.exit(1) set_status(sys.argv[1]) diff --git a/workspace/tests/test_executor_helpers.py b/workspace/tests/test_executor_helpers.py index 688f6044..75869be2 100644 --- a/workspace/tests/test_executor_helpers.py +++ b/workspace/tests/test_executor_helpers.py @@ -445,10 +445,28 @@ def test_get_a2a_instructions_mcp_default(): def test_get_a2a_instructions_cli_variant(): out = get_a2a_instructions(mcp=False) - assert "a2a_cli.py" in out + assert "a2a_cli" in out assert "MCP tools" not in out +def test_a2a_cli_instructions_use_module_invocation_not_legacy_app_path(): + # The CLI variant of the a2a instructions ships in the agent system + # prompt for non-MCP runtimes (Ollama, custom). The model copies the + # invocation form verbatim into shell calls, so any path drift here + # silently breaks delegation. The legacy /app/a2a_cli.py path was + # correct under the pre-#87 monolithic-template Docker layout but + # stops resolving once the runtime ships as a wheel — pin the + # canonical `python3 -m molecule_runtime.a2a_cli` form so future + # refactors can't silently regress it. + out = get_a2a_instructions(mcp=False) + assert "/app/a2a_cli.py" not in out, ( + "Legacy /app/a2a_cli.py path leaked back into the CLI-variant " + "system prompt — agents on Ollama/custom runtimes would copy " + "this verbatim and every delegation would fail." + ) + assert "python3 -m molecule_runtime.a2a_cli" in out + + def test_a2a_mcp_instructions_reference_existing_tools(): """The MCP instructions text must only reference tools that are actually registered in a2a_mcp_server.py. If someone renames a server tool, the From 9c3695df6d4f68ecb1e6dee8b52b478477a38b47 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Mon, 27 Apr 2026 11:48:05 -0700 Subject: [PATCH 3/4] test(runtime): update molecule_ai_status test for renamed error prefix Pre-existing test_set_status_exception_prints_to_stderr asserted on the legacy "molecule-monorepo-status: failed to update" prefix string. The prior commit renamed it to "molecule_ai_status: failed to update" so the printed label matches the canonical module-form invocation (`python3 -m molecule_runtime.molecule_ai_status`) instead of a shell alias that only ever existed in the dev-only base image. Updating the expected substring in lockstep. Co-Authored-By: Claude Opus 4.7 (1M context) --- workspace/tests/test_molecule_ai_status.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/workspace/tests/test_molecule_ai_status.py b/workspace/tests/test_molecule_ai_status.py index 9c3f8d80..cbddd816 100644 --- a/workspace/tests/test_molecule_ai_status.py +++ b/workspace/tests/test_molecule_ai_status.py @@ -113,7 +113,11 @@ class TestSetStatus: mod.set_status("something") captured = capsys.readouterr() - assert "molecule-monorepo-status: failed to update" in captured.err + # Error prefix matches the canonical module-form invocation; the + # legacy molecule-monorepo-status shell alias only existed in the + # dev-only workspace/Dockerfile base image, never in shipped + # template images, so the prefix was misleading. + assert "molecule_ai_status: failed to update" in captured.err assert "platform unreachable" in captured.err def test_set_status_heartbeat_fields_are_correct(self, monkeypatch): From 49ded748767938ef41ff8d202c32bbc3eb222214 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Mon, 27 Apr 2026 12:27:50 -0700 Subject: [PATCH 4/4] docs(cli-runtime): use module-form invocation, drop dead shell-alias claim MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same root cause as the workspace/molecule_ai_status.py docstring fix in this PR: this doc claimed `molecule-monorepo-status` was a usable shell alias and `from molecule_ai_status import set_status` was a usable Python import. Both worked under the pre-#87 monolithic-template layout (where workspace/Dockerfile created the symlink and COPY'd the modules into /app/) but neither works in current standalone template images that install the runtime as a wheel: - `which molecule-monorepo-status` errors — only `a2a-db` and `molecule-runtime` are registered console scripts. - `from molecule_ai_status` raises ImportError — modules are under the `molecule_runtime` package now. Switched both examples to the canonical `python3 -m molecule_runtime.molecule_ai_status` form (CLI) and `from molecule_runtime.molecule_ai_status import set_status` (Python). Same form the runtime ships in its own usage banner, so anyone discovering this doc gets a runnable example. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/agent-runtime/cli-runtime.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/agent-runtime/cli-runtime.md b/docs/agent-runtime/cli-runtime.md index eeaa21c3..dd8c9220 100644 --- a/docs/agent-runtime/cli-runtime.md +++ b/docs/agent-runtime/cli-runtime.md @@ -228,13 +228,13 @@ CLI runtimes keep the same memory tool surface as the Python runtime. When `AWAR Any process inside a workspace container (cron jobs, scripts, background tasks) can update the canvas card display: ```bash -molecule-monorepo-status "Running weekly SEO audit..." # show on canvas -molecule-monorepo-status "" # clear when done +python3 -m molecule_runtime.molecule_ai_status "Running weekly SEO audit..." # show on canvas +python3 -m molecule_runtime.molecule_ai_status "" # clear when done ``` From Python: ```python -from molecule_ai_status import set_status +from molecule_runtime.molecule_ai_status import set_status set_status("Analyzing competitor data...") ```