From 61d59088179024414f0ddee5300f1cff397a0577 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Mon, 4 May 2026 14:29:08 -0700 Subject: [PATCH 1/2] fix(workspace files API): write claude-code config to /configs, sudo for root-owned base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause of the user-visible 500 ("install: cannot create directory '/opt/configs': Permission denied") on PUT /workspaces//files/config.yaml: 1. Path map fall-through. claude-code wasn't in workspaceFilePathPrefix, so resolveWorkspaceFilePath returned the default `/opt/configs/...`. That directory doesn't exist on the workspace EC2 — cloud-init in provisioner/userdata_containerized.go runs `mkdir -p /configs` only. Even if the SSH write had succeeded at /opt/configs, the docker container's bind-mount is host:/configs → container:/configs, so the file would have been invisible to the runtime. 2. /configs ownership. cloud-init runs as root, so /configs is root-owned. The SSH-as-ubuntu install command can't write into it without sudo. Hermes wasn't affected because its base path (/home/ubuntu/.hermes) is ubuntu-owned. Two-line fix: - Add `claude-code: /configs` to the runtime → base-path map and flip the default fall-through from `/opt/configs` to `/configs`. Leave the pre-existing langgraph/external entries pointing at /opt/configs pending a migration audit (no user report on those today, and flipping them would silently relocate any files those runtimes already wrote). - Prefix the remote install command with `sudo -n` so the write succeeds under the standard EC2 ubuntu/passwordless-sudo posture. `-n` (non-interactive) ensures clean failure if that ever changes, rather than a hang waiting for a password prompt. Tests: - TestResolveWorkspaceFilePath_KnownRuntimes adds claude-code + CLAUDE-CODE coverage and updates the empty/unknown default cases to expect /configs. The langgraph/external rows stay green (unchanged values), confirming the scope of the rename. Verification: - go build ./... clean - go test ./internal/handlers/ green - The user-reported bug (PUT /workspaces/57fb7043-79a0-4a53-ae4a-efb39deb457f/files/config.yaml → 500 EACCES on /opt/configs) is the failure mode this fix addresses on both axes (path + sudo). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../internal/handlers/template_files_eic.go | 40 +++++++++++++++---- .../handlers/template_files_eic_test.go | 10 ++++- 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/workspace-server/internal/handlers/template_files_eic.go b/workspace-server/internal/handlers/template_files_eic.go index 2c8858be..3dcfb98b 100644 --- a/workspace-server/internal/handlers/template_files_eic.go +++ b/workspace-server/internal/handlers/template_files_eic.go @@ -38,13 +38,26 @@ import ( // Keep these stable — changing the base path for an existing runtime // without a migration shim will make previously-saved files disappear from // the runtime's POV. +// +// Path source-of-truth: cloud-init in +// `molecule-controlplane/internal/provisioner/userdata_containerized.go` +// runs `mkdir -p /configs` and writes the canonical config.yaml there. +// The workspace container bind-mounts host `/configs` to read it back. +// Files written anywhere else on the host are invisible to the runtime, +// so `claude-code` (and any future containerized runtime) must point here. +// +// `/configs` is root-owned (cloud-init runs as root); the SSH-as-ubuntu +// install command at the call site below uses `sudo` to write into it. var workspaceFilePathPrefix = map[string]string{ - "hermes": "/home/ubuntu/.hermes", - "langgraph": "/opt/configs", - "external": "/opt/configs", - // Default for unknown / future runtimes is /opt/configs — most - // conservative place that doesn't collide with system or runtime- - // private directories. + "hermes": "/home/ubuntu/.hermes", + "claude-code": "/configs", + "langgraph": "/opt/configs", + "external": "/opt/configs", + // Default for unknown / future runtimes is /configs — matches the + // containerized user-data layout. The `langgraph` / `external` + // entries pre-date the unified user-data path and are retained + // until a migration audit confirms what the running tenants of + // those runtimes actually have on disk. } func resolveWorkspaceFilePath(runtime, relPath string) (string, error) { @@ -53,7 +66,7 @@ func resolveWorkspaceFilePath(runtime, relPath string) (string, error) { } base, ok := workspaceFilePathPrefix[strings.ToLower(strings.TrimSpace(runtime))] if !ok { - base = "/opt/configs" + base = "/configs" } return filepath.Join(base, filepath.Clean(relPath)), nil } @@ -148,6 +161,17 @@ func writeFileViaEIC(ctx context.Context, instanceID, runtime, relPath string, c // writes the file atomically via temp-file-rename. Permissions 0644 // match the existing tar-unpack defaults on the Docker path. // + // `sudo -n` (non-interactive) prefix: the canonical containerized + // workspace layout puts /configs at the root, owned by root because + // cloud-init runs as root (see + // molecule-controlplane/internal/provisioner/userdata_containerized.go). + // SSH-as-ubuntu can't write into /configs without escalation. + // Ubuntu has passwordless sudo on EC2 by default; sudo -n fails fast + // (no prompt) if that ever changes, surfacing a clean error instead + // of a hang. The hermes path /home/ubuntu/.hermes is ubuntu-owned + // and doesn't strictly need sudo, but using it uniformly avoids + // per-runtime branching here. + // // The remote command is fully deterministic — no user-controlled // input reaches a shell eval (absPath is built from a map + Clean()). sshArgs := []string{ @@ -157,7 +181,7 @@ func writeFileViaEIC(ctx context.Context, instanceID, runtime, relPath string, c "-o", "ServerAliveInterval=15", "-p", fmt.Sprintf("%d", localPort), fmt.Sprintf("%s@127.0.0.1", osUser), - fmt.Sprintf("install -D -m 0644 /dev/stdin %s", shellQuote(absPath)), + fmt.Sprintf("sudo -n install -D -m 0644 /dev/stdin %s", shellQuote(absPath)), } sshCmd := exec.CommandContext(ctx, "ssh", sshArgs...) sshCmd.Env = os.Environ() diff --git a/workspace-server/internal/handlers/template_files_eic_test.go b/workspace-server/internal/handlers/template_files_eic_test.go index 6e8a901f..30bd9988 100644 --- a/workspace-server/internal/handlers/template_files_eic_test.go +++ b/workspace-server/internal/handlers/template_files_eic_test.go @@ -18,10 +18,16 @@ func TestResolveWorkspaceFilePath_KnownRuntimes(t *testing.T) { {"hermes", "config.yaml", "/home/ubuntu/.hermes/config.yaml"}, {"HERMES", "config.yaml", "/home/ubuntu/.hermes/config.yaml"}, // case-insensitive {"hermes", "nested/a.yaml", "/home/ubuntu/.hermes/nested/a.yaml"}, + // claude-code (and any future containerized runtime) lands at /configs — + // the path user-data creates and bind-mounts into the container. Pre-fix + // this fell through to /opt/configs which doesn't exist on workspace EC2s + // and would 500 with EACCES on save (the bug that motivated this gate). + {"claude-code", "config.yaml", "/configs/config.yaml"}, + {"CLAUDE-CODE", "config.yaml", "/configs/config.yaml"}, // case-insensitive {"langgraph", "config.yaml", "/opt/configs/config.yaml"}, {"external", "skills.json", "/opt/configs/skills.json"}, - {"", "config.yaml", "/opt/configs/config.yaml"}, // empty → default - {"unknown", "config.yaml", "/opt/configs/config.yaml"}, // unknown → default + {"", "config.yaml", "/configs/config.yaml"}, // empty → default + {"unknown", "config.yaml", "/configs/config.yaml"}, // unknown → default } for _, tc := range cases { t.Run(tc.runtime+"/"+tc.relPath, func(t *testing.T) { From 41ae4ec50b5a1961f63d72f571839f73285adecb Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Mon, 4 May 2026 14:41:24 -0700 Subject: [PATCH 2/2] fix(mcp): wire source_workspace_id through dispatcher for memory + chat_history + workspace_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Self-review of merged PR #2766 (multi-workspace MCP routing) revealed a silent gap: PR #2766 added the ``source_workspace_id`` parameter to ``tool_commit_memory`` / ``tool_recall_memory`` / ``tool_chat_history`` / ``tool_get_workspace_info`` AND advertised it in the registry's input schemas, but the MCP server's dispatch arms in ``a2a_mcp_server.py`` were never updated to forward ``arguments["source_workspace_id"]`` to those four tools. Result: the schema lied. The LLM saw ``source_workspace_id`` as a valid tool parameter, could correctly populate it from the inbound message's ``arrival_workspace_id``, but the dispatcher dropped it on the floor and every memory commit / recall / chat-history fetch silently fell back to the module-level ``WORKSPACE_ID``. The cross-tenant leak that PR #2766 was meant to prevent is NOT prevented for these four tools without this follow-up. Why the existing dispatcher tests didn't catch it: the tests asserted return-value strings (``"working" in result``) but never asserted what arguments the inner tool was called with. So the dispatcher could ignore any kwarg and the tests would still pass. Fix: 1. Wire ``source_workspace_id=arguments.get("source_workspace_id") or None`` into the four dispatch arms, mirroring the pattern already used for ``delegate_task`` / ``delegate_task_async`` / ``check_task_status`` / ``list_peers``. 2. Add five tests in ``test_a2a_mcp_server.py`` that assert the inner tool was awaited with the exact source_workspace_id kwarg (``assert_awaited_once_with(..., source_workspace_id="ws-X")``) — substring-on-result tests can't catch this class of bug. 3. Add a fallback test ensuring single-workspace operators (no source_workspace_id key) get ``source_workspace_id=None`` — pinning the documented None contract over an accidental empty-string forward. Suite: 1705 passed (was 1700 + 5 new), 3 skipped, 2 xfailed. Co-Authored-By: Claude Opus 4.7 (1M context) --- workspace/a2a_mcp_server.py | 7 +- workspace/tests/test_a2a_mcp_server.py | 99 ++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 1 deletion(-) diff --git a/workspace/a2a_mcp_server.py b/workspace/a2a_mcp_server.py index ea8e7755..687c62fd 100644 --- a/workspace/a2a_mcp_server.py +++ b/workspace/a2a_mcp_server.py @@ -123,16 +123,20 @@ async def handle_tool_call(name: str, arguments: dict) -> str: source_workspace_id=arguments.get("source_workspace_id") or None, ) elif name == "get_workspace_info": - return await tool_get_workspace_info() + return await tool_get_workspace_info( + source_workspace_id=arguments.get("source_workspace_id") or None, + ) elif name == "commit_memory": return await tool_commit_memory( arguments.get("content", ""), arguments.get("scope", "LOCAL"), + source_workspace_id=arguments.get("source_workspace_id") or None, ) elif name == "recall_memory": return await tool_recall_memory( arguments.get("query", ""), arguments.get("scope", ""), + source_workspace_id=arguments.get("source_workspace_id") or None, ) elif name == "wait_for_message": return await tool_wait_for_message( @@ -151,6 +155,7 @@ async def handle_tool_call(name: str, arguments: dict) -> str: arguments.get("peer_id", ""), arguments.get("limit", 20), arguments.get("before_ts", ""), + source_workspace_id=arguments.get("source_workspace_id") or None, ) return f"Unknown tool: {name}" diff --git a/workspace/tests/test_a2a_mcp_server.py b/workspace/tests/test_a2a_mcp_server.py index 3e3d00ae..1a690830 100644 --- a/workspace/tests/test_a2a_mcp_server.py +++ b/workspace/tests/test_a2a_mcp_server.py @@ -71,6 +71,105 @@ async def test_handle_tool_call_unknown_tool(): assert "Unknown tool" in result +# --------------------------------------------------------------------------- +# source_workspace_id propagation — every workspace-scoped tool's schema +# advertises this parameter (PR #2766) so the LLM can route a memory commit +# or chat-history query through the workspace the inbound message arrived +# on. The dispatch path itself MUST forward the kwarg — otherwise the +# schema lies and every call silently falls back to the module-level +# WORKSPACE_ID, defeating multi-workspace isolation. These tests pin +# end-to-end argument flow on the four tools that ship in PR #2766. +# --------------------------------------------------------------------------- + + +async def test_dispatch_get_workspace_info_forwards_source_workspace_id(): + from a2a_mcp_server import handle_tool_call + mock = AsyncMock(return_value='{"id":"ws-X"}') + with patch("a2a_mcp_server.tool_get_workspace_info", new=mock): + await handle_tool_call( + "get_workspace_info", + {"source_workspace_id": "ws-X"}, + ) + mock.assert_awaited_once_with(source_workspace_id="ws-X") + + +async def test_dispatch_commit_memory_forwards_source_workspace_id(): + from a2a_mcp_server import handle_tool_call + mock = AsyncMock(return_value='{"success":true}') + with patch("a2a_mcp_server.tool_commit_memory", new=mock): + await handle_tool_call( + "commit_memory", + { + "content": "remember this", + "scope": "LOCAL", + "source_workspace_id": "ws-Y", + }, + ) + mock.assert_awaited_once_with( + "remember this", + "LOCAL", + source_workspace_id="ws-Y", + ) + + +async def test_dispatch_recall_memory_forwards_source_workspace_id(): + from a2a_mcp_server import handle_tool_call + mock = AsyncMock(return_value="[LOCAL] remember this") + with patch("a2a_mcp_server.tool_recall_memory", new=mock): + await handle_tool_call( + "recall_memory", + { + "query": "remember", + "scope": "LOCAL", + "source_workspace_id": "ws-Z", + }, + ) + mock.assert_awaited_once_with( + "remember", + "LOCAL", + source_workspace_id="ws-Z", + ) + + +async def test_dispatch_chat_history_forwards_source_workspace_id(): + from a2a_mcp_server import handle_tool_call + mock = AsyncMock(return_value="[]") + with patch("a2a_mcp_server.tool_chat_history", new=mock): + await handle_tool_call( + "chat_history", + { + "peer_id": "peer-A", + "limit": 10, + "source_workspace_id": "ws-W", + }, + ) + mock.assert_awaited_once_with( + "peer-A", + 10, + "", + source_workspace_id="ws-W", + ) + + +async def test_dispatch_omits_source_workspace_id_when_unset(): + """Single-workspace operators (no source_workspace_id key in args) must + forward None — preserving the legacy fallback to module-level WORKSPACE_ID + inside the tool. An accidental empty-string forward would also fall back, + but None is the documented contract.""" + from a2a_mcp_server import handle_tool_call + mock = AsyncMock(return_value='{"success":true}') + with patch("a2a_mcp_server.tool_commit_memory", new=mock): + await handle_tool_call( + "commit_memory", + {"content": "x", "scope": "LOCAL"}, + ) + mock.assert_awaited_once_with( + "x", + "LOCAL", + source_workspace_id=None, + ) + + async def test_handle_tool_call_missing_args_defaults(): """Test that missing args default to empty strings (defensive).""" from a2a_mcp_server import handle_tool_call