From b5e2142c461e951ef3e665653bafe435a32ffdf4 Mon Sep 17 00:00:00 2001 From: Molecule AI Core-BE Date: Thu, 23 Apr 2026 22:46:48 +0000 Subject: [PATCH 1/3] =?UTF-8?q?fix(#1877):=20close=20token-rotation=20race?= =?UTF-8?q?=20on=20restart=20=E2=80=94=20Option=20A+Option=20B=20combined?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Platform side (Option B): - provisioner.go: add WriteAuthTokenToVolume() — writes .auth_token to the Docker named volume BEFORE ContainerStart using a throwaway alpine container, eliminating the race window where a restarted container could read a stale token before WriteFilesToContainer writes the new one. - workspace_provision.go: call WriteAuthTokenToVolume() in issueAndInjectToken as a best-effort pre-write before the container starts. Runtime side (Option A): - heartbeat.py: on HTTPStatusError 401 from /registry/heartbeat, call refresh_cache() to force re-read of /configs/.auth_token from disk, then retry the heartbeat once. Fall through to normal failure tracking if the retry also fails. - platform_auth.py: add refresh_cache() which discards the in-process _cached_token and calls get_token() to re-read from disk. Together these eliminate the >1 consecutive 401 window described in issue #1877. Pre-write (B) is the primary fix; runtime retry (A) is the self-healing fallback for any residual race. 
Co-Authored-By: Claude Sonnet 4.6 --- .../internal/handlers/workspace_provision.go | 8 +++++ .../internal/provisioner/provisioner.go | 35 +++++++++++++++++++ workspace/heartbeat.py | 31 +++++++++++++++- workspace/platform_auth.py | 11 ++++++ 4 files changed, 84 insertions(+), 1 deletion(-) diff --git a/workspace-server/internal/handlers/workspace_provision.go b/workspace-server/internal/handlers/workspace_provision.go index 5e74ee73..98e70875 100644 --- a/workspace-server/internal/handlers/workspace_provision.go +++ b/workspace-server/internal/handlers/workspace_provision.go @@ -402,6 +402,14 @@ func (h *WorkspaceHandler) issueAndInjectToken(ctx context.Context, workspaceID cfg.ConfigFiles = make(map[string][]byte) } cfg.ConfigFiles[".auth_token"] = []byte(token) + // Option B (issue #1877): write token to volume BEFORE ContainerStart. + // Pre-write eliminates the race window where a restarted container could + // read a stale /configs/.auth_token before WriteFilesToContainer runs. + // This call is best-effort — if it fails we still log and fall through; + // the runtime's heartbeat.py will retry on 401 if needed. 
+ if writeErr := h.provisioner.WriteAuthTokenToVolume(ctx, workspaceID, token); writeErr != nil { + log.Printf("Provisioner: warning — pre-write token to volume failed for %s: %v (token still injected via WriteFilesToContainer after start)", workspaceID, writeErr) + } log.Printf("Provisioner: injected fresh auth token for workspace %s into config volume", workspaceID) } diff --git a/workspace-server/internal/provisioner/provisioner.go b/workspace-server/internal/provisioner/provisioner.go index 481f09b7..ac04b15f 100644 --- a/workspace-server/internal/provisioner/provisioner.go +++ b/workspace-server/internal/provisioner/provisioner.go @@ -749,6 +749,41 @@ func (p *Provisioner) ReadFromVolume(ctx context.Context, volumeName, filePath s return clean, nil } +// WriteAuthTokenToVolume writes the workspace auth token into the config volume +// BEFORE the container starts, eliminating the token-injection race window where +// a restarted container could read a stale token from /configs/.auth_token before +// WriteFilesToContainer writes the new one. Issue #1877. +// +// Uses a throwaway alpine container to write directly to the named volume, +// bypassing the container lifecycle entirely. 
+func (p *Provisioner) WriteAuthTokenToVolume(ctx context.Context, workspaceID, token string) error { + volName := ConfigVolumeName(workspaceID) + resp, err := p.cli.ContainerCreate(ctx, &container.Config{ + Image: "alpine", + Cmd: []string{"sh", "-c", "mkdir -p /vol && printf '%s' $TOKEN > /vol/.auth_token && chmod 0600 /vol/.auth_token"}, + Env: []string{"TOKEN=" + token}, + }, &container.HostConfig{ + Binds: []string{volName + ":/vol"}, + }, nil, nil, "") + if err != nil { + return fmt.Errorf("failed to create token-write container: %w", err) + } + defer p.cli.ContainerRemove(ctx, resp.ID, container.RemoveOptions{Force: true}) + if err := p.cli.ContainerStart(ctx, resp.ID, container.StartOptions{}); err != nil { + return fmt.Errorf("failed to start token-write container: %w", err) + } + waitCh, errCh := p.cli.ContainerWait(ctx, resp.ID, container.WaitConditionNotRunning) + select { + case <-waitCh: + case writeErr := <-errCh: + if writeErr != nil { + return fmt.Errorf("token-write container exited with error: %w", writeErr) + } + } + log.Printf("Provisioner: wrote auth token to volume %s/.auth_token", volName) + return nil +} + // execInContainer runs a command inside a running container as root. // Best-effort: logs errors but does not fail the caller. func (p *Provisioner) execInContainer(ctx context.Context, containerID string, cmd []string) { diff --git a/workspace/heartbeat.py b/workspace/heartbeat.py index a67bec7b..1eb5b4fd 100644 --- a/workspace/heartbeat.py +++ b/workspace/heartbeat.py @@ -17,7 +17,7 @@ from pathlib import Path import httpx -from platform_auth import auth_headers +from platform_auth import auth_headers, refresh_cache logger = logging.getLogger(__name__) @@ -102,6 +102,35 @@ class HeartbeatLoop: self._consecutive_failures = 0 except Exception as e: self._consecutive_failures += 1 + # Issue #1877: if heartbeat 401'd, re-read the token from disk + # and retry once. 
This handles the platform's token-rotation race + # where WriteFilesToContainer hasn't finished writing the new + # token before the runtime boots and caches the old value. + is_401 = False + if isinstance(e, httpx.HTTPStatusError) and e.response.status_code == 401: + is_401 = True + if is_401: + logger.warning("Heartbeat 401 for %s — refreshing token cache and retrying once", self.workspace_id) + refresh_cache() + try: + await client.post( + f"{self.platform_url}/registry/heartbeat", + json={ + "workspace_id": self.workspace_id, + "error_rate": self.error_rate, + "sample_error": self.sample_error, + "active_tasks": self.active_tasks, + "current_task": self.current_task, + "uptime_seconds": int(time.time() - self.start_time), + }, + headers=auth_headers(), + ) + self._consecutive_failures = 0 + self.request_count += 1 + except Exception: + # Retry also failed — fall through to the normal + # failure tracking below. + pass if self._consecutive_failures <= 3 or self._consecutive_failures % MAX_CONSECUTIVE_FAILURES == 0: logger.warning("Heartbeat failed (%d consecutive): %s", self._consecutive_failures, e) if self._consecutive_failures >= MAX_CONSECUTIVE_FAILURES: diff --git a/workspace/platform_auth.py b/workspace/platform_auth.py index d4a1e180..39a17075 100644 --- a/workspace/platform_auth.py +++ b/workspace/platform_auth.py @@ -103,3 +103,14 @@ def clear_cache() -> None: files between cases.""" global _cached_token _cached_token = None + + +def refresh_cache() -> str | None: + """Force re-read of the token from disk, discarding the in-process cache. + + Use this when a 401 response suggests the cached token is stale — + e.g. after the platform rotates tokens during a restart (issue #1877). 
+ Returns the (new) token value or None if not found/error.""" + global _cached_token + _cached_token = None + return get_token() From 88c929875e20c9c83e069950739540d4a55d7220 Mon Sep 17 00:00:00 2001 From: Molecule AI Core-BE Date: Thu, 23 Apr 2026 22:52:17 +0000 Subject: [PATCH 2/3] fix(#1877): nil provisioner guard in issueAndInjectToken MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix panic in TestIssueAndInjectToken_HappyPath where h.provisioner is nil (the handler was created without a real provisioner in unit tests). Add nil guard so the pre-write step is skipped gracefully — token is still injected into ConfigFiles as before, and the runtime-side 401 retry handles any race. Co-Authored-By: Claude Sonnet 4.6 --- .../internal/handlers/workspace_provision.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/workspace-server/internal/handlers/workspace_provision.go b/workspace-server/internal/handlers/workspace_provision.go index 98e70875..0ebb0503 100644 --- a/workspace-server/internal/handlers/workspace_provision.go +++ b/workspace-server/internal/handlers/workspace_provision.go @@ -405,10 +405,13 @@ func (h *WorkspaceHandler) issueAndInjectToken(ctx context.Context, workspaceID // Option B (issue #1877): write token to volume BEFORE ContainerStart. // Pre-write eliminates the race window where a restarted container could // read a stale /configs/.auth_token before WriteFilesToContainer runs. - // This call is best-effort — if it fails we still log and fall through; - // the runtime's heartbeat.py will retry on 401 if needed. 
- if writeErr := h.provisioner.WriteAuthTokenToVolume(ctx, workspaceID, token); writeErr != nil { - log.Printf("Provisioner: warning — pre-write token to volume failed for %s: %v (token still injected via WriteFilesToContainer after start)", workspaceID, writeErr) + // This call is best-effort — if it fails (or provisioner is nil in tests) + // we still log and fall through; the runtime's heartbeat.py will retry + // on 401 if needed. + if h.provisioner != nil { + if writeErr := h.provisioner.WriteAuthTokenToVolume(ctx, workspaceID, token); writeErr != nil { + log.Printf("Provisioner: warning — pre-write token to volume failed for %s: %v (token still injected via WriteFilesToContainer after start)", workspaceID, writeErr) + } } log.Printf("Provisioner: injected fresh auth token for workspace %s into config volume", workspaceID) } From 61c5f8ad9a0c10d3c650a94e12579ad41f327eb8 Mon Sep 17 00:00:00 2001 From: Molecule AI Plugin-Dev Date: Thu, 23 Apr 2026 22:12:10 +0000 Subject: [PATCH 3/3] feat(plugin): implement MCPServerAdaptor (issue #847) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rule-of-three threshold met: 4 plugin proposals (molecule-firecrawl #512, molecule-github-mcp #520, molecule-browser-use #553, mcp-connector #573) all independently shipped the same mcpServers-adapter pattern. Adds MCPServerAdaptor to builtins.py — plugins wrapping an MCP server now declare `from plugins_registry.builtins import MCPServerAdaptor as Adaptor` in their per-runtime adapter file. The adaptor: - Merges mcpServers from settings-fragment.json into /.claude/settings.json (deep-merge so multiple plugins' servers coexist). - Optionally ships skills/rules/setup.sh via AgentskillsAdaptor delegation. - On uninstall: removes skills/rules but intentionally leaves mcpServers entries in settings.json (users may share configs with other tools or have manually curated entries). 
Also fixes _deep_merge_hooks: non-hook top-level keys that are dicts (e.g. mcpServers) are now deep-merged with existing values instead of being skipped via setdefault. Co-Authored-By: Claude Sonnet 4.6 --- workspace/plugins_registry/builtins.py | 94 ++++++++- workspace/tests/test_plugins_builtins.py | 231 +++++++++++++++++++++++ 2 files changed, 323 insertions(+), 2 deletions(-) diff --git a/workspace/plugins_registry/builtins.py b/workspace/plugins_registry/builtins.py index 9816ee85..c065aaff 100644 --- a/workspace/plugins_registry/builtins.py +++ b/workspace/plugins_registry/builtins.py @@ -24,7 +24,7 @@ Planned as the ecosystem matures (none are implemented yet — rule of three: promote a class here only after 3+ plugins ship the same custom shape via their own ``adapters/<runtime>.py``): -* ``MCPServerAdaptor`` — install a plugin as an MCP server *(TODO)* +* :class:`MCPServerAdaptor` — install a plugin as an MCP server ✅ (issue #847) * ``DeepAgentsSubagentAdaptor`` — register a DeepAgents sub-agent (runtime-locked to deepagents) *(TODO)* * ``LangGraphSubgraphAdaptor`` — install a LangGraph sub-graph *(TODO)* @@ -339,5 +339,41 @@ def _deep_merge_hooks(existing: dict, fragment: dict) -> dict: for top_key, val in fragment.items(): if top_key == "hooks": continue - out.setdefault(top_key, val) + # mcpServers must be deep-merged: plugin A ships "firecrawl" and + # plugin B ships "github" → both entries land in settings.json. + # Using setdefault would skip the fragment's value when the key + # already exists, so we explicitly handle the dict case. + if top_key in out and isinstance(out[top_key], dict) and isinstance(val, dict): + out[top_key] = {**out[top_key], **val} + else: + out.setdefault(top_key, val) return out + + +# ---------------------------------------------------------------------- +# MCPServerAdaptor — issue #847. 
+# Promoted from custom adapters after 4 plugin proposals (molecule-firecrawl +# #512, molecule-github-mcp #520, molecule-browser-use #553, mcp-connector +# #573) all shipped the same pattern independently. +# ---------------------------------------------------------------------- + + +class MCPServerAdaptor: + """Sub-type adaptor for plugins that wrap an MCP server. + + The plugin ships: + + * ``settings-fragment.json`` with an ``mcpServers`` block — standard + Claude Code ``claude_desktop_config`` format, e.g.: + + .. code-block:: json + + { + "mcpServers": { + "my-server": { + "command": "npx", + "args": ["-y", "@org/my-mcp-server"] + } + } + } + + * ``skills/<skill-name>/SKILL.md`` (optional) — agentskills.io skill docs; + ``AgentskillsAdaptor`` logic handles these. + * ``rules/*.md`` (optional) — always-on prose appended to CLAUDE.md; + ``AgentskillsAdaptor`` logic handles these. + * ``setup.sh`` (optional) — install npm packages, build binaries, etc.; + ``AgentskillsAdaptor`` logic handles these. + + On ``install()``: + + 1. ``settings-fragment.json`` → ``_install_claude_layer()`` merges the + ``mcpServers`` block into ``/.claude/settings.json``. + Hooks are also merged via the same path (so MCP-server plugins + can also ship hooks if they need them). + 2. Skills + rules + setup.sh → delegated to ``AgentskillsAdaptor``. + + On ``uninstall()``: + + 1. Skills + rules → delegated to ``AgentskillsAdaptor.uninstall()``. + 2. ``mcpServers`` entries are intentionally **not** removed from + ``settings.json`` on uninstall. MCP server configurations are + often shared with other tools or manually curated, so removing + them could break a user's setup. The user must remove them + manually if desired. + + Usage — in the plugin's per-runtime adapter file: + + .. 
code-block:: python + + # plugins/<plugin-name>/adapters/claude_code.py + from plugins_registry.builtins import MCPServerAdaptor as Adaptor + """ + + def __init__(self, plugin_name: str, runtime: str) -> None: + self.plugin_name = plugin_name + self.runtime = runtime + + async def install(self, ctx: InstallContext) -> InstallResult: + result = InstallResult( + plugin_name=self.plugin_name, + runtime=self.runtime, + source="plugin", + ) + # 1. Merge mcpServers (and any hooks) from settings-fragment.json. + _install_claude_layer(ctx, result, self.plugin_name) + # 2. Skills + rules + setup.sh — reuse AgentskillsAdaptor logic. + sub = await AgentskillsAdaptor(self.plugin_name, self.runtime).install(ctx) + result.files_written.extend(sub.files_written) + result.warnings.extend(sub.warnings) + return result + + async def uninstall(self, ctx: InstallContext) -> None: + # Delegate to AgentskillsAdaptor for skills + rules cleanup. + # NOTE: mcpServers entries are intentionally NOT removed (see class docstring). + await AgentskillsAdaptor(self.plugin_name, self.runtime).uninstall(ctx) diff --git a/workspace/tests/test_plugins_builtins.py b/workspace/tests/test_plugins_builtins.py index 31d14cae..fe6b5607 100644 --- a/workspace/tests/test_plugins_builtins.py +++ b/workspace/tests/test_plugins_builtins.py @@ -481,3 +481,234 @@ def test_deep_merge_hooks_top_level_keys_merged(): # setdefault semantics: existing keys win, new keys are added assert result["someKey"] == "old" assert result["anotherKey"] == "value" + + +def test_deep_merge_hooks_mcpServers_deep_merged(): + """mcpServers dicts from two plugins must be merged, not replaced. + + Plugin A ships firecrawl, plugin B ships github → both land in the + final settings.json (issue #847 motivation). 
+ """ + existing = { + "mcpServers": { + "firecrawl": { + "command": "npx", + "args": ["-y", "@org/firecrawl-mcp"], + } + } + } + fragment = { + "mcpServers": { + "github": { + "command": "npx", + "args": ["-y", "@github/github-mcp-server"], + } + }, + "hooks": {}, + } + result = _deep_merge_hooks(existing, fragment) + assert "firecrawl" in result["mcpServers"] + assert "github" in result["mcpServers"] + # existing entries must not be overwritten + assert result["mcpServers"]["firecrawl"]["command"] == "npx" + + +def test_deep_merge_hooks_mcpServers_idempotent(): + """Re-merging the same mcpServers fragment must not duplicate entries.""" + fragment = { + "mcpServers": { + "firecrawl": {"command": "npx", "args": ["-y", "@org/firecrawl-mcp"]} + }, + "hooks": {}, + } + state = _deep_merge_hooks({}, fragment) + state = _deep_merge_hooks(state, fragment) + state = _deep_merge_hooks(state, fragment) + assert len(state["mcpServers"]) == 1 + + +def test_deep_merge_hooks_mcpServers_three_plugins(): + """Three plugins each contributing one mcpServer all land in final output.""" + state = {} + for name in ["firecrawl", "github", "browser-use"]: + fragment = { + "mcpServers": {name: {"command": "npx", "args": [f"-y @{name}"]}}, + "hooks": {}, + } + state = _deep_merge_hooks(state, fragment) + + assert set(state["mcpServers"].keys()) == {"firecrawl", "github", "browser-use"} + + +# --------------------------------------------------------------------------- +# MCPServerAdaptor tests — issue #847 +# --------------------------------------------------------------------------- + +from plugins_registry.builtins import MCPServerAdaptor # noqa: E402 + + +async def test_mcp_server_adaptor_install_writes_mcpServers(tmp_path: Path): + """install() must merge mcpServers from settings-fragment.json into settings.json.""" + plugin = tmp_path / "my-mcp-plugin" + plugin.mkdir() + (plugin / "settings-fragment.json").write_text( + json.dumps({ + "mcpServers": { + "my-server": { + "command": 
"npx", + "args": ["-y", "@org/my-mcp-server"], + } + } + }) + ) + # Also add a skill so we can verify AgentskillsAdaptor delegation. + (plugin / "skills" / "docs").mkdir(parents=True) + (plugin / "skills" / "docs" / "SKILL.md").write_text("# docs skill\n") + + configs = tmp_path / "configs" + configs.mkdir() + result = await MCPServerAdaptor("my-mcp-plugin", "claude_code").install( + _make_ctx(configs, plugin) + ) + + settings = json.loads((configs / ".claude" / "settings.json").read_text()) + assert "mcpServers" in settings + assert "my-server" in settings["mcpServers"] + assert settings["mcpServers"]["my-server"]["command"] == "npx" + # Skills were also installed (AgentskillsAdaptor delegation). + assert (configs / "skills" / "docs" / "SKILL.md").exists() + assert ".claude/settings.json" in result.files_written + + +async def test_mcp_server_adaptor_install_no_fragment_no_warning(tmp_path: Path): + """Plugin without settings-fragment.json must install silently (no settings.json created).""" + plugin = tmp_path / "bare-mcp" + plugin.mkdir() + configs = tmp_path / "configs" + configs.mkdir() + + result = await MCPServerAdaptor("bare-mcp", "claude_code").install( + _make_ctx(configs, plugin) + ) + # _install_claude_layer creates .claude dir, but no settings.json when + # there's no settings-fragment.json. + assert not (configs / ".claude" / "settings.json").exists() + assert result.warnings == [] + + +async def test_mcp_server_adaptor_uninstall_does_not_remove_mcpServers(tmp_path: Path): + """uninstall() must remove skills/rules but leave mcpServers in settings.json. + + Rationale: MCP server configs are often shared or manually curated; + removing them on plugin uninstall could break the user's environment. 
+ """ + plugin = tmp_path / "my-mcp-plugin" + plugin.mkdir() + (plugin / "settings-fragment.json").write_text( + json.dumps({ + "mcpServers": { + "my-server": { + "command": "npx", + "args": ["-y", "@org/my-mcp-server"], + } + } + }) + ) + (plugin / "rules").mkdir(parents=True) + (plugin / "rules" / "r.md").write_text("- my rule\n") + (plugin / "skills" / "s").mkdir(parents=True) + (plugin / "skills" / "s" / "SKILL.md").write_text("# skill\n") + + configs = tmp_path / "configs" + configs.mkdir() + adaptor = MCPServerAdaptor("my-mcp-plugin", "claude_code") + + await adaptor.install(_make_ctx(configs, plugin)) + assert (configs / "skills" / "s").exists() + assert "my-server" in json.loads((configs / ".claude" / "settings.json").read_text()).get("mcpServers", {}) + + await adaptor.uninstall(_make_ctx(configs, plugin)) + + # Skills and rules removed by AgentskillsAdaptor delegation. + assert not (configs / "skills" / "s").exists() + assert not (configs / "CLAUDE.md").exists() or "# Plugin: my-mcp-plugin" not in (configs / "CLAUDE.md").read_text() + # mcpServers intentionally kept. + settings = json.loads((configs / ".claude" / "settings.json").read_text()) + assert "mcpServers" in settings + assert "my-server" in settings["mcpServers"] + + +async def test_mcp_server_adaptor_install_merges_with_existing_settings(tmp_path: Path): + """install() must deep-merge mcpServers with an already-populated settings.json.""" + plugin = tmp_path / "second-mcp" + plugin.mkdir() + (plugin / "settings-fragment.json").write_text( + json.dumps({ + "mcpServers": { + "github": { + "command": "npx", + "args": ["-y", "@github/github-mcp-server"], + } + } + }) + ) + + configs = tmp_path / "configs" + configs.mkdir() + # Pre-existing settings.json with an mcpServer already present. 
+ claude_dir = configs / ".claude" + claude_dir.mkdir(parents=True) + (claude_dir / "settings.json").write_text( + json.dumps({ + "mcpServers": { + "firecrawl": { + "command": "npx", + "args": ["-y", "@firecrawl/firecrawl-mcp"], + } + } + }) + ) + + await MCPServerAdaptor("second-mcp", "claude_code").install(_make_ctx(configs, plugin)) + + settings = json.loads((claude_dir / "settings.json").read_text()) + assert "firecrawl" in settings["mcpServers"] + assert "github" in settings["mcpServers"] + + +async def test_mcp_server_adaptor_install_also_handles_hooks(tmp_path: Path): + """An MCPServer plugin can also ship PreToolUse/PostToolUse hooks via the + same settings-fragment.json; they must be merged without duplication.""" + plugin = tmp_path / "mcp-with-hooks" + plugin.mkdir() + (plugin / "hooks").mkdir(parents=True) + (plugin / "hooks" / "lint.sh").write_text("#!/bin/bash\necho ok\n") + (plugin / "hooks" / "lint.sh").chmod(0o755) + (plugin / "settings-fragment.json").write_text( + json.dumps({ + "mcpServers": { + "my-server": {"command": "npx", "args": ["-y", "@x/server"]} + }, + "hooks": { + "PreToolUse": [ + { + "matcher": "Bash", + "hooks": [{"type": "command", "command": "${CLAUDE_DIR}/hooks/lint.sh"}], + } + ] + }, + }) + ) + + configs = tmp_path / "configs" + configs.mkdir() + await MCPServerAdaptor("mcp-with-hooks", "claude_code").install(_make_ctx(configs, plugin)) + + settings = json.loads((configs / ".claude" / "settings.json").read_text()) + assert "my-server" in settings["mcpServers"] + assert len(settings["hooks"]["PreToolUse"]) == 1 + assert settings["hooks"]["PreToolUse"][0]["matcher"] == "Bash" + + +import json # noqa: E402 — also used in new tests above +