Merge branch 'staging' into test/2026-04-23-regression-suite

This commit is contained in:
molecule-ai[bot] 2026-04-24 01:55:06 +00:00 committed by GitHub
commit b1dce3405c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 410 additions and 3 deletions

View File

@ -402,6 +402,17 @@ func (h *WorkspaceHandler) issueAndInjectToken(ctx context.Context, workspaceID
cfg.ConfigFiles = make(map[string][]byte)
}
cfg.ConfigFiles[".auth_token"] = []byte(token)
// Option B (issue #1877): write token to volume BEFORE ContainerStart.
// Pre-write eliminates the race window where a restarted container could
// read a stale /configs/.auth_token before WriteFilesToContainer runs.
// This call is best-effort — if it fails (or provisioner is nil in tests)
// we still log and fall through; the runtime's heartbeat.py will retry
// on 401 if needed.
if h.provisioner != nil {
if writeErr := h.provisioner.WriteAuthTokenToVolume(ctx, workspaceID, token); writeErr != nil {
log.Printf("Provisioner: warning — pre-write token to volume failed for %s: %v (token still injected via WriteFilesToContainer after start)", workspaceID, writeErr)
}
}
log.Printf("Provisioner: injected fresh auth token for workspace %s into config volume", workspaceID)
}

View File

@ -749,6 +749,41 @@ func (p *Provisioner) ReadFromVolume(ctx context.Context, volumeName, filePath s
return clean, nil
}
// WriteAuthTokenToVolume writes the workspace auth token into the config volume
// BEFORE the container starts, eliminating the token-injection race window where
// a restarted container could read a stale token from /configs/.auth_token before
// WriteFilesToContainer writes the new one. Issue #1877.
//
// Uses a throwaway alpine container to write directly to the named volume,
// bypassing the container lifecycle entirely.
func (p *Provisioner) WriteAuthTokenToVolume(ctx context.Context, workspaceID, token string) error {
	volName := ConfigVolumeName(workspaceID)
	// The token travels via the environment rather than the command line so it
	// does not appear in the container's Cmd. "$TOKEN" must be quoted: unquoted
	// expansion is subject to field splitting, which would corrupt any token
	// containing whitespace.
	resp, err := p.cli.ContainerCreate(ctx, &container.Config{
		Image: "alpine",
		Cmd:   []string{"sh", "-c", `mkdir -p /vol && printf '%s' "$TOKEN" > /vol/.auth_token && chmod 0600 /vol/.auth_token`},
		Env:   []string{"TOKEN=" + token},
	}, &container.HostConfig{
		Binds: []string{volName + ":/vol"},
	}, nil, nil, "")
	if err != nil {
		return fmt.Errorf("failed to create token-write container: %w", err)
	}
	// Best-effort cleanup; Force removes the container even if it is still running.
	defer p.cli.ContainerRemove(ctx, resp.ID, container.RemoveOptions{Force: true})
	if err := p.cli.ContainerStart(ctx, resp.ID, container.StartOptions{}); err != nil {
		return fmt.Errorf("failed to start token-write container: %w", err)
	}
	waitCh, errCh := p.cli.ContainerWait(ctx, resp.ID, container.WaitConditionNotRunning)
	select {
	case status := <-waitCh:
		// A non-zero exit code means the shell pipeline failed (e.g. the
		// volume mount is read-only). Previously the status was discarded,
		// silently reporting a failed write as success.
		if status.StatusCode != 0 {
			return fmt.Errorf("token-write container exited with status %d", status.StatusCode)
		}
	case writeErr := <-errCh:
		if writeErr != nil {
			return fmt.Errorf("token-write container exited with error: %w", writeErr)
		}
	}
	log.Printf("Provisioner: wrote auth token to volume %s/.auth_token", volName)
	return nil
}
// execInContainer runs a command inside a running container as root.
// Best-effort: logs errors but does not fail the caller.
func (p *Provisioner) execInContainer(ctx context.Context, containerID string, cmd []string) {

View File

@ -17,7 +17,7 @@ from pathlib import Path
import httpx
from platform_auth import auth_headers
from platform_auth import auth_headers, refresh_cache
logger = logging.getLogger(__name__)
@ -102,6 +102,35 @@ class HeartbeatLoop:
self._consecutive_failures = 0
except Exception as e:
self._consecutive_failures += 1
# Issue #1877: if heartbeat 401'd, re-read the token from disk
# and retry once. This handles the platform's token-rotation race
# where WriteFilesToContainer hasn't finished writing the new
# token before the runtime boots and caches the old value.
is_401 = False
if isinstance(e, httpx.HTTPStatusError) and e.response.status_code == 401:
is_401 = True
if is_401:
logger.warning("Heartbeat 401 for %s — refreshing token cache and retrying once", self.workspace_id)
refresh_cache()
try:
await client.post(
f"{self.platform_url}/registry/heartbeat",
json={
"workspace_id": self.workspace_id,
"error_rate": self.error_rate,
"sample_error": self.sample_error,
"active_tasks": self.active_tasks,
"current_task": self.current_task,
"uptime_seconds": int(time.time() - self.start_time),
},
headers=auth_headers(),
)
self._consecutive_failures = 0
self.request_count += 1
except Exception:
# Retry also failed — fall through to the normal
# failure tracking below.
pass
if self._consecutive_failures <= 3 or self._consecutive_failures % MAX_CONSECUTIVE_FAILURES == 0:
logger.warning("Heartbeat failed (%d consecutive): %s", self._consecutive_failures, e)
if self._consecutive_failures >= MAX_CONSECUTIVE_FAILURES:

View File

@ -103,3 +103,14 @@ def clear_cache() -> None:
files between cases."""
global _cached_token
_cached_token = None
def refresh_cache() -> str | None:
    """Force re-read of the token from disk, discarding the in-process cache.

    Use this when a 401 response suggests the cached token is stale —
    e.g. after the platform rotates tokens during a restart (issue #1877).

    Returns:
        The (new) token value, or None if not found / on read error
        (whatever ``get_token()`` returns after the cache is cleared).
    """
    global _cached_token
    _cached_token = None
    return get_token()

View File

@ -24,7 +24,7 @@ Planned as the ecosystem matures (none are implemented yet — rule of
three: promote a class here only after 3+ plugins ship the same custom
shape via their own ``adapters/<runtime>.py``):
* ``MCPServerAdaptor`` install a plugin as an MCP server *(TODO)*
* :class:`MCPServerAdaptor` install a plugin as an MCP server (issue #847)
* ``DeepAgentsSubagentAdaptor`` register a DeepAgents sub-agent
(runtime-locked to deepagents) *(TODO)*
* ``LangGraphSubgraphAdaptor`` install a LangGraph sub-graph *(TODO)*
@ -339,5 +339,95 @@ def _deep_merge_hooks(existing: dict, fragment: dict) -> dict:
for top_key, val in fragment.items():
if top_key == "hooks":
continue
out.setdefault(top_key, val)
# mcpServers must be deep-merged: plugin A ships "firecrawl" and
# plugin B ships "github" → both entries land in settings.json.
# Using setdefault would skip the fragment's value when the key
# already exists, so we explicitly handle the dict case.
if top_key in out and isinstance(out[top_key], dict) and isinstance(val, dict):
out[top_key] = {**out[top_key], **val}
else:
out.setdefault(top_key, val)
return out
# ----------------------------------------------------------------------
# MCPServerAdaptor — issue #847.
# Promoted from custom adapters after 4 plugin proposals (molecule-firecrawl
# #512, molecule-github-mcp #520, molecule-browser-use #553, mcp-connector
# #573) all shipped the same pattern independently.
# ----------------------------------------------------------------------
class MCPServerAdaptor:
    """Sub-type adaptor for plugins that wrap an MCP server.

    The plugin ships:

    * ``settings-fragment.json`` with an ``mcpServers`` block — standard
      Claude Code ``claude_desktop_config`` format, e.g.:

      .. code-block:: json

          {
              "mcpServers": {
                  "my-server": {
                      "command": "npx",
                      "args": ["-y", "@org/my-mcp-server"]
                  }
              }
          }

    * ``skills/<name>/SKILL.md`` (optional) — agentskills.io skill docs;
      ``AgentskillsAdaptor`` logic handles these.
    * ``rules/*.md`` (optional) — always-on prose appended to CLAUDE.md;
      ``AgentskillsAdaptor`` logic handles these.
    * ``setup.sh`` (optional) — install npm packages, build binaries, etc.;
      ``AgentskillsAdaptor`` logic handles these.

    On ``install()``:

    1. ``settings-fragment.json`` — ``_install_claude_layer()`` merges the
       ``mcpServers`` block into ``<configs>/.claude/settings.json``.
       Hooks are also merged via the same path (so MCP-server plugins
       can also ship hooks if they need them).
    2. Skills + rules + setup.sh — delegated to ``AgentskillsAdaptor``.

    On ``uninstall()``:

    1. Skills + rules — delegated to ``AgentskillsAdaptor.uninstall()``.
    2. ``mcpServers`` entries are intentionally **not** removed from
       ``settings.json`` on uninstall. MCP server configurations are
       often shared with other tools or manually curated, so removing
       them could break a user's setup. The user must remove them
       manually if desired.

    Usage in the plugin's per-runtime adapter file:

    .. code-block:: python

        # plugins/<name>/adapters/claude_code.py
        from plugins_registry.builtins import MCPServerAdaptor as Adaptor
    """

    def __init__(self, plugin_name: str, runtime: str) -> None:
        # Both values are threaded through to the delegated AgentskillsAdaptor
        # so its file paths match this plugin/runtime pair.
        self.plugin_name = plugin_name
        self.runtime = runtime

    async def install(self, ctx: InstallContext) -> InstallResult:
        result = InstallResult(
            plugin_name=self.plugin_name,
            runtime=self.runtime,
            source="plugin",
        )
        # 1. Merge mcpServers (and any hooks) from settings-fragment.json.
        _install_claude_layer(ctx, result, self.plugin_name)
        # 2. Skills + rules + setup.sh — reuse AgentskillsAdaptor logic, and
        #    fold its outputs into this adaptor's result.
        sub = await AgentskillsAdaptor(self.plugin_name, self.runtime).install(ctx)
        result.files_written.extend(sub.files_written)
        result.warnings.extend(sub.warnings)
        return result

    async def uninstall(self, ctx: InstallContext) -> None:
        # Delegate to AgentskillsAdaptor for skills + rules cleanup.
        # NOTE: mcpServers entries are intentionally NOT removed (see class docstring).
        await AgentskillsAdaptor(self.plugin_name, self.runtime).uninstall(ctx)

View File

@ -481,3 +481,234 @@ def test_deep_merge_hooks_top_level_keys_merged():
# setdefault semantics: existing keys win, new keys are added
assert result["someKey"] == "old"
assert result["anotherKey"] == "value"
def test_deep_merge_hooks_mcpServers_deep_merged():
    """mcpServers dicts from two plugins must be merged, not replaced.

    Plugin A ships firecrawl, plugin B ships github — both land in the
    final settings.json (issue #847 motivation).
    """
    base = {
        "mcpServers": {
            "firecrawl": {
                "command": "npx",
                "args": ["-y", "@org/firecrawl-mcp"],
            },
        },
    }
    incoming = {
        "mcpServers": {
            "github": {
                "command": "npx",
                "args": ["-y", "@github/github-mcp-server"],
            },
        },
        "hooks": {},
    }
    merged = _deep_merge_hooks(base, incoming)
    servers = merged["mcpServers"]
    assert "firecrawl" in servers
    assert "github" in servers
    # Pre-existing entries must survive the merge untouched.
    assert servers["firecrawl"]["command"] == "npx"
def test_deep_merge_hooks_mcpServers_idempotent():
    """Re-merging the same mcpServers fragment must not duplicate entries."""
    frag = {
        "mcpServers": {
            "firecrawl": {"command": "npx", "args": ["-y", "@org/firecrawl-mcp"]},
        },
        "hooks": {},
    }
    # Apply the identical fragment three times in a row.
    merged = {}
    for _ in range(3):
        merged = _deep_merge_hooks(merged, frag)
    assert len(merged["mcpServers"]) == 1
def test_deep_merge_hooks_mcpServers_three_plugins():
    """Three plugins each contributing one mcpServer all land in final output."""
    names = ["firecrawl", "github", "browser-use"]
    merged = {}
    for server in names:
        fragment = {
            "mcpServers": {server: {"command": "npx", "args": [f"-y @{server}"]}},
            "hooks": {},
        }
        merged = _deep_merge_hooks(merged, fragment)
    assert set(merged["mcpServers"].keys()) == set(names)
# ---------------------------------------------------------------------------
# MCPServerAdaptor tests — issue #847
# ---------------------------------------------------------------------------
from plugins_registry.builtins import MCPServerAdaptor # noqa: E402
async def test_mcp_server_adaptor_install_writes_mcpServers(tmp_path: Path):
    """install() must merge mcpServers from settings-fragment.json into settings.json."""
    plugin_dir = tmp_path / "my-mcp-plugin"
    plugin_dir.mkdir()
    fragment = {
        "mcpServers": {
            "my-server": {
                "command": "npx",
                "args": ["-y", "@org/my-mcp-server"],
            }
        }
    }
    (plugin_dir / "settings-fragment.json").write_text(json.dumps(fragment))
    # A skill lets us verify the AgentskillsAdaptor delegation as well.
    skill_dir = plugin_dir / "skills" / "docs"
    skill_dir.mkdir(parents=True)
    (skill_dir / "SKILL.md").write_text("# docs skill\n")

    configs = tmp_path / "configs"
    configs.mkdir()
    adaptor = MCPServerAdaptor("my-mcp-plugin", "claude_code")
    result = await adaptor.install(_make_ctx(configs, plugin_dir))

    settings = json.loads((configs / ".claude" / "settings.json").read_text())
    assert "mcpServers" in settings
    assert "my-server" in settings["mcpServers"]
    assert settings["mcpServers"]["my-server"]["command"] == "npx"
    # Skills were also installed (AgentskillsAdaptor delegation).
    assert (configs / "skills" / "docs" / "SKILL.md").exists()
    assert ".claude/settings.json" in result.files_written
async def test_mcp_server_adaptor_install_no_fragment_no_warning(tmp_path: Path):
    """Plugin without settings-fragment.json must install silently (no settings.json created)."""
    plugin_dir = tmp_path / "bare-mcp"
    plugin_dir.mkdir()
    configs = tmp_path / "configs"
    configs.mkdir()

    adaptor = MCPServerAdaptor("bare-mcp", "claude_code")
    result = await adaptor.install(_make_ctx(configs, plugin_dir))

    # _install_claude_layer creates the .claude dir, but no settings.json is
    # written when the plugin ships no settings-fragment.json.
    assert not (configs / ".claude" / "settings.json").exists()
    assert result.warnings == []
async def test_mcp_server_adaptor_uninstall_does_not_remove_mcpServers(tmp_path: Path):
    """uninstall() must remove skills/rules but leave mcpServers in settings.json.

    Rationale: MCP server configs are often shared or manually curated;
    removing them on plugin uninstall could break the user's environment.
    """
    # Plugin ships all three layers: an mcpServers fragment, a rule, a skill.
    plugin = tmp_path / "my-mcp-plugin"
    plugin.mkdir()
    (plugin / "settings-fragment.json").write_text(
        json.dumps({
            "mcpServers": {
                "my-server": {
                    "command": "npx",
                    "args": ["-y", "@org/my-mcp-server"],
                }
            }
        })
    )
    (plugin / "rules").mkdir(parents=True)
    (plugin / "rules" / "r.md").write_text("- my rule\n")
    (plugin / "skills" / "s").mkdir(parents=True)
    (plugin / "skills" / "s" / "SKILL.md").write_text("# skill\n")
    configs = tmp_path / "configs"
    configs.mkdir()
    adaptor = MCPServerAdaptor("my-mcp-plugin", "claude_code")
    await adaptor.install(_make_ctx(configs, plugin))
    # Sanity-check the install landed before exercising uninstall.
    assert (configs / "skills" / "s").exists()
    assert "my-server" in json.loads((configs / ".claude" / "settings.json").read_text()).get("mcpServers", {})
    await adaptor.uninstall(_make_ctx(configs, plugin))
    # Skills and rules removed by AgentskillsAdaptor delegation.
    assert not (configs / "skills" / "s").exists()
    assert not (configs / "CLAUDE.md").exists() or "# Plugin: my-mcp-plugin" not in (configs / "CLAUDE.md").read_text()
    # mcpServers intentionally kept (see MCPServerAdaptor docstring rationale).
    settings = json.loads((configs / ".claude" / "settings.json").read_text())
    assert "mcpServers" in settings
    assert "my-server" in settings["mcpServers"]
async def test_mcp_server_adaptor_install_merges_with_existing_settings(tmp_path: Path):
    """install() must deep-merge mcpServers with an already-populated settings.json."""
    plugin_dir = tmp_path / "second-mcp"
    plugin_dir.mkdir()
    incoming = {
        "mcpServers": {
            "github": {
                "command": "npx",
                "args": ["-y", "@github/github-mcp-server"],
            }
        }
    }
    (plugin_dir / "settings-fragment.json").write_text(json.dumps(incoming))

    configs = tmp_path / "configs"
    configs.mkdir()
    # Simulate an earlier plugin: settings.json already carries one mcpServer.
    claude_dir = configs / ".claude"
    claude_dir.mkdir(parents=True)
    preexisting = {
        "mcpServers": {
            "firecrawl": {
                "command": "npx",
                "args": ["-y", "@firecrawl/firecrawl-mcp"],
            }
        }
    }
    (claude_dir / "settings.json").write_text(json.dumps(preexisting))

    await MCPServerAdaptor("second-mcp", "claude_code").install(
        _make_ctx(configs, plugin_dir)
    )

    merged = json.loads((claude_dir / "settings.json").read_text())
    assert "firecrawl" in merged["mcpServers"]
    assert "github" in merged["mcpServers"]
async def test_mcp_server_adaptor_install_also_handles_hooks(tmp_path: Path):
    """An MCPServer plugin can also ship PreToolUse/PostToolUse hooks via the
    same settings-fragment.json; they must be merged without duplication."""
    plugin = tmp_path / "mcp-with-hooks"
    plugin.mkdir()
    # Executable hook script alongside the fragment that references it.
    (plugin / "hooks").mkdir(parents=True)
    (plugin / "hooks" / "lint.sh").write_text("#!/bin/bash\necho ok\n")
    (plugin / "hooks" / "lint.sh").chmod(0o755)
    (plugin / "settings-fragment.json").write_text(
        json.dumps({
            "mcpServers": {
                "my-server": {"command": "npx", "args": ["-y", "@x/server"]}
            },
            "hooks": {
                "PreToolUse": [
                    {
                        "matcher": "Bash",
                        "hooks": [{"type": "command", "command": "${CLAUDE_DIR}/hooks/lint.sh"}],
                    }
                ]
            },
        })
    )
    configs = tmp_path / "configs"
    configs.mkdir()
    await MCPServerAdaptor("mcp-with-hooks", "claude_code").install(_make_ctx(configs, plugin))
    settings = json.loads((configs / ".claude" / "settings.json").read_text())
    # Both the server entry and exactly one (non-duplicated) hook must land.
    assert "my-server" in settings["mcpServers"]
    assert len(settings["hooks"]["PreToolUse"]) == 1
    assert settings["hooks"]["PreToolUse"][0]["matcher"] == "Bash"
import json # noqa: E402 — also used in new tests above