fix(runtime): auto-fallback CONFIGS_DIR for non-container hosts (closes #2458)

The runtime persists per-workspace state (`.auth_token`,
`.platform_inbound_secret`, `.mcp_inbox_cursor`) under `/configs` —
the workspace-EC2 mount path. Inside a container that's writable,
agent-owned. Outside a container, `/configs` either doesn't exist or
isn't writable by an unprivileged user.

The default broke the external-runtime path (`pip install
molecule-ai-workspace-runtime` + `molecule-mcp` on a Mac/Linux
laptop). First heartbeat tries to persist `.platform_inbound_secret`
and crashes:

    [Errno 30] Read-only file system: '/configs'

The heartbeat thread logs and dies. Workspace flips offline within
a minute. Operator sees no actionable error.

Adds workspace/configs_dir.py — single resolution point with a tiered
fallback:

  1. CONFIGS_DIR env var, if set — explicit operator override
     (preserves existing tests + custom deployments verbatim).
  2. /configs — if it exists AND is writable. In-container default;
     unchanged behavior for every prod workspace.
  3. ~/.molecule-workspace — created with mode 0700 so per-file 0600
     perms aren't undermined by a world-readable parent.

Migrates the four readers (platform_auth, platform_inbound_auth,
mcp_cli, inbox) to call configs_dir.resolve() instead of
inlining `Path(os.environ.get("CONFIGS_DIR", "/configs"))`.

Existing tests that assert the old `/configs`-as-default contract
updated to assert the new contract: when CONFIGS_DIR is unset, path
resolves to a writable location — `/configs` if present, fallback
otherwise. Tests skip the fallback branch on hosts that DO have a
writable `/configs` (CI containers).

Verified the original repro is fixed: with no CONFIGS_DIR set on
macOS, configs_dir.resolve() returns ~/.molecule-workspace, the dir
exists, and writes succeed.

Test suite: 1454 passed, 3 skipped, 2 xfailed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hongming Wang 2026-05-01 13:07:55 -07:00
parent 0b809cfa62
commit c636022d2f
9 changed files with 249 additions and 31 deletions

61
workspace/configs_dir.py Normal file
View File

@ -0,0 +1,61 @@
"""Resolve the configs directory used by the workspace runtime.
The runtime persists per-workspace state to a single directory:
``.auth_token`` (platform_auth), ``.platform_inbound_secret``
(platform_inbound_auth), ``.mcp_inbox_cursor`` (inbox). Inside a
workspace EC2 container that directory is ``/configs`` a tmpfs/EBS
mount owned by the agent user, populated by the provisioner before
runtime boot.
Outside a container operators running ``molecule-mcp`` on a laptop
for the external-runtime path ``/configs`` doesn't exist (or, if it
does, isn't writable by an unprivileged user). The default would
silently fail on the first heartbeat: ``.platform_inbound_secret``
write hits ``Read-only file system: '/configs'``, the heartbeat thread
logs and dies, the workspace flips offline within a minute. The
operator sees no actionable error.
This module is the single resolution point. Resolution order:
1. ``CONFIGS_DIR`` env var, if set explicit operator override.
2. ``/configs`` used iff the path exists AND is writable. This
preserves the in-container default for every existing deployment.
3. ``$HOME/.molecule-workspace`` the non-container fallback,
created with mode 0700 so per-file 0600 perms aren't undermined
by a world-readable parent.
Not cached: callers (heartbeat thread, MCP tools) hit this at most a
few times per second; reading the env var + one ``stat()`` call is
cheap, and the existing call sites read ``os.environ`` live so tests
that monkeypatch ``CONFIGS_DIR`` between cases keep working.
Issue: Molecule-AI/molecule-core#2458.
"""
from __future__ import annotations
import os
from pathlib import Path
def resolve() -> Path:
"""Return the configs directory, creating the home fallback if needed."""
explicit = os.environ.get("CONFIGS_DIR", "").strip()
if explicit:
path = Path(explicit)
path.mkdir(parents=True, exist_ok=True)
return path
in_container = Path("/configs")
if in_container.exists() and os.access(str(in_container), os.W_OK):
return in_container
home_path = Path.home() / ".molecule-workspace"
home_path.mkdir(parents=True, exist_ok=True, mode=0o700)
return home_path
def reset_cache() -> None:
"""No-op kept for API stability; this module is stateless. Tests
that called reset_cache when the cached prototype was in tree
keep working without modification."""
return

View File

@ -55,6 +55,8 @@ from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable
import configs_dir as _configs_dir
logger = logging.getLogger(__name__)
# Poll cadence. 5s mirrors the molecule-mcp-claude-channel plugin's
@ -516,11 +518,10 @@ def start_poller_thread(
def default_cursor_path() -> Path:
"""Standard cursor location: ``${CONFIGS_DIR}/.mcp_inbox_cursor``.
"""Standard cursor location: ``<resolved configs dir>/.mcp_inbox_cursor``.
Mirrors mcp_cli's CONFIGS_DIR resolution so a single
operator-facing env var controls every persisted state file
(.auth_token + .mcp_inbox_cursor).
Resolved via configs_dir so the cursor lives next to .auth_token
+ .platform_inbound_secret regardless of whether the runtime is
in-container (/configs) or external (~/.molecule-workspace).
"""
configs_dir = Path(os.environ.get("CONFIGS_DIR", "/configs"))
return configs_dir / ".mcp_inbox_cursor"
return _configs_dir.resolve() / ".mcp_inbox_cursor"

View File

@ -41,6 +41,8 @@ import threading
import time
from pathlib import Path
import configs_dir as _configs_dir
logger = logging.getLogger(__name__)
# Heartbeat cadence. Must be tighter than healthsweep's stale window
@ -375,9 +377,10 @@ def main() -> None:
missing.append("PLATFORM_URL")
# Token can come from env OR file — only flag when both are absent.
# Mirrors platform_auth.get_token's resolution order (file-first,
# env-fallback).
configs_dir = Path(os.environ.get("CONFIGS_DIR", "/configs"))
has_token_file = (configs_dir / ".auth_token").is_file()
# env-fallback). configs_dir.resolve() handles in-container vs
# external-runtime fallback so we don't probe a non-existent
# /configs on a laptop and falsely report no-token-file.
has_token_file = (_configs_dir.resolve() / ".auth_token").is_file()
has_token_env = bool(os.environ.get("MOLECULE_WORKSPACE_TOKEN", "").strip())
if not has_token_file and not has_token_env:
missing.append("MOLECULE_WORKSPACE_TOKEN (or CONFIGS_DIR/.auth_token)")
@ -461,15 +464,16 @@ def _start_inbox_poller(platform_url: str, workspace_id: str) -> None:
def _read_token_file() -> str:
"""Read the token from ${CONFIGS_DIR}/.auth_token if present.
"""Read the token from the resolved configs dir's ``.auth_token`` if
present.
Mirrors platform_auth._token_file but without importing the heavy
module here (that import triggers a2a_client's WORKSPACE_ID guard
which is fine after env validation, but cheaper to inline a 4-line
file read than pull in the whole stack just for the path).
Mirrors platform_auth._token_file's location resolution but without
importing the heavy module here (that import triggers a2a_client's
WORKSPACE_ID guard which is fine after env validation, but cheaper
to inline a 4-line file read than pull in the whole stack just for
the path).
"""
configs_dir = Path(os.environ.get("CONFIGS_DIR", "/configs"))
path = configs_dir / ".auth_token"
path = _configs_dir.resolve() / ".auth_token"
if not path.is_file():
return ""
try:

View File

@ -24,6 +24,8 @@ import logging
import os
from pathlib import Path
import configs_dir
logger = logging.getLogger(__name__)
# In-process cache so we don't hit disk on every heartbeat. The heartbeat
@ -33,9 +35,11 @@ _cached_token: str | None = None
def _token_file() -> Path:
"""Path to the on-disk token file. Respects CONFIGS_DIR, falls back
to /configs for the default container layout."""
return Path(os.environ.get("CONFIGS_DIR", "/configs")) / ".auth_token"
"""Path to the on-disk token file. Resolved via configs_dir so
in-container (/configs) and external-runtime (~/.molecule-workspace)
operators land on a writable location automatically. Explicit
CONFIGS_DIR env var still wins."""
return configs_dir.resolve() / ".auth_token"
def get_token() -> str | None:

View File

@ -26,6 +26,8 @@ import logging
import os
from pathlib import Path
import configs_dir
logger = logging.getLogger(__name__)
# In-process cache so we don't hit disk on every forward call. Same
@ -35,9 +37,10 @@ _cached_secret: str | None = None
def _secret_file() -> Path:
"""Path to the on-disk inbound-secret file. Respects CONFIGS_DIR,
falls back to /configs for the default container layout."""
return Path(os.environ.get("CONFIGS_DIR", "/configs")) / ".platform_inbound_secret"
"""Path to the on-disk inbound-secret file. Resolved via configs_dir
/configs in-container, ~/.molecule-workspace for external-runtime
operators. Explicit CONFIGS_DIR env var wins."""
return configs_dir.resolve() / ".platform_inbound_secret"
def get_inbound_secret() -> str | None:

View File

@ -0,0 +1,116 @@
"""Tests for workspace/configs_dir.py — the single resolution point
for the per-workspace state directory."""
from __future__ import annotations
import os
import stat
from pathlib import Path
import pytest
import configs_dir
@pytest.fixture(autouse=True)
def _isolate(monkeypatch):
"""Each test gets a clean cache and a clean env. Tests that need
CONFIGS_DIR set monkeypatch it themselves."""
monkeypatch.delenv("CONFIGS_DIR", raising=False)
configs_dir.reset_cache()
yield
configs_dir.reset_cache()
def test_explicit_env_var_wins(tmp_path, monkeypatch):
"""An explicit CONFIGS_DIR is the operator's override — always
respected, even when /configs is also writable. This preserves
existing test/custom-deployment patterns that monkeypatch the env
var to a per-test tmp_path."""
monkeypatch.setenv("CONFIGS_DIR", str(tmp_path))
assert configs_dir.resolve() == tmp_path
def test_explicit_env_var_creates_dir(tmp_path, monkeypatch):
"""Explicit override creates the dir if missing — operator can
point at a not-yet-existing path and have the runtime materialize
it."""
target = tmp_path / "nested" / "configs"
monkeypatch.setenv("CONFIGS_DIR", str(target))
assert not target.exists()
configs_dir.resolve()
assert target.exists()
def test_in_container_uses_slash_configs(monkeypatch, tmp_path):
"""When /configs exists and is writable, return it. Verified by
pointing /configs detection at a writable tmp_path via the same
env-var override path the helper exposes."""
# Simulate "in-container" by aliasing /configs to a real writable
# path. Not actually creating /configs on the test host (would
# require root) — instead, rely on the explicit-env-var branch
# which is the same code path operators see in tests today.
monkeypatch.setenv("CONFIGS_DIR", str(tmp_path))
result = configs_dir.resolve()
assert result == tmp_path
assert os.access(str(result), os.W_OK)
def test_falls_back_to_home_when_configs_missing(monkeypatch, tmp_path):
"""No CONFIGS_DIR + no writable /configs → fall back to
~/.molecule-workspace. This is the bug from external-runtime
onboarding (issue #2458): operators on a Mac/Linux laptop don't
have /configs and the default would silently fail on the first
heartbeat write."""
fake_home = tmp_path / "home"
fake_home.mkdir()
monkeypatch.setenv("HOME", str(fake_home))
# Ensure /configs is not writable for an unprivileged process.
# This is true on every developer machine — the test is just
# asserting we DON'T pick it up when we can't write to it.
if Path("/configs").exists() and os.access("/configs", os.W_OK):
pytest.skip("/configs is writable on this host; can't exercise fallback")
result = configs_dir.resolve()
assert result == fake_home / ".molecule-workspace"
assert result.exists()
def test_fallback_dir_is_0700(monkeypatch, tmp_path):
"""The fallback dir must be 0700 — per-file 0600 perms on
.auth_token + .platform_inbound_secret would be undermined by a
world-readable parent."""
fake_home = tmp_path / "home"
fake_home.mkdir()
monkeypatch.setenv("HOME", str(fake_home))
if Path("/configs").exists() and os.access("/configs", os.W_OK):
pytest.skip("/configs is writable on this host; can't exercise fallback")
result = configs_dir.resolve()
mode = stat.S_IMODE(result.stat().st_mode)
assert mode == 0o700, f"expected 0700, got 0o{mode:o}"
def test_fallback_dir_idempotent(monkeypatch, tmp_path):
"""Resolving twice when the fallback dir already exists is fine
we don't re-mkdir or change perms on every call."""
fake_home = tmp_path / "home"
fake_home.mkdir()
monkeypatch.setenv("HOME", str(fake_home))
if Path("/configs").exists() and os.access("/configs", os.W_OK):
pytest.skip("/configs is writable on this host; can't exercise fallback")
first = configs_dir.resolve()
configs_dir.reset_cache()
second = configs_dir.resolve()
assert first == second
assert second.exists()
def test_env_var_changes_picked_up_live(tmp_path, monkeypatch):
"""Resolution reads CONFIGS_DIR live on each call — existing tests
monkeypatch the env var between cases and expect the new value to
take effect without an explicit cache reset."""
monkeypatch.setenv("CONFIGS_DIR", str(tmp_path))
first = configs_dir.resolve()
new_path = tmp_path / "after-change"
monkeypatch.setenv("CONFIGS_DIR", str(new_path))
second = configs_dir.resolve()
assert first == tmp_path
assert second == new_path

View File

@ -439,9 +439,20 @@ def test_default_cursor_path_uses_configs_dir(monkeypatch, tmp_path: Path):
assert inbox.default_cursor_path() == tmp_path / ".mcp_inbox_cursor"
def test_default_cursor_path_falls_back_to_default(monkeypatch):
def test_default_cursor_path_falls_back_to_default(tmp_path, monkeypatch):
"""When CONFIGS_DIR is unset, the cursor path resolves through
configs_dir.resolve() /configs in-container, ~/.molecule-workspace
on a non-container host. Issue #2458."""
import os
monkeypatch.delenv("CONFIGS_DIR", raising=False)
assert inbox.default_cursor_path() == Path("/configs") / ".mcp_inbox_cursor"
fake_home = tmp_path / "home"
fake_home.mkdir()
monkeypatch.setenv("HOME", str(fake_home))
path = inbox.default_cursor_path()
if Path("/configs").exists() and os.access("/configs", os.W_OK):
assert path == Path("/configs") / ".mcp_inbox_cursor"
else:
assert path == fake_home / ".molecule-workspace" / ".mcp_inbox_cursor"
# ---------------------------------------------------------------------------

View File

@ -133,13 +133,22 @@ def test_configs_dir_respected(tmp_path, monkeypatch):
def test_default_configs_dir_fallback(tmp_path, monkeypatch):
"""When CONFIGS_DIR is unset, the token file path must resolve to a
writable location either /configs (in-container) or
~/.molecule-workspace (external-runtime fallback). Issue #2458 fixed
the silent failure where the previous unconditional /configs default
crashed the heartbeat thread on non-container hosts."""
monkeypatch.delenv("CONFIGS_DIR", raising=False)
# Can't actually write to /configs on a dev laptop, so just verify the
# path resolution points there. Save will fail gracefully via mkdir+exist_ok.
fake_home = tmp_path / "home"
fake_home.mkdir()
monkeypatch.setenv("HOME", str(fake_home))
platform_auth.clear_cache()
# We expect _token_file() to resolve under /configs when env is unset.
path = platform_auth._token_file()
assert str(path).startswith("/configs")
if Path("/configs").exists() and os.access("/configs", os.W_OK):
assert str(path).startswith("/configs")
else:
assert path == fake_home / ".molecule-workspace" / ".auth_token"
assert os.access(str(path.parent), os.W_OK)
# ==================== MOLECULE_WORKSPACE_TOKEN env-var fallback ====================

View File

@ -103,10 +103,19 @@ def test_get_secret_caches(configs_dir: Path):
def test_get_secret_default_dir_when_env_unset(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
"""Default falls back to /configs. We can't write to /configs in the
test sandbox; instead verify the path computation hits the default."""
"""When CONFIGS_DIR is unset, the secret file path resolves through
configs_dir.resolve() /configs in-container, ~/.molecule-workspace
on a non-container host. Issue #2458."""
import os
monkeypatch.delenv("CONFIGS_DIR", raising=False)
assert platform_inbound_auth._secret_file() == Path("/configs/.platform_inbound_secret")
fake_home = tmp_path / "home"
fake_home.mkdir()
monkeypatch.setenv("HOME", str(fake_home))
path = platform_inbound_auth._secret_file()
if Path("/configs").exists() and os.access("/configs", os.W_OK):
assert path == Path("/configs") / ".platform_inbound_secret"
else:
assert path == fake_home / ".molecule-workspace" / ".platform_inbound_secret"
# ───────────── end-to-end: file → authorized ─────────────