forked from molecule-ai/molecule-core
The runtime persists per-workspace state (`.auth_token`,
`.platform_inbound_secret`, `.mcp_inbox_cursor`) under `/configs` —
the workspace-EC2 mount path. Inside a container that's writable,
agent-owned. Outside a container, `/configs` either doesn't exist or
isn't writable by an unprivileged user.
The default broke the external-runtime path (`pip install
molecule-ai-workspace-runtime` + `molecule-mcp` on a Mac/Linux
laptop). First heartbeat tries to persist `.platform_inbound_secret`
and crashes:
[Errno 30] Read-only file system: '/configs'
The heartbeat thread logs and dies. Workspace flips offline within
a minute. Operator sees no actionable error.
Adds workspace/configs_dir.py — single resolution point with a tiered
fallback:
1. CONFIGS_DIR env var, if set — explicit operator override
(preserves existing tests + custom deployments verbatim).
2. /configs — if it exists AND is writable. In-container default;
unchanged behavior for every prod workspace.
3. ~/.molecule-workspace — created with mode 0700 so per-file 0600
perms aren't undermined by a world-readable parent.
Migrates the four readers (platform_auth, platform_inbound_auth,
mcp_cli, inbox) to call configs_dir.resolve() instead of
inlining `Path(os.environ.get("CONFIGS_DIR", "/configs"))`.
Existing tests that assert the old `/configs`-as-default contract
updated to assert the new contract: when CONFIGS_DIR is unset, path
resolves to a writable location — `/configs` if present, fallback
otherwise. Tests skip the fallback branch on hosts that DO have a
writable `/configs` (CI containers).
Verified the original repro is fixed: with no CONFIGS_DIR set on
macOS, configs_dir.resolve() returns ~/.molecule-workspace, the dir
exists, and writes succeed.
Test suite: 1454 passed, 3 skipped, 2 xfailed.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
146 lines
5.8 KiB
Python
146 lines
5.8 KiB
Python
"""Auth gate for the /internal/* Starlette routes.
|
|
|
|
The platform calls into the workspace's HTTP server using a per-workspace
|
|
shared secret minted at provision time and stored in
``.platform_inbound_secret`` under the resolved configs directory
(``/configs`` in-container; see migration 044 + RFC #2312).
|
|
The workspace validates by string-equality against the file content —
|
|
the platform side stores the same plaintext in
``workspaces.platform_inbound_secret`` and reads it back on every forward call.
|
|
|
|
Asymmetric to ``platform_auth.py``:
|
|
|
|
    platform_auth.py               platform_inbound_auth.py
    ────────────────               ────────────────────────
    workspace → platform           platform → workspace
    /configs/.auth_token           /configs/.platform_inbound_secret
    workspace presents bearer      workspace validates bearer
|
|
|
|
Fail-closed semantics (mirrors transcript_auth.py): if the secret file is
|
|
missing, empty, or unreadable, every request is rejected. The platform
|
|
will surface this as a structural error rather than silently sending
|
|
unauthenticated requests through.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
from pathlib import Path
|
|
|
|
import configs_dir
|
|
|
|
# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)

# Hot-path copy of the inbound secret so forward calls don't touch the
# disk every time (same approach as platform_auth._cached_token). The
# file on disk remains the durable copy; ``None`` means nothing has
# been cached yet.
_cached_secret: str | None = None
|
|
|
|
|
|
def _secret_file() -> Path:
    """Locate the on-disk inbound-secret file.

    The containing directory is whatever ``configs_dir.resolve()``
    returns (an explicit CONFIGS_DIR env var takes precedence, then
    /configs in-container, then ~/.molecule-workspace for
    external-runtime operators); the file name is always
    ``.platform_inbound_secret``.
    """
    base_dir = configs_dir.resolve()
    return base_dir / ".platform_inbound_secret"
|
|
|
|
|
|
def get_inbound_secret() -> str | None:
    """Return the cached inbound secret, reading from disk on first call.

    Returns:
        The stripped file content, or None if the file is missing,
        empty (after stripping whitespace), or unreadable. Only a
        non-empty value is cached, so a later call retries the disk
        read once the file appears.

    Callers MUST treat None as an auth failure (fail-closed) — never
    substitute a default or skip-auth-on-missing semantics.
    """
    global _cached_secret
    if _cached_secret is not None:
        return _cached_secret
    path = _secret_file()
    # EAFP: read directly instead of probing with exists() first. The
    # probe ran outside the error handling, so a PermissionError from
    # the stat escaped to the caller, and it left a window between the
    # check and the read.
    try:
        secret = path.read_text().strip()
    except FileNotFoundError:
        # Absent file is an expected state; fail closed without noise.
        return None
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError: a corrupt
        # (undecodable) secret file must also fail closed, not raise.
        logger.warning("platform_inbound_auth: read %s failed: %s", path, exc)
        return None
    if not secret:
        return None
    _cached_secret = secret
    return secret
|
|
|
|
|
|
def reset_cache() -> None:
    """Forget the in-process copy of the inbound secret.

    After this call the next ``get_inbound_secret()`` goes back to
    disk. Intended for tests, and for the rare runtime path that must
    re-read after the file has been overwritten (e.g. a future
    rotation flow).
    """
    global _cached_secret
    _cached_secret = None
|
|
|
|
|
|
def save_inbound_secret(secret: str) -> None:
    """Persist a freshly-received platform_inbound_secret to disk.

    Called from the /registry/register response handler when the platform
    returns a `platform_inbound_secret` field. Mirrors platform_auth.save_token's
    pattern: 0600 file in CONFIGS_DIR, atomic write via tmp + rename so a
    concurrent reader never sees a partial file.

    Idempotent: writing the same value over an existing file is a no-op
    from the workspace's perspective. Resets the in-process cache so the
    next get_inbound_secret() returns the freshly-written value (matters
    when a future rotation flow lands and the platform sends a different
    secret on a subsequent register call).

    Args:
        secret: the plaintext secret to store. An empty value is ignored.

    Best-effort: any OSError — including failure to create the parent
    directory — is logged as a warning and swallowed, so an unwritable
    configs directory cannot crash the caller. On failure the cache is
    left untouched.
    """
    global _cached_secret
    if not secret:
        return
    path = _secret_file()
    tmp = path.with_suffix(path.suffix + ".tmp")
    try:
        # Inside the try so that a read-only or unwritable parent takes
        # the same log-and-continue path as a failed write.
        path.parent.mkdir(parents=True, exist_ok=True)
        # Open with 0600 from the start so a concurrent reader can never
        # see a 0644-default fd before a chmod. mode= is honored by
        # os.open; pathlib.write_text does not expose it.
        fd = os.open(str(tmp), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w") as f:
            f.write(secret)
        # Atomic swap: readers see either the old file or the new one.
        os.replace(str(tmp), str(path))
        # Clear rather than assign: the next caller re-reads disk, which
        # avoids a "stored new, cache still has old" window if
        # get_inbound_secret races with this write.
        _cached_secret = None
    except OSError as exc:
        logger.warning("platform_inbound_auth: save %s failed: %s", path, exc)
        # Best-effort cleanup of the tmp file.
        try:
            os.unlink(str(tmp))
        except OSError as cleanup_exc:
            logger.debug("platform_inbound_auth: unlink tmp %s failed: %s", tmp, cleanup_exc)
|
|
|
|
|
|
def inbound_authorized(expected_secret: str | None, auth_header: str) -> bool:
    """Return True iff a /internal/* request should be served.

    Args:
        expected_secret: the workspace's stored inbound secret, or None
            if the secret file is absent / empty / unreadable.
        auth_header: raw Authorization request header value.

    Returns:
        True only when ``auth_header`` is exactly ``"Bearer <secret>"``.

    Behavior:
      - None / empty expected → fail closed. A missing secret file
        is an auth failure, not a bypass.
      - Non-string auth_header (e.g. None for an absent header) →
        rejected rather than raising.
      - Non-empty expected → strict equality against
        "Bearer <secret>". Bearer prefix is case-sensitive (matches
        the platform's wsauth.BearerTokenFromHeader contract).

    Constant-time comparison is used to avoid leaking the secret one
    byte at a time via timing analysis on a network-reachable endpoint.
    """
    import hmac

    if not expected_secret or not isinstance(auth_header, str):
        return False
    expected = f"Bearer {expected_secret}"
    # hmac.compare_digest only accepts str operands when both are pure
    # ASCII and raises TypeError otherwise, so an attacker-supplied
    # header containing a non-ASCII character would crash the handler
    # instead of being rejected. Comparing encoded bytes accepts any
    # input. "surrogatepass" keeps the encoding lossless, so two
    # different strings can never encode to the same bytes.
    # A length mismatch may still reveal the length, but never the
    # value, of the expected string.
    return hmac.compare_digest(
        auth_header.encode("utf-8", "surrogatepass"),
        expected.encode("utf-8", "surrogatepass"),
    )
|