Renames: - platform/ → workspace-server/ (Go module path stays as "platform" for external dep compat — will update after plugin module republish) - workspace-template/ → workspace/ Removed (moved to separate repos or deleted): - PLAN.md — internal roadmap (move to private project board) - HANDOFF.md, AGENTS.md — one-time internal session docs - .claude/ — gitignored entirely (local agent config) - infra/cloudflare-worker/ → Molecule-AI/molecule-tenant-proxy - org-templates/molecule-dev/ → standalone template repo - .mcp-eval/ → molecule-mcp-server repo - test-results/ — ephemeral, gitignored Security scrubbing: - Cloudflare account/zone/KV IDs → placeholders - Real EC2 IPs → <EC2_IP> in all docs - CF token prefix, Neon project ID, Fly app names → redacted - Langfuse dev credentials → parameterized - Personal runner username/machine name → generic Community files: - CONTRIBUTING.md — build, test, branch conventions - CODE_OF_CONDUCT.md — Contributor Covenant 2.1 All Dockerfiles, CI workflows, docker-compose, railway.toml, render.yaml, README, CLAUDE.md updated for new directory names. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
227 lines
7.0 KiB
Python
227 lines
7.0 KiB
Python
"""Allowlist-based environment sanitization for smolagents (#826 — C3 CRITICAL).
|
||
|
||
Security model
|
||
--------------
|
||
We use an **allowlist** (not a denylist) — only variables explicitly
|
||
enumerated as safe are passed through to agent-executed code. Any key not
|
||
on the list is silently dropped.
|
||
|
||
This is intentionally strict: adding a new safe variable is a deliberate
|
||
engineering act that surfaces in code review, rather than hoping a regex
|
||
denylist catches every new secret name.
|
||
|
||
Thread safety
|
||
-------------
|
||
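``SafeLocalPythonExecutor.__call__`` mutates ``os.environ`` temporarily.
``_ENV_PATCH_LOCK`` serialises concurrent calls so simultaneous executions
do not see each other's env patches.  Note that ``os.environ`` is
process-global: threads that read the environment *without* going through
the executor are not covered by the lock and will observe the reduced
environment while an execution is in progress.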
|
||
|
||
Extending the allowlist
|
||
-----------------------
|
||
Set ``SMOLAGENTS_ENV_EXTRA_ALLOWLIST`` to a comma-separated list of
additional env var names that should be passed through (names are
upper-cased before matching).  This is intended for workspace-specific
non-secret variables that you know are safe (e.g. ``REGION``):
|
||
|
||
SMOLAGENTS_ENV_EXTRA_ALLOWLIST="MY_COMPANY_ENV,REGION"
|
||
|
||
Never add secret names here — use workspace secrets injection instead.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import os
|
||
import threading
|
||
from typing import Any, Dict, List, Optional
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Allowlist configuration
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Built-in allowlist of environment variable names that are passed through
# to agent-executed code.  Only non-secret system / runtime settings belong
# here (e.g. PATH for subprocess-free tools, PYTHONPATH for module
# resolution, TZ for datetime ops).  Anything not named is dropped.
_SAFE_ENV_ALLOWLIST: frozenset = frozenset(
    {
        # Shell / system fundamentals
        "PATH", "HOME", "USER", "LOGNAME", "SHELL", "TERM", "TZ",
        "TMPDIR", "TEMP", "TMP",
        # Language / locale
        "LANG", "LANGUAGE",
        "LC_ALL", "LC_CTYPE", "LC_MESSAGES", "LC_NUMERIC", "LC_TIME",
        # Python runtime
        "PYTHONPATH", "PYTHONHOME", "PYTHONDONTWRITEBYTECODE",
        "PYTHONUNBUFFERED", "PYTHONIOENCODING",
        # Molecule workspace non-secret identity vars
        "WORKSPACE_ID", "WORKSPACE_NAME", "PLATFORM_URL",
    }
)
|
||
|
||
# Module names that must never appear in the executor's authorized-import
# list, regardless of what a caller requests.  These are well-known
# sandbox-escape vectors.
_BANNED_IMPORTS: frozenset = frozenset(
    (
        "subprocess",
        "socket",
        "ctypes",
        "importlib",
        "importlib.util",
    )
)
|
||
|
||
# Imports granted to every SafeLocalPythonExecutor before any caller-supplied
# extras are added.  Intended to be pure-computation modules with no I/O
# escape surface.
# NOTE(review): ``io`` exposes ``io.open`` (an alias of the builtin ``open``)
# and ``io.FileIO``, which can read and write files — confirm the underlying
# executor blocks these, or consider dropping ``io`` from this list.
_BASELINE_SAFE_IMPORTS: List[str] = [
    "math", "json", "re", "datetime",
    "collections", "itertools", "functools", "typing",
    "string", "textwrap",
    "decimal", "fractions", "statistics", "random",
    "hashlib", "base64", "urllib.parse",
    "copy", "dataclasses", "enum", "abc", "io",
]
|
||
|
||
# Module-wide lock held by ``SafeLocalPythonExecutor.__call__`` for the whole
# time ``os.environ`` is patched, so concurrent executions are serialised.
# ``threading.Lock`` is not re-entrant: code running under the lock must not
# invoke another executor call on the same thread.
_ENV_PATCH_LOCK = threading.Lock()
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Public API
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
def make_safe_env(
    extra_allowed: Optional[List[str]] = None,
) -> Dict[str, str]:
    """Return a *copy* of the environment containing only allowlisted keys.

    ``os.environ`` is **never mutated** by this function.

    Names coming from either extension source (the ``extra_allowed``
    argument or the ``SMOLAGENTS_ENV_EXTRA_ALLOWLIST`` env var) are
    normalised identically: surrounding whitespace is stripped, the name is
    upper-cased, and empty entries are ignored.

    Parameters
    ----------
    extra_allowed:
        Additional variable names to include beyond the built-in allowlist.
        Also merged with the ``SMOLAGENTS_ENV_EXTRA_ALLOWLIST`` env var.
        If a bare string is passed by mistake it is treated as a
        comma-separated list instead of being iterated character by
        character (which would allowlist single-letter variable names).

    Returns
    -------
    dict
        A copy of ``os.environ`` filtered to allowlisted keys only.
        Keys not on the list are silently dropped.
    """
    # Collect the raw, un-normalised extra names from the caller ...
    if extra_allowed is None:
        requested: List[str] = []
    elif isinstance(extra_allowed, str):
        requested = extra_allowed.split(",")
    else:
        requested = list(extra_allowed)

    # ... and from the env-var override ("" splits to [""], filtered below).
    requested.extend(
        os.environ.get("SMOLAGENTS_ENV_EXTRA_ALLOWLIST", "").split(",")
    )

    allowed = set(_SAFE_ENV_ALLOWLIST)
    normalised = (raw.strip().upper() for raw in requested)
    allowed.update(name for name in normalised if name)

    return {k: v for k, v in os.environ.items() if k in allowed}
|
||
|
||
|
||
class SafeLocalPythonExecutor:
    """Allowlist-gated wrapper around smolagents ``LocalPythonExecutor``.

    While agent-generated code runs, every variable that is not on the
    allowlist (``ANTHROPIC_API_KEY``, ``GH_TOKEN``, ``DATABASE_URL``, etc.)
    is removed from ``os.environ``; the full environment is restored when
    the call returns or raises.

    Parameters
    ----------
    additional_imports:
        Extra module names to allow beyond ``_BASELINE_SAFE_IMPORTS``.
        ``_BANNED_IMPORTS`` takes precedence — a requested name is silently
        removed when it is banned, when it is a submodule or wildcard of a
        banned package (``"ctypes.util"``, ``"socket.*"``), or when it is
        the bare ``"*"`` wildcard, which would otherwise re-authorize the
        banned modules.
    extra_allowed_env:
        Extra variable names to pass through beyond the core allowlist.
    _inner:
        Inject a mock ``LocalPythonExecutor`` for tests.  When ``None``,
        the real smolagents executor is constructed lazily.
    """

    def __init__(
        self,
        additional_imports: Optional[List[str]] = None,
        extra_allowed_env: Optional[List[str]] = None,
        *,
        _inner: Any = None,
    ) -> None:
        # Final import list = baseline + caller extras − anything banned.
        combined = list(_BASELINE_SAFE_IMPORTS)
        combined.extend(
            imp
            for imp in (additional_imports or ())
            if not self._is_banned_import(imp)
        )

        self._authorized_imports: List[str] = combined
        self._extra_allowed_env: Optional[List[str]] = extra_allowed_env
        self._inner = _inner  # may be None until first call

    @staticmethod
    def _is_banned_import(name: str) -> bool:
        """Return True if ``name`` must not be authorized.

        A name is rejected when it, or any of its parent packages, appears
        in ``_BANNED_IMPORTS`` — so ``"socket.*"`` and ``"ctypes.util"`` are
        caught as well as the exact entries.  The bare ``"*"`` wildcard is
        rejected because it would authorize every module, banned ones
        included.
        """
        name = name.strip()
        if name == "*":
            return True
        parts = name.split(".")
        return any(
            ".".join(parts[:depth]) in _BANNED_IMPORTS
            for depth in range(1, len(parts) + 1)
        )

    def _get_inner(self) -> Any:
        """Lazy-construct the real executor on first use (avoids import errors in tests)."""
        if self._inner is None:
            from smolagents import LocalPythonExecutor  # type: ignore[import]

            self._inner = LocalPythonExecutor(
                additional_authorized_imports=self._authorized_imports
            )
        return self._inner

    def __call__(self, code: str, *args: Any, **kwargs: Any) -> Any:
        """Execute ``code`` with only allowlisted env vars visible.

        All keys not on the allowlist are removed from ``os.environ`` for
        the duration of execution and restored afterward, even on exception.
        Any environment changes made by the executed code are discarded by
        the restore.

        Everything that reads or writes ``os.environ`` happens while
        ``_ENV_PATCH_LOCK`` is held, so the allowlist is never computed from
        an environment that another concurrent call has temporarily
        stripped.
        """
        with _ENV_PATCH_LOCK:
            # Build the inner executor while the full environment is still
            # in place, then compute the safe view from that same state.
            inner = self._get_inner()
            safe_env = make_safe_env(self._extra_allowed_env)

            # Snapshot full current env so it can be restored verbatim.
            original_env = dict(os.environ)
            try:
                # Stripping happens inside the try so that a failure
                # part-way through still triggers the restore below.
                for key in original_env:
                    if key not in safe_env:
                        os.environ.pop(key, None)
                return inner(code, *args, **kwargs)
            finally:
                # Always restore
                os.environ.clear()
                os.environ.update(original_env)
|