molecule-core/workspace/builtin_tools/sandbox.py
Hongming Wang d8026347e5 chore: open-source restructure — rename dirs, remove internal files, scrub secrets
Renames:
- platform/ → workspace-server/ (Go module path stays as "platform" for
  external dep compat — will update after plugin module republish)
- workspace-template/ → workspace/

Removed (moved to separate repos or deleted):
- PLAN.md — internal roadmap (move to private project board)
- HANDOFF.md, AGENTS.md — one-time internal session docs
- .claude/ — gitignored entirely (local agent config)
- infra/cloudflare-worker/ → Molecule-AI/molecule-tenant-proxy
- org-templates/molecule-dev/ → standalone template repo
- .mcp-eval/ → molecule-mcp-server repo
- test-results/ — ephemeral, gitignored

Security scrubbing:
- Cloudflare account/zone/KV IDs → placeholders
- Real EC2 IPs → <EC2_IP> in all docs
- CF token prefix, Neon project ID, Fly app names → redacted
- Langfuse dev credentials → parameterized
- Personal runner username/machine name → generic

Community files:
- CONTRIBUTING.md — build, test, branch conventions
- CODE_OF_CONDUCT.md — Contributor Covenant 2.1

All Dockerfiles, CI workflows, docker-compose, railway.toml, render.yaml,
README, CLAUDE.md updated for new directory names.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-18 00:24:44 -07:00

282 lines
9.5 KiB
Python

"""Code sandbox tool for safe code execution.

Executes code in an isolated environment. Three backends are supported:

  subprocess (default)
      Runs code locally via asyncio subprocess with a hard timeout.
      Best for Tier 1/2 agents where run_code is lightly used and the
      workspace container itself is the isolation boundary.

  docker
      Throwaway Docker-in-Docker container: network disabled, memory capped,
      read-only filesystem. Requires Docker socket access inside the container.
      Best for Tier 3 on-prem deployments.

  e2b
      Cloud-hosted microVM sandbox via E2B (https://e2b.dev).
      No local Docker required — code runs in E2B's isolated cloud VMs.
      Supports Python and JavaScript.
      Requires:
        - e2b-code-interpreter Python package (pinned in requirements.txt)
        - E2B_API_KEY workspace secret (set via canvas Secrets panel or API)
      Best for hosted/cloud Molecule AI deployments.

The backend is selected via the SANDBOX_BACKEND env var, which the provisioner
sets from config.yaml → sandbox.backend. Default: "subprocess".
"""
import asyncio
import logging
import os
import tempfile
from langchain_core.tools import tool
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Backend that run_code dispatches to: "docker" and "e2b" select those
# runners; any other value (including the default) uses the subprocess runner.
SANDBOX_BACKEND = os.environ.get("SANDBOX_BACKEND", "subprocess")
# Per-execution time limit in seconds; every backend passes it to
# asyncio.wait_for. NOTE: int() raises ValueError at import time if the
# env var is set to something that is not an integer literal.
SANDBOX_TIMEOUT = int(os.environ.get("SANDBOX_TIMEOUT", "30"))
# Handed verbatim to `docker run --memory` by the docker backend.
SANDBOX_MEMORY_LIMIT = os.environ.get("SANDBOX_MEMORY_LIMIT", "256m")
# Upper bound, in characters, applied separately to the stdout and stderr
# strings placed in a result dict.
MAX_OUTPUT = 10_000
# E2B kernel names differ from internal language names.
# The mapped value is what _run_e2b passes as `language=` to sandbox.run_code;
# a language missing from this map is rejected by the e2b backend.
_E2B_KERNEL_MAP = {
"python": "python3",
"javascript": "js",
"js": "js",
}
@tool
async def run_code(code: str, language: str = "python") -> dict:
    """Execute code in an isolated sandbox and return the output.

    Args:
        code: The code to execute.
        language: Programming language — python, javascript, or shell.
            The e2b backend supports python and javascript only.
    """
    # NOTE(review): the docstring above is presumably surfaced by @tool as the
    # tool description, so its wording is kept stable — confirm before editing.

    # Backends with a dedicated runner; every other SANDBOX_BACKEND value
    # (including the default "subprocess") falls through to the local runner.
    dedicated_runners = {
        "docker": _run_docker,
        "e2b": _run_e2b,
    }
    runner = dedicated_runners.get(SANDBOX_BACKEND, _run_subprocess)
    return await runner(code, language)
async def _run_subprocess(code: str, language: str) -> dict:
    """Run ``code`` in a local child process, bounded by SANDBOX_TIMEOUT.

    The source text is passed as a single argv element to the interpreter's
    "evaluate string" flag (``python3 -c``, ``node -e``, ``sh -c``,
    ``bash -c``); no shell string is ever built from it.

    Args:
        code: Source text to execute.
        language: One of "python", "javascript", "shell" or "bash".

    Returns:
        On completion: a dict with "exit_code", "stdout", "stderr" (each
        output decoded as UTF-8 with replacement and cut to MAX_OUTPUT
        characters), "language" and "backend".
        On an unsupported language, timeout or any other failure: a dict
        with "error" and "exit_code" set to -1.
    """
    cmd_map = {
        "python": ["python3", "-c"],
        "javascript": ["node", "-e"],
        "shell": ["sh", "-c"],
        "bash": ["bash", "-c"],
    }
    cmd_prefix = cmd_map.get(language)
    if not cmd_prefix:
        return {"error": f"Unsupported language: {language}", "exit_code": -1}

    proc = None
    try:
        proc = await asyncio.create_subprocess_exec(
            *cmd_prefix, code,
            # Do not let sandboxed code read from, or block on, the stdin
            # inherited from the agent process.
            stdin=asyncio.subprocess.DEVNULL,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(), timeout=SANDBOX_TIMEOUT
        )
        return {
            "exit_code": proc.returncode,
            "stdout": stdout.decode("utf-8", errors="replace")[:MAX_OUTPUT],
            "stderr": stderr.decode("utf-8", errors="replace")[:MAX_OUTPUT],
            "language": language,
            "backend": "subprocess",
        }
    except asyncio.TimeoutError:
        return {"error": f"Timeout after {SANDBOX_TIMEOUT}s", "exit_code": -1}
    except Exception as e:
        return {"error": str(e), "exit_code": -1}
    finally:
        # Reap the child on every exit path where it is still alive: timeout,
        # cancellation of the awaiting task, or an unexpected error. Without
        # this, a cancelled run would leave the child process running.
        if proc is not None and proc.returncode is None:
            try:
                proc.kill()
                await proc.wait()
            except ProcessLookupError:
                # The child exited between the returncode check and kill().
                pass
async def _force_remove_container(name: str) -> None:
    """Best-effort ``docker rm -f`` of the named container; never raises."""
    try:
        reaper = await asyncio.create_subprocess_exec(
            "docker", "rm", "-f", name,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL,
        )
        try:
            await asyncio.wait_for(reaper.wait(), timeout=10)
        except asyncio.TimeoutError:
            reaper.kill()
            await reaper.wait()
            logger.warning("Timed out removing sandbox container %s", name)
    except Exception as cleanup_err:
        logger.warning(
            "Failed to remove sandbox container %s: %s", name, cleanup_err
        )


async def _run_docker(code: str, language: str) -> dict:
    """Run ``code`` in a throwaway Docker container via a mounted temp file.

    The code is written to a host temp file and bind-mounted read-only at
    ``/sandbox/code<ext>``, so it never passes through a shell. The container
    runs with networking disabled, a memory cap of SANDBOX_MEMORY_LIMIT,
    half a CPU, a read-only root filesystem and a 32 MiB tmpfs at /tmp.

    Args:
        code: Source text to execute.
        language: One of "python", "javascript", "shell" or "bash" (both
            shell flavours run under ``sh`` in the alpine image).

    Returns:
        On completion: a dict with "exit_code", "stdout", "stderr" (each
        output cut to MAX_OUTPUT characters), "language", "backend" and
        "image".
        On an unsupported language, timeout or any other failure: a dict
        with "error" and "exit_code" set to -1.
    """
    import uuid  # function-scope import: only the docker backend needs it

    image_map = {
        "python": ("python:3.11-slim", ["python3", "/sandbox/code.py"]),
        "javascript": ("node:20-slim", ["node", "/sandbox/code.js"]),
        "shell": ("alpine:3.18", ["sh", "/sandbox/code.sh"]),
        "bash": ("alpine:3.18", ["sh", "/sandbox/code.sh"]),
    }
    entry = image_map.get(language)
    if not entry:
        return {"error": f"Unsupported language: {language}", "exit_code": -1}
    image, run_cmd = entry
    # The in-container script path is the last argv element; reuse its
    # extension so the host file, mount target and command always agree.
    ext = os.path.splitext(run_cmd[-1])[1]

    # A unique name lets the container be removed explicitly if the run has
    # to be aborted.
    container_name = f"sandbox_{uuid.uuid4().hex}"
    code_file = None
    proc = None
    try:
        # Write code to temp file — avoids shell metacharacter injection.
        # UTF-8 is forced so non-ASCII source does not depend on the locale.
        fd, code_file = tempfile.mkstemp(suffix=ext, prefix="sandbox_")
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            f.write(code)
        cmd = [
            "docker", "run", "--rm",
            "--name", container_name,
            "--network", "none",
            "--memory", SANDBOX_MEMORY_LIMIT,
            "--cpus", "0.5",
            "--read-only",
            "--tmpfs", "/tmp:size=32m",
            "-v", f"{code_file}:/sandbox/code{ext}:ro",
            image,
            *run_cmd,
        ]
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(), timeout=SANDBOX_TIMEOUT
        )
        return {
            "exit_code": proc.returncode,
            "stdout": stdout.decode("utf-8", errors="replace")[:MAX_OUTPUT],
            "stderr": stderr.decode("utf-8", errors="replace")[:MAX_OUTPUT],
            "language": language,
            "backend": "docker",
            "image": image,
        }
    except asyncio.TimeoutError:
        return {"error": f"Timeout after {SANDBOX_TIMEOUT}s", "exit_code": -1}
    except Exception as e:
        return {"error": str(e), "exit_code": -1}
    finally:
        # If the `docker run` client is still alive the container may be too
        # (timeout, cancellation, unexpected error). Killing the client alone
        # is not enough — SIGKILL is not forwarded to the container — so the
        # container is removed by name first, then the client is reaped.
        if proc is not None and proc.returncode is None:
            await _force_remove_container(container_name)
            try:
                proc.kill()
                await proc.wait()
            except ProcessLookupError:
                pass
        if code_file:
            try:
                os.unlink(code_file)
            except OSError:
                pass
async def _run_e2b(code: str, language: str) -> dict:
    """Run code in an E2B cloud microVM sandbox.

    Requires the e2b-code-interpreter package and an E2B_API_KEY secret.
    Each call creates a fresh sandbox, runs the code, and destroys the sandbox.
    Sandbox creation and code execution are each bounded by SANDBOX_TIMEOUT
    seconds; the same value is passed to E2B as the sandbox ``timeout``.
    Supported languages: python, javascript.

    Args:
        code: Source text to execute.
        language: Internal language name; translated through
            _E2B_KERNEL_MAP before being handed to E2B.

    Returns:
        On completion: a dict with "exit_code", "stdout", "stderr" (each
        output cut to MAX_OUTPUT characters), "language" and "backend".
        "exit_code" is synthetic: 1 when the execution reported an error or
        produced any stderr output, otherwise 0.
        On a missing package, missing API key, unsupported language, timeout
        or any other failure: a dict with "error" and "exit_code" set to -1.
    """
    # Import lazily so the package is only required when the e2b backend is
    # actually configured — other backends work without it installed.
    try:
        from e2b_code_interpreter import Sandbox
    except ImportError:
        return {
            "error": (
                "e2b-code-interpreter is not installed. "
                "Add it to requirements.txt or switch to the docker/subprocess backend."
            ),
            "exit_code": -1,
        }

    api_key = os.environ.get("E2B_API_KEY")
    if not api_key:
        return {
            "error": (
                "E2B_API_KEY is not set. "
                "Add it as a workspace secret via the canvas Secrets panel or platform API."
            ),
            "exit_code": -1,
        }

    kernel = _E2B_KERNEL_MAP.get(language)
    if kernel is None:
        return {
            "error": (
                f"Language '{language}' is not supported by the e2b backend. "
                "Supported: python, javascript."
            ),
            "exit_code": -1,
        }

    # The SDK calls used here are blocking, so they run on the default
    # executor to keep the event loop responsive.
    loop = asyncio.get_running_loop()
    sandbox = None
    try:
        # Create a fresh sandbox for this execution.
        # timeout controls the sandbox lifetime in seconds.
        sandbox = await asyncio.wait_for(
            loop.run_in_executor(
                None,
                lambda: Sandbox(api_key=api_key, timeout=SANDBOX_TIMEOUT),
            ),
            timeout=SANDBOX_TIMEOUT,
        )

        # Execute code and collect results.
        execution = await asyncio.wait_for(
            loop.run_in_executor(
                None,
                lambda: sandbox.run_code(code, language=kernel),
            ),
            timeout=SANDBOX_TIMEOUT,
        )

        # E2B returns a list of Result objects; collect text/error output.
        stdout_parts = []
        stderr_parts = []
        for result in execution.results:
            # result.text is the primary output (stdout equivalent).
            text = getattr(result, "text", None)
            if text:
                stdout_parts.append(str(text))
            # Some result types expose an error attribute.
            result_error = getattr(result, "error", None)
            if result_error:
                stderr_parts.append(str(result_error))

        # Logs are stored separately in execution.logs.
        logs = getattr(execution, "logs", None)
        if logs is not None:
            log_stdout = getattr(logs, "stdout", None)
            if log_stdout:
                stdout_parts.extend(log_stdout)
            log_stderr = getattr(logs, "stderr", None)
            if log_stderr:
                stderr_parts.extend(log_stderr)

        # An exception raised by the executed code is reported on
        # execution.error rather than in the logs. Without this, a crashing
        # snippet would come back with exit_code 0 and empty stderr.
        exec_error = getattr(execution, "error", None)
        if exec_error:
            traceback_text = getattr(exec_error, "traceback", None)
            if traceback_text:
                stderr_parts.append(str(traceback_text))
            else:
                err_name = getattr(exec_error, "name", None)
                err_value = getattr(exec_error, "value", None)
                if err_name or err_value:
                    stderr_parts.append(f"{err_name}: {err_value}")
                else:
                    stderr_parts.append(str(exec_error))

        combined_stdout = "".join(stdout_parts)[:MAX_OUTPUT]
        combined_stderr = "".join(stderr_parts)[:MAX_OUTPUT]

        # Treat a reported execution error or any stderr output as a non-zero
        # exit code (e2b doesn't expose a numeric exit code at the sandbox
        # level).
        exit_code = 1 if (exec_error or combined_stderr) else 0

        return {
            "exit_code": exit_code,
            "stdout": combined_stdout,
            "stderr": combined_stderr,
            "language": language,
            "backend": "e2b",
        }
    except asyncio.TimeoutError:
        logger.warning("E2B sandbox timed out after %ds", SANDBOX_TIMEOUT)
        return {"error": f"Timeout after {SANDBOX_TIMEOUT}s", "exit_code": -1}
    except Exception as e:
        logger.exception("E2B sandbox error: %s", e)
        return {"error": str(e), "exit_code": -1}
    finally:
        # Always destroy the sandbox to avoid leaking E2B credits.
        # NOTE(review): if creation itself timed out, `sandbox` is still None
        # and the late-created sandbox is presumably reclaimed only by its
        # E2B-side timeout — confirm.
        if sandbox is not None:
            try:
                await loop.run_in_executor(None, sandbox.kill)
            except Exception as cleanup_err:
                # Best-effort cleanup: record the failure, never raise.
                logger.warning("Failed to kill E2B sandbox: %s", cleanup_err)