Renames: - platform/ → workspace-server/ (Go module path stays as "platform" for external dep compat — will update after plugin module republish) - workspace-template/ → workspace/ Removed (moved to separate repos or deleted): - PLAN.md — internal roadmap (move to private project board) - HANDOFF.md, AGENTS.md — one-time internal session docs - .claude/ — gitignored entirely (local agent config) - infra/cloudflare-worker/ → Molecule-AI/molecule-tenant-proxy - org-templates/molecule-dev/ → standalone template repo - .mcp-eval/ → molecule-mcp-server repo - test-results/ — ephemeral, gitignored Security scrubbing: - Cloudflare account/zone/KV IDs → placeholders - Real EC2 IPs → <EC2_IP> in all docs - CF token prefix, Neon project ID, Fly app names → redacted - Langfuse dev credentials → parameterized - Personal runner username/machine name → generic Community files: - CONTRIBUTING.md — build, test, branch conventions - CODE_OF_CONDUCT.md — Contributor Covenant 2.1 All Dockerfiles, CI workflows, docker-compose, railway.toml, render.yaml, README, CLAUDE.md updated for new directory names. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
282 lines
9.5 KiB
Python
282 lines
9.5 KiB
Python
"""Code sandbox tool for safe code execution.

Executes code in an isolated environment. Three backends are supported:

  subprocess (default)
    Runs code locally via asyncio subprocess with a hard timeout.
    Best for Tier 1/2 agents where run_code is lightly used and the
    workspace container itself is the isolation boundary.

  docker
    Throwaway Docker-in-Docker container: network disabled, memory capped,
    read-only filesystem. Requires Docker socket access inside the container.
    Best for Tier 3 on-prem deployments.

  e2b
    Cloud-hosted microVM sandbox via E2B (https://e2b.dev).
    No local Docker required — code runs in E2B's isolated cloud VMs.
    Supports Python and JavaScript.
    Requires:
      - e2b-code-interpreter Python package (pinned in requirements.txt)
      - E2B_API_KEY workspace secret (set via canvas Secrets panel or API)
    Best for hosted/cloud Molecule AI deployments.

Backend is selected via the SANDBOX_BACKEND env var, which the provisioner
sets from config.yaml → sandbox.backend. Default: "subprocess".
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
import os
|
|
import tempfile
|
|
|
|
from langchain_core.tools import tool
|
|
|
|
logger = logging.getLogger(__name__)

# Timeout used when SANDBOX_TIMEOUT is unset or unusable.
_DEFAULT_SANDBOX_TIMEOUT = 30


def _parse_timeout_seconds(raw, default=_DEFAULT_SANDBOX_TIMEOUT):
    """Parse a timeout setting into a positive whole number of seconds.

    Args:
        raw: The raw setting, normally the SANDBOX_TIMEOUT env var string.
        default: Value returned when ``raw`` cannot be used.

    Returns:
        ``int(raw)`` when that is a positive integer, otherwise ``default``.
        A malformed value is logged rather than raised so that a bad env var
        cannot make this module fail at import time, and a zero or negative
        value is rejected because it would make every run time out at once.
    """
    try:
        seconds = int(raw)
    except (TypeError, ValueError):
        seconds = 0
    if seconds <= 0:
        logger.warning(
            "Invalid SANDBOX_TIMEOUT %r; using default of %ds", raw, default
        )
        return default
    return seconds


# Backend that run_code dispatches to ("subprocess", "docker" or "e2b").
SANDBOX_BACKEND = os.environ.get("SANDBOX_BACKEND", "subprocess")
# Per-execution time limit in seconds, shared by every backend.
SANDBOX_TIMEOUT = _parse_timeout_seconds(os.environ.get("SANDBOX_TIMEOUT", "30"))
# Passed verbatim to `docker run --memory` by the docker backend.
SANDBOX_MEMORY_LIMIT = os.environ.get("SANDBOX_MEMORY_LIMIT", "256m")
# Maximum number of characters of stdout / stderr returned to the caller.
MAX_OUTPUT = 10_000

# E2B kernel names differ from internal language names.
_E2B_KERNEL_MAP = {
    "python": "python3",
    "javascript": "js",
    "js": "js",
}
|
|
|
|
|
|
@tool
async def run_code(code: str, language: str = "python") -> dict:
    """Execute code in an isolated sandbox and return the output.

    Args:
        code: The code to execute.
        language: Programming language — python, javascript, or shell.
            The e2b backend supports python and javascript only.
    """
    # NOTE(review): the docstring above is deliberately left word-for-word.
    # Presumably the @tool decorator exposes it as the tool description, so
    # rewording it would change what the agent sees — confirm before editing.

    # The backend helpers look the language up in lowercase-keyed tables, so
    # normalise here to accept inputs such as "Python" or " shell ".
    language = language.strip().lower()

    backends = {
        "docker": _run_docker,
        "e2b": _run_e2b,
        "subprocess": _run_subprocess,
    }
    runner = backends.get(SANDBOX_BACKEND)
    if runner is None:
        # Keep the historical fallback, but make the misconfiguration visible:
        # a typo in SANDBOX_BACKEND would otherwise silently select the
        # least-isolated backend.
        logger.warning(
            "Unknown SANDBOX_BACKEND %r; falling back to the subprocess backend",
            SANDBOX_BACKEND,
        )
        runner = _run_subprocess
    return await runner(code, language)
|
|
|
|
|
|
async def _kill_process_tree(proc) -> None:
    """Force-kill a still-running sandbox child together with its process group.

    The child is expected to have been started with ``start_new_session=True``,
    which makes it the leader of its own process group, so signalling the
    group also reaches anything the sandboxed code forked.
    """
    import signal  # function scope: only needed on this cleanup path

    try:
        if hasattr(os, "killpg"):
            os.killpg(proc.pid, signal.SIGKILL)
        else:
            # Platform without process groups: kill the direct child only.
            proc.kill()
    except ProcessLookupError:
        return  # already gone, nothing to reap
    except OSError:
        logger.warning("Could not kill sandbox process %s", proc.pid, exc_info=True)
        return
    await proc.wait()


async def _run_subprocess(code: str, language: str) -> dict:
    """Run code in a local subprocess bounded by SANDBOX_TIMEOUT.

    Args:
        code: Source text, handed to the interpreter as one command-line
            argument (``python3 -c``, ``node -e``, ``sh -c`` or ``bash -c``).
        language: One of ``python``, ``javascript``, ``shell`` or ``bash``.

    Returns:
        On completion, a dict with ``exit_code``, ``stdout`` and ``stderr``
        (each truncated to MAX_OUTPUT characters), ``language`` and
        ``backend``. On an unsupported language, a timeout, or any other
        failure, a dict with ``error`` and ``exit_code`` set to -1.
    """
    cmd_map = {
        "python": ["python3", "-c"],
        "javascript": ["node", "-e"],
        "shell": ["sh", "-c"],
        "bash": ["bash", "-c"],
    }

    cmd_prefix = cmd_map.get(language)
    if not cmd_prefix:
        return {"error": f"Unsupported language: {language}", "exit_code": -1}

    proc = None
    try:
        proc = await asyncio.create_subprocess_exec(
            *cmd_prefix, code,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            # Own session / process group, so a timeout can take down
            # everything the code spawned rather than just the interpreter.
            start_new_session=True,
        )

        stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=SANDBOX_TIMEOUT)

        return {
            "exit_code": proc.returncode,
            "stdout": stdout.decode("utf-8", errors="replace")[:MAX_OUTPUT],
            "stderr": stderr.decode("utf-8", errors="replace")[:MAX_OUTPUT],
            "language": language,
            "backend": "subprocess",
        }
    except asyncio.TimeoutError:
        return {"error": f"Timeout after {SANDBOX_TIMEOUT}s", "exit_code": -1}
    except Exception as e:
        return {"error": str(e), "exit_code": -1}
    finally:
        # Runs on timeout, on unexpected errors and on task cancellation, so
        # an unfinished child is never left running in the background.
        if proc is not None and proc.returncode is None:
            await _kill_process_tree(proc)
|
|
|
|
|
|
async def _abort_docker_run(proc, container_name: str) -> None:
    """Stop an unfinished ``docker run`` client and force-remove its container.

    Killing the client does not by itself stop the container it started, so
    the container is removed by name afterwards. Every step is best-effort.
    """
    cleanup_timeout = 10  # seconds allowed for `docker rm -f` itself

    try:
        proc.kill()
    except ProcessLookupError:
        pass  # client already exited
    else:
        await proc.wait()

    try:
        remover = await asyncio.create_subprocess_exec(
            "docker", "rm", "-f", container_name,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL,
        )
        await asyncio.wait_for(remover.wait(), timeout=cleanup_timeout)
    except (OSError, asyncio.TimeoutError):
        logger.warning(
            "Failed to remove sandbox container %s", container_name, exc_info=True
        )


async def _run_docker(code: str, language: str) -> dict:
    """Run code in a throwaway Docker container via mounted temp file.

    The code is written to a temporary file that is bind-mounted read-only
    into the container, which is started with networking disabled, a memory
    cap of SANDBOX_MEMORY_LIMIT, half a CPU, a read-only root filesystem and
    a 32 MB tmpfs at /tmp.

    Args:
        code: Source text to execute.
        language: One of ``python``, ``javascript``, ``shell`` or ``bash``
            (``bash`` is executed with ``sh`` in this backend).

    Returns:
        On completion, a dict with ``exit_code``, ``stdout`` and ``stderr``
        (each truncated to MAX_OUTPUT characters), ``language``, ``backend``
        and ``image``. On an unsupported language, a timeout, or any other
        failure, a dict with ``error`` and ``exit_code`` set to -1.
    """
    image_map = {
        "python": ("python:3.11-slim", ["python3", "/sandbox/code.py"]),
        "javascript": ("node:20-slim", ["node", "/sandbox/code.js"]),
        "shell": ("alpine:3.18", ["sh", "/sandbox/code.sh"]),
        "bash": ("alpine:3.18", ["sh", "/sandbox/code.sh"]),
    }

    entry = image_map.get(language)
    if not entry:
        return {"error": f"Unsupported language: {language}", "exit_code": -1}

    image, run_cmd = entry
    code_file = None
    container_name = None
    proc = None

    try:
        # Write code to temp file — avoids shell metacharacter injection
        ext = {"python": ".py", "javascript": ".js", "shell": ".sh", "bash": ".sh"}.get(language, ".txt")
        fd, code_file = tempfile.mkstemp(suffix=ext, prefix="sandbox_")
        # Explicit UTF-8 so non-ASCII source does not depend on the locale.
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            f.write(code)

        # Reuse the unique temp-file stem as the container name so that the
        # container can be removed by name if the run has to be aborted.
        container_name = os.path.splitext(os.path.basename(code_file))[0]

        cmd = [
            "docker", "run", "--rm",
            "--name", container_name,
            "--network", "none",
            "--memory", SANDBOX_MEMORY_LIMIT,
            "--cpus", "0.5",
            "--read-only",
            "--tmpfs", "/tmp:size=32m",
            "-v", f"{code_file}:/sandbox/code{ext}:ro",
            image,
        ] + run_cmd

        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )

        stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=SANDBOX_TIMEOUT)

        return {
            "exit_code": proc.returncode,
            "stdout": stdout.decode("utf-8", errors="replace")[:MAX_OUTPUT],
            "stderr": stderr.decode("utf-8", errors="replace")[:MAX_OUTPUT],
            "language": language,
            "backend": "docker",
            "image": image,
        }
    except asyncio.TimeoutError:
        return {"error": f"Timeout after {SANDBOX_TIMEOUT}s", "exit_code": -1}
    except Exception as e:
        return {"error": str(e), "exit_code": -1}
    finally:
        # A run that did not finish (timeout, error, cancellation) must not
        # leave its container executing after this call has returned.
        if proc is not None and proc.returncode is None:
            await _abort_docker_run(proc, container_name)
        if code_file:
            try:
                os.unlink(code_file)
            except OSError:
                pass
|
|
|
|
|
|
def _collect_e2b_output(execution) -> tuple:
    """Flatten an E2B execution object into ``(stdout, stderr, failed)``.

    Every attribute is read defensively with ``getattr`` so that a missing
    field is treated as "no output" instead of raising.

    NOTE(review): ``execution.error`` (with a ``traceback`` field) is how the
    E2B SDK is understood to report an exception raised by the executed code —
    confirm against the pinned e2b-code-interpreter version.
    """
    stdout_parts = []
    stderr_parts = []

    for result in execution.results:
        # result.text is the primary output (stdout equivalent)
        text = getattr(result, "text", None)
        if text:
            stdout_parts.append(str(text))
        # Some result types expose an error attribute
        result_error = getattr(result, "error", None)
        if result_error:
            stderr_parts.append(str(result_error))

    # Logs are stored separately in execution.logs
    logs = getattr(execution, "logs", None)
    stdout_parts.extend(getattr(logs, "stdout", None) or [])
    stderr_parts.extend(getattr(logs, "stderr", None) or [])

    # An exception raised by the executed code need not appear in the stderr
    # logs, so surface it explicitly; otherwise a failed run looks successful.
    run_error = getattr(execution, "error", None)
    if run_error:
        stderr_parts.append(str(getattr(run_error, "traceback", None) or run_error))

    return "".join(stdout_parts), "".join(stderr_parts), bool(run_error)


async def _run_e2b(code: str, language: str) -> dict:
    """Run code in an E2B cloud microVM sandbox.

    Requires the e2b-code-interpreter package and an E2B_API_KEY secret.
    Each call creates a fresh sandbox, runs the code, and destroys the sandbox.
    Sandbox lifetime is bounded by SANDBOX_TIMEOUT seconds.

    Supported languages: python, javascript.

    Args:
        code: Source text to execute.
        language: ``python``, ``javascript`` or ``js``.

    Returns:
        On completion, a dict with ``exit_code`` (1 when the run reported an
        error or produced stderr output, else 0), ``stdout`` and ``stderr``
        (each truncated to MAX_OUTPUT characters), ``language`` and
        ``backend``. When the package or API key is missing, the language is
        unsupported, the call times out, or anything else fails, a dict with
        ``error`` and ``exit_code`` set to -1.
    """
    # Import lazily so the package is only required when the e2b backend is
    # actually configured — other backends work without it installed.
    try:
        from e2b_code_interpreter import Sandbox
    except ImportError:
        return {
            "error": (
                "e2b-code-interpreter is not installed. "
                "Add it to requirements.txt or switch to the docker/subprocess backend."
            ),
            "exit_code": -1,
        }

    api_key = os.environ.get("E2B_API_KEY")
    if not api_key:
        return {
            "error": (
                "E2B_API_KEY is not set. "
                "Add it as a workspace secret via the canvas Secrets panel or platform API."
            ),
            "exit_code": -1,
        }

    kernel = _E2B_KERNEL_MAP.get(language)
    if kernel is None:
        return {
            "error": (
                f"Language '{language}' is not supported by the e2b backend. "
                "Supported: python, javascript."
            ),
            "exit_code": -1,
        }

    # The SDK calls below are blocking, so they run on the default executor.
    loop = asyncio.get_running_loop()
    sandbox = None
    try:
        # Create a fresh sandbox for this execution.
        # timeout controls the sandbox lifetime in seconds.
        sandbox = await asyncio.wait_for(
            loop.run_in_executor(
                None,
                lambda: Sandbox(api_key=api_key, timeout=SANDBOX_TIMEOUT),
            ),
            timeout=SANDBOX_TIMEOUT,
        )

        # Execute code and collect results.
        execution = await asyncio.wait_for(
            loop.run_in_executor(
                None,
                lambda: sandbox.run_code(code, language=kernel),
            ),
            timeout=SANDBOX_TIMEOUT,
        )

        stdout_text, stderr_text, failed = _collect_e2b_output(execution)
        combined_stdout = stdout_text[:MAX_OUTPUT]
        combined_stderr = stderr_text[:MAX_OUTPUT]

        # e2b doesn't expose a numeric exit code at the sandbox level, so
        # derive one: a reported execution error or any stderr output counts
        # as failure.
        exit_code = 1 if (failed or combined_stderr) else 0

        return {
            "exit_code": exit_code,
            "stdout": combined_stdout,
            "stderr": combined_stderr,
            "language": language,
            "backend": "e2b",
        }

    except asyncio.TimeoutError:
        logger.warning("E2B sandbox timed out after %ds", SANDBOX_TIMEOUT)
        return {"error": f"Timeout after {SANDBOX_TIMEOUT}s", "exit_code": -1}
    except Exception as e:
        logger.exception("E2B sandbox error: %s", e)
        return {"error": str(e), "exit_code": -1}
    finally:
        # Always destroy the sandbox to avoid leaking E2B credits.
        if sandbox is not None:
            try:
                await loop.run_in_executor(None, sandbox.kill)
            except Exception:
                # Best-effort cleanup: never mask the result, but leave a
                # trace so that a leaked sandbox can be noticed.
                logger.warning("Failed to kill E2B sandbox", exc_info=True)
|