forked from molecule-ai/molecule-core
Renames: - platform/ → workspace-server/ (Go module path stays as "platform" for external dep compat — will update after plugin module republish) - workspace-template/ → workspace/ Removed (moved to separate repos or deleted): - PLAN.md — internal roadmap (move to private project board) - HANDOFF.md, AGENTS.md — one-time internal session docs - .claude/ — gitignored entirely (local agent config) - infra/cloudflare-worker/ → Molecule-AI/molecule-tenant-proxy - org-templates/molecule-dev/ → standalone template repo - .mcp-eval/ → molecule-mcp-server repo - test-results/ — ephemeral, gitignored Security scrubbing: - Cloudflare account/zone/KV IDs → placeholders - Real EC2 IPs → <EC2_IP> in all docs - CF token prefix, Neon project ID, Fly app names → redacted - Langfuse dev credentials → parameterized - Personal runner username/machine name → generic Community files: - CONTRIBUTING.md — build, test, branch conventions - CODE_OF_CONDUCT.md — Contributor Covenant 2.1 All Dockerfiles, CI workflows, docker-compose, railway.toml, render.yaml, README, CLAUDE.md updated for new directory names. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
679 lines
25 KiB
Python
679 lines
25 KiB
Python
"""Tests for the sandbox run_code tool — subprocess, docker-routing, and e2b backends.
|
|
|
|
The e2b backend tests use a fully mocked e2b_code_interpreter to avoid
|
|
requiring a real E2B_API_KEY or network access in CI.
|
|
|
|
Design notes:
|
|
- sandbox.py lives in tools/ alongside other tool modules.
|
|
- conftest.py stubs sys.modules["tools"] so a plain `import builtin_tools.sandbox`
|
|
would hit the stub. We load sandbox.py via its file path instead.
|
|
- SANDBOX_BACKEND is captured as a module-level constant on load, so
|
|
_load_sandbox() must be called with it set.
|
|
- E2B_API_KEY and e2b_code_interpreter are read at call-time inside
|
|
_run_e2b(), so they must be present in os.environ / sys.modules during
|
|
the actual async call (use monkeypatch or patch.dict).
|
|
"""
|
|
|
|
import asyncio
|
|
import importlib.util
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# Absolute path to the real sandbox.py. Loaded by file path in _load_sandbox()
# because conftest stubs sys.modules["tools"], so a normal import would hit
# the stub (see module docstring).
_SANDBOX_PATH = Path(__file__).parent.parent / "builtin_tools" / "sandbox.py"
|
|
|
|
|
|
def _load_sandbox(sandbox_backend: str = "subprocess", extra_env: dict | None = None):
    """
    Load (or reload) tools/sandbox.py from its real file path.

    Only SANDBOX_BACKEND needs to be set at load time — it's a module-level
    constant. Other env vars (E2B_API_KEY etc.) are read at call-time and
    should be set by the caller via monkeypatch or patch.dict.

    Args:
        sandbox_backend: Value SANDBOX_BACKEND has while the module executes.
        extra_env: Additional env vars to set for the duration of the load.

    Returns:
        The freshly executed sandbox module object.

    Raises:
        ImportError: If the sandbox module spec cannot be created.
    """
    # Evict any previously cached copy so exec_module re-runs top-level code
    # (module-level constants like SANDBOX_BACKEND are re-captured).
    for key in list(sys.modules.keys()):
        if "sandbox_mod" in key:
            del sys.modules[key]

    # Snapshot every env var we are about to touch (None == "was unset") so the
    # caller's environment is restored exactly. The original code restored
    # SANDBOX_BACKEND but unconditionally popped extra_env keys, clobbering any
    # pre-existing value (e.g. SANDBOX_TIMEOUT exported by a CI job).
    to_set = {"SANDBOX_BACKEND": sandbox_backend, **(extra_env or {})}
    saved = {k: os.environ.get(k) for k in to_set}
    os.environ.update(to_set)

    try:
        spec = importlib.util.spec_from_file_location("sandbox_mod", _SANDBOX_PATH)
        if spec is None or spec.loader is None:
            # Fail loudly instead of an opaque AttributeError below.
            raise ImportError(f"Cannot load sandbox module from {_SANDBOX_PATH}")
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
    finally:
        # Restore the environment exactly as it was before the call.
        for k, old in saved.items():
            if old is None:
                os.environ.pop(k, None)
            else:
                os.environ[k] = old

    return mod
|
|
|
|
|
|
def _make_e2b_mock(stdout_text: str = "hello e2b\n", stderr_text: str = ""):
|
|
"""Build a mock e2b Sandbox that returns a plausible execution result."""
|
|
result_obj = MagicMock()
|
|
result_obj.text = stdout_text
|
|
result_obj.error = None
|
|
|
|
logs_obj = MagicMock()
|
|
logs_obj.stdout = []
|
|
logs_obj.stderr = [stderr_text] if stderr_text else []
|
|
|
|
exec_obj = MagicMock()
|
|
exec_obj.results = [result_obj]
|
|
exec_obj.logs = logs_obj
|
|
|
|
sandbox_instance = MagicMock()
|
|
sandbox_instance.run_code.return_value = exec_obj
|
|
sandbox_instance.kill.return_value = None
|
|
|
|
sandbox_cls = MagicMock(return_value=sandbox_instance)
|
|
return sandbox_cls, sandbox_instance
|
|
|
|
|
|
def _run_sync(coro):
|
|
return asyncio.run(coro)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# subprocess backend
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestSubprocessBackend:
    """Exercise _run_subprocess against real local child processes."""

    def test_python_hello(self):
        mod = _load_sandbox("subprocess")
        out = _run_sync(mod._run_subprocess('print("hello subprocess")', "python"))
        assert out["exit_code"] == 0
        assert "hello subprocess" in out["stdout"]
        assert out["backend"] == "subprocess"

    def test_stderr_nonzero_exit(self):
        # sys.exit(2) must be surfaced as the literal exit code.
        mod = _load_sandbox("subprocess")
        out = _run_sync(mod._run_subprocess("import sys; sys.exit(2)", "python"))
        assert out["exit_code"] == 2

    def test_unsupported_language(self):
        mod = _load_sandbox("subprocess")
        out = _run_sync(mod._run_subprocess("code", "cobol"))
        assert out["exit_code"] == -1
        assert "Unsupported" in out["error"]

    def test_syntax_error_captured_in_stderr(self):
        # Broken source should fail the child interpreter, not the test harness.
        mod = _load_sandbox("subprocess")
        out = _run_sync(mod._run_subprocess("def broken(:", "python"))
        assert out["exit_code"] != 0

    def test_timeout(self):
        mod = _load_sandbox("subprocess", {"SANDBOX_TIMEOUT": "1"})
        # SANDBOX_TIMEOUT was captured at module load; override the constant
        # directly so the 1-second limit actually applies to this call.
        mod.SANDBOX_TIMEOUT = 1
        out = _run_sync(mod._run_subprocess("import time; time.sleep(10)", "python"))
        assert out["exit_code"] == -1
        assert "Timeout" in out["error"]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# E2B backend
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestE2BBackend:
    """
    All tests mock e2b_code_interpreter to avoid real network calls.
    E2B_API_KEY must be present in os.environ for the duration of _run_e2b
    (it's read at call-time, not module-load time).
    """

    def _call_e2b(self, code: str, language: str, sandbox_cls, api_key: str = "test-key"):
        # Shared driver: load the e2b backend, inject a fake e2b_code_interpreter
        # module exposing `sandbox_cls` as Sandbox, set E2B_API_KEY, and invoke
        # _run_e2b. Returns (result_dict, loaded_module, sandbox_cls).
        sb = _load_sandbox("e2b")
        mock_mod = MagicMock()
        mock_mod.Sandbox = sandbox_cls
        with patch.dict(os.environ, {"E2B_API_KEY": api_key}):
            with patch.dict("sys.modules", {"e2b_code_interpreter": mock_mod}):
                return _run_sync(sb._run_e2b(code, language)), sb, sandbox_cls

    def test_python_success(self):
        # Happy path: result text becomes stdout; backend/language echoed back.
        sandbox_cls, sandbox_instance = _make_e2b_mock(stdout_text="42\n")
        result, _, _ = self._call_e2b("print(6 * 7)", "python", sandbox_cls)

        assert result["exit_code"] == 0
        assert result["backend"] == "e2b"
        assert result["language"] == "python"
        assert result["stdout"] == "42\n"
        # The sandbox must always be torn down after a run.
        sandbox_instance.kill.assert_called_once()

    def test_javascript_success(self):
        sandbox_cls, sandbox_instance = _make_e2b_mock(stdout_text="hello js\n")
        result, _, _ = self._call_e2b('console.log("hi")', "javascript", sandbox_cls)

        assert result["exit_code"] == 0
        assert result["language"] == "javascript"
        # E2B kernel must be remapped: "javascript" → "js"
        call_args = sandbox_instance.run_code.call_args
        # Accept the kernel either as the `language=` kwarg or as the second
        # positional argument — both calling conventions are valid.
        called_kernel = (
            call_args.kwargs.get("language")
            or (call_args.args[1] if len(call_args.args) > 1 else None)
        )
        assert called_kernel == "js", f"Expected kernel 'js', got {called_kernel!r}"

    def test_stderr_produces_nonzero_exit(self):
        # Kernel stderr output should map to exit_code 1.
        sandbox_cls, _ = _make_e2b_mock(
            stdout_text="", stderr_text="NameError: name 'x' is not defined"
        )
        result, _, _ = self._call_e2b("print(x)", "python", sandbox_cls)

        assert result["exit_code"] == 1
        assert "NameError" in result["stderr"]

    def test_missing_api_key_returns_error(self):
        sb = _load_sandbox("e2b")
        sandbox_cls, _ = _make_e2b_mock()
        mock_mod = MagicMock()
        mock_mod.Sandbox = sandbox_cls
        # Do NOT set E2B_API_KEY
        with patch.dict("sys.modules", {"e2b_code_interpreter": mock_mod}):
            with patch.dict(os.environ, {}, clear=False):
                # patch.dict snapshots os.environ, so popping the key here is
                # undone when the context exits.
                os.environ.pop("E2B_API_KEY", None)
                result = _run_sync(sb._run_e2b("print(1)", "python"))

        assert result["exit_code"] == -1
        assert "E2B_API_KEY" in result["error"]

    def test_missing_package_returns_error(self):
        sb = _load_sandbox("e2b")
        with patch.dict(os.environ, {"E2B_API_KEY": "key"}):
            # Simulate ImportError by putting None in sys.modules
            with patch.dict("sys.modules", {"e2b_code_interpreter": None}):
                result = _run_sync(sb._run_e2b("print(1)", "python"))

        assert result["exit_code"] == -1
        # Error message should name the pip package to install.
        assert "e2b-code-interpreter" in result["error"]

    def test_unsupported_language_returns_error(self):
        sandbox_cls, _ = _make_e2b_mock()
        result, _, _ = self._call_e2b("echo hi", "shell", sandbox_cls)

        assert result["exit_code"] == -1
        assert "not supported by the e2b backend" in result["error"]

    def test_sandbox_always_killed_on_exception(self):
        """sandbox.kill() is called even when run_code raises."""
        sandbox_instance = MagicMock()
        sandbox_instance.run_code.side_effect = RuntimeError("network error")
        sandbox_instance.kill.return_value = None
        sandbox_cls = MagicMock(return_value=sandbox_instance)

        result, _, _ = self._call_e2b("print(1)", "python", sandbox_cls)

        assert result["exit_code"] == -1
        assert "network error" in result["error"]
        sandbox_instance.kill.assert_called_once()

    def test_output_truncated_at_max_output(self):
        # Oversized stdout is clamped to the module's MAX_OUTPUT limit.
        big = "x" * 20_000
        sandbox_cls, _ = _make_e2b_mock(stdout_text=big)
        result, sb, _ = self._call_e2b("print('x' * 20000)", "python", sandbox_cls)

        assert "stdout" in result
        assert len(result["stdout"]) <= sb.MAX_OUTPUT

    def test_api_key_forwarded_to_constructor(self):
        """E2B_API_KEY from env is passed to Sandbox(api_key=...)."""
        sandbox_cls, _ = _make_e2b_mock()
        _, _, used_cls = self._call_e2b("print(1)", "python", sandbox_cls, api_key="my-secret")

        call_kwargs = used_cls.call_args.kwargs
        assert call_kwargs.get("api_key") == "my-secret"

    def test_timeout_forwarded_to_constructor(self):
        """SANDBOX_TIMEOUT is forwarded as the sandbox timeout kwarg."""
        sandbox_cls, _ = _make_e2b_mock()
        sb = _load_sandbox("e2b", {"SANDBOX_TIMEOUT": "45"})
        # Override the load-time constant so the forwarded value is deterministic.
        sb.SANDBOX_TIMEOUT = 45

        mock_mod = MagicMock()
        mock_mod.Sandbox = sandbox_cls
        with patch.dict(os.environ, {"E2B_API_KEY": "key"}):
            with patch.dict("sys.modules", {"e2b_code_interpreter": mock_mod}):
                _run_sync(sb._run_e2b("print(1)", "python"))

        call_kwargs = sandbox_cls.call_args.kwargs
        assert call_kwargs.get("timeout") == 45
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Dispatcher routing — verify SANDBOX_BACKEND selects the right function
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestRunCodeDispatcher:
    """Verify run_code routes to the backend named by SANDBOX_BACKEND."""

    def test_subprocess_backend_dispatched(self):
        mod = _load_sandbox("subprocess")
        assert mod.SANDBOX_BACKEND == "subprocess"
        outcome = _run_sync(mod._run_subprocess("1 + 1", "python"))
        assert outcome["exit_code"] == 0

    def test_e2b_backend_dispatched(self):
        """run_code routes to _run_e2b when SANDBOX_BACKEND=e2b."""
        mod = _load_sandbox("e2b")
        assert mod.SANDBOX_BACKEND == "e2b"

        recorded = []

        async def fake_e2b(code, language):
            recorded.append((code, language))
            return {"exit_code": 0, "stdout": "ok", "backend": "e2b"}

        with patch.object(mod, "_run_e2b", fake_e2b):
            # conftest mocks @tool as identity, so run_code is the raw async fn
            outcome = _run_sync(mod.run_code("print(1)", "python"))

        assert recorded == [("print(1)", "python")]
        assert outcome["backend"] == "e2b"

    def test_docker_backend_dispatched(self):
        """run_code routes to _run_docker when SANDBOX_BACKEND=docker."""
        mod = _load_sandbox("docker")
        assert mod.SANDBOX_BACKEND == "docker"

        recorded = []

        async def fake_docker(code, language):
            recorded.append((code, language))
            return {"exit_code": 0, "stdout": "ok", "backend": "docker"}

        with patch.object(mod, "_run_docker", fake_docker):
            outcome = _run_sync(mod.run_code("echo hi", "shell"))

        assert recorded == [("echo hi", "shell")]
        assert outcome["backend"] == "docker"

    def test_subprocess_backend_routes_to_run_subprocess(self):
        """run_code with SANDBOX_BACKEND=subprocess calls _run_subprocess."""
        mod = _load_sandbox("subprocess")

        recorded = []

        async def fake_subprocess(code, language):
            recorded.append((code, language))
            return {"exit_code": 0, "stdout": "ok", "backend": "subprocess"}

        with patch.object(mod, "_run_subprocess", fake_subprocess):
            outcome = _run_sync(mod.run_code("print(1)", "python"))

        assert recorded == [("print(1)", "python")]
        assert outcome["backend"] == "subprocess"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Additional subprocess backend edge-cases
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestSubprocessEdgeCases:
    """Edge cases in _run_subprocess's timeout and error handling paths."""

    def test_process_lookup_error_on_kill(self):
        """ProcessLookupError during proc.kill() after timeout is silently ignored."""
        sb = _load_sandbox("subprocess")
        sb.SANDBOX_TIMEOUT = 1

        # We need the real timeout path but with proc.kill() raising ProcessLookupError.
        # Patch asyncio.wait_for to raise TimeoutError then patch proc.kill to raise.
        import asyncio as _asyncio

        # NOTE(review): original_create is never used — candidate for removal.
        original_create = _asyncio.create_subprocess_exec

        async def fake_create(*args, **kwargs):
            # Fake process: communicate() times out, kill() raises, wait() no-ops.
            proc = MagicMock()
            proc.returncode = None

            async def _communicate():
                raise _asyncio.TimeoutError()

            proc.communicate = _communicate

            def _kill():
                raise ProcessLookupError("no such process")

            proc.kill = _kill

            async def _wait():
                pass

            proc.wait = _wait
            return proc

        with patch("asyncio.create_subprocess_exec", fake_create):
            result = _run_sync(sb._run_subprocess("import time; time.sleep(100)", "python"))

        # Timeout result dict is still produced despite the kill() failure.
        assert result["exit_code"] == -1
        assert "Timeout" in result["error"]

    def test_general_exception_in_subprocess_exec(self):
        """Exception from asyncio.create_subprocess_exec is caught and returned."""
        sb = _load_sandbox("subprocess")

        async def fake_create(*args, **kwargs):
            raise OSError("no such executable")

        with patch("asyncio.create_subprocess_exec", fake_create):
            result = _run_sync(sb._run_subprocess("print(1)", "python"))

        assert result["exit_code"] == -1
        assert "no such executable" in result["error"]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Docker backend
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestDockerBackend:
    """Exercise _run_docker with fully faked asyncio subprocess machinery."""

    def _make_docker_proc(self, stdout: bytes = b"", stderr: bytes = b"", returncode: int = 0):
        """Return a fake asyncio subprocess-like object."""
        proc = MagicMock()
        proc.returncode = returncode

        async def _communicate():
            return (stdout, stderr)

        proc.communicate = _communicate
        return proc

    def test_run_docker_unsupported_language(self):
        sb = _load_sandbox("docker")
        result = _run_sync(sb._run_docker("code", "cobol"))
        assert result["exit_code"] == -1
        assert "Unsupported" in result["error"]

    def test_run_docker_success(self):
        """_run_docker returns exit_code=0 and correct stdout on success."""
        # NOTE(review): this alias import is unused here — candidate for removal.
        import asyncio as _asyncio

        sb = _load_sandbox("docker")
        fake_proc = self._make_docker_proc(stdout=b"hello docker\n", stderr=b"")

        # Pass-through wait_for so no real timeout applies.
        async def fake_wait_for(coro, timeout):
            return await coro

        async def fake_create(*args, **kwargs):
            return fake_proc

        with patch("asyncio.create_subprocess_exec", fake_create), \
             patch("asyncio.wait_for", fake_wait_for):
            result = _run_sync(sb._run_docker('print("hello docker")', "python"))

        assert result["exit_code"] == 0
        assert "hello docker" in result["stdout"]
        assert result["backend"] == "docker"
        assert result["language"] == "python"

    def test_run_docker_timeout(self):
        """asyncio.wait_for TimeoutError → returns timeout error dict."""
        import asyncio as _asyncio

        sb = _load_sandbox("docker")
        sb.SANDBOX_TIMEOUT = 1

        async def fake_create(*args, **kwargs):
            proc = MagicMock()
            return proc

        async def fake_wait_for(coro, timeout):
            raise _asyncio.TimeoutError()

        with patch("asyncio.create_subprocess_exec", fake_create), \
             patch("asyncio.wait_for", fake_wait_for):
            result = _run_sync(sb._run_docker("code", "python"))

        assert result["exit_code"] == -1
        assert "Timeout" in result["error"]

    def test_run_docker_general_exception(self):
        """Generic exception in create_subprocess_exec → returns error dict."""
        sb = _load_sandbox("docker")

        async def fake_create(*args, **kwargs):
            raise RuntimeError("docker not available")

        with patch("asyncio.create_subprocess_exec", fake_create):
            result = _run_sync(sb._run_docker("code", "python"))

        assert result["exit_code"] == -1
        assert "docker not available" in result["error"]

    def test_run_docker_cleanup_on_success(self, tmp_path, monkeypatch):
        """Temp file is removed after successful run."""
        # NOTE(review): _asyncio alias, tmp_path and monkeypatch fixtures are
        # unused in this test — candidates for removal.
        import asyncio as _asyncio
        import tempfile
        import os

        sb = _load_sandbox("docker")

        created_files = []
        original_mkstemp = tempfile.mkstemp

        # Record every temp file the backend creates so we can assert cleanup.
        def fake_mkstemp(suffix="", prefix="", dir=None, text=False):
            fd, path = original_mkstemp(suffix=suffix, prefix=prefix)
            created_files.append(path)
            return fd, path

        fake_proc = self._make_docker_proc(stdout=b"done\n", stderr=b"")

        async def fake_wait_for(coro, timeout):
            return await coro

        async def fake_create(*args, **kwargs):
            return fake_proc

        with patch("tempfile.mkstemp", fake_mkstemp), \
             patch("asyncio.create_subprocess_exec", fake_create), \
             patch("asyncio.wait_for", fake_wait_for):
            result = _run_sync(sb._run_docker("print('done')", "python"))

        assert result["exit_code"] == 0
        for f in created_files:
            assert not os.path.exists(f), f"temp file {f} was not cleaned up"

    def test_run_docker_cleanup_on_exception(self, tmp_path, monkeypatch):
        """Temp file is removed even when an exception is raised."""
        import tempfile
        import os

        sb = _load_sandbox("docker")

        created_files = []
        original_mkstemp = tempfile.mkstemp

        def fake_mkstemp(suffix="", prefix="", dir=None, text=False):
            fd, path = original_mkstemp(suffix=suffix, prefix=prefix)
            created_files.append(path)
            return fd, path

        async def fake_create(*args, **kwargs):
            raise RuntimeError("crash")

        with patch("tempfile.mkstemp", fake_mkstemp), \
             patch("asyncio.create_subprocess_exec", fake_create):
            result = _run_sync(sb._run_docker("print(1)", "python"))

        assert result["exit_code"] == -1
        for f in created_files:
            assert not os.path.exists(f), f"temp file {f} was not cleaned up after exception"

    def test_run_docker_cleanup_oserror_swallowed(self, tmp_path):
        """Lines 165-166: os.unlink raises OSError in finally block — swallowed, result still returned."""
        import tempfile
        import os

        sb = _load_sandbox("docker")
        fake_proc = self._make_docker_proc(stdout=b"ok\n", stderr=b"")

        created_files = []
        original_mkstemp = tempfile.mkstemp

        def fake_mkstemp(suffix="", prefix="", dir=None, text=False):
            fd, path = original_mkstemp(suffix=suffix, prefix=prefix)
            created_files.append(path)
            return fd, path

        async def fake_wait_for(coro, timeout):
            return await coro

        async def fake_create(*args, **kwargs):
            return fake_proc

        # NOTE(review): original_unlink is never used — candidate for removal.
        # Also: because unlink is forced to fail, the recorded temp files leak
        # on disk after this test.
        original_unlink = os.unlink
        unlink_calls = []

        def raising_unlink(path):
            unlink_calls.append(path)
            raise OSError("permission denied")

        with patch("tempfile.mkstemp", fake_mkstemp), \
             patch("asyncio.create_subprocess_exec", fake_create), \
             patch("asyncio.wait_for", fake_wait_for), \
             patch("os.unlink", raising_unlink):
            result = _run_sync(sb._run_docker("print('ok')", "python"))

        # OSError is swallowed; result is still returned
        assert result["exit_code"] == 0
        assert len(unlink_calls) > 0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Gap 4: E2B backend — additional coverage paths
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestE2BBackendGapCoverage:
    """Cover lines 242, 248, 268-269, 280-281 in _run_e2b."""

    def _call_e2b(self, code, language, mock_e2b_mod, api_key="test-key"):
        # Variant of the TestE2BBackend helper that takes the whole fake module
        # (not just the Sandbox class). Returns (result_dict, loaded_module).
        sb = _load_sandbox("e2b")
        with patch.dict(os.environ, {"E2B_API_KEY": api_key}):
            with patch.dict("sys.modules", {"e2b_code_interpreter": mock_e2b_mod}):
                return _run_sync(sb._run_e2b(code, language)), sb

    def test_result_error_attribute_captured(self):
        """Line 242: result.error in execution.results → captured in stderr."""
        result_obj = MagicMock()
        result_obj.text = None
        result_obj.error = "NameError: x not defined"

        logs_obj = MagicMock()
        logs_obj.stdout = []
        logs_obj.stderr = []

        exec_obj = MagicMock()
        exec_obj.results = [result_obj]
        exec_obj.logs = logs_obj

        sandbox_instance = MagicMock()
        sandbox_instance.run_code.return_value = exec_obj
        sandbox_instance.kill.return_value = None
        sandbox_cls = MagicMock(return_value=sandbox_instance)

        mock_mod = MagicMock()
        mock_mod.Sandbox = sandbox_cls

        result, _ = self._call_e2b("print(x)", "python", mock_mod)

        assert result["exit_code"] == 1
        assert "NameError" in result["stderr"]

    def test_logs_stdout_captured(self):
        """Line 248: execution.logs.stdout → appended to stdout_parts."""
        # result.text is None here, so stdout must come from the logs stream.
        result_obj = MagicMock()
        result_obj.text = None
        result_obj.error = None

        logs_obj = MagicMock()
        logs_obj.stdout = ["hello from logs\n"]
        logs_obj.stderr = []

        exec_obj = MagicMock()
        exec_obj.results = [result_obj]
        exec_obj.logs = logs_obj

        sandbox_instance = MagicMock()
        sandbox_instance.run_code.return_value = exec_obj
        sandbox_instance.kill.return_value = None
        sandbox_cls = MagicMock(return_value=sandbox_instance)

        mock_mod = MagicMock()
        mock_mod.Sandbox = sandbox_cls

        result, _ = self._call_e2b("print('hello from logs')", "python", mock_mod)

        assert result["exit_code"] == 0
        assert "hello from logs" in result["stdout"]

    def test_e2b_timeout_returns_error(self):
        """Lines 268-269: asyncio.TimeoutError raised → returns timeout error dict."""
        import asyncio as _asyncio

        # Sandbox constructor itself raises TimeoutError via wait_for
        sandbox_instance = MagicMock()
        sandbox_cls = MagicMock(return_value=sandbox_instance)

        mock_mod = MagicMock()
        mock_mod.Sandbox = sandbox_cls

        sb = _load_sandbox("e2b")

        original_wait_for = _asyncio.wait_for

        call_count = {"n": 0}

        # Only the first wait_for call times out; any later calls delegate to
        # the real implementation so unrelated awaits still work.
        async def raising_wait_for(coro, timeout):
            call_count["n"] += 1
            if call_count["n"] == 1:
                raise _asyncio.TimeoutError()
            return await original_wait_for(coro, timeout)

        with patch.dict(os.environ, {"E2B_API_KEY": "test-key"}):
            with patch.dict("sys.modules", {"e2b_code_interpreter": mock_mod}):
                with patch("asyncio.wait_for", raising_wait_for):
                    result = _run_sync(sb._run_e2b("print(1)", "python"))

        assert result["exit_code"] == -1
        assert "Timeout" in result["error"]

    def test_e2b_cleanup_exception_swallowed(self):
        """Lines 280-281: sandbox.kill raises in finally → exception swallowed."""
        result_obj = MagicMock()
        result_obj.text = "42\n"
        result_obj.error = None

        logs_obj = MagicMock()
        logs_obj.stdout = []
        logs_obj.stderr = []

        exec_obj = MagicMock()
        exec_obj.results = [result_obj]
        exec_obj.logs = logs_obj

        sandbox_instance = MagicMock()
        sandbox_instance.run_code.return_value = exec_obj
        # Make kill raise an exception
        sandbox_instance.kill.side_effect = RuntimeError("kill failed")
        sandbox_cls = MagicMock(return_value=sandbox_instance)

        mock_mod = MagicMock()
        mock_mod.Sandbox = sandbox_cls

        result, _ = self._call_e2b("print(42)", "python", mock_mod)

        # Result is still returned despite kill() failing
        assert result["exit_code"] == 0
        assert "42" in result["stdout"]
|