From 41c233cb9982990037097eafa71334e077fa3247 Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Wed, 8 Apr 2026 15:01:45 -0700 Subject: [PATCH] test: add reproducible perf benchmark for file sync overhead MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Direct env.execute() timing — no LLM in the loop. Measures per-command wall-clock including sync check. Results on SSH: - echo median: 617ms (pure SSH round-trip + spawn overhead) - sync-triggered after 6s wait: 621ms (mtime skip adds ~0ms) - within-interval (no sync): 618ms Confirms mtime skip makes sync overhead unmeasurable. --- tests/tools/test_file_sync_perf.py | 127 +++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 tests/tools/test_file_sync_perf.py diff --git a/tests/tools/test_file_sync_perf.py b/tests/tools/test_file_sync_perf.py new file mode 100644 index 00000000..46f5e9b3 --- /dev/null +++ b/tests/tools/test_file_sync_perf.py @@ -0,0 +1,127 @@ +"""Reproducible perf benchmark for file sync overhead. + +Measures actual env.execute() wall-clock time, no LLM in the loop. +Run with: uv run pytest tests/tools/test_file_sync_perf.py -v -o "addopts=" -s + +Requires backends to be configured (SSH host, Modal creds, etc). +Skip markers gate each backend. +""" + +import statistics +import time + +import pytest + +# --------------------------------------------------------------------------- +# Backend fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture +def local_env(): + from tools.environments.local import LocalEnvironment + env = LocalEnvironment(cwd="/tmp", timeout=30) + yield env + env.cleanup() + + +@pytest.fixture +def ssh_env(): + import os + host = os.environ.get("TERMINAL_SSH_HOST") + user = os.environ.get("TERMINAL_SSH_USER") + if not host or not user: + pytest.skip("TERMINAL_SSH_HOST and TERMINAL_SSH_USER required") + from tools.environments.ssh import SSHEnvironment + env = SSHEnvironment(host=host, user=user, cwd="/tmp", timeout=30) + yield env + env.cleanup() + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _time_executions(env, command: str, n: int = 10) -> list[float]: + """Run *command* n times and return per-call wall-clock durations.""" + durations = [] + for _ in range(n): + t0 = time.monotonic() + result = env.execute(command, timeout=10) + elapsed = time.monotonic() - t0 + durations.append(elapsed) + assert result.get("returncode", result.get("exit_code", -1)) == 0, \ + f"command failed: {result}" + return durations + + +def _report(label: str, durations: list[float]): + """Print timing stats.""" + med = statistics.median(durations) + mean = statistics.mean(durations) + p95 = sorted(durations)[int(len(durations) * 0.95)] + print(f"\n {label}:") + print(f" n={len(durations)} median={med*1000:.0f}ms mean={mean*1000:.0f}ms p95={p95*1000:.0f}ms") + print(f" raw: {[f'{d*1000:.0f}ms' for d in durations]}") + return med + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + +class TestLocalPerf: + """Local baseline — no file sync, no network. Sets the floor.""" + + def test_echo_latency(self, local_env): + durations = _time_executions(local_env, "echo hello", n=20) + med = _report("local echo", durations) + # Spawn-per-call overhead should be < 500ms + assert med < 0.5, f"local echo median {med*1000:.0f}ms exceeds 500ms" + + +@pytest.mark.ssh +class TestSSHPerf: + """SSH with FileSyncManager — mtime skip should make sync ~0ms.""" + + def test_echo_latency(self, ssh_env): + """Sequential echo commands — measures per-command overhead including sync check.""" + durations = _time_executions(ssh_env, "echo hello", n=20) + med = _report("ssh echo (with sync check)", durations) + # SSH round-trip + spawn-per-call, but sync should be ~0ms (rate limited) + assert med < 2.0, f"ssh echo median {med*1000:.0f}ms exceeds 2000ms" + + def test_sync_overhead_after_interval(self, ssh_env): + """Measure sync cost when the rate-limit window has expired. + + Sleep past the 5s interval, then time the next command which + triggers a real sync cycle (but with mtime skip, should be fast). + """ + # Warm up + ssh_env.execute("echo warmup", timeout=10) + + # Wait for sync interval to expire + time.sleep(6) + + # This command will trigger a real sync cycle + t0 = time.monotonic() + result = ssh_env.execute("echo after-interval", timeout=10) + elapsed = time.monotonic() - t0 + + print(f"\n ssh echo after 6s wait (sync triggered): {elapsed*1000:.0f}ms") + assert result.get("returncode", result.get("exit_code", -1)) == 0 + + # Even with sync triggered, mtime skip should keep it fast + # Old rsync approach: ~2-3s. New mtime skip: should be < 1.5s + assert elapsed < 1.5, f"sync-triggered command took {elapsed*1000:.0f}ms (expected < 1500ms)" + + def test_no_sync_within_interval(self, ssh_env): + """Rapid sequential commands within 5s window — no sync at all.""" + # First command triggers sync + ssh_env.execute("echo prime", timeout=10) + + # Immediately run 10 more — all within rate-limit window + durations = _time_executions(ssh_env, "echo rapid", n=10) + med = _report("ssh echo (within interval, no sync)", durations) + + # Should be pure SSH overhead, no sync + assert med < 1.5, f"within-interval median {med*1000:.0f}ms exceeds 1500ms"