fix(tui): address lazy startup review races

Copilot correctly flagged two concurrency windows:

- memoryMonitor could re-enter while awaiting the lazy @hermes/ink import or
  heap dump, producing duplicate imports/dumps under sustained pressure.
- _start_agent_build used a check-then-set guard without synchronization, so
  concurrent agent-backed RPCs could start duplicate agent builders.

Fix both with single-flight guards: cache the dynamic import promise and track
per-level dump in-flight state in memoryMonitor, and protect the TUI agent build
flag with a per-session lock.

Tests:
- python -m py_compile tui_gateway/server.py
- cd ui-tui && npm run type-check && npm run build
- cd ui-tui && npm test -- --run src/__tests__/useSessionLifecycle.test.ts src/__tests__/useConfigSync.test.ts
- scripts/run_tests.sh tests/tui_gateway/test_protocol.py::test_sess_found tests/tools/test_code_execution_modes.py tests/tools/test_code_execution.py
This commit is contained in:
Brooklyn Nicholson 2026-04-28 23:54:33 -05:00
parent 0a6ecea676
commit a2819e1820
2 changed files with 29 additions and 10 deletions

View File

@@ -478,9 +478,11 @@ def _start_agent_build(sid: str, session: dict) -> None:
ready = session.get("agent_ready")
if ready is None:
return
if ready.is_set() or session.get("agent_build_started"):
return
session["agent_build_started"] = True
lock = session.setdefault("agent_build_lock", threading.Lock())
with lock:
if ready.is_set() or session.get("agent_build_started"):
return
session["agent_build_started"] = True
key = session["session_key"]
def _build() -> None:

View File

@@ -31,11 +31,20 @@ const GB = 1024 ** 3
// spike somehow trips the threshold before the app registers its own Ink
// import, we pay the load cost exactly once, inside the tick that needs it.
let _evictInkCaches: ((level: 'all' | 'half') => unknown) | null = null
let _evictInkCachesPromise: Promise<(level: 'all' | 'half') => unknown> | null = null
async function _ensureEvictInkCaches(): Promise<(level: 'all' | 'half') => unknown> {
if (_evictInkCaches) return _evictInkCaches
const mod = await import('@hermes/ink')
_evictInkCaches = mod.evictInkCaches as (level: 'all' | 'half') => unknown
return _evictInkCaches
if (_evictInkCaches) {
return _evictInkCaches
}
_evictInkCachesPromise ??= import('@hermes/ink').then(mod => {
_evictInkCaches = mod.evictInkCaches as (level: 'all' | 'half') => unknown
return _evictInkCaches
})
return _evictInkCachesPromise
}
export function startMemoryMonitor({
@@ -46,19 +55,25 @@ export function startMemoryMonitor({
onHigh
}: MemoryMonitorOptions = {}): () => void {
const dumped = new Set<Exclude<MemoryLevel, 'normal'>>()
const inFlight = new Set<Exclude<MemoryLevel, 'normal'>>()
const tick = async () => {
const { heapUsed, rss } = process.memoryUsage()
const level: MemoryLevel = heapUsed >= criticalBytes ? 'critical' : heapUsed >= highBytes ? 'high' : 'normal'
if (level === 'normal') {
return void dumped.clear()
}
dumped.clear()
inFlight.clear()
if (dumped.has(level)) {
return
}
if (dumped.has(level) || inFlight.has(level)) {
return
}
inFlight.add(level)
// Prune Ink content caches before dump/exit — half on 'high' (recoverable),
// full on 'critical' (post-dump RSS reduction, keeps user running).
// Deferred import keeps `@hermes/ink` off the cold-start critical path;
@@ -75,6 +90,8 @@ export function startMemoryMonitor({
dumped.add(level)
const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null)
inFlight.delete(level)
const snap: MemorySnapshot = { heapUsed, level, rss }
;(level === 'critical' ? onCritical : onHigh)?.(snap, dump)