fix: reject foreground timeout above cap instead of clamping

Change behavior from silent clamping to returning an error when the model requests a foreground timeout exceeding FOREGROUND_MAX_TIMEOUT. This forces the model to use background=true for long-running commands rather than silently changing its intent. - Config default timeouts above the cap are NOT rejected (user's choice) - Only explicit model-requested timeouts trigger rejection - Added boundary test for timeout exactly at the limit
2026-04-10 02:56:56 -07:00 · 2026-04-10 02:56:56 -07:00 · a420235b66
commit a420235b66
parent 6c3565df57
2 changed files with 74 additions and 76 deletions
--- a/tests/tools/test_terminal_foreground_timeout_cap.py
+++ b/tests/tools/test_terminal_foreground_timeout_cap.py
@ -1,7 +1,7 @@
-"""Tests for foreground timeout clamping in terminal_tool.
+"""Tests for foreground timeout cap in terminal_tool.

-Ensures that foreground commands have a hard timeout cap to prevent
-a single tool call from blocking the entire agent session.
+Ensures that foreground commands with timeout > FOREGROUND_MAX_TIMEOUT
+are rejected with an error suggesting background=true.
 """
 import json
 import os
@ -29,36 +29,27 @@ def _make_env_config(**overrides):


 class TestForegroundTimeoutCap:
-    """FOREGROUND_MAX_TIMEOUT prevents foreground commands from blocking too long."""
+    """FOREGROUND_MAX_TIMEOUT rejects foreground commands that exceed it."""

-    def test_foreground_timeout_clamped_to_max(self):
-        """When model requests timeout > FOREGROUND_MAX_TIMEOUT, it's clamped."""
+    def test_foreground_timeout_rejected_above_max(self):
+        """When model requests timeout > FOREGROUND_MAX_TIMEOUT, return error."""
        from tools.terminal_tool import terminal_tool, FOREGROUND_MAX_TIMEOUT

        with patch("tools.terminal_tool._get_env_config", return_value=_make_env_config()), \
             patch("tools.terminal_tool._start_cleanup_thread"):

-            mock_env = MagicMock()
-            mock_env.execute.return_value = {"output": "done", "returncode": 0}
+            result = json.loads(terminal_tool(
+                command="echo hello",
+                timeout=9999,  # Way above max
+            ))

-            with patch("tools.terminal_tool._active_environments", {"default": mock_env}), \
-                 patch("tools.terminal_tool._last_activity", {"default": 0}), \
-                 patch("tools.terminal_tool._check_all_guards", return_value={"approved": True}):
-                result = json.loads(terminal_tool(
-                    command="echo hello",
-                    timeout=9999,  # Way above max
-                ))
+        assert "error" in result
+        assert "9999" in result["error"]
+        assert str(FOREGROUND_MAX_TIMEOUT) in result["error"]
+        assert "background=true" in result["error"]

-            # Verify the timeout was clamped
-            call_kwargs = mock_env.execute.call_args
-            assert call_kwargs[1]["timeout"] == FOREGROUND_MAX_TIMEOUT
-            assert result.get("timeout_note") is not None
-            assert "clamped" in result["timeout_note"]
-            assert "9999" in result["timeout_note"]
-            assert "background=true" in result["timeout_note"]
-
-    def test_foreground_timeout_within_max_not_clamped(self):
-        """When model requests timeout <= FOREGROUND_MAX_TIMEOUT, no clamping."""
+    def test_foreground_timeout_within_max_executes(self):
+        """When model requests timeout <= FOREGROUND_MAX_TIMEOUT, execute normally."""
        from tools.terminal_tool import terminal_tool

        with patch("tools.terminal_tool._get_env_config", return_value=_make_env_config()), \
@ -75,12 +66,16 @@ class TestForegroundTimeoutCap:
                    timeout=300,  # Within max
                ))

-            call_kwargs = mock_env.execute.call_args
-            assert call_kwargs[1]["timeout"] == 300
-            assert "timeout_note" not in result
+        call_kwargs = mock_env.execute.call_args
+        assert call_kwargs[1]["timeout"] == 300
+        assert "error" not in result or result["error"] is None

-    def test_config_default_exceeds_cap_no_model_timeout(self):
-        """When config default timeout > cap and model passes no timeout, clamping fires."""
+    def test_config_default_above_cap_not_rejected(self):
+        """When config default timeout > cap but model passes no timeout, execute normally.
+
+        Only the model's explicit timeout parameter triggers rejection,
+        not the user's configured default.
+        """
        from tools.terminal_tool import terminal_tool, FOREGROUND_MAX_TIMEOUT

        # User configured TERMINAL_TIMEOUT=900 in their env
@ -96,16 +91,12 @@ class TestForegroundTimeoutCap:
                 patch("tools.terminal_tool._check_all_guards", return_value={"approved": True}):
                result = json.loads(terminal_tool(command="make build"))

-            # Should be clamped
-            call_kwargs = mock_env.execute.call_args
-            assert call_kwargs[1]["timeout"] == FOREGROUND_MAX_TIMEOUT
-            # Note should reference the original 900s, NOT "None"
-            note = result.get("timeout_note", "")
-            assert "900" in note, f"Expected '900' in timeout_note but got: {note!r}"
-            assert "None" not in note, f"timeout_note contains 'None': {note!r}"
-            assert "clamped" in note
+        # Should execute with the config default, NOT be rejected
+        call_kwargs = mock_env.execute.call_args
+        assert call_kwargs[1]["timeout"] == 900
+        assert "error" not in result or result["error"] is None

-    def test_background_not_clamped(self):
+    def test_background_not_rejected(self):
        """Background commands should NOT be subject to foreground timeout cap."""
        from tools.terminal_tool import terminal_tool

@ -132,14 +123,14 @@ class TestForegroundTimeoutCap:
                    timeout=9999,
                ))

-            # Background should NOT be clamped
-            assert result.get("timeout_note") is None
+        # Background should NOT be rejected
+        assert "error" not in result or result["error"] is None

-    def test_default_timeout_not_clamped(self):
-        """Default timeout (180s) should not trigger clamping."""
+    def test_default_timeout_not_rejected(self):
+        """Default timeout (180s) should not trigger rejection."""
        from tools.terminal_tool import terminal_tool, FOREGROUND_MAX_TIMEOUT

-        # 180 < 600, so no clamping
+        # 180 < 600, so no rejection
        assert 180 < FOREGROUND_MAX_TIMEOUT

        with patch("tools.terminal_tool._get_env_config", return_value=_make_env_config()), \
@ -153,9 +144,31 @@ class TestForegroundTimeoutCap:
                 patch("tools.terminal_tool._check_all_guards", return_value={"approved": True}):
                result = json.loads(terminal_tool(command="echo hello"))

-            call_kwargs = mock_env.execute.call_args
-            assert call_kwargs[1]["timeout"] == 180
-            assert "timeout_note" not in result
+        call_kwargs = mock_env.execute.call_args
+        assert call_kwargs[1]["timeout"] == 180
+        assert "error" not in result or result["error"] is None
+
+    def test_exactly_at_max_not_rejected(self):
+        """Timeout exactly at FOREGROUND_MAX_TIMEOUT should execute normally."""
+        from tools.terminal_tool import terminal_tool, FOREGROUND_MAX_TIMEOUT
+
+        with patch("tools.terminal_tool._get_env_config", return_value=_make_env_config()), \
+             patch("tools.terminal_tool._start_cleanup_thread"):
+
+            mock_env = MagicMock()
+            mock_env.execute.return_value = {"output": "done", "returncode": 0}
+
+            with patch("tools.terminal_tool._active_environments", {"default": mock_env}), \
+                 patch("tools.terminal_tool._last_activity", {"default": 0}), \
+                 patch("tools.terminal_tool._check_all_guards", return_value={"approved": True}):
+                result = json.loads(terminal_tool(
+                    command="echo hello",
+                    timeout=FOREGROUND_MAX_TIMEOUT,  # Exactly at limit
+                ))
+
+        call_kwargs = mock_env.execute.call_args
+        assert call_kwargs[1]["timeout"] == FOREGROUND_MAX_TIMEOUT
+        assert "error" not in result or result["error"] is None


 class TestForegroundMaxTimeoutConstant:
@ -164,9 +177,6 @@ class TestForegroundMaxTimeoutConstant:
    def test_default_value_is_600(self):
        """Default FOREGROUND_MAX_TIMEOUT is 600 when env var is not set."""
        from tools.terminal_tool import FOREGROUND_MAX_TIMEOUT
-        # Module-level constant should be 600 in a clean test environment.
-        # If TERMINAL_MAX_FOREGROUND_TIMEOUT is set, it may differ — but the
-        # conftest _isolate_hermes_home fixture ensures a clean env for tests.
        assert FOREGROUND_MAX_TIMEOUT == 600

    def test_schema_mentions_max(self):
@ -174,4 +184,4 @@ class TestForegroundMaxTimeoutConstant:
        from tools.terminal_tool import TERMINAL_SCHEMA, FOREGROUND_MAX_TIMEOUT
        timeout_desc = TERMINAL_SCHEMA["parameters"]["properties"]["timeout"]["description"]
        assert str(FOREGROUND_MAX_TIMEOUT) in timeout_desc
-        assert "max" in timeout_desc.lower()
+        assert "background=true" in timeout_desc
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@ -1210,16 +1210,17 @@ def terminal_tool(
        cwd = overrides.get("cwd") or config["cwd"]
        default_timeout = config["timeout"]
        effective_timeout = timeout or default_timeout
-        unclamped_timeout = effective_timeout

-        # Clamp foreground commands to FOREGROUND_MAX_TIMEOUT to prevent
-        # a single tool call from blocking the entire agent session.
-        if not background and effective_timeout > FOREGROUND_MAX_TIMEOUT:
-            logger.info(
-                "Clamping foreground timeout from %ds to %ds (max: TERMINAL_MAX_FOREGROUND_TIMEOUT=%d)",
-                effective_timeout, FOREGROUND_MAX_TIMEOUT, FOREGROUND_MAX_TIMEOUT,
-            )
-            effective_timeout = FOREGROUND_MAX_TIMEOUT
+        # Reject foreground commands where the model explicitly requests
+        # a timeout above FOREGROUND_MAX_TIMEOUT — nudge it toward background.
+        if not background and timeout and timeout > FOREGROUND_MAX_TIMEOUT:
+            return json.dumps({
+                "error": (
+                    f"Foreground timeout {timeout}s exceeds the maximum of "
+                    f"{FOREGROUND_MAX_TIMEOUT}s. Use background=true with "
+                    f"notify_on_complete=true for long-running commands."
+                ),
+            }, ensure_ascii=False)

        # Start cleanup thread
        _start_cleanup_thread()
@ -1485,18 +1486,11 @@ def terminal_tool(
                except Exception as e:
                    error_str = str(e).lower()
                    if "timeout" in error_str:
-                        timeout_result = {
+                        return json.dumps({
                            "output": "",
                            "exit_code": 124,
                            "error": f"Command timed out after {effective_timeout} seconds"
-                        }
-                        if unclamped_timeout != effective_timeout:
-                            timeout_result["timeout_note"] = (
-                                f"Timeout of {unclamped_timeout}s was clamped to "
-                                f"the foreground maximum of {FOREGROUND_MAX_TIMEOUT}s. "
-                                f"Use background=true for long-running processes."
-                            )
-                        return json.dumps(timeout_result, ensure_ascii=False)
+                        }, ensure_ascii=False)
                    
                    # Retry on transient errors
                    if retry_count < max_retries:
@ -1559,12 +1553,6 @@ def terminal_tool(
                result_dict["approval"] = approval_note
            if exit_note:
                result_dict["exit_code_meaning"] = exit_note
-            if unclamped_timeout != effective_timeout:
-                result_dict["timeout_note"] = (
-                    f"Timeout of {unclamped_timeout}s was clamped to "
-                    f"the foreground maximum of {FOREGROUND_MAX_TIMEOUT}s. "
-                    f"Use background=true for long-running processes."
-                )

            return json.dumps(result_dict, ensure_ascii=False)

@ -1751,7 +1739,7 @@ TERMINAL_SCHEMA = {
            },
            "timeout": {
                "type": "integer",
-                "description": f"Max seconds to wait (default: 180, max: {FOREGROUND_MAX_TIMEOUT}). Returns INSTANTLY when command finishes — set high for long tasks, you won't wait unnecessarily.",
+                "description": f"Max seconds to wait (default: 180, foreground max: {FOREGROUND_MAX_TIMEOUT}). Returns INSTANTLY when command finishes — set high for long tasks, you won't wait unnecessarily. Foreground timeout above {FOREGROUND_MAX_TIMEOUT}s is rejected; use background=true for longer commands.",
                "minimum": 1
            },
            "workdir": {