fix: reject foreground timeout above cap instead of clamping

Change behavior from silent clamping to returning an error when the
model requests a foreground timeout exceeding FOREGROUND_MAX_TIMEOUT.
This forces the model to use background=true for long-running commands
rather than silently changing its intent.

- Config default timeouts above the cap are NOT rejected (user's choice)
- Only explicit model-requested timeouts trigger rejection
- Added boundary test for timeout exactly at the limit
This commit is contained in:
Teknium 2026-04-10 02:56:56 -07:00 committed by Teknium
parent 6c3565df57
commit a420235b66
2 changed files with 74 additions and 76 deletions

View File

@ -1,7 +1,7 @@
"""Tests for foreground timeout clamping in terminal_tool.
"""Tests for foreground timeout cap in terminal_tool.
Ensures that foreground commands have a hard timeout cap to prevent
a single tool call from blocking the entire agent session.
Ensures that foreground commands with timeout > FOREGROUND_MAX_TIMEOUT
are rejected with an error suggesting background=true.
"""
import json
import os
@ -29,36 +29,27 @@ def _make_env_config(**overrides):
class TestForegroundTimeoutCap:
"""FOREGROUND_MAX_TIMEOUT prevents foreground commands from blocking too long."""
"""FOREGROUND_MAX_TIMEOUT rejects foreground commands that exceed it."""
def test_foreground_timeout_clamped_to_max(self):
"""When model requests timeout > FOREGROUND_MAX_TIMEOUT, it's clamped."""
def test_foreground_timeout_rejected_above_max(self):
"""When model requests timeout > FOREGROUND_MAX_TIMEOUT, return error."""
from tools.terminal_tool import terminal_tool, FOREGROUND_MAX_TIMEOUT
with patch("tools.terminal_tool._get_env_config", return_value=_make_env_config()), \
patch("tools.terminal_tool._start_cleanup_thread"):
mock_env = MagicMock()
mock_env.execute.return_value = {"output": "done", "returncode": 0}
result = json.loads(terminal_tool(
command="echo hello",
timeout=9999, # Way above max
))
with patch("tools.terminal_tool._active_environments", {"default": mock_env}), \
patch("tools.terminal_tool._last_activity", {"default": 0}), \
patch("tools.terminal_tool._check_all_guards", return_value={"approved": True}):
result = json.loads(terminal_tool(
command="echo hello",
timeout=9999, # Way above max
))
assert "error" in result
assert "9999" in result["error"]
assert str(FOREGROUND_MAX_TIMEOUT) in result["error"]
assert "background=true" in result["error"]
# Verify the timeout was clamped
call_kwargs = mock_env.execute.call_args
assert call_kwargs[1]["timeout"] == FOREGROUND_MAX_TIMEOUT
assert result.get("timeout_note") is not None
assert "clamped" in result["timeout_note"]
assert "9999" in result["timeout_note"]
assert "background=true" in result["timeout_note"]
def test_foreground_timeout_within_max_not_clamped(self):
"""When model requests timeout <= FOREGROUND_MAX_TIMEOUT, no clamping."""
def test_foreground_timeout_within_max_executes(self):
"""When model requests timeout <= FOREGROUND_MAX_TIMEOUT, execute normally."""
from tools.terminal_tool import terminal_tool
with patch("tools.terminal_tool._get_env_config", return_value=_make_env_config()), \
@ -75,12 +66,16 @@ class TestForegroundTimeoutCap:
timeout=300, # Within max
))
call_kwargs = mock_env.execute.call_args
assert call_kwargs[1]["timeout"] == 300
assert "timeout_note" not in result
call_kwargs = mock_env.execute.call_args
assert call_kwargs[1]["timeout"] == 300
assert "error" not in result or result["error"] is None
def test_config_default_exceeds_cap_no_model_timeout(self):
"""When config default timeout > cap and model passes no timeout, clamping fires."""
def test_config_default_above_cap_not_rejected(self):
"""When config default timeout > cap but model passes no timeout, execute normally.
Only the model's explicit timeout parameter triggers rejection,
not the user's configured default.
"""
from tools.terminal_tool import terminal_tool, FOREGROUND_MAX_TIMEOUT
# User configured TERMINAL_TIMEOUT=900 in their env
@ -96,16 +91,12 @@ class TestForegroundTimeoutCap:
patch("tools.terminal_tool._check_all_guards", return_value={"approved": True}):
result = json.loads(terminal_tool(command="make build"))
# Should be clamped
call_kwargs = mock_env.execute.call_args
assert call_kwargs[1]["timeout"] == FOREGROUND_MAX_TIMEOUT
# Note should reference the original 900s, NOT "None"
note = result.get("timeout_note", "")
assert "900" in note, f"Expected '900' in timeout_note but got: {note!r}"
assert "None" not in note, f"timeout_note contains 'None': {note!r}"
assert "clamped" in note
# Should execute with the config default, NOT be rejected
call_kwargs = mock_env.execute.call_args
assert call_kwargs[1]["timeout"] == 900
assert "error" not in result or result["error"] is None
def test_background_not_clamped(self):
def test_background_not_rejected(self):
"""Background commands should NOT be subject to foreground timeout cap."""
from tools.terminal_tool import terminal_tool
@ -132,14 +123,14 @@ class TestForegroundTimeoutCap:
timeout=9999,
))
# Background should NOT be clamped
assert result.get("timeout_note") is None
# Background should NOT be rejected
assert "error" not in result or result["error"] is None
def test_default_timeout_not_clamped(self):
"""Default timeout (180s) should not trigger clamping."""
def test_default_timeout_not_rejected(self):
"""Default timeout (180s) should not trigger rejection."""
from tools.terminal_tool import terminal_tool, FOREGROUND_MAX_TIMEOUT
# 180 < 600, so no clamping
# 180 < 600, so no rejection
assert 180 < FOREGROUND_MAX_TIMEOUT
with patch("tools.terminal_tool._get_env_config", return_value=_make_env_config()), \
@ -153,9 +144,31 @@ class TestForegroundTimeoutCap:
patch("tools.terminal_tool._check_all_guards", return_value={"approved": True}):
result = json.loads(terminal_tool(command="echo hello"))
call_kwargs = mock_env.execute.call_args
assert call_kwargs[1]["timeout"] == 180
assert "timeout_note" not in result
call_kwargs = mock_env.execute.call_args
assert call_kwargs[1]["timeout"] == 180
assert "error" not in result or result["error"] is None
def test_exactly_at_max_not_rejected(self):
"""Timeout exactly at FOREGROUND_MAX_TIMEOUT should execute normally."""
from tools.terminal_tool import terminal_tool, FOREGROUND_MAX_TIMEOUT
with patch("tools.terminal_tool._get_env_config", return_value=_make_env_config()), \
patch("tools.terminal_tool._start_cleanup_thread"):
mock_env = MagicMock()
mock_env.execute.return_value = {"output": "done", "returncode": 0}
with patch("tools.terminal_tool._active_environments", {"default": mock_env}), \
patch("tools.terminal_tool._last_activity", {"default": 0}), \
patch("tools.terminal_tool._check_all_guards", return_value={"approved": True}):
result = json.loads(terminal_tool(
command="echo hello",
timeout=FOREGROUND_MAX_TIMEOUT, # Exactly at limit
))
call_kwargs = mock_env.execute.call_args
assert call_kwargs[1]["timeout"] == FOREGROUND_MAX_TIMEOUT
assert "error" not in result or result["error"] is None
class TestForegroundMaxTimeoutConstant:
@ -164,9 +177,6 @@ class TestForegroundMaxTimeoutConstant:
def test_default_value_is_600(self):
"""Default FOREGROUND_MAX_TIMEOUT is 600 when env var is not set."""
from tools.terminal_tool import FOREGROUND_MAX_TIMEOUT
# Module-level constant should be 600 in a clean test environment.
# If TERMINAL_MAX_FOREGROUND_TIMEOUT is set, it may differ — but the
# conftest _isolate_hermes_home fixture ensures a clean env for tests.
assert FOREGROUND_MAX_TIMEOUT == 600
def test_schema_mentions_max(self):
@ -174,4 +184,4 @@ class TestForegroundMaxTimeoutConstant:
from tools.terminal_tool import TERMINAL_SCHEMA, FOREGROUND_MAX_TIMEOUT
timeout_desc = TERMINAL_SCHEMA["parameters"]["properties"]["timeout"]["description"]
assert str(FOREGROUND_MAX_TIMEOUT) in timeout_desc
assert "max" in timeout_desc.lower()
assert "background=true" in timeout_desc

View File

@ -1210,16 +1210,17 @@ def terminal_tool(
cwd = overrides.get("cwd") or config["cwd"]
default_timeout = config["timeout"]
effective_timeout = timeout or default_timeout
unclamped_timeout = effective_timeout
# Clamp foreground commands to FOREGROUND_MAX_TIMEOUT to prevent
# a single tool call from blocking the entire agent session.
if not background and effective_timeout > FOREGROUND_MAX_TIMEOUT:
logger.info(
"Clamping foreground timeout from %ds to %ds (max: TERMINAL_MAX_FOREGROUND_TIMEOUT=%d)",
effective_timeout, FOREGROUND_MAX_TIMEOUT, FOREGROUND_MAX_TIMEOUT,
)
effective_timeout = FOREGROUND_MAX_TIMEOUT
# Reject foreground commands where the model explicitly requests
# a timeout above FOREGROUND_MAX_TIMEOUT — nudge it toward background.
if not background and timeout and timeout > FOREGROUND_MAX_TIMEOUT:
return json.dumps({
"error": (
f"Foreground timeout {timeout}s exceeds the maximum of "
f"{FOREGROUND_MAX_TIMEOUT}s. Use background=true with "
f"notify_on_complete=true for long-running commands."
),
}, ensure_ascii=False)
# Start cleanup thread
_start_cleanup_thread()
@ -1485,18 +1486,11 @@ def terminal_tool(
except Exception as e:
error_str = str(e).lower()
if "timeout" in error_str:
timeout_result = {
return json.dumps({
"output": "",
"exit_code": 124,
"error": f"Command timed out after {effective_timeout} seconds"
}
if unclamped_timeout != effective_timeout:
timeout_result["timeout_note"] = (
f"Timeout of {unclamped_timeout}s was clamped to "
f"the foreground maximum of {FOREGROUND_MAX_TIMEOUT}s. "
f"Use background=true for long-running processes."
)
return json.dumps(timeout_result, ensure_ascii=False)
}, ensure_ascii=False)
# Retry on transient errors
if retry_count < max_retries:
@ -1559,12 +1553,6 @@ def terminal_tool(
result_dict["approval"] = approval_note
if exit_note:
result_dict["exit_code_meaning"] = exit_note
if unclamped_timeout != effective_timeout:
result_dict["timeout_note"] = (
f"Timeout of {unclamped_timeout}s was clamped to "
f"the foreground maximum of {FOREGROUND_MAX_TIMEOUT}s. "
f"Use background=true for long-running processes."
)
return json.dumps(result_dict, ensure_ascii=False)
@ -1751,7 +1739,7 @@ TERMINAL_SCHEMA = {
},
"timeout": {
"type": "integer",
"description": f"Max seconds to wait (default: 180, max: {FOREGROUND_MAX_TIMEOUT}). Returns INSTANTLY when command finishes — set high for long tasks, you won't wait unnecessarily.",
"description": f"Max seconds to wait (default: 180, foreground max: {FOREGROUND_MAX_TIMEOUT}). Returns INSTANTLY when command finishes — set high for long tasks, you won't wait unnecessarily. Foreground timeout above {FOREGROUND_MAX_TIMEOUT}s is rejected; use background=true for longer commands.",
"minimum": 1
},
"workdir": {