fix(agent): make tool loop guardrails warning-first

This commit is contained in:
Mind-Dragon 2026-04-27 17:01:30 +02:00 committed by Teknium
parent 58b89965c8
commit 0704589ceb
6 changed files with 326 additions and 31 deletions

View File

@ -2,7 +2,8 @@
The controller in this module is intentionally side-effect free: it tracks The controller in this module is intentionally side-effect free: it tracks
per-turn tool-call observations and returns decisions. Runtime code owns whether per-turn tool-call observations and returns decisions. Runtime code owns whether
those decisions become synthetic tool results or controlled turn halts. those decisions become warning guidance, synthetic tool results, or controlled
turn halts.
""" """
from __future__ import annotations from __future__ import annotations
@ -60,17 +61,67 @@ MUTATING_TOOL_NAMES = frozenset(
@dataclass(frozen=True) @dataclass(frozen=True)
class ToolCallGuardrailConfig: class ToolCallGuardrailConfig:
"""Thresholds for per-turn tool-call loop detection.""" """Thresholds for per-turn tool-call loop detection.
Warnings are enabled by default and never prevent tool execution. Hard stops
are explicit opt-in so interactive CLI/TUI sessions get a gentle nudge unless
the user enables circuit-breaker behavior in config.yaml.
"""
warnings_enabled: bool = True
hard_stop_enabled: bool = False
exact_failure_warn_after: int = 2 exact_failure_warn_after: int = 2
exact_failure_block_after: int = 2 exact_failure_block_after: int = 5
same_tool_failure_warn_after: int = 3 same_tool_failure_warn_after: int = 3
same_tool_failure_halt_after: int = 5 same_tool_failure_halt_after: int = 8
no_progress_warn_after: int = 2 no_progress_warn_after: int = 2
no_progress_block_after: int = 2 no_progress_block_after: int = 5
idempotent_tools: frozenset[str] = field(default_factory=lambda: IDEMPOTENT_TOOL_NAMES) idempotent_tools: frozenset[str] = field(default_factory=lambda: IDEMPOTENT_TOOL_NAMES)
mutating_tools: frozenset[str] = field(default_factory=lambda: MUTATING_TOOL_NAMES) mutating_tools: frozenset[str] = field(default_factory=lambda: MUTATING_TOOL_NAMES)
@classmethod
def from_mapping(cls, data: Mapping[str, Any] | None) -> "ToolCallGuardrailConfig":
"""Build config from the `tool_loop_guardrails` config.yaml section."""
if not isinstance(data, Mapping):
return cls()
warn_after = data.get("warn_after")
if not isinstance(warn_after, Mapping):
warn_after = {}
hard_stop_after = data.get("hard_stop_after")
if not isinstance(hard_stop_after, Mapping):
hard_stop_after = {}
defaults = cls()
return cls(
warnings_enabled=_as_bool(data.get("warnings_enabled"), defaults.warnings_enabled),
hard_stop_enabled=_as_bool(data.get("hard_stop_enabled"), defaults.hard_stop_enabled),
exact_failure_warn_after=_positive_int(
warn_after.get("exact_failure", data.get("exact_failure_warn_after")),
defaults.exact_failure_warn_after,
),
same_tool_failure_warn_after=_positive_int(
warn_after.get("same_tool_failure", data.get("same_tool_failure_warn_after")),
defaults.same_tool_failure_warn_after,
),
no_progress_warn_after=_positive_int(
warn_after.get("idempotent_no_progress", data.get("no_progress_warn_after")),
defaults.no_progress_warn_after,
),
exact_failure_block_after=_positive_int(
hard_stop_after.get("exact_failure", data.get("exact_failure_block_after")),
defaults.exact_failure_block_after,
),
same_tool_failure_halt_after=_positive_int(
hard_stop_after.get("same_tool_failure", data.get("same_tool_failure_halt_after")),
defaults.same_tool_failure_halt_after,
),
no_progress_block_after=_positive_int(
hard_stop_after.get("idempotent_no_progress", data.get("no_progress_block_after")),
defaults.no_progress_block_after,
),
)
@dataclass(frozen=True) @dataclass(frozen=True)
class ToolCallSignature: class ToolCallSignature:
@ -192,6 +243,8 @@ class ToolCallGuardrailController:
def before_call(self, tool_name: str, args: Mapping[str, Any] | None) -> ToolGuardrailDecision: def before_call(self, tool_name: str, args: Mapping[str, Any] | None) -> ToolGuardrailDecision:
signature = ToolCallSignature.from_call(tool_name, _coerce_args(args)) signature = ToolCallSignature.from_call(tool_name, _coerce_args(args))
if not self.config.hard_stop_enabled:
return ToolGuardrailDecision(tool_name=tool_name, signature=signature)
exact_count = self._exact_failure_counts.get(signature, 0) exact_count = self._exact_failure_counts.get(signature, 0)
if exact_count >= self.config.exact_failure_block_after: if exact_count >= self.config.exact_failure_block_after:
@ -253,7 +306,7 @@ class ToolCallGuardrailController:
same_count = self._same_tool_failure_counts.get(tool_name, 0) + 1 same_count = self._same_tool_failure_counts.get(tool_name, 0) + 1
self._same_tool_failure_counts[tool_name] = same_count self._same_tool_failure_counts[tool_name] = same_count
if same_count >= self.config.same_tool_failure_halt_after: if self.config.hard_stop_enabled and same_count >= self.config.same_tool_failure_halt_after:
decision = ToolGuardrailDecision( decision = ToolGuardrailDecision(
action="halt", action="halt",
code="same_tool_failure_halt", code="same_tool_failure_halt",
@ -268,27 +321,27 @@ class ToolCallGuardrailController:
self._halt_decision = decision self._halt_decision = decision
return decision return decision
if exact_count >= self.config.exact_failure_warn_after: if self.config.warnings_enabled and exact_count >= self.config.exact_failure_warn_after:
return ToolGuardrailDecision( return ToolGuardrailDecision(
action="warn", action="warn",
code="repeated_exact_failure_warning", code="repeated_exact_failure_warning",
message=( message=(
f"Tool guardrail: {tool_name} has failed {exact_count} times " f"{tool_name} has failed {exact_count} times with identical arguments. "
"with identical arguments. Do not retry it unchanged; inspect the " "This looks like a loop; inspect the error and change strategy "
"error and change strategy." "instead of retrying it unchanged."
), ),
tool_name=tool_name, tool_name=tool_name,
count=exact_count, count=exact_count,
signature=signature, signature=signature,
) )
if same_count >= self.config.same_tool_failure_warn_after: if self.config.warnings_enabled and same_count >= self.config.same_tool_failure_warn_after:
return ToolGuardrailDecision( return ToolGuardrailDecision(
action="warn", action="warn",
code="same_tool_failure_warning", code="same_tool_failure_warning",
message=( message=(
f"Tool guardrail: {tool_name} has failed {same_count} times " f"{tool_name} has failed {same_count} times this turn. "
"this turn. Change approach before retrying." "This looks like a loop; change approach before retrying."
), ),
tool_name=tool_name, tool_name=tool_name,
count=same_count, count=same_count,
@ -311,14 +364,14 @@ class ToolCallGuardrailController:
repeat_count = previous[1] + 1 repeat_count = previous[1] + 1
self._no_progress[signature] = (result_hash, repeat_count) self._no_progress[signature] = (result_hash, repeat_count)
if repeat_count >= self.config.no_progress_warn_after: if self.config.warnings_enabled and repeat_count >= self.config.no_progress_warn_after:
return ToolGuardrailDecision( return ToolGuardrailDecision(
action="warn", action="warn",
code="idempotent_no_progress_warning", code="idempotent_no_progress_warning",
message=( message=(
f"Tool guardrail: {tool_name} returned the same result " f"{tool_name} returned the same result {repeat_count} times. "
f"{repeat_count} times. Use the result or change the query instead " "Use the result already provided or change the query instead of "
"of repeating it unchanged." "repeating it unchanged."
), ),
tool_name=tool_name, tool_name=tool_name,
count=repeat_count, count=repeat_count,
@ -348,8 +401,9 @@ def append_toolguard_guidance(result: str, decision: ToolGuardrailDecision) -> s
"""Append runtime guidance to the current tool result content.""" """Append runtime guidance to the current tool result content."""
if decision.action not in {"warn", "halt"} or not decision.message: if decision.action not in {"warn", "halt"} or not decision.message:
return result return result
label = "Tool loop hard stop" if decision.action == "halt" else "Tool loop warning"
suffix = ( suffix = (
"\n\n[Tool guardrail: " f"\n\n[{label}: "
f"{decision.code}; count={decision.count}; {decision.message}]" f"{decision.code}; count={decision.count}; {decision.message}]"
) )
return (result or "") + suffix return (result or "") + suffix
@ -377,5 +431,31 @@ def _result_hash(result: str | None) -> str:
return _sha256(canonical) return _sha256(canonical)
def _as_bool(value: Any, default: bool) -> bool:
if value is None:
return default
if isinstance(value, bool):
return value
if isinstance(value, (int, float)):
return bool(value)
if isinstance(value, str):
lowered = value.strip().lower()
if lowered in {"1", "true", "yes", "on", "enabled"}:
return True
if lowered in {"0", "false", "no", "off", "disabled"}:
return False
return default
def _positive_int(value: Any, default: int) -> int:
if value is None:
return default
try:
parsed = int(value)
except (TypeError, ValueError):
return default
return parsed if parsed >= 1 else default
def _sha256(value: str) -> str: def _sha256(value: str) -> str:
return hashlib.sha256(value.encode("utf-8")).hexdigest() return hashlib.sha256(value.encode("utf-8")).hexdigest()

View File

@ -289,6 +289,25 @@ browser:
# after this period of no activity between agent loops (default: 120 = 2 minutes) # after this period of no activity between agent loops (default: 120 = 2 minutes)
inactivity_timeout: 120 inactivity_timeout: 120
# =============================================================================
# Tool Loop Guardrails
# =============================================================================
# Soft warnings are enabled by default. They append guidance to repeated failed
# or non-progressing tool results but still let the tool execute. Hard stops are
# opt-in circuit breakers for autonomous/cron sessions where stopping a loop is
# preferable to spending the full iteration budget.
tool_loop_guardrails:
warnings_enabled: true
hard_stop_enabled: false
warn_after:
exact_failure: 2
same_tool_failure: 3
idempotent_no_progress: 2
hard_stop_after:
exact_failure: 5
same_tool_failure: 8
idempotent_no_progress: 5
# ============================================================================= # =============================================================================
# Context Compression (Auto-shrinks long conversations) # Context Compression (Auto-shrinks long conversations)
# ============================================================================= # =============================================================================

View File

@ -607,6 +607,24 @@ DEFAULT_CONFIG = {
"max_line_length": 2000, "max_line_length": 2000,
}, },
# Tool loop guardrails nudge models when they repeat failed or
# non-progressing tool calls. Soft warnings are always-on by default;
# hard stops are opt-in so interactive CLI/TUI sessions keep flowing.
"tool_loop_guardrails": {
"warnings_enabled": True,
"hard_stop_enabled": False,
"warn_after": {
"exact_failure": 2,
"same_tool_failure": 3,
"idempotent_no_progress": 2,
},
"hard_stop_after": {
"exact_failure": 5,
"same_tool_failure": 8,
"idempotent_no_progress": 5,
},
},
"compression": { "compression": {
"enabled": True, "enabled": True,
"threshold": 0.50, # compress when context usage exceeds this ratio "threshold": 0.50, # compress when context usage exceeds this ratio

View File

@ -163,6 +163,7 @@ from agent.display import (
get_tool_emoji as _get_tool_emoji, get_tool_emoji as _get_tool_emoji,
) )
from agent.tool_guardrails import ( from agent.tool_guardrails import (
ToolCallGuardrailConfig,
ToolCallGuardrailController, ToolCallGuardrailController,
ToolGuardrailDecision, ToolGuardrailDecision,
append_toolguard_guidance, append_toolguard_guidance,
@ -1666,6 +1667,14 @@ class AIAgent:
_agent_cfg = _load_agent_config() _agent_cfg = _load_agent_config()
except Exception: except Exception:
_agent_cfg = {} _agent_cfg = {}
try:
self._tool_guardrails = ToolCallGuardrailController(
ToolCallGuardrailConfig.from_mapping(
_agent_cfg.get("tool_loop_guardrails", {})
)
)
except Exception as _tlg_err:
logger.warning("Tool loop guardrail config ignored: %s", _tlg_err)
# Cache only the derived auxiliary compression context override that is # Cache only the derived auxiliary compression context override that is
# needed later by the startup feasibility check. Avoid exposing a # needed later by the startup feasibility check. Avoid exposing a
# broad pseudo-public config object on the agent instance. # broad pseudo-public config object on the agent instance.

View File

@ -32,9 +32,69 @@ def test_tool_call_signature_hashes_canonical_nested_unicode_args_without_exposi
assert "" not in json.dumps(metadata) assert "" not in json.dumps(metadata)
def test_repeated_identical_failed_call_warns_then_blocks_before_third_execution(): def test_default_config_is_soft_warning_only_with_hard_stop_disabled():
cfg = ToolCallGuardrailConfig()
assert cfg.warnings_enabled is True
assert cfg.hard_stop_enabled is False
assert cfg.exact_failure_warn_after == 2
assert cfg.same_tool_failure_warn_after == 3
assert cfg.no_progress_warn_after == 2
assert cfg.exact_failure_block_after == 5
assert cfg.same_tool_failure_halt_after == 8
assert cfg.no_progress_block_after == 5
def test_config_parses_nested_warn_and_hard_stop_thresholds():
cfg = ToolCallGuardrailConfig.from_mapping(
{
"warnings_enabled": False,
"hard_stop_enabled": True,
"warn_after": {
"exact_failure": 3,
"same_tool_failure": 4,
"idempotent_no_progress": 5,
},
"hard_stop_after": {
"exact_failure": 6,
"same_tool_failure": 7,
"idempotent_no_progress": 8,
},
}
)
assert cfg.warnings_enabled is False
assert cfg.hard_stop_enabled is True
assert cfg.exact_failure_warn_after == 3
assert cfg.same_tool_failure_warn_after == 4
assert cfg.no_progress_warn_after == 5
assert cfg.exact_failure_block_after == 6
assert cfg.same_tool_failure_halt_after == 7
assert cfg.no_progress_block_after == 8
def test_default_repeated_identical_failed_call_warns_without_blocking():
controller = ToolCallGuardrailController()
args = {"query": "same"}
decisions = []
for _ in range(5):
assert controller.before_call("web_search", args).action == "allow"
decisions.append(
controller.after_call("web_search", args, '{"error":"boom"}', failed=True)
)
assert decisions[0].action == "allow"
assert [d.action for d in decisions[1:]] == ["warn", "warn", "warn", "warn"]
assert {d.code for d in decisions[1:]} == {"repeated_exact_failure_warning"}
assert controller.before_call("web_search", args).action == "allow"
assert controller.halt_decision is None
def test_hard_stop_enabled_blocks_repeated_exact_failure_before_next_execution():
controller = ToolCallGuardrailController( controller = ToolCallGuardrailController(
ToolCallGuardrailConfig( ToolCallGuardrailConfig(
hard_stop_enabled=True,
exact_failure_warn_after=2, exact_failure_warn_after=2,
exact_failure_block_after=2, exact_failure_block_after=2,
same_tool_failure_halt_after=99, same_tool_failure_halt_after=99,
@ -50,18 +110,16 @@ def test_repeated_identical_failed_call_warns_then_blocks_before_third_execution
second = controller.after_call("web_search", args, '{"error":"boom"}', failed=True) second = controller.after_call("web_search", args, '{"error":"boom"}', failed=True)
assert second.action == "warn" assert second.action == "warn"
assert second.code == "repeated_exact_failure_warning" assert second.code == "repeated_exact_failure_warning"
assert second.count == 2
blocked = controller.before_call("web_search", args) blocked = controller.before_call("web_search", args)
assert blocked.action == "block" assert blocked.action == "block"
assert blocked.code == "repeated_exact_failure_block" assert blocked.code == "repeated_exact_failure_block"
assert blocked.tool_name == "web_search"
assert blocked.count == 2 assert blocked.count == 2
def test_success_resets_exact_signature_failure_streak(): def test_success_resets_exact_signature_failure_streak():
controller = ToolCallGuardrailController( controller = ToolCallGuardrailController(
ToolCallGuardrailConfig(exact_failure_block_after=2, same_tool_failure_halt_after=99) ToolCallGuardrailConfig(hard_stop_enabled=True, exact_failure_block_after=2, same_tool_failure_halt_after=99)
) )
args = {"query": "same"} args = {"query": "same"}
@ -73,9 +131,26 @@ def test_success_resets_exact_signature_failure_streak():
assert controller.before_call("web_search", args).action == "allow" assert controller.before_call("web_search", args).action == "allow"
def test_same_tool_varying_args_failure_streak_warns_then_halts_independent_of_exact_streak(): def test_same_tool_varying_args_warns_by_default_without_halting():
controller = ToolCallGuardrailController(
ToolCallGuardrailConfig(same_tool_failure_warn_after=2, same_tool_failure_halt_after=3)
)
first = controller.after_call("terminal", {"command": "cmd-1"}, '{"exit_code":1}', failed=True)
second = controller.after_call("terminal", {"command": "cmd-2"}, '{"exit_code":1}', failed=True)
third = controller.after_call("terminal", {"command": "cmd-3"}, '{"exit_code":1}', failed=True)
fourth = controller.after_call("terminal", {"command": "cmd-4"}, '{"exit_code":1}', failed=True)
assert first.action == "allow"
assert [second.action, third.action, fourth.action] == ["warn", "warn", "warn"]
assert {second.code, third.code, fourth.code} == {"same_tool_failure_warning"}
assert controller.halt_decision is None
def test_hard_stop_enabled_halts_same_tool_varying_args_failure_streak():
controller = ToolCallGuardrailController( controller = ToolCallGuardrailController(
ToolCallGuardrailConfig( ToolCallGuardrailConfig(
hard_stop_enabled=True,
exact_failure_block_after=99, exact_failure_block_after=99,
same_tool_failure_warn_after=2, same_tool_failure_warn_after=2,
same_tool_failure_halt_after=3, same_tool_failure_halt_after=3,
@ -93,13 +168,34 @@ def test_same_tool_varying_args_failure_streak_warns_then_halts_independent_of_e
assert third.count == 3 assert third.count == 3
def test_idempotent_no_progress_repeated_result_warns_then_blocks_future_repeat(): def test_idempotent_no_progress_repeated_result_warns_without_blocking_by_default():
controller = ToolCallGuardrailController( controller = ToolCallGuardrailController(
ToolCallGuardrailConfig(no_progress_warn_after=2, no_progress_block_after=2) ToolCallGuardrailConfig(no_progress_warn_after=2, no_progress_block_after=2)
) )
args = {"path": "/tmp/same.txt"} args = {"path": "/tmp/same.txt"}
result = "same file contents" result = "same file contents"
for _ in range(4):
assert controller.before_call("read_file", args).action == "allow"
decision = controller.after_call("read_file", args, result, failed=False)
assert decision.action == "warn"
assert decision.code == "idempotent_no_progress_warning"
assert controller.before_call("read_file", args).action == "allow"
assert controller.halt_decision is None
def test_hard_stop_enabled_blocks_idempotent_no_progress_future_repeat():
controller = ToolCallGuardrailController(
ToolCallGuardrailConfig(
hard_stop_enabled=True,
no_progress_warn_after=2,
no_progress_block_after=2,
)
)
args = {"path": "/tmp/same.txt"}
result = "same file contents"
assert controller.before_call("read_file", args).action == "allow" assert controller.before_call("read_file", args).action == "allow"
assert controller.after_call("read_file", args, result, failed=False).action == "allow" assert controller.after_call("read_file", args, result, failed=False).action == "allow"
assert controller.before_call("read_file", args).action == "allow" assert controller.before_call("read_file", args).action == "allow"
@ -126,7 +222,7 @@ def test_mutating_or_unknown_tools_are_not_blocked_for_repeated_identical_succes
def test_reset_for_turn_clears_bounded_guardrail_state(): def test_reset_for_turn_clears_bounded_guardrail_state():
controller = ToolCallGuardrailController( controller = ToolCallGuardrailController(
ToolCallGuardrailConfig(exact_failure_block_after=2, no_progress_block_after=2) ToolCallGuardrailConfig(hard_stop_enabled=True, exact_failure_block_after=2, no_progress_block_after=2)
) )
controller.after_call("web_search", {"query": "same"}, '{"error":"boom"}', failed=True) controller.after_call("web_search", {"query": "same"}, '{"error":"boom"}', failed=True)
controller.after_call("web_search", {"query": "same"}, '{"error":"boom"}', failed=True) controller.after_call("web_search", {"query": "same"}, '{"error":"boom"}', failed=True)

View File

@ -36,10 +36,11 @@ def _mock_response(content="Hello", finish_reason="stop", tool_calls=None):
return SimpleNamespace(choices=[choice], model="test/model", usage=None) return SimpleNamespace(choices=[choice], model="test/model", usage=None)
def _make_agent(*tool_names: str, max_iterations: int = 10) -> AIAgent: def _make_agent(*tool_names: str, max_iterations: int = 10, config: dict | None = None) -> AIAgent:
with ( with (
patch("run_agent.get_tool_definitions", return_value=_make_tool_defs(*tool_names)), patch("run_agent.get_tool_definitions", return_value=_make_tool_defs(*tool_names)),
patch("run_agent.check_toolset_requirements", return_value={}), patch("run_agent.check_toolset_requirements", return_value={}),
patch("hermes_cli.config.load_config", return_value=config or {}),
patch("run_agent.OpenAI"), patch("run_agent.OpenAI"),
): ):
agent = AIAgent( agent = AIAgent(
@ -69,7 +70,23 @@ def _seed_exact_failures(agent: AIAgent, tool_name: str, args: dict, count: int
) )
def test_sequential_path_blocks_repeated_exact_failure_before_execution(): def _hard_stop_config(**overrides) -> dict:
cfg = {
"tool_loop_guardrails": {
"warnings_enabled": True,
"hard_stop_enabled": True,
"hard_stop_after": {
"exact_failure": 2,
"same_tool_failure": 8,
"idempotent_no_progress": 5,
},
}
}
cfg["tool_loop_guardrails"].update(overrides)
return cfg
def test_default_sequential_path_warns_repeated_exact_failure_without_blocking_execution():
agent = _make_agent("web_search") agent = _make_agent("web_search")
args = {"query": "same"} args = {"query": "same"}
_seed_exact_failures(agent, "web_search", args) _seed_exact_failures(agent, "web_search", args)
@ -77,6 +94,32 @@ def test_sequential_path_blocks_repeated_exact_failure_before_execution():
progress = [] progress = []
agent.tool_start_callback = lambda *a, **k: starts.append((a, k)) agent.tool_start_callback = lambda *a, **k: starts.append((a, k))
agent.tool_progress_callback = lambda *a, **k: progress.append((a, k)) agent.tool_progress_callback = lambda *a, **k: progress.append((a, k))
tc = _mock_tool_call("web_search", json.dumps(args), "c-soft")
msg = SimpleNamespace(content="", tool_calls=[tc])
messages = []
with patch("run_agent.handle_function_call", return_value=json.dumps({"error": "boom"})) as mock_hfc:
agent._execute_tool_calls_sequential(msg, messages, "task-1")
mock_hfc.assert_called_once()
assert len(starts) == 1
assert any(event[0][0] == "tool.completed" for event in progress)
assert len(messages) == 1
assert messages[0]["role"] == "tool"
assert messages[0]["tool_call_id"] == "c-soft"
assert "repeated_exact_failure_warning" in messages[0]["content"]
assert "repeated_exact_failure_block" not in messages[0]["content"]
assert agent._tool_guardrail_halt_decision is None
def test_config_enabled_hard_stop_blocks_repeated_exact_failure_before_execution():
agent = _make_agent("web_search", config=_hard_stop_config())
args = {"query": "same"}
_seed_exact_failures(agent, "web_search", args)
starts = []
progress = []
agent.tool_start_callback = lambda *a, **k: starts.append((a, k))
agent.tool_progress_callback = lambda *a, **k: progress.append((a, k))
tc = _mock_tool_call("web_search", json.dumps(args), "c-block") tc = _mock_tool_call("web_search", json.dumps(args), "c-block")
msg = SimpleNamespace(content="", tool_calls=[tc]) msg = SimpleNamespace(content="", tool_calls=[tc])
messages = [] messages = []
@ -106,12 +149,12 @@ def test_sequential_after_call_appends_guidance_to_tool_result_without_extra_mes
assert [m["role"] for m in messages] == ["tool"] assert [m["role"] for m in messages] == ["tool"]
assert messages[0]["tool_call_id"] == "c-warn" assert messages[0]["tool_call_id"] == "c-warn"
assert "Tool guardrail" in messages[0]["content"] assert "Tool loop warning" in messages[0]["content"]
assert "repeated_exact_failure_warning" in messages[0]["content"] assert "repeated_exact_failure_warning" in messages[0]["content"]
def test_concurrent_path_does_not_submit_blocked_calls_and_preserves_result_order(): def test_config_enabled_hard_stop_concurrent_path_does_not_submit_blocked_calls_and_preserves_result_order():
agent = _make_agent("web_search") agent = _make_agent("web_search", config=_hard_stop_config())
blocked_args = {"query": "blocked"} blocked_args = {"query": "blocked"}
allowed_args = {"query": "allowed"} allowed_args = {"query": "allowed"}
_seed_exact_failures(agent, "web_search", blocked_args) _seed_exact_failures(agent, "web_search", blocked_args)
@ -164,9 +207,39 @@ def test_plugin_pre_tool_block_wins_without_counting_as_toolguard_block():
assert agent._tool_guardrails.before_call("web_search", args).action == "allow" assert agent._tool_guardrails.before_call("web_search", args).action == "allow"
def test_run_conversation_returns_controlled_guardrail_halt_without_top_level_error(): def test_default_run_conversation_warns_without_guardrail_halt():
agent = _make_agent("web_search", max_iterations=10) agent = _make_agent("web_search", max_iterations=10)
same_args = {"query": "same"} same_args = {"query": "same"}
responses = [
_mock_response(
content="",
finish_reason="tool_calls",
tool_calls=[_mock_tool_call("web_search", json.dumps(same_args), f"c{i}")],
)
for i in range(1, 4)
]
responses.append(_mock_response(content="done", finish_reason="stop", tool_calls=None))
agent.client.chat.completions.create.side_effect = responses
with (
patch("run_agent.handle_function_call", return_value=json.dumps({"error": "boom"})) as mock_hfc,
patch.object(agent, "_persist_session"),
patch.object(agent, "_save_trajectory"),
patch.object(agent, "_cleanup_task_resources"),
):
result = agent.run_conversation("search repeatedly")
assert mock_hfc.call_count == 3
assert result["turn_exit_reason"].startswith("text_response")
assert "guardrail" not in result
assert result["final_response"] == "done"
tool_contents = [m["content"] for m in result["messages"] if m.get("role") == "tool"]
assert any("repeated_exact_failure_warning" in content for content in tool_contents)
def test_config_enabled_hard_stop_run_conversation_returns_controlled_guardrail_halt_without_top_level_error():
agent = _make_agent("web_search", max_iterations=10, config=_hard_stop_config())
same_args = {"query": "same"}
responses = [ responses = [
_mock_response( _mock_response(
content="", content="",