diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index 30958f0f..1bf0e951 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -935,6 +935,26 @@ def convert_messages_to_anthropic( if not m["content"]: m["content"] = [{"type": "text", "text": "(tool call removed)"}] + # Strip orphaned tool_result blocks (no matching tool_use precedes them). + # This is the mirror of the above: context compression or session truncation + # can remove an assistant message containing a tool_use while leaving the + # subsequent tool_result intact. Anthropic rejects these with a 400. + tool_use_ids = set() + for m in result: + if m["role"] == "assistant" and isinstance(m["content"], list): + for block in m["content"]: + if block.get("type") == "tool_use": + tool_use_ids.add(block.get("id")) + for m in result: + if m["role"] == "user" and isinstance(m["content"], list): + m["content"] = [ + b + for b in m["content"] + if b.get("type") != "tool_result" or b.get("tool_use_id") in tool_use_ids + ] + if not m["content"]: + m["content"] = [{"type": "text", "text": "(tool result removed)"}] + # Enforce strict role alternation (Anthropic rejects consecutive same-role messages) fixed = [] for m in result: diff --git a/gateway/run.py b/gateway/run.py index c1acf39d..3b8cfc70 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1344,6 +1344,31 @@ class GatewayRunner: if event.get_command() == "status": return await self._handle_status_command(event) + # /reset and /new must bypass the running-agent guard so they + # actually dispatch as commands instead of being queued as user + # text (which would be fed back to the agent with the same + # broken history — #2170). Interrupt the agent first, then + # clear the adapter's pending queue so the stale "/reset" text + # doesn't get re-processed as a user message after the + # interrupt completes. + from hermes_cli.commands import resolve_command as _resolve_cmd_inner + _evt_cmd = event.get_command() + _cmd_def_inner = _resolve_cmd_inner(_evt_cmd) if _evt_cmd else None + if _cmd_def_inner and _cmd_def_inner.name == "new": + running_agent = self._running_agents.get(_quick_key) + if running_agent and running_agent is not _AGENT_PENDING_SENTINEL: + running_agent.interrupt("Session reset requested") + # Clear any pending messages so the old text doesn't replay + adapter = self.adapters.get(source.platform) + if adapter and hasattr(adapter, 'get_pending_message'): + adapter.get_pending_message(_quick_key) # consume and discard + self._pending_messages.pop(_quick_key, None) + # Clean up the running agent entry so the reset handler + # doesn't think an agent is still active. + if _quick_key in self._running_agents: + del self._running_agents[_quick_key] + return await self._handle_reset_command(event) + if event.message_type == MessageType.PHOTO: logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key[:20]) adapter = self.adapters.get(source.platform) diff --git a/tests/test_anthropic_adapter.py b/tests/test_anthropic_adapter.py index 7203de7e..c908be69 100644 --- a/tests/test_anthropic_adapter.py +++ b/tests/test_anthropic_adapter.py @@ -578,21 +578,39 @@ class TestConvertMessages: def test_converts_tool_results(self): messages = [ + { + "role": "assistant", + "content": "", + "tool_calls": [ + {"id": "tc_1", "function": {"name": "test_tool", "arguments": "{}"}}, + ], + }, {"role": "tool", "tool_call_id": "tc_1", "content": "result data"}, ] _, result = convert_messages_to_anthropic(messages) - assert result[0]["role"] == "user" - assert result[0]["content"][0]["type"] == "tool_result" - assert result[0]["content"][0]["tool_use_id"] == "tc_1" + # tool result is in the second message (user role) + user_msg = [m for m in result if m["role"] == "user"][0] + assert user_msg["content"][0]["type"] == "tool_result" + assert user_msg["content"][0]["tool_use_id"] == "tc_1" def test_merges_consecutive_tool_results(self): messages = [ + { + "role": "assistant", + "content": "", + "tool_calls": [ + {"id": "tc_1", "function": {"name": "tool_a", "arguments": "{}"}}, + {"id": "tc_2", "function": {"name": "tool_b", "arguments": "{}"}}, + ], + }, {"role": "tool", "tool_call_id": "tc_1", "content": "result 1"}, {"role": "tool", "tool_call_id": "tc_2", "content": "result 2"}, ] _, result = convert_messages_to_anthropic(messages) - assert len(result) == 1 - assert len(result[0]["content"]) == 2 + # assistant + merged user (with 2 tool_results) + user_msgs = [m for m in result if m["role"] == "user"] + assert len(user_msgs) == 1 + assert len(user_msgs[0]["content"]) == 2 def test_strips_orphaned_tool_use(self): messages = [ @@ -610,6 +628,51 @@ class TestConvertMessages: assistant_blocks = result[0]["content"] assert all(b.get("type") != "tool_use" for b in assistant_blocks) + def test_strips_orphaned_tool_result(self): + """tool_result with no matching tool_use should be stripped. + + This happens when context compression removes the assistant message + containing the tool_use but leaves the subsequent tool_result intact. + Anthropic rejects orphaned tool_results with a 400. + """ + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there"}, + # The assistant tool_use message was removed by compression, + # but the tool_result survived: + {"role": "tool", "tool_call_id": "tc_gone", "content": "stale result"}, + {"role": "user", "content": "Thanks"}, + ] + _, result = convert_messages_to_anthropic(messages) + # tc_gone has no matching tool_use — its tool_result should be stripped + for m in result: + if m["role"] == "user" and isinstance(m["content"], list): + assert all( + b.get("type") != "tool_result" + for b in m["content"] + ), "Orphaned tool_result should have been stripped" + + def test_strips_orphaned_tool_result_preserves_valid(self): + """Orphaned tool_results are stripped while valid ones survive.""" + messages = [ + { + "role": "assistant", + "content": "", + "tool_calls": [ + {"id": "tc_valid", "function": {"name": "search", "arguments": "{}"}}, + ], + }, + {"role": "tool", "tool_call_id": "tc_valid", "content": "good result"}, + {"role": "tool", "tool_call_id": "tc_orphan", "content": "stale result"}, + ] + _, result = convert_messages_to_anthropic(messages) + user_msg = [m for m in result if m["role"] == "user"][0] + tool_results = [ + b for b in user_msg["content"] if b.get("type") == "tool_result" + ] + assert len(tool_results) == 1 + assert tool_results[0]["tool_use_id"] == "tc_valid" + def test_system_with_cache_control(self): messages = [ { @@ -641,11 +704,19 @@ class TestConvertMessages: def test_tool_cache_control_is_preserved_on_tool_result_block(self): messages = apply_anthropic_cache_control([ {"role": "system", "content": "System prompt"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + {"id": "tc_1", "function": {"name": "test_tool", "arguments": "{}"}}, + ], + }, {"role": "tool", "tool_call_id": "tc_1", "content": "result"}, ]) _, result = convert_messages_to_anthropic(messages) - tool_block = result[0]["content"][0] + user_msg = [m for m in result if m["role"] == "user"][0] + tool_block = user_msg["content"][0] assert tool_block["type"] == "tool_result" assert tool_block["tool_use_id"] == "tc_1"