fix(agent): sanitize Codex tool-call history summaries

This commit is contained in:
Stephen Schoettler 2026-04-29 15:06:42 -07:00 committed by Teknium
parent f43b126677
commit b29b709a71
4 changed files with 102 additions and 4 deletions

View File

@ -992,8 +992,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
def _get_tool_call_id(tc) -> str:
"""Extract the call ID from a tool_call entry (dict or SimpleNamespace)."""
if isinstance(tc, dict):
return tc.get("id", "")
return getattr(tc, "id", "") or ""
return tc.get("call_id", "") or tc.get("id", "") or ""
return getattr(tc, "call_id", "") or getattr(tc, "id", "") or ""
def _sanitize_tool_pairs(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Fix orphaned tool_call / tool_result pairs after compression.

View File

@ -4982,8 +4982,8 @@ class AIAgent:
def _get_tool_call_id_static(tc) -> str:
"""Extract call ID from a tool_call entry (dict or object)."""
if isinstance(tc, dict):
return tc.get("id", "") or ""
return getattr(tc, "id", "") or ""
return tc.get("call_id", "") or tc.get("id", "") or ""
return getattr(tc, "call_id", "") or getattr(tc, "id", "") or ""
# Immutable set of message role strings; per its name these are the roles
# treated as valid for API messages (how it is consumed is not visible here).
_VALID_API_ROLES = frozenset({"system", "user", "assistant", "tool", "function", "developer"})
@ -10013,6 +10013,13 @@ class AIAgent:
for idx, pfm in enumerate(self.prefill_messages):
api_messages.insert(sys_offset + idx, pfm.copy())
# Same safety net as the main loop: repair tool-call/result
# pairing before asking for a final summary. Compression and
# session resume can leave a tool result whose parent assistant
# tool_call was summarized away; Responses API rejects that as
# "No tool call found for function call output".
api_messages = self._sanitize_api_messages(api_messages)
# Same safety net as the main loop: drop thinking-only assistant
# turns so Anthropic-family providers don't 400 the summary call.
api_messages = self._drop_thinking_only_and_merge_users(api_messages)

View File

@ -640,6 +640,30 @@ class TestCompressWithClient:
for tc in msg["tool_calls"]:
assert tc["id"] in answered_ids
def test_sanitizer_matches_responses_call_id_when_id_differs(self, compressor):
    """A tool result keyed by ``call_id`` survives when the call's ``id`` differs."""
    # The tool call carries distinct "id" and "call_id" values; the tool
    # result below points at the "call_id" one.
    responses_tool_call = {
        "id": "fc_123",
        "call_id": "call_123",
        "response_item_id": "fc_123",
        "type": "function",
        "function": {"name": "search_files", "arguments": "{}"},
    }
    assistant_turn = {
        "role": "assistant",
        "content": "",
        "tool_calls": [responses_tool_call],
    }
    tool_turn = {"role": "tool", "tool_call_id": "call_123", "content": "result"}

    sanitized = compressor._sanitize_tool_pairs([assistant_turn, tool_turn])

    surviving_ids = []
    for message in sanitized:
        if message.get("role") == "tool":
            surviving_ids.append(message.get("tool_call_id"))
    assert surviving_ids == ["call_123"]
def test_summary_role_avoids_consecutive_user_messages(self):
"""Summary role should alternate with the last head message to avoid consecutive same-role messages."""
mock_client = MagicMock()

View File

@ -2181,6 +2181,73 @@ class TestHandleMaxIterations:
kwargs = agent.client.chat.completions.create.call_args.kwargs
assert "reasoning" not in kwargs.get("extra_body", {})
def test_codex_summary_sanitizes_orphan_tool_results(self, agent):
    """The summary request must not include an output for an unmatched tool result.

    The history holds a ``tool`` message with no assistant tool call for its
    ``tool_call_id``; the ``input`` captured from the stubbed
    ``_run_codex_stream`` call must contain no ``function_call_output`` item
    for that call ID.
    """
    agent.api_mode = "codex_responses"
    agent.provider = "openai-codex"
    agent.base_url = "https://chatgpt.com/backend-api/codex"
    agent._base_url_lower = agent.base_url.lower()
    agent._base_url_hostname = "chatgpt.com"
    agent.model = "gpt-5.5"
    agent._cached_system_prompt = "You are helpful."

    # Filled in by the stub with the kwargs of the outgoing request.
    captured = {}

    def record_request_and_reply(kwargs):
        captured.update(kwargs)
        text_part = SimpleNamespace(type="output_text", text="Summary")
        message_item = SimpleNamespace(
            type="message",
            status="completed",
            content=[text_part],
        )
        return SimpleNamespace(status="completed", output=[message_item])

    orphan_result = {
        "role": "tool",
        "tool_call_id": "call_orphan",
        "content": "orphaned result from compressed history",
    }
    messages = [{"role": "user", "content": "do stuff"}, orphan_result]

    with patch.object(agent, "_run_codex_stream", side_effect=record_request_and_reply):
        result = agent._handle_max_iterations(messages, 90)

    assert result == "Summary"
    input_items = captured["input"]
    # Every item is either not a function_call_output or belongs to another call.
    assert all(
        item.get("type") != "function_call_output"
        or item.get("call_id") != "call_orphan"
        for item in input_items
    )
def test_api_sanitizer_matches_responses_call_id_when_id_differs(self, agent):
    """``_sanitize_api_messages`` keeps a tool result that references ``call_id``."""
    # "id" and "call_id" deliberately differ; the tool result uses "call_id".
    responses_tool_call = {
        "id": "fc_123",
        "call_id": "call_123",
        "response_item_id": "fc_123",
        "type": "function",
        "function": {"name": "web_search", "arguments": "{}"},
    }
    assistant_turn = {
        "role": "assistant",
        "content": "",
        "tool_calls": [responses_tool_call],
    }
    tool_turn = {"role": "tool", "tool_call_id": "call_123", "content": "result"}

    sanitized = agent._sanitize_api_messages([assistant_turn, tool_turn])

    surviving_ids = []
    for message in sanitized:
        if message.get("role") == "tool":
            surviving_ids.append(message.get("tool_call_id"))
    assert surviving_ids == ["call_123"]
class TestRunConversation:
"""Tests for the main run_conversation method.