fix(agent): sanitize Codex tool-call history summaries
This commit is contained in:
parent
f43b126677
commit
b29b709a71
@ -992,8 +992,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
def _get_tool_call_id(tc) -> str:
|
||||
"""Extract the call ID from a tool_call entry (dict or SimpleNamespace)."""
|
||||
if isinstance(tc, dict):
|
||||
return tc.get("id", "")
|
||||
return getattr(tc, "id", "") or ""
|
||||
return tc.get("call_id", "") or tc.get("id", "") or ""
|
||||
return getattr(tc, "call_id", "") or getattr(tc, "id", "") or ""
|
||||
|
||||
def _sanitize_tool_pairs(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Fix orphaned tool_call / tool_result pairs after compression.
|
||||
|
||||
11
run_agent.py
11
run_agent.py
@ -4982,8 +4982,8 @@ class AIAgent:
|
||||
def _get_tool_call_id_static(tc) -> str:
|
||||
"""Extract call ID from a tool_call entry (dict or object)."""
|
||||
if isinstance(tc, dict):
|
||||
return tc.get("id", "") or ""
|
||||
return getattr(tc, "id", "") or ""
|
||||
return tc.get("call_id", "") or tc.get("id", "") or ""
|
||||
return getattr(tc, "call_id", "") or getattr(tc, "id", "") or ""
|
||||
|
||||
# Immutable set of message role strings.
# NOTE(review): presumably the roles allowed on outgoing API messages —
# confirm against the code that consults this set.
_VALID_API_ROLES = frozenset({"system", "user", "assistant", "tool", "function", "developer"})
|
||||
|
||||
@ -10013,6 +10013,13 @@ class AIAgent:
|
||||
for idx, pfm in enumerate(self.prefill_messages):
|
||||
api_messages.insert(sys_offset + idx, pfm.copy())
|
||||
|
||||
# Same safety net as the main loop: repair tool-call/result
|
||||
# pairing before asking for a final summary. Compression and
|
||||
# session resume can leave a tool result whose parent assistant
|
||||
# tool_call was summarized away; Responses API rejects that as
|
||||
# "No tool call found for function call output".
|
||||
api_messages = self._sanitize_api_messages(api_messages)
|
||||
|
||||
# Same safety net as the main loop: drop thinking-only assistant
|
||||
# turns so Anthropic-family providers don't 400 the summary call.
|
||||
api_messages = self._drop_thinking_only_and_merge_users(api_messages)
|
||||
|
||||
@ -640,6 +640,30 @@ class TestCompressWithClient:
|
||||
for tc in msg["tool_calls"]:
|
||||
assert tc["id"] in answered_ids
|
||||
|
||||
def test_sanitizer_matches_responses_call_id_when_id_differs(self, compressor):
    """A tool result keyed by ``call_id`` is kept when the call's ``id`` differs."""
    # The tool call carries two distinct identifiers; the tool result below
    # references the ``call_id`` one.
    tool_call = {
        "id": "fc_123",
        "call_id": "call_123",
        "response_item_id": "fc_123",
        "type": "function",
        "function": {"name": "search_files", "arguments": "{}"},
    }
    assistant_turn = {"role": "assistant", "content": "", "tool_calls": [tool_call]}
    tool_result = {"role": "tool", "tool_call_id": "call_123", "content": "result"}

    sanitized = compressor._sanitize_tool_pairs([assistant_turn, tool_result])

    surviving_ids = [
        message.get("tool_call_id")
        for message in sanitized
        if message.get("role") == "tool"
    ]
    assert surviving_ids == ["call_123"]
|
||||
|
||||
def test_summary_role_avoids_consecutive_user_messages(self):
|
||||
"""Summary role should alternate with the last head message to avoid consecutive same-role messages."""
|
||||
mock_client = MagicMock()
|
||||
|
||||
@ -2181,6 +2181,73 @@ class TestHandleMaxIterations:
|
||||
kwargs = agent.client.chat.completions.create.call_args.kwargs
|
||||
assert "reasoning" not in kwargs.get("extra_body", {})
|
||||
|
||||
def test_codex_summary_sanitizes_orphan_tool_results(self, agent):
    """An orphaned tool result must not reach the Codex summary request.

    The history holds a ``tool`` message with no assistant tool call before
    it. The input captured from the stubbed ``_run_codex_stream`` must
    contain no ``function_call_output`` item for that call ID.
    """
    # Configure the agent fixture for the Codex Responses backend.
    agent.api_mode = "codex_responses"
    agent.provider = "openai-codex"
    agent.base_url = "https://chatgpt.com/backend-api/codex"
    agent._base_url_lower = agent.base_url.lower()
    agent._base_url_hostname = "chatgpt.com"
    agent.model = "gpt-5.5"
    agent._cached_system_prompt = "You are helpful."

    seen_kwargs = {}

    def _record_and_reply(kwargs):
        # Remember what the agent sent, then answer with a completed summary.
        seen_kwargs.update(kwargs)
        text_part = SimpleNamespace(type="output_text", text="Summary")
        reply = SimpleNamespace(
            type="message",
            status="completed",
            content=[text_part],
        )
        return SimpleNamespace(status="completed", output=[reply])

    orphan_result = {
        "role": "tool",
        "tool_call_id": "call_orphan",
        "content": "orphaned result from compressed history",
    }
    history = [{"role": "user", "content": "do stuff"}, orphan_result]

    with patch.object(agent, "_run_codex_stream", side_effect=_record_and_reply):
        result = agent._handle_max_iterations(history, 90)

    assert result == "Summary"
    # No request item may be the function-call output of the orphaned call.
    assert all(
        item.get("type") != "function_call_output"
        or item.get("call_id") != "call_orphan"
        for item in seen_kwargs["input"]
    )
|
||||
|
||||
def test_api_sanitizer_matches_responses_call_id_when_id_differs(self, agent):
    """``_sanitize_api_messages`` keeps a tool result keyed by ``call_id``."""
    # ``id`` and ``call_id`` differ on purpose; the tool result references
    # the ``call_id`` value.
    tool_call = {
        "id": "fc_123",
        "call_id": "call_123",
        "response_item_id": "fc_123",
        "type": "function",
        "function": {"name": "web_search", "arguments": "{}"},
    }
    assistant_turn = {"role": "assistant", "content": "", "tool_calls": [tool_call]}
    tool_result = {"role": "tool", "tool_call_id": "call_123", "content": "result"}

    sanitized = agent._sanitize_api_messages([assistant_turn, tool_result])

    surviving_ids = [
        message.get("tool_call_id")
        for message in sanitized
        if message.get("role") == "tool"
    ]
    assert surviving_ids == ["call_123"]
|
||||
|
||||
|
||||
class TestRunConversation:
|
||||
"""Tests for the main run_conversation method.
|
||||
|
||||
Loading…
Reference in New Issue
Block a user