fix: use output_text for assistant message content in Codex Responses API (#15690)
The Codex Responses API rejects `input_text` parts inside assistant messages; only `output_text` and `refusal` are valid content types for the assistant role. `_chat_content_to_responses_parts()` previously hardcoded every text part to `input_text`, regardless of the message role. As a result, an assistant message with list-format content (multimodal or structured) was converted into invalid `input_text` parts, which the API rejected with: "Invalid value: 'input_text'. Supported values are: 'output_text' and 'refusal'." Fix: add a `role` parameter to `_chat_content_to_responses_parts()` that selects `output_text` for assistant messages and `input_text` for user messages, and thread it through `_chat_messages_to_responses_input()` and `_preflight_codex_input_items()`. Fixes #15687.
This commit is contained in:
parent
7c17accb29
commit
648b89911f
@ -44,22 +44,31 @@ _TOOL_CALL_LEAK_PATTERN = re.compile(
|
||||
# Multimodal content helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
|
||||
def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> List[Dict[str, Any]]:
|
||||
"""Convert chat-style multimodal content to Responses API input parts.
|
||||
|
||||
Input: ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
|
||||
Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
|
||||
Output: ``[{"type":"input_text"|"output_text"|"input_image", ...}]`` (Responses format)
|
||||
|
||||
The ``role`` parameter controls the text content type:
|
||||
- ``"user"`` (default) → ``"input_text"``
|
||||
- ``"assistant"`` → ``"output_text"``
|
||||
|
||||
The Responses API rejects ``input_text`` inside assistant messages and
|
||||
``output_text`` inside user messages, so callers MUST pass the correct
|
||||
role for the message being converted.
|
||||
|
||||
Returns an empty list when ``content`` is not a list or contains no
|
||||
recognized parts — callers fall back to the string path.
|
||||
"""
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
if not isinstance(content, list):
|
||||
return []
|
||||
converted: List[Dict[str, Any]] = []
|
||||
for part in content:
|
||||
if isinstance(part, str):
|
||||
if part:
|
||||
converted.append({"type": "input_text", "text": part})
|
||||
converted.append({"type": text_type, "text": part})
|
||||
continue
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
@ -67,7 +76,7 @@ def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
|
||||
if ptype in {"text", "input_text", "output_text"}:
|
||||
text = part.get("text")
|
||||
if isinstance(text, str) and text:
|
||||
converted.append({"type": "input_text", "text": text})
|
||||
converted.append({"type": text_type, "text": text})
|
||||
continue
|
||||
if ptype in {"image_url", "input_image"}:
|
||||
image_ref = part.get("image_url")
|
||||
@ -233,9 +242,10 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
|
||||
if role in {"user", "assistant"}:
|
||||
content = msg.get("content", "")
|
||||
if isinstance(content, list):
|
||||
content_parts = _chat_content_to_responses_parts(content)
|
||||
content_parts = _chat_content_to_responses_parts(content, role=role)
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
content_text = "".join(
|
||||
p.get("text", "") for p in content_parts if p.get("type") == "input_text"
|
||||
p.get("text", "") for p in content_parts if p.get("type") == text_type
|
||||
)
|
||||
else:
|
||||
content_parts = []
|
||||
@ -429,13 +439,16 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
content = ""
|
||||
if isinstance(content, list):
|
||||
# Multimodal content from ``_chat_messages_to_responses_input``
|
||||
# is already in Responses format (``input_text`` / ``input_image``).
|
||||
# Validate each part and pass through.
|
||||
# is already in Responses format (``input_text`` / ``output_text``
|
||||
# / ``input_image``). Validate each part and pass through.
|
||||
# Use the correct text type for the role — ``output_text`` for
|
||||
# assistant messages, ``input_text`` for user messages.
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
validated: List[Dict[str, Any]] = []
|
||||
for part_idx, part in enumerate(content):
|
||||
if isinstance(part, str):
|
||||
if part:
|
||||
validated.append({"type": "input_text", "text": part})
|
||||
validated.append({"type": text_type, "text": part})
|
||||
continue
|
||||
if not isinstance(part, dict):
|
||||
raise ValueError(
|
||||
@ -446,7 +459,7 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
text = part.get("text", "")
|
||||
if not isinstance(text, str):
|
||||
text = str(text or "")
|
||||
validated.append({"type": "input_text", "text": text})
|
||||
validated.append({"type": text_type, "text": text})
|
||||
elif ptype in {"input_image", "image_url"}:
|
||||
image_ref = part.get("image_url", "")
|
||||
detail = part.get("detail")
|
||||
|
||||
@ -12,7 +12,7 @@ from types import SimpleNamespace
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
from agent.codex_responses_adapter import _chat_messages_to_responses_input, _normalize_codex_response, _preflight_codex_input_items
|
||||
from agent.codex_responses_adapter import _chat_content_to_responses_parts, _chat_messages_to_responses_input, _normalize_codex_response, _preflight_codex_input_items
|
||||
|
||||
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
|
||||
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
|
||||
@ -520,6 +520,111 @@ class TestChatMessagesToResponsesInput:
|
||||
reasoning_items = [i for i in items if i.get("type") == "reasoning"]
|
||||
assert len(reasoning_items) == 0
|
||||
|
||||
def test_user_multimodal_content_uses_input_text(self, monkeypatch):
    """A user message with list content is converted to ``input_text`` parts."""
    # The returned agent is not used by the assertions below; the call is
    # kept so any set-up it performs still runs.
    agent = _make_agent(
        monkeypatch,
        "openai-codex",
        api_mode="codex_responses",
        base_url="https://chatgpt.com/backend-api/codex",
    )
    user_message = {
        "role": "user",
        "content": [{"type": "text", "text": "find files"}],
    }

    converted = _chat_messages_to_responses_input([user_message])

    # Exactly one item comes back, still tagged as a user message.
    assert len(converted) == 1
    item = converted[0]
    assert item["role"] == "user"

    # The content stays a list and its text part uses the user-side type.
    parts = item["content"]
    assert isinstance(parts, list)
    first_part = parts[0]
    assert first_part["type"] == "input_text"
    assert first_part["text"] == "find files"
|
||||
|
||||
def test_assistant_multimodal_content_uses_output_text(self, monkeypatch):
    """An assistant message with list content is converted to ``output_text`` parts.

    Regression test for #15687: the Responses API rejects ``input_text``
    inside assistant messages.
    """
    # The returned agent is not used by the assertions below; the call is
    # kept so any set-up it performs still runs.
    agent = _make_agent(
        monkeypatch,
        "openai-codex",
        api_mode="codex_responses",
        base_url="https://chatgpt.com/backend-api/codex",
    )
    assistant_message = {
        "role": "assistant",
        "content": [{"type": "text", "text": "I found the files."}],
    }

    converted = _chat_messages_to_responses_input([assistant_message])

    # Exactly one item comes back, still tagged as an assistant message.
    assert len(converted) == 1
    item = converted[0]
    assert item["role"] == "assistant"

    # The content stays a list and its text part uses the assistant-side type.
    parts = item["content"]
    assert isinstance(parts, list)
    first_part = parts[0]
    assert first_part["type"] == "output_text"
    assert first_part["text"] == "I found the files."
|
||||
|
||||
def test_preflight_preserves_assistant_output_text(self, monkeypatch):
    """Preflight keeps ``input_text`` for user items and ``output_text`` for assistant items."""
    # The returned agent is not used by the assertions below; the call is
    # kept so any set-up it performs still runs.
    agent = _make_agent(
        monkeypatch,
        "openai-codex",
        api_mode="codex_responses",
        base_url="https://chatgpt.com/backend-api/codex",
    )
    # Items are already in Responses format, one per role.
    user_item = {
        "role": "user",
        "content": [{"type": "input_text", "text": "hi"}],
    }
    assistant_item = {
        "role": "assistant",
        "content": [{"type": "output_text", "text": "hello"}],
    }

    normalized = _preflight_codex_input_items([user_item, assistant_item])

    user_parts = normalized[0]["content"]
    assistant_parts = normalized[1]["content"]
    assert user_parts[0]["type"] == "input_text"
    assert assistant_parts[0]["type"] == "output_text"
|
||||
|
||||
def test_full_round_trip_with_list_content(self, monkeypatch):
    """List-content messages keep role-correct text types through conversion and preflight."""
    # The returned agent is not used by the assertions below; the call is
    # kept so any set-up it performs still runs.
    agent = _make_agent(
        monkeypatch,
        "openai-codex",
        api_mode="codex_responses",
        base_url="https://chatgpt.com/backend-api/codex",
    )
    conversation = (
        ("user", "hello"),
        ("assistant", "hi there"),
        ("user", "continue"),
    )
    messages = [
        {"role": speaker, "content": [{"type": "text", "text": utterance}]}
        for speaker, utterance in conversation
    ]

    # Stage 1: chat messages -> Responses input; stage 2: preflight validation.
    normalized = _preflight_codex_input_items(
        _chat_messages_to_responses_input(messages)
    )

    # Both user items (positions 0 and 2) are checked first, then the
    # assistant item (position 1).
    for position, expected_type in ((0, "input_text"), (2, "input_text"), (1, "output_text")):
        assert normalized[position]["content"][0]["type"] == expected_type
|
||||
|
||||
|
||||
class TestChatContentToResponsesParts:
    """Checks how the ``role`` argument of ``_chat_content_to_responses_parts`` picks the text type (#15687)."""

    def test_default_role_emits_input_text(self):
        """Without a ``role`` argument, a text part comes back as ``input_text``."""
        parts = _chat_content_to_responses_parts([{"type": "text", "text": "hello"}])
        first = parts[0]
        assert first["type"] == "input_text"

    def test_explicit_user_role_emits_input_text(self):
        """Passing ``role="user"`` yields ``input_text``."""
        chat_content = [{"type": "text", "text": "hello"}]
        parts = _chat_content_to_responses_parts(chat_content, role="user")
        first = parts[0]
        assert first["type"] == "input_text"

    def test_assistant_role_emits_output_text(self):
        """Passing ``role="assistant"`` yields ``output_text``."""
        chat_content = [{"type": "text", "text": "hello"}]
        parts = _chat_content_to_responses_parts(chat_content, role="assistant")
        first = parts[0]
        assert first["type"] == "output_text"

    def test_assistant_role_with_string_parts(self):
        """A bare string part in assistant content is also typed ``output_text``."""
        parts = _chat_content_to_responses_parts(["hello"], role="assistant")
        first = parts[0]
        assert first["type"] == "output_text"
        assert first["text"] == "hello"

    def test_assistant_role_with_mixed_input_output_text_types(self):
        """Parts pre-tagged ``input_text``/``output_text``/``text`` are all retyped to the role's type."""
        labelled = (("input_text", "a"), ("output_text", "b"), ("text", "c"))
        chat_content = [{"type": kind, "text": body} for kind, body in labelled]

        parts = _chat_content_to_responses_parts(chat_content, role="assistant")

        # Every text part becomes output_text whatever its original tag, and
        # the texts keep their original order.
        assert all(entry["type"] == "output_text" for entry in parts)
        assert [entry["text"] for entry in parts] == ["a", "b", "c"]
|
||||
|
||||
|
||||
# ── Response normalization tests ─────────────────────────────────────────────
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user