From d63abbc3290b8052078e63ae5899819280fcb07a Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 28 Apr 2026 01:19:18 -0700 Subject: [PATCH] fix(agent): persist streamed reasoning_content on assistant turns (#16844) (#16892) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Streaming-only providers (glm, MiniMax, gpt-5.x via aigw, Anthropic via openai-compat shims) emit reasoning through delta.reasoning_content chunks that get accumulated into the local reasoning_text string — but never land on the assistant message object as a top-level attribute. The prior guard at _build_assistant_message only wrote reasoning_content when the SDK exposed hasattr(msg, 'reasoning_content'), so these providers persisted the chain-of-thought under the internal 'reasoning' key and omitted the protocol-standard field. The poison was silent until the user later switched to a DeepSeek-v4 or Kimi thinking model, at which point replay failed with HTTP 400: 'The reasoning_content in the thinking mode must be passed back to the API.' One reported session store accumulated 4,031 poisoned messages across 1,101 files (#16844). Fix: add an additive fallback that promotes the already-sanitized reasoning_text to reasoning_content when no earlier branch wrote it AND reasoning text was actually captured. Layered on top of the existing SDK-attr branch and DeepSeek ''-pad (#15250) rather than replacing them, so every existing behavior is preserved: - SDK-exposed reasoning_content (OpenAI/Moonshot/DeepSeek SDK) still wins. - DeepSeek tool-call ''-pad still fires when the SDK exposes the attr but the value is None. - Non-thinking turns with no reasoning leave the field absent, so _copy_reasoning_content_for_api's cross-provider leak guard (#15748), promote-from-'reasoning' tier, and thinking-pad tier remain live at replay time. - No empty '' gets eagerly written on every assistant turn (which would have bypassed the read-side ladder and triggered empty thinking-block insertion in the Anthropic adapter). Tests: three new TestBuildAssistantMessage cases covering the streaming promotion path, SDK precedence, and field-absent-when-no-reasoning invariant. Credit @Sanjays2402 for the original diagnosis and patch in #16884; this is a scoped rework that preserves the existing read-side compensation code as defense in depth. Refs #16844, #16884, #15250, #15353, #15748. --- run_agent.py | 25 ++++++++++++++ tests/run_agent/test_run_agent.py | 56 +++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) diff --git a/run_agent.py b/run_agent.py index 3f2b7830..4976861b 100644 --- a/run_agent.py +++ b/run_agent.py @@ -8100,6 +8100,31 @@ class AIAgent: # as a defensive compatibility fallback (refs #15250). msg["reasoning_content"] = "" + # Additive fallback (refs #16844, #16884). Streaming-only providers + # (glm, MiniMax, gpt-5.x via aigw, Anthropic via openai-compat shims) + # accumulate reasoning through ``delta.reasoning_content`` chunks + # but never land it on the message object as a top-level attribute, + # so neither branch above fires and the chain-of-thought is stored + # only under the internal ``reasoning`` key. When the user later + # replays that history through a DeepSeek-v4 / Kimi thinking model, + # the missing ``reasoning_content`` causes HTTP 400 ("The + # reasoning_content in the thinking mode must be passed back to the + # API."). + # + # Promote the already-sanitized streamed ``reasoning_text`` to + # ``reasoning_content`` at write time, but ONLY when no prior branch + # already set it AND we actually captured reasoning text. This + # preserves every existing behavior: + # - SDK-exposed ``reasoning_content`` (OpenAI/Moonshot/DeepSeek SDK) + # still wins. + # - DeepSeek tool-call ""-pad (#15250) still fires. + # - Non-thinking turns with no reasoning leave the field absent, + # so ``_copy_reasoning_content_for_api``'s cross-provider leak + # guard (#15748) and ``reasoning``→``reasoning_content`` + # promotion tiers still apply at replay time. + if "reasoning_content" not in msg and reasoning_text: + msg["reasoning_content"] = reasoning_text + if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details: # Pass reasoning_details back unmodified so providers (OpenRouter, # Anthropic, OpenAI) can maintain reasoning continuity across turns. diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index eb2b47f8..2c13a156 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -1397,6 +1397,62 @@ class TestBuildAssistantMessage: result = agent._build_assistant_message(msg, "stop") assert result["content"] == "" + def test_streaming_only_reasoning_promoted_to_reasoning_content(self, agent): + """Refs #16844 / #16884. Streaming-only providers (glm, MiniMax, + gpt-5.x via aigw, Anthropic via openai-compat shims) accumulate + reasoning through delta chunks but never expose + ``reasoning_content`` as a top-level attribute on the finalized + message — only ``reasoning`` (or the internal accumulator). + + Without write-side promotion, the persisted message stores the + chain-of-thought under the internal ``reasoning`` key and omits + ``reasoning_content``. When the user later replays that history + through a DeepSeek-v4 / Kimi thinking model, the missing field + causes HTTP 400 ("The reasoning_content in the thinking mode + must be passed back to the API."). + + Fix: when ``reasoning_content`` wasn't written by an earlier + branch AND we captured reasoning text from streaming deltas, + promote it to ``reasoning_content`` at write time. + """ + # SDK-style object that exposes ``reasoning`` but NOT + # ``reasoning_content`` — the streaming-only provider shape. + msg = _mock_assistant_msg(content="answer", reasoning="hidden thinking") + assert not hasattr(msg, "reasoning_content") + + result = agent._build_assistant_message(msg, "stop") + + assert result["reasoning"] == "hidden thinking" + assert result["reasoning_content"] == "hidden thinking" + + def test_sdk_reasoning_content_still_wins_over_fallback(self, agent): + """Additive fallback must not override SDK-supplied reasoning_content. + + When both ``reasoning`` and ``reasoning_content`` are present, the + SDK's own ``reasoning_content`` is authoritative (may carry + structured data the accumulator doesn't have). + """ + msg = _mock_assistant_msg( + content="answer", + reasoning="summary only", + reasoning_content="structured provider scratchpad", + ) + result = agent._build_assistant_message(msg, "stop") + assert result["reasoning_content"] == "structured provider scratchpad" + + def test_no_reasoning_text_leaves_field_absent(self, agent): + """Non-thinking turns with no reasoning leave reasoning_content absent. + + This preserves ``_copy_reasoning_content_for_api``'s downstream + tiers at replay time — cross-provider leak guard (#15748), + promote-from-``reasoning``, and DeepSeek/Kimi ""-pad — which + would all be bypassed if we eagerly wrote ``reasoning_content=""`` + on every assistant turn regardless of provider. + """ + msg = _mock_assistant_msg(content="plain answer") + result = agent._build_assistant_message(msg, "stop") + assert "reasoning_content" not in result + def test_tool_call_extra_content_preserved(self, agent): """Gemini thinking models attach extra_content with thought_signature to tool calls. This must be preserved so subsequent API calls include it."""