From d63abbc3290b8052078e63ae5899819280fcb07a Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 28 Apr 2026 01:19:18 -0700
Subject: [PATCH] fix(agent): persist streamed reasoning_content on assistant
 turns (#16844) (#16892)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Streaming-only providers (glm, MiniMax, gpt-5.x via aigw, Anthropic via
openai-compat shims) emit reasoning through delta.reasoning_content
chunks that get accumulated into the local reasoning_text string — but
never land on the assistant message object as a top-level attribute. The
prior guard at _build_assistant_message only wrote reasoning_content
when the SDK exposed hasattr(msg, 'reasoning_content'), so these
providers persisted the chain-of-thought under the internal 'reasoning'
key and omitted the protocol-standard field.

The poison was silent until the user later switched to a DeepSeek-v4 or
Kimi thinking model, at which point replay failed with HTTP 400:
'The reasoning_content in the thinking mode must be passed back to the
API.' One reported session store accumulated 4,031 poisoned messages
across 1,101 files (#16844).

Fix: add an additive fallback that promotes the already-sanitized
reasoning_text to reasoning_content when no earlier branch wrote it AND
reasoning text was actually captured. Layered on top of the existing
SDK-attr branch and DeepSeek ''-pad (#15250) rather than replacing them,
so every existing behavior is preserved:

- SDK-exposed reasoning_content (OpenAI/Moonshot/DeepSeek SDK) still
  wins.
- DeepSeek tool-call ''-pad still fires when the SDK exposes the attr
  but the value is None.
- Non-thinking turns with no reasoning leave the field absent, so
  _copy_reasoning_content_for_api's cross-provider leak guard (#15748),
  promote-from-'reasoning' tier, and thinking-pad tier remain live at
  replay time.
- No empty '' gets eagerly written on every assistant turn (which would
  have bypassed the read-side ladder and triggered empty thinking-block
  insertion in the Anthropic adapter).

Tests: three new TestBuildAssistantMessage cases covering the streaming
promotion path, SDK precedence, and field-absent-when-no-reasoning
invariant.

Credit @Sanjays2402 for the original diagnosis and patch in #16884;
this is a scoped rework that preserves the existing read-side
compensation code as defense in depth.

Refs #16844, #16884, #15250, #15353, #15748.
---
 run_agent.py                      | 25 ++++++++++++++
 tests/run_agent/test_run_agent.py | 56 +++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)

diff --git a/run_agent.py b/run_agent.py
index 3f2b7830..4976861b 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -8100,6 +8100,31 @@ class AIAgent:
                 # as a defensive compatibility fallback (refs #15250).
                 msg["reasoning_content"] = ""
 
+        # Additive fallback (refs #16844, #16884). Streaming-only providers
+        # (glm, MiniMax, gpt-5.x via aigw, Anthropic via openai-compat shims)
+        # accumulate reasoning through ``delta.reasoning_content`` chunks
+        # but never land it on the message object as a top-level attribute,
+        # so neither branch above fires and the chain-of-thought is stored
+        # only under the internal ``reasoning`` key. When the user later
+        # replays that history through a DeepSeek-v4 / Kimi thinking model,
+        # the missing ``reasoning_content`` causes HTTP 400 ("The
+        # reasoning_content in the thinking mode must be passed back to the
+        # API.").
+        #
+        # Promote the already-sanitized streamed ``reasoning_text`` to
+        # ``reasoning_content`` at write time, but ONLY when no prior branch
+        # already set it AND we actually captured reasoning text. This
+        # preserves every existing behavior:
+        #   - SDK-exposed ``reasoning_content`` (OpenAI/Moonshot/DeepSeek SDK)
+        #     still wins.
+        #   - DeepSeek tool-call ""-pad (#15250) still fires.
+        #   - Non-thinking turns with no reasoning leave the field absent,
+        #     so ``_copy_reasoning_content_for_api``'s cross-provider leak
+        #     guard (#15748) and ``reasoning``→``reasoning_content``
+        #     promotion tiers still apply at replay time.
+        if "reasoning_content" not in msg and reasoning_text:
+            msg["reasoning_content"] = reasoning_text
+
         if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
             # Pass reasoning_details back unmodified so providers (OpenRouter,
             # Anthropic, OpenAI) can maintain reasoning continuity across turns.
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index eb2b47f8..2c13a156 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -1397,6 +1397,62 @@ class TestBuildAssistantMessage:
         result = agent._build_assistant_message(msg, "stop")
         assert result["content"] == ""
 
+    def test_streaming_only_reasoning_promoted_to_reasoning_content(self, agent):
+        """Refs #16844 / #16884. Streaming-only providers (glm, MiniMax,
+        gpt-5.x via aigw, Anthropic via openai-compat shims) accumulate
+        reasoning through delta chunks but never expose
+        ``reasoning_content`` as a top-level attribute on the finalized
+        message — only ``reasoning`` (or the internal accumulator).
+
+        Without write-side promotion, the persisted message stores the
+        chain-of-thought under the internal ``reasoning`` key and omits
+        ``reasoning_content``. When the user later replays that history
+        through a DeepSeek-v4 / Kimi thinking model, the missing field
+        causes HTTP 400 ("The reasoning_content in the thinking mode
+        must be passed back to the API.").
+
+        Fix: when ``reasoning_content`` wasn't written by an earlier
+        branch AND we captured reasoning text from streaming deltas,
+        promote it to ``reasoning_content`` at write time.
+        """
+        # SDK-style object that exposes ``reasoning`` but NOT
+        # ``reasoning_content`` — the streaming-only provider shape.
+        msg = _mock_assistant_msg(content="answer", reasoning="hidden thinking")
+        assert not hasattr(msg, "reasoning_content")
+
+        result = agent._build_assistant_message(msg, "stop")
+
+        assert result["reasoning"] == "hidden thinking"
+        assert result["reasoning_content"] == "hidden thinking"
+
+    def test_sdk_reasoning_content_still_wins_over_fallback(self, agent):
+        """Additive fallback must not override SDK-supplied reasoning_content.
+
+        When both ``reasoning`` and ``reasoning_content`` are present, the
+        SDK's own ``reasoning_content`` is authoritative (may carry
+        structured data the accumulator doesn't have).
+        """
+        msg = _mock_assistant_msg(
+            content="answer",
+            reasoning="summary only",
+            reasoning_content="structured provider scratchpad",
+        )
+        result = agent._build_assistant_message(msg, "stop")
+        assert result["reasoning_content"] == "structured provider scratchpad"
+
+    def test_no_reasoning_text_leaves_field_absent(self, agent):
+        """Non-thinking turns with no reasoning leave reasoning_content absent.
+
+        This preserves ``_copy_reasoning_content_for_api``'s downstream
+        tiers at replay time — cross-provider leak guard (#15748),
+        promote-from-``reasoning``, and DeepSeek/Kimi ""-pad — which
+        would all be bypassed if we eagerly wrote ``reasoning_content=""``
+        on every assistant turn regardless of provider.
+        """
+        msg = _mock_assistant_msg(content="plain answer")
+        result = agent._build_assistant_message(msg, "stop")
+        assert "reasoning_content" not in result
+
     def test_tool_call_extra_content_preserved(self, agent):
         """Gemini thinking models attach extra_content with thought_signature
         to tool calls. This must be preserved so subsequent API calls include it."""