From bfb704684ec64675650bc39fa0f731604b12aba2 Mon Sep 17 00:00:00 2001
From: IMHaoyan <657290301@qq.com>
Date: Thu, 30 Apr 2026 22:49:55 -0700
Subject: [PATCH] fix(deepseek): use non-empty reasoning_content placeholder
 for V4 Pro thinking mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DeepSeek V4 Pro tightened thinking-mode validation and rejects empty-string
reasoning_content with HTTP 400:

    The reasoning content in the thinking mode must be passed back to the API.

run_agent.py injected "" at three fallback sites — the tool-call pad in
_build_assistant_message and both injection branches of
_copy_reasoning_content_for_api (cross-provider poison guard + unconditional
thinking pad). All three now emit " " (single space), which satisfies the
non-empty check on V4 Pro without leaking fabricated reasoning.

Also upgrades stale empty-string placeholders on replay: sessions persisted
before this change have reasoning_content="" pinned at creation time. When
the active provider enforces thinking-mode echo, the replay path now rewrites
"" -> " " so existing users don't hit an HTTP 400 on their first V4 Pro turn
after updating. Non-thinking providers still round-trip "" verbatim.

Updates 9 existing assertions and 4 parametrized expected values, and adds 2
regression tests (stale-placeholder upgrade, non-thinking verbatim
preservation).

Refs #15250, #17400.
Closes #17341.
---
 run_agent.py                                  | 46 +++++++----
 scripts/release.py                            |  1 +
 .../test_deepseek_reasoning_content_echo.py   | 80 ++++++++++++++-----
 tests/run_agent/test_run_agent.py             |  8 +-
 4 files changed, 98 insertions(+), 37 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index 4ea0fafe..26933994 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -8603,9 +8603,13 @@ class AIAgent:
             # message. Without it, replaying the persisted message causes
             # HTTP 400 ("The reasoning_content in the thinking mode must
             # be passed back to the API"). Include streamed reasoning
-            # text when captured; otherwise pad with empty string.
-            # Refs #15250, #17400.
-            msg["reasoning_content"] = reasoning_text or ""
+            # text when captured; otherwise pad with a single space —
+            # DeepSeek V4 Pro tightened validation and rejects empty
+            # string ("The reasoning content in the thinking mode must
+            # be passed back to the API"). A space satisfies non-empty
+            # checks everywhere without leaking fabricated reasoning.
+            # Refs #15250, #17400, #17341.
+            msg["reasoning_content"] = reasoning_text or " "
 
         # Additive fallback (refs #16844, #16884). Streaming-only providers
         # (glm, MiniMax, gpt-5.x via aigw, Anthropic via openai-compat shims)
@@ -8760,11 +8764,20 @@ class AIAgent:
             return
 
         # 1. Explicit reasoning_content already set — preserve it verbatim
-        # (includes DeepSeek/Kimi's own empty-string placeholder written at
-        # creation time, and any valid reasoning content from the same provider).
+        # (includes DeepSeek/Kimi's own space-placeholder written at creation
+        # time, and any valid reasoning content from the same provider).
+        #
+        # Exception: sessions persisted BEFORE #17341 have empty-string
+        # placeholders pinned at creation time. DeepSeek V4 Pro rejects
+        # those with HTTP 400. When the active provider enforces the
+        # thinking-mode echo, upgrade "" → " " on replay so stale history
+        # doesn't 400 the user on the next turn.
         existing = source_msg.get("reasoning_content")
         if isinstance(existing, str):
-            api_msg["reasoning_content"] = existing
+            if existing == "" and self._needs_thinking_reasoning_pad():
+                api_msg["reasoning_content"] = " "
+            else:
+                api_msg["reasoning_content"] = existing
             return
 
         needs_thinking_pad = self._needs_thinking_reasoning_pad()
@@ -8776,8 +8789,10 @@ class AIAgent:
         # pins reasoning_content at creation time for tool-call turns, so the
         # shape (reasoning set, reasoning_content absent, tool_calls present)
         # is unreachable from same-provider DeepSeek history after this fix.
-        # Inject "" to satisfy the API without leaking another provider's
-        # chain of thought to DeepSeek/Kimi.
+        # Inject a single space to satisfy the API without leaking another
+        # provider's chain of thought to DeepSeek/Kimi. Space (not "")
+        # because DeepSeek V4 Pro rejects empty-string reasoning_content
+        # in thinking mode (refs #17341).
         normalized_reasoning = source_msg.get("reasoning")
         if (
             needs_thinking_pad
@@ -8785,7 +8800,7 @@ class AIAgent:
             and isinstance(normalized_reasoning, str)
             and normalized_reasoning
         ):
-            api_msg["reasoning_content"] = ""
+            api_msg["reasoning_content"] = " "
             return
 
         # 3. Healthy session: promote 'reasoning' field to 'reasoning_content'
@@ -8798,12 +8813,15 @@ class AIAgent:
             return
 
         # 4. DeepSeek / Kimi thinking mode: all assistant messages need
-        # reasoning_content. Inject "" to satisfy the provider's requirement
-        # when no explicit reasoning content is present. Covers both
-        # tool-call turns (already-poisoned history with no reasoning at all)
-        # and plain text turns.
+        # reasoning_content. Inject a single space to satisfy the provider's
+        # requirement when no explicit reasoning content is present. Covers
+        # both tool-call turns (already-poisoned history with no reasoning
+        # at all) and plain text turns. Space (not "") because DeepSeek V4
+        # Pro tightened validation and rejects empty string with HTTP 400
+        # ("The reasoning content in the thinking mode must be passed back
+        # to the API"). Refs #17341.
         if needs_thinking_pad:
-            api_msg["reasoning_content"] = ""
+            api_msg["reasoning_content"] = " "
             return
 
         # 5. reasoning_content was present but not a string (e.g. None after
diff --git a/scripts/release.py b/scripts/release.py
index ee6a65d7..56f40795 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -50,6 +50,7 @@ AUTHOR_MAP = {
     "rylen.anil@gmail.com": "rylena",
     "godnanijatin@gmail.com": "jatingodnani",
     "14046872+tmimmanuel@users.noreply.github.com": "tmimmanuel",
+    "657290301@qq.com": "IMHaoyan",
     "revar@users.noreply.github.com": "revaraver",
     # Matrix parity salvage batch (April 2026)
     "sr@samirusani": "samrusani",
diff --git a/tests/run_agent/test_deepseek_reasoning_content_echo.py b/tests/run_agent/test_deepseek_reasoning_content_echo.py
index d6e4e341..0efdb2c5 100644
--- a/tests/run_agent/test_deepseek_reasoning_content_echo.py
+++ b/tests/run_agent/test_deepseek_reasoning_content_echo.py
@@ -10,15 +10,21 @@ field, DeepSeek rejects the next request with HTTP 400::
 Fix covers three paths:
 
 1. ``_build_assistant_message`` — new tool-call messages without raw
-   reasoning_content get ``""`` pinned at creation time so nothing gets
+   reasoning_content get ``" "`` pinned at creation time so nothing gets
    persisted poisoned.
 2. ``_copy_reasoning_content_for_api`` — already-poisoned history replays
-   with ``reasoning_content=""`` injected defensively.
+   with ``reasoning_content=" "`` injected defensively.
 3. Detection covers three signals: ``provider == "deepseek"``,
    ``"deepseek" in model``, and ``api.deepseek.com`` host match. The third
    catches custom-provider setups pointing at DeepSeek.
 
-Refs #15250 / #15353.
+The placeholder is a single space (not empty string) because DeepSeek V4 Pro
+tightened validation and rejects empty-string reasoning_content with a
+400 ("The reasoning content in the thinking mode must be passed back to
+the API"). A space satisfies non-empty checks everywhere without leaking
+fabricated reasoning.
+
+Refs #15250 / #15353 / #17341.
 """
 
 from __future__ import annotations
@@ -105,8 +111,8 @@ class TestNeedsDeepSeekToolReasoning:
 class TestCopyReasoningContentForApi:
     """_copy_reasoning_content_for_api pads reasoning_content for DeepSeek tool-calls."""
 
-    def test_deepseek_tool_call_poisoned_history_gets_empty_string(self) -> None:
-        """Already-poisoned history (no reasoning_content, no reasoning) gets ''."""
+    def test_deepseek_tool_call_poisoned_history_gets_space_placeholder(self) -> None:
+        """Already-poisoned history (no reasoning_content, no reasoning) gets ' '."""
         agent = _make_agent(provider="deepseek", model="deepseek-v4-flash")
         source = {
             "role": "assistant",
@@ -115,7 +121,7 @@ class TestCopyReasoningContentForApi:
         }
         api_msg: dict = {}
         agent._copy_reasoning_content_for_api(source, api_msg)
-        assert api_msg.get("reasoning_content") == ""
+        assert api_msg.get("reasoning_content") == " "
 
     def test_deepseek_assistant_no_tool_call_gets_padded(self) -> None:
         """DeepSeek thinking mode pads ALL assistant turns, even without tool_calls."""
@@ -123,7 +129,7 @@ class TestCopyReasoningContentForApi:
         source = {"role": "assistant", "content": "hello"}
         api_msg: dict = {}
         agent._copy_reasoning_content_for_api(source, api_msg)
-        assert api_msg.get("reasoning_content") == ""
+        assert api_msg.get("reasoning_content") == " "
 
     def test_deepseek_explicit_reasoning_content_preserved(self) -> None:
         """When reasoning_content is already set, it's copied verbatim."""
@@ -137,6 +143,42 @@ class TestCopyReasoningContentForApi:
         agent._copy_reasoning_content_for_api(source, api_msg)
         assert api_msg["reasoning_content"] == "<think>real chain of thought</think>"
 
+    def test_deepseek_stale_empty_placeholder_upgraded_to_space(self) -> None:
+        """Sessions persisted before #17341 have ``reasoning_content=""`` pinned
+        at creation time. DeepSeek V4 Pro rejects "" with HTTP 400. When the
+        active provider enforces the thinking-mode echo, the replay path
+        upgrades "" → " " so stale history doesn't break the next turn.
+        """
+        agent = _make_agent(provider="deepseek", model="deepseek-v4-pro")
+        source = {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "",
+            "tool_calls": [{"id": "c1", "function": {"name": "terminal"}}],
+        }
+        api_msg: dict = {}
+        agent._copy_reasoning_content_for_api(source, api_msg)
+        assert api_msg["reasoning_content"] == " "
+
+    def test_non_thinking_provider_preserves_empty_reasoning_content_verbatim(self) -> None:
+        """The stale-placeholder upgrade ONLY fires when the active provider
+        enforces thinking-mode echo. On non-thinking providers, an empty
+        reasoning_content must still round-trip verbatim.
+        """
+        agent = _make_agent(
+            provider="openrouter",
+            model="anthropic/claude-sonnet-4.6",
+            base_url="https://openrouter.ai/api/v1",
+        )
+        source = {
+            "role": "assistant",
+            "content": "hi",
+            "reasoning_content": "",
+        }
+        api_msg: dict = {}
+        agent._copy_reasoning_content_for_api(source, api_msg)
+        assert api_msg["reasoning_content"] == ""
+
     def test_deepseek_reasoning_field_promoted(self) -> None:
         """When only 'reasoning' is set, it gets promoted to reasoning_content."""
         agent = _make_agent(provider="deepseek", model="deepseek-v4-flash")
@@ -155,7 +197,7 @@ class TestCopyReasoningContentForApi:
 
         If the source turn has tool_calls AND a 'reasoning' field but NO
         'reasoning_content' key, it's from a prior provider (the DeepSeek
-        build path pins reasoning_content at creation). Inject "" instead
+        build path pins reasoning_content at creation). Inject " " instead
         of forwarding the prior provider's chain of thought.
         """
         agent = _make_agent(provider="deepseek", model="deepseek-v4-flash")
@@ -167,7 +209,7 @@ class TestCopyReasoningContentForApi:
         }
         api_msg: dict = {}
         agent._copy_reasoning_content_for_api(source, api_msg)
-        assert api_msg["reasoning_content"] == ""
+        assert api_msg["reasoning_content"] == " "
 
     def test_kimi_poisoned_cross_provider_history_padded(self) -> None:
         """Kimi path of #15748 — same rule as DeepSeek."""
@@ -180,7 +222,7 @@ class TestCopyReasoningContentForApi:
         }
         api_msg: dict = {}
         agent._copy_reasoning_content_for_api(source, api_msg)
-        assert api_msg["reasoning_content"] == ""
+        assert api_msg["reasoning_content"] == " "
 
     def test_kimi_path_still_works(self) -> None:
         """Existing Kimi detection still pads reasoning_content."""
@@ -192,7 +234,7 @@ class TestCopyReasoningContentForApi:
         }
         api_msg: dict = {}
         agent._copy_reasoning_content_for_api(source, api_msg)
-        assert api_msg.get("reasoning_content") == ""
+        assert api_msg.get("reasoning_content") == " "
 
     def test_kimi_moonshot_base_url(self) -> None:
         agent = _make_agent(
@@ -205,7 +247,7 @@ class TestCopyReasoningContentForApi:
         }
         api_msg: dict = {}
         agent._copy_reasoning_content_for_api(source, api_msg)
-        assert api_msg.get("reasoning_content") == ""
+        assert api_msg.get("reasoning_content") == " "
 
     def test_non_thinking_provider_not_padded(self) -> None:
         """Providers that don't require the echo are untouched."""
@@ -237,7 +279,7 @@ class TestCopyReasoningContentForApi:
         }
         api_msg: dict = {}
         agent._copy_reasoning_content_for_api(source, api_msg)
-        assert api_msg.get("reasoning_content") == ""
+        assert api_msg.get("reasoning_content") == " "
 
     def test_non_assistant_role_ignored(self) -> None:
         """User/tool messages are left alone."""
@@ -302,7 +344,7 @@ class TestBuildAssistantMessageDeepSeekReasoningContent:
 
         assert msg["reasoning_content"] == "DeepSeek model_extra reasoning"
 
-    def test_deepseek_tool_call_without_raw_reasoning_content_gets_empty_string(self) -> None:
+    def test_deepseek_tool_call_without_raw_reasoning_content_gets_space_placeholder(self) -> None:
         agent = _make_agent(provider="deepseek", model="deepseek-v4-flash")
         assistant_message = SimpleNamespace(
             content=None,
@@ -324,7 +366,7 @@ class TestBuildAssistantMessageDeepSeekReasoningContent:
 
         msg = agent._build_assistant_message(assistant_message, "tool_calls")
 
-        assert msg["reasoning_content"] == ""
+        assert msg["reasoning_content"] == " "
         assert msg["tool_calls"][0]["id"] == "call_1"
 
 
@@ -345,22 +387,22 @@ class TestBuildAssistantMessagePadsStrictProviders:
         [
             pytest.param(
                 "deepseek", "deepseek-v4-pro", "",
-                None, "",
+                None, " ",
                 id="deepseek-attr-none",
             ),
             pytest.param(
                 "deepseek", "deepseek-v4-pro", "",
-                _ATTR_ABSENT, "",
+                _ATTR_ABSENT, " ",
                 id="deepseek-attr-absent",
             ),
             pytest.param(
                 "kimi-coding", "kimi-k2.6", "",
-                None, "",
+                None, " ",
                 id="kimi-attr-none",
             ),
             pytest.param(
                 "custom", "kimi-k2", "https://api.moonshot.ai/v1",
-                _ATTR_ABSENT, "",
+                _ATTR_ABSENT, " ",
                 id="moonshot-base-url",
             ),
             pytest.param(
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index 03cef830..55ce86e5 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -1465,8 +1465,8 @@ class TestBuildAssistantMessage:
 
         This preserves ``_copy_reasoning_content_for_api``'s downstream
         tiers at replay time — cross-provider leak guard (#15748),
-        promote-from-``reasoning``, and DeepSeek/Kimi ""-pad — which
-        would all be bypassed if we eagerly wrote ``reasoning_content=""``
+        promote-from-``reasoning``, and DeepSeek/Kimi " "-pad — which
+        would all be bypassed if we eagerly wrote ``reasoning_content=" "``
         on every assistant turn regardless of provider.
         """
         msg = _mock_assistant_msg(content="plain answer")
@@ -4617,7 +4617,7 @@ class TestReasoningReplayForStrictProviders:
         agent.compression_enabled = False
         agent.save_trajectories = False
 
-    def test_kimi_tool_replay_includes_empty_reasoning_content(self, agent):
+    def test_kimi_tool_replay_includes_space_reasoning_content(self, agent):
         self._setup_agent(agent)
         agent.base_url = "https://api.kimi.com/coding/v1"
         agent._base_url_lower = agent.base_url.lower()
@@ -4654,7 +4654,7 @@ class TestReasoningReplayForStrictProviders:
         assert replayed_assistant["role"] == "assistant"
         assert replayed_assistant["tool_calls"][0]["function"]["name"] == "terminal"
         assert "reasoning_content" in replayed_assistant
-        assert replayed_assistant["reasoning_content"] == ""
+        assert replayed_assistant["reasoning_content"] == " "
 
     def test_explicit_reasoning_content_beats_normalized_reasoning_on_replay(self, agent):
         self._setup_agent(agent)