From bfb704684ec64675650bc39fa0f731604b12aba2 Mon Sep 17 00:00:00 2001 From: IMHaoyan <657290301@qq.com> Date: Thu, 30 Apr 2026 22:49:55 -0700 Subject: [PATCH] fix(deepseek): use non-empty reasoning_content placeholder for V4 Pro thinking mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DeepSeek V4 Pro tightened thinking-mode validation and rejects empty-string reasoning_content with HTTP 400: The reasoning content in the thinking mode must be passed back to the API. run_agent.py injected "" at three fallback sites — the tool-call pad in _build_assistant_message and both injection branches of _copy_reasoning_content_for_api (cross-provider poison guard + unconditional thinking pad). All three now emit " " (single space), which satisfies the non-empty check on V4 Pro without leaking fabricated reasoning. Also upgrades stale empty-string placeholders on replay: sessions persisted before this change have reasoning_content="" pinned at creation time; when the active provider enforces thinking-mode echo, the replay path now rewrites "" -> " " so existing users don't 400 on their first V4 Pro turn after updating. Non-thinking providers still round-trip "" verbatim. Updates 9 existing assertions + adds 2 regression tests (stale-placeholder upgrade, non-thinking verbatim preservation). Refs #15250, #17400. Closes #17341. --- run_agent.py | 46 +++++++---- scripts/release.py | 1 + .../test_deepseek_reasoning_content_echo.py | 80 ++++++++++++++----- tests/run_agent/test_run_agent.py | 8 +- 4 files changed, 98 insertions(+), 37 deletions(-) diff --git a/run_agent.py b/run_agent.py index 4ea0fafe..26933994 100644 --- a/run_agent.py +++ b/run_agent.py @@ -8603,9 +8603,13 @@ class AIAgent: # message. Without it, replaying the persisted message causes # HTTP 400 ("The reasoning_content in the thinking mode must # be passed back to the API"). Include streamed reasoning - # text when captured; otherwise pad with empty string. - # Refs #15250, #17400. - msg["reasoning_content"] = reasoning_text or "" + # text when captured; otherwise pad with a single space — + # DeepSeek V4 Pro tightened validation and rejects empty + # string ("The reasoning content in the thinking mode must + # be passed back to the API"). A space satisfies non-empty + # checks everywhere without leaking fabricated reasoning. + # Refs #15250, #17400, #17341. + msg["reasoning_content"] = reasoning_text or " " # Additive fallback (refs #16844, #16884). Streaming-only providers # (glm, MiniMax, gpt-5.x via aigw, Anthropic via openai-compat shims) @@ -8760,11 +8764,20 @@ class AIAgent: return # 1. Explicit reasoning_content already set — preserve it verbatim - # (includes DeepSeek/Kimi's own empty-string placeholder written at - # creation time, and any valid reasoning content from the same provider). + # (includes DeepSeek/Kimi's own space-placeholder written at creation + # time, and any valid reasoning content from the same provider). + # + # Exception: sessions persisted BEFORE #17341 have empty-string + # placeholders pinned at creation time. DeepSeek V4 Pro rejects + # those with HTTP 400. When the active provider enforces the + # thinking-mode echo, upgrade "" → " " on replay so stale history + # doesn't 400 the user on the next turn. existing = source_msg.get("reasoning_content") if isinstance(existing, str): - api_msg["reasoning_content"] = existing + if existing == "" and self._needs_thinking_reasoning_pad(): + api_msg["reasoning_content"] = " " + else: + api_msg["reasoning_content"] = existing return needs_thinking_pad = self._needs_thinking_reasoning_pad() @@ -8776,8 +8789,10 @@ class AIAgent: # pins reasoning_content at creation time for tool-call turns, so the # shape (reasoning set, reasoning_content absent, tool_calls present) # is unreachable from same-provider DeepSeek history after this fix. - # Inject "" to satisfy the API without leaking another provider's - # chain of thought to DeepSeek/Kimi. + # Inject a single space to satisfy the API without leaking another + # provider's chain of thought to DeepSeek/Kimi. Space (not "") + # because DeepSeek V4 Pro rejects empty-string reasoning_content + # in thinking mode (refs #17341). normalized_reasoning = source_msg.get("reasoning") if ( needs_thinking_pad @@ -8785,7 +8800,7 @@ class AIAgent: and isinstance(normalized_reasoning, str) and normalized_reasoning ): - api_msg["reasoning_content"] = "" + api_msg["reasoning_content"] = " " return # 3. Healthy session: promote 'reasoning' field to 'reasoning_content' @@ -8798,12 +8813,15 @@ class AIAgent: return # 4. DeepSeek / Kimi thinking mode: all assistant messages need - # reasoning_content. Inject "" to satisfy the provider's requirement - # when no explicit reasoning content is present. Covers both - # tool-call turns (already-poisoned history with no reasoning at all) - # and plain text turns. + # reasoning_content. Inject a single space to satisfy the provider's + # requirement when no explicit reasoning content is present. Covers + # both tool-call turns (already-poisoned history with no reasoning + # at all) and plain text turns. Space (not "") because DeepSeek V4 + # Pro tightened validation and rejects empty string with HTTP 400 + # ("The reasoning content in the thinking mode must be passed back + # to the API"). Refs #17341. if needs_thinking_pad: - api_msg["reasoning_content"] = "" + api_msg["reasoning_content"] = " " return # 5. reasoning_content was present but not a string (e.g. None after diff --git a/scripts/release.py b/scripts/release.py index ee6a65d7..56f40795 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -50,6 +50,7 @@ AUTHOR_MAP = { "rylen.anil@gmail.com": "rylena", "godnanijatin@gmail.com": "jatingodnani", "14046872+tmimmanuel@users.noreply.github.com": "tmimmanuel", + "657290301@qq.com": "IMHaoyan", "revar@users.noreply.github.com": "revaraver", # Matrix parity salvage batch (April 2026) "sr@samirusani": "samrusani", diff --git a/tests/run_agent/test_deepseek_reasoning_content_echo.py b/tests/run_agent/test_deepseek_reasoning_content_echo.py index d6e4e341..0efdb2c5 100644 --- a/tests/run_agent/test_deepseek_reasoning_content_echo.py +++ b/tests/run_agent/test_deepseek_reasoning_content_echo.py @@ -10,15 +10,21 @@ field, DeepSeek rejects the next request with HTTP 400:: Fix covers three paths: 1. ``_build_assistant_message`` — new tool-call messages without raw - reasoning_content get ``""`` pinned at creation time so nothing gets + reasoning_content get ``" "`` pinned at creation time so nothing gets persisted poisoned. 2. ``_copy_reasoning_content_for_api`` — already-poisoned history replays - with ``reasoning_content=""`` injected defensively. + with ``reasoning_content=" "`` injected defensively. 3. Detection covers three signals: ``provider == "deepseek"``, ``"deepseek" in model``, and ``api.deepseek.com`` host match. The third catches custom-provider setups pointing at DeepSeek. -Refs #15250 / #15353. +The placeholder is a single space (not empty string) because DeepSeek V4 Pro +tightened validation and rejects empty-string reasoning_content with a +400 ("The reasoning content in the thinking mode must be passed back to +the API"). A space satisfies non-empty checks everywhere without leaking +fabricated reasoning. + +Refs #15250 / #15353 / #17341. """ from __future__ import annotations @@ -105,8 +111,8 @@ class TestNeedsDeepSeekToolReasoning: class TestCopyReasoningContentForApi: """_copy_reasoning_content_for_api pads reasoning_content for DeepSeek tool-calls.""" - def test_deepseek_tool_call_poisoned_history_gets_empty_string(self) -> None: - """Already-poisoned history (no reasoning_content, no reasoning) gets ''.""" + def test_deepseek_tool_call_poisoned_history_gets_space_placeholder(self) -> None: + """Already-poisoned history (no reasoning_content, no reasoning) gets ' '.""" agent = _make_agent(provider="deepseek", model="deepseek-v4-flash") source = { "role": "assistant", @@ -115,7 +121,7 @@ class TestCopyReasoningContentForApi: } api_msg: dict = {} agent._copy_reasoning_content_for_api(source, api_msg) - assert api_msg.get("reasoning_content") == "" + assert api_msg.get("reasoning_content") == " " def test_deepseek_assistant_no_tool_call_gets_padded(self) -> None: """DeepSeek thinking mode pads ALL assistant turns, even without tool_calls.""" @@ -123,7 +129,7 @@ class TestCopyReasoningContentForApi: source = {"role": "assistant", "content": "hello"} api_msg: dict = {} agent._copy_reasoning_content_for_api(source, api_msg) - assert api_msg.get("reasoning_content") == "" + assert api_msg.get("reasoning_content") == " " def test_deepseek_explicit_reasoning_content_preserved(self) -> None: """When reasoning_content is already set, it's copied verbatim.""" @@ -137,6 +143,42 @@ class TestCopyReasoningContentForApi: agent._copy_reasoning_content_for_api(source, api_msg) assert api_msg["reasoning_content"] == "real chain of thought" + def test_deepseek_stale_empty_placeholder_upgraded_to_space(self) -> None: + """Sessions persisted before #17341 have ``reasoning_content=""`` pinned + at creation time. DeepSeek V4 Pro rejects "" with HTTP 400. When the + active provider enforces the thinking-mode echo, the replay path + upgrades "" → " " so stale history doesn't break the next turn. + """ + agent = _make_agent(provider="deepseek", model="deepseek-v4-pro") + source = { + "role": "assistant", + "content": "", + "reasoning_content": "", + "tool_calls": [{"id": "c1", "function": {"name": "terminal"}}], + } + api_msg: dict = {} + agent._copy_reasoning_content_for_api(source, api_msg) + assert api_msg["reasoning_content"] == " " + + def test_non_thinking_provider_preserves_empty_reasoning_content_verbatim(self) -> None: + """The stale-placeholder upgrade ONLY fires when the active provider + enforces thinking-mode echo. On non-thinking providers, an empty + reasoning_content must still round-trip verbatim. + """ + agent = _make_agent( + provider="openrouter", + model="anthropic/claude-sonnet-4.6", + base_url="https://openrouter.ai/api/v1", + ) + source = { + "role": "assistant", + "content": "hi", + "reasoning_content": "", + } + api_msg: dict = {} + agent._copy_reasoning_content_for_api(source, api_msg) + assert api_msg["reasoning_content"] == "" + def test_deepseek_reasoning_field_promoted(self) -> None: """When only 'reasoning' is set, it gets promoted to reasoning_content.""" agent = _make_agent(provider="deepseek", model="deepseek-v4-flash") @@ -155,7 +197,7 @@ class TestCopyReasoningContentForApi: If the source turn has tool_calls AND a 'reasoning' field but NO 'reasoning_content' key, it's from a prior provider (the DeepSeek - build path pins reasoning_content at creation). Inject "" instead + build path pins reasoning_content at creation). Inject " " instead of forwarding the prior provider's chain of thought. """ agent = _make_agent(provider="deepseek", model="deepseek-v4-flash") @@ -167,7 +209,7 @@ class TestCopyReasoningContentForApi: } api_msg: dict = {} agent._copy_reasoning_content_for_api(source, api_msg) - assert api_msg["reasoning_content"] == "" + assert api_msg["reasoning_content"] == " " def test_kimi_poisoned_cross_provider_history_padded(self) -> None: """Kimi path of #15748 — same rule as DeepSeek.""" @@ -180,7 +222,7 @@ class TestCopyReasoningContentForApi: } api_msg: dict = {} agent._copy_reasoning_content_for_api(source, api_msg) - assert api_msg["reasoning_content"] == "" + assert api_msg["reasoning_content"] == " " def test_kimi_path_still_works(self) -> None: """Existing Kimi detection still pads reasoning_content.""" @@ -192,7 +234,7 @@ class TestCopyReasoningContentForApi: } api_msg: dict = {} agent._copy_reasoning_content_for_api(source, api_msg) - assert api_msg.get("reasoning_content") == "" + assert api_msg.get("reasoning_content") == " " def test_kimi_moonshot_base_url(self) -> None: agent = _make_agent( @@ -205,7 +247,7 @@ class TestCopyReasoningContentForApi: } api_msg: dict = {} agent._copy_reasoning_content_for_api(source, api_msg) - assert api_msg.get("reasoning_content") == "" + assert api_msg.get("reasoning_content") == " " def test_non_thinking_provider_not_padded(self) -> None: """Providers that don't require the echo are untouched.""" @@ -237,7 +279,7 @@ class TestCopyReasoningContentForApi: } api_msg: dict = {} agent._copy_reasoning_content_for_api(source, api_msg) - assert api_msg.get("reasoning_content") == "" + assert api_msg.get("reasoning_content") == " " def test_non_assistant_role_ignored(self) -> None: """User/tool messages are left alone.""" @@ -302,7 +344,7 @@ class TestBuildAssistantMessageDeepSeekReasoningContent: assert msg["reasoning_content"] == "DeepSeek model_extra reasoning" - def test_deepseek_tool_call_without_raw_reasoning_content_gets_empty_string(self) -> None: + def test_deepseek_tool_call_without_raw_reasoning_content_gets_space_placeholder(self) -> None: agent = _make_agent(provider="deepseek", model="deepseek-v4-flash") assistant_message = SimpleNamespace( content=None, @@ -324,7 +366,7 @@ class TestBuildAssistantMessageDeepSeekReasoningContent: msg = agent._build_assistant_message(assistant_message, "tool_calls") - assert msg["reasoning_content"] == "" + assert msg["reasoning_content"] == " " assert msg["tool_calls"][0]["id"] == "call_1" @@ -345,22 +387,22 @@ class TestBuildAssistantMessagePadsStrictProviders: [ pytest.param( "deepseek", "deepseek-v4-pro", "", - None, "", + None, " ", id="deepseek-attr-none", ), pytest.param( "deepseek", "deepseek-v4-pro", "", - _ATTR_ABSENT, "", + _ATTR_ABSENT, " ", id="deepseek-attr-absent", ), pytest.param( "kimi-coding", "kimi-k2.6", "", - None, "", + None, " ", id="kimi-attr-none", ), pytest.param( "custom", "kimi-k2", "https://api.moonshot.ai/v1", - _ATTR_ABSENT, "", + _ATTR_ABSENT, " ", id="moonshot-base-url", ), pytest.param( diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 03cef830..55ce86e5 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -1465,8 +1465,8 @@ class TestBuildAssistantMessage: This preserves ``_copy_reasoning_content_for_api``'s downstream tiers at replay time — cross-provider leak guard (#15748), - promote-from-``reasoning``, and DeepSeek/Kimi ""-pad — which - would all be bypassed if we eagerly wrote ``reasoning_content=""`` + promote-from-``reasoning``, and DeepSeek/Kimi " "-pad — which + would all be bypassed if we eagerly wrote ``reasoning_content=" "`` on every assistant turn regardless of provider. """ msg = _mock_assistant_msg(content="plain answer") @@ -4617,7 +4617,7 @@ class TestReasoningReplayForStrictProviders: agent.compression_enabled = False agent.save_trajectories = False - def test_kimi_tool_replay_includes_empty_reasoning_content(self, agent): + def test_kimi_tool_replay_includes_space_reasoning_content(self, agent): self._setup_agent(agent) agent.base_url = "https://api.kimi.com/coding/v1" agent._base_url_lower = agent.base_url.lower() @@ -4654,7 +4654,7 @@ class TestReasoningReplayForStrictProviders: assert replayed_assistant["role"] == "assistant" assert replayed_assistant["tool_calls"][0]["function"]["name"] == "terminal" assert "reasoning_content" in replayed_assistant - assert replayed_assistant["reasoning_content"] == "" + assert replayed_assistant["reasoning_content"] == " " def test_explicit_reasoning_content_beats_normalized_reasoning_on_replay(self, agent): self._setup_agent(agent)