From ec48ec5530871edda11e068d0f03c16985f43455 Mon Sep 17 00:00:00 2001
From: Tranquil-Flow <tranquil_flow@protonmail.com>
Date: Sat, 18 Apr 2026 19:18:03 -0700
Subject: [PATCH] fix(agent): strip <think> blocks from stored assistant
 content
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Inline reasoning tags in an assistant message's content field leak to every downstream consumer: messaging platforms (#8878, #9568), API replay of prior turns, session transcript, CLI recap, generated session titles, and context compression.  _extract_reasoning() already captures the reasoning text into msg['reasoning'] separately, so the raw tags in content are redundant.

Stripping once at the storage boundary in _build_assistant_message() cleans the content for every downstream path in one place — no per-platform or per-path stripper needed.  Measured impact on a real MiniMax M2.7-highspeed session (per @luoyejiaoe-source, #9306): 55% of assistant messages started with <think> blocks, 51/100 session titles were polluted, 16% content-size reduction.

Adds three regression tests to TestBuildAssistantMessage: closed-pair strip with reasoning capture, no-think-tag passthrough, and unterminated-block strip.

Resolves #8878. Resolves #9568.

Originally proposed as PR #9250.
---
 run_agent.py                      | 14 +++++++++++++
 tests/run_agent/test_run_agent.py | 35 +++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)
diff --git a/run_agent.py b/run_agent.py
index 33635ef2..c87bd351 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -7294,6 +7294,20 @@ class AIAgent:
         if reasoning_text:
             reasoning_text = _sanitize_surrogates(reasoning_text)
 
+        # Strip inline reasoning tags (<think>…</think> etc.) from the stored
+        # assistant content.  Reasoning was already captured into
+        # ``reasoning_text`` above (either from structured fields or the
+        # inline-block fallback), so the raw tags in content are redundant.
+        # Leaving them in place caused reasoning to leak to messaging
+        # platforms (#8878, #9568), inflate context on subsequent turns
+        # (#9306 observed 16% content-size reduction on a real MiniMax
+        # session), and pollute generated session titles.  One strip at the
+        # storage boundary cleans content for every downstream consumer:
+        # API replay, session transcript, gateway delivery, CLI display,
+        # compression, title generation.
+        if isinstance(_san_content, str) and _san_content:
+            _san_content = self._strip_think_blocks(_san_content).strip()
+
         msg = {
             "role": "assistant",
             "content": _san_content,
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index bde5ed5a..d30445cf 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -1142,6 +1142,41 @@ class TestBuildAssistantMessage:
         result = agent._build_assistant_message(msg, "tool_calls")
         assert "extra_content" not in result["tool_calls"][0]
 
+    def test_think_blocks_stripped_from_content(self, agent):
+        """Inline <think> blocks are stripped from stored content (#8878, #9568).
+
+        Covers the closed-pair case: the tags and the text between them are
+        removed from ``result['content']``, the visible answer is kept, and
+        the reasoning text is still exposed as ``result['reasoning']`` so
+        nothing is lost when the tags are dropped from content.
+        """
+        msg = _mock_assistant_msg(
+            content="<think>internal reasoning</think>The actual answer."
+        )
+        result = agent._build_assistant_message(msg, "stop")
+        assert "<think>" not in result["content"]
+        assert "internal reasoning" not in result["content"]
+        assert "The actual answer." in result["content"]
+        # Reasoning text must survive in its own field after the strip
+        assert result["reasoning"] == "internal reasoning"
+
+    def test_think_blocks_stripped_preserves_normal_content(self, agent):
+        """Tag-free content with no surrounding whitespace is stored as-is."""
+        msg = _mock_assistant_msg(content="No thinking here.")
+        result = agent._build_assistant_message(msg, "stop")
+        assert result["content"] == "No thinking here."
+
+    def test_unterminated_think_block_stripped(self, agent):
+        """An unterminated <think> block (close tag dropped, as seen with
+        MiniMax / NIM) is stripped entirely, leaving empty stored content."""
+        msg = _mock_assistant_msg(
+            content="<think>reasoning that never closes on this NIM endpoint"
+        )
+        result = agent._build_assistant_message(msg, "stop")
+        assert "<think>" not in result["content"]
+        assert "reasoning that never closes" not in result["content"]
+        assert result["content"] == ""
+
 
 class TestFormatToolsForSystemMessage:
     def test_no_tools_returns_empty_array(self, agent):