From ec48ec5530871edda11e068d0f03c16985f43455 Mon Sep 17 00:00:00 2001 From: Tranquil-Flow Date: Sat, 18 Apr 2026 19:18:03 -0700 Subject: [PATCH] fix(agent): strip <think> blocks from stored assistant content MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inline reasoning tags in an assistant message's content field leak to every downstream consumer: messaging platforms (#8878, #9568), API replay of prior turns, session transcript, CLI recap, generated session titles, and context compression. _extract_reasoning() already captures the reasoning text into msg['reasoning'] separately, so the raw tags in content are redundant. Stripping once at the storage boundary in _build_assistant_message() cleans the content for every downstream path in one place — no per-platform or per-path stripper needed. Measured impact on a real MiniMax M2.7-highspeed session (per @luoyejiaoe-source, #9306): 55% of assistant messages started with <think> blocks, 51/100 session titles were polluted, 16% content-size reduction. 3 new regression tests in TestBuildAssistantMessage: closed-pair strip with reasoning capture, no-think-tag passthrough, and unterminated-block strip. Resolves #8878 and #9568. Originally proposed as PR #9250. --- run_agent.py | 14 +++++++++++++ tests/run_agent/test_run_agent.py | 35 +++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/run_agent.py b/run_agent.py index 33635ef2..c87bd351 100644 --- a/run_agent.py +++ b/run_agent.py @@ -7294,6 +7294,20 @@ class AIAgent: if reasoning_text: reasoning_text = _sanitize_surrogates(reasoning_text) + # Strip inline reasoning tags (<think> etc.) from the stored + # assistant content. Reasoning was already captured into + # ``reasoning_text`` above (either from structured fields or the + # inline-block fallback), so the raw tags in content are redundant. 
+ # Leaving them in place caused reasoning to leak to messaging + # platforms (#8878, #9568), inflate context on subsequent turns + # (#9306 observed 16% content-size reduction on a real MiniMax + # session), and pollute generated session titles. One strip at the + # storage boundary cleans content for every downstream consumer: + # API replay, session transcript, gateway delivery, CLI display, + # compression, title generation. + if isinstance(_san_content, str) and _san_content: + _san_content = self._strip_think_blocks(_san_content).strip() + msg = { "role": "assistant", "content": _san_content, diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index bde5ed5a..d30445cf 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -1142,6 +1142,41 @@ class TestBuildAssistantMessage: result = agent._build_assistant_message(msg, "tool_calls") assert "extra_content" not in result["tool_calls"][0] + def test_think_blocks_stripped_from_content(self, agent): + """Inline <think> blocks are stripped from stored content (#8878, #9568). + + The reasoning is captured into ``msg['reasoning']`` via the inline + fallback in ``_extract_reasoning``; the raw tags in ``content`` are + redundant and leak to messaging platforms / pollute titles / + inflate context if left in place. + """ + msg = _mock_assistant_msg( + content="<think>internal reasoning</think>The actual answer." + ) + result = agent._build_assistant_message(msg, "stop") + assert "<think>" not in result["content"] + assert "internal reasoning" not in result["content"] + assert "The actual answer." 
in result["content"] + # Reasoning preserved separately via inline extraction fallback + assert result["reasoning"] == "internal reasoning" + + def test_think_blocks_stripped_preserves_normal_content(self, agent): + """Content without reasoning tags passes through unchanged.""" + msg = _mock_assistant_msg(content="No thinking here.") + result = agent._build_assistant_message(msg, "stop") + assert result["content"] == "No thinking here." + + def test_unterminated_think_block_stripped(self, agent): + """Unterminated <think> block (MiniMax / NIM dropped close tag) is + fully stripped from stored content.""" + msg = _mock_assistant_msg( + content="<think>reasoning that never closes on this NIM endpoint" + ) + result = agent._build_assistant_message(msg, "stop") + assert "<think>" not in result["content"] + assert "reasoning that never closes" not in result["content"] + assert result["content"] == "" + class TestFormatToolsForSystemMessage: def test_no_tools_returns_empty_array(self, agent):