From 156b50358b14d170985bd66b02b57eeea030442d Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 25 Mar 2026 18:57:18 -0700 Subject: [PATCH] fix(reasoning): skip duplicate callback for <think>-extracted reasoning during streaming (#3116) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Local models (Ollama, LM Studio) embed reasoning in <think> tags in delta.content. During streaming, _stream_delta() already displays these <think> blocks. Then _build_assistant_message() extracts them again and fires reasoning_callback, causing duplicate display. Track whether reasoning came from structured fields (reasoning_content) vs tag extraction. Only fire the callback for <think>-extracted reasoning when stream_delta_callback is NOT active. Structured reasoning always fires regardless. Salvaged from PR #2076 by dusterbloom (Fix A only — Fix B was already covered by PR #3013's _current_reasoning_callback centralization). Closes #2069. --- run_agent.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/run_agent.py b/run_agent.py index 8f62a3b2..28519905 100644 --- a/run_agent.py +++ b/run_agent.py @@ -4494,6 +4494,7 @@ class AIAgent: so both the tool-call path and the final-response path share one builder. """ reasoning_text = self._extract_reasoning(assistant_message) + _from_structured = bool(reasoning_text) # Fallback: extract inline <think> blocks from content when no structured # reasoning fields are present (some models/providers embed thinking @@ -4509,10 +4510,15 @@ class AIAgent: logging.debug(f"Captured reasoning ({len(reasoning_text)} chars): {reasoning_text}") if reasoning_text and self.reasoning_callback: - try: - self.reasoning_callback(reasoning_text) - except Exception: - pass + # Skip callback for <think>-extracted reasoning when streaming is active. + # _stream_delta() already displayed <think> blocks during streaming; + # firing the callback again would cause duplicate display. 
+ # Structured reasoning (from reasoning_content field) always fires. + if _from_structured or not self.stream_delta_callback: + try: + self.reasoning_callback(reasoning_text) + except Exception: + pass msg = { "role": "assistant",