fix(reasoning): skip duplicate callback for <think>-extracted reasoning during streaming (#3116)

Local models (Ollama, LM Studio) embed reasoning in <think> tags in delta.content. During streaming, _stream_delta() already displays these blocks. Then _build_assistant_message() extracts them again and fires reasoning_callback, causing duplicate display. Track whether reasoning came from structured fields (reasoning_content) vs <think> tag extraction. Only fire the callback for <think>-extracted reasoning when stream_delta_callback is NOT active. Structured reasoning always fires regardless. Salvaged from PR #2076 by dusterbloom (Fix A only — Fix B was already covered by PR #3013's _current_reasoning_callback centralization). Closes #2069.
2026-03-25 18:57:18 -07:00 · 2026-03-25 18:57:18 -07:00 · 156b50358b
commit 156b50358b
parent 59575d6a91
1 changed file with 10 additions and 4 deletions
--- a/run_agent.py
+++ b/run_agent.py
@@ -4494,6 +4494,7 @@ class AIAgent:
        so both the tool-call path and the final-response path share one builder.
        """
        reasoning_text = self._extract_reasoning(assistant_message)
+        _from_structured = bool(reasoning_text)

        # Fallback: extract inline <think> blocks from content when no structured
        # reasoning fields are present (some models/providers embed thinking
@@ -4509,10 +4510,15 @@ class AIAgent:
            logging.debug(f"Captured reasoning ({len(reasoning_text)} chars): {reasoning_text}")

        if reasoning_text and self.reasoning_callback:
-            try:
-                self.reasoning_callback(reasoning_text)
-            except Exception:
-                pass
+            # Skip callback for <think>-extracted reasoning when streaming is active.
+            # _stream_delta() already displayed <think> blocks during streaming;
+            # firing the callback again would cause duplicate display.
+            # Structured reasoning (from reasoning_content field) always fires.
+            if _from_structured or not self.stream_delta_callback:
+                try:
+                    self.reasoning_callback(reasoning_text)
+                except Exception:
+                    pass

        msg = {
            "role": "assistant",