From 1eabbe905e86bfadcdfbc417044decb9bc4f93c8 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Thu, 9 Apr 2026 02:06:12 -0700
Subject: [PATCH] fix: retry 3 times when model returns truly empty response
 (#6488)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a model returns no content, no structured reasoning, and no tool
calls (common with open models), the agent now silently retries up to
3 times before falling through to (empty).

Silent retry (no synthetic messages) keeps the conversation history
clean, preserves prompt caching, and respects the no-synthetic-user-
injection invariant.  Most empty responses from open models are
transient (provider hiccups, rate limits, sampling flukes) so a
simple retry is sufficient.

This fills the last gap in the empty-response recovery chain:
1. _last_content_with_tools fallback (prior tool turn had content)
2. Thinking-only prefill continuation (#5931 — structured reasoning)
3. Empty response silent retry (NEW — truly empty, no reasoning)
4. (empty) terminal (last resort after all retries exhausted)

Inline <think> blocks are excluded — the model chose to reason, it
just produced no visible text.  That differs from truly empty.

Tests:
- Updated test_truly_empty to expect 4 API calls (1 + 3 retries)
- Added test_truly_empty_response_succeeds_on_nudge
---
 run_agent.py                      | 25 +++++++++++++++++++++---
 tests/run_agent/test_run_agent.py | 32 +++++++++++++++++++++++++++----
 2 files changed, 50 insertions(+), 7 deletions(-)
diff --git a/run_agent.py b/run_agent.py
index 793ddd67..3c5661a1 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -9109,8 +9109,27 @@ class AIAgent:
                             self._save_session_log(messages)
                             continue
 
-                        # Exhausted prefill attempts or no structured
-                        # reasoning — fall through to "(empty)" terminal.
+                        # ── Empty response retry (no reasoning) ──────
+                        # Model returned nothing — no content, no
+                        # structured reasoning, no tool calls.  Common
+                        # with open models (transient provider issues,
+                        # rate limits, sampling flukes).  Silently retry
+                        # up to 3 times before giving up.  Skip when
+                        # content has inline <think> tags (model chose
+                        # to reason, just no visible text).
+                        _truly_empty = not final_response.strip()
+                        if _truly_empty and not _has_structured and self._empty_content_retries < 3:
+                            self._empty_content_retries += 1
+                            self._vprint(
+                                f"{self.log_prefix}↻ Empty response (no content or reasoning) "
+                                f"— retrying ({self._empty_content_retries}/3)",
+                                force=True,
+                            )
+                            continue
+
+                        # Exhausted prefill attempts, empty retries, or
+                        # structured reasoning with no content —
+                        # fall through to "(empty)" terminal.
                         reasoning_text = self._extract_reasoning(assistant_message)
                         assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
                         assistant_msg["content"] = "(empty)"
@@ -9120,7 +9139,7 @@ class AIAgent:
                             reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text
                             self._vprint(f"{self.log_prefix}ℹ️  Reasoning-only response (no visible content). Reasoning: {reasoning_preview}")
                         else:
-                            self._vprint(f"{self.log_prefix}ℹ️  Empty response (no content or reasoning).")
+                            self._vprint(f"{self.log_prefix}ℹ️  Empty response (no content or reasoning) after 3 retries.")
 
                         final_response = "(empty)"
                         break
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index 59f88601..98d799ae 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -1668,12 +1668,15 @@ class TestRunConversation:
             if roles[i] == "assistant" and roles[i + 1] == "assistant":
                 raise AssertionError("Consecutive assistant messages found in history")
 
-    def test_truly_empty_response_accepted_without_retry(self, agent):
-        """Truly empty response (no content, no reasoning) should still complete with (empty)."""
+    def test_truly_empty_response_retries_3_times_then_empty(self, agent):
+        """Truly empty response (no content, no reasoning) retries 3 times then falls through to (empty)."""
         self._setup_agent(agent)
         agent.base_url = "http://127.0.0.1:1234/v1"
         empty_resp = _mock_response(content=None, finish_reason="stop")
-        agent.client.chat.completions.create.side_effect = [empty_resp]
+        # 4 responses: 1 original + 3 nudge retries, all empty
+        agent.client.chat.completions.create.side_effect = [
+            empty_resp, empty_resp, empty_resp, empty_resp,
+        ]
         with (
             patch.object(agent, "_persist_session"),
             patch.object(agent, "_save_trajectory"),
@@ -1682,7 +1685,28 @@ class TestRunConversation:
             result = agent.run_conversation("answer me")
         assert result["completed"] is True
         assert result["final_response"] == "(empty)"
-        assert result["api_calls"] == 1  # no retries
+        assert result["api_calls"] == 4  # 1 original + 3 retries
+
+    def test_truly_empty_response_succeeds_on_nudge(self, agent):
+        """Model produces content after being nudged for empty response."""
+        self._setup_agent(agent)
+        agent.base_url = "http://127.0.0.1:1234/v1"
+        empty_resp = _mock_response(content=None, finish_reason="stop")
+        content_resp = _mock_response(
+            content="Here is the actual answer.",
+            finish_reason="stop",
+        )
+        # 1 empty response, then model produces content on nudge
+        agent.client.chat.completions.create.side_effect = [empty_resp, content_resp]
+        with (
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            result = agent.run_conversation("answer me")
+        assert result["completed"] is True
+        assert result["final_response"] == "Here is the actual answer."
+        assert result["api_calls"] == 2  # 1 original + 1 nudge retry
 
     def test_nous_401_refreshes_after_remint_and_retries(self, agent):
         self._setup_agent(agent)