From 1eabbe905e86bfadcdfbc417044decb9bc4f93c8 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 9 Apr 2026 02:06:12 -0700 Subject: [PATCH] fix: retry 3 times when model returns truly empty response (#6488) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a model returns no content, no structured reasoning, and no tool calls (common with open models), the agent now silently retries up to 3 times before falling through to (empty). Silent retry (no synthetic messages) keeps the conversation history clean, preserves prompt caching, and respects the no-synthetic-user-injection invariant. Most empty responses from open models are transient (provider hiccups, rate limits, sampling flukes) so a simple retry is sufficient. This fills the last gap in the empty-response recovery chain: 1. _last_content_with_tools fallback (prior tool turn had content) 2. Thinking-only prefill continuation (#5931 — structured reasoning) 3. Empty response silent retry (NEW — truly empty, no reasoning) 4. (empty) terminal (last resort after all retries exhausted) Inline reasoning blocks (reasoning tags embedded in the content) are excluded — the model chose to reason, it just produced no visible text. That differs from truly empty. Tests: - Updated test_truly_empty to expect 4 API calls (1 + 3 retries) - Added test_truly_empty_response_succeeds_on_nudge --- run_agent.py | 25 +++++++++++++++++++++--- tests/run_agent/test_run_agent.py | 32 +++++++++++++++++++++++++++---- 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/run_agent.py b/run_agent.py index 793ddd67..3c5661a1 100644 --- a/run_agent.py +++ b/run_agent.py @@ -9109,8 +9109,27 @@ class AIAgent: self._save_session_log(messages) continue - # Exhausted prefill attempts or no structured - # reasoning — fall through to "(empty)" terminal. + # ── Empty response retry (no reasoning) ────── + # Model returned nothing — no content, no + # structured reasoning, no tool calls. 
Common + # with open models (transient provider issues, + # rate limits, sampling flukes). Silently retry + # up to 3 times before giving up. Skip when + # content has inline reasoning tags (model chose + # to reason, just no visible text). + _truly_empty = not final_response.strip() + if _truly_empty and not _has_structured and self._empty_content_retries < 3: + self._empty_content_retries += 1 + self._vprint( + f"{self.log_prefix}↻ Empty response (no content or reasoning) " + f"— retrying ({self._empty_content_retries}/3)", + force=True, + ) + continue + + # Exhausted prefill attempts, empty retries, or + # structured reasoning with no content — + # fall through to "(empty)" terminal. reasoning_text = self._extract_reasoning(assistant_message) assistant_msg = self._build_assistant_message(assistant_message, finish_reason) assistant_msg["content"] = "(empty)" @@ -9120,7 +9139,7 @@ class AIAgent: reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text self._vprint(f"{self.log_prefix}ℹ️ Reasoning-only response (no visible content). 
Reasoning: {reasoning_preview}") else: - self._vprint(f"{self.log_prefix}ℹ️ Empty response (no content or reasoning).") + self._vprint(f"{self.log_prefix}ℹ️ Empty response (no content or reasoning) after 3 retries.") final_response = "(empty)" break diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 59f88601..98d799ae 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -1668,12 +1668,15 @@ class TestRunConversation: if roles[i] == "assistant" and roles[i + 1] == "assistant": raise AssertionError("Consecutive assistant messages found in history") - def test_truly_empty_response_accepted_without_retry(self, agent): - """Truly empty response (no content, no reasoning) should still complete with (empty).""" + def test_truly_empty_response_retries_3_times_then_empty(self, agent): + """Truly empty response (no content, no reasoning) retries 3 times then falls through to (empty).""" self._setup_agent(agent) agent.base_url = "http://127.0.0.1:1234/v1" empty_resp = _mock_response(content=None, finish_reason="stop") - agent.client.chat.completions.create.side_effect = [empty_resp] + # 4 responses: 1 original + 3 silent retries, all empty + agent.client.chat.completions.create.side_effect = [ + empty_resp, empty_resp, empty_resp, empty_resp, + ] with ( patch.object(agent, "_persist_session"), patch.object(agent, "_save_trajectory"), @@ -1682,7 +1685,28 @@ class TestRunConversation: result = agent.run_conversation("answer me") assert result["completed"] is True assert result["final_response"] == "(empty)" - assert result["api_calls"] == 1 # no retries + assert result["api_calls"] == 4 # 1 original + 3 retries + + def test_truly_empty_response_succeeds_on_nudge(self, agent): + """Model produces content on a silent retry after an empty response.""" + self._setup_agent(agent) + agent.base_url = "http://127.0.0.1:1234/v1" + empty_resp = _mock_response(content=None, finish_reason="stop") + content_resp = 
_mock_response( + content="Here is the actual answer.", + finish_reason="stop", + ) + # 1 empty response, then model produces content on the silent retry + agent.client.chat.completions.create.side_effect = [empty_resp, content_resp] + with ( + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + ): + result = agent.run_conversation("answer me") + assert result["completed"] is True + assert result["final_response"] == "Here is the actual answer." + assert result["api_calls"] == 2 # 1 original + 1 silent retry def test_nous_401_refreshes_after_remint_and_retries(self, agent): self._setup_agent(agent)