fix: empty response recovery for reasoning models (mimo, qwen, GLM) (#8609)

Three fixes for the (empty) response bug affecting open reasoning models:

1. Allow retries after prefill exhaustion — models like mimo-v2-pro always
   populate reasoning fields via OpenRouter, so the old 'not _has_structured'
   guard on the retry path blocked retries for EVERY reasoning model after
   the 2 prefill attempts.  Now: 1 original + 2 prefills + 3 retries = 6
   total attempts before (empty).

2. Reset prefill/retry counters on tool-call recovery — the counters
   accumulated across the entire conversation, never resetting during
   tool-calling turns.  A model cycling empty→prefill→tools→empty burned
   both prefill attempts and the third empty got zero recovery.  Now
   counters reset when prefill succeeds with tool calls.

3. Strip think blocks before _truly_empty check — inline <think> content
   made the string non-empty, skipping both retry paths.

Reported by users on Telegram with xiaomi/mimo-v2-pro and qwen3.5 models.
Reproduced: qwen3.5-9b emits tool calls as XML in reasoning field instead
of proper function calls, causing content=None + tool_calls=None + reasoning
with embedded <tool_call> XML.  Prefill recovery works but counter
accumulation caused permanent (empty) in long sessions.
This commit is contained in:
Teknium 2026-04-12 15:38:11 -07:00 committed by GitHub
parent a4593f8b21
commit d6785dc4d4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 37 additions and 17 deletions

View File

@ -9736,12 +9736,25 @@ class AIAgent:
# Pop thinking-only prefill message(s) before appending
# (tool-call path — same rationale as the final-response path).
_had_prefill = False
while (
messages
and isinstance(messages[-1], dict)
and messages[-1].get("_thinking_prefill")
):
messages.pop()
_had_prefill = True
# Reset the prefill and empty-retry counters when tool calls
# follow a prefill recovery. Without this, the counters
# accumulate across the whole conversation — a model that
# intermittently empties (empty → prefill → tools → empty →
# prefill → tools) burns both prefill attempts and the third
# empty gets zero recovery. Resetting here treats each tool-
# call success as a fresh start.
if _had_prefill:
self._thinking_prefill_retries = 0
self._empty_content_retries = 0
messages.append(assistant_msg)
self._emit_interim_assistant_message(assistant_msg)
@ -9917,16 +9930,23 @@ class AIAgent:
self._save_session_log(messages)
continue
# ── Empty response retry (no reasoning) ──────
# Model returned nothing — no content, no
# structured reasoning, no tool calls. Common
# with open models (transient provider issues,
# rate limits, sampling flukes). Retry up to 3
# times before attempting fallback. Skip when
# content has inline <think> tags (model chose
# to reason, just no visible text).
_truly_empty = not final_response.strip()
if _truly_empty and not _has_structured and self._empty_content_retries < 3:
# ── Empty response retry ──────────────────────
# Model returned nothing usable. Retry up to 3
# times before attempting fallback. This covers
# both truly empty responses (no content, no
# reasoning) AND reasoning-only responses after
# prefill exhaustion — models like mimo-v2-pro
# always populate reasoning fields via OpenRouter,
# so the old `not _has_structured` guard blocked
# retries for every reasoning model after prefill.
_truly_empty = not self._strip_think_blocks(
final_response
).strip()
_prefill_exhausted = (
_has_structured
and self._thinking_prefill_retries >= 2
)
if _truly_empty and (not _has_structured or _prefill_exhausted) and self._empty_content_retries < 3:
self._empty_content_retries += 1
logger.warning(
"Empty response (no content or reasoning) — "

View File

@ -1741,9 +1741,9 @@ class TestRunConversation:
{"role": "assistant", "content": "old answer"},
]
# 3 responses: original + 2 prefill continuations (structured reasoning triggers prefill)
# 6 responses: original + 2 prefill + 3 retries after prefill exhaustion
with (
patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp, empty_resp, empty_resp]),
patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp] * 6),
patch.object(agent, "_compress_context") as mock_compress,
patch.object(agent, "_persist_session"),
patch.object(agent, "_save_trajectory"),
@ -1754,18 +1754,18 @@ class TestRunConversation:
mock_compress.assert_not_called() # no compression triggered
assert result["completed"] is True
assert result["final_response"] == "(empty)"
assert result["api_calls"] == 3 # 1 original + 2 prefill continuations
assert result["api_calls"] == 6 # 1 original + 2 prefill + 3 retries
def test_reasoning_only_response_prefill_then_empty(self, agent):
"""Structured reasoning-only triggers prefill continuation (up to 2), then falls through to (empty)."""
"""Structured reasoning-only triggers prefill (2), then retries (3), then (empty)."""
self._setup_agent(agent)
empty_resp = _mock_response(
content=None,
finish_reason="stop",
reasoning_content="structured reasoning answer",
)
# 3 responses: original + 2 prefill continuations, all reasoning-only
agent.client.chat.completions.create.side_effect = [empty_resp, empty_resp, empty_resp]
# 6 responses: 1 original + 2 prefill + 3 retries after prefill exhaustion
agent.client.chat.completions.create.side_effect = [empty_resp] * 6
with (
patch.object(agent, "_persist_session"),
patch.object(agent, "_save_trajectory"),
@ -1774,7 +1774,7 @@ class TestRunConversation:
result = agent.run_conversation("answer me")
assert result["completed"] is True
assert result["final_response"] == "(empty)"
assert result["api_calls"] == 3 # 1 original + 2 prefill continuations
assert result["api_calls"] == 6 # 1 original + 2 prefill + 3 retries
def test_reasoning_only_prefill_succeeds_on_continuation(self, agent):
"""When prefill continuation produces content, it becomes the final response."""