From a9ebb331bcbf4f76be8aae9ae828467188753aa8 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 12 Apr 2026 13:00:07 -0700 Subject: [PATCH] fix: contextual error diagnostics for invalid API responses (#8565) Previously, all invalid API responses (choices=None) were diagnosed as 'fast response often indicates rate limiting' regardless of actual response time or error code. A 738s Cloudflare 524 timeout was labeled as 'fast response' and 'possible rate limit'. Now extracts the error code from response.error and classifies: - 524: upstream provider timed out (Cloudflare) - 504: upstream gateway timeout - 429: rate limited by upstream provider - 500/502: upstream server error - 503/529: upstream provider overloaded - Other codes: shown with code number - No code + <10s: likely rate limited (timing heuristic) - No code + >60s: likely upstream timeout - No code + 10-60s: neutral response time All downstream messages (retry status, final error, interrupt message) now use the classified hint instead of generic rate-limit language. Reported by community member Lumen Radley (MiMo provider timeouts). --- run_agent.py | 47 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/run_agent.py b/run_agent.py index 2fd73160..360ef051 100644 --- a/run_agent.py +++ b/run_agent.py @@ -8225,7 +8225,8 @@ class AIAgent: if self.thinking_callback: self.thinking_callback("") - # This is often rate limiting or provider returning malformed response + # Invalid response — could be rate limiting, provider timeout, + # upstream server error, or malformed response. retry_count += 1 # Eager fallback: empty/malformed responses are a common @@ -8261,11 +8262,44 @@ class AIAgent: if self.verbose_logging: logging.debug(f"Response attributes for invalid response: {resp_attrs}") + # Extract error code from response for contextual diagnostics + _resp_error_code = None + if response and hasattr(response, 'error') and response.error: + _code_raw = getattr(response.error, 'code', None) + if _code_raw is None and isinstance(response.error, dict): + _code_raw = response.error.get('code') + if _code_raw is not None: + try: + _resp_error_code = int(_code_raw) + except (TypeError, ValueError): + pass + + # Build a human-readable failure hint from the error code + # and response time, instead of always assuming rate limiting. + if _resp_error_code == 524: + _failure_hint = f"upstream provider timed out (Cloudflare 524, {api_duration:.0f}s)" + elif _resp_error_code == 504: + _failure_hint = f"upstream gateway timeout (504, {api_duration:.0f}s)" + elif _resp_error_code == 429: + _failure_hint = f"rate limited by upstream provider (429)" + elif _resp_error_code in (500, 502): + _failure_hint = f"upstream server error ({_resp_error_code}, {api_duration:.0f}s)" + elif _resp_error_code in (503, 529): + _failure_hint = f"upstream provider overloaded ({_resp_error_code})" + elif _resp_error_code is not None: + _failure_hint = f"upstream error (code {_resp_error_code}, {api_duration:.0f}s)" + elif api_duration < 10: + _failure_hint = f"fast response ({api_duration:.1f}s) — likely rate limited" + elif api_duration > 60: + _failure_hint = f"slow response ({api_duration:.0f}s) — likely upstream timeout" + else: + _failure_hint = f"response time {api_duration:.1f}s" + self._vprint(f"{self.log_prefix}⚠️ Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}", force=True) self._vprint(f"{self.log_prefix} 🏢 Provider: {provider_name}", force=True) cleaned_provider_error = self._clean_error_message(error_msg) self._vprint(f"{self.log_prefix} 📝 Provider message: {cleaned_provider_error}", force=True) - self._vprint(f"{self.log_prefix} ⏱️ Response time: {api_duration:.2f}s (fast response often indicates rate limiting)", force=True) + self._vprint(f"{self.log_prefix} ⏱️ {_failure_hint}", force=True) if retry_count >= max_retries: # Try fallback before giving up @@ -8282,14 +8316,13 @@ class AIAgent: "messages": messages, "completed": False, "api_calls": api_call_count, - "error": "Invalid API response shape. Likely rate limited or malformed provider response.", + "error": f"Invalid API response after {max_retries} retries: {_failure_hint}", "failed": True # Mark as failure for filtering } - # Longer backoff for rate limiting (likely cause of None choices) - # Jittered exponential: 5s base, 120s cap + random jitter + # Backoff before retry — jittered exponential: 5s base, 120s cap wait_time = jittered_backoff(retry_count, base_delay=5.0, max_delay=120.0) - self._vprint(f"{self.log_prefix}⏳ Retrying in {wait_time}s (extended backoff for possible rate limit)...", force=True) + self._vprint(f"{self.log_prefix}⏳ Retrying in {wait_time:.1f}s ({_failure_hint})...", force=True) logging.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}") # Sleep in small increments to stay responsive to interrupts @@ -8300,7 +8333,7 @@ class AIAgent: self._persist_session(messages, conversation_history) self.clear_interrupt() return { - "final_response": f"Operation interrupted: retrying API call after rate limit (retry {retry_count}/{max_retries}).", + "final_response": f"Operation interrupted during retry ({_failure_hint}, attempt {retry_count}/{max_retries}).", "messages": messages, "api_calls": api_call_count, "completed": False,