fix(codex): route auth failures to fallback provider chain

Two related paths where Codex auth failures silently bypassed the
fallback chain instead of switching to the next provider:

1. cli.py — _ensure_runtime_credentials() calls resolve_runtime_provider()
   before each turn. When provider is explicitly configured (not "auto"),
   an AuthError from token refresh is re-raised and printed as a bold-red
   error, returning False before the agent ever starts. The fallback chain
   was never tried. Fix: on AuthError, iterate fallback_providers and
   switch to the first one that resolves successfully.

2. run_agent.py — inside the codex_responses validity gate (inner retry
   loop), response.status in {"failed","cancelled"} with non-empty output
   items was treated as a valid response and broke out of the retry loop,
   reaching _normalize_codex_response() outside the fallback machinery.
   That function raises RuntimeError on status="failed", which propagates
   to the outer except with no fallback logic. Fix: detect terminal status
   codes before the output_items check and set response_invalid=True so
   the existing fallback chain fires normally.
This commit is contained in:
Devzo 2026-04-07 23:27:50 +02:00 committed by Teknium
parent f76df30e08
commit 813dbd9b40
2 changed files with 68 additions and 20 deletions

31
cli.py
View File

@ -3083,6 +3083,8 @@ class HermesCLI:
format_runtime_provider_error,
)
_primary_exc = None
runtime = None
try:
runtime = resolve_runtime_provider(
requested=self.requested_provider,
@ -3090,7 +3092,34 @@ class HermesCLI:
explicit_base_url=self._explicit_base_url,
)
except Exception as exc:
message = format_runtime_provider_error(exc)
_primary_exc = exc
# Primary provider auth failed — try fallback providers before giving up.
if runtime is None and _primary_exc is not None:
from hermes_cli.auth import AuthError
if isinstance(_primary_exc, AuthError):
_fb_chain = self._fallback_model if isinstance(self._fallback_model, list) else []
for _fb in _fb_chain:
_fb_provider = (_fb.get("provider") or "").strip().lower()
_fb_model = (_fb.get("model") or "").strip()
if not _fb_provider or not _fb_model:
continue
try:
runtime = resolve_runtime_provider(requested=_fb_provider)
logger.warning(
"Primary provider auth failed (%s). Falling through to fallback: %s/%s",
_primary_exc, _fb_provider, _fb_model,
)
_cprint(f"⚠️ Primary auth failed — switching to fallback: {_fb_provider} / {_fb_model}")
self.requested_provider = _fb_provider
self.model = _fb_model
_primary_exc = None
break
except Exception:
continue
if runtime is None:
message = format_runtime_provider_error(_primary_exc) if _primary_exc else "Provider resolution failed."
ChatConsole().print(f"[bold red]{message}[/]")
return False

View File

@ -9532,28 +9532,47 @@ class AIAgent:
response_invalid = True
error_details.append("response is None")
else:
# output_text fallback: stream backfill may have failed
# but normalize can still recover from output_text
_out_text = getattr(response, "output_text", None)
_out_text_stripped = _out_text.strip() if isinstance(_out_text, str) else ""
if _out_text_stripped:
logger.debug(
"Codex response.output is empty but output_text is present "
"(%d chars); deferring to normalization.",
len(_out_text_stripped),
# Provider returned a terminal failure (e.g. quota exhaustion).
# Treat as invalid so the fallback chain is triggered instead of
# letting the error bubble up outside the retry/fallback loop.
_codex_resp_status = str(getattr(response, "status", "") or "").strip().lower()
if _codex_resp_status in {"failed", "cancelled"}:
_codex_error_obj = getattr(response, "error", None)
_codex_error_msg = (
_codex_error_obj.get("message") if isinstance(_codex_error_obj, dict)
else str(_codex_error_obj) if _codex_error_obj
else f"Responses API returned status '{_codex_resp_status}'"
)
else:
_resp_status = getattr(response, "status", None)
_resp_incomplete = getattr(response, "incomplete_details", None)
logger.warning(
"Codex response.output is empty after stream backfill "
"(status=%s, incomplete_details=%s, model=%s). %s",
_resp_status, _resp_incomplete,
getattr(response, "model", None),
f"api_mode={self.api_mode} provider={self.provider}",
logging.warning(
"Codex response status='%s' (error=%s). Routing to fallback. %s",
_codex_resp_status, _codex_error_msg,
self._client_log_context(),
)
response_invalid = True
error_details.append("response.output is empty")
error_details.append(f"response.status={_codex_resp_status}: {_codex_error_msg}")
else:
# output_text fallback: stream backfill may have failed
# but normalize can still recover from output_text
_out_text = getattr(response, "output_text", None)
_out_text_stripped = _out_text.strip() if isinstance(_out_text, str) else ""
if _out_text_stripped:
logger.debug(
"Codex response.output is empty but output_text is present "
"(%d chars); deferring to normalization.",
len(_out_text_stripped),
)
else:
_resp_status = getattr(response, "status", None)
_resp_incomplete = getattr(response, "incomplete_details", None)
logger.warning(
"Codex response.output is empty after stream backfill "
"(status=%s, incomplete_details=%s, model=%s). %s",
_resp_status, _resp_incomplete,
getattr(response, "model", None),
f"api_mode={self.api_mode} provider={self.provider}",
)
response_invalid = True
error_details.append("response.output is empty")
elif self.api_mode == "anthropic_messages":
_tv = self._get_transport()
if not _tv.validate_response(response):