fix(aux): remove hardcoded Codex fallback model, drop Codex from auto chain (#17765)

The _CODEX_AUX_MODEL constant had already rotated twice in 6 weeks (gpt-5.3-codex -> gpt-5.2-codex -> now broken again at gpt-5.2-codex) because ChatGPT-account Codex gates which models it accepts via an undocumented, shifting allow-list that OpenAI publishes no changelog for. Any pinned default will keep going stale.

Issue #17533 reports the current breakage: every ChatGPT-account auxiliary fallback fails with HTTP 400 "model is not supported" and the 60s pause loop degrades long sessions.

Rather than reset the clock with another stale pin (PR #17544 proposes gpt-5.2-codex -> gpt-5.4), remove the hardcoded second-order Codex fallback entirely:

- Delete `_CODEX_AUX_MODEL`.
- Drop `_try_codex` from `_get_provider_chain()` (the auto chain now ends at api-key providers; 4 rungs instead of 5).
- Rename `_try_codex() -> _build_codex_client(model)` and require an explicit model from the caller. No more guessing.
- `resolve_provider_client("openai-codex", model=None)` now warns and returns (None, None) instead of silently guessing a stale model ID.
- Remove `_try_codex` from the `provider="custom"` fallback ladder (same stale-constant trap).
- `_resolve_strict_vision_backend("openai-codex")` routes through `resolve_provider_client` so the caller's explicit model is honored.

Codex-main users are unaffected: Step 1 of `_resolve_auto` already uses `main_provider` + `main_model` directly and passes the user's configured Codex model through `resolve_provider_client`, which never touched `_CODEX_AUX_MODEL`.

Per-task overrides (`auxiliary.<task>.provider/model`) continue to work and are the supported way to route specific aux tasks through Codex.

Users whose main provider fails with a payment/connection error and who have ONLY ChatGPT-account Codex auth will now see the 60s pause without a stale-model-rejection noise line in between -- same outcome, cleaner failure.

Closes #17533. Supersedes #17544 (which resets the clock on the same stale-constant problem).
2026-04-29 23:23:50 -07:00 · 2026-04-29 23:23:50 -07:00 · ce0c3ae493
commit ce0c3ae493
parent f73364b1c4
4 changed files with 123 additions and 61 deletions
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@ -5,11 +5,11 @@ session search, web extraction, vision analysis, browser vision) picks up
 the best available backend without duplicating fallback logic.

 Resolution order for text tasks (auto mode):
-  1. OpenRouter  (OPENROUTER_API_KEY)
-  2. Nous Portal (~/.hermes/auth.json active provider)
-  3. Custom endpoint (config.yaml model.base_url + OPENAI_API_KEY)
-  4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex,
-     wrapped to look like a chat.completions client)
+  1. User's main provider + main model (used regardless of provider type —
+     aggregators, direct API-key providers, native Anthropic, Codex, etc.)
+  2. OpenRouter  (OPENROUTER_API_KEY)
+  3. Nous Portal (~/.hermes/auth.json active provider)
+  4. Custom endpoint (config.yaml model.base_url + OPENAI_API_KEY)
  5. Native Anthropic
  6. Direct API-key providers (z.ai/GLM, Kimi/Moonshot, MiniMax, MiniMax-CN)
  7. None
@ -18,10 +18,16 @@ Resolution order for vision/multimodal tasks (auto mode):
  1. Selected main provider, if it is one of the supported vision backends below
  2. OpenRouter
  3. Nous Portal
-  4. Codex OAuth (gpt-5.3-codex supports vision via Responses API)
-  5. Native Anthropic
-  6. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.)
-  7. None
+  4. Native Anthropic
+  5. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.)
+  6. None
+
+Codex OAuth (ChatGPT-account auth) is intentionally NOT in either
+fallback chain: OpenAI gates this endpoint behind an undocumented,
+shifting model allow-list, so "just try Codex with a hardcoded model"
+rots on its own.  Codex is used only when the user's main provider *is*
+openai-codex (Step 1 above) or when a caller explicitly requests it with
+a model (auxiliary.<task>.provider + auxiliary.<task>.model).

 Per-task overrides are configured in config.yaml under the ``auxiliary:`` section
 (e.g. ``auxiliary.vision.provider``, ``auxiliary.compression.model``).
@ -285,12 +291,14 @@ _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
 _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
 _AUTH_JSON_PATH = get_hermes_home() / "auth.json"

-# Codex fallback: uses the Responses API (the only endpoint the Codex
-# OAuth token can access) with a fast model for auxiliary tasks.
-# ChatGPT-backed Codex accounts currently reject gpt-5.3-codex for these
-# auxiliary flows, while gpt-5.2-codex remains broadly available and supports
-# vision via Responses.
-_CODEX_AUX_MODEL = "gpt-5.2-codex"
+# Codex OAuth endpoint used when a caller explicitly requests
+# provider="openai-codex".  There is deliberately no hardcoded default
+# model: the set of models OpenAI accepts on this endpoint for
+# ChatGPT-account auth is an undocumented, shifting allow-list, and
+# pinning one here has gone stale twice within 6 weeks in early 2026
+# (gpt-5.3-codex → gpt-5.2-codex → rejected again).  Callers must pass the model
+# they want explicitly (from config.yaml model.model, auxiliary.<task>.model,
+# or the user's active Codex model selection).
 _CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"


@ -1420,7 +1428,23 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
    return _fallback_client, model


-def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
+def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
+    """Build a CodexAuxiliaryClient for an explicitly-requested model.
+
+    There is no auto-selection of the Codex model: the ChatGPT-account
+    Codex endpoint's accepted model list is an undocumented, drifting
+    allow-list, so any hardcoded default we pick goes stale.  The caller
+    is responsible for passing the model (e.g. from the user's own
+    ``model.model`` or ``auxiliary.<task>.model`` config).
+
+    Returns (None, None) if ``model`` is empty or no Codex OAuth token is found.
+    """
+    if not model:
+        logger.warning(
+            "Auxiliary client: openai-codex requested without a model; "
+            "pass model explicitly (auxiliary.<task>.model in config.yaml)."
+        )
+        return None, None
    pool_present, entry = _select_pool_entry("openai-codex")
    if pool_present:
        codex_token = _pool_runtime_api_key(entry)
@ -1436,13 +1460,13 @@ def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
        if not codex_token:
            return None, None
        base_url = _CODEX_AUX_BASE_URL
-    logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL)
+    logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", model)
    real_client = OpenAI(
        api_key=codex_token,
        base_url=base_url,
        default_headers=_codex_cloudflare_headers(codex_token),
    )
-    return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL
+    return CodexAuxiliaryClient(real_client, model), model


 def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
@ -1497,7 +1521,6 @@ _AUTO_PROVIDER_LABELS = {
    "_try_openrouter": "openrouter",
    "_try_nous": "nous",
    "_try_custom_endpoint": "local/custom",
-    "_try_codex": "openai-codex",
    "_resolve_api_key_provider": "api-key",
 }

@ -1524,12 +1547,18 @@ def _get_provider_chain() -> List[tuple]:

    Built at call time (not module level) so that test patches
    on the ``_try_*`` functions are picked up correctly.
+
+    NOTE: ``openai-codex`` is deliberately NOT in this chain.  The
+    ChatGPT-account Codex endpoint only accepts a shifting, undocumented
+    allow-list of model IDs, so falling back to it with a guessed model
+    fails more often than not.  Codex is used only when the user's main
+    provider *is* openai-codex (see Step 1 of ``_resolve_auto``) or when
+    a caller explicitly requests it with a model.
    """
    return [
        ("openrouter", _try_openrouter),
        ("nous", _try_nous),
        ("local/custom", _try_custom_endpoint),
-        ("openai-codex", _try_codex),
        ("api-key", _resolve_api_key_provider),
    ]

@ -2045,6 +2074,13 @@ def resolve_provider_client(

    # ── OpenAI Codex (OAuth → Responses API) ─────────────────────────
    if provider == "openai-codex":
+        if not model:
+            logger.warning(
+                "resolve_provider_client: openai-codex requested without a "
+                "model; pass model explicitly (e.g. model.model in config.yaml "
+                "or auxiliary.<task>.model for per-task aux routing)."
+            )
+            return None, None
        if raw_codex:
            # Return the raw OpenAI client for callers that need direct
            # access to responses.stream() (e.g., the main agent loop).
@ -2053,7 +2089,7 @@ def resolve_provider_client(
                logger.warning("resolve_provider_client: openai-codex requested "
                               "but no Codex OAuth token found (run: hermes model)")
                return None, None
-            final_model = _normalize_resolved_model(model or _CODEX_AUX_MODEL, provider)
+            final_model = _normalize_resolved_model(model, provider)
            raw_client = OpenAI(
                api_key=codex_token,
                base_url=_CODEX_AUX_BASE_URL,
@ -2061,7 +2097,7 @@ def resolve_provider_client(
            )
            return (raw_client, final_model)
        # Standard path: wrap in CodexAuxiliaryClient adapter
-        client, default = _try_codex()
+        client, default = _build_codex_client(model)
        if client is None:
            logger.warning("resolve_provider_client: openai-codex requested "
                           "but no Codex OAuth token found (run: hermes model)")
@ -2104,9 +2140,9 @@ def resolve_provider_client(
            client = _wrap_if_needed(client, final_model, custom_base, custom_key)
            return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                    else (client, final_model))
-        # Try custom first, then codex, then API-key providers
-        for try_fn in (_try_custom_endpoint, _try_codex,
-                       _resolve_api_key_provider):
+        # Try custom first, then API-key providers (Codex excluded here:
+        # falling through to Codex with no model is a stale-constant trap).
+        for try_fn in (_try_custom_endpoint, _resolve_api_key_provider):
            client, default = try_fn()
            if client is not None:
                final_model = _normalize_resolved_model(model or default, provider)
@ -2453,7 +2489,10 @@ def _resolve_strict_vision_backend(
    if provider == "nous":
        return _try_nous(vision=True)
    if provider == "openai-codex":
-        return _try_codex()
+        # Route through resolve_provider_client so the caller's explicit
+        # model is used.  There is no safe default Codex model (shifting
+        # allow-list); callers must specify via auxiliary.<task>.model.
+        return resolve_provider_client("openai-codex", model, is_vision=True)
    if provider == "anthropic":
        return _try_anthropic()
    if provider == "custom":
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@ -259,7 +259,7 @@ class TestAnthropicOAuthFlag:
        assert mock_build.call_args.args[0] == "sk-ant-oat01-pooled"


-class TestTryCodex:
+class TestBuildCodexClient:
    def test_pool_without_selected_entry_falls_back_to_auth_store(self):
        with (
            patch("agent.auxiliary_client._select_pool_entry", return_value=(True, None)),
@ -267,15 +267,23 @@ class TestTryCodex:
            patch("agent.auxiliary_client.OpenAI") as mock_openai,
        ):
            mock_openai.return_value = MagicMock()
-            from agent.auxiliary_client import _try_codex
+            from agent.auxiliary_client import _build_codex_client

-            client, model = _try_codex()
+            client, model = _build_codex_client("gpt-5.4")

        assert client is not None
-        assert model == "gpt-5.2-codex"
+        assert model == "gpt-5.4"
        assert mock_openai.call_args.kwargs["api_key"] == "codex-auth-token"
        assert mock_openai.call_args.kwargs["base_url"] == "https://chatgpt.com/backend-api/codex"

+    def test_rejects_missing_model(self):
+        """Callers must pass an explicit model; no hardcoded default."""
+        from agent.auxiliary_client import _build_codex_client
+
+        client, model = _build_codex_client("")
+        assert client is None
+        assert model is None
+

 class TestExpiredCodexFallback:
    """Test that expired Codex tokens don't block the auto chain."""
@ -507,14 +515,14 @@ class TestGetTextAuxiliaryClient:
            patch("agent.auxiliary_client.OpenAI"),
            patch("hermes_cli.auth._read_codex_tokens", side_effect=AssertionError("legacy codex store should not run")),
        ):
-            from agent.auxiliary_client import _try_codex
+            from agent.auxiliary_client import _build_codex_client

-            client, model = _try_codex()
+            client, model = _build_codex_client("gpt-5.4")

        from agent.auxiliary_client import CodexAuxiliaryClient

        assert isinstance(client, CodexAuxiliaryClient)
-        assert model == "gpt-5.2-codex"
+        assert model == "gpt-5.4"

    def test_returns_none_when_nothing_available(self, monkeypatch):
        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
@ -783,11 +791,15 @@ class TestIsPaymentError:
 class TestGetProviderChain:
    """_get_provider_chain() resolves functions at call time (testable)."""

-    def test_returns_five_entries(self):
+    def test_returns_four_entries(self):
        chain = _get_provider_chain()
-        assert len(chain) == 5
+        assert len(chain) == 4
        labels = [label for label, _ in chain]
-        assert labels == ["openrouter", "nous", "local/custom", "openai-codex", "api-key"]
+        assert labels == ["openrouter", "nous", "local/custom", "api-key"]
+        # Codex is deliberately NOT in this chain — see _get_provider_chain
+        # docstring. ChatGPT-account Codex has a shifting model allow-list;
+        # guessing a model to fall back on breaks more often than it helps.
+        assert "openai-codex" not in labels

    def test_picks_up_patched_functions(self):
        """Patches on _try_* functions must be visible in the chain."""
@ -814,7 +826,6 @@ class TestTryPaymentFallback:
        with patch("agent.auxiliary_client._try_openrouter", return_value=(None, None)), \
             patch("agent.auxiliary_client._try_nous", return_value=(None, None)), \
             patch("agent.auxiliary_client._try_custom_endpoint", return_value=(None, None)), \
-             patch("agent.auxiliary_client._try_codex", return_value=(None, None)), \
             patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)), \
             patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"):
            client, model, label = _try_payment_fallback("openrouter")
@ -825,23 +836,26 @@ class TestTryPaymentFallback:
        """'codex' should map to 'openai-codex' in the skip set."""
        mock_client = MagicMock()
        with patch("agent.auxiliary_client._try_openrouter", return_value=(mock_client, "or-model")), \
-             patch("agent.auxiliary_client._try_codex", return_value=(None, None)), \
             patch("agent.auxiliary_client._read_main_provider", return_value="openai-codex"):
            client, model, label = _try_payment_fallback("openai-codex", task="vision")
        assert client is mock_client
        assert label == "openrouter"

-    def test_skips_to_codex_when_or_and_nous_fail(self):
-        mock_codex = MagicMock()
+    def test_codex_not_in_fallback_chain(self):
+        """Codex is deliberately NOT a fallback rung (shifting model allow-list).
+
+        When OR/Nous/custom/api-key all fail, payment-fallback returns None —
+        Codex is never tried with a guessed model.
+        """
        with patch("agent.auxiliary_client._try_openrouter", return_value=(None, None)), \
             patch("agent.auxiliary_client._try_nous", return_value=(None, None)), \
             patch("agent.auxiliary_client._try_custom_endpoint", return_value=(None, None)), \
-             patch("agent.auxiliary_client._try_codex", return_value=(mock_codex, "gpt-5.2-codex")), \
+             patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)), \
             patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"):
            client, model, label = _try_payment_fallback("openrouter")
-        assert client is mock_codex
-        assert model == "gpt-5.2-codex"
-        assert label == "openai-codex"
+        assert client is None
+        assert model is None
+        assert label == ""


 class TestCallLlmPaymentFallback:
@ -1360,14 +1374,14 @@ class TestAuxiliaryAuthRefreshRetry:
        with (
            patch(
                "agent.auxiliary_client.resolve_vision_provider_client",
-                side_effect=[("openai-codex", failing_client, "gpt-5.2-codex"), ("openai-codex", fresh_client, "gpt-5.2-codex")],
+                side_effect=[("openai-codex", failing_client, "gpt-5.4"), ("openai-codex", fresh_client, "gpt-5.4")],
            ),
            patch("agent.auxiliary_client._refresh_provider_credentials", return_value=True) as mock_refresh,
        ):
            resp = call_llm(
                task="vision",
                provider="openai-codex",
-                model="gpt-5.2-codex",
+                model="gpt-5.4",
                messages=[{"role": "user", "content": "hi"}],
            )

@ -1384,14 +1398,14 @@ class TestAuxiliaryAuthRefreshRetry:
        fresh_client.chat.completions.create.return_value = _DummyResponse("fresh-non-vision")

        with (
-            patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("openai-codex", "gpt-5.2-codex", None, None, None)),
-            patch("agent.auxiliary_client._get_cached_client", side_effect=[(stale_client, "gpt-5.2-codex"), (fresh_client, "gpt-5.2-codex")]),
+            patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("openai-codex", "gpt-5.4", None, None, None)),
+            patch("agent.auxiliary_client._get_cached_client", side_effect=[(stale_client, "gpt-5.4"), (fresh_client, "gpt-5.4")]),
            patch("agent.auxiliary_client._refresh_provider_credentials", return_value=True) as mock_refresh,
        ):
            resp = call_llm(
                task="compression",
                provider="openai-codex",
-                model="gpt-5.2-codex",
+                model="gpt-5.4",
                messages=[{"role": "user", "content": "hi"}],
            )

@ -1439,14 +1453,14 @@ class TestAuxiliaryAuthRefreshRetry:
        with (
            patch(
                "agent.auxiliary_client.resolve_vision_provider_client",
-                side_effect=[("openai-codex", failing_client, "gpt-5.2-codex"), ("openai-codex", fresh_client, "gpt-5.2-codex")],
+                side_effect=[("openai-codex", failing_client, "gpt-5.4"), ("openai-codex", fresh_client, "gpt-5.4")],
            ),
            patch("agent.auxiliary_client._refresh_provider_credentials", return_value=True) as mock_refresh,
        ):
            resp = await async_call_llm(
                task="vision",
                provider="openai-codex",
-                model="gpt-5.2-codex",
+                model="gpt-5.4",
                messages=[{"role": "user", "content": "hi"}],
            )

--- a/tests/agent/test_codex_cloudflare_headers.py
+++ b/tests/agent/test_codex_cloudflare_headers.py
@ -10,7 +10,7 @@ of auth correctness.
 ``_codex_cloudflare_headers`` in ``agent.auxiliary_client`` centralizes the
 header set so the primary chat client (``run_agent.AIAgent.__init__`` +
 ``_apply_client_headers_for_base_url``) and the auxiliary client paths
-(``_try_codex`` and the ``raw_codex`` branch of ``resolve_provider_client``)
+(``_build_codex_client`` and the ``raw_codex`` branch of ``resolve_provider_client``)
 all emit the same headers.

 These tests pin:
@ -207,9 +207,10 @@ class TestPrimaryClientWiring:
 # ---------------------------------------------------------------------------

 class TestAuxiliaryClientWiring:
-    def test_try_codex_passes_codex_headers(self, monkeypatch):
-        """_try_codex builds the OpenAI client used for compression / vision /
-        title generation when routed through Codex. Must emit codex headers."""
+    def test_build_codex_client_passes_codex_headers(self, monkeypatch):
+        """_build_codex_client builds the OpenAI client used for compression /
+        vision / title generation when routed through Codex. Must emit codex
+        headers."""
        from agent import auxiliary_client
        token = _make_codex_jwt("acct-aux-try-codex")

@ -225,7 +226,7 @@ class TestAuxiliaryClientWiring:
        )
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            mock_openai.return_value = MagicMock()
-            client, model = auxiliary_client._try_codex()
+            client, model = auxiliary_client._build_codex_client("gpt-5.4")
            assert client is not None
            headers = mock_openai.call_args.kwargs.get("default_headers") or {}
            assert headers.get("originator") == "codex_cli_rs"
@ -244,7 +245,7 @@ class TestAuxiliaryClientWiring:
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            mock_openai.return_value = MagicMock()
            client, model = auxiliary_client.resolve_provider_client(
-                "openai-codex", raw_codex=True,
+                "openai-codex", model="gpt-5.4", raw_codex=True,
            )
            assert client is not None
            headers = mock_openai.call_args.kwargs.get("default_headers") or {}
--- a/tests/run_agent/test_provider_parity.py
+++ b/tests/run_agent/test_provider_parity.py
@ -966,17 +966,25 @@ class TestAuxiliaryClientProviderPriority:
            client, model = get_text_auxiliary_client()
        assert mock.call_args.kwargs["base_url"] == "http://localhost:1234/v1"

-    def test_codex_fallback_last_resort(self, monkeypatch):
+    def test_codex_not_in_auto_fallback(self, monkeypatch):
+        """Codex is deliberately NOT part of the auto fallback chain.
+
+        ChatGPT-account Codex gates which models it accepts via an
+        undocumented, shifting allow-list, so falling through to Codex with
+        a hardcoded default model breaks silently whenever OpenAI rotates
+        the list.  When nothing else is available, ``get_text_auxiliary_client``
+        now returns (None, None) rather than guessing a Codex model.
+        """
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
-        from agent.auxiliary_client import get_text_auxiliary_client, CodexAuxiliaryClient
+        from agent.auxiliary_client import get_text_auxiliary_client
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client._read_codex_access_token", return_value="codex-tok"), \
             patch("agent.auxiliary_client.OpenAI"):
            client, model = get_text_auxiliary_client()
-        assert model == "gpt-5.2-codex"
-        assert isinstance(client, CodexAuxiliaryClient)
+        assert client is None
+        assert model is None


 # ── Provider routing tests ───────────────────────────────────────────────────