fix(agent): try fallback providers at init when primary credential pool is exhausted (#17929)

When a provider's credential pool has a single entry in 429-cooldown,
resolve_provider_client returns no client (None) and AIAgent.__init__
raises a misleading RuntimeError suggesting the API key is missing — even
when valid fallback_providers are configured.

This patch makes __init__ iterate the fallback chain before raising,
mirroring the existing in-flight fallback logic in the request loop.
If a fallback resolves, the agent initializes against it and sets
_fallback_activated=True so _restore_primary_runtime can pick the
primary back up after cooldown.

Closes #17929
This commit is contained in:
luyao618 2026-04-30 20:45:20 +08:00 committed by Teknium
parent 1dce908930
commit 13f344c5ce
2 changed files with 111 additions and 10 deletions

View File

@ -1473,11 +1473,43 @@ class AIAgent:
_env_hint = _pcfg.api_key_env_vars[0]
except Exception:
pass
# --- Init-time fallback (#17929) ---
_fb_entries = []
if isinstance(fallback_model, list):
_fb_entries = [
f for f in fallback_model
if isinstance(f, dict) and f.get("provider") and f.get("model")
]
elif isinstance(fallback_model, dict) and fallback_model.get("provider") and fallback_model.get("model"):
_fb_entries = [fallback_model]
_fb_resolved = False
for _fb in _fb_entries:
_fb_client, _fb_model = resolve_provider_client(
_fb["provider"], model=_fb["model"], raw_codex=True,
explicit_base_url=_fb.get("base_url"),
explicit_api_key=_fb.get("api_key"),
)
if _fb_client is not None:
self.provider = _fb["provider"]
self.model = _fb_model or _fb["model"]
self._fallback_activated = True
client_kwargs = {
"api_key": _fb_client.api_key,
"base_url": str(_fb_client.base_url),
}
if _provider_timeout is not None:
client_kwargs["timeout"] = _provider_timeout
if hasattr(_fb_client, "_default_headers") and _fb_client._default_headers:
client_kwargs["default_headers"] = dict(_fb_client._default_headers)
_fb_resolved = True
break
if not _fb_resolved:
raise RuntimeError(
f"Provider '{_explicit}' is set in config.yaml but no API key "
f"was found. Set the {_env_hint} environment "
f"variable, or switch to a different provider with `hermes model`."
)
if not getattr(self, "_fallback_activated", False):
# No provider configured — reject with a clear message.
raise RuntimeError(
"No LLM provider configured. Run `hermes model` to "
@ -1536,7 +1568,7 @@ class AIAgent:
else:
self._fallback_chain = []
self._fallback_index = 0
self._fallback_activated = False
self._fallback_activated = getattr(self, "_fallback_activated", False)
# Legacy attribute kept for backward compat (tests, external callers)
self._fallback_model = self._fallback_chain[0] if self._fallback_chain else None
if self._fallback_chain and not self.quiet_mode:

View File

@ -0,0 +1,69 @@
"""Regression test for #17929: AIAgent.__init__ should try fallback_model
when primary provider credentials are exhausted."""
import pytest
from unittest.mock import patch, MagicMock
from run_agent import AIAgent
def _make_tool_defs():
return [{"type": "function", "function": {"name": "web_search",
"description": "search", "parameters": {"type": "object", "properties": {}}}}]
def _mock_client(api_key="fb-key-1234567890", base_url="https://fb.example.com/v1"):
c = MagicMock()
c.api_key = api_key
c.base_url = base_url
c._default_headers = None
return c
def test_init_tries_fallback_when_primary_returns_none():
    """__init__ must not raise RuntimeError when only a fallback entry resolves.

    The patched resolver yields a client solely for the
    ``tencent-token-plan`` provider; every other provider (here, the primary)
    resolves to ``(None, None)``. The agent is then expected to report the
    fallback's provider and model and to have ``_fallback_activated`` set.
    """
    fallback_client = _mock_client()

    def fake_resolve(provider, model=None, raw_codex=False,
                     explicit_base_url=None, explicit_api_key=None):
        # Anything other than the fallback provider is treated as exhausted.
        if provider != "tencent-token-plan":
            return None, None
        return fallback_client, "kimi2.5"

    agent_kwargs = {
        "provider": "alibaba-coding-plan",
        "model": "qwen3.6-plus",
        "api_key": None,
        "base_url": None,
        "quiet_mode": True,
        "skip_context_files": True,
        "skip_memory": True,
        "fallback_model": [{"provider": "tencent-token-plan", "model": "kimi2.5"}],
    }

    with patch("agent.auxiliary_client.resolve_provider_client", side_effect=fake_resolve), \
            patch("run_agent.get_tool_definitions", return_value=_make_tool_defs()), \
            patch("run_agent.check_toolset_requirements", return_value={}), \
            patch("run_agent.OpenAI", return_value=MagicMock()):
        agent = AIAgent(**agent_kwargs)

    assert agent.provider == "tencent-token-plan"
    assert agent.model == "kimi2.5"
    assert agent._fallback_activated is True
def test_init_raises_when_no_fallback_configured():
    """With no fallback configured, an unresolved primary must still raise.

    The resolver is patched to return ``(None, None)`` for every call and
    ``fallback_model`` is ``None``, so constructing the agent is expected to
    raise ``RuntimeError`` whose message matches "no API key was found".
    """
    agent_kwargs = {
        "provider": "alibaba-coding-plan",
        "model": "qwen3.6-plus",
        "api_key": None,
        "base_url": None,
        "quiet_mode": True,
        "skip_context_files": True,
        "skip_memory": True,
        "fallback_model": None,
    }

    with patch("agent.auxiliary_client.resolve_provider_client", return_value=(None, None)), \
            patch("run_agent.get_tool_definitions", return_value=_make_tool_defs()), \
            patch("run_agent.check_toolset_requirements", return_value={}), \
            patch("run_agent.OpenAI", return_value=MagicMock()):
        with pytest.raises(RuntimeError, match="no API key was found"):
            AIAgent(**agent_kwargs)