diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 288ec9bb..026f11dc 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -229,6 +229,21 @@ _PROVIDER_VISION_MODELS: Dict[str, str] = { "zai": "glm-5v-turbo", } +# Providers whose endpoint does not accept image input, even though the +# provider's broader ecosystem has vision models available elsewhere. When +# `auxiliary.vision.provider: auto` sees one of these as the main provider, +# resolve_vision_provider_client must skip straight to the aggregator chain +# instead of returning a client that will 404 on every vision request. +# +# kimi-coding / kimi-coding-cn: the Kimi Coding Plan routes through +# api.kimi.com/coding (Anthropic Messages wire) which Kimi's own docs +# describe as having no image_in capability. Vision lives on the separate +# Kimi Platform (api.moonshot.ai, OpenAI-wire, pay-as-you-go). See #17076. +_PROVIDERS_WITHOUT_VISION: frozenset = frozenset({ + "kimi-coding", + "kimi-coding-cn", +}) + # OpenRouter app attribution headers _OR_HEADERS = { "HTTP-Referer": "https://hermes-agent.nousresearch.com", @@ -2534,6 +2549,19 @@ def resolve_vision_provider_client( main_provider, default_model or resolved_model or main_model, ) return _finalize(main_provider, sync_client, default_model) + elif main_provider in _PROVIDERS_WITHOUT_VISION: + # Kimi Coding Plan's /coding endpoint (Anthropic Messages wire) + # does not accept image input — Kimi's own docs say "Current + # model does not support image input, switch to a model with + # image_in capability" and vision lives on the separate Kimi + # Platform (api.moonshot.ai). Skip the main provider and fall + # through to the aggregator chain instead of returning a + # client that will 404 on every vision request (#17076). 
+ logger.debug( + "Vision auto-detect: skipping main provider %s (no " + "vision support) — falling through to aggregator chain", + main_provider, + ) else: rpc_client, rpc_model = resolve_provider_client( main_provider, vision_model, diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index fb23a59b..c15e655d 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -1635,3 +1635,106 @@ class TestCodexAdapterReasoningTranslation: ) assert "reasoning" not in captured + + +class TestVisionAutoSkipsKimiCoding: + """Vision auto-detect in resolve_vision_provider_client skips providers + whose main endpoint has no vision (e.g. Kimi Coding Plan /coding) and + falls through to the aggregator chain instead of handing back a client + that will 404 on every request (#17076). + """ + + def test_kimi_coding_skipped_falls_through_to_openrouter(self, monkeypatch): + """kimi-coding as main + vision auto → OpenRouter (not kimi).""" + fake_or_client = MagicMock(name="openrouter_client") + + monkeypatch.setattr( + "agent.auxiliary_client._read_main_provider", lambda: "kimi-coding", + ) + monkeypatch.setattr( + "agent.auxiliary_client._read_main_model", lambda: "kimi-code", + ) + # Guard: if the skip doesn't fire, the auto path would hand kimi-coding + # to resolve_provider_client or _resolve_strict_vision_backend; both are + # stubbed below to raise AssertionError so the test fails loudly. 
+ rpc_mock = MagicMock(side_effect=AssertionError( + "resolve_provider_client should NOT be called for kimi-coding " + "on the vision auto path")) + monkeypatch.setattr( + "agent.auxiliary_client.resolve_provider_client", rpc_mock, + ) + + def fake_strict(provider, model=None): + if provider == "openrouter": + return fake_or_client, "google/gemini-3-flash-preview" + if provider == "nous": + return None, None + raise AssertionError( + f"strict vision backend should not be called for {provider!r} " + "when main provider is kimi-coding" + ) + monkeypatch.setattr( + "agent.auxiliary_client._resolve_strict_vision_backend", + fake_strict, + ) + + provider, client, model = resolve_vision_provider_client() + assert provider == "openrouter" + assert client is fake_or_client + assert model == "google/gemini-3-flash-preview" + + def test_kimi_coding_cn_skipped_too(self, monkeypatch): + """Same skip applies to the CN variant.""" + fake_or_client = MagicMock(name="openrouter_client") + + monkeypatch.setattr( + "agent.auxiliary_client._read_main_provider", lambda: "kimi-coding-cn", + ) + monkeypatch.setattr( + "agent.auxiliary_client._read_main_model", lambda: "kimi-code", + ) + rpc_mock = MagicMock(side_effect=AssertionError( + "resolve_provider_client should NOT be called for kimi-coding-cn")) + monkeypatch.setattr( + "agent.auxiliary_client.resolve_provider_client", rpc_mock, + ) + monkeypatch.setattr( + "agent.auxiliary_client._resolve_strict_vision_backend", + lambda p, m=None: (fake_or_client, "gemini") + if p == "openrouter" + else (None, None), + ) + + provider, client, _ = resolve_vision_provider_client() + assert provider == "openrouter" + assert client is fake_or_client + + def test_explicit_override_to_kimi_coding_still_honored(self, monkeypatch): + """When a user *explicitly* requests kimi-coding for vision (e.g. 
+ they know what they're doing, or are running a future build that + adds image_in capability to Kimi Code), the explicit path still + routes to kimi-coding — only the auto branch applies the skip. + """ + monkeypatch.setattr( + "agent.auxiliary_client._read_main_provider", lambda: "openrouter", + ) + fake_kimi_client = MagicMock(name="kimi_client") + gcc_mock = MagicMock(return_value=(fake_kimi_client, "kimi-code")) + monkeypatch.setattr( + "agent.auxiliary_client._get_cached_client", gcc_mock, + ) + + provider, client, model = resolve_vision_provider_client( + provider="kimi-coding", + ) + assert provider == "kimi-coding" + assert client is fake_kimi_client + gcc_mock.assert_called_once() + + def test_skip_set_covers_exactly_known_entries(self): + """Guard against accidental widening of the skip list.""" + from agent.auxiliary_client import _PROVIDERS_WITHOUT_VISION + assert _PROVIDERS_WITHOUT_VISION == frozenset({ + "kimi-coding", + "kimi-coding-cn", + })