From df0e97a168297232cc4231b4e3b6a993147aa0b8 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 29 Apr 2026 04:56:55 -0700 Subject: [PATCH] fix(minimax): enable Anthropic prompt caching for MiniMax's own models (#17425) MiniMax's /anthropic endpoint documents cache_control support (0.1x read pricing, 5-min TTL) for MiniMax-M2.7, M2.5, M2.1, M2. PR #12846 gated third-party Anthropic-wire caching on 'claude' in model name, which left MiniMax's own model family re-paying full input tokens every turn. Opt in explicitly via provider id (minimax / minimax-cn) or host match (api.minimax.io / api.minimaxi.com). Narrow allowlist mirroring the existing Qwen/Alibaba branch below; leaves room for a capability-based surface (ProviderConfig.supports_anthropic_cache) if a third provider needs it. Closes #17332 --- run_agent.py | 18 +++++ .../test_anthropic_prompt_cache_policy.py | 66 ++++++++++++++++++- 2 files changed, 81 insertions(+), 3 deletions(-) diff --git a/run_agent.py b/run_agent.py index 7bb47fcd..2b386db2 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2814,6 +2814,24 @@ class AIAgent: # Third-party Anthropic-compatible gateway. return True, True + # MiniMax on its Anthropic-compatible endpoint serves its own + # model family (MiniMax-M2.7, M2.5, M2.1, M2) with documented + # cache_control support (0.1× read pricing, 5-minute TTL). The + # blanket is_claude gate above excludes these — opt them in + # explicitly via provider id or host match so users on + # provider=minimax / minimax-cn (or custom endpoints pointing at + # api.minimax.io/anthropic / api.minimaxi.com/anthropic) get the + # same cost reduction as Claude traffic. + # Docs: https://platform.minimax.io/docs/api-reference/anthropic-api-compatible-cache + if is_anthropic_wire: + is_minimax_provider = provider_lower in {"minimax", "minimax-cn"} + is_minimax_host = ( + base_url_host_matches(eff_base_url, "api.minimax.io") + or base_url_host_matches(eff_base_url, "api.minimaxi.com") + ) + if is_minimax_provider or is_minimax_host: + return True, True + # Qwen/Alibaba on OpenCode (Zen/Go) and native DashScope: OpenAI-wire # transport that accepts Anthropic-style cache_control markers and # rewards them with real cache hits. Without this branch diff --git a/tests/run_agent/test_anthropic_prompt_cache_policy.py b/tests/run_agent/test_anthropic_prompt_cache_policy.py index 7a85022a..b8a380a6 100644 --- a/tests/run_agent/test_anthropic_prompt_cache_policy.py +++ b/tests/run_agent/test_anthropic_prompt_cache_policy.py @@ -89,15 +89,75 @@ class TestThirdPartyAnthropicGateway: assert should is True, "Third-party Anthropic gateway with Claude must cache" assert native is True, "Third-party Anthropic gateway uses native cache_control layout" - def test_third_party_without_claude_name_does_not_cache(self): - # A provider exposing e.g. GLM via anthropic_messages transport — we - # don't know whether it supports cache_control, so stay conservative. + def test_third_party_anthropic_non_claude_unknown_provider_does_not_cache(self): + # A provider exposing e.g. GLM via anthropic_messages transport from + # a host we don't recognize — we don't know whether it supports + # cache_control, so stay conservative. + agent = _make_agent( + provider="custom", + base_url="https://some-unknown-gateway.example.com/anthropic", + api_mode="anthropic_messages", + model="glm-4.5", + ) + assert agent._anthropic_prompt_cache_policy() == (False, False) + + +class TestMiniMaxAnthropicWire: + """MiniMax's own model family on its Anthropic-compatible endpoint. + + MiniMax documents cache_control support on ``/anthropic`` (0.1× read + pricing, 5-minute TTL). Issue #17332: the blanket ``is_claude`` gate on + the third-party-gateway branch left MiniMax-M2.7 etc. paying full input + cost every turn. Allowlist MiniMax explicitly via provider id or host. + """ + + def test_minimax_m27_on_provider_minimax_caches_native_layout(self): + agent = _make_agent( + provider="minimax", + base_url="https://api.minimax.io/anthropic", + api_mode="anthropic_messages", + model="minimax-m2.7", + ) + assert agent._anthropic_prompt_cache_policy() == (True, True) + + def test_minimax_m25_on_provider_minimax_cn_caches_native_layout(self): + agent = _make_agent( + provider="minimax-cn", + base_url="https://api.minimaxi.com/anthropic", + api_mode="anthropic_messages", + model="minimax-m2.5", + ) + assert agent._anthropic_prompt_cache_policy() == (True, True) + + def test_custom_provider_pointed_at_minimax_host_caches(self): + # User wires a custom provider manually at MiniMax's Anthropic URL; + # host match alone should be sufficient to enable caching. agent = _make_agent( provider="custom", base_url="https://api.minimax.io/anthropic", api_mode="anthropic_messages", model="minimax-m2.7", ) + assert agent._anthropic_prompt_cache_policy() == (True, True) + + def test_minimax_host_china_endpoint_caches(self): + agent = _make_agent( + provider="custom", + base_url="https://api.minimaxi.com/anthropic", + api_mode="anthropic_messages", + model="minimax-m2.1", + ) + assert agent._anthropic_prompt_cache_policy() == (True, True) + + def test_minimax_provider_on_openai_wire_does_not_cache(self): + # chat_completions transport — MiniMax's cache_control support is + # documented only for the /anthropic endpoint. Stay off. + agent = _make_agent( + provider="minimax", + base_url="https://api.minimax.io/v1", + api_mode="chat_completions", + model="minimax-m2.7", + ) assert agent._anthropic_prompt_cache_policy() == (False, False)