From df0e97a168297232cc4231b4e3b6a993147aa0b8 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Wed, 29 Apr 2026 04:56:55 -0700
Subject: [PATCH] fix(minimax): enable Anthropic prompt caching for MiniMax's
 own models (#17425)

MiniMax's /anthropic endpoint documents cache_control support (0.1x read
pricing, 5-min TTL) for MiniMax-M2.7, M2.5, M2.1, and M2. PR #12846 gated
third-party Anthropic-wire caching on 'claude' appearing in the model
name, which left MiniMax's own model family re-paying full input tokens
every turn.

Opt MiniMax in explicitly via provider id (minimax / minimax-cn) or host
match (api.minimax.io / api.minimaxi.com). This is a narrow allowlist
mirroring the existing Qwen/Alibaba branch below it; it leaves room for
a capability-based surface (ProviderConfig.supports_anthropic_cache) if
a third provider needs it.

Closes #17332
---
 run_agent.py                                  | 18 +++++
 .../test_anthropic_prompt_cache_policy.py     | 66 ++++++++++++++++++-
 2 files changed, 81 insertions(+), 3 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index 7bb47fcd..2b386db2 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2814,6 +2814,24 @@ class AIAgent:
             # Third-party Anthropic-compatible gateway.
             return True, True
 
+        # MiniMax on its Anthropic-compatible endpoint serves its own
+        # model family (MiniMax-M2.7, M2.5, M2.1, M2) with documented
+        # cache_control support (0.1× read pricing, 5-minute TTL).  The
+        # blanket is_claude gate above excludes these — opt them in
+        # explicitly via provider id or host match so users on
+        # provider=minimax / minimax-cn (or custom endpoints pointing at
+        # api.minimax.io/anthropic / api.minimaxi.com/anthropic) get the
+        # same cost reduction as Claude traffic.
+        # Docs: https://platform.minimax.io/docs/api-reference/anthropic-api-compatible-cache
+        if is_anthropic_wire:
+            is_minimax_provider = provider_lower in {"minimax", "minimax-cn"}
+            is_minimax_host = (
+                base_url_host_matches(eff_base_url, "api.minimax.io")
+                or base_url_host_matches(eff_base_url, "api.minimaxi.com")
+            )
+            if is_minimax_provider or is_minimax_host:
+                return True, True
+
         # Qwen/Alibaba on OpenCode (Zen/Go) and native DashScope: OpenAI-wire
         # transport that accepts Anthropic-style cache_control markers and
         # rewards them with real cache hits.  Without this branch
diff --git a/tests/run_agent/test_anthropic_prompt_cache_policy.py b/tests/run_agent/test_anthropic_prompt_cache_policy.py
index 7a85022a..b8a380a6 100644
--- a/tests/run_agent/test_anthropic_prompt_cache_policy.py
+++ b/tests/run_agent/test_anthropic_prompt_cache_policy.py
@@ -89,15 +89,75 @@ class TestThirdPartyAnthropicGateway:
         assert should is True, "Third-party Anthropic gateway with Claude must cache"
         assert native is True, "Third-party Anthropic gateway uses native cache_control layout"
 
-    def test_third_party_without_claude_name_does_not_cache(self):
-        # A provider exposing e.g. GLM via anthropic_messages transport — we
-        # don't know whether it supports cache_control, so stay conservative.
+    def test_third_party_anthropic_non_claude_unknown_provider_does_not_cache(self):
+        # A provider exposing e.g. GLM via anthropic_messages transport from
+        # a host we don't recognize — we don't know whether it supports
+        # cache_control, so stay conservative.
+        agent = _make_agent(
+            provider="custom",
+            base_url="https://some-unknown-gateway.example.com/anthropic",
+            api_mode="anthropic_messages",
+            model="glm-4.5",
+        )
+        assert agent._anthropic_prompt_cache_policy() == (False, False)
+
+
+class TestMiniMaxAnthropicWire:
+    """MiniMax's own model family on its Anthropic-compatible endpoint.
+
+    MiniMax documents cache_control support on ``/anthropic`` (0.1× read
+    pricing, 5-minute TTL). Issue #17332: the blanket ``is_claude`` gate on
+    the third-party-gateway branch left MiniMax-M2.7 etc. paying full input
+    cost every turn. Allowlist MiniMax explicitly via provider id or host.
+    """
+
+    def test_minimax_m27_on_provider_minimax_caches_native_layout(self):
+        agent = _make_agent(
+            provider="minimax",
+            base_url="https://api.minimax.io/anthropic",
+            api_mode="anthropic_messages",
+            model="minimax-m2.7",
+        )
+        assert agent._anthropic_prompt_cache_policy() == (True, True)
+
+    def test_minimax_m25_on_provider_minimax_cn_caches_native_layout(self):
+        agent = _make_agent(
+            provider="minimax-cn",
+            base_url="https://api.minimaxi.com/anthropic",
+            api_mode="anthropic_messages",
+            model="minimax-m2.5",
+        )
+        assert agent._anthropic_prompt_cache_policy() == (True, True)
+
+    def test_custom_provider_pointed_at_minimax_host_caches(self):
+        # User wires a custom provider manually at MiniMax's Anthropic URL;
+        # host match alone should be sufficient to enable caching.
         agent = _make_agent(
             provider="custom",
             base_url="https://api.minimax.io/anthropic",
             api_mode="anthropic_messages",
             model="minimax-m2.7",
         )
+        assert agent._anthropic_prompt_cache_policy() == (True, True)
+
+    def test_minimax_host_china_endpoint_caches(self):
+        agent = _make_agent(
+            provider="custom",
+            base_url="https://api.minimaxi.com/anthropic",
+            api_mode="anthropic_messages",
+            model="minimax-m2.1",
+        )
+        assert agent._anthropic_prompt_cache_policy() == (True, True)
+
+    def test_minimax_provider_on_openai_wire_does_not_cache(self):
+        # chat_completions transport — MiniMax's cache_control support is
+        # documented only for the /anthropic endpoint. Stay off.
+        agent = _make_agent(
+            provider="minimax",
+            base_url="https://api.minimax.io/v1",
+            api_mode="chat_completions",
+            model="minimax-m2.7",
+        )
         assert agent._anthropic_prompt_cache_policy() == (False, False)