test(anthropic): regression guard for DeepSeek /anthropic thinking replay

Covers the #16748 fix: - unsigned thinking blocks synthesised from reasoning_content survive replay - non-latest assistant turns keep their thinking (DeepSeek validates every turn) - signed Anthropic blocks are stripped (DeepSeek can't validate them) - cache_control is stripped from thinking blocks - OpenAI-compat base (api.deepseek.com without /anthropic) is NOT matched - non-DeepSeek third parties (minimax) keep the generic strip-all behaviour
2026-04-29 08:09:54 -07:00 · 2026-04-29 08:09:54 -07:00 · fa3338c171
commit fa3338c171
parent fd5479a4fc
1 changed files with 242 additions and 0 deletions
--- a/tests/agent/test_deepseek_anthropic_thinking.py
+++ b/tests/agent/test_deepseek_anthropic_thinking.py
@ -0,0 +1,242 @@
+"""Regression guard: preserve thinking blocks on DeepSeek's /anthropic endpoint.
+
+DeepSeek's ``api.deepseek.com/anthropic`` route speaks the Anthropic Messages
+protocol but, when thinking mode is enabled, requires ``thinking`` blocks from
+prior assistant turns to round-trip on subsequent requests.  The generic
+third-party path strips them (signatures are Anthropic-proprietary and other
+proxies cannot validate them), so without a DeepSeek-specific carve-out the
+next tool-call turn fails with HTTP 400::
+
+    The content[].thinking in the thinking mode must be passed back to the
+    API.
+
+DeepSeek's compatibility matrix lists ``thinking`` as supported but
+``redacted_thinking`` and ``cache_control`` on thinking blocks as not
+supported.  Handling is the same as Kimi's ``/coding`` endpoint: strip
+Anthropic-signed blocks (DeepSeek can't validate them) but preserve unsigned
+blocks that Hermes synthesises from ``reasoning_content``.
+
+See hermes-agent#16748.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+
+class TestDeepSeekAnthropicPreservesThinking:
+    """convert_messages_to_anthropic must replay DeepSeek thinking blocks."""
+
+    @pytest.mark.parametrize(
+        "base_url",
+        [
+            "https://api.deepseek.com/anthropic",
+            "https://api.deepseek.com/anthropic/",
+            "https://api.deepseek.com/anthropic/v1",
+            "https://API.DeepSeek.com/anthropic",
+        ],
+    )
+    def test_unsigned_thinking_block_survives_replay(self, base_url: str) -> None:
+        """Unsigned thinking (synthesised from reasoning_content) must be preserved."""
+        from agent.anthropic_adapter import convert_messages_to_anthropic
+
+        messages = [
+            {"role": "user", "content": "hi"},
+            {
+                "role": "assistant",
+                "reasoning_content": "planning the tool call",
+                "tool_calls": [
+                    {
+                        "id": "call_1",
+                        "type": "function",
+                        "function": {"name": "skill_view", "arguments": "{}"},
+                    }
+                ],
+            },
+            {"role": "tool", "tool_call_id": "call_1", "content": "ok"},
+        ]
+        _system, converted = convert_messages_to_anthropic(
+            messages, base_url=base_url
+        )
+
+        assistant_msg = next(m for m in converted if m["role"] == "assistant")
+        thinking_blocks = [
+            b for b in assistant_msg["content"]
+            if isinstance(b, dict) and b.get("type") == "thinking"
+        ]
+        assert len(thinking_blocks) == 1, (
+            f"DeepSeek /anthropic ({base_url}) must preserve unsigned thinking "
+            "blocks synthesised from reasoning_content — upstream rejects "
+            "replayed tool-call messages without them."
+        )
+        assert thinking_blocks[0]["thinking"] == "planning the tool call"
+        # Synthesised block — never has a signature
+        assert "signature" not in thinking_blocks[0]
+
+    def test_unsigned_thinking_preserved_on_non_latest_assistant_turn(self) -> None:
+        """DeepSeek validates history across every prior assistant turn, not just last."""
+        from agent.anthropic_adapter import convert_messages_to_anthropic
+
+        messages = [
+            {"role": "user", "content": "q1"},
+            {
+                "role": "assistant",
+                "reasoning_content": "r1",
+                "tool_calls": [
+                    {
+                        "id": "call_1",
+                        "type": "function",
+                        "function": {"name": "f", "arguments": "{}"},
+                    }
+                ],
+            },
+            {"role": "tool", "tool_call_id": "call_1", "content": "ok"},
+            {"role": "user", "content": "q2"},
+            {
+                "role": "assistant",
+                "reasoning_content": "r2",
+                "tool_calls": [
+                    {
+                        "id": "call_2",
+                        "type": "function",
+                        "function": {"name": "f", "arguments": "{}"},
+                    }
+                ],
+            },
+            {"role": "tool", "tool_call_id": "call_2", "content": "ok"},
+        ]
+        _system, converted = convert_messages_to_anthropic(
+            messages, base_url="https://api.deepseek.com/anthropic"
+        )
+
+        assistants = [m for m in converted if m["role"] == "assistant"]
+        assert len(assistants) == 2
+        for assistant, expected in zip(assistants, ("r1", "r2")):
+            thinking = [
+                b for b in assistant["content"]
+                if isinstance(b, dict) and b.get("type") == "thinking"
+            ]
+            assert len(thinking) == 1
+            assert thinking[0]["thinking"] == expected
+
+    def test_signed_anthropic_thinking_block_is_stripped(self) -> None:
+        """Anthropic-signed blocks (that leaked through) must still be stripped.
+
+        DeepSeek issues its own signatures and cannot validate Anthropic's —
+        the strip-signed / keep-unsigned split matches the Kimi policy.
+        """
+        from agent.anthropic_adapter import convert_messages_to_anthropic
+
+        messages = [
+            {"role": "user", "content": "hi"},
+            {
+                "role": "assistant",
+                "content": [
+                    {
+                        "type": "thinking",
+                        "thinking": "anthropic-signed payload",
+                        "signature": "anthropic-sig-xyz",
+                    },
+                    {"type": "text", "text": "hello"},
+                ],
+            },
+            {"role": "user", "content": "again"},
+        ]
+        _system, converted = convert_messages_to_anthropic(
+            messages, base_url="https://api.deepseek.com/anthropic"
+        )
+
+        assistant_msg = next(m for m in converted if m["role"] == "assistant")
+        thinking_blocks = [
+            b for b in assistant_msg["content"]
+            if isinstance(b, dict) and b.get("type") == "thinking"
+        ]
+        assert thinking_blocks == [], (
+            "Signed Anthropic thinking blocks must be stripped on DeepSeek — "
+            "DeepSeek cannot validate Anthropic-proprietary signatures."
+        )
+
+    def test_cache_control_stripped_from_thinking_block(self) -> None:
+        """cache_control must still be stripped even when the block is preserved.
+
+        DeepSeek's compatibility matrix lists cache_control on thinking blocks
+        as ignored — cache markers interfere with signature validation on
+        upstreams that do check them, so Hermes strips them everywhere.
+        """
+        from agent.anthropic_adapter import convert_messages_to_anthropic
+
+        messages = [
+            {"role": "user", "content": "hi"},
+            {
+                "role": "assistant",
+                "reasoning_content": "r1",
+                "tool_calls": [
+                    {
+                        "id": "call_1",
+                        "type": "function",
+                        "function": {"name": "f", "arguments": "{}"},
+                    }
+                ],
+            },
+            {"role": "tool", "tool_call_id": "call_1", "content": "ok"},
+        ]
+        # Inject cache_control on the synthesised thinking block after-the-fact
+        # by running conversion once, mutating, then re-running would be
+        # indirect.  Instead check the simpler invariant: no thinking block in
+        # the converted output carries cache_control.
+        _system, converted = convert_messages_to_anthropic(
+            messages, base_url="https://api.deepseek.com/anthropic"
+        )
+        for m in converted:
+            if not isinstance(m.get("content"), list):
+                continue
+            for b in m["content"]:
+                if isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking"):
+                    assert "cache_control" not in b
+
+    def test_openai_compat_deepseek_base_is_not_matched(self) -> None:
+        """The OpenAI-compatible ``api.deepseek.com`` base must NOT trigger the
+        DeepSeek /anthropic branch — it never reaches this adapter, but the
+        detector should still fail closed so an accidental misuse doesn't
+        quietly send signed Anthropic blocks to an OpenAI endpoint.
+        """
+        from agent.anthropic_adapter import _is_deepseek_anthropic_endpoint
+
+        assert _is_deepseek_anthropic_endpoint("https://api.deepseek.com") is False
+        assert _is_deepseek_anthropic_endpoint("https://api.deepseek.com/v1") is False
+        assert _is_deepseek_anthropic_endpoint("https://api.deepseek.com/anthropic") is True
+        assert _is_deepseek_anthropic_endpoint("https://api.deepseek.com/anthropic/v1") is True
+
+    def test_non_deepseek_third_party_still_strips_all_thinking(self) -> None:
+        """MiniMax and other third-party Anthropic endpoints must keep the
+        generic strip-all behaviour (they reject unsigned blocks outright).
+        """
+        from agent.anthropic_adapter import convert_messages_to_anthropic
+
+        messages = [
+            {"role": "user", "content": "hi"},
+            {
+                "role": "assistant",
+                "reasoning_content": "r1",
+                "tool_calls": [
+                    {
+                        "id": "call_1",
+                        "type": "function",
+                        "function": {"name": "f", "arguments": "{}"},
+                    }
+                ],
+            },
+            {"role": "tool", "tool_call_id": "call_1", "content": "ok"},
+        ]
+        _system, converted = convert_messages_to_anthropic(
+            messages, base_url="https://api.minimax.io/anthropic"
+        )
+        assistant_msg = next(m for m in converted if m["role"] == "assistant")
+        thinking_blocks = [
+            b for b in assistant_msg["content"]
+            if isinstance(b, dict) and b.get("type") == "thinking"
+        ]
+        assert thinking_blocks == [], (
+            "Non-DeepSeek third-party endpoints must keep the generic "
+            "strip-all-thinking behaviour — unsigned blocks get rejected."
+        )