test(anthropic): regression guard for DeepSeek /anthropic thinking replay

Covers the #16748 fix:
- unsigned thinking blocks synthesised from reasoning_content survive replay
- non-latest assistant turns keep their thinking (DeepSeek validates every turn)
- signed Anthropic blocks are stripped (DeepSeek can't validate them)
- cache_control is stripped from thinking blocks
- OpenAI-compat base (api.deepseek.com without /anthropic) is NOT matched
- non-DeepSeek third parties (minimax) keep the generic strip-all behaviour
This commit is contained in:
teknium1 2026-04-29 08:09:54 -07:00 committed by Teknium
parent fd5479a4fc
commit fa3338c171

View File

@ -0,0 +1,242 @@
"""Regression guard: preserve thinking blocks on DeepSeek's /anthropic endpoint.
DeepSeek's ``api.deepseek.com/anthropic`` route speaks the Anthropic Messages
protocol but, when thinking mode is enabled, requires ``thinking`` blocks from
prior assistant turns to round-trip on subsequent requests. The generic
third-party path strips them (signatures are Anthropic-proprietary and other
proxies cannot validate them), so without a DeepSeek-specific carve-out the
next tool-call turn fails with HTTP 400::

    The content[].thinking in the thinking mode must be passed back to the
    API.

DeepSeek's compatibility matrix lists ``thinking`` as supported but
``redacted_thinking`` and ``cache_control`` on thinking blocks as not
supported. Handling is the same as Kimi's ``/coding`` endpoint: strip
Anthropic-signed blocks (DeepSeek can't validate them) but preserve unsigned
blocks that Hermes synthesises from ``reasoning_content``.
See hermes-agent#16748.
"""
from __future__ import annotations
import pytest
class TestDeepSeekAnthropicPreservesThinking:
    """convert_messages_to_anthropic must replay DeepSeek thinking blocks.

    Each test feeds OpenAI-style chat messages through
    ``convert_messages_to_anthropic`` with a given ``base_url`` and inspects
    the ``thinking`` blocks on the converted assistant messages.
    """

    # Canonical DeepSeek Anthropic-protocol base URL used by most tests.
    _DEEPSEEK_URL = "https://api.deepseek.com/anthropic"

    @staticmethod
    def _tool_call_exchange(
        call_id: str, reasoning: str, tool_name: str = "f"
    ) -> list[dict]:
        """Build an assistant tool-call turn plus its tool result.

        The assistant message carries ``reasoning_content`` (the field the
        tests expect to become an unsigned thinking block) and a single
        function tool call; the tool message answers that call with ``"ok"``.
        """
        return [
            {
                "role": "assistant",
                "reasoning_content": reasoning,
                "tool_calls": [
                    {
                        "id": call_id,
                        "type": "function",
                        "function": {"name": tool_name, "arguments": "{}"},
                    }
                ],
            },
            {"role": "tool", "tool_call_id": call_id, "content": "ok"},
        ]

    @staticmethod
    def _thinking_blocks(
        message: dict, *, include_redacted: bool = False
    ) -> list[dict]:
        """Return the thinking blocks found in a converted message.

        Messages whose ``content`` is not a list yield an empty list. With
        ``include_redacted=True`` blocks of type ``redacted_thinking`` are
        returned as well.
        """
        content = message.get("content")
        if not isinstance(content, list):
            return []
        wanted = ("thinking", "redacted_thinking") if include_redacted else ("thinking",)
        return [
            block for block in content
            if isinstance(block, dict) and block.get("type") in wanted
        ]

    @pytest.mark.parametrize(
        "base_url",
        [
            "https://api.deepseek.com/anthropic",
            "https://api.deepseek.com/anthropic/",
            "https://api.deepseek.com/anthropic/v1",
            "https://API.DeepSeek.com/anthropic",
        ],
    )
    def test_unsigned_thinking_block_survives_replay(self, base_url: str) -> None:
        """Unsigned thinking (synthesised from reasoning_content) must be preserved."""
        from agent.anthropic_adapter import convert_messages_to_anthropic

        messages = [
            {"role": "user", "content": "hi"},
            *self._tool_call_exchange(
                "call_1", "planning the tool call", tool_name="skill_view"
            ),
        ]
        _system, converted = convert_messages_to_anthropic(
            messages, base_url=base_url
        )

        assistant_msg = next(m for m in converted if m["role"] == "assistant")
        thinking_blocks = self._thinking_blocks(assistant_msg)
        assert len(thinking_blocks) == 1, (
            f"DeepSeek /anthropic ({base_url}) must preserve unsigned thinking "
            "blocks synthesised from reasoning_content — upstream rejects "
            "replayed tool-call messages without them."
        )
        assert thinking_blocks[0]["thinking"] == "planning the tool call"
        # Synthesised block — never has a signature
        assert "signature" not in thinking_blocks[0]

    def test_unsigned_thinking_preserved_on_non_latest_assistant_turn(self) -> None:
        """DeepSeek validates history across every prior assistant turn, not just last."""
        from agent.anthropic_adapter import convert_messages_to_anthropic

        messages = [
            {"role": "user", "content": "q1"},
            *self._tool_call_exchange("call_1", "r1"),
            {"role": "user", "content": "q2"},
            *self._tool_call_exchange("call_2", "r2"),
        ]
        _system, converted = convert_messages_to_anthropic(
            messages, base_url=self._DEEPSEEK_URL
        )

        assistants = [m for m in converted if m["role"] == "assistant"]
        # Length is pinned first so the zip below cannot silently truncate.
        assert len(assistants) == 2
        for assistant, expected in zip(assistants, ("r1", "r2")):
            thinking = self._thinking_blocks(assistant)
            assert len(thinking) == 1
            assert thinking[0]["thinking"] == expected

    def test_signed_anthropic_thinking_block_is_stripped(self) -> None:
        """Anthropic-signed blocks (that leaked through) must still be stripped.

        DeepSeek issues its own signatures and cannot validate Anthropic's —
        the strip-signed / keep-unsigned split matches the Kimi policy.
        """
        from agent.anthropic_adapter import convert_messages_to_anthropic

        messages = [
            {"role": "user", "content": "hi"},
            {
                "role": "assistant",
                "content": [
                    {
                        "type": "thinking",
                        "thinking": "anthropic-signed payload",
                        "signature": "anthropic-sig-xyz",
                    },
                    {"type": "text", "text": "hello"},
                ],
            },
            {"role": "user", "content": "again"},
        ]
        _system, converted = convert_messages_to_anthropic(
            messages, base_url=self._DEEPSEEK_URL
        )

        assistant_msg = next(m for m in converted if m["role"] == "assistant")
        assert self._thinking_blocks(assistant_msg) == [], (
            "Signed Anthropic thinking blocks must be stripped on DeepSeek — "
            "DeepSeek cannot validate Anthropic-proprietary signatures."
        )

    def test_cache_control_stripped_from_thinking_block(self) -> None:
        """cache_control must still be stripped even when the block is preserved.

        DeepSeek's compatibility matrix lists cache_control on thinking blocks
        as ignored; cache markers interfere with signature validation on
        upstreams that do check them, so Hermes strips them everywhere.
        """
        from agent.anthropic_adapter import convert_messages_to_anthropic

        messages = [
            {"role": "user", "content": "hi"},
            *self._tool_call_exchange("call_1", "r1"),
        ]
        # Injecting cache_control on the synthesised thinking block would mean
        # converting once, mutating, then converting again, which is indirect.
        # Instead check the simpler invariant: no thinking block in the
        # converted output carries cache_control.
        _system, converted = convert_messages_to_anthropic(
            messages, base_url=self._DEEPSEEK_URL
        )

        checked = 0
        for message in converted:
            for block in self._thinking_blocks(message, include_redacted=True):
                checked += 1
                assert "cache_control" not in block
        # Without this guard the loop passes vacuously if the adapter ever
        # drops every thinking block, hiding exactly the regression this
        # module exists to catch.
        assert checked, (
            "Expected at least one preserved thinking block on DeepSeek "
            "/anthropic; the cache_control check inspected nothing."
        )

    def test_openai_compat_deepseek_base_is_not_matched(self) -> None:
        """The OpenAI-compatible ``api.deepseek.com`` base must NOT match.

        That base never reaches this adapter, but the detector should still
        fail closed so an accidental misuse doesn't quietly send signed
        Anthropic blocks to an OpenAI endpoint.
        """
        from agent.anthropic_adapter import _is_deepseek_anthropic_endpoint

        expectations = (
            ("https://api.deepseek.com", False),
            ("https://api.deepseek.com/v1", False),
            ("https://api.deepseek.com/anthropic", True),
            ("https://api.deepseek.com/anthropic/v1", True),
        )
        for url, expected in expectations:
            # Identity check on purpose: the detector must return a real bool.
            assert _is_deepseek_anthropic_endpoint(url) is expected, url

    def test_non_deepseek_third_party_still_strips_all_thinking(self) -> None:
        """MiniMax and other third-party Anthropic endpoints must keep the
        generic strip-all behaviour (they reject unsigned blocks outright).
        """
        from agent.anthropic_adapter import convert_messages_to_anthropic

        messages = [
            {"role": "user", "content": "hi"},
            *self._tool_call_exchange("call_1", "r1"),
        ]
        _system, converted = convert_messages_to_anthropic(
            messages, base_url="https://api.minimax.io/anthropic"
        )

        assistant_msg = next(m for m in converted if m["role"] == "assistant")
        assert self._thinking_blocks(assistant_msg) == [], (
            "Non-DeepSeek third-party endpoints must keep the generic "
            "strip-all-thinking behaviour — unsigned blocks get rejected."
        )