test(anthropic): regression guard for DeepSeek /anthropic thinking replay
Covers the #16748 fix: - unsigned thinking blocks synthesised from reasoning_content survive replay - non-latest assistant turns keep their thinking (DeepSeek validates every turn) - signed Anthropic blocks are stripped (DeepSeek can't validate them) - cache_control is stripped from thinking blocks - OpenAI-compat base (api.deepseek.com without /anthropic) is NOT matched - non-DeepSeek third parties (minimax) keep the generic strip-all behaviour
This commit is contained in:
parent
fd5479a4fc
commit
fa3338c171
242
tests/agent/test_deepseek_anthropic_thinking.py
Normal file
242
tests/agent/test_deepseek_anthropic_thinking.py
Normal file
@ -0,0 +1,242 @@
|
||||
"""Regression guard: preserve thinking blocks on DeepSeek's /anthropic endpoint.
|
||||
|
||||
DeepSeek's ``api.deepseek.com/anthropic`` route speaks the Anthropic Messages
|
||||
protocol but, when thinking mode is enabled, requires ``thinking`` blocks from
|
||||
prior assistant turns to round-trip on subsequent requests. The generic
|
||||
third-party path strips them (signatures are Anthropic-proprietary and other
|
||||
proxies cannot validate them), so without a DeepSeek-specific carve-out the
|
||||
next tool-call turn fails with HTTP 400::
|
||||
|
||||
The content[].thinking in the thinking mode must be passed back to the
|
||||
API.
|
||||
|
||||
DeepSeek's compatibility matrix lists ``thinking`` as supported but
|
||||
``redacted_thinking`` and ``cache_control`` on thinking blocks as not
|
||||
supported. Handling is the same as Kimi's ``/coding`` endpoint: strip
|
||||
Anthropic-signed blocks (DeepSeek can't validate them) but preserve unsigned
|
||||
blocks that Hermes synthesises from ``reasoning_content``.
|
||||
|
||||
See hermes-agent#16748.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestDeepSeekAnthropicPreservesThinking:
|
||||
"""convert_messages_to_anthropic must replay DeepSeek thinking blocks."""
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"base_url",
|
||||
[
|
||||
"https://api.deepseek.com/anthropic",
|
||||
"https://api.deepseek.com/anthropic/",
|
||||
"https://api.deepseek.com/anthropic/v1",
|
||||
"https://API.DeepSeek.com/anthropic",
|
||||
],
|
||||
)
|
||||
def test_unsigned_thinking_block_survives_replay(self, base_url: str) -> None:
|
||||
"""Unsigned thinking (synthesised from reasoning_content) must be preserved."""
|
||||
from agent.anthropic_adapter import convert_messages_to_anthropic
|
||||
|
||||
messages = [
|
||||
{"role": "user", "content": "hi"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"reasoning_content": "planning the tool call",
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "call_1",
|
||||
"type": "function",
|
||||
"function": {"name": "skill_view", "arguments": "{}"},
|
||||
}
|
||||
],
|
||||
},
|
||||
{"role": "tool", "tool_call_id": "call_1", "content": "ok"},
|
||||
]
|
||||
_system, converted = convert_messages_to_anthropic(
|
||||
messages, base_url=base_url
|
||||
)
|
||||
|
||||
assistant_msg = next(m for m in converted if m["role"] == "assistant")
|
||||
thinking_blocks = [
|
||||
b for b in assistant_msg["content"]
|
||||
if isinstance(b, dict) and b.get("type") == "thinking"
|
||||
]
|
||||
assert len(thinking_blocks) == 1, (
|
||||
f"DeepSeek /anthropic ({base_url}) must preserve unsigned thinking "
|
||||
"blocks synthesised from reasoning_content — upstream rejects "
|
||||
"replayed tool-call messages without them."
|
||||
)
|
||||
assert thinking_blocks[0]["thinking"] == "planning the tool call"
|
||||
# Synthesised block — never has a signature
|
||||
assert "signature" not in thinking_blocks[0]
|
||||
|
||||
def test_unsigned_thinking_preserved_on_non_latest_assistant_turn(self) -> None:
|
||||
"""DeepSeek validates history across every prior assistant turn, not just last."""
|
||||
from agent.anthropic_adapter import convert_messages_to_anthropic
|
||||
|
||||
messages = [
|
||||
{"role": "user", "content": "q1"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"reasoning_content": "r1",
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "call_1",
|
||||
"type": "function",
|
||||
"function": {"name": "f", "arguments": "{}"},
|
||||
}
|
||||
],
|
||||
},
|
||||
{"role": "tool", "tool_call_id": "call_1", "content": "ok"},
|
||||
{"role": "user", "content": "q2"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"reasoning_content": "r2",
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "call_2",
|
||||
"type": "function",
|
||||
"function": {"name": "f", "arguments": "{}"},
|
||||
}
|
||||
],
|
||||
},
|
||||
{"role": "tool", "tool_call_id": "call_2", "content": "ok"},
|
||||
]
|
||||
_system, converted = convert_messages_to_anthropic(
|
||||
messages, base_url="https://api.deepseek.com/anthropic"
|
||||
)
|
||||
|
||||
assistants = [m for m in converted if m["role"] == "assistant"]
|
||||
assert len(assistants) == 2
|
||||
for assistant, expected in zip(assistants, ("r1", "r2")):
|
||||
thinking = [
|
||||
b for b in assistant["content"]
|
||||
if isinstance(b, dict) and b.get("type") == "thinking"
|
||||
]
|
||||
assert len(thinking) == 1
|
||||
assert thinking[0]["thinking"] == expected
|
||||
|
||||
def test_signed_anthropic_thinking_block_is_stripped(self) -> None:
|
||||
"""Anthropic-signed blocks (that leaked through) must still be stripped.
|
||||
|
||||
DeepSeek issues its own signatures and cannot validate Anthropic's —
|
||||
the strip-signed / keep-unsigned split matches the Kimi policy.
|
||||
"""
|
||||
from agent.anthropic_adapter import convert_messages_to_anthropic
|
||||
|
||||
messages = [
|
||||
{"role": "user", "content": "hi"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{
|
||||
"type": "thinking",
|
||||
"thinking": "anthropic-signed payload",
|
||||
"signature": "anthropic-sig-xyz",
|
||||
},
|
||||
{"type": "text", "text": "hello"},
|
||||
],
|
||||
},
|
||||
{"role": "user", "content": "again"},
|
||||
]
|
||||
_system, converted = convert_messages_to_anthropic(
|
||||
messages, base_url="https://api.deepseek.com/anthropic"
|
||||
)
|
||||
|
||||
assistant_msg = next(m for m in converted if m["role"] == "assistant")
|
||||
thinking_blocks = [
|
||||
b for b in assistant_msg["content"]
|
||||
if isinstance(b, dict) and b.get("type") == "thinking"
|
||||
]
|
||||
assert thinking_blocks == [], (
|
||||
"Signed Anthropic thinking blocks must be stripped on DeepSeek — "
|
||||
"DeepSeek cannot validate Anthropic-proprietary signatures."
|
||||
)
|
||||
|
||||
def test_cache_control_stripped_from_thinking_block(self) -> None:
|
||||
"""cache_control must still be stripped even when the block is preserved.
|
||||
|
||||
DeepSeek's compatibility matrix lists cache_control on thinking blocks
|
||||
as ignored — cache markers interfere with signature validation on
|
||||
upstreams that do check them, so Hermes strips them everywhere.
|
||||
"""
|
||||
from agent.anthropic_adapter import convert_messages_to_anthropic
|
||||
|
||||
messages = [
|
||||
{"role": "user", "content": "hi"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"reasoning_content": "r1",
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "call_1",
|
||||
"type": "function",
|
||||
"function": {"name": "f", "arguments": "{}"},
|
||||
}
|
||||
],
|
||||
},
|
||||
{"role": "tool", "tool_call_id": "call_1", "content": "ok"},
|
||||
]
|
||||
# Inject cache_control on the synthesised thinking block after-the-fact
|
||||
# by running conversion once, mutating, then re-running would be
|
||||
# indirect. Instead check the simpler invariant: no thinking block in
|
||||
# the converted output carries cache_control.
|
||||
_system, converted = convert_messages_to_anthropic(
|
||||
messages, base_url="https://api.deepseek.com/anthropic"
|
||||
)
|
||||
for m in converted:
|
||||
if not isinstance(m.get("content"), list):
|
||||
continue
|
||||
for b in m["content"]:
|
||||
if isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking"):
|
||||
assert "cache_control" not in b
|
||||
|
||||
def test_openai_compat_deepseek_base_is_not_matched(self) -> None:
|
||||
"""The OpenAI-compatible ``api.deepseek.com`` base must NOT trigger the
|
||||
DeepSeek /anthropic branch — it never reaches this adapter, but the
|
||||
detector should still fail closed so an accidental misuse doesn't
|
||||
quietly send signed Anthropic blocks to an OpenAI endpoint.
|
||||
"""
|
||||
from agent.anthropic_adapter import _is_deepseek_anthropic_endpoint
|
||||
|
||||
assert _is_deepseek_anthropic_endpoint("https://api.deepseek.com") is False
|
||||
assert _is_deepseek_anthropic_endpoint("https://api.deepseek.com/v1") is False
|
||||
assert _is_deepseek_anthropic_endpoint("https://api.deepseek.com/anthropic") is True
|
||||
assert _is_deepseek_anthropic_endpoint("https://api.deepseek.com/anthropic/v1") is True
|
||||
|
||||
def test_non_deepseek_third_party_still_strips_all_thinking(self) -> None:
|
||||
"""MiniMax and other third-party Anthropic endpoints must keep the
|
||||
generic strip-all behaviour (they reject unsigned blocks outright).
|
||||
"""
|
||||
from agent.anthropic_adapter import convert_messages_to_anthropic
|
||||
|
||||
messages = [
|
||||
{"role": "user", "content": "hi"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"reasoning_content": "r1",
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "call_1",
|
||||
"type": "function",
|
||||
"function": {"name": "f", "arguments": "{}"},
|
||||
}
|
||||
],
|
||||
},
|
||||
{"role": "tool", "tool_call_id": "call_1", "content": "ok"},
|
||||
]
|
||||
_system, converted = convert_messages_to_anthropic(
|
||||
messages, base_url="https://api.minimax.io/anthropic"
|
||||
)
|
||||
assistant_msg = next(m for m in converted if m["role"] == "assistant")
|
||||
thinking_blocks = [
|
||||
b for b in assistant_msg["content"]
|
||||
if isinstance(b, dict) and b.get("type") == "thinking"
|
||||
]
|
||||
assert thinking_blocks == [], (
|
||||
"Non-DeepSeek third-party endpoints must keep the generic "
|
||||
"strip-all-thinking behaviour — unsigned blocks get rejected."
|
||||
)
|
||||
Loading…
Reference in New Issue
Block a user