diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index af358a2d..d7d955b6 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -202,19 +202,33 @@ def _forbids_sampling_params(model: str) -> bool: # Beta headers for enhanced features (sent with ALL auth types). -# As of Opus 4.7 (2026-04-16), both of these are GA on Claude 4.6+ — the +# As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the # beta headers are still accepted (harmless no-op) but not required. Kept # here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints # that still gate on the headers continue to get the enhanced features. -# Migration guide: remove these if you no longer support ≤4.5 models. +# +# ``context-1m-2025-08-07`` unlocks the 1M context window on Claude Opus 4.6/4.7 +# and Sonnet 4.6 when served via AWS Bedrock or Azure AI Foundry. 1M is GA on +# native Anthropic (api.anthropic.com) for Opus 4.6+, but Bedrock/Azure still +# gate it behind this beta header as of 2026-04 — without it Bedrock caps Opus +# at 200K even though model_metadata.py advertises 1M. The header is a harmless +# no-op on endpoints where 1M is GA. +# +# Migration guide: remove these if you no longer support ≤4.5 models or once +# Bedrock/Azure promote 1M to GA. _COMMON_BETAS = [ "interleaved-thinking-2025-05-14", "fine-grained-tool-streaming-2025-05-14", + "context-1m-2025-08-07", ] # MiniMax's Anthropic-compatible endpoints fail tool-use requests when # the fine-grained tool streaming beta is present. Omit it so tool calls # fall back to the provider's default response path. _TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14" +# 1M context beta — see comment on _COMMON_BETAS above. Stripped for +# Bearer-auth (MiniMax) endpoints since they host their own models and +# unknown Anthropic beta headers risk request rejection. 
+_CONTEXT_1M_BETA = "context-1m-2025-08-07" # Fast mode beta — enables the ``speed: "fast"`` request parameter for # significantly higher output token throughput on Opus 4.6 (~2.5x). @@ -357,9 +371,14 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]: that include Anthropic's ``fine-grained-tool-streaming`` beta — every tool-use message triggers a connection error. Strip that beta for Bearer-auth endpoints while keeping all other betas intact. + + The ``context-1m-2025-08-07`` beta is also stripped for Bearer-auth + endpoints — MiniMax hosts its own models, not Claude, so the header is + irrelevant at best and risks request rejection at worst. """ if _requires_bearer_auth(base_url): - return [b for b in _COMMON_BETAS if b != _TOOL_STREAMING_BETA] + _stripped = {_TOOL_STREAMING_BETA, _CONTEXT_1M_BETA} + return [b for b in _COMMON_BETAS if b not in _stripped] return _COMMON_BETAS @@ -456,6 +475,13 @@ def build_anthropic_bedrock_client(region: str): Claude feature parity: prompt caching, thinking budgets, adaptive thinking, fast mode — features not available via the Converse API. + Attaches the common Anthropic beta headers as client-level defaults so + that Bedrock-hosted Claude models get the same enhanced features as + native Anthropic. The ``context-1m-2025-08-07`` beta in particular + unlocks the 1M context window for Opus 4.6/4.7 on Bedrock — without + it, Bedrock caps these models at 200K even though the Anthropic API + serves them with 1M natively. + Auth uses the boto3 default credential chain (IAM roles, SSO, env vars). 
""" if _anthropic_sdk is None: @@ -473,6 +499,7 @@ def build_anthropic_bedrock_client(region: str): return _anthropic_sdk.AnthropicBedrock( aws_region=region, timeout=Timeout(timeout=900.0, connect=10.0), + default_headers={"anthropic-beta": ",".join(_COMMON_BETAS)}, ) diff --git a/tests/agent/test_bedrock_1m_context.py b/tests/agent/test_bedrock_1m_context.py new file mode 100644 index 00000000..988fafed --- /dev/null +++ b/tests/agent/test_bedrock_1m_context.py @@ -0,0 +1,105 @@ +"""Tests for the 1M-context beta header on AWS Bedrock Claude models. + +Claude Opus 4.6/4.7 and Sonnet 4.6 support a 1M context window, but on AWS +Bedrock (and Azure AI Foundry) that window is still gated behind the +``context-1m-2025-08-07`` beta header as of 2026-04. Without it, Bedrock +caps these models at 200K even though ``model_metadata.py`` advertises 1M. + +These tests guard the invariant that the header is always emitted on the +Bedrock client path, and that it is stripped for MiniMax bearer-auth endpoints. 
+""" + +from unittest.mock import MagicMock, patch + + +class TestBedrockContext1MBeta: + """``context-1m-2025-08-07`` must reach Bedrock Claude requests.""" + + def test_common_betas_includes_1m(self): + from agent.anthropic_adapter import _COMMON_BETAS, _CONTEXT_1M_BETA + + assert _CONTEXT_1M_BETA == "context-1m-2025-08-07" + assert _CONTEXT_1M_BETA in _COMMON_BETAS + + def test_common_betas_for_native_anthropic_includes_1m(self): + """Native Anthropic endpoints (and Bedrock with empty base_url) get 1M.""" + from agent.anthropic_adapter import ( + _common_betas_for_base_url, + _CONTEXT_1M_BETA, + ) + + assert _CONTEXT_1M_BETA in _common_betas_for_base_url(None) + assert _CONTEXT_1M_BETA in _common_betas_for_base_url("") + assert _CONTEXT_1M_BETA in _common_betas_for_base_url( + "https://api.anthropic.com" + ) + + def test_common_betas_strips_1m_for_minimax(self): + """MiniMax bearer-auth endpoints host their own models — strip 1M beta.""" + from agent.anthropic_adapter import ( + _common_betas_for_base_url, + _CONTEXT_1M_BETA, + ) + + for url in ( + "https://api.minimax.io/anthropic", + "https://api.minimaxi.com/anthropic", + ): + betas = _common_betas_for_base_url(url) + assert _CONTEXT_1M_BETA not in betas, ( + f"1M beta must be stripped for MiniMax bearer endpoint {url}" + ) + # Other betas still present + assert "interleaved-thinking-2025-05-14" in betas + + def test_build_anthropic_bedrock_client_sends_1m_beta(self): + """AnthropicBedrock client must carry the 1M beta in default_headers. + + This is the load-bearing assertion for the reported bug: + without this header Bedrock serves Opus 4.6/4.7 with a 200K cap. 
+ """ + import agent.anthropic_adapter as adapter + + fake_sdk = MagicMock() + fake_sdk.AnthropicBedrock = MagicMock() + + with patch.object(adapter, "_anthropic_sdk", fake_sdk): + adapter.build_anthropic_bedrock_client(region="us-west-2") + + call_kwargs = fake_sdk.AnthropicBedrock.call_args.kwargs + assert call_kwargs["aws_region"] == "us-west-2" + + default_headers = call_kwargs.get("default_headers") or {} + beta_header = default_headers.get("anthropic-beta", "") + assert "context-1m-2025-08-07" in beta_header, ( + "Bedrock client must send context-1m-2025-08-07 or Opus 4.6/4.7 " + "silently caps at 200K context" + ) + # Other common betas still present — no regression. + assert "interleaved-thinking-2025-05-14" in beta_header + assert "fine-grained-tool-streaming-2025-05-14" in beta_header + + def test_build_anthropic_kwargs_includes_1m_for_bedrock_fastmode(self): + """Fast-mode requests (per-request extra_headers) still include 1M beta. + + Per-request extra_headers override client-level default_headers, so + the fast-mode path must re-include everything in _COMMON_BETAS. + """ + from agent.anthropic_adapter import build_anthropic_kwargs + + kwargs = build_anthropic_kwargs( + model="claude-opus-4-7", + messages=[{"role": "user", "content": "hi"}], + tools=None, + max_tokens=1024, + reasoning_config=None, + is_oauth=False, + # Empty base_url mirrors AnthropicBedrock (no HTTP base URL) + base_url=None, + fast_mode=True, + ) + beta_header = kwargs.get("extra_headers", {}).get("anthropic-beta", "") + assert "context-1m-2025-08-07" in beta_header, ( + "fast-mode extra_headers must carry the 1M beta or it overrides " + "client-level default_headers and Bedrock drops back to 200K" + )