fix(bedrock): send context-1m-2025-08-07 beta so Opus 4.6/4.7 get 1M context (#16793)
On AWS Bedrock (and Azure AI Foundry), Claude Opus 4.6/4.7 and Sonnet 4.6 are capped at 200K context unless the request carries the `context-1m-2025-08-07` beta header. On native Anthropic (api.anthropic.com) 1M went GA so the header is a harmless no-op, but Bedrock/Azure still gate it as beta as of 2026-04. Hermes was advertising 1M in model_metadata.py (`claude-opus-4-7: 1000000`) while silently sending a request without the beta — so Bedrock users saw a 200K ceiling with no error message, and no config knob unblocked it. Claude Code sends this header by default, which is why the same Bedrock credentials worked there. - Add `context-1m-2025-08-07` to `_COMMON_BETAS` (alongside interleaved thinking and fine-grained tool streaming). - Strip it in `_common_betas_for_base_url` for MiniMax bearer-auth endpoints — they host their own models, not Claude, so Anthropic beta headers are irrelevant and could risk rejection. - Attach `_COMMON_BETAS` as `default_headers` on the AnthropicBedrock client. Previously that constructor passed no betas at all, so native Anthropic had the 1M unlock via default_headers but Bedrock didn't. - Fast-mode per-request `extra_headers` already rebuilds from `_common_betas_for_base_url`, so it picks up the 1M beta automatically. Reported by user 'Rodmar' on Discord: Bedrock Opus 4.7 stuck at 200K while same credentials worked in Claude Code.
This commit is contained in:
parent
461ef88705
commit
a7cdd4133c
@ -202,19 +202,33 @@ def _forbids_sampling_params(model: str) -> bool:
|
||||
|
||||
|
||||
# Beta headers for enhanced features (sent with ALL auth types).
|
||||
# As of Opus 4.7 (2026-04-16), both of these are GA on Claude 4.6+ — the
|
||||
# As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the
|
||||
# beta headers are still accepted (harmless no-op) but not required. Kept
|
||||
# here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints
|
||||
# that still gate on the headers continue to get the enhanced features.
|
||||
# Migration guide: remove these if you no longer support ≤4.5 models.
|
||||
#
|
||||
# ``context-1m-2025-08-07`` unlocks the 1M context window on Claude Opus 4.6/4.7
|
||||
# and Sonnet 4.6 when served via AWS Bedrock or Azure AI Foundry. 1M is GA on
|
||||
# native Anthropic (api.anthropic.com) for Opus 4.6+, but Bedrock/Azure still
|
||||
# gate it behind this beta header as of 2026-04 — without it Bedrock caps Opus
|
||||
# at 200K even though model_metadata.py advertises 1M. The header is a harmless
|
||||
# no-op on endpoints where 1M is GA.
|
||||
#
|
||||
# Migration guide: remove these if you no longer support ≤4.5 models or once
|
||||
# Bedrock/Azure promote 1M to GA.
|
||||
_COMMON_BETAS = [
|
||||
"interleaved-thinking-2025-05-14",
|
||||
"fine-grained-tool-streaming-2025-05-14",
|
||||
"context-1m-2025-08-07",
|
||||
]
|
||||
# MiniMax's Anthropic-compatible endpoints fail tool-use requests when
|
||||
# the fine-grained tool streaming beta is present. Omit it so tool calls
|
||||
# fall back to the provider's default response path.
|
||||
_TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14"
|
||||
# 1M context beta — see comment on _COMMON_BETAS above. Stripped for
|
||||
# Bearer-auth (MiniMax) endpoints since they host their own models and
|
||||
# unknown Anthropic beta headers risk request rejection.
|
||||
_CONTEXT_1M_BETA = "context-1m-2025-08-07"
|
||||
|
||||
# Fast mode beta — enables the ``speed: "fast"`` request parameter for
|
||||
# significantly higher output token throughput on Opus 4.6 (~2.5x).
|
||||
@ -357,9 +371,14 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]:
|
||||
that include Anthropic's ``fine-grained-tool-streaming`` beta — every
|
||||
tool-use message triggers a connection error. Strip that beta for
|
||||
Bearer-auth endpoints while keeping all other betas intact.
|
||||
|
||||
The ``context-1m-2025-08-07`` beta is also stripped for Bearer-auth
|
||||
endpoints — MiniMax hosts its own models, not Claude, so the header is
|
||||
irrelevant at best and risks request rejection at worst.
|
||||
"""
|
||||
if _requires_bearer_auth(base_url):
|
||||
return [b for b in _COMMON_BETAS if b != _TOOL_STREAMING_BETA]
|
||||
_stripped = {_TOOL_STREAMING_BETA, _CONTEXT_1M_BETA}
|
||||
return [b for b in _COMMON_BETAS if b not in _stripped]
|
||||
return _COMMON_BETAS
|
||||
|
||||
|
||||
@ -456,6 +475,13 @@ def build_anthropic_bedrock_client(region: str):
|
||||
Claude feature parity: prompt caching, thinking budgets, adaptive
|
||||
thinking, fast mode — features not available via the Converse API.
|
||||
|
||||
Attaches the common Anthropic beta headers as client-level defaults so
|
||||
that Bedrock-hosted Claude models get the same enhanced features as
|
||||
native Anthropic. The ``context-1m-2025-08-07`` beta in particular
|
||||
unlocks the 1M context window for Opus 4.6/4.7 on Bedrock — without
|
||||
it, Bedrock caps these models at 200K even though the Anthropic API
|
||||
serves them with 1M natively.
|
||||
|
||||
Auth uses the boto3 default credential chain (IAM roles, SSO, env vars).
|
||||
"""
|
||||
if _anthropic_sdk is None:
|
||||
@ -473,6 +499,7 @@ def build_anthropic_bedrock_client(region: str):
|
||||
return _anthropic_sdk.AnthropicBedrock(
|
||||
aws_region=region,
|
||||
timeout=Timeout(timeout=900.0, connect=10.0),
|
||||
default_headers={"anthropic-beta": ",".join(_COMMON_BETAS)},
|
||||
)
|
||||
|
||||
|
||||
|
||||
105
tests/agent/test_bedrock_1m_context.py
Normal file
105
tests/agent/test_bedrock_1m_context.py
Normal file
@ -0,0 +1,105 @@
|
||||
"""Tests for the 1M-context beta header on AWS Bedrock Claude models.
|
||||
|
||||
Claude Opus 4.6/4.7 and Sonnet 4.6 support a 1M context window, but on AWS
|
||||
Bedrock (and Azure AI Foundry) that window is still gated behind the
|
||||
``context-1m-2025-08-07`` beta header as of 2026-04. Without it, Bedrock
|
||||
caps these models at 200K even though ``model_metadata.py`` advertises 1M.
|
||||
|
||||
These tests guard the invariant that the header is always emitted on the
|
||||
Bedrock client path, and that it is stripped for MiniMax bearer-auth endpoints.
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
|
||||
class TestBedrockContext1MBeta:
    """``context-1m-2025-08-07`` must reach Bedrock Claude requests."""

    def test_common_betas_includes_1m(self):
        """The 1M beta constant has the expected value and is in _COMMON_BETAS."""
        from agent.anthropic_adapter import _COMMON_BETAS, _CONTEXT_1M_BETA

        assert _CONTEXT_1M_BETA == "context-1m-2025-08-07"
        assert _CONTEXT_1M_BETA in _COMMON_BETAS

    def test_common_betas_for_native_anthropic_includes_1m(self):
        """Native Anthropic endpoints (and Bedrock with empty base_url) get 1M."""
        from agent.anthropic_adapter import (
            _common_betas_for_base_url,
            _CONTEXT_1M_BETA,
        )

        assert _CONTEXT_1M_BETA in _common_betas_for_base_url(None)
        assert _CONTEXT_1M_BETA in _common_betas_for_base_url("")
        assert _CONTEXT_1M_BETA in _common_betas_for_base_url(
            "https://api.anthropic.com"
        )

    def test_common_betas_strips_1m_for_minimax(self):
        """MiniMax bearer-auth endpoints host their own models — strip 1M beta."""
        from agent.anthropic_adapter import (
            _common_betas_for_base_url,
            _CONTEXT_1M_BETA,
        )

        for url in (
            "https://api.minimax.io/anthropic",
            "https://api.minimaxi.com/anthropic",
        ):
            betas = _common_betas_for_base_url(url)
            assert _CONTEXT_1M_BETA not in betas, (
                f"1M beta must be stripped for MiniMax bearer endpoint {url}"
            )
            # Other betas still present
            assert "interleaved-thinking-2025-05-14" in betas

    def test_build_anthropic_bedrock_client_sends_1m_beta(self):
        """AnthropicBedrock client must carry the 1M beta in default_headers.

        This is the load-bearing assertion for the reported bug:
        without this header Bedrock serves Opus 4.6/4.7 with a 200K cap.
        """
        import agent.anthropic_adapter as adapter

        # Stand-in SDK so the constructor call can be inspected without
        # importing the real SDK or touching AWS credentials.
        fake_sdk = MagicMock()
        fake_sdk.AnthropicBedrock = MagicMock()

        with patch.object(adapter, "_anthropic_sdk", fake_sdk):
            adapter.build_anthropic_bedrock_client(region="us-west-2")

        call_kwargs = fake_sdk.AnthropicBedrock.call_args.kwargs
        assert call_kwargs["aws_region"] == "us-west-2"

        # ``or {}`` turns a missing/None default_headers into a clean
        # assertion failure below rather than an AttributeError.
        default_headers = call_kwargs.get("default_headers") or {}
        beta_header = default_headers.get("anthropic-beta", "")
        assert "context-1m-2025-08-07" in beta_header, (
            "Bedrock client must send context-1m-2025-08-07 or Opus 4.6/4.7 "
            "silently caps at 200K context"
        )
        # Other common betas still present — no regression.
        assert "interleaved-thinking-2025-05-14" in beta_header
        assert "fine-grained-tool-streaming-2025-05-14" in beta_header

    def test_build_anthropic_kwargs_includes_1m_for_bedrock_fastmode(self):
        """Fast-mode requests (per-request extra_headers) still include 1M beta.

        Per-request extra_headers override client-level default_headers, so
        the fast-mode path must re-include everything in _COMMON_BETAS.
        """
        from agent.anthropic_adapter import build_anthropic_kwargs

        kwargs = build_anthropic_kwargs(
            model="claude-opus-4-7",
            messages=[{"role": "user", "content": "hi"}],
            tools=None,
            max_tokens=1024,
            reasoning_config=None,
            is_oauth=False,
            # base_url=None mirrors AnthropicBedrock (no HTTP base URL)
            base_url=None,
            fast_mode=True,
        )
        # Same ``or {}`` guard as the Bedrock client test: an explicit
        # ``extra_headers=None`` must fail the assertion with its message,
        # not blow up with AttributeError on ``None.get``.
        extra_headers = kwargs.get("extra_headers") or {}
        beta_header = extra_headers.get("anthropic-beta", "")
        assert "context-1m-2025-08-07" in beta_header, (
            "fast-mode extra_headers must carry the 1M beta or it overrides "
            "client-level default_headers and Bedrock drops back to 200K"
        )
|
||||
Loading…
Reference in New Issue
Block a user