fix(bedrock): send context-1m-2025-08-07 beta so Opus 4.6/4.7 get 1M context (#16793)

On AWS Bedrock (and Azure AI Foundry), Claude Opus 4.6/4.7 and Sonnet 4.6
are capped at 200K context unless the request carries the
`context-1m-2025-08-07` beta header. On native Anthropic (api.anthropic.com)
1M went GA so the header is a harmless no-op, but Bedrock/Azure still gate
it as beta as of 2026-04.

Hermes was advertising 1M in model_metadata.py (`claude-opus-4-7: 1000000`)
while silently sending a request without the beta — so Bedrock users saw
a 200K ceiling with no error message, and no config knob unblocked it.
Claude Code sends this header by default, which is why the same Bedrock
credentials worked there.

- Add `context-1m-2025-08-07` to `_COMMON_BETAS` (alongside interleaved
  thinking and fine-grained tool streaming).
- Strip it in `_common_betas_for_base_url` for MiniMax bearer-auth
  endpoints — they host their own models, not Claude, so Anthropic beta
  headers are irrelevant and could risk rejection.
- Attach `_COMMON_BETAS` as `default_headers` on the AnthropicBedrock
  client. Previously that constructor passed no betas at all, so native
  Anthropic had the 1M unlock via default_headers but Bedrock didn't.
- Fast-mode per-request `extra_headers` already rebuilds from
  `_common_betas_for_base_url`, so it picks up the 1M beta automatically.

Reported by user 'Rodmar' on Discord: Bedrock Opus 4.7 stuck at 200K while
same credentials worked in Claude Code.
This commit is contained in:
Teknium 2026-04-27 20:41:36 -07:00 committed by GitHub
parent 461ef88705
commit a7cdd4133c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 135 additions and 3 deletions

View File

@ -202,19 +202,33 @@ def _forbids_sampling_params(model: str) -> bool:
# Beta headers for enhanced features (sent with ALL auth types).
# As of Opus 4.7 (2026-04-16), both of these are GA on Claude 4.6+ — the
# As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the
# beta headers are still accepted (harmless no-op) but not required. Kept
# here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints
# that still gate on the headers continue to get the enhanced features.
# Migration guide: remove these if you no longer support ≤4.5 models.
#
# ``context-1m-2025-08-07`` unlocks the 1M context window on Claude Opus 4.6/4.7
# and Sonnet 4.6 when served via AWS Bedrock or Azure AI Foundry. 1M is GA on
# native Anthropic (api.anthropic.com) for Opus 4.6+, but Bedrock/Azure still
# gate it behind this beta header as of 2026-04 — without it Bedrock caps Opus
# at 200K even though model_metadata.py advertises 1M. The header is a harmless
# no-op on endpoints where 1M is GA.
#
# Migration guide: remove these if you no longer support ≤4.5 models or once
# Bedrock/Azure promote 1M to GA.
_COMMON_BETAS = [
"interleaved-thinking-2025-05-14",
"fine-grained-tool-streaming-2025-05-14",
"context-1m-2025-08-07",
]
# MiniMax's Anthropic-compatible endpoints fail tool-use requests when
# the fine-grained tool streaming beta is present. Omit it so tool calls
# fall back to the provider's default response path.
_TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14"
# 1M context beta — see comment on _COMMON_BETAS above. Stripped for
# Bearer-auth (MiniMax) endpoints since they host their own models and
# unknown Anthropic beta headers risk request rejection.
_CONTEXT_1M_BETA = "context-1m-2025-08-07"
# Fast mode beta — enables the ``speed: "fast"`` request parameter for
# significantly higher output token throughput on Opus 4.6 (~2.5x).
@ -357,9 +371,14 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]:
that include Anthropic's ``fine-grained-tool-streaming`` beta — every
tool-use message triggers a connection error. Strip that beta for
Bearer-auth endpoints while keeping all other betas intact.
The ``context-1m-2025-08-07`` beta is also stripped for Bearer-auth
endpoints — MiniMax hosts its own models, not Claude, so the header is
irrelevant at best and risks request rejection at worst.
"""
if _requires_bearer_auth(base_url):
return [b for b in _COMMON_BETAS if b != _TOOL_STREAMING_BETA]
_stripped = {_TOOL_STREAMING_BETA, _CONTEXT_1M_BETA}
return [b for b in _COMMON_BETAS if b not in _stripped]
return _COMMON_BETAS
@ -456,6 +475,13 @@ def build_anthropic_bedrock_client(region: str):
Claude feature parity: prompt caching, thinking budgets, adaptive
thinking, fast mode — features not available via the Converse API.
Attaches the common Anthropic beta headers as client-level defaults so
that Bedrock-hosted Claude models get the same enhanced features as
native Anthropic. The ``context-1m-2025-08-07`` beta in particular
unlocks the 1M context window for Opus 4.6/4.7 on Bedrock — without
it, Bedrock caps these models at 200K even though the Anthropic API
serves them with 1M natively.
Auth uses the boto3 default credential chain (IAM roles, SSO, env vars).
"""
if _anthropic_sdk is None:
@ -473,6 +499,7 @@ def build_anthropic_bedrock_client(region: str):
return _anthropic_sdk.AnthropicBedrock(
aws_region=region,
timeout=Timeout(timeout=900.0, connect=10.0),
default_headers={"anthropic-beta": ",".join(_COMMON_BETAS)},
)

View File

@ -0,0 +1,105 @@
"""Tests for the 1M-context beta header on AWS Bedrock Claude models.
Claude Opus 4.6/4.7 and Sonnet 4.6 support a 1M context window, but on AWS
Bedrock (and Azure AI Foundry) that window is still gated behind the
``context-1m-2025-08-07`` beta header as of 2026-04. Without it, Bedrock
caps these models at 200K even though ``model_metadata.py`` advertises 1M.
These tests guard the invariant that the header is always emitted on the
Bedrock client path, and that it survives the MiniMax bearer-auth strip.
"""
from unittest.mock import MagicMock, patch
class TestBedrockContext1MBeta:
    """``context-1m-2025-08-07`` must reach Bedrock Claude requests."""

    @staticmethod
    def _split_betas(header_value):
        """Split a comma-separated ``anthropic-beta`` value into beta names.

        Checking membership in the parsed list (rather than a substring of
        the raw header string) means a beta only counts as present when it
        appears as a complete, standalone entry.
        """
        return [
            name.strip()
            for name in (header_value or "").split(",")
            if name.strip()
        ]

    def test_common_betas_includes_1m(self):
        """The 1M beta constant has the expected value and is a common beta."""
        from agent.anthropic_adapter import _COMMON_BETAS, _CONTEXT_1M_BETA

        assert _CONTEXT_1M_BETA == "context-1m-2025-08-07"
        assert _CONTEXT_1M_BETA in _COMMON_BETAS

    def test_common_betas_for_native_anthropic_includes_1m(self):
        """Native Anthropic endpoints (and Bedrock with empty base_url) get 1M."""
        from agent.anthropic_adapter import (
            _common_betas_for_base_url,
            _CONTEXT_1M_BETA,
        )

        for base_url in (None, "", "https://api.anthropic.com"):
            assert _CONTEXT_1M_BETA in _common_betas_for_base_url(base_url), (
                f"1M beta must be present for base_url={base_url!r}"
            )

    def test_common_betas_strips_1m_for_minimax(self):
        """MiniMax bearer-auth endpoints host their own models — strip 1M beta."""
        from agent.anthropic_adapter import (
            _common_betas_for_base_url,
            _CONTEXT_1M_BETA,
            _TOOL_STREAMING_BETA,
        )

        for url in (
            "https://api.minimax.io/anthropic",
            "https://api.minimaxi.com/anthropic",
        ):
            betas = _common_betas_for_base_url(url)
            assert _CONTEXT_1M_BETA not in betas, (
                f"1M beta must be stripped for MiniMax bearer endpoint {url}"
            )
            # The bearer-auth branch strips the tool-streaming beta as well;
            # guard that the existing strip was not lost when the 1M strip
            # was added alongside it.
            assert _TOOL_STREAMING_BETA not in betas, (
                f"tool-streaming beta must stay stripped for {url}"
            )
            # Other betas still present
            assert "interleaved-thinking-2025-05-14" in betas

    def test_build_anthropic_bedrock_client_sends_1m_beta(self):
        """AnthropicBedrock client must carry the 1M beta in default_headers.

        This is the load-bearing assertion for the reported bug:
        without this header Bedrock serves Opus 4.6/4.7 with a 200K cap.
        """
        import agent.anthropic_adapter as adapter

        fake_sdk = MagicMock()
        fake_sdk.AnthropicBedrock = MagicMock()
        with patch.object(adapter, "_anthropic_sdk", fake_sdk):
            adapter.build_anthropic_bedrock_client(region="us-west-2")

        call_kwargs = fake_sdk.AnthropicBedrock.call_args.kwargs
        assert call_kwargs["aws_region"] == "us-west-2"

        default_headers = call_kwargs.get("default_headers") or {}
        betas = self._split_betas(default_headers.get("anthropic-beta", ""))
        assert "context-1m-2025-08-07" in betas, (
            "Bedrock client must send context-1m-2025-08-07 or Opus 4.6/4.7 "
            "silently caps at 200K context"
        )
        # Other common betas still present — no regression.
        assert "interleaved-thinking-2025-05-14" in betas
        assert "fine-grained-tool-streaming-2025-05-14" in betas

    def test_build_anthropic_kwargs_includes_1m_for_bedrock_fastmode(self):
        """Fast-mode requests (per-request extra_headers) still include 1M beta.

        Per-request extra_headers override client-level default_headers, so
        the fast-mode path must re-include everything in _COMMON_BETAS.
        """
        from agent.anthropic_adapter import build_anthropic_kwargs

        kwargs = build_anthropic_kwargs(
            model="claude-opus-4-7",
            messages=[{"role": "user", "content": "hi"}],
            tools=None,
            max_tokens=1024,
            reasoning_config=None,
            is_oauth=False,
            # No base_url mirrors AnthropicBedrock (no HTTP base URL)
            base_url=None,
            fast_mode=True,
        )

        # ``or {}`` (not a .get default) so a present-but-None extra_headers
        # yields a clear assertion failure below instead of an AttributeError.
        extra_headers = kwargs.get("extra_headers") or {}
        betas = self._split_betas(extra_headers.get("anthropic-beta", ""))
        assert "context-1m-2025-08-07" in betas, (
            "fast-mode extra_headers must carry the 1M beta or it overrides "
            "client-level default_headers and Bedrock drops back to 200K"
        )