fix(anthropic): reactive recovery for OAuth 1M-context beta rejection (#17752)

Keep context-1m-2025-08-07 in OAuth requests by default so 1M-capable
subscriptions retain full context. When Anthropic rejects a request with
400 'long context beta is not yet available for this subscription',
disable the beta for the rest of the session, rebuild the client, and
retry once.

Addresses #17680 (thanks @JayGwod for the clean reproduction) without
forcing every OAuth user off the 1M context window.

Changes:
- agent/error_classifier.py: new FailoverReason.oauth_long_context_beta_forbidden;
  pattern matches 400 + 'long context beta' + 'not yet available'. Narrow
  enough that the existing 429 tier-gate pattern keeps its own reason.
- agent/anthropic_adapter.py: _common_betas_for_base_url,
  build_anthropic_client, build_anthropic_kwargs gain drop_context_1m_beta
  kwarg. Default=False (1M stays). OAuth OAUTH_ONLY_BETAS unchanged.
- agent/transports/anthropic.py: build_kwargs forwards the flag.
- run_agent.py: self._oauth_1m_beta_disabled flag, retry-once guard,
  recovery branch next to the image-shrink path. _rebuild_anthropic_client
  honors the flag. The main build_kwargs call site threads it through for
  fast-mode extra_headers.
- hermes_cli/doctor.py, hermes_cli/models.py: sibling OAuth /v1/models
  probes get the same reactive retry — previously they'd falsely report
  the Anthropic API as unreachable for affected subscriptions.

Tests: 2190 tests in tests/agent/ plus 94 adjacent integration tests pass. New unit
tests cover the classifier pattern (including the collision guard against
the 429 tier-gate) and the drop_context_1m_beta adapter behavior (default
keeps 1M, flag strips only 1M while preserving every other beta).
This commit is contained in:
Teknium 2026-04-29 21:56:54 -07:00 committed by GitHub
parent 4d363499db
commit 828d3a320b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 264 additions and 23 deletions

View File

@ -461,7 +461,11 @@ def _requires_bearer_auth(base_url: str | None) -> bool:
return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))
def _common_betas_for_base_url(base_url: str | None) -> list[str]:
def _common_betas_for_base_url(
base_url: str | None,
*,
drop_context_1m_beta: bool = False,
) -> list[str]:
"""Return the beta headers that are safe for the configured endpoint.
MiniMax's Anthropic-compatible endpoints (Bearer-auth) reject requests
@ -472,14 +476,30 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]:
The ``context-1m-2025-08-07`` beta is also stripped for Bearer-auth
endpoints — MiniMax hosts its own models, not Claude, so the header is
irrelevant at best and risks request rejection at worst.
``drop_context_1m_beta=True`` additionally strips the 1M-context beta on
endpoints that would otherwise keep it. The OAuth retry path flips this flag after
a subscription rejects the beta with
"The long context beta is not yet available for this subscription" so
subsequent requests in the same session don't repeat the probe. See the
reactive recovery loop in ``run_agent.py`` and issue-comment history on
PR #17680 for the full rationale.
"""
if _requires_bearer_auth(base_url):
_stripped = {_TOOL_STREAMING_BETA, _CONTEXT_1M_BETA}
return [b for b in _COMMON_BETAS if b not in _stripped]
if drop_context_1m_beta:
return [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA]
return _COMMON_BETAS
def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None):
def build_anthropic_client(
api_key: str,
base_url: str = None,
timeout: float = None,
*,
drop_context_1m_beta: bool = False,
):
"""Create an Anthropic client, auto-detecting setup-tokens vs API keys.
If *timeout* is provided it overrides the default 900s read timeout. The
@ -488,6 +508,12 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
Anthropic-compatible providers respect the same knob as OpenAI-wire
providers.
``drop_context_1m_beta=True`` strips ``context-1m-2025-08-07`` from the
client-level ``anthropic-beta`` header. Used by the reactive OAuth retry
path in ``run_agent.py`` when a subscription rejects the beta; leave at
its default on fresh clients so 1M-capable subscriptions keep the
capability.
Returns an anthropic.Anthropic instance.
"""
_anthropic_sdk = _get_anthropic_sdk()
@ -517,7 +543,10 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
kwargs["default_query"] = {"api-version": "2025-04-15"}
else:
kwargs["base_url"] = normalized_base_url
common_betas = _common_betas_for_base_url(normalized_base_url)
common_betas = _common_betas_for_base_url(
normalized_base_url,
drop_context_1m_beta=drop_context_1m_beta,
)
if _is_kimi_coding_endpoint(base_url):
# Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
@ -1689,6 +1718,7 @@ def build_anthropic_kwargs(
context_length: Optional[int] = None,
base_url: str | None = None,
fast_mode: bool = False,
drop_context_1m_beta: bool = False,
) -> Dict[str, Any]:
"""Build kwargs for anthropic.messages.create().
@ -1877,7 +1907,10 @@ def build_anthropic_kwargs(
kwargs.setdefault("extra_body", {})["speed"] = "fast"
# Build extra_headers with ALL applicable betas (the per-request
# extra_headers override the client-level anthropic-beta header).
betas = list(_common_betas_for_base_url(base_url))
betas = list(_common_betas_for_base_url(
base_url,
drop_context_1m_beta=drop_context_1m_beta,
))
if is_oauth:
betas.extend(_OAUTH_ONLY_BETAS)
betas.append(_FAST_MODE_BETA)

View File

@ -54,6 +54,7 @@ class FailoverReason(enum.Enum):
# Provider-specific
thinking_signature = "thinking_signature" # Anthropic thinking block sig invalid
long_context_tier = "long_context_tier" # Anthropic "extra usage" tier gate
oauth_long_context_beta_forbidden = "oauth_long_context_beta_forbidden" # Anthropic OAuth subscription rejects 1M context beta — disable beta and retry
# Catch-all
unknown = "unknown" # Unclassifiable — retry with backoff
@ -450,6 +451,25 @@ def classify_api_error(
should_compress=True,
)
# Anthropic OAuth subscription rejects the 1M-context beta header.
# Observed error body: "The long context beta is not yet available for
# this subscription." Returned as HTTP 400 from native Anthropic when
# the request carries ``anthropic-beta: context-1m-2025-08-07`` but the
# subscription doesn't include 1M context. The recovery path
# in run_agent.py rebuilds the Anthropic client with the beta stripped
# and retries once. Pattern is narrow enough that it won't collide with
# the 429 tier-gate pattern above (different status, different phrase).
if (
status_code == 400
and "long context beta" in error_msg
and "not yet available" in error_msg
):
return _result(
FailoverReason.oauth_long_context_beta_forbidden,
retryable=True,
should_compress=False,
)
# ── 2. HTTP status code classification ──────────────────────────
if status_code is not None:

View File

@ -58,6 +58,7 @@ class AnthropicTransport(ProviderTransport):
context_length: int | None
base_url: str | None
fast_mode: bool
drop_context_1m_beta: bool
"""
from agent.anthropic_adapter import build_anthropic_kwargs
@ -73,6 +74,7 @@ class AnthropicTransport(ProviderTransport):
context_length=params.get("context_length"),
base_url=params.get("base_url"),
fast_mode=params.get("fast_mode", False),
drop_context_1m_beta=params.get("drop_context_1m_beta", False),
)
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:

View File

@ -1023,10 +1023,16 @@ def run_doctor(args):
print(" Checking Anthropic API...", end="", flush=True)
try:
import httpx
from agent.anthropic_adapter import _is_oauth_token, _COMMON_BETAS, _OAUTH_ONLY_BETAS
from agent.anthropic_adapter import (
_is_oauth_token,
_COMMON_BETAS,
_OAUTH_ONLY_BETAS,
_CONTEXT_1M_BETA,
)
headers = {"anthropic-version": "2023-06-01"}
if _is_oauth_token(anthropic_key):
is_oauth = _is_oauth_token(anthropic_key)
if is_oauth:
headers["Authorization"] = f"Bearer {anthropic_key}"
headers["anthropic-beta"] = ",".join(_COMMON_BETAS + _OAUTH_ONLY_BETAS)
else:
@ -1036,6 +1042,25 @@ def run_doctor(args):
headers=headers,
timeout=10
)
# Reactive recovery: OAuth subscriptions that don't include 1M
# context reject the request with 400 "long context beta is not
# yet available for this subscription". Retry once with that
# beta stripped so the doctor check doesn't falsely report the
# Anthropic API as unreachable for those users.
if (
is_oauth
and response.status_code == 400
and "long context beta" in response.text.lower()
and "not yet available" in response.text.lower()
):
headers["anthropic-beta"] = ",".join(
[b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA] + list(_OAUTH_ONLY_BETAS)
)
response = httpx.get(
"https://api.anthropic.com/v1/models",
headers=headers,
timeout=10,
)
if response.status_code == 200:
print(f"\r {color('', Colors.GREEN)} Anthropic API ")
elif response.status_code == 401:

View File

@ -2034,28 +2034,56 @@ def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]:
return None
headers: dict[str, str] = {"anthropic-version": "2023-06-01"}
if _is_oauth_token(token):
is_oauth = _is_oauth_token(token)
if is_oauth:
headers["Authorization"] = f"Bearer {token}"
from agent.anthropic_adapter import _COMMON_BETAS, _OAUTH_ONLY_BETAS
from agent.anthropic_adapter import _COMMON_BETAS, _OAUTH_ONLY_BETAS, _CONTEXT_1M_BETA
headers["anthropic-beta"] = ",".join(_COMMON_BETAS + _OAUTH_ONLY_BETAS)
else:
headers["x-api-key"] = token
req = urllib.request.Request(
"https://api.anthropic.com/v1/models",
headers=headers,
)
try:
def _do_request(h: dict[str, str]):
req = urllib.request.Request(
"https://api.anthropic.com/v1/models",
headers=h,
)
with urllib.request.urlopen(req, timeout=timeout) as resp:
data = json.loads(resp.read().decode())
models = [m["id"] for m in data.get("data", []) if m.get("id")]
# Sort: latest/largest first (opus > sonnet > haiku, higher version first)
return sorted(models, key=lambda m: (
"opus" not in m, # opus first
"sonnet" not in m, # then sonnet
"haiku" not in m, # then haiku
m, # alphabetical within tier
))
return json.loads(resp.read().decode())
try:
try:
data = _do_request(headers)
except urllib.error.HTTPError as http_err:
# Reactive recovery for OAuth subscriptions that reject the 1M
# context beta with 400 "long context beta is not yet available
# for this subscription". Retry once without the beta; re-raise
# anything else so the outer except logs it.
if (
is_oauth
and http_err.code == 400
):
try:
body_text = http_err.read().decode(errors="ignore").lower()
except Exception:
body_text = ""
if "long context beta" in body_text and "not yet available" in body_text:
headers["anthropic-beta"] = ",".join(
[b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA]
+ list(_OAUTH_ONLY_BETAS)
)
data = _do_request(headers)
else:
raise
else:
raise
models = [m["id"] for m in data.get("data", []) if m.get("id")]
# Sort: latest/largest first (opus > sonnet > haiku, higher version first)
return sorted(models, key=lambda m: (
"opus" not in m, # opus first
"sonnet" not in m, # then sonnet
"haiku" not in m, # then haiku
m, # alphabetical within tier
))
except Exception as e:
import logging
logging.getLogger(__name__).debug("Failed to fetch Anthropic models: %s", e)

View File

@ -6210,7 +6210,12 @@ class AIAgent:
correctly rebuilding with the Bedrock SDK when provider is bedrock,
rather than always falling back to build_anthropic_client() which
requires a direct Anthropic API key.
Honors ``self._oauth_1m_beta_disabled`` (set by the reactive recovery
path when an OAuth subscription rejects the 1M-context beta) so the
rebuilt client carries the reduced beta set.
"""
_drop_1m = bool(getattr(self, "_oauth_1m_beta_disabled", False))
if getattr(self, "provider", None) == "bedrock":
from agent.anthropic_adapter import build_anthropic_bedrock_client
region = getattr(self, "_bedrock_region", "us-east-1") or "us-east-1"
@ -6221,6 +6226,7 @@ class AIAgent:
self._anthropic_api_key,
getattr(self, "_anthropic_base_url", None),
timeout=get_provider_request_timeout(self.provider, self.model),
drop_context_1m_beta=_drop_1m,
)
def _interruptible_api_call(self, api_kwargs: dict):
@ -8167,6 +8173,7 @@ class AIAgent:
context_length=ctx_len,
base_url=getattr(self, "_anthropic_base_url", None),
fast_mode=(self.request_overrides or {}).get("speed") == "fast",
drop_context_1m_beta=bool(getattr(self, "_oauth_1m_beta_disabled", False)),
)
# AWS Bedrock native Converse API — bypasses the OpenAI client entirely.
@ -10752,6 +10759,7 @@ class AIAgent:
copilot_auth_retry_attempted=False
thinking_sig_retry_attempted = False
image_shrink_retry_attempted = False
oauth_1m_beta_retry_attempted = False
has_retried_429 = False
restart_with_compressed_messages = False
restart_with_length_continuation = False
@ -11708,6 +11716,36 @@ class AIAgent:
"or shrink didn't reduce size; surfacing original error."
)
# Anthropic OAuth subscription rejected the 1M-context beta
# header ("long context beta is not yet available for this
# subscription"). Disable the beta for the rest of this
# session, rebuild the client, and retry once. 1M-capable
# subscriptions never hit this branch — they accept the
# beta and keep full 1M context. See PR #17680 for the
# original report (we chose reactive recovery over the
# proposed unconditional omit so capable subscriptions
# don't silently lose the capability).
if (
classified.reason == FailoverReason.oauth_long_context_beta_forbidden
and self.api_mode == "anthropic_messages"
and self._is_anthropic_oauth
and not oauth_1m_beta_retry_attempted
):
oauth_1m_beta_retry_attempted = True
if not getattr(self, "_oauth_1m_beta_disabled", False):
self._oauth_1m_beta_disabled = True
try:
self._anthropic_client.close()
except Exception:
pass
self._rebuild_anthropic_client()
self._vprint(
f"{self.log_prefix}🔕 OAuth subscription doesn't support "
f"the 1M-context beta — disabled for this session and retrying...",
force=True,
)
continue
if (
self.api_mode == "codex_responses"
and self.provider == "openai-codex"

View File

@ -66,8 +66,30 @@ class TestBuildAnthropicClient:
assert "claude-code-20250219" in betas
assert "interleaved-thinking-2025-05-14" in betas
assert "fine-grained-tool-streaming-2025-05-14" in betas
# Default: 1M-context beta stays IN for OAuth so 1M-capable
# subscriptions keep full context. The reactive recovery path
# in run_agent.py flips it off only after a subscription
# actually rejects the beta.
assert "context-1m-2025-08-07" in betas
assert "api_key" not in kwargs
def test_oauth_drop_context_1m_beta_strips_only_1m(self):
"""drop_context_1m_beta=True strips context-1m-2025-08-07 while
preserving every other OAuth-relevant beta."""
with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
build_anthropic_client(
"sk-ant-oat01-" + "x" * 60,
drop_context_1m_beta=True,
)
kwargs = mock_sdk.Anthropic.call_args[1]
betas = kwargs["default_headers"]["anthropic-beta"]
assert "context-1m-2025-08-07" not in betas
# Everything else must still be there.
assert "oauth-2025-04-20" in betas
assert "claude-code-20250219" in betas
assert "interleaved-thinking-2025-05-14" in betas
assert "fine-grained-tool-streaming-2025-05-14" in betas
def test_api_key_uses_api_key(self):
with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
build_anthropic_client("sk-ant-api03-something")
@ -77,6 +99,7 @@ class TestBuildAnthropicClient:
# API key auth should still get common betas
betas = kwargs["default_headers"]["anthropic-beta"]
assert "interleaved-thinking-2025-05-14" in betas
assert "context-1m-2025-08-07" in betas
assert "oauth-2025-04-20" not in betas # OAuth-only beta NOT present
assert "claude-code-20250219" not in betas # OAuth-only beta NOT present
@ -963,6 +986,42 @@ class TestBuildAnthropicKwargs:
)
assert kwargs["model"] == "claude-sonnet-4-20250514"
def test_fast_mode_oauth_default_keeps_context_1m_beta(self):
"""Default OAuth fast-mode requests still carry context-1m-2025-08-07."""
kwargs = build_anthropic_kwargs(
model="claude-opus-4-6",
messages=[{"role": "user", "content": "Hi"}],
tools=None,
max_tokens=4096,
reasoning_config=None,
is_oauth=True,
fast_mode=True,
)
betas = kwargs["extra_headers"]["anthropic-beta"]
assert "fast-mode-2026-02-01" in betas
assert "oauth-2025-04-20" in betas
assert "context-1m-2025-08-07" in betas
def test_fast_mode_oauth_drop_context_1m_beta_strips_only_1m(self):
"""drop_context_1m_beta=True strips context-1m from fast-mode
extra_headers while preserving every other OAuth + fast-mode beta."""
kwargs = build_anthropic_kwargs(
model="claude-opus-4-6",
messages=[{"role": "user", "content": "Hi"}],
tools=None,
max_tokens=4096,
reasoning_config=None,
is_oauth=True,
fast_mode=True,
drop_context_1m_beta=True,
)
betas = kwargs["extra_headers"]["anthropic-beta"]
assert "context-1m-2025-08-07" not in betas
assert "fast-mode-2026-02-01" in betas
assert "oauth-2025-04-20" in betas
assert "claude-code-20250219" in betas
assert "interleaved-thinking-2025-05-14" in betas
def test_reasoning_config_maps_to_manual_thinking_for_pre_4_6_models(self):
kwargs = build_anthropic_kwargs(
model="claude-sonnet-4-20250514",

View File

@ -57,7 +57,9 @@ class TestFailoverReason:
"context_overflow", "payload_too_large", "image_too_large",
"model_not_found", "format_error",
"provider_policy_blocked",
"thinking_signature", "long_context_tier", "unknown",
"thinking_signature", "long_context_tier",
"oauth_long_context_beta_forbidden",
"unknown",
}
actual = {r.value for r in FailoverReason}
assert expected == actual
@ -458,6 +460,40 @@ class TestClassifyApiError:
result = classify_api_error(e, provider="anthropic")
assert result.reason == FailoverReason.rate_limit
# ── Provider-specific: Anthropic OAuth 1M-context beta forbidden ──
def test_anthropic_oauth_1m_beta_forbidden(self):
"""400 + 'long context beta is not yet available for this subscription'
→ oauth_long_context_beta_forbidden (retryable, no compression)."""
e = MockAPIError(
"The long context beta is not yet available for this subscription.",
status_code=400,
)
result = classify_api_error(e, provider="anthropic", model="claude-sonnet-4.6")
assert result.reason == FailoverReason.oauth_long_context_beta_forbidden
assert result.retryable is True
assert result.should_compress is False
def test_anthropic_oauth_1m_beta_forbidden_does_not_collide_with_tier_gate(self):
"""The 429 'extra usage' + 'long context' tier gate keeps its own
classification even though its message mentions 'long context'."""
e = MockAPIError(
"Extra usage is required for long context requests over 200k tokens",
status_code=429,
)
result = classify_api_error(e, provider="anthropic", model="claude-sonnet-4.6")
assert result.reason == FailoverReason.long_context_tier
def test_400_without_beta_phrase_is_not_1m_beta_forbidden(self):
"""A generic 400 that happens to mention 'long context' but not the
exact beta-availability phrase should not be misclassified."""
e = MockAPIError(
"long context window exceeded",
status_code=400,
)
result = classify_api_error(e, provider="anthropic")
assert result.reason != FailoverReason.oauth_long_context_beta_forbidden
# ── Transport errors ──
def test_read_timeout(self):