fix(anthropic): reactive recovery for OAuth 1M-context beta rejection (#17752)
Keep context-1m-2025-08-07 in OAuth requests by default so 1M-capable subscriptions retain full context. When Anthropic rejects a request with 400 'long context beta is not yet available for this subscription', disable the beta for the rest of the session, rebuild the client, and retry once. Addresses #17680 (thanks @JayGwod for the clean reproduction) without forcing every OAuth user off the 1M context window. Changes: - agent/error_classifier.py: new FailoverReason.oauth_long_context_beta_forbidden; pattern matches 400 + 'long context beta' + 'not yet available'. Narrow enough that the existing 429 tier-gate pattern keeps its own reason. - agent/anthropic_adapter.py: _common_betas_for_base_url, build_anthropic_client, build_anthropic_kwargs gain drop_context_1m_beta kwarg. Default=False (1M stays). OAuth OAUTH_ONLY_BETAS unchanged. - agent/transports/anthropic.py: build_kwargs forwards the flag. - run_agent.py: self._oauth_1m_beta_disabled flag, retry-once guard, recovery branch next to the image-shrink path. _rebuild_anthropic_client honors the flag. The main build_kwargs call site threads it through for fast-mode extra_headers. - hermes_cli/doctor.py, hermes_cli/models.py: sibling OAuth /v1/models probes get the same reactive retry — previously they'd falsely report the Anthropic API as unreachable for affected subscriptions. Tests: 2190 tests/agent/ + 94 adjacent integration tests pass. New unit tests cover the classifier pattern (including the collision guard against the 429 tier-gate) and the drop_context_1m_beta adapter behavior (default keeps 1M, flag strips only 1M while preserving every other beta).
This commit is contained in:
parent
4d363499db
commit
828d3a320b
@ -461,7 +461,11 @@ def _requires_bearer_auth(base_url: str | None) -> bool:
|
||||
return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))
|
||||
|
||||
|
||||
def _common_betas_for_base_url(base_url: str | None) -> list[str]:
|
||||
def _common_betas_for_base_url(
|
||||
base_url: str | None,
|
||||
*,
|
||||
drop_context_1m_beta: bool = False,
|
||||
) -> list[str]:
|
||||
"""Return the beta headers that are safe for the configured endpoint.
|
||||
|
||||
MiniMax's Anthropic-compatible endpoints (Bearer-auth) reject requests
|
||||
@ -472,14 +476,30 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]:
|
||||
The ``context-1m-2025-08-07`` beta is also stripped for Bearer-auth
|
||||
endpoints — MiniMax hosts its own models, not Claude, so the header is
|
||||
irrelevant at best and risks request rejection at worst.
|
||||
|
||||
``drop_context_1m_beta=True`` additionally strips the 1M-context beta on
|
||||
otherwise-unrelated endpoints. The OAuth retry path flips this flag after
|
||||
a subscription rejects the beta with
|
||||
"The long context beta is not yet available for this subscription" so
|
||||
subsequent requests in the same session don't repeat the probe. See the
|
||||
reactive recovery loop in ``run_agent.py`` and issue-comment history on
|
||||
PR #17680 for the full rationale.
|
||||
"""
|
||||
if _requires_bearer_auth(base_url):
|
||||
_stripped = {_TOOL_STREAMING_BETA, _CONTEXT_1M_BETA}
|
||||
return [b for b in _COMMON_BETAS if b not in _stripped]
|
||||
if drop_context_1m_beta:
|
||||
return [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA]
|
||||
return _COMMON_BETAS
|
||||
|
||||
|
||||
def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None):
|
||||
def build_anthropic_client(
|
||||
api_key: str,
|
||||
base_url: str = None,
|
||||
timeout: float = None,
|
||||
*,
|
||||
drop_context_1m_beta: bool = False,
|
||||
):
|
||||
"""Create an Anthropic client, auto-detecting setup-tokens vs API keys.
|
||||
|
||||
If *timeout* is provided it overrides the default 900s read timeout. The
|
||||
@ -488,6 +508,12 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
|
||||
Anthropic-compatible providers respect the same knob as OpenAI-wire
|
||||
providers.
|
||||
|
||||
``drop_context_1m_beta=True`` strips ``context-1m-2025-08-07`` from the
|
||||
client-level ``anthropic-beta`` header. Used by the reactive OAuth retry
|
||||
path in ``run_agent.py`` when a subscription rejects the beta; leave at
|
||||
its default on fresh clients so 1M-capable subscriptions keep the
|
||||
capability.
|
||||
|
||||
Returns an anthropic.Anthropic instance.
|
||||
"""
|
||||
_anthropic_sdk = _get_anthropic_sdk()
|
||||
@ -517,7 +543,10 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
|
||||
kwargs["default_query"] = {"api-version": "2025-04-15"}
|
||||
else:
|
||||
kwargs["base_url"] = normalized_base_url
|
||||
common_betas = _common_betas_for_base_url(normalized_base_url)
|
||||
common_betas = _common_betas_for_base_url(
|
||||
normalized_base_url,
|
||||
drop_context_1m_beta=drop_context_1m_beta,
|
||||
)
|
||||
|
||||
if _is_kimi_coding_endpoint(base_url):
|
||||
# Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
|
||||
@ -1689,6 +1718,7 @@ def build_anthropic_kwargs(
|
||||
context_length: Optional[int] = None,
|
||||
base_url: str | None = None,
|
||||
fast_mode: bool = False,
|
||||
drop_context_1m_beta: bool = False,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build kwargs for anthropic.messages.create().
|
||||
|
||||
@ -1877,7 +1907,10 @@ def build_anthropic_kwargs(
|
||||
kwargs.setdefault("extra_body", {})["speed"] = "fast"
|
||||
# Build extra_headers with ALL applicable betas (the per-request
|
||||
# extra_headers override the client-level anthropic-beta header).
|
||||
betas = list(_common_betas_for_base_url(base_url))
|
||||
betas = list(_common_betas_for_base_url(
|
||||
base_url,
|
||||
drop_context_1m_beta=drop_context_1m_beta,
|
||||
))
|
||||
if is_oauth:
|
||||
betas.extend(_OAUTH_ONLY_BETAS)
|
||||
betas.append(_FAST_MODE_BETA)
|
||||
|
||||
@ -54,6 +54,7 @@ class FailoverReason(enum.Enum):
|
||||
# Provider-specific
|
||||
thinking_signature = "thinking_signature" # Anthropic thinking block sig invalid
|
||||
long_context_tier = "long_context_tier" # Anthropic "extra usage" tier gate
|
||||
oauth_long_context_beta_forbidden = "oauth_long_context_beta_forbidden" # Anthropic OAuth subscription rejects 1M context beta — disable beta and retry
|
||||
|
||||
# Catch-all
|
||||
unknown = "unknown" # Unclassifiable — retry with backoff
|
||||
@ -450,6 +451,25 @@ def classify_api_error(
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# Anthropic OAuth subscription rejects the 1M-context beta header.
|
||||
# Observed error body: "The long context beta is not yet available for
|
||||
# this subscription." Returned as HTTP 400 from native Anthropic when
|
||||
# the subscription doesn't include 1M context, even though the request
|
||||
# carries ``anthropic-beta: context-1m-2025-08-07``. The recovery path
|
||||
# in run_agent.py rebuilds the Anthropic client with the beta stripped
|
||||
# and retries once. Pattern is narrow enough that it won't collide with
|
||||
# the 429 tier-gate pattern above (different status, different phrase).
|
||||
if (
|
||||
status_code == 400
|
||||
and "long context beta" in error_msg
|
||||
and "not yet available" in error_msg
|
||||
):
|
||||
return _result(
|
||||
FailoverReason.oauth_long_context_beta_forbidden,
|
||||
retryable=True,
|
||||
should_compress=False,
|
||||
)
|
||||
|
||||
# ── 2. HTTP status code classification ──────────────────────────
|
||||
|
||||
if status_code is not None:
|
||||
|
||||
@ -58,6 +58,7 @@ class AnthropicTransport(ProviderTransport):
|
||||
context_length: int | None
|
||||
base_url: str | None
|
||||
fast_mode: bool
|
||||
drop_context_1m_beta: bool
|
||||
"""
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||
|
||||
@ -73,6 +74,7 @@ class AnthropicTransport(ProviderTransport):
|
||||
context_length=params.get("context_length"),
|
||||
base_url=params.get("base_url"),
|
||||
fast_mode=params.get("fast_mode", False),
|
||||
drop_context_1m_beta=params.get("drop_context_1m_beta", False),
|
||||
)
|
||||
|
||||
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
|
||||
|
||||
@ -1023,10 +1023,16 @@ def run_doctor(args):
|
||||
print(" Checking Anthropic API...", end="", flush=True)
|
||||
try:
|
||||
import httpx
|
||||
from agent.anthropic_adapter import _is_oauth_token, _COMMON_BETAS, _OAUTH_ONLY_BETAS
|
||||
from agent.anthropic_adapter import (
|
||||
_is_oauth_token,
|
||||
_COMMON_BETAS,
|
||||
_OAUTH_ONLY_BETAS,
|
||||
_CONTEXT_1M_BETA,
|
||||
)
|
||||
|
||||
headers = {"anthropic-version": "2023-06-01"}
|
||||
if _is_oauth_token(anthropic_key):
|
||||
is_oauth = _is_oauth_token(anthropic_key)
|
||||
if is_oauth:
|
||||
headers["Authorization"] = f"Bearer {anthropic_key}"
|
||||
headers["anthropic-beta"] = ",".join(_COMMON_BETAS + _OAUTH_ONLY_BETAS)
|
||||
else:
|
||||
@ -1036,6 +1042,25 @@ def run_doctor(args):
|
||||
headers=headers,
|
||||
timeout=10
|
||||
)
|
||||
# Reactive recovery: OAuth subscriptions that don't include 1M
|
||||
# context reject the request with 400 "long context beta is not
|
||||
# yet available for this subscription". Retry once with that
|
||||
# beta stripped so the doctor check doesn't falsely report the
|
||||
# Anthropic API as unreachable for those users.
|
||||
if (
|
||||
is_oauth
|
||||
and response.status_code == 400
|
||||
and "long context beta" in response.text.lower()
|
||||
and "not yet available" in response.text.lower()
|
||||
):
|
||||
headers["anthropic-beta"] = ",".join(
|
||||
[b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA] + list(_OAUTH_ONLY_BETAS)
|
||||
)
|
||||
response = httpx.get(
|
||||
"https://api.anthropic.com/v1/models",
|
||||
headers=headers,
|
||||
timeout=10,
|
||||
)
|
||||
if response.status_code == 200:
|
||||
print(f"\r {color('✓', Colors.GREEN)} Anthropic API ")
|
||||
elif response.status_code == 401:
|
||||
|
||||
@ -2034,28 +2034,56 @@ def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]:
|
||||
return None
|
||||
|
||||
headers: dict[str, str] = {"anthropic-version": "2023-06-01"}
|
||||
if _is_oauth_token(token):
|
||||
is_oauth = _is_oauth_token(token)
|
||||
if is_oauth:
|
||||
headers["Authorization"] = f"Bearer {token}"
|
||||
from agent.anthropic_adapter import _COMMON_BETAS, _OAUTH_ONLY_BETAS
|
||||
from agent.anthropic_adapter import _COMMON_BETAS, _OAUTH_ONLY_BETAS, _CONTEXT_1M_BETA
|
||||
headers["anthropic-beta"] = ",".join(_COMMON_BETAS + _OAUTH_ONLY_BETAS)
|
||||
else:
|
||||
headers["x-api-key"] = token
|
||||
|
||||
req = urllib.request.Request(
|
||||
"https://api.anthropic.com/v1/models",
|
||||
headers=headers,
|
||||
)
|
||||
try:
|
||||
def _do_request(h: dict[str, str]):
|
||||
req = urllib.request.Request(
|
||||
"https://api.anthropic.com/v1/models",
|
||||
headers=h,
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
data = json.loads(resp.read().decode())
|
||||
models = [m["id"] for m in data.get("data", []) if m.get("id")]
|
||||
# Sort: latest/largest first (opus > sonnet > haiku, higher version first)
|
||||
return sorted(models, key=lambda m: (
|
||||
"opus" not in m, # opus first
|
||||
"sonnet" not in m, # then sonnet
|
||||
"haiku" not in m, # then haiku
|
||||
m, # alphabetical within tier
|
||||
))
|
||||
return json.loads(resp.read().decode())
|
||||
|
||||
try:
|
||||
try:
|
||||
data = _do_request(headers)
|
||||
except urllib.error.HTTPError as http_err:
|
||||
# Reactive recovery for OAuth subscriptions that reject the 1M
|
||||
# context beta with 400 "long context beta is not yet available
|
||||
# for this subscription". Retry once without the beta; re-raise
|
||||
# anything else so the outer except logs it.
|
||||
if (
|
||||
is_oauth
|
||||
and http_err.code == 400
|
||||
):
|
||||
try:
|
||||
body_text = http_err.read().decode(errors="ignore").lower()
|
||||
except Exception:
|
||||
body_text = ""
|
||||
if "long context beta" in body_text and "not yet available" in body_text:
|
||||
headers["anthropic-beta"] = ",".join(
|
||||
[b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA]
|
||||
+ list(_OAUTH_ONLY_BETAS)
|
||||
)
|
||||
data = _do_request(headers)
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
raise
|
||||
models = [m["id"] for m in data.get("data", []) if m.get("id")]
|
||||
# Sort: latest/largest first (opus > sonnet > haiku, higher version first)
|
||||
return sorted(models, key=lambda m: (
|
||||
"opus" not in m, # opus first
|
||||
"sonnet" not in m, # then sonnet
|
||||
"haiku" not in m, # then haiku
|
||||
m, # alphabetical within tier
|
||||
))
|
||||
except Exception as e:
|
||||
import logging
|
||||
logging.getLogger(__name__).debug("Failed to fetch Anthropic models: %s", e)
|
||||
|
||||
38
run_agent.py
38
run_agent.py
@ -6210,7 +6210,12 @@ class AIAgent:
|
||||
correctly — rebuilding with the Bedrock SDK when provider is bedrock,
|
||||
rather than always falling back to build_anthropic_client() which
|
||||
requires a direct Anthropic API key.
|
||||
|
||||
Honors ``self._oauth_1m_beta_disabled`` (set by the reactive recovery
|
||||
path when an OAuth subscription rejects the 1M-context beta) so the
|
||||
rebuilt client carries the reduced beta set.
|
||||
"""
|
||||
_drop_1m = bool(getattr(self, "_oauth_1m_beta_disabled", False))
|
||||
if getattr(self, "provider", None) == "bedrock":
|
||||
from agent.anthropic_adapter import build_anthropic_bedrock_client
|
||||
region = getattr(self, "_bedrock_region", "us-east-1") or "us-east-1"
|
||||
@ -6221,6 +6226,7 @@ class AIAgent:
|
||||
self._anthropic_api_key,
|
||||
getattr(self, "_anthropic_base_url", None),
|
||||
timeout=get_provider_request_timeout(self.provider, self.model),
|
||||
drop_context_1m_beta=_drop_1m,
|
||||
)
|
||||
|
||||
def _interruptible_api_call(self, api_kwargs: dict):
|
||||
@ -8167,6 +8173,7 @@ class AIAgent:
|
||||
context_length=ctx_len,
|
||||
base_url=getattr(self, "_anthropic_base_url", None),
|
||||
fast_mode=(self.request_overrides or {}).get("speed") == "fast",
|
||||
drop_context_1m_beta=bool(getattr(self, "_oauth_1m_beta_disabled", False)),
|
||||
)
|
||||
|
||||
# AWS Bedrock native Converse API — bypasses the OpenAI client entirely.
|
||||
@ -10752,6 +10759,7 @@ class AIAgent:
|
||||
copilot_auth_retry_attempted=False
|
||||
thinking_sig_retry_attempted = False
|
||||
image_shrink_retry_attempted = False
|
||||
oauth_1m_beta_retry_attempted = False
|
||||
has_retried_429 = False
|
||||
restart_with_compressed_messages = False
|
||||
restart_with_length_continuation = False
|
||||
@ -11708,6 +11716,36 @@ class AIAgent:
|
||||
"or shrink didn't reduce size; surfacing original error."
|
||||
)
|
||||
|
||||
# Anthropic OAuth subscription rejected the 1M-context beta
|
||||
# header ("long context beta is not yet available for this
|
||||
# subscription"). Disable the beta for the rest of this
|
||||
# session, rebuild the client, and retry once. 1M-capable
|
||||
# subscriptions never hit this branch — they accept the
|
||||
# beta and keep full 1M context. See PR #17680 for the
|
||||
# original report (we chose reactive recovery over the
|
||||
# proposed unconditional omit so capable subscriptions
|
||||
# don't silently lose the capability).
|
||||
if (
|
||||
classified.reason == FailoverReason.oauth_long_context_beta_forbidden
|
||||
and self.api_mode == "anthropic_messages"
|
||||
and self._is_anthropic_oauth
|
||||
and not oauth_1m_beta_retry_attempted
|
||||
):
|
||||
oauth_1m_beta_retry_attempted = True
|
||||
if not getattr(self, "_oauth_1m_beta_disabled", False):
|
||||
self._oauth_1m_beta_disabled = True
|
||||
try:
|
||||
self._anthropic_client.close()
|
||||
except Exception:
|
||||
pass
|
||||
self._rebuild_anthropic_client()
|
||||
self._vprint(
|
||||
f"{self.log_prefix}🔕 OAuth subscription doesn't support "
|
||||
f"the 1M-context beta — disabled for this session and retrying...",
|
||||
force=True,
|
||||
)
|
||||
continue
|
||||
|
||||
if (
|
||||
self.api_mode == "codex_responses"
|
||||
and self.provider == "openai-codex"
|
||||
|
||||
@ -66,8 +66,30 @@ class TestBuildAnthropicClient:
|
||||
assert "claude-code-20250219" in betas
|
||||
assert "interleaved-thinking-2025-05-14" in betas
|
||||
assert "fine-grained-tool-streaming-2025-05-14" in betas
|
||||
# Default: 1M-context beta stays IN for OAuth so 1M-capable
|
||||
# subscriptions keep full context. The reactive recovery path
|
||||
# in run_agent.py flips it off only after a subscription
|
||||
# actually rejects the beta.
|
||||
assert "context-1m-2025-08-07" in betas
|
||||
assert "api_key" not in kwargs
|
||||
|
||||
def test_oauth_drop_context_1m_beta_strips_only_1m(self):
|
||||
"""drop_context_1m_beta=True strips context-1m-2025-08-07 while
|
||||
preserving every other OAuth-relevant beta."""
|
||||
with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
|
||||
build_anthropic_client(
|
||||
"sk-ant-oat01-" + "x" * 60,
|
||||
drop_context_1m_beta=True,
|
||||
)
|
||||
kwargs = mock_sdk.Anthropic.call_args[1]
|
||||
betas = kwargs["default_headers"]["anthropic-beta"]
|
||||
assert "context-1m-2025-08-07" not in betas
|
||||
# Everything else must still be there.
|
||||
assert "oauth-2025-04-20" in betas
|
||||
assert "claude-code-20250219" in betas
|
||||
assert "interleaved-thinking-2025-05-14" in betas
|
||||
assert "fine-grained-tool-streaming-2025-05-14" in betas
|
||||
|
||||
def test_api_key_uses_api_key(self):
|
||||
with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
|
||||
build_anthropic_client("sk-ant-api03-something")
|
||||
@ -77,6 +99,7 @@ class TestBuildAnthropicClient:
|
||||
# API key auth should still get common betas
|
||||
betas = kwargs["default_headers"]["anthropic-beta"]
|
||||
assert "interleaved-thinking-2025-05-14" in betas
|
||||
assert "context-1m-2025-08-07" in betas
|
||||
assert "oauth-2025-04-20" not in betas # OAuth-only beta NOT present
|
||||
assert "claude-code-20250219" not in betas # OAuth-only beta NOT present
|
||||
|
||||
@ -963,6 +986,42 @@ class TestBuildAnthropicKwargs:
|
||||
)
|
||||
assert kwargs["model"] == "claude-sonnet-4-20250514"
|
||||
|
||||
def test_fast_mode_oauth_default_keeps_context_1m_beta(self):
|
||||
"""Default OAuth fast-mode requests still carry context-1m-2025-08-07."""
|
||||
kwargs = build_anthropic_kwargs(
|
||||
model="claude-opus-4-6",
|
||||
messages=[{"role": "user", "content": "Hi"}],
|
||||
tools=None,
|
||||
max_tokens=4096,
|
||||
reasoning_config=None,
|
||||
is_oauth=True,
|
||||
fast_mode=True,
|
||||
)
|
||||
betas = kwargs["extra_headers"]["anthropic-beta"]
|
||||
assert "fast-mode-2026-02-01" in betas
|
||||
assert "oauth-2025-04-20" in betas
|
||||
assert "context-1m-2025-08-07" in betas
|
||||
|
||||
def test_fast_mode_oauth_drop_context_1m_beta_strips_only_1m(self):
|
||||
"""drop_context_1m_beta=True strips context-1m from fast-mode
|
||||
extra_headers while preserving every other OAuth + fast-mode beta."""
|
||||
kwargs = build_anthropic_kwargs(
|
||||
model="claude-opus-4-6",
|
||||
messages=[{"role": "user", "content": "Hi"}],
|
||||
tools=None,
|
||||
max_tokens=4096,
|
||||
reasoning_config=None,
|
||||
is_oauth=True,
|
||||
fast_mode=True,
|
||||
drop_context_1m_beta=True,
|
||||
)
|
||||
betas = kwargs["extra_headers"]["anthropic-beta"]
|
||||
assert "context-1m-2025-08-07" not in betas
|
||||
assert "fast-mode-2026-02-01" in betas
|
||||
assert "oauth-2025-04-20" in betas
|
||||
assert "claude-code-20250219" in betas
|
||||
assert "interleaved-thinking-2025-05-14" in betas
|
||||
|
||||
def test_reasoning_config_maps_to_manual_thinking_for_pre_4_6_models(self):
|
||||
kwargs = build_anthropic_kwargs(
|
||||
model="claude-sonnet-4-20250514",
|
||||
|
||||
@ -57,7 +57,9 @@ class TestFailoverReason:
|
||||
"context_overflow", "payload_too_large", "image_too_large",
|
||||
"model_not_found", "format_error",
|
||||
"provider_policy_blocked",
|
||||
"thinking_signature", "long_context_tier", "unknown",
|
||||
"thinking_signature", "long_context_tier",
|
||||
"oauth_long_context_beta_forbidden",
|
||||
"unknown",
|
||||
}
|
||||
actual = {r.value for r in FailoverReason}
|
||||
assert expected == actual
|
||||
@ -458,6 +460,40 @@ class TestClassifyApiError:
|
||||
result = classify_api_error(e, provider="anthropic")
|
||||
assert result.reason == FailoverReason.rate_limit
|
||||
|
||||
# ── Provider-specific: Anthropic OAuth 1M-context beta forbidden ──
|
||||
|
||||
def test_anthropic_oauth_1m_beta_forbidden(self):
|
||||
"""400 + 'long context beta is not yet available for this subscription'
|
||||
→ oauth_long_context_beta_forbidden (retryable, no compression)."""
|
||||
e = MockAPIError(
|
||||
"The long context beta is not yet available for this subscription.",
|
||||
status_code=400,
|
||||
)
|
||||
result = classify_api_error(e, provider="anthropic", model="claude-sonnet-4.6")
|
||||
assert result.reason == FailoverReason.oauth_long_context_beta_forbidden
|
||||
assert result.retryable is True
|
||||
assert result.should_compress is False
|
||||
|
||||
def test_anthropic_oauth_1m_beta_forbidden_does_not_collide_with_tier_gate(self):
|
||||
"""The 429 'extra usage' + 'long context' tier gate keeps its own
|
||||
classification even though its message mentions 'long context'."""
|
||||
e = MockAPIError(
|
||||
"Extra usage is required for long context requests over 200k tokens",
|
||||
status_code=429,
|
||||
)
|
||||
result = classify_api_error(e, provider="anthropic", model="claude-sonnet-4.6")
|
||||
assert result.reason == FailoverReason.long_context_tier
|
||||
|
||||
def test_400_without_beta_phrase_is_not_1m_beta_forbidden(self):
|
||||
"""A generic 400 that happens to mention 'long context' but not the
|
||||
exact beta-availability phrase should not be misclassified."""
|
||||
e = MockAPIError(
|
||||
"long context window exceeded",
|
||||
status_code=400,
|
||||
)
|
||||
result = classify_api_error(e, provider="anthropic")
|
||||
assert result.reason != FailoverReason.oauth_long_context_beta_forbidden
|
||||
|
||||
# ── Transport errors ──
|
||||
|
||||
def test_read_timeout(self):
|
||||
|
||||
Loading…
Reference in New Issue
Block a user