feat(fast): broaden /fast whitelist to all OpenAI + Anthropic models (#16883)
Switch _PRIORITY_PROCESSING_MODELS and _ANTHROPIC_FAST_MODE_MODELS from hardcoded frozensets to prefix-based matching. Any gpt-*, o1*, o3*, o4* model (OpenAI) and any claude-* model (Anthropic) now exposes /fast.

Fixes the case where gpt-5.5 and other post-catalog models silently skipped Priority Processing because they weren't in the frozenset. Future OpenAI/Anthropic releases will work without a catalog bump.

Safety:
- Codex-series (*codex*) models are still excluded — they route through the Codex Responses API, which doesn't take service_tier.
- The Anthropic adapter already gates speed=fast on native endpoints only (_is_third_party_anthropic_endpoint), so claude-sonnet-4.6 on OpenRouter/Bedrock/opencode-zen won't leak the unknown beta.
- service_tier=priority is silently dropped by non-OpenAI proxies, so false positives are harmless.
This commit is contained in:
parent
6ce796b495
commit
8269f9056c
@ -1623,31 +1623,41 @@ def provider_label(provider: Optional[str]) -> str:
|
||||
|
||||
# Models that support OpenAI Priority Processing (service_tier="priority").
# See https://openai.com/api-priority-processing/ for the canonical list.
# Only the bare model slug is stored (no vendor prefix).
_PRIORITY_PROCESSING_MODELS: frozenset[str] = frozenset({
    "gpt-5.4",
    "gpt-5.4-mini",
    "gpt-5.2",
    "gpt-5.1",
    "gpt-5",
    "gpt-5-mini",
    "gpt-4.1",
    "gpt-4.1-mini",
    "gpt-4.1-nano",
    "gpt-4o",
    "gpt-4o-mini",
    "o3",
    "o4-mini",
})

# Pattern-based matching — any OpenAI flagship model (gpt-*, o1*, o3*, o4*)
# is assumed to support Priority Processing. service_tier=priority is silently
# ignored by non-OpenAI endpoints (OpenRouter/Copilot/opencode-zen proxies
# strip the field), so false positives are harmless. Codex-series models
# (gpt-5-codex, gpt-5.3-codex, etc.) are excluded — they don't expose the
# service_tier parameter through the Codex Responses API.
_OPENAI_FAST_MODE_PREFIXES: tuple[str, ...] = (
    "gpt-",
    "o1",
    "o3",
    "o4",
)
|
||||
|
||||
|
||||
def _is_openai_fast_model(model_id: Optional[str]) -> bool:
    """Report whether ``model_id`` is an OpenAI model eligible for Priority Processing.

    The id is passed through ``_strip_vendor_prefix`` and anything after the
    first ``:`` is discarded before the slug is compared against
    ``_OPENAI_FAST_MODE_PREFIXES``. ``None`` and empty ids yield ``False``.
    """
    slug = _strip_vendor_prefix(str(model_id or "")).split(":")[0]
    # Codex-series models route through the Codex Responses API, which
    # doesn't accept service_tier, so they never qualify.
    if not slug or "codex" in slug:
        return False
    # str.startswith accepts a tuple and is True if any prefix matches.
    return slug.startswith(_OPENAI_FAST_MODE_PREFIXES)
|
||||
|
||||
|
||||
# Models that support Anthropic Fast Mode (speed="fast").
# See https://platform.claude.com/docs/en/build-with-claude/fast-mode
# Exact-match slugs for Claude Opus 4.6. Both hyphen and dot variants are
# stored to handle native Anthropic (claude-opus-4-6) and OpenRouter
# (claude-opus-4.6).
_ANTHROPIC_FAST_MODE_MODELS: frozenset[str] = frozenset({
    "claude-opus-4-6",
    "claude-opus-4.6",
})

# Pattern-based matching — any claude-* model is eligible. The anthropic
# adapter gates speed=fast on native Anthropic endpoints only (see
# _is_third_party_anthropic_endpoint in agent/anthropic_adapter.py), so
# third-party proxies that would reject the beta header are protected.
|
||||
|
||||
|
||||
def _strip_vendor_prefix(model_id: str) -> str:
|
||||
@ -1660,20 +1670,14 @@ def _strip_vendor_prefix(model_id: str) -> str:
|
||||
|
||||
def model_supports_fast_mode(model_id: Optional[str]) -> bool:
    """Return whether Hermes should expose the /fast toggle for this model.

    The toggle is offered when either ``_is_anthropic_fast_model`` or
    ``_is_openai_fast_model`` accepts ``model_id``; each helper does its own
    normalisation of the id, so it is passed through untouched.
    """
    return _is_anthropic_fast_model(model_id) or _is_openai_fast_model(model_id)
|
||||
|
||||
|
||||
def _is_anthropic_fast_model(model_id: Optional[str]) -> bool:
    """Return True if the model is a Claude model eligible for Anthropic Fast Mode.

    ``None`` is treated as an empty id. The id is passed through
    ``_strip_vendor_prefix`` and any ``:``-separated variant tag is dropped
    before the ``claude-`` prefix check.
    """
    raw = _strip_vendor_prefix(str(model_id or ""))
    # Keep only the part before the first ":" (e.g. OpenRouter ":fast" / ":beta").
    base = raw.split(":")[0]
    return base.startswith("claude-")
|
||||
|
||||
|
||||
def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | None:
|
||||
|
||||
@ -114,17 +114,38 @@ class TestPriorityProcessingModels(unittest.TestCase):
|
||||
def test_all_documented_models_supported(self):
    """Every listed OpenAI slug must be reported as supporting fast mode."""
    from hermes_cli.models import model_supports_fast_mode

    # All OpenAI flagship models support Priority Processing — including
    # future releases (gpt-5.5, 5.6...) via pattern matching.
    supported = [
        "gpt-5.5", "gpt-5.5-mini",
        "gpt-5.4", "gpt-5.4-mini", "gpt-5.2",
        "gpt-5.1", "gpt-5", "gpt-5-mini",
        "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano",
        "gpt-4o", "gpt-4o-mini",
        "o1", "o1-mini", "o3", "o3-mini", "o4-mini",
    ]
    for model in supported:
        assert model_supports_fast_mode(model), f"{model} should support fast mode"
|
||||
|
||||
def test_all_anthropic_models_supported(self):
    """Opus, Sonnet and Haiku slugs must all be reported as supporting fast mode."""
    from hermes_cli.models import model_supports_fast_mode

    # All Claude models support Anthropic Fast Mode — Opus, Sonnet, Haiku.
    claude_slugs = (
        "claude-opus-4-7",
        "claude-opus-4-6",
        "claude-opus-4.6",
        "claude-sonnet-4-6",
        "claude-sonnet-4.6",
        "claude-sonnet-4",
        "claude-haiku-4-5",
        "claude-3-5-haiku",
    )
    for model in claude_slugs:
        assert model_supports_fast_mode(model), f"{model} should support fast mode"
|
||||
|
||||
def test_codex_models_excluded(self):
    """Codex models route through Responses API and don't accept service_tier."""
    from hermes_cli.models import model_supports_fast_mode

    codex_slugs = (
        "gpt-5-codex",
        "gpt-5.2-codex",
        "gpt-5.3-codex",
        "gpt-5.1-codex-max",
    )
    for model in codex_slugs:
        assert not model_supports_fast_mode(model), f"{model} is codex — should not expose /fast"
|
||||
|
||||
def test_vendor_prefix_stripped(self):
|
||||
from hermes_cli.models import model_supports_fast_mode
|
||||
|
||||
@ -135,8 +156,15 @@ class TestPriorityProcessingModels(unittest.TestCase):
|
||||
def test_non_priority_models_rejected(self):
    """Codex slugs, other vendors' models, and empty ids must not expose /fast."""
    from hermes_cli.models import model_supports_fast_mode

    # Codex-series models route through the Codex Responses API and
    # don't accept service_tier, so they're excluded.
    assert model_supports_fast_mode("gpt-5.3-codex") is False
    assert model_supports_fast_mode("gpt-5.2-codex") is False
    assert model_supports_fast_mode("gpt-5-codex") is False
    # Non-OpenAI, non-Anthropic models
    assert model_supports_fast_mode("gemini-3-pro-preview") is False
    assert model_supports_fast_mode("kimi-k2-thinking") is False
    assert model_supports_fast_mode("deepseek-chat") is False
    assert model_supports_fast_mode("") is False
    assert model_supports_fast_mode(None) is False
|
||||
|
||||
@ -153,7 +181,8 @@ class TestPriorityProcessingModels(unittest.TestCase):
|
||||
from hermes_cli.models import resolve_fast_mode_overrides
|
||||
|
||||
assert resolve_fast_mode_overrides("gpt-5.3-codex") is None
|
||||
assert resolve_fast_mode_overrides("claude-sonnet-4") is None
|
||||
assert resolve_fast_mode_overrides("gemini-3-pro-preview") is None
|
||||
assert resolve_fast_mode_overrides("kimi-k2-thinking") is None
|
||||
|
||||
|
||||
class TestFastModeRouting(unittest.TestCase):
|
||||
@ -228,13 +257,26 @@ class TestAnthropicFastMode(unittest.TestCase):
|
||||
assert model_supports_fast_mode("anthropic/claude-opus-4-6") is True
|
||||
assert model_supports_fast_mode("anthropic/claude-opus-4.6") is True
|
||||
|
||||
def test_anthropic_all_claude_models_supported(self):
    """Sonnet, Haiku and Opus slugs (bare or vendor-prefixed) must expose /fast."""
    from hermes_cli.models import model_supports_fast_mode

    # All Claude models support fast mode — Opus, Sonnet, Haiku.
    # The anthropic adapter gates speed=fast on native Anthropic
    # endpoints only, so third-party proxies that reject the beta
    # are protected downstream (see _is_third_party_anthropic_endpoint).
    assert model_supports_fast_mode("claude-sonnet-4-6") is True
    assert model_supports_fast_mode("claude-sonnet-4.6") is True
    assert model_supports_fast_mode("claude-haiku-4-5") is True
    assert model_supports_fast_mode("claude-opus-4-7") is True
    assert model_supports_fast_mode("anthropic/claude-sonnet-4.6") is True
|
||||
|
||||
def test_non_claude_models_not_anthropic_fast(self):
    """Non-Claude models should not be treated as Anthropic fast-mode."""
    from hermes_cli.models import _is_anthropic_fast_model

    other_vendor_slugs = ("gpt-5.4", "gemini-3-pro", "kimi-k2-thinking")
    for slug in other_vendor_slugs:
        assert _is_anthropic_fast_model(slug) is False
|
||||
|
||||
def test_anthropic_variant_tags_stripped(self):
|
||||
from hermes_cli.models import model_supports_fast_mode
|
||||
@ -264,9 +306,11 @@ class TestAnthropicFastMode(unittest.TestCase):
|
||||
|
||||
assert _is_anthropic_fast_model("claude-opus-4-6") is True
|
||||
assert _is_anthropic_fast_model("claude-opus-4.6") is True
|
||||
assert _is_anthropic_fast_model("claude-sonnet-4-6") is True
|
||||
assert _is_anthropic_fast_model("claude-haiku-4-5") is True
|
||||
assert _is_anthropic_fast_model("anthropic/claude-opus-4-6") is True
|
||||
assert _is_anthropic_fast_model("gpt-5.4") is False
|
||||
assert _is_anthropic_fast_model("claude-sonnet-4-6") is False
|
||||
assert _is_anthropic_fast_model("") is False
|
||||
|
||||
def test_fast_command_exposed_for_anthropic_model(self):
|
||||
cli_mod = _import_cli()
|
||||
@ -276,12 +320,22 @@ class TestAnthropicFastMode(unittest.TestCase):
|
||||
)
|
||||
assert cli_mod.HermesCLI._fast_command_available(stub) is True
|
||||
|
||||
def test_fast_command_exposed_for_anthropic_sonnet(self):
    """Sonnet now supports Anthropic Fast Mode — the adapter gates on base_url."""
    cli_mod = _import_cli()
    # Minimal stand-in object carrying the attributes set below.
    stub = SimpleNamespace(
        provider="anthropic", requested_provider="anthropic",
        model="claude-sonnet-4-6", agent=None,
    )
    assert cli_mod.HermesCLI._fast_command_available(stub) is True
|
||||
|
||||
def test_fast_command_hidden_for_non_claude_non_openai(self):
    """Non-Claude, non-OpenAI models should not expose /fast."""
    cli_mod = _import_cli()
    gemini_session = SimpleNamespace(
        provider="gemini",
        requested_provider="gemini",
        model="gemini-3-pro-preview",
        agent=None,
    )
    available = cli_mod.HermesCLI._fast_command_available(gemini_session)
    assert available is False
|
||||
|
||||
def test_turn_route_injects_speed_for_anthropic(self):
|
||||
|
||||
Loading…
Reference in New Issue
Block a user