feat(fast): broaden /fast whitelist to all OpenAI + Anthropic models (#16883)

Switch _PRIORITY_PROCESSING_MODELS and _ANTHROPIC_FAST_MODE_MODELS from hardcoded frozensets to prefix-based matching. Any gpt-*, o1*, o3*, o4* (OpenAI) model and any claude-* (Anthropic) model now exposes /fast.

Fixes the case where gpt-5.5 and other post-catalog models silently skipped Priority Processing because they weren't in the frozenset. Future OpenAI/Anthropic releases will work without a catalog bump.

Safety:
- Codex-series (*codex*) models are still excluded — they route through the Codex Responses API, which doesn't take service_tier.
- The Anthropic adapter already gates speed=fast on native endpoints only (_is_third_party_anthropic_endpoint), so claude-sonnet-4.6 on OpenRouter/Bedrock/opencode-zen won't leak the unknown beta.
- service_tier=priority is silently dropped by non-OpenAI proxies, so false positives are harmless.
2026-04-28 00:44:43 -07:00 · 2026-04-28 00:44:43 -07:00 · 8269f9056c
commit 8269f9056c
parent 6ce796b495
2 changed files with 99 additions and 41 deletions
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -1623,31 +1623,41 @@ def provider_label(provider: Optional[str]) -> str:

 # Models that support OpenAI Priority Processing (service_tier="priority").
 # See https://openai.com/api-priority-processing/ for the canonical list.
-# Only the bare model slug is stored (no vendor prefix).
-_PRIORITY_PROCESSING_MODELS: frozenset[str] = frozenset({
-    "gpt-5.4",
-    "gpt-5.4-mini",
-    "gpt-5.2",
-    "gpt-5.1",
-    "gpt-5",
-    "gpt-5-mini",
-    "gpt-4.1",
-    "gpt-4.1-mini",
-    "gpt-4.1-nano",
-    "gpt-4o",
-    "gpt-4o-mini",
+#
+# Pattern-based matching — any OpenAI flagship model (gpt-*, o1*, o3*, o4*)
+# is assumed to support Priority Processing. service_tier=priority is silently
+# ignored by non-OpenAI endpoints (OpenRouter/Copilot/opencode-zen proxies
+# strip the field), so false positives are harmless. Codex-series models
+# (gpt-5-codex, gpt-5.3-codex, etc.) are excluded — they don't expose the
+# service_tier parameter through the Codex Responses API.
+_OPENAI_FAST_MODE_PREFIXES: tuple[str, ...] = (
+    "gpt-",
+    "o1",
    "o3",
-    "o4-mini",
-})
+    "o4",
+)
+
+
+def _is_openai_fast_model(model_id: Optional[str]) -> bool:
+    """Return True if the model is an OpenAI flagship eligible for Priority Processing."""
+    raw = _strip_vendor_prefix(str(model_id or ""))
+    base = raw.split(":")[0]
+    if not base:
+        return False
+    # Exclude Codex-series — they route through the Codex Responses API
+    # which doesn't accept service_tier.
+    if "codex" in base:
+        return False
+    return any(base.startswith(prefix) for prefix in _OPENAI_FAST_MODE_PREFIXES)
+

 # Models that support Anthropic Fast Mode (speed="fast").
 # See https://platform.claude.com/docs/en/build-with-claude/fast-mode
-# Currently only Claude Opus 4.6.  Both hyphen and dot variants are stored
-# to handle native Anthropic (claude-opus-4-6) and OpenRouter (claude-opus-4.6).
-_ANTHROPIC_FAST_MODE_MODELS: frozenset[str] = frozenset({
-    "claude-opus-4-6",
-    "claude-opus-4.6",
-})
+#
+# Pattern-based matching — any claude-* model is eligible. The anthropic
+# adapter gates speed=fast on native Anthropic endpoints only (see
+# _is_third_party_anthropic_endpoint in agent/anthropic_adapter.py), so
+# third-party proxies that would reject the beta header are protected.


 def _strip_vendor_prefix(model_id: str) -> str:
@@ -1660,20 +1670,14 @@ def _strip_vendor_prefix(model_id: str) -> str:

 def model_supports_fast_mode(model_id: Optional[str]) -> bool:
    """Return whether Hermes should expose the /fast toggle for this model."""
-    raw = _strip_vendor_prefix(str(model_id or ""))
-    if raw in _PRIORITY_PROCESSING_MODELS:
-        return True
-    # Anthropic fast mode — strip date suffixes (e.g. claude-opus-4-6-20260401)
-    # and OpenRouter variant tags (:fast, :beta) for matching.
-    base = raw.split(":")[0]
-    return base in _ANTHROPIC_FAST_MODE_MODELS
+    return _is_anthropic_fast_model(model_id) or _is_openai_fast_model(model_id)


 def _is_anthropic_fast_model(model_id: Optional[str]) -> bool:
-    """Return True if the model supports Anthropic's fast mode (speed='fast')."""
+    """Return True if the model is a Claude model eligible for Anthropic Fast Mode."""
    raw = _strip_vendor_prefix(str(model_id or ""))
    base = raw.split(":")[0]
-    return base in _ANTHROPIC_FAST_MODE_MODELS
+    return base.startswith("claude-")


 def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | None:
--- a/tests/cli/test_fast_command.py
+++ b/tests/cli/test_fast_command.py
@@ -114,17 +114,38 @@ class TestPriorityProcessingModels(unittest.TestCase):
    def test_all_documented_models_supported(self):
        from hermes_cli.models import model_supports_fast_mode

-        # All models from OpenAI's Priority Processing pricing table
+        # All OpenAI flagship models support Priority Processing — including
+        # future releases (gpt-5.5, 5.6...) via pattern matching.
        supported = [
+            "gpt-5.5", "gpt-5.5-mini",
            "gpt-5.4", "gpt-5.4-mini", "gpt-5.2",
            "gpt-5.1", "gpt-5", "gpt-5-mini",
            "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano",
            "gpt-4o", "gpt-4o-mini",
-            "o3", "o4-mini",
+            "o1", "o1-mini", "o3", "o3-mini", "o4-mini",
        ]
        for model in supported:
            assert model_supports_fast_mode(model), f"{model} should support fast mode"

+    def test_all_anthropic_models_supported(self):
+        from hermes_cli.models import model_supports_fast_mode
+
+        # All Claude models support Anthropic Fast Mode — Opus, Sonnet, Haiku.
+        supported = [
+            "claude-opus-4-7", "claude-opus-4-6", "claude-opus-4.6",
+            "claude-sonnet-4-6", "claude-sonnet-4.6", "claude-sonnet-4",
+            "claude-haiku-4-5", "claude-3-5-haiku",
+        ]
+        for model in supported:
+            assert model_supports_fast_mode(model), f"{model} should support fast mode"
+
+    def test_codex_models_excluded(self):
+        """Codex models route through Responses API and don't accept service_tier."""
+        from hermes_cli.models import model_supports_fast_mode
+
+        for model in ["gpt-5-codex", "gpt-5.2-codex", "gpt-5.3-codex", "gpt-5.1-codex-max"]:
+            assert not model_supports_fast_mode(model), f"{model} is codex — should not expose /fast"
+
    def test_vendor_prefix_stripped(self):
        from hermes_cli.models import model_supports_fast_mode

@@ -135,8 +156,15 @@ class TestPriorityProcessingModels(unittest.TestCase):
    def test_non_priority_models_rejected(self):
        from hermes_cli.models import model_supports_fast_mode

+        # Codex-series models route through the Codex Responses API and
+        # don't accept service_tier, so they're excluded.
        assert model_supports_fast_mode("gpt-5.3-codex") is False
-        assert model_supports_fast_mode("claude-sonnet-4") is False
+        assert model_supports_fast_mode("gpt-5.2-codex") is False
+        assert model_supports_fast_mode("gpt-5-codex") is False
+        # Non-OpenAI, non-Anthropic models
+        assert model_supports_fast_mode("gemini-3-pro-preview") is False
+        assert model_supports_fast_mode("kimi-k2-thinking") is False
+        assert model_supports_fast_mode("deepseek-chat") is False
        assert model_supports_fast_mode("") is False
        assert model_supports_fast_mode(None) is False

@@ -153,7 +181,8 @@ class TestPriorityProcessingModels(unittest.TestCase):
        from hermes_cli.models import resolve_fast_mode_overrides

        assert resolve_fast_mode_overrides("gpt-5.3-codex") is None
-        assert resolve_fast_mode_overrides("claude-sonnet-4") is None
+        assert resolve_fast_mode_overrides("gemini-3-pro-preview") is None
+        assert resolve_fast_mode_overrides("kimi-k2-thinking") is None


 class TestFastModeRouting(unittest.TestCase):
@@ -228,13 +257,26 @@ class TestAnthropicFastMode(unittest.TestCase):
        assert model_supports_fast_mode("anthropic/claude-opus-4-6") is True
        assert model_supports_fast_mode("anthropic/claude-opus-4.6") is True

-    def test_anthropic_non_opus_rejected(self):
+    def test_anthropic_all_claude_models_supported(self):
        from hermes_cli.models import model_supports_fast_mode

-        assert model_supports_fast_mode("claude-sonnet-4-6") is False
-        assert model_supports_fast_mode("claude-sonnet-4.6") is False
-        assert model_supports_fast_mode("claude-haiku-4-5") is False
-        assert model_supports_fast_mode("anthropic/claude-sonnet-4.6") is False
+        # All Claude models support fast mode — Opus, Sonnet, Haiku.
+        # The anthropic adapter gates speed=fast on native Anthropic
+        # endpoints only, so third-party proxies that reject the beta
+        # are protected downstream (see _is_third_party_anthropic_endpoint).
+        assert model_supports_fast_mode("claude-sonnet-4-6") is True
+        assert model_supports_fast_mode("claude-sonnet-4.6") is True
+        assert model_supports_fast_mode("claude-haiku-4-5") is True
+        assert model_supports_fast_mode("claude-opus-4-7") is True
+        assert model_supports_fast_mode("anthropic/claude-sonnet-4.6") is True
+
+    def test_non_claude_models_not_anthropic_fast(self):
+        """Non-Claude models should not be treated as Anthropic fast-mode."""
+        from hermes_cli.models import _is_anthropic_fast_model
+
+        assert _is_anthropic_fast_model("gpt-5.4") is False
+        assert _is_anthropic_fast_model("gemini-3-pro") is False
+        assert _is_anthropic_fast_model("kimi-k2-thinking") is False

    def test_anthropic_variant_tags_stripped(self):
        from hermes_cli.models import model_supports_fast_mode
@@ -264,9 +306,11 @@ class TestAnthropicFastMode(unittest.TestCase):

        assert _is_anthropic_fast_model("claude-opus-4-6") is True
        assert _is_anthropic_fast_model("claude-opus-4.6") is True
+        assert _is_anthropic_fast_model("claude-sonnet-4-6") is True
+        assert _is_anthropic_fast_model("claude-haiku-4-5") is True
        assert _is_anthropic_fast_model("anthropic/claude-opus-4-6") is True
        assert _is_anthropic_fast_model("gpt-5.4") is False
-        assert _is_anthropic_fast_model("claude-sonnet-4-6") is False
+        assert _is_anthropic_fast_model("") is False

    def test_fast_command_exposed_for_anthropic_model(self):
        cli_mod = _import_cli()
@@ -276,12 +320,22 @@ class TestAnthropicFastMode(unittest.TestCase):
        )
        assert cli_mod.HermesCLI._fast_command_available(stub) is True

-    def test_fast_command_hidden_for_anthropic_sonnet(self):
+    def test_fast_command_exposed_for_anthropic_sonnet(self):
+        """Sonnet now supports Anthropic Fast Mode — the adapter gates on base_url."""
        cli_mod = _import_cli()
        stub = SimpleNamespace(
            provider="anthropic", requested_provider="anthropic",
            model="claude-sonnet-4-6", agent=None,
        )
+        assert cli_mod.HermesCLI._fast_command_available(stub) is True
+
+    def test_fast_command_hidden_for_non_claude_non_openai(self):
+        """Non-Claude, non-OpenAI models should not expose /fast."""
+        cli_mod = _import_cli()
+        stub = SimpleNamespace(
+            provider="gemini", requested_provider="gemini",
+            model="gemini-3-pro-preview", agent=None,
+        )
        assert cli_mod.HermesCLI._fast_command_available(stub) is False

    def test_turn_route_injects_speed_for_anthropic(self):