fix(cli): /model picker honors provider-specific context caps (#16030)

`_apply_model_switch_result` (the interactive `/model` picker's confirmation path) printed `ModelInfo.context_window` straight from models.dev, which reports the vendor-wide value (1.05M for gpt-5.5 on openai). ChatGPT Codex OAuth caps the same slug at 272K, so the picker showed 1M while the runtime (compressor, gateway `/model`, typed `/model <name>`) correctly used 272K — the classic 'sometimes 1M, sometimes 272K' mismatch on a single model.

Both display paths now go through `resolve_display_context_length()`, matching the fix that `_handle_model_switch` received earlier.

Also bump the stale last-resort fallback in DEFAULT_CONTEXT_LENGTHS (`gpt-5.5: 400000 -> 1050000`) to match the real OpenAI API value; the 272K Codex cap is already enforced via the Codex-OAuth branch, so the fallback now reflects what every non-Codex probe-miss should see.

Tests: adds `test_apply_model_switch_result_context.py` with three scenarios (Codex cap wins, OpenRouter shows 1.05M, resolver-empty falls back to ModelInfo). Updates the existing non-Codex fallback test to assert 1.05M (the correct value).

## Validation

| path                            | before    | after     |
|---------------------------------|-----------|-----------|
| picker -> gpt-5.5 on Codex      | 1,050,000 | 272,000   |
| picker -> gpt-5.5 on OpenAI     | 1,050,000 | 1,050,000 |
| picker -> gpt-5.5 on OpenRouter | 1,050,000 | 1,050,000 |
| typed /model gpt-5.5 on Codex   | 272,000   | 272,000   |
2026-04-26 05:43:31 -07:00 · 2026-04-26 05:43:31 -07:00 · 438db0c7b0
commit 438db0c7b0
parent 2ccdadcca6
4 changed files with 177 additions and 20 deletions
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -145,10 +145,11 @@ DEFAULT_CONTEXT_LENGTHS = {
    "claude": 200000,
    # OpenAI — GPT-5 family (most have 400k; specific overrides first)
    # Source: https://developers.openai.com/api/docs/models
-    # GPT-5.5 (launched Apr 23 2026). 400k is the fallback for providers we
-    # can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of
-    # Apr 2026) and is resolved via _resolve_codex_oauth_context_length().
-    "gpt-5.5": 400000,
+    # GPT-5.5 (launched Apr 23 2026) is 1.05M on the direct OpenAI API and
+    # ChatGPT Codex OAuth caps it at 272K; both paths resolve via their own
+    # provider-aware branches (_resolve_codex_oauth_context_length + models.dev).
+    # This hardcoded value is only reached when every probe misses.
+    "gpt-5.5": 1050000,
    "gpt-5.4-nano": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4-mini": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4": 1050000,               # GPT-5.4, GPT-5.4 Pro (1.05M context)
--- a/cli.py
+++ b/cli.py
@@ -5153,27 +5153,29 @@ class HermesCLI:
        _cprint(f"  ✓ Model switched: {result.new_model}")
        _cprint(f"    Provider: {provider_label}")

+        # Context: always resolve via the provider-aware chain so Codex OAuth,
+        # Copilot, and Nous-enforced caps win over the raw models.dev entry
+        # (e.g. gpt-5.5 is 1.05M on openai but 272K on Codex OAuth).
        mi = result.model_info
+        try:
+            from hermes_cli.model_switch import resolve_display_context_length
+            ctx = resolve_display_context_length(
+                result.new_model,
+                result.target_provider,
+                base_url=result.base_url or self.base_url or "",
+                api_key=result.api_key or self.api_key or "",
+                model_info=mi,
+            )
+            if ctx:
+                _cprint(f"    Context: {ctx:,} tokens")
+        except Exception:
+            pass
        if mi:
-            if mi.context_window:
-                _cprint(f"    Context: {mi.context_window:,} tokens")
            if mi.max_output:
                _cprint(f"    Max output: {mi.max_output:,} tokens")
            if mi.has_cost_data():
                _cprint(f"    Cost: {mi.format_cost()}")
            _cprint(f"    Capabilities: {mi.format_capabilities()}")
-        else:
-            try:
-                from agent.model_metadata import get_model_context_length
-                ctx = get_model_context_length(
-                    result.new_model,
-                    base_url=result.base_url or self.base_url,
-                    api_key=result.api_key or self.api_key,
-                    provider=result.target_provider,
-                )
-                _cprint(f"    Context: {ctx:,} tokens")
-            except Exception:
-                pass

        cache_enabled = (
            (base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower())
--- a/tests/agent/test_model_metadata.py
+++ b/tests/agent/test_model_metadata.py
@@ -340,7 +340,9 @@ class TestCodexOAuthContextLength:
        from agent.model_metadata import get_model_context_length

        # OpenRouter — should hit its own catalog path first; when mocked
-        # empty, falls through to hardcoded DEFAULT_CONTEXT_LENGTHS (400k).
+        # empty, falls through to hardcoded DEFAULT_CONTEXT_LENGTHS (1.05M,
+        # matching the real direct-API value — Codex OAuth's 272k cap is
+        # provider-specific and must not leak here).
        with patch("agent.model_metadata.fetch_model_metadata", return_value={}), \
             patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \
             patch("agent.model_metadata.get_cached_context_length", return_value=None), \
@@ -351,7 +353,7 @@ class TestCodexOAuthContextLength:
                api_key="",
                provider="openrouter",
            )
-        assert ctx == 400_000, (
+        assert ctx == 1_050_000, (
            f"Non-Codex gpt-5.5 resolved to {ctx}; Codex 272k override "
            "leaked outside openai-codex provider"
        )
--- a/tests/hermes_cli/test_apply_model_switch_result_context.py
+++ b/tests/hermes_cli/test_apply_model_switch_result_context.py
@@ -0,0 +1,152 @@
+"""Regression test for the `/model` picker confirmation display.
+
+Bug (April 2026): after choosing a model from the interactive `/model` picker,
+``HermesCLI._apply_model_switch_result()`` printed ``ModelInfo.context_window``
+straight from models.dev, which always reports the vendor-wide value (e.g.
+gpt-5.5 = 1,050,000 on ``openai``). That ignored provider-specific caps — in
+particular, ChatGPT Codex OAuth enforces 272K on the same slug. The sibling
+``_handle_model_switch()`` (typed ``/model <name>``) was already fixed to use
+``resolve_display_context_length()``; the picker path was missed, causing
+"sometimes 1M, sometimes 272K" for the same model across sibling UI paths.
+
+Fix: both display paths now go through ``resolve_display_context_length()``.
+"""
+from __future__ import annotations
+
+from unittest.mock import patch
+
+from hermes_cli.model_switch import ModelSwitchResult
+
+
+class _FakeModelInfo:
+    context_window = 1_050_000
+    max_output = 0
+
+    def has_cost_data(self):
+        return False
+
+    def format_capabilities(self):
+        return ""
+
+
+class _StubCLI:
+    """Minimum attrs ``_apply_model_switch_result`` reads on ``self``."""
+    agent = None
+    model = ""
+    provider = ""
+    requested_provider = ""
+    api_key = ""
+    _explicit_api_key = ""
+    base_url = ""
+    _explicit_base_url = ""
+    api_mode = ""
+    _pending_model_switch_note = ""
+
+
+def _run_display(monkeypatch, result):
+    import cli as cli_mod
+
+    captured: list[str] = []
+    monkeypatch.setattr(cli_mod, "_cprint", lambda s, *a, **k: captured.append(str(s)))
+    # Avoid writing to ~/.hermes/config.yaml during the test.
+    monkeypatch.setattr(cli_mod, "save_config_value", lambda *a, **k: None)
+    cli_mod.HermesCLI._apply_model_switch_result(_StubCLI(), result, False)
+    return captured
+
+
+def test_picker_path_uses_provider_aware_context_on_codex(monkeypatch):
+    """``_apply_model_switch_result`` must prefer the provider-aware resolver
+    (272K on Codex) over the raw models.dev value (1.05M for gpt-5.5).
+    """
+    result = ModelSwitchResult(
+        success=True,
+        new_model="gpt-5.5",
+        target_provider="openai-codex",
+        provider_changed=True,
+        api_key="",
+        base_url="https://chatgpt.com/backend-api/codex",
+        api_mode="codex_responses",
+        warning_message="",
+        provider_label="ChatGPT Codex",
+        resolved_via_alias=False,
+        capabilities=None,
+        model_info=_FakeModelInfo(),  # models.dev says 1.05M
+        is_global=False,
+    )
+    with patch(
+        "agent.model_metadata.get_model_context_length",
+        return_value=272_000,
+    ):
+        lines = _run_display(monkeypatch, result)
+
+    ctx_line = next((l for l in lines if "Context:" in l), "")
+    assert "272,000" in ctx_line, (
+        f"picker-path display must show Codex's 272K cap, got: {ctx_line!r}"
+    )
+    assert "1,050,000" not in ctx_line, (
+        f"picker-path display leaked models.dev's 1.05M for Codex: {ctx_line!r}"
+    )
+
+
+def test_picker_path_shows_vendor_value_when_no_provider_cap(monkeypatch):
+    """On providers with no enforced cap (e.g. OpenRouter), the picker path
+    should surface the real 1.05M context for gpt-5.5 — resolver and models.dev
+    agree here.
+    """
+    result = ModelSwitchResult(
+        success=True,
+        new_model="openai/gpt-5.5",
+        target_provider="openrouter",
+        provider_changed=True,
+        api_key="",
+        base_url="https://openrouter.ai/api/v1",
+        api_mode="chat_completions",
+        warning_message="",
+        provider_label="OpenRouter",
+        resolved_via_alias=False,
+        capabilities=None,
+        model_info=_FakeModelInfo(),
+        is_global=False,
+    )
+    with patch(
+        "agent.model_metadata.get_model_context_length",
+        return_value=1_050_000,
+    ):
+        lines = _run_display(monkeypatch, result)
+
+    ctx_line = next((l for l in lines if "Context:" in l), "")
+    assert "1,050,000" in ctx_line, (
+        f"OpenRouter gpt-5.5 should show 1.05M context, got: {ctx_line!r}"
+    )
+
+
+def test_picker_path_falls_back_to_model_info_when_resolver_empty(monkeypatch):
+    """If ``get_model_context_length`` returns nothing (rare — truly unknown
+    endpoint), the display still surfaces ``ModelInfo.context_window`` so the
+    user sees *something* rather than a silent blank.
+    """
+    result = ModelSwitchResult(
+        success=True,
+        new_model="some-model",
+        target_provider="some-provider",
+        provider_changed=True,
+        api_key="",
+        base_url="",
+        api_mode="chat_completions",
+        warning_message="",
+        provider_label="Some Provider",
+        resolved_via_alias=False,
+        capabilities=None,
+        model_info=_FakeModelInfo(),  # context_window = 1_050_000
+        is_global=False,
+    )
+    with patch(
+        "agent.model_metadata.get_model_context_length",
+        return_value=None,
+    ):
+        lines = _run_display(monkeypatch, result)
+
+    ctx_line = next((l for l in lines if "Context:" in l), "")
+    assert "1,050,000" in ctx_line, (
+        f"resolver-empty path should fall back to ModelInfo, got: {ctx_line!r}"
+    )