diff --git a/workspace-template/adapters/hermes/executor.py b/workspace-template/adapters/hermes/executor.py
index ec0b2b2b..e339db6e 100644
--- a/workspace-template/adapters/hermes/executor.py
+++ b/workspace-template/adapters/hermes/executor.py
@@ -4,16 +4,22 @@ Hermes supports 15 providers via the shared ``providers.py`` registry. Each
 provider's ``auth_scheme`` field controls which client + request shape the
 executor uses:
 
-- ``auth_scheme="openai"`` (14 providers) — OpenAI-compat ``/v1/chat/completions``
+- ``auth_scheme="openai"`` (13 providers) — OpenAI-compat ``/v1/chat/completions``
   via the ``openai`` Python SDK. Covers: Nous Portal, OpenRouter, OpenAI, xAI,
-  Gemini, Qwen, GLM, Kimi, MiniMax, DeepSeek, Groq, Together, Fireworks, Mistral.
+  Qwen, GLM, Kimi, MiniMax, DeepSeek, Groq, Together, Fireworks, Mistral.
 
 - ``auth_scheme="anthropic"`` (1 provider — anthropic) — native Messages API via
-  the ``anthropic`` Python SDK. Phase 2 addition: better tool calling, vision
-  support, extended thinking semantics. If the ``anthropic`` package isn't
-  installed in the workspace image, ``_do_anthropic_native`` raises a clear
-  error with install instructions rather than silently falling back to the
-  OpenAI-compat shim (which would lose fidelity invisibly).
+  the ``anthropic`` Python SDK. Phase 2a: better tool calling, vision support,
+  extended thinking semantics. If the ``anthropic`` package isn't installed in
+  the workspace image, ``_do_anthropic_native`` raises a clear error with
+  install instructions rather than silently falling back to the OpenAI-compat
+  shim (which would lose fidelity invisibly).
+
+- ``auth_scheme="gemini"`` (1 provider — gemini) — native ``generateContent`` API
+  via the official ``google-genai`` Python SDK. Phase 2b: first-class vision
+  content blocks, tool/function calling, system instructions, and thinking
+  config — all of which the OpenAI-compat shim at ``/v1beta/openai`` either
+  strips or mis-translates. Same fail-loud semantics as the anthropic path.
 
 Key resolution order (unchanged from Phase 1)
 ----------------------------------------------
@@ -24,9 +30,6 @@
 Raises ``ValueError`` if nothing resolves. The error message lists every
 env var that was checked so the operator knows their options without
 reading source.
-
-Gemini native path (``auth_scheme="gemini"``) is intentionally NOT in this PR
-— Phase 2b will land it after measuring Phase 2a's Anthropic rollout.
 """
 
 from __future__ import annotations
@@ -188,11 +191,52 @@ class HermesA2AExecutor:
             return response.content[0].text
         return ""
 
+    async def _do_gemini_native(self, task_text: str) -> str:
+        """Native Google Gemini ``generateContent`` inference.
+
+        Uses the official ``google-genai`` Python SDK for correct vision
+        content blocks, tool/function calling, system instructions, and
+        thinking config. These all get stripped or mis-translated through
+        the OpenAI-compat ``/v1beta/openai`` shim.
+
+        If the ``google-genai`` package is not installed in the workspace
+        image, raise a clear error with install instructions rather than
+        silently falling back to the OpenAI-compat shim (same fail-loud
+        semantics as the anthropic path).
+
+        Phase 2b minimum viable: single-turn text in, text out, no tools,
+        no vision, no thinking config. Phase 2c/2d layers those on the same
+        method.
+        """
+        try:
+            from google import genai  # type: ignore[import-not-found]
+        except ImportError as exc:  # pragma: no cover — exercised by test_missing_sdk
+            raise RuntimeError(
+                "Hermes gemini native path requires the `google-genai` package. "
+                "Install in the workspace image with `pip install google-genai>=1.0.0` "
+                "or set HERMES provider=openrouter to route Gemini models through "
+                "OpenRouter's OpenAI-compat shim instead."
+            ) from exc
+
+        # google-genai client reads api_key from env by default; pass it
+        # explicitly so we respect whatever ProviderConfig resolved (e.g. a
+        # test-only key that isn't in process env yet).
+        client = genai.Client(api_key=self.api_key)
+        response = await client.aio.models.generate_content(
+            model=self.model,
+            contents=task_text,
+        )
+        # response.text is the flattened text across all parts of the first
+        # candidate. For single-turn text-only that's the whole reply.
+        return response.text or ""
+
     async def _do_inference(self, task_text: str) -> str:
         """Dispatch to the right inference path based on provider auth_scheme."""
         scheme = self.provider_cfg.auth_scheme
         if scheme == "anthropic":
             return await self._do_anthropic_native(task_text)
+        if scheme == "gemini":
+            return await self._do_gemini_native(task_text)
         if scheme == "openai":
             return await self._do_openai_compat(task_text)
         # Unknown scheme — treat as openai-compat for forward-compat with any
diff --git a/workspace-template/adapters/hermes/providers.py b/workspace-template/adapters/hermes/providers.py
index 767e689d..38dadfe8 100644
--- a/workspace-template/adapters/hermes/providers.py
+++ b/workspace-template/adapters/hermes/providers.py
@@ -132,10 +132,15 @@
     "gemini": ProviderConfig(
         name="gemini",
         env_vars=("GEMINI_API_KEY", "GOOGLE_API_KEY"),
-        base_url="https://generativelanguage.googleapis.com/v1beta/openai",
+        base_url="https://generativelanguage.googleapis.com",
         default_model="gemini-2.5-flash",
-        docs="Google Gemini — uses the documented OpenAI-compat endpoint at "
-        "/v1beta/openai. Phase 2 will add native generateContent for vision.",
+        auth_scheme="gemini",
+        docs="Google Gemini — Phase 2b uses the native generateContent API via "
+        "the official `google-genai` Python SDK for correct vision content "
+        "blocks, tool/function calling, and system instructions. Phase 1 "
+        "used the /v1beta/openai compat shim. If the google-genai package "
+        "isn't installed in the workspace image, the executor raises a "
+        "clear error pointing at `pip install google-genai>=1.0.0`.",
     ),
 
     # --- Chinese providers ----------------------------------------------
diff --git a/workspace-template/adapters/hermes/requirements.txt b/workspace-template/adapters/hermes/requirements.txt
index be69ceb6..a59236a7 100644
--- a/workspace-template/adapters/hermes/requirements.txt
+++ b/workspace-template/adapters/hermes/requirements.txt
@@ -1,15 +1,24 @@
 # Hermes adapter dependencies.
 #
-# openai: primary client for the 14 OpenAI-compat providers in providers.py
-# (Nous Portal, OpenRouter, OpenAI, xAI, Gemini, Qwen, GLM, Kimi, MiniMax,
-# DeepSeek, Groq, Together, Fireworks, Mistral — all reachable via one openai
-# SDK pointed at different base URLs).
+# openai: primary client for the 13 OpenAI-compat providers in providers.py
+# (Nous Portal, OpenRouter, OpenAI, xAI, Qwen, GLM, Kimi, MiniMax, DeepSeek,
+# Groq, Together, Fireworks, Mistral — all reachable via one openai SDK
+# pointed at different base URLs). Anthropic + Gemini now go native.
 openai>=1.0.0
 
 # anthropic: native Messages API client for the anthropic provider (auth_scheme
-# = "anthropic" in providers.py). Phase 2 addition — gives correct tool calling,
+# = "anthropic" in providers.py). Phase 2a addition — gives correct tool calling,
 # vision, and extended-thinking semantics that don't translate cleanly through
 # the OpenAI-compat shim. If this package is missing at runtime, executor.py's
 # _do_anthropic_native() raises a clear RuntimeError pointing back at this
 # install line, so a workspace image built without it fails loud, not silent.
 anthropic>=0.39.0
+
+# google-genai: native generateContent API client for the gemini provider
+# (auth_scheme = "gemini" in providers.py). Phase 2b addition — gives
+# first-class vision content blocks, tool/function calling, system
+# instructions, and thinking config that don't translate cleanly through
+# the OpenAI-compat /v1beta/openai shim. Same fail-loud semantics as the
+# anthropic path: missing at runtime → clear RuntimeError from
+# _do_gemini_native(), not a silent fallback.
+google-genai>=1.0.0
diff --git a/workspace-template/tests/test_hermes_phase2_dispatch.py b/workspace-template/tests/test_hermes_phase2_dispatch.py
index bfa24fd1..78bbfe31 100644
--- a/workspace-template/tests/test_hermes_phase2_dispatch.py
+++ b/workspace-template/tests/test_hermes_phase2_dispatch.py
@@ -63,15 +63,25 @@ def _make_executor(provider_name: str):
 
 def test_anthropic_entry_has_anthropic_scheme():
-    """The registry flip: Phase 2 sets anthropic's auth_scheme to 'anthropic'."""
+    """Phase 2a: anthropic's auth_scheme is 'anthropic'."""
     cfg = providers.PROVIDERS["anthropic"]
     assert cfg.auth_scheme == "anthropic"
 
 
+def test_gemini_entry_has_gemini_scheme():
+    """Phase 2b: gemini's auth_scheme is 'gemini'."""
+    cfg = providers.PROVIDERS["gemini"]
+    assert cfg.auth_scheme == "gemini"
+    # Base URL no longer has the /v1beta/openai suffix — native SDK uses bare host.
+    assert "/openai" not in cfg.base_url
+    assert cfg.base_url.startswith("https://generativelanguage.googleapis.com")
+
+
 def test_all_other_providers_still_openai_scheme():
-    """Phase 2 only changes anthropic. Every other provider keeps auth_scheme='openai'."""
+    """Phase 2 changes only anthropic + gemini. Every other provider keeps auth_scheme='openai'."""
+    native_providers = {"anthropic", "gemini"}
     for name, cfg in providers.PROVIDERS.items():
-        if name == "anthropic":
+        if name in native_providers:
             continue
         assert cfg.auth_scheme == "openai", (
             f"{name} unexpectedly has auth_scheme={cfg.auth_scheme!r}"
         )
@@ -80,32 +90,52 @@
 
 @pytest.mark.asyncio
 async def test_dispatch_openai_scheme_calls_openai_compat():
-    """auth_scheme='openai' → _do_openai_compat runs, _do_anthropic_native does not."""
+    """auth_scheme='openai' → _do_openai_compat runs, native paths do not."""
     executor = _make_executor("openai")
     executor._do_openai_compat = AsyncMock(return_value="openai-result")
     executor._do_anthropic_native = AsyncMock(return_value="should-not-run")
+    executor._do_gemini_native = AsyncMock(return_value="should-not-run")
 
     result = await executor._do_inference("hello")
 
     executor._do_openai_compat.assert_awaited_once_with("hello")
     executor._do_anthropic_native.assert_not_awaited()
+    executor._do_gemini_native.assert_not_awaited()
     assert result == "openai-result"
 
 
 @pytest.mark.asyncio
 async def test_dispatch_anthropic_scheme_calls_anthropic_native():
-    """auth_scheme='anthropic' → _do_anthropic_native runs, _do_openai_compat does not."""
+    """auth_scheme='anthropic' → _do_anthropic_native runs, others do not."""
     executor = _make_executor("anthropic")
     executor._do_openai_compat = AsyncMock(return_value="should-not-run")
     executor._do_anthropic_native = AsyncMock(return_value="anthropic-result")
+    executor._do_gemini_native = AsyncMock(return_value="should-not-run")
 
     result = await executor._do_inference("hello")
 
     executor._do_anthropic_native.assert_awaited_once_with("hello")
     executor._do_openai_compat.assert_not_awaited()
+    executor._do_gemini_native.assert_not_awaited()
     assert result == "anthropic-result"
 
 
+@pytest.mark.asyncio
+async def test_dispatch_gemini_scheme_calls_gemini_native():
+    """auth_scheme='gemini' → _do_gemini_native runs, others do not. Phase 2b."""
+    executor = _make_executor("gemini")
+    executor._do_openai_compat = AsyncMock(return_value="should-not-run")
+    executor._do_anthropic_native = AsyncMock(return_value="should-not-run")
+    executor._do_gemini_native = AsyncMock(return_value="gemini-result")
+
+    result = await executor._do_inference("hello")
+
+    executor._do_gemini_native.assert_awaited_once_with("hello")
+    executor._do_openai_compat.assert_not_awaited()
+    executor._do_anthropic_native.assert_not_awaited()
+    assert result == "gemini-result"
+
+
 @pytest.mark.asyncio
 async def test_dispatch_unknown_scheme_falls_back_to_openai_compat():
     """Unknown auth_scheme → log a warning + fall back to openai-compat (forward-compat)."""
@@ -120,11 +150,13 @@
     )
     executor._do_openai_compat = AsyncMock(return_value="fallback-result")
     executor._do_anthropic_native = AsyncMock()
+    executor._do_gemini_native = AsyncMock()
 
     result = await executor._do_inference("hello")
 
     executor._do_openai_compat.assert_awaited_once()
     executor._do_anthropic_native.assert_not_awaited()
+    executor._do_gemini_native.assert_not_awaited()
     assert result == "fallback-result"
 
 
@@ -146,6 +178,21 @@
         await executor._do_anthropic_native("hello")
 
 
+@pytest.mark.asyncio
+async def test_gemini_native_raises_clear_error_when_sdk_missing(monkeypatch):
+    """If the google-genai package is not installed, _do_gemini_native raises
+    a clear RuntimeError with install instructions — same fail-loud semantics
+    as the anthropic native path."""
+    executor = _make_executor("gemini")
+
+    # Simulate ImportError on `from google import genai`. Clobbering
+    # sys.modules["google"] forces the submodule import to fail.
+    monkeypatch.setitem(sys.modules, "google", None)
+
+    with pytest.raises(RuntimeError, match="google-genai"):
+        await executor._do_gemini_native("hello")
+
+
 def test_create_executor_passes_provider_cfg():
     """create_executor's back-compat paths should set .provider_cfg on the
     returned executor so dispatch has auth_scheme available at runtime."""
@@ -173,3 +220,13 @@
         assert exec2.model == "claude-sonnet-4-5"
     finally:
         os.environ.pop("ANTHROPIC_API_KEY", None)
+
+    # Path 3: Phase 2b — gemini explicit resolution
+    os.environ["GEMINI_API_KEY"] = "gem-test"
+    try:
+        exec3 = create_executor(provider="gemini")
+        assert exec3.provider_cfg.name == "gemini"
+        assert exec3.provider_cfg.auth_scheme == "gemini"
+        assert exec3.model == "gemini-2.5-flash"
+    finally:
+        os.environ.pop("GEMINI_API_KEY", None)