Merge pull request #255 from Molecule-AI/feat/hermes-phase2b-gemini-native
feat(hermes): Phase 2b — native Google Gemini generateContent dispatch path
This commit is contained in:
commit
825b8a227f
@ -4,16 +4,22 @@ Hermes supports 15 providers via the shared ``providers.py`` registry. Each
|
||||
provider's ``auth_scheme`` field controls which client + request shape the
|
||||
executor uses:
|
||||
|
||||
- ``auth_scheme="openai"`` (14 providers) — OpenAI-compat ``/v1/chat/completions``
|
||||
- ``auth_scheme="openai"`` (13 providers) — OpenAI-compat ``/v1/chat/completions``
|
||||
via the ``openai`` Python SDK. Covers: Nous Portal, OpenRouter, OpenAI, xAI,
|
||||
Gemini, Qwen, GLM, Kimi, MiniMax, DeepSeek, Groq, Together, Fireworks, Mistral.
|
||||
Qwen, GLM, Kimi, MiniMax, DeepSeek, Groq, Together, Fireworks, Mistral.
|
||||
|
||||
- ``auth_scheme="anthropic"`` (1 provider — anthropic) — native Messages API via
|
||||
the ``anthropic`` Python SDK. Phase 2 addition: better tool calling, vision
|
||||
support, extended thinking semantics. If the ``anthropic`` package isn't
|
||||
installed in the workspace image, ``_do_anthropic_native`` raises a clear
|
||||
error with install instructions rather than silently falling back to the
|
||||
OpenAI-compat shim (which would lose fidelity invisibly).
|
||||
the ``anthropic`` Python SDK. Phase 2a: better tool calling, vision support,
|
||||
extended thinking semantics. If the ``anthropic`` package isn't installed in
|
||||
the workspace image, ``_do_anthropic_native`` raises a clear error with
|
||||
install instructions rather than silently falling back to the OpenAI-compat
|
||||
shim (which would lose fidelity invisibly).
|
||||
|
||||
- ``auth_scheme="gemini"`` (1 provider — gemini) — native ``generateContent`` API
|
||||
via the official ``google-genai`` Python SDK. Phase 2b: first-class vision
|
||||
content blocks, tool/function calling, system instructions, and thinking
|
||||
config — all of which the OpenAI-compat shim at ``/v1beta/openai`` either
|
||||
strips or mis-translates. Same fail-loud semantics as the anthropic path.
|
||||
|
||||
Key resolution order (unchanged from Phase 1)
|
||||
----------------------------------------------
|
||||
@ -24,9 +30,6 @@ Key resolution order (unchanged from Phase 1)
|
||||
|
||||
Raises ``ValueError`` if nothing resolves. The error message lists every env var
|
||||
that was checked so the operator knows their options without reading source.
|
||||
|
||||
Gemini native path (``auth_scheme="gemini"``) is intentionally NOT in this PR
|
||||
— Phase 2b will land it after measuring Phase 2a's Anthropic rollout.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@ -188,11 +191,52 @@ class HermesA2AExecutor:
|
||||
return response.content[0].text
|
||||
return ""
|
||||
|
||||
async def _do_gemini_native(self, task_text: str) -> str:
    """Native Google Gemini ``generateContent`` inference.

    Uses the official ``google-genai`` Python SDK for correct vision
    content blocks, tool/function calling, system instructions, and
    thinking config. These all get stripped or mis-translated through
    the OpenAI-compat ``/v1beta/openai`` shim.

    If the ``google-genai`` package is not installed in the workspace
    image, raise a clear error with install instructions rather than
    silently falling back to the OpenAI-compat shim (same fail-loud
    semantics as the anthropic path).

    Phase 2b minimum viable: single-turn text in, text out, no tools,
    no vision, no thinking config. Phase 2c/2d layers those on the same
    method.

    Args:
        task_text: Prompt text, passed unchanged as ``contents`` of a
            single ``generate_content`` request.

    Returns:
        ``response.text`` from the SDK, or ``""`` when it is empty or
        ``None``.

    Raises:
        RuntimeError: If ``from google import genai`` raises
            ``ImportError``; the original error is chained as the cause.
    """
    try:
        from google import genai  # type: ignore[import-not-found]
    # Excluded from coverage measurement, but still exercised by
    # test_gemini_native_raises_clear_error_when_sdk_missing.
    except ImportError as exc:  # pragma: no cover
        # The requirement specifier is single-quoted on purpose: pasted
        # unquoted into a POSIX shell, `>=1.0.0` is parsed as a redirect
        # to a file named `=1.0.0` and pip installs an unpinned version.
        raise RuntimeError(
            "Hermes gemini native path requires the `google-genai` package. "
            "Install in the workspace image with `pip install 'google-genai>=1.0.0'` "
            "or set HERMES provider=openrouter to route Gemini models through "
            "OpenRouter's OpenAI-compat shim instead."
        ) from exc

    # google-genai client reads api_key from env by default; pass it
    # explicitly so we respect whatever ProviderConfig resolved (e.g. a
    # test-only key that isn't in process env yet).
    client = genai.Client(api_key=self.api_key)
    response = await client.aio.models.generate_content(
        model=self.model,
        contents=task_text,
    )
    # response.text is the flattened text across all parts of the first
    # candidate. For single-turn text-only that's the whole reply.
    return response.text or ""
|
||||
|
||||
async def _do_inference(self, task_text: str) -> str:
|
||||
"""Dispatch to the right inference path based on provider auth_scheme."""
|
||||
scheme = self.provider_cfg.auth_scheme
|
||||
if scheme == "anthropic":
|
||||
return await self._do_anthropic_native(task_text)
|
||||
if scheme == "gemini":
|
||||
return await self._do_gemini_native(task_text)
|
||||
if scheme == "openai":
|
||||
return await self._do_openai_compat(task_text)
|
||||
# Unknown scheme — treat as openai-compat for forward-compat with any
|
||||
|
||||
@ -132,10 +132,15 @@ PROVIDERS: dict[str, ProviderConfig] = {
|
||||
"gemini": ProviderConfig(
|
||||
name="gemini",
|
||||
env_vars=("GEMINI_API_KEY", "GOOGLE_API_KEY"),
|
||||
base_url="https://generativelanguage.googleapis.com/v1beta/openai",
|
||||
base_url="https://generativelanguage.googleapis.com",
|
||||
default_model="gemini-2.5-flash",
|
||||
docs="Google Gemini — uses the documented OpenAI-compat endpoint at "
|
||||
"/v1beta/openai. Phase 2 will add native generateContent for vision.",
|
||||
auth_scheme="gemini",
|
||||
docs="Google Gemini — Phase 2b uses the native generateContent API via "
|
||||
"the official `google-genai` Python SDK for correct vision content "
|
||||
"blocks, tool/function calling, and system instructions. Phase 1 "
|
||||
"used the /v1beta/openai compat shim. If the google-genai package "
|
||||
"isn't installed in the workspace image, the executor raises a "
|
||||
"clear error pointing at `pip install google-genai>=1.0.0`.",
|
||||
),
|
||||
|
||||
# --- Chinese providers ----------------------------------------------
|
||||
|
||||
@ -1,15 +1,24 @@
|
||||
# Hermes adapter dependencies.
|
||||
#
|
||||
# openai: primary client for the 14 OpenAI-compat providers in providers.py
|
||||
# (Nous Portal, OpenRouter, OpenAI, xAI, Gemini, Qwen, GLM, Kimi, MiniMax,
|
||||
# DeepSeek, Groq, Together, Fireworks, Mistral — all reachable via one openai
|
||||
# SDK pointed at different base URLs).
|
||||
# openai: primary client for the 13 OpenAI-compat providers in providers.py
|
||||
# (Nous Portal, OpenRouter, OpenAI, xAI, Qwen, GLM, Kimi, MiniMax, DeepSeek,
|
||||
# Groq, Together, Fireworks, Mistral — all reachable via one openai SDK
|
||||
# pointed at different base URLs). Anthropic + Gemini now go native.
|
||||
openai>=1.0.0
|
||||
|
||||
# anthropic: native Messages API client for the anthropic provider (auth_scheme
|
||||
# = "anthropic" in providers.py). Phase 2 addition — gives correct tool calling,
|
||||
# = "anthropic" in providers.py). Phase 2a addition — gives correct tool calling,
|
||||
# vision, and extended-thinking semantics that don't translate cleanly through
|
||||
# the OpenAI-compat shim. If this package is missing at runtime, executor.py's
|
||||
# _do_anthropic_native() raises a clear RuntimeError pointing back at this
|
||||
# install line, so a workspace image built without it fails loud, not silent.
|
||||
anthropic>=0.39.0
|
||||
|
||||
# google-genai: native generateContent API client for the gemini provider
|
||||
# (auth_scheme = "gemini" in providers.py). Phase 2b addition — gives
|
||||
# first-class vision content blocks, tool/function calling, system
|
||||
# instructions, and thinking config that don't translate cleanly through
|
||||
# the OpenAI-compat /v1beta/openai shim. Same fail-loud semantics as the
|
||||
# anthropic path: missing at runtime → clear RuntimeError from
|
||||
# _do_gemini_native(), not a silent fallback.
|
||||
google-genai>=1.0.0
|
||||
|
||||
@ -63,15 +63,25 @@ def _make_executor(provider_name: str):
|
||||
|
||||
|
||||
def test_anthropic_entry_has_anthropic_scheme():
|
||||
"""The registry flip: Phase 2 sets anthropic's auth_scheme to 'anthropic'."""
|
||||
"""Phase 2a: anthropic's auth_scheme is 'anthropic'."""
|
||||
cfg = providers.PROVIDERS["anthropic"]
|
||||
assert cfg.auth_scheme == "anthropic"
|
||||
|
||||
|
||||
def test_gemini_entry_has_gemini_scheme():
    """Phase 2b: the gemini registry entry selects the native 'gemini' scheme."""
    gemini_cfg = providers.PROVIDERS["gemini"]
    assert gemini_cfg.auth_scheme == "gemini"

    # The native SDK talks to the bare host, so the old /v1beta/openai
    # suffix must no longer appear in the registered base URL.
    registered_url = gemini_cfg.base_url
    assert "/openai" not in registered_url
    assert registered_url.startswith("https://generativelanguage.googleapis.com")
|
||||
|
||||
|
||||
def test_all_other_providers_still_openai_scheme():
|
||||
"""Phase 2 only changes anthropic. Every other provider keeps auth_scheme='openai'."""
|
||||
"""Phase 2 changes only anthropic + gemini. Every other provider keeps auth_scheme='openai'."""
|
||||
native_providers = {"anthropic", "gemini"}
|
||||
for name, cfg in providers.PROVIDERS.items():
|
||||
if name == "anthropic":
|
||||
if name in native_providers:
|
||||
continue
|
||||
assert cfg.auth_scheme == "openai", (
|
||||
f"{name} unexpectedly has auth_scheme={cfg.auth_scheme!r}"
|
||||
@ -80,32 +90,52 @@ def test_all_other_providers_still_openai_scheme():
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_dispatch_openai_scheme_calls_openai_compat():
|
||||
"""auth_scheme='openai' → _do_openai_compat runs, _do_anthropic_native does not."""
|
||||
"""auth_scheme='openai' → _do_openai_compat runs, native paths do not."""
|
||||
executor = _make_executor("openai")
|
||||
executor._do_openai_compat = AsyncMock(return_value="openai-result")
|
||||
executor._do_anthropic_native = AsyncMock(return_value="should-not-run")
|
||||
executor._do_gemini_native = AsyncMock(return_value="should-not-run")
|
||||
|
||||
result = await executor._do_inference("hello")
|
||||
|
||||
executor._do_openai_compat.assert_awaited_once_with("hello")
|
||||
executor._do_anthropic_native.assert_not_awaited()
|
||||
executor._do_gemini_native.assert_not_awaited()
|
||||
assert result == "openai-result"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_dispatch_anthropic_scheme_calls_anthropic_native():
|
||||
"""auth_scheme='anthropic' → _do_anthropic_native runs, _do_openai_compat does not."""
|
||||
"""auth_scheme='anthropic' → _do_anthropic_native runs, others do not."""
|
||||
executor = _make_executor("anthropic")
|
||||
executor._do_openai_compat = AsyncMock(return_value="should-not-run")
|
||||
executor._do_anthropic_native = AsyncMock(return_value="anthropic-result")
|
||||
executor._do_gemini_native = AsyncMock(return_value="should-not-run")
|
||||
|
||||
result = await executor._do_inference("hello")
|
||||
|
||||
executor._do_anthropic_native.assert_awaited_once_with("hello")
|
||||
executor._do_openai_compat.assert_not_awaited()
|
||||
executor._do_gemini_native.assert_not_awaited()
|
||||
assert result == "anthropic-result"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_dispatch_gemini_scheme_calls_gemini_native():
    """Phase 2b: auth_scheme='gemini' dispatches to _do_gemini_native only."""
    hermes = _make_executor("gemini")

    # Stub every inference path so the test observes dispatch alone.
    hermes._do_gemini_native = AsyncMock(return_value="gemini-result")
    hermes._do_anthropic_native = AsyncMock(return_value="should-not-run")
    hermes._do_openai_compat = AsyncMock(return_value="should-not-run")

    reply = await hermes._do_inference("hello")

    # The gemini path ran exactly once with the prompt; the others never ran.
    hermes._do_gemini_native.assert_awaited_once_with("hello")
    hermes._do_openai_compat.assert_not_awaited()
    hermes._do_anthropic_native.assert_not_awaited()
    assert reply == "gemini-result"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_dispatch_unknown_scheme_falls_back_to_openai_compat():
|
||||
"""Unknown auth_scheme → log a warning + fall back to openai-compat (forward-compat)."""
|
||||
@ -120,11 +150,13 @@ async def test_dispatch_unknown_scheme_falls_back_to_openai_compat():
|
||||
)
|
||||
executor._do_openai_compat = AsyncMock(return_value="fallback-result")
|
||||
executor._do_anthropic_native = AsyncMock()
|
||||
executor._do_gemini_native = AsyncMock()
|
||||
|
||||
result = await executor._do_inference("hello")
|
||||
|
||||
executor._do_openai_compat.assert_awaited_once()
|
||||
executor._do_anthropic_native.assert_not_awaited()
|
||||
executor._do_gemini_native.assert_not_awaited()
|
||||
assert result == "fallback-result"
|
||||
|
||||
|
||||
@ -146,6 +178,21 @@ async def test_anthropic_native_raises_clear_error_when_sdk_missing(monkeypatch)
|
||||
await executor._do_anthropic_native("hello")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_gemini_native_raises_clear_error_when_sdk_missing(monkeypatch):
    """A missing google-genai package must surface as a RuntimeError whose
    message names the package — the same fail-loud behaviour as the
    anthropic native path."""
    hermes = _make_executor("gemini")

    # A None entry in sys.modules makes any import of that name raise
    # ImportError, so `from google import genai` fails as if the SDK
    # were not installed. monkeypatch restores the entry afterwards.
    monkeypatch.setitem(sys.modules, "google", None)

    expect_missing_sdk_error = pytest.raises(RuntimeError, match="google-genai")
    with expect_missing_sdk_error:
        await hermes._do_gemini_native("hello")
|
||||
|
||||
|
||||
def test_create_executor_passes_provider_cfg():
|
||||
"""create_executor's back-compat paths should set .provider_cfg on the
|
||||
returned executor so dispatch has auth_scheme available at runtime."""
|
||||
@ -173,3 +220,13 @@ def test_create_executor_passes_provider_cfg():
|
||||
assert exec2.model == "claude-sonnet-4-5"
|
||||
finally:
|
||||
os.environ.pop("ANTHROPIC_API_KEY", None)
|
||||
|
||||
# Path 3: Phase 2b — gemini explicit resolution
|
||||
os.environ["GEMINI_API_KEY"] = "gem-test"
|
||||
try:
|
||||
exec3 = create_executor(provider="gemini")
|
||||
assert exec3.provider_cfg.name == "gemini"
|
||||
assert exec3.provider_cfg.auth_scheme == "gemini"
|
||||
assert exec3.model == "gemini-2.5-flash"
|
||||
finally:
|
||||
os.environ.pop("GEMINI_API_KEY", None)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user