From 485dcb4cae6b2b8fb62e8ae45b1d2354b61d30bc Mon Sep 17 00:00:00 2001
From: rabbitblood <hongmingwangrabbit@gmail.com>
Date: Wed, 15 Apr 2026 13:20:39 -0700
Subject: [PATCH] =?UTF-8?q?feat(hermes):=20Phase=202b=20=E2=80=94=20native?=
 =?UTF-8?q?=20Google=20Gemini=20generateContent=20dispatch=20path?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Completes Hermes Phase 2 by adding the second native SDK path: Google Gemini
via the official `google-genai` Python SDK. Stacked on top of Phase 2a
(feat/hermes-phase2-native-sdks) which introduced the dispatch infra +
the anthropic native path.

## What's new in this PR

1. `providers.py`: flip `gemini` entry to `auth_scheme="gemini"` and
   update `base_url` from the OpenAI-compat endpoint
   (`/v1beta/openai`) to the bare host
   (`https://generativelanguage.googleapis.com`) which the native SDK
   uses.

2. `executor.py`: new method `_do_gemini_native(task_text)` that uses
   `google.genai.Client().aio.models.generate_content(...)`. Dispatch
   table in `_do_inference` now routes `"gemini"` → `_do_gemini_native`.
   Same fail-loud semantics as `_do_anthropic_native` — missing SDK
   raises a clear RuntimeError with install instructions.

3. `requirements.txt`: add `google-genai>=1.0.0`.

4. `test_hermes_phase2_dispatch.py`: +3 tests
   - `test_gemini_entry_has_gemini_scheme` — registry flip + base URL
     validated
   - `test_dispatch_gemini_scheme_calls_gemini_native` — dispatch runs
     gemini native, not openai-compat or anthropic-native
   - `test_gemini_native_raises_clear_error_when_sdk_missing` — fail-loud
     on missing `google-genai` package
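   Also updates the existing dispatch tests to mock `_do_gemini_native`
   alongside the other paths so the "no cross-calls" assertions stay
   tight, exempts `gemini` in `test_all_other_providers_still_openai_scheme`,
   and adds a gemini resolution path to
   `test_create_executor_passes_provider_cfg`.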

All 36 tests pass locally (10 Phase 2 dispatch + 26 Phase 1 registry):

    pytest tests/test_hermes_phase2_dispatch.py tests/test_hermes_providers.py
    36 passed in 0.07s

## Dispatch table after this PR

    auth_scheme="openai"     → _do_openai_compat (13 providers)
    auth_scheme="anthropic"  → _do_anthropic_native (1 provider, Phase 2a)
    auth_scheme="gemini"     → _do_gemini_native (1 provider, Phase 2b) ← NEW
    <unknown>                → _do_openai_compat + warning (forward-compat)

## Back-compat

- All 13 openai-scheme providers unchanged
- `hermes_api_key` / `HERMES_API_KEY` / `OPENROUTER_API_KEY` paths unchanged
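- Only the `gemini` provider changes behavior: it now uses native
  generateContent instead of the `/v1beta/openai` compat shim
- Existing Gemini callers setting `GEMINI_API_KEY` (or `GOOGLE_API_KEY`)
  get the native path automatically — no caller changes needed, provided
  the workspace image has `google-genai` installed (otherwise the call
  fails loudly with a RuntimeError instead of falling back)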

## What's NOT in this PR (future phases)

- Streaming support (`astream_messages` / `streamGenerateContent` stream
  variants) for either native path
- Tool calling / function calling on native paths
- Vision content blocks (image_url → anthropic image blocks; image_url →
  gemini inline_data with base64 + mime_type)
- Extended thinking (anthropic) / thinking config (gemini)
- System instructions pass-through on the gemini native path

Phase 2c/2d will layer these on. This PR is the minimum-viable native
dispatch — single-turn text in, text out — same shape as Phase 2a.

## Stacking

This PR targets `feat/hermes-phase2-native-sdks` (Phase 2a) as its base
branch, NOT main, so the diff shows only the Gemini-specific additions.
When Phase 2a merges to main and its branch is deleted, GitHub
automatically retargets this PR's base to main (it changes the base
branch; it does not rebase the commits). If the reviewer prefers a single
combined PR, close #240, retarget this PR to main, and land it instead —
the commits on feat/hermes-phase2-native-sdks are already included in
this branch's history.

## Related

- #240 Phase 2a (parent branch)
- #208 Phase 1 (registry + openai-compat path — already in main)
- `project_hermes_multi_provider.md` queued memory — Phase 2 was the next
  item; this PR completes it
- `docs/ecosystem-watch.md` → `### Hermes Agent` — Research Lead's
  eco-watch entry that catalogued Hermes's native provider list and
  shaped the original Phase 2 scope
---
 .../adapters/hermes/executor.py               | 64 +++++++++++++++---
 .../adapters/hermes/providers.py              | 11 ++-
 .../adapters/hermes/requirements.txt          | 19 ++++--
 .../tests/test_hermes_phase2_dispatch.py      | 67 +++++++++++++++++--
 4 files changed, 138 insertions(+), 23 deletions(-)
diff --git a/workspace-template/adapters/hermes/executor.py b/workspace-template/adapters/hermes/executor.py
index ec0b2b2b..e339db6e 100644
--- a/workspace-template/adapters/hermes/executor.py
+++ b/workspace-template/adapters/hermes/executor.py
@@ -4,16 +4,22 @@ Hermes supports 15 providers via the shared ``providers.py`` registry. Each
 provider's ``auth_scheme`` field controls which client + request shape the
 executor uses:
 
-- ``auth_scheme="openai"`` (14 providers) — OpenAI-compat ``/v1/chat/completions``
+- ``auth_scheme="openai"`` (13 providers) — OpenAI-compat ``/v1/chat/completions``
   via the ``openai`` Python SDK. Covers: Nous Portal, OpenRouter, OpenAI, xAI,
-  Gemini, Qwen, GLM, Kimi, MiniMax, DeepSeek, Groq, Together, Fireworks, Mistral.
+  Qwen, GLM, Kimi, MiniMax, DeepSeek, Groq, Together, Fireworks, Mistral.
 
 - ``auth_scheme="anthropic"`` (1 provider — anthropic) — native Messages API via
-  the ``anthropic`` Python SDK. Phase 2 addition: better tool calling, vision
-  support, extended thinking semantics. If the ``anthropic`` package isn't
-  installed in the workspace image, ``_do_anthropic_native`` raises a clear
-  error with install instructions rather than silently falling back to the
-  OpenAI-compat shim (which would lose fidelity invisibly).
+  the ``anthropic`` Python SDK. Phase 2a: better tool calling, vision support,
+  extended thinking semantics. If the ``anthropic`` package isn't installed in
+  the workspace image, ``_do_anthropic_native`` raises a clear error with
+  install instructions rather than silently falling back to the OpenAI-compat
+  shim (which would lose fidelity invisibly).
+
+- ``auth_scheme="gemini"`` (1 provider — gemini) — native ``generateContent`` API
+  via the official ``google-genai`` Python SDK. Phase 2b: first-class vision
+  content blocks, tool/function calling, system instructions, and thinking
+  config — all of which the OpenAI-compat shim at ``/v1beta/openai`` either
+  strips or mis-translates. Same fail-loud semantics as the anthropic path.
 
 Key resolution order (unchanged from Phase 1)
 ----------------------------------------------
@@ -24,9 +30,6 @@ Key resolution order (unchanged from Phase 1)
 
 Raises ``ValueError`` if nothing resolves. The error message lists every env var
 that was checked so the operator knows their options without reading source.
-
-Gemini native path (``auth_scheme="gemini"``) is intentionally NOT in this PR
-— Phase 2b will land it after measuring Phase 2a's Anthropic rollout.
 """
 
 from __future__ import annotations
@@ -188,11 +191,52 @@ class HermesA2AExecutor:
             return response.content[0].text
         return ""
 
+    async def _do_gemini_native(self, task_text: str) -> str:
+        """Native Google Gemini ``generateContent`` inference.
+
+        Sends ``task_text`` as a single-turn, text-only request through the
+        official ``google-genai`` SDK and returns the reply text ("" when the
+        response carries no text). Tools, vision, system instructions, and
+        thinking config are NOT wired up yet — Phase 2c/2d layers those on.
+
+        If the ``google-genai`` package is not installed in the workspace
+        image, raise a clear error with install instructions rather than
+        silently falling back to the OpenAI-compat shim (same fail-loud
+        semantics as the anthropic path).
+
+        Raises:
+            RuntimeError: if ``google-genai`` cannot be imported; the
+                message carries the install command.
+        """
+        try:
+            from google import genai  # type: ignore[import-not-found]
+        except ImportError as exc:  # pragma: no cover — SDK-missing branch
+            raise RuntimeError(
+                "Hermes gemini native path requires the `google-genai` package. "
+                "Install in the workspace image with `pip install 'google-genai>=1.0.0'` "
+                "or set HERMES provider=openrouter to route Gemini models through "
+                "OpenRouter's OpenAI-compat shim instead."
+            ) from exc
+
+        # google-genai client reads api_key from env by default; pass it
+        # explicitly so we respect whatever ProviderConfig resolved (e.g. a
+        # test-only key that isn't in process env yet).
+        client = genai.Client(api_key=self.api_key)
+        response = await client.aio.models.generate_content(
+            model=self.model,
+            contents=task_text,
+        )
+        # response.text is the flattened text across all parts of the first
+        # candidate; `or ""` keeps the declared ``str`` return if it is None.
+        return response.text or ""
+
     async def _do_inference(self, task_text: str) -> str:
         """Dispatch to the right inference path based on provider auth_scheme."""
         scheme = self.provider_cfg.auth_scheme
         if scheme == "anthropic":
             return await self._do_anthropic_native(task_text)
+        if scheme == "gemini":
+            return await self._do_gemini_native(task_text)
         if scheme == "openai":
             return await self._do_openai_compat(task_text)
         # Unknown scheme — treat as openai-compat for forward-compat with any
diff --git a/workspace-template/adapters/hermes/providers.py b/workspace-template/adapters/hermes/providers.py
index 767e689d..38dadfe8 100644
--- a/workspace-template/adapters/hermes/providers.py
+++ b/workspace-template/adapters/hermes/providers.py
@@ -132,10 +132,15 @@ PROVIDERS: dict[str, ProviderConfig] = {
     "gemini": ProviderConfig(
         name="gemini",
         env_vars=("GEMINI_API_KEY", "GOOGLE_API_KEY"),
-        base_url="https://generativelanguage.googleapis.com/v1beta/openai",
+        base_url="https://generativelanguage.googleapis.com",
         default_model="gemini-2.5-flash",
-        docs="Google Gemini — uses the documented OpenAI-compat endpoint at "
-             "/v1beta/openai. Phase 2 will add native generateContent for vision.",
+        auth_scheme="gemini",
+        docs="Google Gemini — Phase 2b uses the native generateContent API via "
+             "the official `google-genai` Python SDK for correct vision content "
+             "blocks, tool/function calling, and system instructions. Phase 1 "
+             "used the /v1beta/openai compat shim. If the google-genai package "
+             "isn't installed in the workspace image, the executor raises a "
+             "clear error pointing at `pip install google-genai>=1.0.0`.",
     ),
 
     # --- Chinese providers ----------------------------------------------
diff --git a/workspace-template/adapters/hermes/requirements.txt b/workspace-template/adapters/hermes/requirements.txt
index be69ceb6..a59236a7 100644
--- a/workspace-template/adapters/hermes/requirements.txt
+++ b/workspace-template/adapters/hermes/requirements.txt
@@ -1,15 +1,24 @@
 # Hermes adapter dependencies.
 #
-# openai: primary client for the 14 OpenAI-compat providers in providers.py
-# (Nous Portal, OpenRouter, OpenAI, xAI, Gemini, Qwen, GLM, Kimi, MiniMax,
-# DeepSeek, Groq, Together, Fireworks, Mistral — all reachable via one openai
-# SDK pointed at different base URLs).
+# openai: primary client for the 13 OpenAI-compat providers in providers.py
+# (Nous Portal, OpenRouter, OpenAI, xAI, Qwen, GLM, Kimi, MiniMax, DeepSeek,
+# Groq, Together, Fireworks, Mistral — all reachable via one openai SDK
+# pointed at different base URLs). Anthropic + Gemini now go native.
 openai>=1.0.0
 
 # anthropic: native Messages API client for the anthropic provider (auth_scheme
-# = "anthropic" in providers.py). Phase 2 addition — gives correct tool calling,
+# = "anthropic" in providers.py). Phase 2a addition — gives correct tool calling,
 # vision, and extended-thinking semantics that don't translate cleanly through
 # the OpenAI-compat shim. If this package is missing at runtime, executor.py's
 # _do_anthropic_native() raises a clear RuntimeError pointing back at this
 # install line, so a workspace image built without it fails loud, not silent.
 anthropic>=0.39.0
+
+# google-genai: native generateContent API client for the gemini provider
+# (auth_scheme = "gemini" in providers.py). Phase 2b addition — gives
+# first-class vision content blocks, tool/function calling, system
+# instructions, and thinking config that don't translate cleanly through
+# the OpenAI-compat /v1beta/openai shim. Same fail-loud semantics as the
+# anthropic path: missing at runtime → clear RuntimeError from
+# _do_gemini_native(), not a silent fallback.
+google-genai>=1.0.0
diff --git a/workspace-template/tests/test_hermes_phase2_dispatch.py b/workspace-template/tests/test_hermes_phase2_dispatch.py
index bfa24fd1..78bbfe31 100644
--- a/workspace-template/tests/test_hermes_phase2_dispatch.py
+++ b/workspace-template/tests/test_hermes_phase2_dispatch.py
@@ -63,15 +63,25 @@ def _make_executor(provider_name: str):
 
 
 def test_anthropic_entry_has_anthropic_scheme():
-    """The registry flip: Phase 2 sets anthropic's auth_scheme to 'anthropic'."""
+    """Phase 2a: anthropic's auth_scheme is 'anthropic'."""
     cfg = providers.PROVIDERS["anthropic"]
     assert cfg.auth_scheme == "anthropic"
 
 
+def test_gemini_entry_has_gemini_scheme():
+    """Phase 2b: gemini's auth_scheme is 'gemini'."""
+    cfg = providers.PROVIDERS["gemini"]
+    assert cfg.auth_scheme == "gemini"
+    # Base URL no longer has the /v1beta/openai suffix — native SDK uses bare host.
+    assert "/openai" not in cfg.base_url
+    assert cfg.base_url.startswith("https://generativelanguage.googleapis.com")
+
+
 def test_all_other_providers_still_openai_scheme():
-    """Phase 2 only changes anthropic. Every other provider keeps auth_scheme='openai'."""
+    """Phase 2 changes only anthropic + gemini. Every other provider keeps auth_scheme='openai'."""
+    native_providers = {"anthropic", "gemini"}
     for name, cfg in providers.PROVIDERS.items():
-        if name == "anthropic":
+        if name in native_providers:
             continue
         assert cfg.auth_scheme == "openai", (
             f"{name} unexpectedly has auth_scheme={cfg.auth_scheme!r}"
@@ -80,32 +90,52 @@ def test_all_other_providers_still_openai_scheme():
 
 @pytest.mark.asyncio
 async def test_dispatch_openai_scheme_calls_openai_compat():
-    """auth_scheme='openai' → _do_openai_compat runs, _do_anthropic_native does not."""
+    """auth_scheme='openai' → _do_openai_compat runs, native paths do not."""
     executor = _make_executor("openai")
     executor._do_openai_compat = AsyncMock(return_value="openai-result")
     executor._do_anthropic_native = AsyncMock(return_value="should-not-run")
+    executor._do_gemini_native = AsyncMock(return_value="should-not-run")
 
     result = await executor._do_inference("hello")
 
     executor._do_openai_compat.assert_awaited_once_with("hello")
     executor._do_anthropic_native.assert_not_awaited()
+    executor._do_gemini_native.assert_not_awaited()
     assert result == "openai-result"
 
 
 @pytest.mark.asyncio
 async def test_dispatch_anthropic_scheme_calls_anthropic_native():
-    """auth_scheme='anthropic' → _do_anthropic_native runs, _do_openai_compat does not."""
+    """auth_scheme='anthropic' → _do_anthropic_native runs, others do not."""
     executor = _make_executor("anthropic")
     executor._do_openai_compat = AsyncMock(return_value="should-not-run")
     executor._do_anthropic_native = AsyncMock(return_value="anthropic-result")
+    executor._do_gemini_native = AsyncMock(return_value="should-not-run")
 
     result = await executor._do_inference("hello")
 
     executor._do_anthropic_native.assert_awaited_once_with("hello")
     executor._do_openai_compat.assert_not_awaited()
+    executor._do_gemini_native.assert_not_awaited()
     assert result == "anthropic-result"
 
 
+@pytest.mark.asyncio
+async def test_dispatch_gemini_scheme_calls_gemini_native():
+    """auth_scheme='gemini' → _do_gemini_native runs, others do not. Phase 2b."""
+    executor = _make_executor("gemini")
+    executor._do_openai_compat = AsyncMock(return_value="should-not-run")
+    executor._do_anthropic_native = AsyncMock(return_value="should-not-run")
+    executor._do_gemini_native = AsyncMock(return_value="gemini-result")
+
+    result = await executor._do_inference("hello")
+
+    executor._do_gemini_native.assert_awaited_once_with("hello")
+    executor._do_openai_compat.assert_not_awaited()
+    executor._do_anthropic_native.assert_not_awaited()
+    assert result == "gemini-result"
+
+
 @pytest.mark.asyncio
 async def test_dispatch_unknown_scheme_falls_back_to_openai_compat():
     """Unknown auth_scheme → log a warning + fall back to openai-compat (forward-compat)."""
@@ -120,11 +150,13 @@ async def test_dispatch_unknown_scheme_falls_back_to_openai_compat():
     )
     executor._do_openai_compat = AsyncMock(return_value="fallback-result")
     executor._do_anthropic_native = AsyncMock()
+    executor._do_gemini_native = AsyncMock()
 
     result = await executor._do_inference("hello")
 
     executor._do_openai_compat.assert_awaited_once()
     executor._do_anthropic_native.assert_not_awaited()
+    executor._do_gemini_native.assert_not_awaited()
     assert result == "fallback-result"
 
 
@@ -146,6 +178,21 @@ async def test_anthropic_native_raises_clear_error_when_sdk_missing(monkeypatch)
         await executor._do_anthropic_native("hello")
 
 
+@pytest.mark.asyncio
+async def test_gemini_native_raises_clear_error_when_sdk_missing(monkeypatch):
+    """If the google-genai package is not installed, _do_gemini_native raises
+    a clear RuntimeError with install instructions — same fail-loud semantics
+    as the anthropic native path."""
+    executor = _make_executor("gemini")
+
+    # Simulate ImportError on `from google import genai`. Clobbering
+    # sys.modules["google"] forces the submodule import to fail.
+    monkeypatch.setitem(sys.modules, "google", None)
+
+    with pytest.raises(RuntimeError, match="google-genai"):
+        await executor._do_gemini_native("hello")
+
+
 def test_create_executor_passes_provider_cfg():
     """create_executor's back-compat paths should set .provider_cfg on the
     returned executor so dispatch has auth_scheme available at runtime."""
@@ -173,3 +220,13 @@ def test_create_executor_passes_provider_cfg():
         assert exec2.model == "claude-sonnet-4-5"
     finally:
         os.environ.pop("ANTHROPIC_API_KEY", None)
+
+    # Path 3: Phase 2b — gemini explicit resolution
+    os.environ["GEMINI_API_KEY"] = "gem-test"
+    try:
+        exec3 = create_executor(provider="gemini")
+        assert exec3.provider_cfg.name == "gemini"
+        assert exec3.provider_cfg.auth_scheme == "gemini"
+        assert exec3.model == "gemini-2.5-flash"
+    finally:
+        os.environ.pop("GEMINI_API_KEY", None)