Merge pull request #240 from Molecule-AI/feat/hermes-phase2-native-sdks
feat(hermes): Phase 2a — native Anthropic Messages API dispatch (auth_scheme='anthropic')
commit 353dc306e9
workspace-template/adapters/hermes/executor.py
@@ -1,21 +1,32 @@
-"""Hermes adapter executor — Phase 1 multi-provider.
+"""Hermes adapter executor — Phase 2 multi-provider with native SDK dispatch.
 
-Hermes models are accessed via an OpenAI-compatible API. Phase 1 supports 15
-providers via the shared ``providers.py`` registry: Nous Portal, OpenRouter,
-OpenAI, Anthropic, xAI, Gemini, Qwen, GLM, Kimi, MiniMax, DeepSeek, Groq,
-Together, Fireworks, Mistral. Every provider is reached through an OpenAI-compat
-``/v1/chat/completions`` endpoint, so one code path handles all of them.
+Hermes supports 15 providers via the shared ``providers.py`` registry. Each
+provider's ``auth_scheme`` field controls which client + request shape the
+executor uses:
 
-Key resolution order (unchanged from PR 2, extended)
------------------------------------------------------
+- ``auth_scheme="openai"`` (14 providers) — OpenAI-compat ``/v1/chat/completions``
+  via the ``openai`` Python SDK. Covers: Nous Portal, OpenRouter, OpenAI, xAI,
+  Gemini, Qwen, GLM, Kimi, MiniMax, DeepSeek, Groq, Together, Fireworks, Mistral.
+
+- ``auth_scheme="anthropic"`` (1 provider — anthropic) — native Messages API via
+  the ``anthropic`` Python SDK. Phase 2 addition: better tool calling, vision
+  support, extended thinking semantics. If the ``anthropic`` package isn't
+  installed in the workspace image, ``_do_anthropic_native`` raises a clear
+  error with install instructions rather than silently falling back to the
+  OpenAI-compat shim (which would lose fidelity invisibly).
+
+Key resolution order (unchanged from Phase 1)
+----------------------------------------------
 1. ``hermes_api_key`` parameter (explicit call-site override — routes to Nous Portal)
 2. ``provider`` parameter (explicit provider name — looks up its env var(s))
 3. Auto-detect: walk ``providers.RESOLUTION_ORDER`` and pick the first provider
-   whose env var is set (``HERMES_API_KEY`` / ``OPENROUTER_API_KEY`` still come
-   first so PR 2 back-compat holds).
+   whose env var is set.
 
 Raises ``ValueError`` if nothing resolves. The error message lists every env var
 that was checked so the operator knows their options without reading source.
+
+Gemini native path (``auth_scheme="gemini"``) is intentionally NOT in this PR
+— Phase 2b will land it after measuring Phase 2a's Anthropic rollout.
 """
 
 from __future__ import annotations
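A sketch of that resolution order in use (hypothetical key values; ``create_executor`` and ``providers.RESOLUTION_ORDER`` are the names from this diff):

    # 1. Explicit key — always routes to Nous Portal, regardless of env vars.
    executor = create_executor(hermes_api_key="sk-nous-example")

    # 2. Explicit provider name — looks up that provider's env var(s),
    #    e.g. ANTHROPIC_API_KEY for provider="anthropic".
    executor = create_executor(provider="anthropic")

    # 3. Auto-detect — picks the first provider in providers.RESOLUTION_ORDER
    #    whose env var is set; raises ValueError (listing every var checked)
    #    if none is.
    executor = create_executor()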
@@ -24,7 +35,7 @@ import logging
 import os
 from typing import Optional
 
-from .providers import PROVIDERS, resolve_provider
+from .providers import PROVIDERS, ProviderConfig, resolve_provider
 
 logger = logging.getLogger(__name__)
 
@@ -69,22 +80,23 @@ def create_executor(
         cfg = PROVIDERS["nous_portal"]
         logger.debug("Hermes: using explicit hermes_api_key param (Nous Portal)")
         return HermesA2AExecutor(
             provider_cfg=cfg,
             api_key=hermes_api_key,
             base_url=cfg.base_url,
             model=model or cfg.default_model,
         )
 
     # Path 2/3: registry resolution (either explicit provider name or auto-detect).
     cfg, api_key = resolve_provider(provider)
     logger.info(
-        "Hermes: provider=%s base_url=%s model=%s",
+        "Hermes: provider=%s auth_scheme=%s base_url=%s model=%s",
         cfg.name,
+        cfg.auth_scheme,
         cfg.base_url,
         model or cfg.default_model,
     )
     return HermesA2AExecutor(
         provider_cfg=cfg,
         api_key=api_key,
         base_url=cfg.base_url,
         model=model or cfg.default_model,
     )
@@ -92,11 +104,10 @@ def create_executor(
 class HermesA2AExecutor:
     """LangGraph-compatible AgentExecutor for Hermes-style multi-provider LLMs.
 
-    Uses the OpenAI-compatible ``openai`` client pointed at whichever provider
-    was resolved by ``create_executor`` (Nous Portal, OpenRouter, OpenAI,
-    Anthropic, xAI, Gemini, Qwen, GLM, Kimi, MiniMax, DeepSeek, Groq, Together,
-    Fireworks, Mistral). Matches the pattern of sibling adapters (AutoGen,
-    LangGraph) which also use OpenAI-compat clients.
+    Dispatches each inference call based on ``provider_cfg.auth_scheme``:
+
+    - ``"openai"`` → OpenAI-compat ``/v1/chat/completions`` via the ``openai`` SDK
+    - ``"anthropic"`` → native Messages API via the ``anthropic`` SDK
 
     The ``execute()`` and ``cancel()`` async methods satisfy the
     ``a2a.server.agent_execution.AgentExecutor`` interface so this
@@ -105,16 +116,93 @@ class HermesA2AExecutor:
 
     def __init__(
         self,
+        provider_cfg: ProviderConfig,
         api_key: str,
         base_url: str,
         model: str,
         heartbeat=None,
     ):
+        self.provider_cfg = provider_cfg
         self.api_key = api_key
-        self.base_url = base_url
+        self.base_url = provider_cfg.base_url
         self.model = model
         self._heartbeat = heartbeat
 
+    # ------------------------------------------------------------------
+    # Per-provider inference paths
+    # ------------------------------------------------------------------
+
+    async def _do_openai_compat(self, task_text: str) -> str:
+        """OpenAI-compat inference — used by every provider with auth_scheme='openai'.
+
+        14 of the 15 registered providers route here. Uses ``openai.AsyncOpenAI``
+        pointed at the provider's base_url; every such provider's API is
+        wire-compatible with the OpenAI Chat Completions shape.
+        """
+        import openai
+
+        client = openai.AsyncOpenAI(
+            api_key=self.api_key,
+            base_url=self.base_url,
+        )
+        response = await client.chat.completions.create(
+            model=self.model,
+            messages=[{"role": "user", "content": task_text}],
+        )
+        return response.choices[0].message.content or ""
+
+    async def _do_anthropic_native(self, task_text: str) -> str:
+        """Native Anthropic Messages API inference.
+
+        Uses the official ``anthropic`` Python SDK for correct tool-calling,
+        vision, and extended-thinking semantics that don't translate cleanly
+        through the OpenAI-compat shim.
+
+        If the ``anthropic`` package is not installed in the workspace image,
+        we raise a clear error rather than silently falling back to the
+        OpenAI-compat path — silent fallback would mask the fidelity loss
+        (tool_use blocks become plain text, vision gets stripped, etc.).
+
+        Phase 2a minimum viable: single-turn text in, text out, no tools, no
+        vision. Phase 2b will add tool-calling, vision, and streaming via
+        the same path (still within this method).
+        """
+        try:
+            import anthropic
+        except ImportError as exc:  # pragma: no cover — exercised by test_anthropic_native_raises_clear_error_when_sdk_missing
+            raise RuntimeError(
+                "Hermes anthropic native path requires the `anthropic` package. "
+                "Install in the workspace image with `pip install anthropic>=0.39.0` "
+                "or set HERMES provider=openrouter to route Claude models through "
+                "OpenRouter's OpenAI-compat shim instead."
+            ) from exc
+
+        client = anthropic.AsyncAnthropic(api_key=self.api_key)
+        response = await client.messages.create(
+            model=self.model,
+            max_tokens=4096,
+            messages=[{"role": "user", "content": task_text}],
+        )
+        # response.content is a list of ContentBlock; for single-turn text-only
+        # requests the first block is a TextBlock with a .text attribute.
+        if response.content and hasattr(response.content[0], "text"):
+            return response.content[0].text
+        return ""
+
+    async def _do_inference(self, task_text: str) -> str:
+        """Dispatch to the right inference path based on provider auth_scheme."""
+        scheme = self.provider_cfg.auth_scheme
+        if scheme == "anthropic":
+            return await self._do_anthropic_native(task_text)
+        if scheme == "openai":
+            return await self._do_openai_compat(task_text)
+        # Unknown scheme — treat as openai-compat for forward-compat with any
+        # future provider the registry adds without yet having a native path.
+        logger.warning(
+            "Hermes: unknown auth_scheme=%r for provider=%s — falling back to openai-compat",
+            scheme, self.provider_cfg.name,
+        )
+        return await self._do_openai_compat(task_text)
+
     # ------------------------------------------------------------------
     # AgentExecutor interface
     # ------------------------------------------------------------------
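A minimal end-to-end sketch of the dispatch above (assumptions: the adapter is importable as a package and ANTHROPIC_API_KEY is set; the import path is inferred from the test file below, not stated in this diff):

    import asyncio

    from adapters.hermes.executor import create_executor  # path assumed

    async def main() -> None:
        # auth_scheme="anthropic" in the registry → _do_inference routes to
        # _do_anthropic_native and the native Messages API. In production,
        # execute() is what calls _do_inference.
        executor = create_executor(provider="anthropic")
        print(await executor._do_inference("Reply with one word: ping"))

    asyncio.run(main())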
@@ -138,21 +226,8 @@ class HermesA2AExecutor:
         await set_current_task(self._heartbeat, brief_task(user_message))
 
         try:
-            import openai
-
-            client = openai.AsyncOpenAI(
-                api_key=self.api_key,
-                base_url=self.base_url,
-            )
-
             task_text = build_task_text(user_message, extract_history(context))
-
-            response = await client.chat.completions.create(
-                model=self.model,
-                messages=[{"role": "user", "content": task_text}],
-            )
-            reply = response.choices[0].message.content or ""
-
+            reply = await self._do_inference(task_text)
         except Exception as exc:
             logger.exception("Hermes executor error: %s", exc)
             reply = f"Hermes error: {exc}"
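With ``execute()`` now delegating to ``_do_inference``, adding a scheme means one private method plus one dispatch branch. The deferred Phase 2b Gemini path would presumably slot in like this (hypothetical — ``_do_gemini_native`` does not exist in this PR; see the module docstring):

    # In _do_inference, alongside the existing branches:
    if scheme == "gemini":
        return await self._do_gemini_native(task_text)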
workspace-template/adapters/hermes/providers.py
@@ -113,10 +113,14 @@ PROVIDERS: dict[str, ProviderConfig] = {
     "anthropic": ProviderConfig(
         name="anthropic",
         env_vars=("ANTHROPIC_API_KEY",),
-        base_url="https://api.anthropic.com/v1",
+        base_url="https://api.anthropic.com",
         default_model="claude-sonnet-4-5",
-        docs="Anthropic — Phase 1 uses the OpenAI-compat shim at /v1. Phase 2 "
-             "will add the native Messages API path for better tool calling.",
+        auth_scheme="anthropic",
+        docs="Anthropic — Phase 2 uses the native Messages API via the official "
+             "`anthropic` Python SDK for correct tool calling, vision, and "
+             "extended thinking semantics. If the SDK isn't installed in the "
+             "workspace image, the executor raises a clear error pointing at "
+             "`pip install anthropic>=0.39.0`.",
     ),
     "xai": ProviderConfig(
         name="xai",
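For reference, the fields exercised above imply a registry-entry shape roughly like this (a sketch reconstructed from usage; the real dataclass lives in providers.py and is not shown in this diff — the "openai" default for auth_scheme is an assumption):

    from dataclasses import dataclass

    @dataclass(frozen=True)
    class ProviderConfig:
        name: str
        env_vars: tuple[str, ...]    # checked in order during key resolution
        base_url: str
        default_model: str
        auth_scheme: str = "openai"  # "anthropic" opts into the native SDK path
        docs: str = ""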
workspace-template/adapters/hermes/requirements.txt
@@ -1,5 +1,15 @@
-# Hermes models are accessed via OpenAI-compatible endpoints (Nous Portal or OpenRouter).
-# openai: primary client for both Nous Portal (custom base_url) and OpenRouter routing.
-# If NousResearch publishes a dedicated hermes-agent PyPI package, add it here and
-# verify the import path before implementing adapter.py (see PR 2 open questions).
+# Hermes adapter dependencies.
+#
+# openai: primary client for the 14 OpenAI-compat providers in providers.py
+# (Nous Portal, OpenRouter, OpenAI, xAI, Gemini, Qwen, GLM, Kimi, MiniMax,
+# DeepSeek, Groq, Together, Fireworks, Mistral — all reachable via one openai
+# SDK pointed at different base URLs).
 openai>=1.0.0
+
+# anthropic: native Messages API client for the anthropic provider (auth_scheme
+# = "anthropic" in providers.py). Phase 2 addition — gives correct tool calling,
+# vision, and extended-thinking semantics that don't translate cleanly through
+# the OpenAI-compat shim. If this package is missing at runtime, executor.py's
+# _do_anthropic_native() raises a clear RuntimeError pointing back at this
+# install line, so a workspace image built without it fails loud, not silent.
+anthropic>=0.39.0
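A quick fail-loud check that a built workspace image actually carries the pin (a sketch; assumes the image runs Python ≥ 3.8, where importlib.metadata is stdlib):

    from importlib.metadata import version

    # Prints the installed version, or raises PackageNotFoundError if the
    # image was built without the anthropic install line above.
    print(version("anthropic"))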
workspace-template/tests/test_hermes_phase2_dispatch.py (new file, 175 lines)
@@ -0,0 +1,175 @@
"""Tests for Phase 2 auth_scheme dispatch in adapters/hermes/executor.py.

These cover the NEW behavior only (HermesA2AExecutor._do_inference dispatch
based on ProviderConfig.auth_scheme). Phase 1 registry tests live in
test_hermes_providers.py — unchanged by Phase 2.
"""

from __future__ import annotations

import os
import sys
from pathlib import Path
from unittest.mock import AsyncMock

import pytest

# Load providers.py directly (same pattern as test_hermes_providers.py).
_HERMES_DIR = Path(__file__).parent.parent / "adapters" / "hermes"
sys.path.insert(0, str(_HERMES_DIR))
import providers  # type: ignore  # noqa: E402

def _make_executor(provider_name: str):
    """Build a HermesA2AExecutor directly, without going through create_executor.

    executor.py cannot be imported as a plain module here: its ``from
    .providers import ...`` relative import only resolves when it is loaded
    as part of the ``adapters.hermes`` package, while this test pulls in
    providers.py via sys.path manipulation. So we read executor.py's source,
    rewrite the relative import to the sibling-style ``from providers import
    ...``, and exec the result.
    """
    cfg = providers.PROVIDERS[provider_name]
    src = (_HERMES_DIR / "executor.py").read_text()
    src = src.replace("from .providers import", "from providers import")
    # Seed __name__: the exec'd module calls logging.getLogger(__name__) at
    # import time, which would NameError in bare exec globals.
    ns: dict = {"__name__": "hermes_executor_under_test"}
    exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns)
    HermesA2AExecutor = ns["HermesA2AExecutor"]
    return HermesA2AExecutor(
        provider_cfg=cfg,
        api_key="test-key",
        base_url=cfg.base_url,  # required by __init__ even though Phase 2 re-derives it
        model=cfg.default_model,
    )


def test_anthropic_entry_has_anthropic_scheme():
    """The registry flip: Phase 2 sets anthropic's auth_scheme to 'anthropic'."""
    cfg = providers.PROVIDERS["anthropic"]
    assert cfg.auth_scheme == "anthropic"


def test_all_other_providers_still_openai_scheme():
    """Phase 2 only changes anthropic. Every other provider keeps auth_scheme='openai'."""
    for name, cfg in providers.PROVIDERS.items():
        if name == "anthropic":
            continue
        assert cfg.auth_scheme == "openai", (
            f"{name} unexpectedly has auth_scheme={cfg.auth_scheme!r}"
        )


@pytest.mark.asyncio
async def test_dispatch_openai_scheme_calls_openai_compat():
    """auth_scheme='openai' → _do_openai_compat runs, _do_anthropic_native does not."""
    executor = _make_executor("openai")
    executor._do_openai_compat = AsyncMock(return_value="openai-result")
    executor._do_anthropic_native = AsyncMock(return_value="should-not-run")

    result = await executor._do_inference("hello")

    executor._do_openai_compat.assert_awaited_once_with("hello")
    executor._do_anthropic_native.assert_not_awaited()
    assert result == "openai-result"


@pytest.mark.asyncio
async def test_dispatch_anthropic_scheme_calls_anthropic_native():
    """auth_scheme='anthropic' → _do_anthropic_native runs, _do_openai_compat does not."""
    executor = _make_executor("anthropic")
    executor._do_openai_compat = AsyncMock(return_value="should-not-run")
    executor._do_anthropic_native = AsyncMock(return_value="anthropic-result")

    result = await executor._do_inference("hello")

    executor._do_anthropic_native.assert_awaited_once_with("hello")
    executor._do_openai_compat.assert_not_awaited()
    assert result == "anthropic-result"


@pytest.mark.asyncio
async def test_dispatch_unknown_scheme_falls_back_to_openai_compat():
    """Unknown auth_scheme → log a warning + fall back to openai-compat (forward-compat)."""
    executor = _make_executor("openai")
    # Mutate the cfg field to simulate an unknown scheme (testing the
    # dispatch, not the registry).
    executor.provider_cfg = providers.ProviderConfig(
        name="futureprovider",
        env_vars=("FOO",),
        base_url="https://example.com/v1",
        default_model="foo",
        auth_scheme="some_future_scheme",
    )
    executor._do_openai_compat = AsyncMock(return_value="fallback-result")
    executor._do_anthropic_native = AsyncMock()

    result = await executor._do_inference("hello")

    executor._do_openai_compat.assert_awaited_once()
    executor._do_anthropic_native.assert_not_awaited()
    assert result == "fallback-result"


@pytest.mark.asyncio
async def test_anthropic_native_raises_clear_error_when_sdk_missing(monkeypatch):
    """If the anthropic package is not installed, _do_anthropic_native raises
    a clear RuntimeError with install instructions — it does NOT silently
    fall back to the OpenAI-compat shim (which would lose tool-calling +
    vision fidelity invisibly).
    """
    executor = _make_executor("anthropic")

    # Simulate ImportError on `import anthropic`. We do this by clobbering
    # the name in sys.modules so the import statement inside
    # _do_anthropic_native hits an ImportError.
    monkeypatch.setitem(sys.modules, "anthropic", None)

    with pytest.raises(RuntimeError, match="anthropic"):
        await executor._do_anthropic_native("hello")


def test_create_executor_passes_provider_cfg():
    """create_executor's back-compat paths should set .provider_cfg on the
    returned executor so dispatch has auth_scheme available at runtime."""
    # Direct-load executor.py the same way _make_executor does.
    src = (_HERMES_DIR / "executor.py").read_text().replace(
        "from .providers import", "from providers import"
    )
    ns: dict = {"__name__": "hermes_executor_under_test"}
    exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns)
    create_executor = ns["create_executor"]

    # Path 1: hermes_api_key back-compat → nous_portal cfg
    exec1 = create_executor(hermes_api_key="test-key")
    assert exec1.provider_cfg.name == "nous_portal"
    assert exec1.provider_cfg.auth_scheme == "openai"

    # Path 2: explicit provider name → that cfg (anthropic has the new scheme)
    os.environ["ANTHROPIC_API_KEY"] = "ant-test"
    try:
        exec2 = create_executor(provider="anthropic")
        assert exec2.provider_cfg.name == "anthropic"
        assert exec2.provider_cfg.auth_scheme == "anthropic"
        assert exec2.model == "claude-sonnet-4-5"
    finally:
        os.environ.pop("ANTHROPIC_API_KEY", None)
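Note on running these: the async tests rely on @pytest.mark.asyncio, which presumably needs the pytest-asyncio plugin — an assumption, since it is not pinned in the requirements shown above. Something like `pip install pytest pytest-asyncio` followed by `pytest workspace-template/tests/test_hermes_phase2_dispatch.py -v` should exercise the whole file.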