From d40a9d940c9027e5a2793d822d6e03080fe84bc9 Mon Sep 17 00:00:00 2001 From: rabbitblood Date: Wed, 15 Apr 2026 14:21:10 -0700 Subject: [PATCH] =?UTF-8?q?feat(hermes):=20Phase=202c=20=E2=80=94=20multi-?= =?UTF-8?q?turn=20history=20passed=20natively=20to=20all=20paths?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completes the Phase 2 scope by keeping conversation turns as turns across all three dispatch paths. Pre-2c, history was flattened into a single user message via shared_runtime.build_task_text, which worked as a fallback but lost the model's native multi-turn awareness (role attribution, instruction-following on mid-conversation corrections, system-prompt grounding against prior turns). Phase 2a + 2b shipped the dispatch infrastructure + per-provider native paths. This PR uses them properly. ## What's new - **`_history_to_openai_messages(user_message, history)`** (static) — maps A2A `(role, text)` tuples to OpenAI Chat Completions `[{"role":"user"|"assistant","content":str}]`. Roles: `human`→`user`, `ai`→`assistant`. Current turn appended as the final user message. - **`_history_to_anthropic_messages`** (static) — identical wire shape to OpenAI for text-only turns, so it delegates. Phase 2d tool_use/vision blocks will diverge here. - **`_history_to_gemini_contents`** (static) — Gemini uses a different shape: `role="user"|"model"` (NOT "assistant") and text wrapped in `parts=[{"text":...}]`. Delegates to none of the others. - **`_do_openai_compat(user_message, history=None)`** — accepts history, builds messages via `_history_to_openai_messages`. Back-compat: pass `history=None` to get the old single-turn behavior. - **`_do_anthropic_native(user_message, history=None)`** — same signature change, calls `_history_to_anthropic_messages`. Still uses `anthropic.AsyncAnthropic().messages.create()`, just with proper multi-turn. - **`_do_gemini_native(user_message, history=None)`** — same pattern, calls `_history_to_gemini_contents`, passes to Gemini's `generate_content(contents=...)`. - **`_do_inference(user_message, history=None)`** — new signature, dispatches by auth_scheme as before, passes both args through. - **`execute()`** — no longer calls `build_task_text`. Calls `extract_history(context)` directly and forwards to `_do_inference`. Removes the `build_task_text` import (not needed in this file anymore). ## Tests Existing 7 dispatch tests updated for the new `(user_message, history)` signature — they assert the path is called with `("hello", None)` since they pass no history. 5 NEW tests: - `test_history_to_openai_messages_empty_history` — empty history degrades to single user message (back-compat) - `test_history_to_openai_messages_multi_turn` — round-trip of a 3-turn history + current turn - `test_history_to_anthropic_messages_same_as_openai` — cross-check that anthropic path produces identical wire shape for text-only - `test_history_to_gemini_contents_uses_model_role_and_parts_wrapper` — verifies the Gemini-specific role mapping (`ai`→`model`) + parts wrapper - `test_dispatch_passes_history_through` — end-to-end: _do_inference forwards history to the chosen provider path All 41 tests pass (15 Phase 2 dispatch + 26 Phase 1 registry): pytest tests/test_hermes_phase2_dispatch.py tests/test_hermes_providers.py 41 passed in 0.07s ## Back-compat - No public API changes to `create_executor()`. Callers that hit `execute()` via A2A get the new multi-turn behavior automatically via `extract_history(context)`. - Callers that passed an empty history list (or None) get the same single-turn behavior as pre-2c. - The `build_task_text` helper in shared_runtime is unchanged — other adapters (AutoGen, LangGraph) that use it keep working. Only Hermes bypasses it now. ## What's NOT in this PR (Phase 2d) - Tool calling / function calling on native paths (anthropic `tools=`, gemini `tools=Tool(function_declarations=[...])`) - Vision content blocks (image_url → anthropic `{type:"image", source: {type:"base64",...}}` / gemini `{inline_data:{mime_type,data}}`) - System instructions pass-through (anthropic `system=`, gemini `system_instruction=`) - Streaming (`astream_messages` / `streamGenerateContent` stream variants) - Extended thinking (anthropic `thinking={"type":"enabled"}`) / Gemini thinking config Phase 2c is the **multi-turn upgrade**. Tool + vision + streaming are Phase 2d, scoped in project_hermes_multi_provider.md. ## Related - #240 Phase 2a (native Anthropic dispatch — in main) - #255 Phase 2b (native Gemini dispatch — in main) - Phase 1 (#208 — provider registry baseline, in main) - `project_hermes_multi_provider.md` queued memory - CEO 2026-04-15: "focus on supporting hermes agent" --- .../adapters/hermes/executor.py | 155 ++++++++++++++---- .../tests/test_hermes_phase2_dispatch.py | 97 ++++++++++- 2 files changed, 220 insertions(+), 32 deletions(-) diff --git a/workspace-template/adapters/hermes/executor.py b/workspace-template/adapters/hermes/executor.py index e339db6e..c2a31a0f 100644 --- a/workspace-template/adapters/hermes/executor.py +++ b/workspace-template/adapters/hermes/executor.py @@ -130,16 +130,90 @@ class HermesA2AExecutor: self.model = model self._heartbeat = heartbeat + # ------------------------------------------------------------------ + # History → provider-specific message list converters + # ------------------------------------------------------------------ + # + # The A2A shared runtime gives us history as ``list[tuple[str, str]]`` + # with roles ``"human"`` / ``"ai"``. Each provider wants a different + # shape: + # + # OpenAI-compat: [{"role":"user"|"assistant", "content": str}, ...] + # Anthropic: [{"role":"user"|"assistant", "content": str}, ...] (same) + # Gemini: [{"role":"user"|"model", "parts": [{"text": str}]}, ...] + # + # Before Phase 2c these were flattened into a single user turn via + # ``shared_runtime.build_task_text``, which worked for basic text + # handoff but lost the model's native multi-turn awareness (system + # prompts, tool-use history, role attribution for instruction + # following). Phase 2c keeps the turns as turns. + + @staticmethod + def _history_to_openai_messages( + user_message: str, + history: "list[tuple[str, str]]", + ) -> "list[dict]": + """Convert A2A history + current turn to OpenAI Chat Completions shape.""" + messages: list[dict] = [] + for role, text in history or []: + messages.append({ + "role": "user" if role == "human" else "assistant", + "content": text, + }) + messages.append({"role": "user", "content": user_message}) + return messages + + @staticmethod + def _history_to_anthropic_messages( + user_message: str, + history: "list[tuple[str, str]]", + ) -> "list[dict]": + """Convert A2A history + current turn to Anthropic Messages API shape. + + Identical wire format to OpenAI (``role`` + ``content``) for text-only + turns, so we just delegate. The difference matters for tool_use / + content blocks, which are Phase 2d territory. + """ + return HermesA2AExecutor._history_to_openai_messages(user_message, history) + + @staticmethod + def _history_to_gemini_contents( + user_message: str, + history: "list[tuple[str, str]]", + ) -> "list[dict]": + """Convert A2A history + current turn to Gemini generateContent shape. + + Gemini uses ``role: "user" | "model"`` (NOT "assistant") and wraps + text in a ``parts: [{"text": ...}]`` list. + """ + contents: list[dict] = [] + for role, text in history or []: + contents.append({ + "role": "user" if role == "human" else "model", + "parts": [{"text": text}], + }) + contents.append({"role": "user", "parts": [{"text": user_message}]}) + return contents + # ------------------------------------------------------------------ # Per-provider inference paths # ------------------------------------------------------------------ - async def _do_openai_compat(self, task_text: str) -> str: + async def _do_openai_compat( + self, + user_message: str, + history: "list[tuple[str, str]] | None" = None, + ) -> str: """OpenAI-compat inference — used by every provider with auth_scheme='openai'. - 14 of the 15 registered providers route here. Uses ``openai.AsyncOpenAI`` + 13 of the 15 registered providers route here. Uses ``openai.AsyncOpenAI`` pointed at the provider's base_url; every provider's API is wire- compatible with the OpenAI Chat Completions shape. + + Phase 2c: accepts multi-turn history. The old single-``task_text`` call + shape (pre-2c) is preserved — pass the flattened text as ``user_message`` + with no history and the call degrades gracefully to the original + behavior. See ``_history_to_openai_messages`` for the conversion. """ import openai @@ -147,13 +221,18 @@ class HermesA2AExecutor: api_key=self.api_key, base_url=self.base_url, ) + messages = self._history_to_openai_messages(user_message, history or []) response = await client.chat.completions.create( model=self.model, - messages=[{"role": "user", "content": task_text}], + messages=messages, ) return response.choices[0].message.content or "" - async def _do_anthropic_native(self, task_text: str) -> str: + async def _do_anthropic_native( + self, + user_message: str, + history: "list[tuple[str, str]] | None" = None, + ) -> str: """Native Anthropic Messages API inference. Uses the official ``anthropic`` Python SDK for correct tool-calling, @@ -165,9 +244,8 @@ class HermesA2AExecutor: OpenAI-compat path — silent fallback would mask the fidelity loss (tool_use blocks become plain text, vision gets stripped, etc.). - Phase 2a minimum viable: single-turn text in, text out, no tools, no - vision. Phase 2b will add tool-calling, vision, and streaming via - the same path (still within this method). + Phase 2a: single-turn text in, text out. Phase 2c: multi-turn history. + Tools + vision remain Phase 2d. """ try: import anthropic @@ -180,18 +258,23 @@ class HermesA2AExecutor: ) from exc client = anthropic.AsyncAnthropic(api_key=self.api_key) + messages = self._history_to_anthropic_messages(user_message, history or []) response = await client.messages.create( model=self.model, max_tokens=4096, - messages=[{"role": "user", "content": task_text}], + messages=messages, ) - # response.content is a list of ContentBlock; for single-turn text-only - # the first block is a TextBlock with a .text attribute. + # response.content is a list of ContentBlock; for text-only the first + # block is a TextBlock with a .text attribute. if response.content and hasattr(response.content[0], "text"): return response.content[0].text return "" - async def _do_gemini_native(self, task_text: str) -> str: + async def _do_gemini_native( + self, + user_message: str, + history: "list[tuple[str, str]] | None" = None, + ) -> str: """Native Google Gemini ``generateContent`` inference. Uses the official ``google-genai`` Python SDK for correct vision @@ -204,9 +287,9 @@ class HermesA2AExecutor: silently falling back to the OpenAI-compat shim (same fail-loud semantics as the anthropic path). - Phase 2b minimum viable: single-turn text in, text out, no tools, - no vision, no thinking config. Phase 2c/2d layers those on the same - method. + Phase 2b: single-turn text in, text out. Phase 2c: multi-turn history + via Gemini's ``contents=[{role,parts}]`` shape (note: role is + ``"user"`` / ``"model"``, NOT ``"assistant"``). """ try: from google import genai # type: ignore[import-not-found] @@ -218,45 +301,59 @@ class HermesA2AExecutor: "OpenRouter's OpenAI-compat shim instead." ) from exc - # google-genai client reads api_key from env by default; pass it - # explicitly so we respect whatever ProviderConfig resolved (e.g. a - # test-only key that isn't in process env yet). client = genai.Client(api_key=self.api_key) + contents = self._history_to_gemini_contents(user_message, history or []) response = await client.aio.models.generate_content( model=self.model, - contents=task_text, + contents=contents, ) # response.text is the flattened text across all parts of the first - # candidate. For single-turn text-only that's the whole reply. + # candidate. For text-only that's the whole reply. return response.text or "" - async def _do_inference(self, task_text: str) -> str: - """Dispatch to the right inference path based on provider auth_scheme.""" + async def _do_inference( + self, + user_message: str, + history: "list[tuple[str, str]] | None" = None, + ) -> str: + """Dispatch to the right inference path based on provider auth_scheme. + + Phase 2c: takes ``user_message`` + optional ``history`` list-of-tuples, + passes through to the chosen path. Each path has its own history → + provider-message conversion via the static helpers above. + """ scheme = self.provider_cfg.auth_scheme if scheme == "anthropic": - return await self._do_anthropic_native(task_text) + return await self._do_anthropic_native(user_message, history) if scheme == "gemini": - return await self._do_gemini_native(task_text) + return await self._do_gemini_native(user_message, history) if scheme == "openai": - return await self._do_openai_compat(task_text) + return await self._do_openai_compat(user_message, history) # Unknown scheme — treat as openai-compat for forward-compat with any # future provider the registry adds without yet having a native path. logger.warning( "Hermes: unknown auth_scheme=%r for provider=%s — falling back to openai-compat", scheme, self.provider_cfg.name, ) - return await self._do_openai_compat(task_text) + return await self._do_openai_compat(user_message, history) # ------------------------------------------------------------------ # AgentExecutor interface # ------------------------------------------------------------------ async def execute(self, context, event_queue): # pragma: no cover - """Execute a Hermes inference request and push the reply to event_queue.""" + """Execute a Hermes inference request and push the reply to event_queue. + + Phase 2c: passes the conversation history to the dispatch layer as a + structured list of (role, text) turns instead of flattening via + ``build_task_text``. Each provider path converts the list into its + native multi-turn message shape (OpenAI messages, Anthropic messages, + or Gemini contents). This gives the model its native multi-turn + awareness for instruction following. + """ from a2a.utils import new_agent_text_message from adapters.shared_runtime import ( brief_task, - build_task_text, extract_history, extract_message_text, set_current_task, @@ -270,8 +367,8 @@ class HermesA2AExecutor: await set_current_task(self._heartbeat, brief_task(user_message)) try: - task_text = build_task_text(user_message, extract_history(context)) - reply = await self._do_inference(task_text) + history = extract_history(context) + reply = await self._do_inference(user_message, history) except Exception as exc: logger.exception("Hermes executor error: %s", exc) reply = f"Hermes error: {exc}" diff --git a/workspace-template/tests/test_hermes_phase2_dispatch.py b/workspace-template/tests/test_hermes_phase2_dispatch.py index 78bbfe31..c06057b1 100644 --- a/workspace-template/tests/test_hermes_phase2_dispatch.py +++ b/workspace-template/tests/test_hermes_phase2_dispatch.py @@ -98,7 +98,9 @@ async def test_dispatch_openai_scheme_calls_openai_compat(): result = await executor._do_inference("hello") - executor._do_openai_compat.assert_awaited_once_with("hello") + # Phase 2c: _do_inference passes (user_message, history) to the path; + # when no history supplied, second arg is None. + executor._do_openai_compat.assert_awaited_once_with("hello", None) executor._do_anthropic_native.assert_not_awaited() executor._do_gemini_native.assert_not_awaited() assert result == "openai-result" @@ -114,7 +116,7 @@ async def test_dispatch_anthropic_scheme_calls_anthropic_native(): result = await executor._do_inference("hello") - executor._do_anthropic_native.assert_awaited_once_with("hello") + executor._do_anthropic_native.assert_awaited_once_with("hello", None) executor._do_openai_compat.assert_not_awaited() executor._do_gemini_native.assert_not_awaited() assert result == "anthropic-result" @@ -130,12 +132,101 @@ async def test_dispatch_gemini_scheme_calls_gemini_native(): result = await executor._do_inference("hello") - executor._do_gemini_native.assert_awaited_once_with("hello") + executor._do_gemini_native.assert_awaited_once_with("hello", None) executor._do_openai_compat.assert_not_awaited() executor._do_anthropic_native.assert_not_awaited() assert result == "gemini-result" +# --------------------------------------------------------------------------- +# Phase 2c — history-to-message conversion tests +# --------------------------------------------------------------------------- + + +def test_history_to_openai_messages_empty_history(): + """No history → single user message (back-compat with pre-2c single-turn shape).""" + import importlib.util + src = (_HERMES_DIR / "executor.py").read_text().replace( + "from .providers import", "from providers import" + ) + ns: dict = {} + exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns) + HermesA2AExecutor = ns["HermesA2AExecutor"] + + msgs = HermesA2AExecutor._history_to_openai_messages("current turn", []) + assert msgs == [{"role": "user", "content": "current turn"}] + + +def test_history_to_openai_messages_multi_turn(): + """A2A history roles map: human→user, ai→assistant. Current turn appended as user.""" + import importlib.util + src = (_HERMES_DIR / "executor.py").read_text().replace( + "from .providers import", "from providers import" + ) + ns: dict = {} + exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns) + HermesA2AExecutor = ns["HermesA2AExecutor"] + + history = [("human", "first question"), ("ai", "first answer"), ("human", "follow-up")] + msgs = HermesA2AExecutor._history_to_openai_messages("current turn", history) + assert msgs == [ + {"role": "user", "content": "first question"}, + {"role": "assistant", "content": "first answer"}, + {"role": "user", "content": "follow-up"}, + {"role": "user", "content": "current turn"}, + ] + + +def test_history_to_anthropic_messages_same_as_openai(): + """Anthropic Messages API uses the same wire shape as OpenAI for text-only turns.""" + import importlib.util + src = (_HERMES_DIR / "executor.py").read_text().replace( + "from .providers import", "from providers import" + ) + ns: dict = {} + exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns) + HermesA2AExecutor = ns["HermesA2AExecutor"] + + history = [("human", "hello"), ("ai", "hi")] + openai_msgs = HermesA2AExecutor._history_to_openai_messages("how are you?", history) + anth_msgs = HermesA2AExecutor._history_to_anthropic_messages("how are you?", history) + assert openai_msgs == anth_msgs + + +def test_history_to_gemini_contents_uses_model_role_and_parts_wrapper(): + """Gemini uses role='user'|'model' (NOT 'assistant') and wraps text in parts=[{text}].""" + import importlib.util + src = (_HERMES_DIR / "executor.py").read_text().replace( + "from .providers import", "from providers import" + ) + ns: dict = {} + exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns) + HermesA2AExecutor = ns["HermesA2AExecutor"] + + history = [("human", "hi"), ("ai", "hello back")] + contents = HermesA2AExecutor._history_to_gemini_contents("follow-up?", history) + assert contents == [ + {"role": "user", "parts": [{"text": "hi"}]}, + {"role": "model", "parts": [{"text": "hello back"}]}, + {"role": "user", "parts": [{"text": "follow-up?"}]}, + ] + + +@pytest.mark.asyncio +async def test_dispatch_passes_history_through(): + """When _do_inference is called with history, it flows through to the provider path.""" + executor = _make_executor("anthropic") + executor._do_anthropic_native = AsyncMock(return_value="reply-with-history") + executor._do_openai_compat = AsyncMock() + executor._do_gemini_native = AsyncMock() + + history = [("human", "prior q"), ("ai", "prior a")] + result = await executor._do_inference("current", history) + + executor._do_anthropic_native.assert_awaited_once_with("current", history) + assert result == "reply-with-history" + + @pytest.mark.asyncio async def test_dispatch_unknown_scheme_falls_back_to_openai_compat(): """Unknown auth_scheme → log a warning + fall back to openai-compat (forward-compat)."""