From baffc6b0c329a7eab49d718c422e98e89afbad2f Mon Sep 17 00:00:00 2001 From: rabbitblood Date: Wed, 15 Apr 2026 16:21:47 -0700 Subject: [PATCH] =?UTF-8?q?feat(hermes):=20Phase=202d-i=20=E2=80=94=20syst?= =?UTF-8?q?em-prompt.md=20injection=20on=20all=203=20dispatch=20paths?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Hermes adapter never read /configs/system-prompt.md. Any role that switched to runtime: hermes was silently losing its role identity because the system prompt wasn't passed to the model. This PR fixes that by: 1. HermesA2AExecutor.__init__ takes new optional `config_path` kwarg 2. `create_executor(config_path=...)` forwards to the constructor 3. `adapter.py` passes `config.config_path` through from AdapterConfig 4. `execute()` reads system-prompt.md via executor_helpers.get_system_prompt (hot-reload-capable — reads on every turn, not just at startup) 5. `_do_inference(user_message, history, system_prompt)` — new arg threads through the dispatch to each native path 6. 
Each path uses the provider's NATIVE system field: - OpenAI-compat: prepends `{"role":"system", "content":...}` to messages - Anthropic: top-level `system=` kwarg (NOT in messages — Anthropic requires system at the top level) - Gemini: `config=GenerateContentConfig(system_instruction=...)` ## Phase scoreboard - 2a (in main) — native Anthropic dispatch infra - 2b (in main) — native Gemini dispatch - 2c (in main) — multi-turn history on all paths - **2d-i (this PR)** — system prompts on all paths - 2d-ii (future) — tool calling on native paths - 2d-iii (future) — vision content blocks on native paths - 2d-iv (future) — streaming ## Test coverage 46/46 tests pass (20 Phase 2 dispatch + 26 Phase 1 registry): - Existing dispatch tests updated to assert the 3-arg call shape `("hello", None, None)` — history + system_prompt both None - 5 new tests: - `dispatch_passes_system_prompt_to_anthropic` — happy path, third arg flows - `dispatch_passes_system_prompt_to_gemini` — happy path - `dispatch_passes_system_prompt_to_openai` — happy path - `executor_accepts_config_path_kwarg` — constructor stores config_path - `create_executor_forwards_config_path` — both back-compat and registry resolution paths forward config_path through to the executor ## Back-compat - `config_path=None` (default) → execute() skips system-prompt injection, same behavior as pre-2d-i - Workspaces with `runtime: hermes` but no `/configs/system-prompt.md` file get `system_prompt=None` (get_system_prompt returns fallback), same as before - The 13 OpenAI-compat providers work identically — system_prompt just adds a leading message, which every OpenAI-compat endpoint already supports - Anthropic + Gemini previously got zero system context; now they get the same system prompt the workspace's system-prompt.md carries ## Why this matters Before this PR: if someone flipped a workspace from `runtime: claude-code` to `runtime: hermes`, the agent would act generically (no role identity, no project conventions, no 
CLAUDE.md context) because the Hermes executor never looked at system-prompt.md. That's a silent correctness regression the test suite wouldn't catch because none of our live workspaces use the hermes runtime today. With this PR: Hermes workspaces get the same system prompt injection as Claude-code workspaces, making the `runtime: hermes` switch a true drop-in alternative. ## Related - #267 Phase 2c (multi-turn history — in main) - #255 Phase 2b (gemini native — in main) - #240 Phase 2a (anthropic native — in main) - #208 Phase 1 (provider registry — in main) - project_hermes_multi_provider.md — Phase 2d-i was the next queued item --- workspace-template/adapters/hermes/adapter.py | 5 +- .../adapters/hermes/executor.py | 116 +++++++++++------- .../tests/test_hermes_phase2_dispatch.py | 105 +++++++++++++++- 3 files changed, 178 insertions(+), 48 deletions(-) diff --git a/workspace-template/adapters/hermes/adapter.py b/workspace-template/adapters/hermes/adapter.py index 0e6526f2..8d3367b1 100644 --- a/workspace-template/adapters/hermes/adapter.py +++ b/workspace-template/adapters/hermes/adapter.py @@ -51,7 +51,10 @@ class HermesAdapter(BaseAdapter): # Resolve API key: prefer workspace secrets (runtime_config), then env vars hermes_api_key = config.runtime_config.get("hermes_api_key") or None - executor = create_executor(hermes_api_key=hermes_api_key) + executor = create_executor( + hermes_api_key=hermes_api_key, + config_path=config.config_path, # Phase 2d-i: system-prompt.md injection + ) # Override model from config if provided model = config.model diff --git a/workspace-template/adapters/hermes/executor.py b/workspace-template/adapters/hermes/executor.py index c2a31a0f..af4cc659 100644 --- a/workspace-template/adapters/hermes/executor.py +++ b/workspace-template/adapters/hermes/executor.py @@ -47,6 +47,7 @@ def create_executor( hermes_api_key: Optional[str] = None, provider: Optional[str] = None, model: Optional[str] = None, + config_path: Optional[str] = None, ): 
"""Create and return a LangGraph-compatible executor for the Hermes adapter. @@ -65,12 +66,17 @@ def create_executor( model: Override the provider's default model. Passed straight through to ``chat.completions.create``. + config_path: + Path to the workspace's ``/configs`` directory. Phase 2d-i reads + ``system-prompt.md`` from here on every ``execute()`` call and + passes the content as a system instruction to the native SDK. + Optional — omit to skip system-prompt injection (tests do this). Returns ------- HermesA2AExecutor A ready-to-use executor wired with the resolved api_key + base_url - + model. + + model + config_path. Raises ------ @@ -86,6 +92,7 @@ def create_executor( provider_cfg=cfg, api_key=hermes_api_key, model=model or cfg.default_model, + config_path=config_path, ) # Path 2/3: registry resolution (either explicit provider name or auto-detect). @@ -101,6 +108,7 @@ def create_executor( provider_cfg=cfg, api_key=api_key, model=model or cfg.default_model, + config_path=config_path, ) @@ -123,12 +131,18 @@ class HermesA2AExecutor: api_key: str, model: str, heartbeat=None, + config_path: Optional[str] = None, ): self.provider_cfg = provider_cfg self.api_key = api_key self.base_url = provider_cfg.base_url self.model = model self._heartbeat = heartbeat + # Phase 2d-i: config_path lets execute() read /configs/system-prompt.md + # on each turn and pass it to the native SDK's `system=` / + # `system_instruction=` / prepended message. Optional because older + # callers + tests construct executors directly. + self._config_path = config_path # ------------------------------------------------------------------ # History → provider-specific message list converters @@ -203,6 +217,7 @@ class HermesA2AExecutor: self, user_message: str, history: "list[tuple[str, str]] | None" = None, + system_prompt: Optional[str] = None, ) -> str: """OpenAI-compat inference — used by every provider with auth_scheme='openai'. 
@@ -210,10 +225,9 @@ class HermesA2AExecutor: pointed at the provider's base_url; every provider's API is wire- compatible with the OpenAI Chat Completions shape. - Phase 2c: accepts multi-turn history. The old single-``task_text`` call - shape (pre-2c) is preserved — pass the flattened text as ``user_message`` - with no history and the call degrades gracefully to the original - behavior. See ``_history_to_openai_messages`` for the conversion. + Phase 2c: accepts multi-turn history. + Phase 2d-i: accepts optional system_prompt, prepended as a + ``{"role":"system"}`` message per the OpenAI Chat Completions convention. """ import openai @@ -222,6 +236,8 @@ class HermesA2AExecutor: base_url=self.base_url, ) messages = self._history_to_openai_messages(user_message, history or []) + if system_prompt: + messages = [{"role": "system", "content": system_prompt}, *messages] response = await client.chat.completions.create( model=self.model, messages=messages, @@ -232,6 +248,7 @@ class HermesA2AExecutor: self, user_message: str, history: "list[tuple[str, str]] | None" = None, + system_prompt: Optional[str] = None, ) -> str: """Native Anthropic Messages API inference. @@ -239,13 +256,12 @@ class HermesA2AExecutor: vision, and extended-thinking semantics that don't translate cleanly through the OpenAI-compat shim. - If the ``anthropic`` package is not installed in the workspace image, - we raise a clear error rather than silently falling back to the - OpenAI-compat path — silent fallback would mask the fidelity loss - (tool_use blocks become plain text, vision gets stripped, etc.). - - Phase 2a: single-turn text in, text out. Phase 2c: multi-turn history. - Tools + vision remain Phase 2d. + Phase 2a: single-turn text. + Phase 2c: multi-turn history. 
+ Phase 2d-i: optional system_prompt passed via Anthropic's native + top-level ``system=`` parameter — NOT as a message in the messages + list (Anthropic's Messages API requires system prompts to be at the + top level, not inline like OpenAI). """ try: import anthropic @@ -259,11 +275,14 @@ class HermesA2AExecutor: client = anthropic.AsyncAnthropic(api_key=self.api_key) messages = self._history_to_anthropic_messages(user_message, history or []) - response = await client.messages.create( - model=self.model, - max_tokens=4096, - messages=messages, - ) + create_kwargs: dict = { + "model": self.model, + "max_tokens": 4096, + "messages": messages, + } + if system_prompt: + create_kwargs["system"] = system_prompt + response = await client.messages.create(**create_kwargs) # response.content is a list of ContentBlock; for text-only the first # block is a TextBlock with a .text attribute. if response.content and hasattr(response.content[0], "text"): @@ -274,6 +293,7 @@ class HermesA2AExecutor: self, user_message: str, history: "list[tuple[str, str]] | None" = None, + system_prompt: Optional[str] = None, ) -> str: """Native Google Gemini ``generateContent`` inference. @@ -282,17 +302,15 @@ class HermesA2AExecutor: thinking config. These all get stripped or mis-translated through the OpenAI-compat ``/v1beta/openai`` shim. - If the ``google-genai`` package is not installed in the workspace - image, raise a clear error with install instructions rather than - silently falling back to the OpenAI-compat shim (same fail-loud - semantics as the anthropic path). - - Phase 2b: single-turn text in, text out. Phase 2c: multi-turn history - via Gemini's ``contents=[{role,parts}]`` shape (note: role is - ``"user"`` / ``"model"``, NOT ``"assistant"``). + Phase 2b: single-turn text. + Phase 2c: multi-turn history via Gemini's ``contents=[{role,parts}]`` + shape (note: role is ``"user"`` / ``"model"``, NOT ``"assistant"``). 
+ Phase 2d-i: system_prompt passed via native + ``config.system_instruction`` — Gemini's top-level system field. """ try: from google import genai # type: ignore[import-not-found] + from google.genai import types as genai_types # type: ignore[import-not-found] except ImportError as exc: # pragma: no cover — exercised by test_missing_sdk raise RuntimeError( "Hermes gemini native path requires the `google-genai` package. " @@ -303,10 +321,15 @@ class HermesA2AExecutor: client = genai.Client(api_key=self.api_key) contents = self._history_to_gemini_contents(user_message, history or []) - response = await client.aio.models.generate_content( - model=self.model, - contents=contents, - ) + generate_kwargs: dict = { + "model": self.model, + "contents": contents, + } + if system_prompt: + generate_kwargs["config"] = genai_types.GenerateContentConfig( + system_instruction=system_prompt, + ) + response = await client.aio.models.generate_content(**generate_kwargs) # response.text is the flattened text across all parts of the first # candidate. For text-only that's the whole reply. return response.text or "" @@ -315,27 +338,29 @@ class HermesA2AExecutor: self, user_message: str, history: "list[tuple[str, str]] | None" = None, + system_prompt: Optional[str] = None, ) -> str: """Dispatch to the right inference path based on provider auth_scheme. - Phase 2c: takes ``user_message`` + optional ``history`` list-of-tuples, - passes through to the chosen path. Each path has its own history → - provider-message conversion via the static helpers above. + Phase 2c: multi-turn history. + Phase 2d-i: optional system_prompt is passed through to the native + system field of whichever path wins dispatch (OpenAI ``{role:system}`` + / Anthropic ``system=`` / Gemini ``system_instruction=``). 
""" scheme = self.provider_cfg.auth_scheme if scheme == "anthropic": - return await self._do_anthropic_native(user_message, history) + return await self._do_anthropic_native(user_message, history, system_prompt) if scheme == "gemini": - return await self._do_gemini_native(user_message, history) + return await self._do_gemini_native(user_message, history, system_prompt) if scheme == "openai": - return await self._do_openai_compat(user_message, history) + return await self._do_openai_compat(user_message, history, system_prompt) # Unknown scheme — treat as openai-compat for forward-compat with any # future provider the registry adds without yet having a native path. logger.warning( "Hermes: unknown auth_scheme=%r for provider=%s — falling back to openai-compat", scheme, self.provider_cfg.name, ) - return await self._do_openai_compat(user_message, history) + return await self._do_openai_compat(user_message, history, system_prompt) # ------------------------------------------------------------------ # AgentExecutor interface @@ -344,12 +369,13 @@ class HermesA2AExecutor: async def execute(self, context, event_queue): # pragma: no cover """Execute a Hermes inference request and push the reply to event_queue. - Phase 2c: passes the conversation history to the dispatch layer as a - structured list of (role, text) turns instead of flattening via - ``build_task_text``. Each provider path converts the list into its - native multi-turn message shape (OpenAI messages, Anthropic messages, - or Gemini contents). This gives the model its native multi-turn - awareness for instruction following. + Phase 2c: multi-turn history. + Phase 2d-i: reads ``/configs/system-prompt.md`` via + ``executor_helpers.get_system_prompt`` each turn (supports hot-reload) + and passes the text to the dispatch layer. 
Each provider path uses + its native system field — Anthropic's top-level ``system=``, Gemini's + ``system_instruction=`` via ``GenerateContentConfig``, or OpenAI's + ``{"role":"system"}`` message at the head of the messages list. """ from a2a.utils import new_agent_text_message from adapters.shared_runtime import ( @@ -358,6 +384,7 @@ class HermesA2AExecutor: extract_message_text, set_current_task, ) + from executor_helpers import get_system_prompt user_message = extract_message_text(context) if not user_message: @@ -368,7 +395,10 @@ class HermesA2AExecutor: try: history = extract_history(context) - reply = await self._do_inference(user_message, history) + system_prompt = ( + get_system_prompt(self._config_path) if self._config_path else None + ) + reply = await self._do_inference(user_message, history, system_prompt) except Exception as exc: logger.exception("Hermes executor error: %s", exc) reply = f"Hermes error: {exc}" diff --git a/workspace-template/tests/test_hermes_phase2_dispatch.py b/workspace-template/tests/test_hermes_phase2_dispatch.py index c06057b1..9879ac50 100644 --- a/workspace-template/tests/test_hermes_phase2_dispatch.py +++ b/workspace-template/tests/test_hermes_phase2_dispatch.py @@ -100,7 +100,7 @@ async def test_dispatch_openai_scheme_calls_openai_compat(): # Phase 2c: _do_inference passes (user_message, history) to the path; # when no history supplied, second arg is None. 
- executor._do_openai_compat.assert_awaited_once_with("hello", None) + executor._do_openai_compat.assert_awaited_once_with("hello", None, None) executor._do_anthropic_native.assert_not_awaited() executor._do_gemini_native.assert_not_awaited() assert result == "openai-result" @@ -116,7 +116,7 @@ async def test_dispatch_anthropic_scheme_calls_anthropic_native(): result = await executor._do_inference("hello") - executor._do_anthropic_native.assert_awaited_once_with("hello", None) + executor._do_anthropic_native.assert_awaited_once_with("hello", None, None) executor._do_openai_compat.assert_not_awaited() executor._do_gemini_native.assert_not_awaited() assert result == "anthropic-result" @@ -132,7 +132,7 @@ async def test_dispatch_gemini_scheme_calls_gemini_native(): result = await executor._do_inference("hello") - executor._do_gemini_native.assert_awaited_once_with("hello", None) + executor._do_gemini_native.assert_awaited_once_with("hello", None, None) executor._do_openai_compat.assert_not_awaited() executor._do_anthropic_native.assert_not_awaited() assert result == "gemini-result" @@ -223,10 +223,107 @@ async def test_dispatch_passes_history_through(): history = [("human", "prior q"), ("ai", "prior a")] result = await executor._do_inference("current", history) - executor._do_anthropic_native.assert_awaited_once_with("current", history) + executor._do_anthropic_native.assert_awaited_once_with("current", history, None) assert result == "reply-with-history" +# --------------------------------------------------------------------------- +# Phase 2d-i — system_prompt dispatch tests +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_dispatch_passes_system_prompt_to_anthropic(): + """system_prompt flows through _do_inference → _do_anthropic_native as third arg.""" + executor = _make_executor("anthropic") + executor._do_anthropic_native = AsyncMock(return_value="reply") + executor._do_openai_compat = 
AsyncMock() + executor._do_gemini_native = AsyncMock() + + await executor._do_inference("user msg", None, "you are a helpful assistant") + executor._do_anthropic_native.assert_awaited_once_with( + "user msg", None, "you are a helpful assistant" + ) + + +@pytest.mark.asyncio +async def test_dispatch_passes_system_prompt_to_gemini(): + """system_prompt flows through _do_inference → _do_gemini_native as third arg.""" + executor = _make_executor("gemini") + executor._do_gemini_native = AsyncMock(return_value="reply") + executor._do_openai_compat = AsyncMock() + executor._do_anthropic_native = AsyncMock() + + await executor._do_inference("user msg", None, "system instruction") + executor._do_gemini_native.assert_awaited_once_with( + "user msg", None, "system instruction" + ) + + +@pytest.mark.asyncio +async def test_dispatch_passes_system_prompt_to_openai(): + """system_prompt flows through _do_inference → _do_openai_compat as third arg.""" + executor = _make_executor("openai") + executor._do_openai_compat = AsyncMock(return_value="reply") + executor._do_anthropic_native = AsyncMock() + executor._do_gemini_native = AsyncMock() + + await executor._do_inference("user msg", None, "system prompt") + executor._do_openai_compat.assert_awaited_once_with( + "user msg", None, "system prompt" + ) + + +def test_executor_accepts_config_path_kwarg(): + """HermesA2AExecutor.__init__ accepts config_path and stores it on _config_path.""" + import importlib.util + src = (_HERMES_DIR / "executor.py").read_text().replace( + "from .providers import", "from providers import" + ) + ns: dict = {} + exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns) + HermesA2AExecutor = ns["HermesA2AExecutor"] + cfg = providers.PROVIDERS["openai"] + + # Without config_path — default None + e1 = HermesA2AExecutor(provider_cfg=cfg, api_key="k", model="m") + assert e1._config_path is None + + # With config_path + e2 = HermesA2AExecutor( + provider_cfg=cfg, api_key="k", model="m", 
config_path="/configs" + ) + assert e2._config_path == "/configs" + + +def test_create_executor_forwards_config_path(): + """create_executor(config_path=...) → executor._config_path gets set. + + Exercises both the hermes_api_key back-compat path AND the registry + resolution path to make sure config_path threads through both. + """ + import importlib.util + src = (_HERMES_DIR / "executor.py").read_text().replace( + "from .providers import", "from providers import" + ) + ns: dict = {} + exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns) + create_executor = ns["create_executor"] + + # Path 1: hermes_api_key + e1 = create_executor(hermes_api_key="k", config_path="/path/a") + assert e1._config_path == "/path/a" + + # Path 2: registry resolution + import os + os.environ["OPENAI_API_KEY"] = "openai-test" + try: + e2 = create_executor(provider="openai", config_path="/path/b") + assert e2._config_path == "/path/b" + finally: + os.environ.pop("OPENAI_API_KEY", None) + + @pytest.mark.asyncio async def test_dispatch_unknown_scheme_falls_back_to_openai_compat(): """Unknown auth_scheme → log a warning + fall back to openai-compat (forward-compat)."""