feat(hermes): stacked system messages — persona + tools + reasoning policy (#499)

HermesA2AExecutor now supports sending system context as ordered, separate
role=system messages instead of a single concatenated string — the model
format recommended by NousResearch.

Changes:
- HermesA2AExecutor.__init__: new system_blocks kwarg (list[str|None]|None)
  stored as an independent copy; None blocks and empty strings silently skipped
- _build_messages(): when system_blocks is not None, emits each non-empty
  block as a separate {"role": "system"} entry in Hermes-recommended order
  (persona → tools context → reasoning policy); falls through to legacy
  system_prompt path when system_blocks is None (backward compatible)

Backward compatibility: existing callers that pass a single system_prompt
string continue to work identically — no changes required.

Tests (12 new, 47 total):
  - system_blocks stored as independent copy (mutation safe)
  - three-block stacked ordering preserved
  - empty / None blocks silently skipped
  - all-empty list → zero system messages
  - system_blocks overrides system_prompt when both provided
  - legacy system_prompt path unchanged
  - stacked blocks appear in the live API call kwargs

Closes #499

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Molecule AI Backend Engineer 2026-04-17 01:22:31 +00:00
parent 5466b0b88e
commit 1e7db61f94
2 changed files with 247 additions and 4 deletions

View File

@ -73,6 +73,36 @@ enqueues an error message and returns early without calling the API.
When ``response_format`` is ``None`` (the default) the kwarg is omitted
entirely from the API call so older / strict providers do not receive an
unexpected field.
Stacked system messages (#499)
-------------------------------
Hermes recommends separating system context into distinct ``role=system``
messages rather than concatenating everything into a single string. Pass
``system_blocks`` to ``HermesA2AExecutor`` to use this mode::
executor = HermesA2AExecutor(
model="nousresearch/hermes-4-0",
system_blocks=[
persona_prompt, # who the agent is
tools_context, # available tools / MCP context
reasoning_policy, # chain-of-thought / output-format rules
],
)
Each non-empty, non-None block is emitted as a separate
``{"role": "system", "content": block}`` entry, in the order supplied,
before the user turn. The canonical Hermes ordering is:
1. Persona / identity
2. Tools context (function schemas, MCP capabilities)
3. Reasoning policy (think-step, output format constraints)
Empty strings and ``None`` entries are silently skipped so callers can
pass ``None`` for optional blocks without special-casing.
When ``system_blocks`` is provided it takes precedence over
``system_prompt``. Existing code that passes a single ``system_prompt``
string continues to work identically (backward compatible).
"""
from __future__ import annotations
@ -229,6 +259,12 @@ class HermesA2AExecutor(AgentExecutor):
Used to select the upstream model AND detect reasoning support.
system_prompt:
Optional system prompt prepended to every conversation.
system_blocks:
Ordered list of system message blocks in Hermes-recommended order:
persona, tools context, reasoning policy. Each non-empty block
becomes a separate ``{"role": "system"}`` message. None/empty-string
blocks are skipped. When provided, takes precedence over
``system_prompt``.
base_url:
OpenAI-compat endpoint base URL. Defaults to
``OPENAI_BASE_URL`` env var, then ``https://openrouter.ai/api/v1``.
@ -262,6 +298,7 @@ class HermesA2AExecutor(AgentExecutor):
self,
model: str,
system_prompt: str | None = None,
system_blocks: "list[str | None] | None" = None,
base_url: str | None = None,
api_key: str | None = None,
heartbeat: "HeartbeatLoop | None" = None,
@ -271,6 +308,9 @@ class HermesA2AExecutor(AgentExecutor):
) -> None:
self.model = model
self.system_prompt = system_prompt
self._system_blocks: list[str | None] | None = (
list(system_blocks) if system_blocks is not None else None
)
self._heartbeat = heartbeat
self._response_format = response_format
self._provider = ProviderConfig(model)
@ -306,7 +346,15 @@ class HermesA2AExecutor(AgentExecutor):
def _build_messages(self, user_input: str) -> list[dict]:
"""Assemble the ``messages`` list: optional system prompt then user turn."""
msgs: list[dict] = []
if self.system_prompt:
if self._system_blocks is not None:
# Stacked mode: Hermes-recommended ordering:
# persona → tools context → reasoning policy.
# Empty/None blocks are skipped.
for block in self._system_blocks:
if block:
msgs.append({"role": "system", "content": block})
elif self.system_prompt:
# Legacy single-string mode — backward compatible.
msgs.append({"role": "system", "content": self.system_prompt})
msgs.append({"role": "user", "content": user_input})
return msgs

View File

@ -6,8 +6,12 @@ Coverage targets
- ProviderConfig capability flags derived from model name
- _validate_response_format() valid types, invalid type, missing fields (#498)
- HermesA2AExecutor.__init__ field assignment + client injection,
response_format stored (#498), tools (#497)
- HermesA2AExecutor._build_messages system prompt + user turn assembly
response_format stored (#498), tools (#497),
system_blocks stored as independent copy (#499)
- HermesA2AExecutor._build_messages system prompt + user turn assembly,
stacked system blocks in order (#499),
empty/None blocks skipped (#499),
system_blocks overrides system_prompt (#499)
- HermesA2AExecutor._log_reasoning OTEL span emission + swallowed errors
- HermesA2AExecutor.execute happy path, empty input, API error,
Hermes 4 extra_body, Hermes 3 no extra_body,
@ -15,7 +19,8 @@ Coverage targets
response_format forwarded / omitted / invalid (#498),
tools serialized in request body (#497),
empty tools no tools field (#497),
tool_call response JSON text (#497)
tool_call response JSON text (#497),
stacked blocks in API call (#499)
- HermesA2AExecutor.cancel TaskStatusUpdateEvent emitted
The ``openai`` module is stubbed in sys.modules so no real API call is made.
@ -1110,3 +1115,193 @@ async def test_execute_text_content_wins_over_tool_calls():
reply = eq.enqueue_event.call_args[0][0]
assert reply == "The weather is fine."
# ---------------------------------------------------------------------------
# Stacked system messages — issue #499
# ---------------------------------------------------------------------------
def test_system_blocks_stored_correctly():
"""system_blocks are stored as _system_blocks on the executor."""
blocks = ["persona", "tools", "reasoning"]
executor = HermesA2AExecutor(
model="hermes-4",
system_blocks=blocks,
_client=MagicMock(),
)
assert executor._system_blocks == ["persona", "tools", "reasoning"]
def test_system_blocks_none_stored_as_none():
"""Passing system_blocks=None → _system_blocks is None."""
executor = HermesA2AExecutor(
model="hermes-4",
system_blocks=None,
_client=MagicMock(),
)
assert executor._system_blocks is None
def test_system_blocks_is_independent_copy():
"""Mutating the original list after construction does not affect _system_blocks."""
blocks = ["persona", "tools"]
executor = HermesA2AExecutor(
model="hermes-4",
system_blocks=blocks,
_client=MagicMock(),
)
blocks.append("mutated")
assert executor._system_blocks == ["persona", "tools"]
def test_build_messages_stacked_three_blocks():
"""[persona, tools, reasoning] → three separate system messages before user, in order."""
persona = "You are Hermes, a helpful assistant."
tools = "Available tools: search, calculator."
reasoning = "Think step by step before answering."
executor = HermesA2AExecutor(
model="hermes-4",
system_blocks=[persona, tools, reasoning],
_client=MagicMock(),
)
msgs = executor._build_messages("Hello!")
assert len(msgs) == 4
assert msgs[0] == {"role": "system", "content": persona}
assert msgs[1] == {"role": "system", "content": tools}
assert msgs[2] == {"role": "system", "content": reasoning}
assert msgs[3] == {"role": "user", "content": "Hello!"}
def test_build_messages_stacked_empty_block_skipped():
"""An empty string block in system_blocks is NOT added as a system message."""
executor = HermesA2AExecutor(
model="hermes-4",
system_blocks=["persona", "", "reasoning"],
_client=MagicMock(),
)
msgs = executor._build_messages("Hi")
system_msgs = [m for m in msgs if m["role"] == "system"]
assert len(system_msgs) == 2
contents = [m["content"] for m in system_msgs]
assert "persona" in contents
assert "reasoning" in contents
assert "" not in contents
def test_build_messages_stacked_none_block_skipped():
"""A None block in system_blocks is silently skipped."""
executor = HermesA2AExecutor(
model="hermes-4",
system_blocks=["persona", None, "reasoning"],
_client=MagicMock(),
)
msgs = executor._build_messages("Hi")
system_msgs = [m for m in msgs if m["role"] == "system"]
assert len(system_msgs) == 2
contents = [m["content"] for m in system_msgs]
assert "persona" in contents
assert "reasoning" in contents
def test_build_messages_stacked_all_empty_no_system_messages():
"""All blocks empty or None → zero system messages in the output."""
executor = HermesA2AExecutor(
model="hermes-4",
system_blocks=["", None, ""],
_client=MagicMock(),
)
msgs = executor._build_messages("Hi")
system_msgs = [m for m in msgs if m["role"] == "system"]
assert system_msgs == []
assert len(msgs) == 1
assert msgs[0]["role"] == "user"
def test_build_messages_stacked_single_block():
"""[persona_only] → exactly one system message before the user turn."""
executor = HermesA2AExecutor(
model="hermes-4",
system_blocks=["You are Hermes."],
_client=MagicMock(),
)
msgs = executor._build_messages("Hello!")
assert len(msgs) == 2
assert msgs[0] == {"role": "system", "content": "You are Hermes."}
assert msgs[1] == {"role": "user", "content": "Hello!"}
def test_build_messages_stacked_overrides_system_prompt():
"""When both system_blocks and system_prompt are set, system_blocks wins."""
executor = HermesA2AExecutor(
model="hermes-4",
system_prompt="This should be ignored.",
system_blocks=["Persona block.", "Tools block."],
_client=MagicMock(),
)
msgs = executor._build_messages("Hi")
system_msgs = [m for m in msgs if m["role"] == "system"]
assert len(system_msgs) == 2
contents = [m["content"] for m in system_msgs]
assert "Persona block." in contents
assert "Tools block." in contents
assert "This should be ignored." not in contents
def test_build_messages_legacy_single_string_unchanged():
"""system_prompt alone (no system_blocks) → single system message (backward compat)."""
executor = HermesA2AExecutor(
model="hermes-4",
system_prompt="Be helpful.",
_client=MagicMock(),
)
msgs = executor._build_messages("Hello!")
assert len(msgs) == 2
assert msgs[0] == {"role": "system", "content": "Be helpful."}
assert msgs[1] == {"role": "user", "content": "Hello!"}
def test_build_messages_no_system_no_blocks_no_system_msg():
"""Neither system_prompt nor system_blocks → no system message at all."""
executor = HermesA2AExecutor(
model="hermes-4",
system_prompt=None,
system_blocks=None,
_client=MagicMock(),
)
msgs = executor._build_messages("Hello!")
assert len(msgs) == 1
assert msgs[0] == {"role": "user", "content": "Hello!"}
@pytest.mark.asyncio
async def test_execute_stacked_blocks_in_api_call():
"""Stacked system_blocks appear correctly as separate system messages in the API call."""
persona = "You are Hermes."
tools = "Tool: search."
reasoning = "Think before answering."
mock_client = MagicMock()
mock_client.chat.completions.create = AsyncMock(
return_value=_make_api_response("done")
)
executor = HermesA2AExecutor(
model="nousresearch/hermes-4-0",
system_blocks=[persona, tools, reasoning],
_client=mock_client,
)
await executor.execute(_make_context("test query"), AsyncMock())
call_kwargs = mock_client.chat.completions.create.call_args[1]
msgs = call_kwargs["messages"]
system_msgs = [m for m in msgs if m["role"] == "system"]
assert len(system_msgs) == 3
assert system_msgs[0]["content"] == persona
assert system_msgs[1]["content"] == tools
assert system_msgs[2]["content"] == reasoning
user_msgs = [m for m in msgs if m["role"] == "user"]
assert len(user_msgs) == 1
assert "test query" in user_msgs[0]["content"]