Merge pull request #798 from Molecule-AI/feat/issue-499-clean-3
feat(hermes): stacked system messages — persona + tools + reasoning policy (#499)
This commit is contained in:
commit
159c90e0f5
@ -73,6 +73,36 @@ enqueues an error message and returns early without calling the API.
|
||||
When ``response_format`` is ``None`` (the default) the kwarg is omitted
|
||||
entirely from the API call so older / strict providers do not receive an
|
||||
unexpected field.
|
||||
|
||||
Stacked system messages (#499)
|
||||
-------------------------------
|
||||
Hermes recommends separating system context into distinct ``role=system``
|
||||
messages rather than concatenating everything into a single string. Pass
|
||||
``system_blocks`` to ``HermesA2AExecutor`` to use this mode::
|
||||
|
||||
executor = HermesA2AExecutor(
|
||||
model="nousresearch/hermes-4-0",
|
||||
system_blocks=[
|
||||
persona_prompt, # who the agent is
|
||||
tools_context, # available tools / MCP context
|
||||
reasoning_policy, # chain-of-thought / output-format rules
|
||||
],
|
||||
)
|
||||
|
||||
Each non-empty, non-None block is emitted as a separate
|
||||
``{"role": "system", "content": block}`` entry, in the order supplied,
|
||||
before the user turn. The canonical Hermes ordering is:
|
||||
|
||||
1. Persona / identity
|
||||
2. Tools context (function schemas, MCP capabilities)
|
||||
3. Reasoning policy (think-step, output format constraints)
|
||||
|
||||
Empty strings and ``None`` entries are silently skipped so callers can
|
||||
pass ``None`` for optional blocks without special-casing.
|
||||
|
||||
When ``system_blocks`` is provided it takes precedence over
|
||||
``system_prompt``. Existing code that passes a single ``system_prompt``
|
||||
string continues to work identically (backward compatible).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@ -229,6 +259,12 @@ class HermesA2AExecutor(AgentExecutor):
|
||||
Used to select the upstream model AND detect reasoning support.
|
||||
system_prompt:
|
||||
Optional system prompt prepended to every conversation.
|
||||
system_blocks:
|
||||
Ordered list of system message blocks in Hermes-recommended order:
|
||||
persona, tools context, reasoning policy. Each non-empty block
|
||||
becomes a separate ``{"role": "system"}`` message. None/empty-string
|
||||
blocks are skipped. When provided, takes precedence over
|
||||
``system_prompt``.
|
||||
base_url:
|
||||
OpenAI-compat endpoint base URL. Defaults to
|
||||
``OPENAI_BASE_URL`` env var, then ``https://openrouter.ai/api/v1``.
|
||||
@ -262,6 +298,7 @@ class HermesA2AExecutor(AgentExecutor):
|
||||
self,
|
||||
model: str,
|
||||
system_prompt: str | None = None,
|
||||
system_blocks: "list[str | None] | None" = None,
|
||||
base_url: str | None = None,
|
||||
api_key: str | None = None,
|
||||
heartbeat: "HeartbeatLoop | None" = None,
|
||||
@ -271,6 +308,9 @@ class HermesA2AExecutor(AgentExecutor):
|
||||
) -> None:
|
||||
self.model = model
|
||||
self.system_prompt = system_prompt
|
||||
self._system_blocks: list[str | None] | None = (
|
||||
list(system_blocks) if system_blocks is not None else None
|
||||
)
|
||||
self._heartbeat = heartbeat
|
||||
self._response_format = response_format
|
||||
self._provider = ProviderConfig(model)
|
||||
@ -306,7 +346,15 @@ class HermesA2AExecutor(AgentExecutor):
|
||||
def _build_messages(self, user_input: str) -> list[dict]:
|
||||
"""Assemble the ``messages`` list: optional system prompt then user turn."""
|
||||
msgs: list[dict] = []
|
||||
if self.system_prompt:
|
||||
if self._system_blocks is not None:
|
||||
# Stacked mode: Hermes-recommended ordering:
|
||||
# persona → tools context → reasoning policy.
|
||||
# Empty/None blocks are skipped.
|
||||
for block in self._system_blocks:
|
||||
if block:
|
||||
msgs.append({"role": "system", "content": block})
|
||||
elif self.system_prompt:
|
||||
# Legacy single-string mode — backward compatible.
|
||||
msgs.append({"role": "system", "content": self.system_prompt})
|
||||
msgs.append({"role": "user", "content": user_input})
|
||||
return msgs
|
||||
|
||||
@ -6,8 +6,12 @@ Coverage targets
|
||||
- ProviderConfig — capability flags derived from model name
|
||||
- _validate_response_format() — valid types, invalid type, missing fields (#498)
|
||||
- HermesA2AExecutor.__init__ — field assignment + client injection,
|
||||
response_format stored (#498), tools (#497)
|
||||
- HermesA2AExecutor._build_messages — system prompt + user turn assembly
|
||||
response_format stored (#498), tools (#497),
|
||||
system_blocks stored as independent copy (#499)
|
||||
- HermesA2AExecutor._build_messages — system prompt + user turn assembly,
|
||||
stacked system blocks in order (#499),
|
||||
empty/None blocks skipped (#499),
|
||||
system_blocks overrides system_prompt (#499)
|
||||
- HermesA2AExecutor._log_reasoning — OTEL span emission + swallowed errors
|
||||
- HermesA2AExecutor.execute — happy path, empty input, API error,
|
||||
Hermes 4 extra_body, Hermes 3 no extra_body,
|
||||
@ -15,7 +19,8 @@ Coverage targets
|
||||
response_format forwarded / omitted / invalid (#498),
|
||||
tools serialized in request body (#497),
|
||||
empty tools → no tools field (#497),
|
||||
tool_call response → JSON text (#497)
|
||||
tool_call response → JSON text (#497),
|
||||
stacked blocks in API call (#499)
|
||||
- HermesA2AExecutor.cancel — TaskStatusUpdateEvent emitted
|
||||
|
||||
The ``openai`` module is stubbed in sys.modules so no real API call is made.
|
||||
@ -1110,3 +1115,193 @@ async def test_execute_text_content_wins_over_tool_calls():
|
||||
|
||||
reply = eq.enqueue_event.call_args[0][0]
|
||||
assert reply == "The weather is fine."
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stacked system messages — issue #499
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_system_blocks_stored_correctly():
|
||||
"""system_blocks are stored as _system_blocks on the executor."""
|
||||
blocks = ["persona", "tools", "reasoning"]
|
||||
executor = HermesA2AExecutor(
|
||||
model="hermes-4",
|
||||
system_blocks=blocks,
|
||||
_client=MagicMock(),
|
||||
)
|
||||
assert executor._system_blocks == ["persona", "tools", "reasoning"]
|
||||
|
||||
|
||||
def test_system_blocks_none_stored_as_none():
|
||||
"""Passing system_blocks=None → _system_blocks is None."""
|
||||
executor = HermesA2AExecutor(
|
||||
model="hermes-4",
|
||||
system_blocks=None,
|
||||
_client=MagicMock(),
|
||||
)
|
||||
assert executor._system_blocks is None
|
||||
|
||||
|
||||
def test_system_blocks_is_independent_copy():
|
||||
"""Mutating the original list after construction does not affect _system_blocks."""
|
||||
blocks = ["persona", "tools"]
|
||||
executor = HermesA2AExecutor(
|
||||
model="hermes-4",
|
||||
system_blocks=blocks,
|
||||
_client=MagicMock(),
|
||||
)
|
||||
blocks.append("mutated")
|
||||
assert executor._system_blocks == ["persona", "tools"]
|
||||
|
||||
|
||||
def test_build_messages_stacked_three_blocks():
|
||||
"""[persona, tools, reasoning] → three separate system messages before user, in order."""
|
||||
persona = "You are Hermes, a helpful assistant."
|
||||
tools = "Available tools: search, calculator."
|
||||
reasoning = "Think step by step before answering."
|
||||
executor = HermesA2AExecutor(
|
||||
model="hermes-4",
|
||||
system_blocks=[persona, tools, reasoning],
|
||||
_client=MagicMock(),
|
||||
)
|
||||
msgs = executor._build_messages("Hello!")
|
||||
assert len(msgs) == 4
|
||||
assert msgs[0] == {"role": "system", "content": persona}
|
||||
assert msgs[1] == {"role": "system", "content": tools}
|
||||
assert msgs[2] == {"role": "system", "content": reasoning}
|
||||
assert msgs[3] == {"role": "user", "content": "Hello!"}
|
||||
|
||||
|
||||
def test_build_messages_stacked_empty_block_skipped():
|
||||
"""An empty string block in system_blocks is NOT added as a system message."""
|
||||
executor = HermesA2AExecutor(
|
||||
model="hermes-4",
|
||||
system_blocks=["persona", "", "reasoning"],
|
||||
_client=MagicMock(),
|
||||
)
|
||||
msgs = executor._build_messages("Hi")
|
||||
system_msgs = [m for m in msgs if m["role"] == "system"]
|
||||
assert len(system_msgs) == 2
|
||||
contents = [m["content"] for m in system_msgs]
|
||||
assert "persona" in contents
|
||||
assert "reasoning" in contents
|
||||
assert "" not in contents
|
||||
|
||||
|
||||
def test_build_messages_stacked_none_block_skipped():
|
||||
"""A None block in system_blocks is silently skipped."""
|
||||
executor = HermesA2AExecutor(
|
||||
model="hermes-4",
|
||||
system_blocks=["persona", None, "reasoning"],
|
||||
_client=MagicMock(),
|
||||
)
|
||||
msgs = executor._build_messages("Hi")
|
||||
system_msgs = [m for m in msgs if m["role"] == "system"]
|
||||
assert len(system_msgs) == 2
|
||||
contents = [m["content"] for m in system_msgs]
|
||||
assert "persona" in contents
|
||||
assert "reasoning" in contents
|
||||
|
||||
|
||||
def test_build_messages_stacked_all_empty_no_system_messages():
|
||||
"""All blocks empty or None → zero system messages in the output."""
|
||||
executor = HermesA2AExecutor(
|
||||
model="hermes-4",
|
||||
system_blocks=["", None, ""],
|
||||
_client=MagicMock(),
|
||||
)
|
||||
msgs = executor._build_messages("Hi")
|
||||
system_msgs = [m for m in msgs if m["role"] == "system"]
|
||||
assert system_msgs == []
|
||||
assert len(msgs) == 1
|
||||
assert msgs[0]["role"] == "user"
|
||||
|
||||
|
||||
def test_build_messages_stacked_single_block():
|
||||
"""[persona_only] → exactly one system message before the user turn."""
|
||||
executor = HermesA2AExecutor(
|
||||
model="hermes-4",
|
||||
system_blocks=["You are Hermes."],
|
||||
_client=MagicMock(),
|
||||
)
|
||||
msgs = executor._build_messages("Hello!")
|
||||
assert len(msgs) == 2
|
||||
assert msgs[0] == {"role": "system", "content": "You are Hermes."}
|
||||
assert msgs[1] == {"role": "user", "content": "Hello!"}
|
||||
|
||||
|
||||
def test_build_messages_stacked_overrides_system_prompt():
|
||||
"""When both system_blocks and system_prompt are set, system_blocks wins."""
|
||||
executor = HermesA2AExecutor(
|
||||
model="hermes-4",
|
||||
system_prompt="This should be ignored.",
|
||||
system_blocks=["Persona block.", "Tools block."],
|
||||
_client=MagicMock(),
|
||||
)
|
||||
msgs = executor._build_messages("Hi")
|
||||
system_msgs = [m for m in msgs if m["role"] == "system"]
|
||||
assert len(system_msgs) == 2
|
||||
contents = [m["content"] for m in system_msgs]
|
||||
assert "Persona block." in contents
|
||||
assert "Tools block." in contents
|
||||
assert "This should be ignored." not in contents
|
||||
|
||||
|
||||
def test_build_messages_legacy_single_string_unchanged():
|
||||
"""system_prompt alone (no system_blocks) → single system message (backward compat)."""
|
||||
executor = HermesA2AExecutor(
|
||||
model="hermes-4",
|
||||
system_prompt="Be helpful.",
|
||||
_client=MagicMock(),
|
||||
)
|
||||
msgs = executor._build_messages("Hello!")
|
||||
assert len(msgs) == 2
|
||||
assert msgs[0] == {"role": "system", "content": "Be helpful."}
|
||||
assert msgs[1] == {"role": "user", "content": "Hello!"}
|
||||
|
||||
|
||||
def test_build_messages_no_system_no_blocks_no_system_msg():
|
||||
"""Neither system_prompt nor system_blocks → no system message at all."""
|
||||
executor = HermesA2AExecutor(
|
||||
model="hermes-4",
|
||||
system_prompt=None,
|
||||
system_blocks=None,
|
||||
_client=MagicMock(),
|
||||
)
|
||||
msgs = executor._build_messages("Hello!")
|
||||
assert len(msgs) == 1
|
||||
assert msgs[0] == {"role": "user", "content": "Hello!"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_execute_stacked_blocks_in_api_call():
|
||||
"""Stacked system_blocks appear correctly as separate system messages in the API call."""
|
||||
persona = "You are Hermes."
|
||||
tools = "Tool: search."
|
||||
reasoning = "Think before answering."
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.chat.completions.create = AsyncMock(
|
||||
return_value=_make_api_response("done")
|
||||
)
|
||||
executor = HermesA2AExecutor(
|
||||
model="nousresearch/hermes-4-0",
|
||||
system_blocks=[persona, tools, reasoning],
|
||||
_client=mock_client,
|
||||
)
|
||||
|
||||
await executor.execute(_make_context("test query"), AsyncMock())
|
||||
|
||||
call_kwargs = mock_client.chat.completions.create.call_args[1]
|
||||
msgs = call_kwargs["messages"]
|
||||
|
||||
system_msgs = [m for m in msgs if m["role"] == "system"]
|
||||
assert len(system_msgs) == 3
|
||||
assert system_msgs[0]["content"] == persona
|
||||
assert system_msgs[1]["content"] == tools
|
||||
assert system_msgs[2]["content"] == reasoning
|
||||
|
||||
user_msgs = [m for m in msgs if m["role"] == "user"]
|
||||
assert len(user_msgs) == 1
|
||||
assert "test query" in user_msgs[0]["content"]
|
||||
|
||||
Loading…
Reference in New Issue
Block a user