Merge pull request #798 from Molecule-AI/feat/issue-499-clean-3

feat(hermes): stacked system messages — persona + tools + reasoning policy (#499)
2026-04-18 00:02:29 +00:00 · 2026-04-18 00:02:29 +00:00 · 159c90e0f5
commit 159c90e0f5
parent fe250b256b 9d171bda7f
2 changed files with 247 additions and 4 deletions
--- a/workspace-template/hermes_executor.py
+++ b/workspace-template/hermes_executor.py
@ -73,6 +73,36 @@ enqueues an error message and returns early without calling the API.
 When ``response_format`` is ``None`` (the default) the kwarg is omitted
 entirely from the API call so older / strict providers do not receive an
 unexpected field.
+
+Stacked system messages (#499)
+-------------------------------
+Hermes recommends separating system context into distinct ``role=system``
+messages rather than concatenating everything into a single string.  Pass
+``system_blocks`` to ``HermesA2AExecutor`` to use this mode::
+
+    executor = HermesA2AExecutor(
+        model="nousresearch/hermes-4-0",
+        system_blocks=[
+            persona_prompt,       # who the agent is
+            tools_context,        # available tools / MCP context
+            reasoning_policy,     # chain-of-thought / output-format rules
+        ],
+    )
+
+Each non-empty, non-None block is emitted as a separate
+``{"role": "system", "content": block}`` entry, in the order supplied,
+before the user turn.  The canonical Hermes ordering is:
+
+    1. Persona / identity
+    2. Tools context (function schemas, MCP capabilities)
+    3. Reasoning policy (think-step, output format constraints)
+
+Empty strings and ``None`` entries are silently skipped so callers can
+pass ``None`` for optional blocks without special-casing.
+
+When ``system_blocks`` is provided it takes precedence over
+``system_prompt``.  Existing code that passes a single ``system_prompt``
+string continues to work identically (backward compatible).
 """

 from __future__ import annotations
@ -229,6 +259,12 @@ class HermesA2AExecutor(AgentExecutor):
        Used to select the upstream model AND detect reasoning support.
    system_prompt:
        Optional system prompt prepended to every conversation.
+    system_blocks:
+        Ordered list of system message blocks in Hermes-recommended order:
+        persona, tools context, reasoning policy.  Each non-empty block
+        becomes a separate ``{"role": "system"}`` message.  None/empty-string
+        blocks are skipped.  When provided, takes precedence over
+        ``system_prompt``.
    base_url:
        OpenAI-compat endpoint base URL.  Defaults to
        ``OPENAI_BASE_URL`` env var, then ``https://openrouter.ai/api/v1``.
@ -262,6 +298,7 @@ class HermesA2AExecutor(AgentExecutor):
        self,
        model: str,
        system_prompt: str | None = None,
+        system_blocks: "list[str | None] | None" = None,
        base_url: str | None = None,
        api_key: str | None = None,
        heartbeat: "HeartbeatLoop | None" = None,
@ -271,6 +308,9 @@ class HermesA2AExecutor(AgentExecutor):
    ) -> None:
        self.model = model
        self.system_prompt = system_prompt
+        self._system_blocks: list[str | None] | None = (
+            list(system_blocks) if system_blocks is not None else None
+        )
        self._heartbeat = heartbeat
        self._response_format = response_format
        self._provider = ProviderConfig(model)
@ -306,7 +346,15 @@ class HermesA2AExecutor(AgentExecutor):
    def _build_messages(self, user_input: str) -> list[dict]:
        """Assemble the ``messages`` list: optional system prompt then user turn."""
        msgs: list[dict] = []
-        if self.system_prompt:
+        if self._system_blocks is not None:
+            # Stacked mode: Hermes-recommended ordering:
+            # persona → tools context → reasoning policy.
+            # Empty/None blocks are skipped.
+            for block in self._system_blocks:
+                if block:
+                    msgs.append({"role": "system", "content": block})
+        elif self.system_prompt:
+            # Legacy single-string mode — backward compatible.
            msgs.append({"role": "system", "content": self.system_prompt})
        msgs.append({"role": "user", "content": user_input})
        return msgs
--- a/workspace-template/tests/test_hermes_executor.py
+++ b/workspace-template/tests/test_hermes_executor.py
@ -6,8 +6,12 @@ Coverage targets
 - ProviderConfig                — capability flags derived from model name
 - _validate_response_format()  — valid types, invalid type, missing fields (#498)
 - HermesA2AExecutor.__init__   — field assignment + client injection,
-                                  response_format stored (#498), tools (#497)
- HermesA2AExecutor._build_messages — system prompt + user turn assembly
+                                  response_format stored (#498), tools (#497),
+                                  system_blocks stored as independent copy (#499)
+- HermesA2AExecutor._build_messages — system prompt + user turn assembly,
+                                       stacked system blocks in order (#499),
+                                       empty/None blocks skipped (#499),
+                                       system_blocks overrides system_prompt (#499)
 - HermesA2AExecutor._log_reasoning  — OTEL span emission + swallowed errors
 - HermesA2AExecutor.execute    — happy path, empty input, API error,
                                  Hermes 4 extra_body, Hermes 3 no extra_body,
@ -15,7 +19,8 @@ Coverage targets
                                  response_format forwarded / omitted / invalid (#498),
                                  tools serialized in request body (#497),
                                  empty tools → no tools field (#497),
-                                  tool_call response → JSON text (#497)
+                                  tool_call response → JSON text (#497),
+                                  stacked blocks in API call (#499)
 - HermesA2AExecutor.cancel     — TaskStatusUpdateEvent emitted

 The ``openai`` module is stubbed in sys.modules so no real API call is made.
@ -1110,3 +1115,193 @@ async def test_execute_text_content_wins_over_tool_calls():

    reply = eq.enqueue_event.call_args[0][0]
    assert reply == "The weather is fine."
+
+
+# ---------------------------------------------------------------------------
+# Stacked system messages — issue #499
+# ---------------------------------------------------------------------------
+
+
+def test_system_blocks_stored_correctly():
+    """system_blocks are stored as _system_blocks on the executor."""
+    blocks = ["persona", "tools", "reasoning"]
+    executor = HermesA2AExecutor(
+        model="hermes-4",
+        system_blocks=blocks,
+        _client=MagicMock(),
+    )
+    assert executor._system_blocks == ["persona", "tools", "reasoning"]
+
+
+def test_system_blocks_none_stored_as_none():
+    """Passing system_blocks=None → _system_blocks is None."""
+    executor = HermesA2AExecutor(
+        model="hermes-4",
+        system_blocks=None,
+        _client=MagicMock(),
+    )
+    assert executor._system_blocks is None
+
+
+def test_system_blocks_is_independent_copy():
+    """Mutating the original list after construction does not affect _system_blocks."""
+    blocks = ["persona", "tools"]
+    executor = HermesA2AExecutor(
+        model="hermes-4",
+        system_blocks=blocks,
+        _client=MagicMock(),
+    )
+    blocks.append("mutated")
+    assert executor._system_blocks == ["persona", "tools"]
+
+
+def test_build_messages_stacked_three_blocks():
+    """[persona, tools, reasoning] → three separate system messages before user, in order."""
+    persona = "You are Hermes, a helpful assistant."
+    tools = "Available tools: search, calculator."
+    reasoning = "Think step by step before answering."
+    executor = HermesA2AExecutor(
+        model="hermes-4",
+        system_blocks=[persona, tools, reasoning],
+        _client=MagicMock(),
+    )
+    msgs = executor._build_messages("Hello!")
+    assert len(msgs) == 4
+    assert msgs[0] == {"role": "system", "content": persona}
+    assert msgs[1] == {"role": "system", "content": tools}
+    assert msgs[2] == {"role": "system", "content": reasoning}
+    assert msgs[3] == {"role": "user", "content": "Hello!"}
+
+
+def test_build_messages_stacked_empty_block_skipped():
+    """An empty string block in system_blocks is NOT added as a system message."""
+    executor = HermesA2AExecutor(
+        model="hermes-4",
+        system_blocks=["persona", "", "reasoning"],
+        _client=MagicMock(),
+    )
+    msgs = executor._build_messages("Hi")
+    system_msgs = [m for m in msgs if m["role"] == "system"]
+    assert len(system_msgs) == 2
+    contents = [m["content"] for m in system_msgs]
+    assert "persona" in contents
+    assert "reasoning" in contents
+    assert "" not in contents
+
+
+def test_build_messages_stacked_none_block_skipped():
+    """A None block in system_blocks is silently skipped."""
+    executor = HermesA2AExecutor(
+        model="hermes-4",
+        system_blocks=["persona", None, "reasoning"],
+        _client=MagicMock(),
+    )
+    msgs = executor._build_messages("Hi")
+    system_msgs = [m for m in msgs if m["role"] == "system"]
+    assert len(system_msgs) == 2
+    contents = [m["content"] for m in system_msgs]
+    assert "persona" in contents
+    assert "reasoning" in contents
+
+
+def test_build_messages_stacked_all_empty_no_system_messages():
+    """All blocks empty or None → zero system messages in the output."""
+    executor = HermesA2AExecutor(
+        model="hermes-4",
+        system_blocks=["", None, ""],
+        _client=MagicMock(),
+    )
+    msgs = executor._build_messages("Hi")
+    system_msgs = [m for m in msgs if m["role"] == "system"]
+    assert system_msgs == []
+    assert len(msgs) == 1
+    assert msgs[0]["role"] == "user"
+
+
+def test_build_messages_stacked_single_block():
+    """[persona_only] → exactly one system message before the user turn."""
+    executor = HermesA2AExecutor(
+        model="hermes-4",
+        system_blocks=["You are Hermes."],
+        _client=MagicMock(),
+    )
+    msgs = executor._build_messages("Hello!")
+    assert len(msgs) == 2
+    assert msgs[0] == {"role": "system", "content": "You are Hermes."}
+    assert msgs[1] == {"role": "user", "content": "Hello!"}
+
+
+def test_build_messages_stacked_overrides_system_prompt():
+    """When both system_blocks and system_prompt are set, system_blocks wins."""
+    executor = HermesA2AExecutor(
+        model="hermes-4",
+        system_prompt="This should be ignored.",
+        system_blocks=["Persona block.", "Tools block."],
+        _client=MagicMock(),
+    )
+    msgs = executor._build_messages("Hi")
+    system_msgs = [m for m in msgs if m["role"] == "system"]
+    assert len(system_msgs) == 2
+    contents = [m["content"] for m in system_msgs]
+    assert "Persona block." in contents
+    assert "Tools block." in contents
+    assert "This should be ignored." not in contents
+
+
+def test_build_messages_legacy_single_string_unchanged():
+    """system_prompt alone (no system_blocks) → single system message (backward compat)."""
+    executor = HermesA2AExecutor(
+        model="hermes-4",
+        system_prompt="Be helpful.",
+        _client=MagicMock(),
+    )
+    msgs = executor._build_messages("Hello!")
+    assert len(msgs) == 2
+    assert msgs[0] == {"role": "system", "content": "Be helpful."}
+    assert msgs[1] == {"role": "user", "content": "Hello!"}
+
+
+def test_build_messages_no_system_no_blocks_no_system_msg():
+    """Neither system_prompt nor system_blocks → no system message at all."""
+    executor = HermesA2AExecutor(
+        model="hermes-4",
+        system_prompt=None,
+        system_blocks=None,
+        _client=MagicMock(),
+    )
+    msgs = executor._build_messages("Hello!")
+    assert len(msgs) == 1
+    assert msgs[0] == {"role": "user", "content": "Hello!"}
+
+
+@pytest.mark.asyncio
+async def test_execute_stacked_blocks_in_api_call():
+    """Stacked system_blocks appear correctly as separate system messages in the API call."""
+    persona = "You are Hermes."
+    tools = "Tool: search."
+    reasoning = "Think before answering."
+
+    mock_client = MagicMock()
+    mock_client.chat.completions.create = AsyncMock(
+        return_value=_make_api_response("done")
+    )
+    executor = HermesA2AExecutor(
+        model="nousresearch/hermes-4-0",
+        system_blocks=[persona, tools, reasoning],
+        _client=mock_client,
+    )
+
+    await executor.execute(_make_context("test query"), AsyncMock())
+
+    call_kwargs = mock_client.chat.completions.create.call_args[1]
+    msgs = call_kwargs["messages"]
+
+    system_msgs = [m for m in msgs if m["role"] == "system"]
+    assert len(system_msgs) == 3
+    assert system_msgs[0]["content"] == persona
+    assert system_msgs[1]["content"] == tools
+    assert system_msgs[2]["content"] == reasoning
+
+    user_msgs = [m for m in msgs if m["role"] == "user"]
+    assert len(user_msgs) == 1
+    assert "test query" in user_msgs[0]["content"]