From 6d253b961d4e24e8b813f49b29b36201b690c923 Mon Sep 17 00:00:00 2001
From: Molecule AI Backend Engineer <backend-engineer@agents.moleculesai.app>
Date: Fri, 17 Apr 2026 01:00:23 +0000
Subject: [PATCH] feat(hermes): pass tools via native tools[] parameter instead
 of text-in-prompt (#497)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of injecting tool definitions as text into the system prompt,
HermesA2AExecutor now accepts a tools: list[dict] | None constructor
parameter containing OpenAI-format tool definitions and forwards them
via the native tools= parameter on chat.completions.create().

Empty list / None rule: when tools is falsy, the tools key is omitted
from the API call entirely — never sent as tools=[] — so providers
that reject an empty tools array don't return a 400.

Tool-call response handling: when the model returns tool calls with no
text content (normally the finish_reason "tool_calls" case; finish_reason
itself is not checked), the executor serialises the call list as a JSON
string and enqueues it as the A2A reply. This keeps the executor thin
(single API call per turn, no ReAct loop) while surfacing function-call
intent in a structured, parseable format.

Changes:
- HermesA2AExecutor.__init__: new tools kwarg; stored as self._tools
  (shallow copy; adding to or removing from the input list afterwards
  has no effect, but the tool dicts themselves are shared)
- execute(): builds create_kwargs dict and conditionally adds tools=
  only when self._tools is non-empty; handles tool_calls response
- Module docstring: new "Native tools (#497)" section with schema
  reference and edge-case explanation

Tests (12 new, 47 total in hermes test file, 1002 total suite):
  - tools stored correctly in constructor (copy, None, [], non-empty)
  - non-empty tools forwarded as tools= in API call
  - multiple tools all forwarded
  - empty list ([] and None and default) → tools key absent from call
  - model tool_call response → JSON-serialised list as A2A reply
  - multiple tool_calls → all in JSON reply
  - text content present → text wins over tool_calls

Closes #497

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 workspace-template/hermes_executor.py         |  98 ++++++-
 .../tests/test_hermes_executor.py             | 267 +++++++++++++++++-
 2 files changed, 356 insertions(+), 9 deletions(-)

diff --git a/workspace-template/hermes_executor.py b/workspace-template/hermes_executor.py
index 07aa4648..953dc063 100644
--- a/workspace-template/hermes_executor.py
+++ b/workspace-template/hermes_executor.py
@@ -21,6 +21,37 @@ OTEL activity span so operators can inspect the thinking trace in Langfuse
 A2A reply — doing so would contaminate the agent's next-turn context with
 the model's internal scratchpad.
 
+Native tools (#497)
+-------------------
+Tool definitions are passed via the OpenAI-native ``tools`` parameter instead
+of injecting them as text into the system prompt.  Each entry must follow the
+standard OpenAI function-calling schema::
+
+    {
+        "type": "function",
+        "function": {
+            "name": "...",
+            "description": "...",
+            "parameters": {        # JSON Schema object
+                "type": "object",
+                "properties": {...},
+                "required": [...]
+            }
+        }
+    }
+
+**Empty list rule:** when ``tools`` is ``None`` or ``[]``, the ``tools``
+parameter is **omitted** from the API call entirely.  Sending ``tools=[]``
+to some OpenAI-compat providers causes a 400 / unexpected behaviour; omitting
+the key is always safe and signals "no tool use."
+
+**Tool-call response handling:** when ``choice.message.tool_calls`` is set and
+there is no text content (``finish_reason`` itself is not checked), the
+executor serialises the tool-call list as a JSON string and enqueues that as
+the A2A reply.  This keeps the executor thin (single API call per turn, no
+ReAct loop) while surfacing function-call intent to the caller in a
+structured, parseable format.
+
 Hermes 3 / unknown models
 --------------------------
 No ``extra_body`` is sent.  The response is processed identically to any
@@ -126,6 +157,7 @@ class HermesA2AExecutor(AgentExecutor):
     - System prompt injected as the first ``messages[]`` entry.
     - Hermes 4 reasoning enabled via ``extra_body`` when supported.
     - Reasoning trace logged to OTEL span — never echoed in the reply.
+    - Tool definitions passed via native ``tools`` parameter when supplied.
 
     Parameters
     ----------
@@ -142,6 +174,12 @@ class HermesA2AExecutor(AgentExecutor):
     heartbeat:
         Optional ``HeartbeatLoop`` instance used to surface the current
         task description in the platform UI.
+    tools:
+        Optional list of OpenAI-format tool definitions to pass via the
+        native ``tools`` parameter.  Each entry must have ``"type"`` and
+        ``"function"`` keys matching the OpenAI function-calling schema.
+        ``None`` or ``[]`` → the ``tools`` key is **omitted** from the API
+        call entirely (never sent as ``tools=[]``).
     _client:
         Inject a pre-built ``AsyncOpenAI`` (or compatible mock) — for
         testing only.  When provided, ``base_url`` and ``api_key`` are
@@ -155,12 +193,16 @@ class HermesA2AExecutor(AgentExecutor):
         base_url: str | None = None,
         api_key: str | None = None,
         heartbeat: "HeartbeatLoop | None" = None,
+        tools: list[dict] | None = None,
         _client: Any = None,
     ) -> None:
         self.model = model
         self.system_prompt = system_prompt
         self._heartbeat = heartbeat
         self._provider = ProviderConfig(model)
+        # Empty list and None are treated identically: no tools → omit the
+        # parameter from the API call rather than sending tools=[].
+        self._tools: list[dict] = list(tools) if tools else []
 
         if _client is not None:
             # Test injection path — skip real AsyncOpenAI construction so
@@ -245,10 +287,15 @@ class HermesA2AExecutor(AgentExecutor):
         Sequence:
         1. Extract user text from A2A message parts.
         2. Build ``messages[]`` (optional system + user).
-        3. Call OpenAI-compat API; include ``extra_body`` for Hermes 4.
+        3. Call OpenAI-compat API; include ``extra_body`` for Hermes 4 and
+           ``tools`` when tool definitions are configured.
         4. Extract and log reasoning trace — does NOT appear in the reply.
-        5. Enqueue a final ``Message`` with the content text.
+        5a. If the model returned text content, enqueue it as the reply.
+        5b. If the model returned tool calls with no text (``finish_reason``
+            ``"tool_calls"``), serialise the calls as JSON and enqueue that.
         """
+        import json
+
         from shared_runtime import extract_message_text
 
         user_input = extract_message_text(context)
@@ -268,12 +315,18 @@ class HermesA2AExecutor(AgentExecutor):
         if self._provider.reasoning_supported:
             extra_body = {"reasoning": {"enabled": True}}
 
+        # Build call kwargs — omit ``tools`` entirely when the list is empty
+        # so providers that reject tools=[] don't get a 400.
+        create_kwargs: dict = {
+            "model": self.model,
+            "messages": messages,
+            "extra_body": extra_body,
+        }
+        if self._tools:
+            create_kwargs["tools"] = self._tools
+
         try:
-            response = await self._client.chat.completions.create(
-                model=self.model,
-                messages=messages,
-                extra_body=extra_body,
-            )
+            response = await self._client.chat.completions.create(**create_kwargs)
 
             choice = response.choices[0]
             content: str = choice.message.content or ""
@@ -297,6 +350,37 @@ class HermesA2AExecutor(AgentExecutor):
                 # Log to OTEL — intentionally omitted from the A2A reply.
                 self._log_reasoning(context, reasoning, reasoning_details)
 
+            # Handle tool-call response: when the model returns tool calls
+            # with no text content, serialise the calls as JSON so the caller
+            # receives structured, parseable output.  This keeps the executor
+            # thin (single API call per turn) while not silently discarding
+            # function-call intent.
+            if not content:
+                tool_calls = getattr(choice.message, "tool_calls", None)
+                if tool_calls:
+                    serialised = json.dumps([
+                        {
+                            "id": getattr(tc, "id", ""),
+                            "type": getattr(tc, "type", "function"),
+                            "function": {
+                                "name": getattr(
+                                    getattr(tc, "function", None), "name", ""
+                                ),
+                                "arguments": getattr(
+                                    getattr(tc, "function", None), "arguments", "{}"
+                                ),
+                            },
+                        }
+                        for tc in tool_calls
+                    ])
+                    logger.info(
+                        "hermes_executor: tool_calls response [model=%s n=%d]",
+                        self.model,
+                        len(tool_calls),
+                    )
+                    await event_queue.enqueue_event(new_agent_text_message(serialised))
+                    return
+
             final_text = content.strip() or "(no response generated)"
             await event_queue.enqueue_event(new_agent_text_message(final_text))
 
diff --git a/workspace-template/tests/test_hermes_executor.py b/workspace-template/tests/test_hermes_executor.py
index d6129c58..ad891de7 100644
--- a/workspace-template/tests/test_hermes_executor.py
+++ b/workspace-template/tests/test_hermes_executor.py
@@ -4,12 +4,15 @@ Coverage targets
 ----------------
 - _reasoning_supported()        — model name pattern detection
 - ProviderConfig                — capability flags derived from model name
-- HermesA2AExecutor.__init__   — field assignment + client injection
+- HermesA2AExecutor.__init__   — field assignment, client injection, tools (#497)
 - HermesA2AExecutor._build_messages — system prompt + user turn assembly
 - HermesA2AExecutor._log_reasoning  — OTEL span emission + swallowed errors
 - HermesA2AExecutor.execute    — happy path, empty input, API error,
                                   Hermes 4 extra_body, Hermes 3 no extra_body,
-                                  reasoning not in reply, reasoning_details
+                                  reasoning not in reply, reasoning_details,
+                                  tools serialized in request body (#497),
+                                  empty tools → no tools field (#497),
+                                  tool_call response → JSON text (#497)
 - HermesA2AExecutor.cancel     — TaskStatusUpdateEvent emitted
 
 The ``openai`` module is stubbed in sys.modules so no real API call is made.
@@ -699,3 +702,263 @@ async def test_no_system_prompt_only_user_message():
     msgs = mock_client.chat.completions.create.call_args[1]["messages"]
     assert len(msgs) == 1
     assert msgs[0]["role"] == "user"
+
+
+# ---------------------------------------------------------------------------
+# Native tools parameter — issue #497
+# ---------------------------------------------------------------------------
+
+# Minimal OpenAI-format tool definition used across the tools tests.
+_SAMPLE_TOOL: dict = {
+    "type": "function",
+    "function": {
+        "name": "get_weather",
+        "description": "Get current weather for a location.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "location": {"type": "string", "description": "City name"},
+            },
+            "required": ["location"],
+        },
+    },
+}
+
+_SAMPLE_TOOL_2: dict = {
+    "type": "function",
+    "function": {
+        "name": "search_web",
+        "description": "Search the web.",
+        "parameters": {
+            "type": "object",
+            "properties": {"query": {"type": "string"}},
+            "required": ["query"],
+        },
+    },
+}
+
+
+class _FakeFunction:
+    """Stand-in for openai ChatCompletionMessageToolCall.function."""
+
+    def __init__(self, name: str, arguments: str) -> None:
+        self.name = name
+        self.arguments = arguments
+
+
+class _FakeToolCall:
+    """Stand-in for openai ChatCompletionMessageToolCall."""
+
+    def __init__(self, tc_id: str, name: str, arguments: str = "{}") -> None:
+        self.id = tc_id
+        self.type = "function"
+        self.function = _FakeFunction(name=name, arguments=arguments)
+
+
+def _make_tool_call_response(tool_calls: list, content: str = ""):
+    """Build a mock API response that includes tool_calls on the message."""
+
+    class _MsgWithToolCalls:
+        def __init__(self):
+            self.content = content
+            self.tool_calls = tool_calls
+
+    choice = MagicMock()
+    choice.message = _MsgWithToolCalls()
+    response = MagicMock()
+    response.choices = [choice]
+    return response
+
+
+def test_constructor_tools_stored_correctly():
+    """tools list is stored as _tools attribute."""
+    executor = HermesA2AExecutor(
+        model="hermes-4",
+        tools=[_SAMPLE_TOOL, _SAMPLE_TOOL_2],
+        _client=MagicMock(),
+    )
+    assert executor._tools == [_SAMPLE_TOOL, _SAMPLE_TOOL_2]
+
+
+def test_constructor_none_tools_stored_as_empty_list():
+    """tools=None → _tools is [] (empty list, not None)."""
+    executor = HermesA2AExecutor(model="hermes-4", tools=None, _client=MagicMock())
+    assert executor._tools == []
+
+
+def test_constructor_empty_list_stored_as_empty_list():
+    """tools=[] → _tools is []."""
+    executor = HermesA2AExecutor(model="hermes-4", tools=[], _client=MagicMock())
+    assert executor._tools == []
+
+
+def test_constructor_tools_is_independent_copy():
+    """_tools is a copy — mutating the input list doesn't affect the executor."""
+    original = [_SAMPLE_TOOL]
+    executor = HermesA2AExecutor(
+        model="hermes-4", tools=original, _client=MagicMock()
+    )
+    original.append(_SAMPLE_TOOL_2)
+    assert executor._tools == [_SAMPLE_TOOL]
+
+
+@pytest.mark.asyncio
+async def test_execute_tools_serialized_in_request_body():
+    """Non-empty tools list is forwarded to chat.completions.create as tools=."""
+    mock_client = MagicMock()
+    mock_client.chat.completions.create = AsyncMock(
+        return_value=_make_api_response("Paris is sunny.")
+    )
+    executor = HermesA2AExecutor(
+        model="hermes-4",
+        tools=[_SAMPLE_TOOL],
+        _client=mock_client,
+    )
+
+    await executor.execute(_make_context("weather?"), AsyncMock())
+
+    call_kwargs = mock_client.chat.completions.create.call_args[1]
+    assert "tools" in call_kwargs
+    assert call_kwargs["tools"] == [_SAMPLE_TOOL]
+
+
+@pytest.mark.asyncio
+async def test_execute_multiple_tools_all_forwarded():
+    """All tool definitions are forwarded — not truncated."""
+    mock_client = MagicMock()
+    mock_client.chat.completions.create = AsyncMock(
+        return_value=_make_api_response("ok")
+    )
+    executor = HermesA2AExecutor(
+        model="hermes-4",
+        tools=[_SAMPLE_TOOL, _SAMPLE_TOOL_2],
+        _client=mock_client,
+    )
+
+    await executor.execute(_make_context("search?"), AsyncMock())
+
+    call_kwargs = mock_client.chat.completions.create.call_args[1]
+    assert call_kwargs["tools"] == [_SAMPLE_TOOL, _SAMPLE_TOOL_2]
+
+
+@pytest.mark.asyncio
+async def test_execute_empty_tools_no_tools_field_in_request():
+    """Empty tools list → 'tools' key absent from API call (not tools=[])."""
+    mock_client = MagicMock()
+    mock_client.chat.completions.create = AsyncMock(
+        return_value=_make_api_response("ok")
+    )
+    executor = HermesA2AExecutor(model="hermes-4", tools=[], _client=mock_client)
+
+    await executor.execute(_make_context("hello"), AsyncMock())
+
+    call_kwargs = mock_client.chat.completions.create.call_args[1]
+    assert "tools" not in call_kwargs
+
+
+@pytest.mark.asyncio
+async def test_execute_none_tools_no_tools_field_in_request():
+    """tools=None → 'tools' key absent from API call."""
+    mock_client = MagicMock()
+    mock_client.chat.completions.create = AsyncMock(
+        return_value=_make_api_response("ok")
+    )
+    executor = HermesA2AExecutor(model="hermes-4", tools=None, _client=mock_client)
+
+    await executor.execute(_make_context("hello"), AsyncMock())
+
+    call_kwargs = mock_client.chat.completions.create.call_args[1]
+    assert "tools" not in call_kwargs
+
+
+@pytest.mark.asyncio
+async def test_execute_default_no_tools_field_in_request():
+    """Constructor with no tools kwarg → 'tools' key absent from API call."""
+    executor, mock_client = _make_executor(model="hermes-4")
+    mock_client.chat.completions.create.return_value = _make_api_response("ok")
+
+    await executor.execute(_make_context("hello"), AsyncMock())
+
+    call_kwargs = mock_client.chat.completions.create.call_args[1]
+    assert "tools" not in call_kwargs
+
+
+@pytest.mark.asyncio
+async def test_execute_tool_call_response_returns_json():
+    """Model returns tool_calls with no content → reply is JSON-serialised calls."""
+    import json
+
+    mock_client = MagicMock()
+    tc = _FakeToolCall("call_abc123", "get_weather", '{"location":"Paris"}')
+    mock_client.chat.completions.create = AsyncMock(
+        return_value=_make_tool_call_response(tool_calls=[tc], content="")
+    )
+    executor = HermesA2AExecutor(
+        model="hermes-4",
+        tools=[_SAMPLE_TOOL],
+        _client=mock_client,
+    )
+
+    eq = AsyncMock()
+    await executor.execute(_make_context("weather in Paris?"), eq)
+
+    eq.enqueue_event.assert_called_once()
+    reply = eq.enqueue_event.call_args[0][0]
+    # Must be valid JSON
+    parsed = json.loads(reply)
+    assert isinstance(parsed, list)
+    assert len(parsed) == 1
+    assert parsed[0]["function"]["name"] == "get_weather"
+    assert parsed[0]["function"]["arguments"] == '{"location":"Paris"}'
+    assert parsed[0]["id"] == "call_abc123"
+    assert parsed[0]["type"] == "function"
+
+
+@pytest.mark.asyncio
+async def test_execute_multiple_tool_calls_all_in_json():
+    """Multiple tool calls are all serialised into the JSON reply."""
+    import json
+
+    mock_client = MagicMock()
+    tc1 = _FakeToolCall("call_1", "get_weather", '{"location":"Paris"}')
+    tc2 = _FakeToolCall("call_2", "search_web", '{"query":"news"}')
+    mock_client.chat.completions.create = AsyncMock(
+        return_value=_make_tool_call_response(tool_calls=[tc1, tc2], content="")
+    )
+    executor = HermesA2AExecutor(
+        model="hermes-4",
+        tools=[_SAMPLE_TOOL, _SAMPLE_TOOL_2],
+        _client=mock_client,
+    )
+
+    eq = AsyncMock()
+    await executor.execute(_make_context("do both"), eq)
+
+    reply = eq.enqueue_event.call_args[0][0]
+    parsed = json.loads(reply)
+    assert len(parsed) == 2
+    assert parsed[0]["function"]["name"] == "get_weather"
+    assert parsed[1]["function"]["name"] == "search_web"
+
+
+@pytest.mark.asyncio
+async def test_execute_text_content_wins_over_tool_calls():
+    """When model returns both text content AND tool_calls, text is used."""
+    mock_client = MagicMock()
+    tc = _FakeToolCall("call_xyz", "get_weather", '{"location":"Berlin"}')
+    mock_client.chat.completions.create = AsyncMock(
+        return_value=_make_tool_call_response(
+            tool_calls=[tc], content="The weather is fine."
+        )
+    )
+    executor = HermesA2AExecutor(
+        model="hermes-4",
+        tools=[_SAMPLE_TOOL],
+        _client=mock_client,
+    )
+
+    eq = AsyncMock()
+    await executor.execute(_make_context("weather?"), eq)
+
+    reply = eq.enqueue_event.call_args[0][0]
+    assert reply == "The weather is fine."