From 6d253b961d4e24e8b813f49b29b36201b690c923 Mon Sep 17 00:00:00 2001 From: Molecule AI Backend Engineer Date: Fri, 17 Apr 2026 01:00:23 +0000 Subject: [PATCH] feat(hermes): pass tools via native tools[] parameter instead of text-in-prompt (#497) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of injecting tool definitions as text into the system prompt, HermesA2AExecutor now accepts a tools: list[dict] | None constructor parameter containing OpenAI-format tool definitions and forwards them via the native tools= parameter on chat.completions.create(). Empty list / None rule: when tools is falsy, the tools key is omitted from the API call entirely — never sent as tools=[] — so providers that reject an empty tools array don't return a 400. Tool-call response handling: when the returned message carries tool_calls and no text content (normally finish_reason "tool_calls"; the executor checks the message itself, not finish_reason), the executor serialises the call list as a JSON string and enqueues it as the A2A reply. This keeps the executor thin (single API call per turn, no ReAct loop) while surfacing function-call intent in a structured, parseable format. 
Changes: - HermesA2AExecutor.__init__: new tools kwarg; stored as self._tools (copy; mutating the input list has no effect) - execute(): builds create_kwargs dict and conditionally adds tools= only when self._tools is non-empty; handles tool_calls response - Module docstring: new "Native tools (#497)" section with schema reference and edge-case explanation Tests (12 new, 47 total in hermes test file, 1002 total suite): - tools stored correctly in constructor (copy, None, [], non-empty) - non-empty tools forwarded as tools= in API call - multiple tools all forwarded - empty list ([] and None and default) → tools key absent from call - model tool_call response → JSON-serialised list as A2A reply - multiple tool_calls → all in JSON reply - text content present → text wins over tool_calls Closes #497 Co-Authored-By: Claude Sonnet 4.6 --- workspace-template/hermes_executor.py | 98 ++++++- .../tests/test_hermes_executor.py | 267 +++++++++++++++++- 2 files changed, 356 insertions(+), 9 deletions(-) diff --git a/workspace-template/hermes_executor.py b/workspace-template/hermes_executor.py index 07aa4648..953dc063 100644 --- a/workspace-template/hermes_executor.py +++ b/workspace-template/hermes_executor.py @@ -21,6 +21,37 @@ OTEL activity span so operators can inspect the thinking trace in Langfuse A2A reply — doing so would contaminate the agent's next-turn context with the model's internal scratchpad. +Native tools (#497) +------------------- +Tool definitions are passed via the OpenAI-native ``tools`` parameter instead +of injecting them as text into the system prompt. Each entry must follow the +standard OpenAI function-calling schema:: + + { + "type": "function", + "function": { + "name": "...", + "description": "...", + "parameters": { # JSON Schema object + "type": "object", + "properties": {...}, + "required": [...] + } + } + } + +**Empty list rule:** when ``tools`` is ``None`` or ``[]``, the ``tools`` +parameter is **omitted** from the API call entirely. 
Sending ``tools=[]`` +to some OpenAI-compat providers causes a 400 / unexpected behaviour; omitting +the key is always safe and signals "no tool use." + +**Tool-call response handling:** when ``choice.message.tool_calls`` is +non-empty and the message has no text content (usually ``finish_reason`` +``"tool_calls"``, but that field is not inspected), the executor serialises +the calls as a JSON string and enqueues that as the A2A reply. This keeps the +executor thin (single API call per turn, no ReAct loop) while surfacing +function-call intent to the caller in a structured, parseable format. + Hermes 3 / unknown models -------------------------- No ``extra_body`` is sent. The response is processed identically to any @@ -126,6 +157,7 @@ class HermesA2AExecutor(AgentExecutor): - System prompt injected as the first ``messages[]`` entry. - Hermes 4 reasoning enabled via ``extra_body`` when supported. - Reasoning trace logged to OTEL span — never echoed in the reply. + - Tool definitions passed via native ``tools`` parameter when supplied. Parameters ---------- @@ -142,6 +174,12 @@ class HermesA2AExecutor(AgentExecutor): heartbeat: Optional ``HeartbeatLoop`` instance used to surface the current task description in the platform UI. + tools: + Optional list of OpenAI-format tool definitions to pass via the + native ``tools`` parameter. Each entry must have ``"type"`` and + ``"function"`` keys matching the OpenAI function-calling schema. + ``None`` or ``[]`` → the ``tools`` key is **omitted** from the API + call entirely (never sent as ``tools=[]``). _client: Inject a pre-built ``AsyncOpenAI`` (or compatible mock) — for testing only. 
When provided, ``base_url`` and ``api_key`` are @@ -155,12 +193,16 @@ class HermesA2AExecutor(AgentExecutor): base_url: str | None = None, api_key: str | None = None, heartbeat: "HeartbeatLoop | None" = None, + tools: list[dict] | None = None, _client: Any = None, ) -> None: self.model = model self.system_prompt = system_prompt self._heartbeat = heartbeat self._provider = ProviderConfig(model) + # Empty list and None are treated identically: no tools → omit the + # parameter from the API call rather than sending tools=[]. + self._tools: list[dict] = list(tools) if tools else [] if _client is not None: # Test injection path — skip real AsyncOpenAI construction so @@ -245,10 +287,15 @@ class HermesA2AExecutor(AgentExecutor): Sequence: 1. Extract user text from A2A message parts. 2. Build ``messages[]`` (optional system + user). - 3. Call OpenAI-compat API; include ``extra_body`` for Hermes 4. + 3. Call OpenAI-compat API; include ``extra_body`` for Hermes 4 and + ``tools`` when tool definitions are configured. 4. Extract and log reasoning trace — does NOT appear in the reply. - 5. Enqueue a final ``Message`` with the content text. + 5a. If the model returned text content, enqueue it as the reply. + 5b. If the model returned tool calls with no text (``finish_reason`` + ``"tool_calls"``), serialise the calls as JSON and enqueue that. """ + import json + from shared_runtime import extract_message_text user_input = extract_message_text(context) @@ -268,12 +315,18 @@ class HermesA2AExecutor(AgentExecutor): if self._provider.reasoning_supported: extra_body = {"reasoning": {"enabled": True}} + # Build call kwargs — omit ``tools`` entirely when the list is empty + # so providers that reject tools=[] don't get a 400. 
+ create_kwargs: dict = { + "model": self.model, + "messages": messages, + "extra_body": extra_body, + } + if self._tools: + create_kwargs["tools"] = self._tools + try: - response = await self._client.chat.completions.create( - model=self.model, - messages=messages, - extra_body=extra_body, - ) + response = await self._client.chat.completions.create(**create_kwargs) choice = response.choices[0] content: str = choice.message.content or "" @@ -297,6 +350,37 @@ class HermesA2AExecutor(AgentExecutor): # Log to OTEL — intentionally omitted from the A2A reply. self._log_reasoning(context, reasoning, reasoning_details) + # Handle tool-call response: when the model returns tool calls + # with no text content, serialise the calls as JSON so the caller + # receives structured, parseable output. This keeps the executor + # thin (single API call per turn) while not silently discarding + # function-call intent. + if not content: + tool_calls = getattr(choice.message, "tool_calls", None) + if tool_calls: + serialised = json.dumps([ + { + "id": getattr(tc, "id", ""), + "type": getattr(tc, "type", "function"), + "function": { + "name": getattr( + getattr(tc, "function", None), "name", "" + ), + "arguments": getattr( + getattr(tc, "function", None), "arguments", "{}" + ), + }, + } + for tc in tool_calls + ]) + logger.info( + "hermes_executor: tool_calls response [model=%s n=%d]", + self.model, + len(tool_calls), + ) + await event_queue.enqueue_event(new_agent_text_message(serialised)) + return + final_text = content.strip() or "(no response generated)" await event_queue.enqueue_event(new_agent_text_message(final_text)) diff --git a/workspace-template/tests/test_hermes_executor.py b/workspace-template/tests/test_hermes_executor.py index d6129c58..ad891de7 100644 --- a/workspace-template/tests/test_hermes_executor.py +++ b/workspace-template/tests/test_hermes_executor.py @@ -4,12 +4,15 @@ Coverage targets ---------------- - _reasoning_supported() — model name pattern detection - 
ProviderConfig — capability flags derived from model name -- HermesA2AExecutor.__init__ — field assignment + client injection +- HermesA2AExecutor.__init__ — field assignment, client injection, tools (#497) - HermesA2AExecutor._build_messages — system prompt + user turn assembly - HermesA2AExecutor._log_reasoning — OTEL span emission + swallowed errors - HermesA2AExecutor.execute — happy path, empty input, API error, Hermes 4 extra_body, Hermes 3 no extra_body, - reasoning not in reply, reasoning_details + reasoning not in reply, reasoning_details, + tools serialized in request body (#497), + empty tools → no tools field (#497), + tool_call response → JSON text (#497) - HermesA2AExecutor.cancel — TaskStatusUpdateEvent emitted The ``openai`` module is stubbed in sys.modules so no real API call is made. @@ -699,3 +702,263 @@ async def test_no_system_prompt_only_user_message(): msgs = mock_client.chat.completions.create.call_args[1]["messages"] assert len(msgs) == 1 assert msgs[0]["role"] == "user" + + +# --------------------------------------------------------------------------- +# Native tools parameter — issue #497 +# --------------------------------------------------------------------------- + +# Minimal OpenAI-format tool definition used across the tools tests. 
+_SAMPLE_TOOL: dict = { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather for a location.", + "parameters": { + "type": "object", + "properties": { + "location": {"type": "string", "description": "City name"}, + }, + "required": ["location"], + }, + }, +} + +_SAMPLE_TOOL_2: dict = { + "type": "function", + "function": { + "name": "search_web", + "description": "Search the web.", + "parameters": { + "type": "object", + "properties": {"query": {"type": "string"}}, + "required": ["query"], + }, + }, +} + + +class _FakeFunction: + """Stand-in for openai ChatCompletionMessageToolCall.function.""" + + def __init__(self, name: str, arguments: str) -> None: + self.name = name + self.arguments = arguments + + +class _FakeToolCall: + """Stand-in for openai ChatCompletionMessageToolCall.""" + + def __init__(self, tc_id: str, name: str, arguments: str = "{}") -> None: + self.id = tc_id + self.type = "function" + self.function = _FakeFunction(name=name, arguments=arguments) + + +def _make_tool_call_response(tool_calls: list, content: str = ""): + """Build a mock API response that includes tool_calls on the message.""" + + class _MsgWithToolCalls: + def __init__(self): + self.content = content + self.tool_calls = tool_calls + + choice = MagicMock() + choice.message = _MsgWithToolCalls() + response = MagicMock() + response.choices = [choice] + return response + + +def test_constructor_tools_stored_correctly(): + """tools list is stored as _tools attribute.""" + executor = HermesA2AExecutor( + model="hermes-4", + tools=[_SAMPLE_TOOL, _SAMPLE_TOOL_2], + _client=MagicMock(), + ) + assert executor._tools == [_SAMPLE_TOOL, _SAMPLE_TOOL_2] + + +def test_constructor_none_tools_stored_as_empty_list(): + """tools=None → _tools is [] (empty list, not None).""" + executor = HermesA2AExecutor(model="hermes-4", tools=None, _client=MagicMock()) + assert executor._tools == [] + + +def test_constructor_empty_list_stored_as_empty_list(): + 
"""tools=[] → _tools is [].""" + executor = HermesA2AExecutor(model="hermes-4", tools=[], _client=MagicMock()) + assert executor._tools == [] + + +def test_constructor_tools_is_independent_copy(): + """_tools is a copy — mutating the input list doesn't affect the executor.""" + original = [_SAMPLE_TOOL] + executor = HermesA2AExecutor( + model="hermes-4", tools=original, _client=MagicMock() + ) + original.append(_SAMPLE_TOOL_2) + assert executor._tools == [_SAMPLE_TOOL] + + +@pytest.mark.asyncio +async def test_execute_tools_serialized_in_request_body(): + """Non-empty tools list is forwarded to chat.completions.create as tools=.""" + mock_client = MagicMock() + mock_client.chat.completions.create = AsyncMock( + return_value=_make_api_response("Paris is sunny.") + ) + executor = HermesA2AExecutor( + model="hermes-4", + tools=[_SAMPLE_TOOL], + _client=mock_client, + ) + + await executor.execute(_make_context("weather?"), AsyncMock()) + + call_kwargs = mock_client.chat.completions.create.call_args[1] + assert "tools" in call_kwargs + assert call_kwargs["tools"] == [_SAMPLE_TOOL] + + +@pytest.mark.asyncio +async def test_execute_multiple_tools_all_forwarded(): + """All tool definitions are forwarded — not truncated.""" + mock_client = MagicMock() + mock_client.chat.completions.create = AsyncMock( + return_value=_make_api_response("ok") + ) + executor = HermesA2AExecutor( + model="hermes-4", + tools=[_SAMPLE_TOOL, _SAMPLE_TOOL_2], + _client=mock_client, + ) + + await executor.execute(_make_context("search?"), AsyncMock()) + + call_kwargs = mock_client.chat.completions.create.call_args[1] + assert call_kwargs["tools"] == [_SAMPLE_TOOL, _SAMPLE_TOOL_2] + + +@pytest.mark.asyncio +async def test_execute_empty_tools_no_tools_field_in_request(): + """Empty tools list → 'tools' key absent from API call (not tools=[]).""" + mock_client = MagicMock() + mock_client.chat.completions.create = AsyncMock( + return_value=_make_api_response("ok") + ) + executor = 
HermesA2AExecutor(model="hermes-4", tools=[], _client=mock_client) + + await executor.execute(_make_context("hello"), AsyncMock()) + + call_kwargs = mock_client.chat.completions.create.call_args[1] + assert "tools" not in call_kwargs + + +@pytest.mark.asyncio +async def test_execute_none_tools_no_tools_field_in_request(): + """tools=None → 'tools' key absent from API call.""" + mock_client = MagicMock() + mock_client.chat.completions.create = AsyncMock( + return_value=_make_api_response("ok") + ) + executor = HermesA2AExecutor(model="hermes-4", tools=None, _client=mock_client) + + await executor.execute(_make_context("hello"), AsyncMock()) + + call_kwargs = mock_client.chat.completions.create.call_args[1] + assert "tools" not in call_kwargs + + +@pytest.mark.asyncio +async def test_execute_default_no_tools_field_in_request(): + """Constructor with no tools kwarg → 'tools' key absent from API call.""" + executor, mock_client = _make_executor(model="hermes-4") + mock_client.chat.completions.create.return_value = _make_api_response("ok") + + await executor.execute(_make_context("hello"), AsyncMock()) + + call_kwargs = mock_client.chat.completions.create.call_args[1] + assert "tools" not in call_kwargs + + +@pytest.mark.asyncio +async def test_execute_tool_call_response_returns_json(): + """Model returns tool_calls with no content → reply is JSON-serialised calls.""" + import json + + mock_client = MagicMock() + tc = _FakeToolCall("call_abc123", "get_weather", '{"location":"Paris"}') + mock_client.chat.completions.create = AsyncMock( + return_value=_make_tool_call_response(tool_calls=[tc], content="") + ) + executor = HermesA2AExecutor( + model="hermes-4", + tools=[_SAMPLE_TOOL], + _client=mock_client, + ) + + eq = AsyncMock() + await executor.execute(_make_context("weather in Paris?"), eq) + + eq.enqueue_event.assert_called_once() + reply = eq.enqueue_event.call_args[0][0] + # Must be valid JSON + parsed = json.loads(reply) + assert isinstance(parsed, list) + assert 
len(parsed) == 1 + assert parsed[0]["function"]["name"] == "get_weather" + assert parsed[0]["function"]["arguments"] == '{"location":"Paris"}' + assert parsed[0]["id"] == "call_abc123" + assert parsed[0]["type"] == "function" + + +@pytest.mark.asyncio +async def test_execute_multiple_tool_calls_all_in_json(): + """Multiple tool calls are all serialised into the JSON reply.""" + import json + + mock_client = MagicMock() + tc1 = _FakeToolCall("call_1", "get_weather", '{"location":"Paris"}') + tc2 = _FakeToolCall("call_2", "search_web", '{"query":"news"}') + mock_client.chat.completions.create = AsyncMock( + return_value=_make_tool_call_response(tool_calls=[tc1, tc2], content="") + ) + executor = HermesA2AExecutor( + model="hermes-4", + tools=[_SAMPLE_TOOL, _SAMPLE_TOOL_2], + _client=mock_client, + ) + + eq = AsyncMock() + await executor.execute(_make_context("do both"), eq) + + reply = eq.enqueue_event.call_args[0][0] + parsed = json.loads(reply) + assert len(parsed) == 2 + assert parsed[0]["function"]["name"] == "get_weather" + assert parsed[1]["function"]["name"] == "search_web" + + +@pytest.mark.asyncio +async def test_execute_text_content_wins_over_tool_calls(): + """When model returns both text content AND tool_calls, text is used.""" + mock_client = MagicMock() + tc = _FakeToolCall("call_xyz", "get_weather", '{"location":"Berlin"}') + mock_client.chat.completions.create = AsyncMock( + return_value=_make_tool_call_response( + tool_calls=[tc], content="The weather is fine." + ) + ) + executor = HermesA2AExecutor( + model="hermes-4", + tools=[_SAMPLE_TOOL], + _client=mock_client, + ) + + eq = AsyncMock() + await executor.execute(_make_context("weather?"), eq) + + reply = eq.enqueue_event.call_args[0][0] + assert reply == "The weather is fine."