feat(hermes): native tools=[] parameter instead of text-in-prompt workaround (#497)

feat(hermes): native tools=[] parameter instead of text-in-prompt workaround (#497)
This commit is contained in:
molecule-ai[bot] 2026-04-17 06:56:10 +00:00 committed by GitHub
commit c9b8c26d5f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 356 additions and 9 deletions

View File

@ -21,6 +21,37 @@ OTEL activity span so operators can inspect the thinking trace in Langfuse
A2A reply — doing so would contaminate the agent's next-turn context with
the model's internal scratchpad.
Native tools (#497)
-------------------
Tool definitions are passed via the OpenAI-native ``tools`` parameter instead
of injecting them as text into the system prompt. Each entry must follow the
standard OpenAI function-calling schema::
{
"type": "function",
"function": {
"name": "...",
"description": "...",
"parameters": { # JSON Schema object
"type": "object",
"properties": {...},
"required": [...]
}
}
}
**Empty list rule:** when ``tools`` is ``None`` or ``[]``, the ``tools``
parameter is **omitted** from the API call entirely. Sending ``tools=[]``
to some OpenAI-compat providers causes a 400 / unexpected behaviour; omitting
the key is always safe and signals "no tool use."
**Tool-call response handling:** when the model returns
``choice.message.tool_calls`` with no text content (``finish_reason`` is
``"tool_calls"``), the executor serialises the tool-call list as a JSON string
and enqueues that as the A2A reply. This keeps the executor thin (single API
call per turn, no ReAct loop) while surfacing function-call intent to the
caller in a structured, parseable format.
Hermes 3 / unknown models
--------------------------
No ``extra_body`` is sent. The response is processed identically to any
@ -126,6 +157,7 @@ class HermesA2AExecutor(AgentExecutor):
- System prompt injected as the first ``messages[]`` entry.
- Hermes 4 reasoning enabled via ``extra_body`` when supported.
- Reasoning trace logged to OTEL span — never echoed in the reply.
- Tool definitions passed via native ``tools`` parameter when supplied.
Parameters
----------
@ -142,6 +174,12 @@ class HermesA2AExecutor(AgentExecutor):
heartbeat:
Optional ``HeartbeatLoop`` instance used to surface the current
task description in the platform UI.
tools:
Optional list of OpenAI-format tool definitions to pass via the
native ``tools`` parameter. Each entry must have ``"type"`` and
``"function"`` keys matching the OpenAI function-calling schema.
When ``None`` or ``[]``, the ``tools`` key is **omitted** from the API
call entirely (never sent as ``tools=[]``).
_client:
Inject a pre-built ``AsyncOpenAI`` (or compatible mock) for
testing only. When provided, ``base_url`` and ``api_key`` are
@ -155,12 +193,16 @@ class HermesA2AExecutor(AgentExecutor):
base_url: str | None = None,
api_key: str | None = None,
heartbeat: "HeartbeatLoop | None" = None,
tools: list[dict] | None = None,
_client: Any = None,
) -> None:
self.model = model
self.system_prompt = system_prompt
self._heartbeat = heartbeat
self._provider = ProviderConfig(model)
# Empty list and None are treated identically: no tools → omit the
# parameter from the API call rather than sending tools=[].
self._tools: list[dict] = list(tools) if tools else []
if _client is not None:
# Test injection path — skip real AsyncOpenAI construction so
@ -245,10 +287,15 @@ class HermesA2AExecutor(AgentExecutor):
Sequence:
1. Extract user text from A2A message parts.
2. Build ``messages[]`` (optional system + user).
3. Call OpenAI-compat API; include ``extra_body`` for Hermes 4.
3. Call OpenAI-compat API; include ``extra_body`` for Hermes 4 and
``tools`` when tool definitions are configured.
4. Extract and log the reasoning trace — it does NOT appear in the reply.
5. Enqueue a final ``Message`` with the content text.
5a. If the model returned text content, enqueue it as the reply.
5b. If the model returned tool calls with no text (``finish_reason``
``"tool_calls"``), serialise the calls as JSON and enqueue that.
"""
import json
from shared_runtime import extract_message_text
user_input = extract_message_text(context)
@ -268,12 +315,18 @@ class HermesA2AExecutor(AgentExecutor):
if self._provider.reasoning_supported:
extra_body = {"reasoning": {"enabled": True}}
# Build call kwargs — omit ``tools`` entirely when the list is empty
# so providers that reject tools=[] don't get a 400.
create_kwargs: dict = {
"model": self.model,
"messages": messages,
"extra_body": extra_body,
}
if self._tools:
create_kwargs["tools"] = self._tools
try:
response = await self._client.chat.completions.create(
model=self.model,
messages=messages,
extra_body=extra_body,
)
response = await self._client.chat.completions.create(**create_kwargs)
choice = response.choices[0]
content: str = choice.message.content or ""
@ -297,6 +350,37 @@ class HermesA2AExecutor(AgentExecutor):
# Log to OTEL — intentionally omitted from the A2A reply.
self._log_reasoning(context, reasoning, reasoning_details)
# Handle tool-call response: when the model returns tool calls
# with no text content, serialise the calls as JSON so the caller
# receives structured, parseable output. This keeps the executor
# thin (single API call per turn) while not silently discarding
# function-call intent.
if not content:
tool_calls = getattr(choice.message, "tool_calls", None)
if tool_calls:
serialised = json.dumps([
{
"id": getattr(tc, "id", ""),
"type": getattr(tc, "type", "function"),
"function": {
"name": getattr(
getattr(tc, "function", None), "name", ""
),
"arguments": getattr(
getattr(tc, "function", None), "arguments", "{}"
),
},
}
for tc in tool_calls
])
logger.info(
"hermes_executor: tool_calls response [model=%s n=%d]",
self.model,
len(tool_calls),
)
await event_queue.enqueue_event(new_agent_text_message(serialised))
return
final_text = content.strip() or "(no response generated)"
await event_queue.enqueue_event(new_agent_text_message(final_text))

View File

@ -4,12 +4,15 @@ Coverage targets
----------------
- _reasoning_supported() model name pattern detection
- ProviderConfig capability flags derived from model name
- HermesA2AExecutor.__init__ field assignment + client injection
- HermesA2AExecutor.__init__ field assignment, client injection, tools (#497)
- HermesA2AExecutor._build_messages system prompt + user turn assembly
- HermesA2AExecutor._log_reasoning OTEL span emission + swallowed errors
- HermesA2AExecutor.execute happy path, empty input, API error,
Hermes 4 extra_body, Hermes 3 no extra_body,
reasoning not in reply, reasoning_details
reasoning not in reply, reasoning_details,
tools serialized in request body (#497),
empty tools no tools field (#497),
tool_call response JSON text (#497)
- HermesA2AExecutor.cancel TaskStatusUpdateEvent emitted
The ``openai`` module is stubbed in sys.modules so no real API call is made.
@ -699,3 +702,263 @@ async def test_no_system_prompt_only_user_message():
msgs = mock_client.chat.completions.create.call_args[1]["messages"]
assert len(msgs) == 1
assert msgs[0]["role"] == "user"
# ---------------------------------------------------------------------------
# Native tools parameter — issue #497
# ---------------------------------------------------------------------------
# Minimal OpenAI-format tool definition used across the tools tests.
# Weather-lookup tool: one required string parameter ("location").
# Follows the standard OpenAI function-calling schema exactly.
_SAMPLE_TOOL: dict = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get current weather for a location.",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string", "description": "City name"},
            },
            "required": ["location"],
        },
    },
}

# Second, distinct tool ("search_web") so the multi-tool forwarding and
# multi-tool-call serialisation tests can assert order and completeness.
_SAMPLE_TOOL_2: dict = {
    "type": "function",
    "function": {
        "name": "search_web",
        "description": "Search the web.",
        "parameters": {
            "type": "object",
            "properties": {"query": {"type": "string"}},
            "required": ["query"],
        },
    },
}
class _FakeFunction:
"""Stand-in for openai ChatCompletionMessageToolCall.function."""
def __init__(self, name: str, arguments: str) -> None:
self.name = name
self.arguments = arguments
class _FakeToolCall:
    """Mimics an OpenAI ``ChatCompletionMessageToolCall`` object."""

    def __init__(self, tc_id: str, name: str, arguments: str = "{}") -> None:
        # ``id``/``type`` mirror the real SDK object; the nested function
        # stand-in carries the call's name and JSON-string arguments.
        self.id, self.type = tc_id, "function"
        self.function = _FakeFunction(name=name, arguments=arguments)
def _make_tool_call_response(tool_calls: list, content: str = ""):
"""Build a mock API response that includes tool_calls on the message."""
class _MsgWithToolCalls:
def __init__(self):
self.content = content
self.tool_calls = tool_calls
choice = MagicMock()
choice.message = _MsgWithToolCalls()
response = MagicMock()
response.choices = [choice]
return response
def test_constructor_tools_stored_correctly():
    """Constructor keeps the supplied tool definitions in ``_tools``."""
    tool_defs = [_SAMPLE_TOOL, _SAMPLE_TOOL_2]
    ex = HermesA2AExecutor(
        model="hermes-4", tools=tool_defs, _client=MagicMock()
    )
    assert ex._tools == tool_defs
def test_constructor_none_tools_stored_as_empty_list():
    """Passing tools=None normalises to an empty list, never None."""
    ex = HermesA2AExecutor(model="hermes-4", tools=None, _client=MagicMock())
    assert ex._tools == []
def test_constructor_empty_list_stored_as_empty_list():
    """An explicitly empty tools list stays an empty list."""
    ex = HermesA2AExecutor(model="hermes-4", tools=[], _client=MagicMock())
    assert ex._tools == []
def test_constructor_tools_is_independent_copy():
    """Caller-side mutation of the tools list must not leak into the executor."""
    supplied = [_SAMPLE_TOOL]
    ex = HermesA2AExecutor(
        model="hermes-4", tools=supplied, _client=MagicMock()
    )
    supplied.append(_SAMPLE_TOOL_2)  # mutate AFTER construction
    assert ex._tools == [_SAMPLE_TOOL]
@pytest.mark.asyncio
async def test_execute_tools_serialized_in_request_body():
    """A non-empty tools list must reach chat.completions.create as tools=."""
    client = MagicMock()
    client.chat.completions.create = AsyncMock(
        return_value=_make_api_response("Paris is sunny.")
    )
    ex = HermesA2AExecutor(
        model="hermes-4", tools=[_SAMPLE_TOOL], _client=client
    )
    await ex.execute(_make_context("weather?"), AsyncMock())
    kwargs = client.chat.completions.create.call_args[1]
    # .get() fails the assert both when the key is absent and when wrong.
    assert kwargs.get("tools") == [_SAMPLE_TOOL]
@pytest.mark.asyncio
async def test_execute_multiple_tools_all_forwarded():
    """Every configured tool definition is forwarded, in order, untruncated."""
    client = MagicMock()
    client.chat.completions.create = AsyncMock(
        return_value=_make_api_response("ok")
    )
    both = [_SAMPLE_TOOL, _SAMPLE_TOOL_2]
    ex = HermesA2AExecutor(model="hermes-4", tools=both, _client=client)
    await ex.execute(_make_context("search?"), AsyncMock())
    kwargs = client.chat.completions.create.call_args[1]
    assert kwargs["tools"] == both
@pytest.mark.asyncio
async def test_execute_empty_tools_no_tools_field_in_request():
    """tools=[] → the request body carries no 'tools' key at all."""
    client = MagicMock()
    client.chat.completions.create = AsyncMock(
        return_value=_make_api_response("ok")
    )
    ex = HermesA2AExecutor(model="hermes-4", tools=[], _client=client)
    await ex.execute(_make_context("hello"), AsyncMock())
    assert "tools" not in client.chat.completions.create.call_args[1]
@pytest.mark.asyncio
async def test_execute_none_tools_no_tools_field_in_request():
    """tools=None → the request body carries no 'tools' key at all."""
    client = MagicMock()
    client.chat.completions.create = AsyncMock(
        return_value=_make_api_response("ok")
    )
    ex = HermesA2AExecutor(model="hermes-4", tools=None, _client=client)
    await ex.execute(_make_context("hello"), AsyncMock())
    assert "tools" not in client.chat.completions.create.call_args[1]
@pytest.mark.asyncio
async def test_execute_default_no_tools_field_in_request():
    """Omitting the tools kwarg entirely → no 'tools' key in the API call."""
    ex, client = _make_executor(model="hermes-4")
    client.chat.completions.create.return_value = _make_api_response("ok")
    await ex.execute(_make_context("hello"), AsyncMock())
    assert "tools" not in client.chat.completions.create.call_args[1]
@pytest.mark.asyncio
async def test_execute_tool_call_response_returns_json():
    """tool_calls with empty content → reply is the JSON-serialised call list."""
    import json

    client = MagicMock()
    call = _FakeToolCall("call_abc123", "get_weather", '{"location":"Paris"}')
    client.chat.completions.create = AsyncMock(
        return_value=_make_tool_call_response(tool_calls=[call], content="")
    )
    ex = HermesA2AExecutor(
        model="hermes-4", tools=[_SAMPLE_TOOL], _client=client
    )
    queue = AsyncMock()
    await ex.execute(_make_context("weather in Paris?"), queue)

    queue.enqueue_event.assert_called_once()
    # The enqueued reply must round-trip through json.loads.
    payload = json.loads(queue.enqueue_event.call_args[0][0])
    assert isinstance(payload, list)
    assert len(payload) == 1
    entry = payload[0]
    assert entry["id"] == "call_abc123"
    assert entry["type"] == "function"
    assert entry["function"]["name"] == "get_weather"
    assert entry["function"]["arguments"] == '{"location":"Paris"}'
@pytest.mark.asyncio
async def test_execute_multiple_tool_calls_all_in_json():
    """Every tool call — not just the first — appears in the JSON reply."""
    import json

    client = MagicMock()
    calls = [
        _FakeToolCall("call_1", "get_weather", '{"location":"Paris"}'),
        _FakeToolCall("call_2", "search_web", '{"query":"news"}'),
    ]
    client.chat.completions.create = AsyncMock(
        return_value=_make_tool_call_response(tool_calls=calls, content="")
    )
    ex = HermesA2AExecutor(
        model="hermes-4", tools=[_SAMPLE_TOOL, _SAMPLE_TOOL_2], _client=client
    )
    queue = AsyncMock()
    await ex.execute(_make_context("do both"), queue)
    payload = json.loads(queue.enqueue_event.call_args[0][0])
    assert len(payload) == 2
    # Names appear in call order.
    assert [entry["function"]["name"] for entry in payload] == [
        "get_weather",
        "search_web",
    ]
@pytest.mark.asyncio
async def test_execute_text_content_wins_over_tool_calls():
    """Text content takes priority: tool_calls are ignored when text exists."""
    client = MagicMock()
    call = _FakeToolCall("call_xyz", "get_weather", '{"location":"Berlin"}')
    client.chat.completions.create = AsyncMock(
        return_value=_make_tool_call_response(
            tool_calls=[call], content="The weather is fine."
        )
    )
    ex = HermesA2AExecutor(
        model="hermes-4", tools=[_SAMPLE_TOOL], _client=client
    )
    queue = AsyncMock()
    await ex.execute(_make_context("weather?"), queue)
    assert queue.enqueue_event.call_args[0][0] == "The weather is fine."