feat(hermes): native tools=[] parameter instead of text-in-prompt workaround (#497)

feat(hermes): native tools=[] parameter instead of text-in-prompt workaround (#497)
This commit is contained in:
molecule-ai[bot] 2026-04-17 06:56:10 +00:00 committed by GitHub
commit c9b8c26d5f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 356 additions and 9 deletions

View File

@ -21,6 +21,37 @@ OTEL activity span so operators can inspect the thinking trace in Langfuse
A2A reply — doing so would contaminate the agent's next-turn context with
the model's internal scratchpad.
Native tools (#497)
-------------------
Tool definitions are passed via the OpenAI-native ``tools`` parameter instead
of injecting them as text into the system prompt. Each entry must follow the
standard OpenAI function-calling schema::
{
"type": "function",
"function": {
"name": "...",
"description": "...",
"parameters": { # JSON Schema object
"type": "object",
"properties": {...},
"required": [...]
}
}
}
**Empty list rule:** when ``tools`` is ``None`` or ``[]``, the ``tools``
parameter is **omitted** from the API call entirely. Sending ``tools=[]``
to some OpenAI-compat providers causes a 400 / unexpected behaviour; omitting
the key is always safe and signals "no tool use."
**Tool-call response handling:** when the model returns
``choice.message.tool_calls`` with no text content (``finish_reason`` is
``"tool_calls"``), the executor serialises the tool-call list as a JSON string
and enqueues that as the A2A reply. This keeps the executor thin (single API
call per turn, no ReAct loop) while surfacing function-call intent to the
caller in a structured, parseable format.
Hermes 3 / unknown models
--------------------------
No ``extra_body`` is sent. The response is processed identically to any
@ -126,6 +157,7 @@ class HermesA2AExecutor(AgentExecutor):
- System prompt injected as the first ``messages[]`` entry.
- Hermes 4 reasoning enabled via ``extra_body`` when supported.
- Reasoning trace logged to OTEL span — never echoed in the reply.
- Tool definitions passed via native ``tools`` parameter when supplied.
Parameters
----------
@ -142,6 +174,12 @@ class HermesA2AExecutor(AgentExecutor):
heartbeat:
Optional ``HeartbeatLoop`` instance used to surface the current
task description in the platform UI.
tools:
Optional list of OpenAI-format tool definitions to pass via the
native ``tools`` parameter. Each entry must have ``"type"`` and
``"function"`` keys matching the OpenAI function-calling schema.
When ``None`` or ``[]``, the ``tools`` key is **omitted** from the API
call entirely (never sent as ``tools=[]``).
_client:
Inject a pre-built ``AsyncOpenAI`` (or compatible mock) for
testing only. When provided, ``base_url`` and ``api_key`` are
@ -155,12 +193,16 @@ class HermesA2AExecutor(AgentExecutor):
base_url: str | None = None,
api_key: str | None = None,
heartbeat: "HeartbeatLoop | None" = None,
tools: list[dict] | None = None,
_client: Any = None,
) -> None:
self.model = model
self.system_prompt = system_prompt
self._heartbeat = heartbeat
self._provider = ProviderConfig(model)
# Empty list and None are treated identically: no tools → omit the
# parameter from the API call rather than sending tools=[].
self._tools: list[dict] = list(tools) if tools else []
if _client is not None:
# Test injection path — skip real AsyncOpenAI construction so
@ -245,10 +287,15 @@ class HermesA2AExecutor(AgentExecutor):
Sequence:
1. Extract user text from A2A message parts.
2. Build ``messages[]`` (optional system + user).
3. Call OpenAI-compat API; include ``extra_body`` for Hermes 4.
3. Call OpenAI-compat API; include ``extra_body`` for Hermes 4 and
``tools`` when tool definitions are configured.
4. Extract and log the reasoning trace — it does NOT appear in the reply.
5. Enqueue a final ``Message`` with the content text.
5a. If the model returned text content, enqueue it as the reply.
5b. If the model returned tool calls with no text (``finish_reason``
``"tool_calls"``), serialise the calls as JSON and enqueue that.
"""
import json
from shared_runtime import extract_message_text
user_input = extract_message_text(context)
@ -268,12 +315,18 @@ class HermesA2AExecutor(AgentExecutor):
if self._provider.reasoning_supported:
extra_body = {"reasoning": {"enabled": True}}
# Build call kwargs — omit ``tools`` entirely when the list is empty
# so providers that reject tools=[] don't get a 400.
create_kwargs: dict = {
"model": self.model,
"messages": messages,
"extra_body": extra_body,
}
if self._tools:
create_kwargs["tools"] = self._tools
try:
response = await self._client.chat.completions.create(
model=self.model,
messages=messages,
extra_body=extra_body,
)
response = await self._client.chat.completions.create(**create_kwargs)
choice = response.choices[0]
content: str = choice.message.content or ""
@ -297,6 +350,37 @@ class HermesA2AExecutor(AgentExecutor):
# Log to OTEL — intentionally omitted from the A2A reply.
self._log_reasoning(context, reasoning, reasoning_details)
# Handle tool-call response: when the model returns tool calls
# with no text content, serialise the calls as JSON so the caller
# receives structured, parseable output. This keeps the executor
# thin (single API call per turn) while not silently discarding
# function-call intent.
if not content:
tool_calls = getattr(choice.message, "tool_calls", None)
if tool_calls:
serialised = json.dumps([
{
"id": getattr(tc, "id", ""),
"type": getattr(tc, "type", "function"),
"function": {
"name": getattr(
getattr(tc, "function", None), "name", ""
),
"arguments": getattr(
getattr(tc, "function", None), "arguments", "{}"
),
},
}
for tc in tool_calls
])
logger.info(
"hermes_executor: tool_calls response [model=%s n=%d]",
self.model,
len(tool_calls),
)
await event_queue.enqueue_event(new_agent_text_message(serialised))
return
final_text = content.strip() or "(no response generated)"
await event_queue.enqueue_event(new_agent_text_message(final_text))

View File

@ -4,12 +4,15 @@ Coverage targets
----------------
- _reasoning_supported() model name pattern detection
- ProviderConfig capability flags derived from model name
- HermesA2AExecutor.__init__ field assignment + client injection
- HermesA2AExecutor.__init__ field assignment, client injection, tools (#497)
- HermesA2AExecutor._build_messages system prompt + user turn assembly
- HermesA2AExecutor._log_reasoning OTEL span emission + swallowed errors
- HermesA2AExecutor.execute happy path, empty input, API error,
Hermes 4 extra_body, Hermes 3 no extra_body,
reasoning not in reply, reasoning_details
reasoning not in reply, reasoning_details,
tools serialized in request body (#497),
empty tools no tools field (#497),
tool_call response JSON text (#497)
- HermesA2AExecutor.cancel TaskStatusUpdateEvent emitted
The ``openai`` module is stubbed in sys.modules so no real API call is made.
@ -699,3 +702,263 @@ async def test_no_system_prompt_only_user_message():
msgs = mock_client.chat.completions.create.call_args[1]["messages"]
assert len(msgs) == 1
assert msgs[0]["role"] == "user"
# ---------------------------------------------------------------------------
# Native tools parameter — issue #497
# ---------------------------------------------------------------------------
# Minimal OpenAI-format tool definition used across the tools tests.
# Weather-lookup tool: one required string parameter ("location").
# Follows the standard OpenAI function-calling schema exactly.
_SAMPLE_TOOL: dict = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get current weather for a location.",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string", "description": "City name"},
            },
            "required": ["location"],
        },
    },
}

# Second, distinct tool ("search_web") so the multi-tool forwarding and
# multi-tool-call serialisation tests can assert order and completeness.
_SAMPLE_TOOL_2: dict = {
    "type": "function",
    "function": {
        "name": "search_web",
        "description": "Search the web.",
        "parameters": {
            "type": "object",
            "properties": {"query": {"type": "string"}},
            "required": ["query"],
        },
    },
}
class _FakeFunction:
"""Stand-in for openai ChatCompletionMessageToolCall.function."""
def __init__(self, name: str, arguments: str) -> None:
self.name = name
self.arguments = arguments
class _FakeToolCall:
    """Mimics an OpenAI ``ChatCompletionMessageToolCall`` object."""

    def __init__(self, tc_id: str, name: str, arguments: str = "{}") -> None:
        # ``id``/``type`` mirror the real SDK object; the nested function
        # stand-in carries the call's name and JSON-string arguments.
        self.id, self.type = tc_id, "function"
        self.function = _FakeFunction(name=name, arguments=arguments)
def _make_tool_call_response(tool_calls: list, content: str = ""):
"""Build a mock API response that includes tool_calls on the message."""
class _MsgWithToolCalls:
def __init__(self):
self.content = content
self.tool_calls = tool_calls
choice = MagicMock()
choice.message = _MsgWithToolCalls()
response = MagicMock()
response.choices = [choice]
return response
def test_constructor_tools_stored_correctly():
    """Constructor keeps the supplied tool definitions in ``_tools``."""
    tool_defs = [_SAMPLE_TOOL, _SAMPLE_TOOL_2]
    ex = HermesA2AExecutor(
        model="hermes-4", tools=tool_defs, _client=MagicMock()
    )
    assert ex._tools == tool_defs
def test_constructor_none_tools_stored_as_empty_list():
    """Passing tools=None normalises to an empty list, never None."""
    ex = HermesA2AExecutor(model="hermes-4", tools=None, _client=MagicMock())
    assert ex._tools == []
def test_constructor_empty_list_stored_as_empty_list():
    """An explicitly empty tools list stays an empty list."""
    ex = HermesA2AExecutor(model="hermes-4", tools=[], _client=MagicMock())
    assert ex._tools == []
def test_constructor_tools_is_independent_copy():
    """Caller-side mutation of the tools list must not leak into the executor."""
    supplied = [_SAMPLE_TOOL]
    ex = HermesA2AExecutor(
        model="hermes-4", tools=supplied, _client=MagicMock()
    )
    supplied.append(_SAMPLE_TOOL_2)  # mutate AFTER construction
    assert ex._tools == [_SAMPLE_TOOL]
@pytest.mark.asyncio
async def test_execute_tools_serialized_in_request_body():
    """A non-empty tools list must reach chat.completions.create as tools=."""
    client = MagicMock()
    client.chat.completions.create = AsyncMock(
        return_value=_make_api_response("Paris is sunny.")
    )
    ex = HermesA2AExecutor(
        model="hermes-4", tools=[_SAMPLE_TOOL], _client=client
    )
    await ex.execute(_make_context("weather?"), AsyncMock())
    kwargs = client.chat.completions.create.call_args[1]
    # .get() fails the assert both when the key is absent and when wrong.
    assert kwargs.get("tools") == [_SAMPLE_TOOL]
@pytest.mark.asyncio
async def test_execute_multiple_tools_all_forwarded():
    """Every configured tool definition is forwarded, in order, untruncated."""
    client = MagicMock()
    client.chat.completions.create = AsyncMock(
        return_value=_make_api_response("ok")
    )
    both = [_SAMPLE_TOOL, _SAMPLE_TOOL_2]
    ex = HermesA2AExecutor(model="hermes-4", tools=both, _client=client)
    await ex.execute(_make_context("search?"), AsyncMock())
    kwargs = client.chat.completions.create.call_args[1]
    assert kwargs["tools"] == both
@pytest.mark.asyncio
async def test_execute_empty_tools_no_tools_field_in_request():
    """tools=[] → the request body carries no 'tools' key at all."""
    client = MagicMock()
    client.chat.completions.create = AsyncMock(
        return_value=_make_api_response("ok")
    )
    ex = HermesA2AExecutor(model="hermes-4", tools=[], _client=client)
    await ex.execute(_make_context("hello"), AsyncMock())
    assert "tools" not in client.chat.completions.create.call_args[1]
@pytest.mark.asyncio
async def test_execute_none_tools_no_tools_field_in_request():
    """tools=None → the request body carries no 'tools' key at all."""
    client = MagicMock()
    client.chat.completions.create = AsyncMock(
        return_value=_make_api_response("ok")
    )
    ex = HermesA2AExecutor(model="hermes-4", tools=None, _client=client)
    await ex.execute(_make_context("hello"), AsyncMock())
    assert "tools" not in client.chat.completions.create.call_args[1]
@pytest.mark.asyncio
async def test_execute_default_no_tools_field_in_request():
    """Omitting the tools kwarg entirely → no 'tools' key in the API call."""
    ex, client = _make_executor(model="hermes-4")
    client.chat.completions.create.return_value = _make_api_response("ok")
    await ex.execute(_make_context("hello"), AsyncMock())
    assert "tools" not in client.chat.completions.create.call_args[1]
@pytest.mark.asyncio
async def test_execute_tool_call_response_returns_json():
    """tool_calls with empty content → reply is the JSON-serialised call list."""
    import json

    client = MagicMock()
    call = _FakeToolCall("call_abc123", "get_weather", '{"location":"Paris"}')
    client.chat.completions.create = AsyncMock(
        return_value=_make_tool_call_response(tool_calls=[call], content="")
    )
    ex = HermesA2AExecutor(
        model="hermes-4", tools=[_SAMPLE_TOOL], _client=client
    )
    queue = AsyncMock()
    await ex.execute(_make_context("weather in Paris?"), queue)

    queue.enqueue_event.assert_called_once()
    # The enqueued reply must round-trip through json.loads.
    payload = json.loads(queue.enqueue_event.call_args[0][0])
    assert isinstance(payload, list)
    assert len(payload) == 1
    entry = payload[0]
    assert entry["id"] == "call_abc123"
    assert entry["type"] == "function"
    assert entry["function"]["name"] == "get_weather"
    assert entry["function"]["arguments"] == '{"location":"Paris"}'
@pytest.mark.asyncio
async def test_execute_multiple_tool_calls_all_in_json():
    """Every tool call — not just the first — appears in the JSON reply."""
    import json

    client = MagicMock()
    calls = [
        _FakeToolCall("call_1", "get_weather", '{"location":"Paris"}'),
        _FakeToolCall("call_2", "search_web", '{"query":"news"}'),
    ]
    client.chat.completions.create = AsyncMock(
        return_value=_make_tool_call_response(tool_calls=calls, content="")
    )
    ex = HermesA2AExecutor(
        model="hermes-4", tools=[_SAMPLE_TOOL, _SAMPLE_TOOL_2], _client=client
    )
    queue = AsyncMock()
    await ex.execute(_make_context("do both"), queue)
    payload = json.loads(queue.enqueue_event.call_args[0][0])
    assert len(payload) == 2
    # Names appear in call order.
    assert [entry["function"]["name"] for entry in payload] == [
        "get_weather",
        "search_web",
    ]
@pytest.mark.asyncio
async def test_execute_text_content_wins_over_tool_calls():
    """Text content takes priority: tool_calls are ignored when text exists."""
    client = MagicMock()
    call = _FakeToolCall("call_xyz", "get_weather", '{"location":"Berlin"}')
    client.chat.completions.create = AsyncMock(
        return_value=_make_tool_call_response(
            tool_calls=[call], content="The weather is fine."
        )
    )
    ex = HermesA2AExecutor(
        model="hermes-4", tools=[_SAMPLE_TOOL], _client=client
    )
    queue = AsyncMock()
    await ex.execute(_make_context("weather?"), queue)
    assert queue.enqueue_event.call_args[0][0] == "The weather is fine."