forked from molecule-ai/molecule-core
feat(hermes): native tools=[] parameter instead of text-in-prompt workaround (#497)
This commit is contained in:
commit
816ea3e565
@ -21,6 +21,37 @@ OTEL activity span so operators can inspect the thinking trace in Langfuse
|
||||
A2A reply — doing so would contaminate the agent's next-turn context with
|
||||
the model's internal scratchpad.
|
||||
|
||||
Native tools (#497)
|
||||
-------------------
|
||||
Tool definitions are passed via the OpenAI-native ``tools`` parameter instead
|
||||
of injecting them as text into the system prompt. Each entry must follow the
|
||||
standard OpenAI function-calling schema::
|
||||
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "...",
|
||||
"description": "...",
|
||||
"parameters": { # JSON Schema object
|
||||
"type": "object",
|
||||
"properties": {...},
|
||||
"required": [...]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
**Empty list rule:** when ``tools`` is ``None`` or ``[]``, the ``tools``
|
||||
parameter is **omitted** from the API call entirely. Sending ``tools=[]``
|
||||
to some OpenAI-compat providers causes a 400 / unexpected behaviour; omitting
|
||||
the key is always safe and signals "no tool use."
|
||||
|
||||
**Tool-call response handling:** when the model returns
|
||||
``choice.message.tool_calls`` with no text content (``finish_reason`` is
|
||||
``"tool_calls"``), the executor serialises the tool-call list as a JSON string
|
||||
and enqueues that as the A2A reply. This keeps the executor thin (single API
|
||||
call per turn, no ReAct loop) while surfacing function-call intent to the
|
||||
caller in a structured, parseable format.
|
||||
|
||||
Hermes 3 / unknown models
|
||||
--------------------------
|
||||
No ``extra_body`` is sent. The response is processed identically to any
|
||||
@ -126,6 +157,7 @@ class HermesA2AExecutor(AgentExecutor):
|
||||
- System prompt injected as the first ``messages[]`` entry.
|
||||
- Hermes 4 reasoning enabled via ``extra_body`` when supported.
|
||||
- Reasoning trace logged to OTEL span — never echoed in the reply.
|
||||
- Tool definitions passed via native ``tools`` parameter when supplied.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
@ -142,6 +174,12 @@ class HermesA2AExecutor(AgentExecutor):
|
||||
heartbeat:
|
||||
Optional ``HeartbeatLoop`` instance used to surface the current
|
||||
task description in the platform UI.
|
||||
tools:
|
||||
Optional list of OpenAI-format tool definitions to pass via the
|
||||
native ``tools`` parameter. Each entry must have ``"type"`` and
|
||||
``"function"`` keys matching the OpenAI function-calling schema.
|
||||
``None`` or ``[]`` → the ``tools`` key is **omitted** from the API
|
||||
call entirely (never sent as ``tools=[]``).
|
||||
_client:
|
||||
Inject a pre-built ``AsyncOpenAI`` (or compatible mock) — for
|
||||
testing only. When provided, ``base_url`` and ``api_key`` are
|
||||
@ -155,12 +193,16 @@ class HermesA2AExecutor(AgentExecutor):
|
||||
base_url: str | None = None,
|
||||
api_key: str | None = None,
|
||||
heartbeat: "HeartbeatLoop | None" = None,
|
||||
tools: list[dict] | None = None,
|
||||
_client: Any = None,
|
||||
) -> None:
|
||||
self.model = model
|
||||
self.system_prompt = system_prompt
|
||||
self._heartbeat = heartbeat
|
||||
self._provider = ProviderConfig(model)
|
||||
# Empty list and None are treated identically: no tools → omit the
|
||||
# parameter from the API call rather than sending tools=[].
|
||||
self._tools: list[dict] = list(tools) if tools else []
|
||||
|
||||
if _client is not None:
|
||||
# Test injection path — skip real AsyncOpenAI construction so
|
||||
@ -245,10 +287,15 @@ class HermesA2AExecutor(AgentExecutor):
|
||||
Sequence:
|
||||
1. Extract user text from A2A message parts.
|
||||
2. Build ``messages[]`` (optional system + user).
|
||||
3. Call OpenAI-compat API; include ``extra_body`` for Hermes 4.
|
||||
3. Call OpenAI-compat API; include ``extra_body`` for Hermes 4 and
|
||||
``tools`` when tool definitions are configured.
|
||||
4. Extract and log reasoning trace — does NOT appear in the reply.
|
||||
5. Enqueue a final ``Message`` with the content text.
|
||||
5a. If the model returned text content, enqueue it as the reply.
|
||||
5b. If the model returned tool calls with no text (``finish_reason``
|
||||
``"tool_calls"``), serialise the calls as JSON and enqueue that.
|
||||
"""
|
||||
import json
|
||||
|
||||
from shared_runtime import extract_message_text
|
||||
|
||||
user_input = extract_message_text(context)
|
||||
@ -268,12 +315,18 @@ class HermesA2AExecutor(AgentExecutor):
|
||||
if self._provider.reasoning_supported:
|
||||
extra_body = {"reasoning": {"enabled": True}}
|
||||
|
||||
# Build call kwargs — omit ``tools`` entirely when the list is empty
|
||||
# so providers that reject tools=[] don't get a 400.
|
||||
create_kwargs: dict = {
|
||||
"model": self.model,
|
||||
"messages": messages,
|
||||
"extra_body": extra_body,
|
||||
}
|
||||
if self._tools:
|
||||
create_kwargs["tools"] = self._tools
|
||||
|
||||
try:
|
||||
response = await self._client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=messages,
|
||||
extra_body=extra_body,
|
||||
)
|
||||
response = await self._client.chat.completions.create(**create_kwargs)
|
||||
|
||||
choice = response.choices[0]
|
||||
content: str = choice.message.content or ""
|
||||
@ -297,6 +350,37 @@ class HermesA2AExecutor(AgentExecutor):
|
||||
# Log to OTEL — intentionally omitted from the A2A reply.
|
||||
self._log_reasoning(context, reasoning, reasoning_details)
|
||||
|
||||
# Handle tool-call response: when the model returns tool calls
|
||||
# with no text content, serialise the calls as JSON so the caller
|
||||
# receives structured, parseable output. This keeps the executor
|
||||
# thin (single API call per turn) while not silently discarding
|
||||
# function-call intent.
|
||||
if not content:
|
||||
tool_calls = getattr(choice.message, "tool_calls", None)
|
||||
if tool_calls:
|
||||
serialised = json.dumps([
|
||||
{
|
||||
"id": getattr(tc, "id", ""),
|
||||
"type": getattr(tc, "type", "function"),
|
||||
"function": {
|
||||
"name": getattr(
|
||||
getattr(tc, "function", None), "name", ""
|
||||
),
|
||||
"arguments": getattr(
|
||||
getattr(tc, "function", None), "arguments", "{}"
|
||||
),
|
||||
},
|
||||
}
|
||||
for tc in tool_calls
|
||||
])
|
||||
logger.info(
|
||||
"hermes_executor: tool_calls response [model=%s n=%d]",
|
||||
self.model,
|
||||
len(tool_calls),
|
||||
)
|
||||
await event_queue.enqueue_event(new_agent_text_message(serialised))
|
||||
return
|
||||
|
||||
final_text = content.strip() or "(no response generated)"
|
||||
await event_queue.enqueue_event(new_agent_text_message(final_text))
|
||||
|
||||
|
||||
@ -4,12 +4,15 @@ Coverage targets
|
||||
----------------
|
||||
- _reasoning_supported() — model name pattern detection
|
||||
- ProviderConfig — capability flags derived from model name
|
||||
- HermesA2AExecutor.__init__ — field assignment + client injection
|
||||
- HermesA2AExecutor.__init__ — field assignment, client injection, tools (#497)
|
||||
- HermesA2AExecutor._build_messages — system prompt + user turn assembly
|
||||
- HermesA2AExecutor._log_reasoning — OTEL span emission + swallowed errors
|
||||
- HermesA2AExecutor.execute — happy path, empty input, API error,
|
||||
Hermes 4 extra_body, Hermes 3 no extra_body,
|
||||
reasoning not in reply, reasoning_details
|
||||
reasoning not in reply, reasoning_details,
|
||||
tools serialized in request body (#497),
|
||||
empty tools → no tools field (#497),
|
||||
tool_call response → JSON text (#497)
|
||||
- HermesA2AExecutor.cancel — TaskStatusUpdateEvent emitted
|
||||
|
||||
The ``openai`` module is stubbed in sys.modules so no real API call is made.
|
||||
@ -699,3 +702,263 @@ async def test_no_system_prompt_only_user_message():
|
||||
msgs = mock_client.chat.completions.create.call_args[1]["messages"]
|
||||
assert len(msgs) == 1
|
||||
assert msgs[0]["role"] == "user"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Native tools parameter — issue #497
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Minimal OpenAI-format tool definition used across the tools tests.
|
||||
_SAMPLE_TOOL: dict = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get current weather for a location.",
        # "parameters" is a JSON Schema object, matching the OpenAI
        # function-calling shape the executor forwards verbatim.
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string", "description": "City name"},
            },
            "required": ["location"],
        },
    },
}
|
||||
|
||||
# Second fixture tool — used by the multi-tool tests to verify that
# more than one definition is forwarded / serialised without truncation.
_SAMPLE_TOOL_2: dict = {
    "type": "function",
    "function": {
        "name": "search_web",
        "description": "Search the web.",
        "parameters": {
            "type": "object",
            "properties": {"query": {"type": "string"}},
            "required": ["query"],
        },
    },
}
|
||||
|
||||
|
||||
class _FakeFunction:
|
||||
"""Stand-in for openai ChatCompletionMessageToolCall.function."""
|
||||
|
||||
def __init__(self, name: str, arguments: str) -> None:
|
||||
self.name = name
|
||||
self.arguments = arguments
|
||||
|
||||
|
||||
class _FakeToolCall:
    """Duck-typed stand-in for ``ChatCompletionMessageToolCall``.

    Exposes ``id``, ``type`` (always ``"function"``) and a nested
    ``function`` object with ``name``/``arguments`` — the fields the
    executor's JSON serialisation path reads.
    """

    def __init__(self, tc_id: str, name: str, arguments: str = "{}") -> None:
        # The SDK type is always "function" for tool calls today.
        self.type = "function"
        self.id = tc_id
        self.function = _FakeFunction(name, arguments)
|
||||
|
||||
|
||||
def _make_tool_call_response(tool_calls: list, content: str = ""):
|
||||
"""Build a mock API response that includes tool_calls on the message."""
|
||||
|
||||
class _MsgWithToolCalls:
|
||||
def __init__(self):
|
||||
self.content = content
|
||||
self.tool_calls = tool_calls
|
||||
|
||||
choice = MagicMock()
|
||||
choice.message = _MsgWithToolCalls()
|
||||
response = MagicMock()
|
||||
response.choices = [choice]
|
||||
return response
|
||||
|
||||
|
||||
def test_constructor_tools_stored_correctly():
    """A non-empty tools list is stored verbatim on the _tools attribute."""
    executor = HermesA2AExecutor(
        model="hermes-4", tools=[_SAMPLE_TOOL, _SAMPLE_TOOL_2], _client=MagicMock()
    )
    assert executor._tools == [_SAMPLE_TOOL, _SAMPLE_TOOL_2]
|
||||
|
||||
|
||||
def test_constructor_none_tools_stored_as_empty_list():
    """tools=None normalises to an empty list — never stored as None."""
    executor = HermesA2AExecutor(
        model="hermes-4", tools=None, _client=MagicMock()
    )
    assert executor._tools == []
|
||||
|
||||
|
||||
def test_constructor_empty_list_stored_as_empty_list():
    """An explicit tools=[] is kept as an empty list internally."""
    executor = HermesA2AExecutor(
        model="hermes-4", tools=[], _client=MagicMock()
    )
    assert executor._tools == []
|
||||
|
||||
|
||||
def test_constructor_tools_is_independent_copy():
    """Mutating the caller's list after construction must not leak into _tools."""
    caller_tools = [_SAMPLE_TOOL]
    executor = HermesA2AExecutor(
        model="hermes-4", tools=caller_tools, _client=MagicMock()
    )
    # Append after construction — the executor should hold its own copy.
    caller_tools.append(_SAMPLE_TOOL_2)
    assert executor._tools == [_SAMPLE_TOOL]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_execute_tools_serialized_in_request_body():
    """Non-empty tools list is forwarded to chat.completions.create as tools=."""
    client = MagicMock()
    client.chat.completions.create = AsyncMock(
        return_value=_make_api_response("Paris is sunny.")
    )
    executor = HermesA2AExecutor(
        model="hermes-4", tools=[_SAMPLE_TOOL], _client=client
    )

    await executor.execute(_make_context("weather?"), AsyncMock())

    # Inspect the kwargs of the single API call.
    kwargs = client.chat.completions.create.call_args[1]
    assert "tools" in kwargs
    assert kwargs["tools"] == [_SAMPLE_TOOL]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_execute_multiple_tools_all_forwarded():
    """All tool definitions are forwarded — not truncated."""
    client = MagicMock()
    client.chat.completions.create = AsyncMock(
        return_value=_make_api_response("ok")
    )
    executor = HermesA2AExecutor(
        model="hermes-4", tools=[_SAMPLE_TOOL, _SAMPLE_TOOL_2], _client=client
    )

    await executor.execute(_make_context("search?"), AsyncMock())

    kwargs = client.chat.completions.create.call_args[1]
    assert kwargs["tools"] == [_SAMPLE_TOOL, _SAMPLE_TOOL_2]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_execute_empty_tools_no_tools_field_in_request():
    """Empty tools list → 'tools' key absent from API call (not tools=[])."""
    client = MagicMock()
    client.chat.completions.create = AsyncMock(
        return_value=_make_api_response("ok")
    )
    executor = HermesA2AExecutor(model="hermes-4", tools=[], _client=client)

    await executor.execute(_make_context("hello"), AsyncMock())

    # The key must be omitted entirely, not sent as an empty list.
    assert "tools" not in client.chat.completions.create.call_args[1]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_execute_none_tools_no_tools_field_in_request():
    """tools=None → 'tools' key absent from API call."""
    client = MagicMock()
    client.chat.completions.create = AsyncMock(
        return_value=_make_api_response("ok")
    )
    executor = HermesA2AExecutor(model="hermes-4", tools=None, _client=client)

    await executor.execute(_make_context("hello"), AsyncMock())

    assert "tools" not in client.chat.completions.create.call_args[1]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_execute_default_no_tools_field_in_request():
    """Constructor with no tools kwarg → 'tools' key absent from API call."""
    executor, client = _make_executor(model="hermes-4")
    client.chat.completions.create.return_value = _make_api_response("ok")

    await executor.execute(_make_context("hello"), AsyncMock())

    assert "tools" not in client.chat.completions.create.call_args[1]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_execute_tool_call_response_returns_json():
    """Model returns tool_calls with no content → reply is JSON-serialised calls."""
    import json

    client = MagicMock()
    call = _FakeToolCall("call_abc123", "get_weather", '{"location":"Paris"}')
    client.chat.completions.create = AsyncMock(
        return_value=_make_tool_call_response(tool_calls=[call], content="")
    )
    executor = HermesA2AExecutor(
        model="hermes-4", tools=[_SAMPLE_TOOL], _client=client
    )

    queue = AsyncMock()
    await executor.execute(_make_context("weather in Paris?"), queue)

    queue.enqueue_event.assert_called_once()
    reply = queue.enqueue_event.call_args[0][0]
    # The reply must parse as a JSON list of tool-call dicts.
    parsed = json.loads(reply)
    assert isinstance(parsed, list)
    assert len(parsed) == 1
    first = parsed[0]
    assert first["id"] == "call_abc123"
    assert first["type"] == "function"
    assert first["function"]["name"] == "get_weather"
    assert first["function"]["arguments"] == '{"location":"Paris"}'
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_execute_multiple_tool_calls_all_in_json():
    """Multiple tool calls are all serialised into the JSON reply."""
    import json

    client = MagicMock()
    calls = [
        _FakeToolCall("call_1", "get_weather", '{"location":"Paris"}'),
        _FakeToolCall("call_2", "search_web", '{"query":"news"}'),
    ]
    client.chat.completions.create = AsyncMock(
        return_value=_make_tool_call_response(tool_calls=calls, content="")
    )
    executor = HermesA2AExecutor(
        model="hermes-4", tools=[_SAMPLE_TOOL, _SAMPLE_TOOL_2], _client=client
    )

    queue = AsyncMock()
    await executor.execute(_make_context("do both"), queue)

    parsed = json.loads(queue.enqueue_event.call_args[0][0])
    assert len(parsed) == 2
    assert parsed[0]["function"]["name"] == "get_weather"
    assert parsed[1]["function"]["name"] == "search_web"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_execute_text_content_wins_over_tool_calls():
    """When model returns both text content AND tool_calls, text is used."""
    client = MagicMock()
    call = _FakeToolCall("call_xyz", "get_weather", '{"location":"Berlin"}')
    client.chat.completions.create = AsyncMock(
        return_value=_make_tool_call_response(
            tool_calls=[call], content="The weather is fine."
        )
    )
    executor = HermesA2AExecutor(
        model="hermes-4", tools=[_SAMPLE_TOOL], _client=client
    )

    queue = AsyncMock()
    await executor.execute(_make_context("weather?"), queue)

    # Text content takes precedence — the tool calls are not serialised.
    assert queue.enqueue_event.call_args[0][0] == "The weather is fine."
|
||||
|
||||
Loading…
Reference in New Issue
Block a user