feat(hermes): plumb response_format=json_schema for structured output (#498)

feat(hermes): plumb response_format=json_schema for structured output (#498)
This commit is contained in:
molecule-ai[bot] 2026-04-17 07:03:45 +00:00 committed by GitHub
commit 7793edee33
2 changed files with 237 additions and 3 deletions

View File

@ -57,6 +57,22 @@ Hermes 3 / unknown models
No ``extra_body`` is sent. The response is processed identically to any
other OpenAI-compat model call. The Hermes 3 path is exercised by the
existing adapter test suite and must remain unchanged.
response_format / structured output (#498)
------------------------------------------
Pass ``response_format={"type": "json_schema", "json_schema": {...}}`` (or
``{"type": "json_object"}`` / ``{"type": "text"}``) to request structured
output from the upstream provider. The value is forwarded verbatim as the
``response_format=`` kwarg on ``chat.completions.create()``.
Validation is performed **before** the API call via
``_validate_response_format()``. If the dict is invalid (unknown type,
missing ``json_schema`` key for ``type="json_schema"``, etc.) the executor
enqueues an error message and returns early without calling the API.
When ``response_format`` is ``None`` (the default) the kwarg is omitted
entirely from the API call so older / strict providers do not receive an
unexpected field.
"""
from __future__ import annotations
@ -108,6 +124,53 @@ def _reasoning_supported(model: str) -> bool:
return any(pat in model_lower for pat in _HERMES4_PATTERNS)
# ---------------------------------------------------------------------------
# response_format validation (#498)
# ---------------------------------------------------------------------------
_VALID_RESPONSE_FORMAT_TYPES: frozenset[str] = frozenset(
{"json_schema", "json_object", "text"}
)
def _validate_response_format(rf: dict) -> "str | None":
"""Validate a ``response_format`` dict before forwarding to the API.
Returns ``None`` if *rf* is valid, or an error message string describing
the first validation failure found.
Valid ``type`` values are ``"json_schema"``, ``"json_object"``, and
``"text"``. For ``type="json_schema"``, the dict must also contain a
``"json_schema"`` key whose value is a dict with at least a ``"name"``
key (str). If ``json_schema.schema`` is present it must be a dict.
Examples::
>>> _validate_response_format({"type": "json_object"}) is None
True
>>> _validate_response_format({"type": "bad"}) is not None
True
"""
rf_type = rf.get("type")
if rf_type not in _VALID_RESPONSE_FORMAT_TYPES:
return (
f"type must be one of {sorted(_VALID_RESPONSE_FORMAT_TYPES)!r}, "
f"got {rf_type!r}"
)
if rf_type == "json_schema":
js = rf.get("json_schema")
if not isinstance(js, dict):
return "json_schema must be a dict when type='json_schema'"
if not isinstance(js.get("name"), str):
return "json_schema.name must be a string"
schema = js.get("schema")
if schema is not None and not isinstance(schema, dict):
return "json_schema.schema must be a dict if present"
return None
# ---------------------------------------------------------------------------
# ProviderConfig — per-provider / per-model capability flags
# ---------------------------------------------------------------------------
@ -174,6 +237,15 @@ class HermesA2AExecutor(AgentExecutor):
heartbeat:
Optional ``HeartbeatLoop`` instance used to surface the current
task description in the platform UI.
response_format:
Optional OpenAI-native ``response_format`` dict forwarded verbatim
to ``chat.completions.create()``. Supported types:
``{"type": "json_schema", "json_schema": {"name": ..., "schema": {...}}}``
``{"type": "json_object"}``
``{"type": "text"}``
When ``None`` (default) the parameter is omitted from the API call.
Invalid dicts cause ``execute()`` to enqueue an error and return
early without calling the API.
tools:
Optional list of OpenAI-format tool definitions to pass via the
native ``tools`` parameter. Each entry must have ``"type"`` and
@ -193,12 +265,14 @@ class HermesA2AExecutor(AgentExecutor):
base_url: str | None = None,
api_key: str | None = None,
heartbeat: "HeartbeatLoop | None" = None,
response_format: "dict | None" = None,
tools: list[dict] | None = None,
_client: Any = None,
) -> None:
self.model = model
self.system_prompt = system_prompt
self._heartbeat = heartbeat
self._response_format = response_format
self._provider = ProviderConfig(model)
# Empty list and None are treated identically: no tools → omit the
# parameter from the API call rather than sending tools=[].
@ -309,19 +383,31 @@ class HermesA2AExecutor(AgentExecutor):
messages = self._build_messages(user_input)
# Validate response_format before hitting the API — invalid dicts
# enqueue an error and return early without making an API call.
if self._response_format is not None:
detail = _validate_response_format(self._response_format)
if detail is not None:
await event_queue.enqueue_event(
new_agent_text_message(f"Error: invalid response_format — {detail}")
)
return
# Only Hermes 4 entries get extra_body — sending it to Hermes 3
# or other models is a no-op at best; a 400 at worst.
extra_body: dict | None = None
if self._provider.reasoning_supported:
extra_body = {"reasoning": {"enabled": True}}
# Build call kwargs — omit ``tools`` entirely when the list is empty
# so providers that reject tools=[] don't get a 400.
# Build create() kwargs; omit response_format and tools entirely when
# not set so strict / older providers do not receive unexpected fields.
create_kwargs: dict = {
"model": self.model,
"messages": messages,
"extra_body": extra_body,
}
if self._response_format is not None:
create_kwargs["response_format"] = self._response_format
if self._tools:
create_kwargs["tools"] = self._tools

View File

@ -4,12 +4,15 @@ Coverage targets
----------------
- _reasoning_supported() model name pattern detection
- ProviderConfig capability flags derived from model name
- HermesA2AExecutor.__init__ field assignment, client injection, tools (#497)
- _validate_response_format() valid types, invalid type, missing fields (#498)
- HermesA2AExecutor.__init__ field assignment + client injection,
response_format stored (#498), tools (#497)
- HermesA2AExecutor._build_messages system prompt + user turn assembly
- HermesA2AExecutor._log_reasoning OTEL span emission + swallowed errors
- HermesA2AExecutor.execute happy path, empty input, API error,
Hermes 4 extra_body, Hermes 3 no extra_body,
reasoning not in reply, reasoning_details,
response_format forwarded / omitted / invalid (#498),
tools serialized in request body (#497),
empty tools no tools field (#497),
tool_call response JSON text (#497)
@ -73,6 +76,7 @@ from hermes_executor import ( # noqa: E402
ProviderConfig,
_HERMES4_PATTERNS,
_reasoning_supported,
_validate_response_format,
)
@ -704,6 +708,150 @@ async def test_no_system_prompt_only_user_message():
assert msgs[0]["role"] == "user"
# ---------------------------------------------------------------------------
# _validate_response_format — issue #498
# ---------------------------------------------------------------------------
def test_validate_response_format_json_schema_valid():
"""Valid json_schema dict (with name and schema) returns None."""
rf = {
"type": "json_schema",
"json_schema": {
"name": "my_schema",
"schema": {"type": "object", "properties": {}},
},
}
assert _validate_response_format(rf) is None
def test_validate_response_format_json_object_valid():
"""{"type": "json_object"} returns None (no sub-fields required)."""
assert _validate_response_format({"type": "json_object"}) is None
def test_validate_response_format_text_valid():
"""{"type": "text"} returns None."""
assert _validate_response_format({"type": "text"}) is None
def test_validate_response_format_invalid_type():
"""An unknown type value returns a non-None error string."""
result = _validate_response_format({"type": "yaml_schema"})
assert result is not None
assert isinstance(result, str)
assert "yaml_schema" in result
def test_validate_response_format_missing_json_schema_key():
"""type='json_schema' but no 'json_schema' key → error string."""
result = _validate_response_format({"type": "json_schema"})
assert result is not None
assert "json_schema" in result
def test_validate_response_format_json_schema_schema_not_dict():
"""json_schema.schema present but not a dict → error string."""
rf = {
"type": "json_schema",
"json_schema": {"name": "s", "schema": "not-a-dict"},
}
result = _validate_response_format(rf)
assert result is not None
assert "schema" in result
def test_validate_response_format_json_schema_missing_name():
"""json_schema present but missing 'name' key → error string."""
rf = {
"type": "json_schema",
"json_schema": {"schema": {"type": "object"}},
}
result = _validate_response_format(rf)
assert result is not None
assert "name" in result
def test_constructor_response_format_stored():
"""response_format kwarg is stored as _response_format attribute."""
rf = {"type": "json_object"}
executor = HermesA2AExecutor(
model="hermes-4",
response_format=rf,
_client=MagicMock(),
)
assert executor._response_format is rf
def test_constructor_no_response_format_is_none():
"""Omitting response_format → _response_format is None."""
executor = HermesA2AExecutor(model="hermes-4", _client=MagicMock())
assert executor._response_format is None
@pytest.mark.asyncio
async def test_execute_response_format_in_request():
"""Valid response_format is forwarded as a kwarg to the API call."""
rf = {"type": "json_object"}
mock_client = MagicMock()
mock_client.chat.completions.create = AsyncMock(
return_value=_make_api_response('{"answer": 42}')
)
executor = HermesA2AExecutor(
model="nousresearch/hermes-3-llama-3.1-70b",
response_format=rf,
_client=mock_client,
)
await executor.execute(_make_context("hello"), AsyncMock())
call_kwargs = mock_client.chat.completions.create.call_args[1]
assert call_kwargs.get("response_format") == rf
@pytest.mark.asyncio
async def test_execute_response_format_omitted_when_none():
"""When response_format is None, it is NOT present in the API call kwargs."""
mock_client = MagicMock()
mock_client.chat.completions.create = AsyncMock(
return_value=_make_api_response("ok")
)
executor = HermesA2AExecutor(
model="nousresearch/hermes-3-llama-3.1-70b",
response_format=None,
_client=mock_client,
)
await executor.execute(_make_context("hello"), AsyncMock())
call_kwargs = mock_client.chat.completions.create.call_args[1]
assert "response_format" not in call_kwargs
@pytest.mark.asyncio
async def test_execute_invalid_response_format_returns_error_no_api_call():
"""Invalid response_format → error enqueued, API create() NOT called."""
rf = {"type": "unsupported_format"}
mock_client = MagicMock()
mock_client.chat.completions.create = AsyncMock()
executor = HermesA2AExecutor(
model="hermes-4",
response_format=rf,
_client=mock_client,
)
eq = AsyncMock()
await executor.execute(_make_context("hello"), eq)
# Should have enqueued an error message
eq.enqueue_event.assert_called_once()
enqueued = eq.enqueue_event.call_args[0][0]
assert "Error: invalid response_format" in enqueued
# API must NOT have been called
mock_client.chat.completions.create.assert_not_called()
# ---------------------------------------------------------------------------
# Native tools parameter — issue #497
# ---------------------------------------------------------------------------