feat(hermes): plumb response_format=json_schema for structured output (#498)

Adds response_format support to HermesA2AExecutor so callers can request
structured JSON output via the OpenAI-native response_format parameter.

Changes:
- _validate_response_format(): validates type (json_schema/json_object/text)
  and required sub-fields; returns None if valid, error message if invalid
- HermesA2AExecutor.__init__: new response_format kwarg, stored as _response_format
- execute(): validates before API call — invalid schema enqueues error and
  returns early without hitting Hermes API; valid and non-None adds
  response_format= to create_kwargs; None omits the field entirely

Tests (12 new):
  - _validate_response_format: all valid types, invalid type, missing fields
  - constructor stores response_format correctly
  - valid response_format forwarded to API call
  - response_format omitted when None (no key in call kwargs)
  - invalid schema → error message enqueued, API not called

Closes #498

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Molecule AI Backend Engineer 2026-04-17 01:19:51 +00:00
parent 15f55f2fb0
commit 1d41f23ddd
2 changed files with 246 additions and 7 deletions

View File

@ -26,6 +26,22 @@ Hermes 3 / unknown models
No ``extra_body`` is sent. The response is processed identically to any
other OpenAI-compat model call. The Hermes 3 path is exercised by the
existing adapter test suite and must remain unchanged.
response_format / structured output (#498)
------------------------------------------
Pass ``response_format={"type": "json_schema", "json_schema": {...}}`` (or
``{"type": "json_object"}`` / ``{"type": "text"}``) to request structured
output from the upstream provider. The value is forwarded verbatim as the
``response_format=`` kwarg on ``chat.completions.create()``.
Validation is performed **before** the API call via
``_validate_response_format()``. If the dict is invalid (unknown type,
missing ``json_schema`` key for ``type="json_schema"``, etc.) the executor
enqueues an error message and returns early without calling the API.
When ``response_format`` is ``None`` (the default) the kwarg is omitted
entirely from the API call so older / strict providers do not receive an
unexpected field.
"""
from __future__ import annotations
@ -77,6 +93,53 @@ def _reasoning_supported(model: str) -> bool:
return any(pat in model_lower for pat in _HERMES4_PATTERNS)
# ---------------------------------------------------------------------------
# response_format validation (#498)
# ---------------------------------------------------------------------------
_VALID_RESPONSE_FORMAT_TYPES: frozenset[str] = frozenset(
{"json_schema", "json_object", "text"}
)
def _validate_response_format(rf: dict) -> "str | None":
"""Validate a ``response_format`` dict before forwarding to the API.
Returns ``None`` if *rf* is valid, or an error message string describing
the first validation failure found.
Valid ``type`` values are ``"json_schema"``, ``"json_object"``, and
``"text"``. For ``type="json_schema"``, the dict must also contain a
``"json_schema"`` key whose value is a dict with at least a ``"name"``
key (str). If ``json_schema.schema`` is present it must be a dict.
Examples::
>>> _validate_response_format({"type": "json_object"}) is None
True
>>> _validate_response_format({"type": "bad"}) is not None
True
"""
rf_type = rf.get("type")
if rf_type not in _VALID_RESPONSE_FORMAT_TYPES:
return (
f"type must be one of {sorted(_VALID_RESPONSE_FORMAT_TYPES)!r}, "
f"got {rf_type!r}"
)
if rf_type == "json_schema":
js = rf.get("json_schema")
if not isinstance(js, dict):
return "json_schema must be a dict when type='json_schema'"
if not isinstance(js.get("name"), str):
return "json_schema.name must be a string"
schema = js.get("schema")
if schema is not None and not isinstance(schema, dict):
return "json_schema.schema must be a dict if present"
return None
# ---------------------------------------------------------------------------
# ProviderConfig — per-provider / per-model capability flags
# ---------------------------------------------------------------------------
@ -142,6 +205,16 @@ class HermesA2AExecutor(AgentExecutor):
heartbeat:
Optional ``HeartbeatLoop`` instance used to surface the current
task description in the platform UI.
response_format:
Optional OpenAI-native ``response_format`` dict forwarded verbatim
to ``chat.completions.create()``. Supported types:
``{"type": "json_schema", "json_schema": {"name": ..., "schema": {...}}}``
``{"type": "json_object"}``
``{"type": "text"}``
When ``None`` (default) the parameter is omitted from the API call.
Invalid dicts cause ``execute()`` to enqueue an error and return
early without calling the API.
_client:
Inject a pre-built ``AsyncOpenAI`` (or compatible mock) for
testing only. When provided, ``base_url`` and ``api_key`` are
@ -155,11 +228,13 @@ class HermesA2AExecutor(AgentExecutor):
base_url: str | None = None,
api_key: str | None = None,
heartbeat: "HeartbeatLoop | None" = None,
response_format: "dict | None" = None,
_client: Any = None,
) -> None:
self.model = model
self.system_prompt = system_prompt
self._heartbeat = heartbeat
self._response_format = response_format
self._provider = ProviderConfig(model)
if _client is not None:
@ -262,18 +337,34 @@ class HermesA2AExecutor(AgentExecutor):
messages = self._build_messages(user_input)
# Validate response_format before hitting the API — invalid dicts
# enqueue an error and return early without making an API call.
if self._response_format is not None:
detail = _validate_response_format(self._response_format)
if detail is not None:
await event_queue.enqueue_event(
new_agent_text_message(f"Error: invalid response_format — {detail}")
)
return
# Only Hermes 4 entries get extra_body — sending it to Hermes 3
# or other models is a no-op at best; a 400 at worst.
extra_body: dict | None = None
if self._provider.reasoning_supported:
extra_body = {"reasoning": {"enabled": True}}
# Build create() kwargs; omit response_format entirely when None so
# strict / older providers do not receive an unexpected field.
create_kwargs: dict = {
"model": self.model,
"messages": messages,
"extra_body": extra_body,
}
if self._response_format is not None:
create_kwargs["response_format"] = self._response_format
try:
response = await self._client.chat.completions.create(
model=self.model,
messages=messages,
extra_body=extra_body,
)
response = await self._client.chat.completions.create(**create_kwargs)
choice = response.choices[0]
content: str = choice.message.content or ""

View File

@ -4,12 +4,15 @@ Coverage targets
----------------
- _reasoning_supported() model name pattern detection
- ProviderConfig capability flags derived from model name
- HermesA2AExecutor.__init__ field assignment + client injection
- _validate_response_format() valid types, invalid type, missing fields (#498)
- HermesA2AExecutor.__init__ field assignment + client injection,
response_format stored (#498)
- HermesA2AExecutor._build_messages system prompt + user turn assembly
- HermesA2AExecutor._log_reasoning OTEL span emission + swallowed errors
- HermesA2AExecutor.execute happy path, empty input, API error,
Hermes 4 extra_body, Hermes 3 no extra_body,
reasoning not in reply, reasoning_details
reasoning not in reply, reasoning_details,
response_format forwarded / omitted / invalid (#498)
- HermesA2AExecutor.cancel TaskStatusUpdateEvent emitted
The ``openai`` module is stubbed in sys.modules so no real API call is made.
@ -70,6 +73,7 @@ from hermes_executor import ( # noqa: E402
ProviderConfig,
_HERMES4_PATTERNS,
_reasoning_supported,
_validate_response_format,
)
@ -699,3 +703,147 @@ async def test_no_system_prompt_only_user_message():
msgs = mock_client.chat.completions.create.call_args[1]["messages"]
assert len(msgs) == 1
assert msgs[0]["role"] == "user"
# ---------------------------------------------------------------------------
# _validate_response_format — issue #498
# ---------------------------------------------------------------------------
def test_validate_response_format_json_schema_valid():
    """A json_schema request carrying both name and schema validates cleanly."""
    schema_spec = {
        "name": "my_schema",
        "schema": {"type": "object", "properties": {}},
    }
    payload = {"type": "json_schema", "json_schema": schema_spec}

    outcome = _validate_response_format(payload)

    assert outcome is None
def test_validate_response_format_json_object_valid():
    """The json_object type needs no sub-fields, so validation yields None."""
    payload = {"type": "json_object"}
    outcome = _validate_response_format(payload)
    assert outcome is None
def test_validate_response_format_text_valid():
    """The plain text type is accepted and validation yields None."""
    payload = {"type": "text"}
    outcome = _validate_response_format(payload)
    assert outcome is None
def test_validate_response_format_invalid_type():
    """An unrecognised type yields an error string that names the bad value."""
    payload = {"type": "yaml_schema"}

    message = _validate_response_format(payload)

    assert message is not None
    assert isinstance(message, str)
    # The offending value must be echoed back so the caller can see it.
    assert "yaml_schema" in message
def test_validate_response_format_missing_json_schema_key():
    """Requesting type='json_schema' without the 'json_schema' key is an error."""
    payload = {"type": "json_schema"}

    message = _validate_response_format(payload)

    assert message is not None
    assert "json_schema" in message
def test_validate_response_format_json_schema_schema_not_dict():
    """A json_schema.schema value that is not a dict produces an error string."""
    schema_spec = {"name": "s", "schema": "not-a-dict"}
    payload = {"type": "json_schema", "json_schema": schema_spec}

    message = _validate_response_format(payload)

    assert message is not None
    assert "schema" in message
def test_validate_response_format_json_schema_missing_name():
    """A json_schema dict lacking the 'name' key produces an error string."""
    schema_spec = {"schema": {"type": "object"}}
    payload = {"type": "json_schema", "json_schema": schema_spec}

    message = _validate_response_format(payload)

    assert message is not None
    assert "name" in message
def test_constructor_response_format_stored():
    """The constructor keeps the exact response_format object on _response_format."""
    requested = {"type": "json_object"}
    stub_client = MagicMock()

    subject = HermesA2AExecutor(
        model="hermes-4", response_format=requested, _client=stub_client
    )

    # Identity, not equality: the dict must be stored without copying.
    assert subject._response_format is requested
def test_constructor_no_response_format_is_none():
    """Leaving out response_format results in _response_format being None."""
    stub_client = MagicMock()
    subject = HermesA2AExecutor(model="hermes-4", _client=stub_client)
    assert subject._response_format is None
@pytest.mark.asyncio
async def test_execute_response_format_in_request():
    """A valid response_format reaches chat.completions.create() as a kwarg."""
    requested = {"type": "json_object"}
    stub_client = MagicMock()
    create_mock = AsyncMock(return_value=_make_api_response('{"answer": 42}'))
    stub_client.chat.completions.create = create_mock
    subject = HermesA2AExecutor(
        model="nousresearch/hermes-3-llama-3.1-70b",
        response_format=requested,
        _client=stub_client,
    )

    await subject.execute(_make_context("hello"), AsyncMock())

    # Inspect the keyword arguments of the (single) recorded create() call.
    sent_kwargs = create_mock.call_args.kwargs
    assert sent_kwargs.get("response_format") == requested
@pytest.mark.asyncio
async def test_execute_response_format_omitted_when_none():
    """With response_format=None the key is absent from the create() kwargs."""
    stub_client = MagicMock()
    create_mock = AsyncMock(return_value=_make_api_response("ok"))
    stub_client.chat.completions.create = create_mock
    subject = HermesA2AExecutor(
        model="nousresearch/hermes-3-llama-3.1-70b",
        response_format=None,
        _client=stub_client,
    )

    await subject.execute(_make_context("hello"), AsyncMock())

    # The key must be missing altogether, not merely set to None.
    sent_kwargs = create_mock.call_args.kwargs
    assert "response_format" not in sent_kwargs
@pytest.mark.asyncio
async def test_execute_invalid_response_format_returns_error_no_api_call():
    """An invalid response_format enqueues an error and skips the API call."""
    bad_format = {"type": "unsupported_format"}
    stub_client = MagicMock()
    create_mock = AsyncMock()
    stub_client.chat.completions.create = create_mock
    subject = HermesA2AExecutor(
        model="hermes-4",
        response_format=bad_format,
        _client=stub_client,
    )
    queue = AsyncMock()

    await subject.execute(_make_context("hello"), queue)

    # Exactly one event — the validation error — should have been enqueued.
    queue.enqueue_event.assert_called_once()
    first_positional = queue.enqueue_event.call_args.args[0]
    assert "Error: invalid response_format" in first_positional

    # Validation happens before the request, so create() is never reached.
    create_mock.assert_not_called()