forked from molecule-ai/molecule-core
feat(hermes): plumb response_format=json_schema for structured output (#498)
Adds response_format support to HermesA2AExecutor so callers can request structured JSON output via the OpenAI-native response_format parameter. Changes: - _validate_response_format(): validates type (json_schema/json_object/text) and required sub-fields; returns None if valid, error message if invalid - HermesA2AExecutor.__init__: new response_format kwarg, stored as _response_format - execute(): validates before API call — invalid schema enqueues error and returns early without hitting Hermes API; valid and non-None adds response_format= to create_kwargs; None omits the field entirely Tests (12 new): - _validate_response_format: all valid types, invalid type, missing fields - constructor stores response_format correctly - valid response_format forwarded to API call - response_format omitted when None (no key in call kwargs) - invalid schema → error message enqueued, API not called Closes #498 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
15f55f2fb0
commit
1d41f23ddd
@ -26,6 +26,22 @@ Hermes 3 / unknown models
|
||||
No ``extra_body`` is sent. The response is processed identically to any
|
||||
other OpenAI-compat model call. The Hermes 3 path is exercised by the
|
||||
existing adapter test suite and must remain unchanged.
|
||||
|
||||
response_format / structured output (#498)
|
||||
------------------------------------------
|
||||
Pass ``response_format={"type": "json_schema", "json_schema": {...}}`` (or
|
||||
``{"type": "json_object"}`` / ``{"type": "text"}``) to request structured
|
||||
output from the upstream provider. The value is forwarded verbatim as the
|
||||
``response_format=`` kwarg on ``chat.completions.create()``.
|
||||
|
||||
Validation is performed **before** the API call via
|
||||
``_validate_response_format()``. If the dict is invalid (unknown type,
|
||||
missing ``json_schema`` key for ``type="json_schema"``, etc.) the executor
|
||||
enqueues an error message and returns early without calling the API.
|
||||
|
||||
When ``response_format`` is ``None`` (the default) the kwarg is omitted
|
||||
entirely from the API call so older / strict providers do not receive an
|
||||
unexpected field.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@ -77,6 +93,53 @@ def _reasoning_supported(model: str) -> bool:
|
||||
return any(pat in model_lower for pat in _HERMES4_PATTERNS)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# response_format validation (#498)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_VALID_RESPONSE_FORMAT_TYPES: frozenset[str] = frozenset(
|
||||
{"json_schema", "json_object", "text"}
|
||||
)
|
||||
|
||||
|
||||
def _validate_response_format(rf: dict) -> "str | None":
|
||||
"""Validate a ``response_format`` dict before forwarding to the API.
|
||||
|
||||
Returns ``None`` if *rf* is valid, or an error message string describing
|
||||
the first validation failure found.
|
||||
|
||||
Valid ``type`` values are ``"json_schema"``, ``"json_object"``, and
|
||||
``"text"``. For ``type="json_schema"``, the dict must also contain a
|
||||
``"json_schema"`` key whose value is a dict with at least a ``"name"``
|
||||
key (str). If ``json_schema.schema`` is present it must be a dict.
|
||||
|
||||
Examples::
|
||||
|
||||
>>> _validate_response_format({"type": "json_object"}) is None
|
||||
True
|
||||
>>> _validate_response_format({"type": "bad"}) is not None
|
||||
True
|
||||
"""
|
||||
rf_type = rf.get("type")
|
||||
if rf_type not in _VALID_RESPONSE_FORMAT_TYPES:
|
||||
return (
|
||||
f"type must be one of {sorted(_VALID_RESPONSE_FORMAT_TYPES)!r}, "
|
||||
f"got {rf_type!r}"
|
||||
)
|
||||
|
||||
if rf_type == "json_schema":
|
||||
js = rf.get("json_schema")
|
||||
if not isinstance(js, dict):
|
||||
return "json_schema must be a dict when type='json_schema'"
|
||||
if not isinstance(js.get("name"), str):
|
||||
return "json_schema.name must be a string"
|
||||
schema = js.get("schema")
|
||||
if schema is not None and not isinstance(schema, dict):
|
||||
return "json_schema.schema must be a dict if present"
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ProviderConfig — per-provider / per-model capability flags
|
||||
# ---------------------------------------------------------------------------
|
||||
@ -142,6 +205,16 @@ class HermesA2AExecutor(AgentExecutor):
|
||||
heartbeat:
|
||||
Optional ``HeartbeatLoop`` instance used to surface the current
|
||||
task description in the platform UI.
|
||||
response_format:
|
||||
Optional OpenAI-native ``response_format`` dict forwarded verbatim
|
||||
to ``chat.completions.create()``. Supported types:
|
||||
``{"type": "json_schema", "json_schema": {"name": ..., "schema": {...}}}
|
||||
``
|
||||
``{"type": "json_object"}``
|
||||
``{"type": "text"}``
|
||||
When ``None`` (default) the parameter is omitted from the API call.
|
||||
Invalid dicts cause ``execute()`` to enqueue an error and return
|
||||
early without calling the API.
|
||||
_client:
|
||||
Inject a pre-built ``AsyncOpenAI`` (or compatible mock) — for
|
||||
testing only. When provided, ``base_url`` and ``api_key`` are
|
||||
@ -155,11 +228,13 @@ class HermesA2AExecutor(AgentExecutor):
|
||||
base_url: str | None = None,
|
||||
api_key: str | None = None,
|
||||
heartbeat: "HeartbeatLoop | None" = None,
|
||||
response_format: "dict | None" = None,
|
||||
_client: Any = None,
|
||||
) -> None:
|
||||
self.model = model
|
||||
self.system_prompt = system_prompt
|
||||
self._heartbeat = heartbeat
|
||||
self._response_format = response_format
|
||||
self._provider = ProviderConfig(model)
|
||||
|
||||
if _client is not None:
|
||||
@ -262,18 +337,34 @@ class HermesA2AExecutor(AgentExecutor):
|
||||
|
||||
messages = self._build_messages(user_input)
|
||||
|
||||
# Validate response_format before hitting the API — invalid dicts
|
||||
# enqueue an error and return early without making an API call.
|
||||
if self._response_format is not None:
|
||||
detail = _validate_response_format(self._response_format)
|
||||
if detail is not None:
|
||||
await event_queue.enqueue_event(
|
||||
new_agent_text_message(f"Error: invalid response_format — {detail}")
|
||||
)
|
||||
return
|
||||
|
||||
# Only Hermes 4 entries get extra_body — sending it to Hermes 3
|
||||
# or other models is a no-op at best; a 400 at worst.
|
||||
extra_body: dict | None = None
|
||||
if self._provider.reasoning_supported:
|
||||
extra_body = {"reasoning": {"enabled": True}}
|
||||
|
||||
# Build create() kwargs; omit response_format entirely when None so
|
||||
# strict / older providers do not receive an unexpected field.
|
||||
create_kwargs: dict = {
|
||||
"model": self.model,
|
||||
"messages": messages,
|
||||
"extra_body": extra_body,
|
||||
}
|
||||
if self._response_format is not None:
|
||||
create_kwargs["response_format"] = self._response_format
|
||||
|
||||
try:
|
||||
response = await self._client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=messages,
|
||||
extra_body=extra_body,
|
||||
)
|
||||
response = await self._client.chat.completions.create(**create_kwargs)
|
||||
|
||||
choice = response.choices[0]
|
||||
content: str = choice.message.content or ""
|
||||
|
||||
@ -4,12 +4,15 @@ Coverage targets
|
||||
----------------
|
||||
- _reasoning_supported() — model name pattern detection
|
||||
- ProviderConfig — capability flags derived from model name
|
||||
- HermesA2AExecutor.__init__ — field assignment + client injection
|
||||
- _validate_response_format() — valid types, invalid type, missing fields (#498)
|
||||
- HermesA2AExecutor.__init__ — field assignment + client injection,
|
||||
response_format stored (#498)
|
||||
- HermesA2AExecutor._build_messages — system prompt + user turn assembly
|
||||
- HermesA2AExecutor._log_reasoning — OTEL span emission + swallowed errors
|
||||
- HermesA2AExecutor.execute — happy path, empty input, API error,
|
||||
Hermes 4 extra_body, Hermes 3 no extra_body,
|
||||
reasoning not in reply, reasoning_details
|
||||
reasoning not in reply, reasoning_details,
|
||||
response_format forwarded / omitted / invalid (#498)
|
||||
- HermesA2AExecutor.cancel — TaskStatusUpdateEvent emitted
|
||||
|
||||
The ``openai`` module is stubbed in sys.modules so no real API call is made.
|
||||
@ -70,6 +73,7 @@ from hermes_executor import ( # noqa: E402
|
||||
ProviderConfig,
|
||||
_HERMES4_PATTERNS,
|
||||
_reasoning_supported,
|
||||
_validate_response_format,
|
||||
)
|
||||
|
||||
|
||||
@ -699,3 +703,147 @@ async def test_no_system_prompt_only_user_message():
|
||||
msgs = mock_client.chat.completions.create.call_args[1]["messages"]
|
||||
assert len(msgs) == 1
|
||||
assert msgs[0]["role"] == "user"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _validate_response_format — issue #498
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_validate_response_format_json_schema_valid():
|
||||
"""Valid json_schema dict (with name and schema) returns None."""
|
||||
rf = {
|
||||
"type": "json_schema",
|
||||
"json_schema": {
|
||||
"name": "my_schema",
|
||||
"schema": {"type": "object", "properties": {}},
|
||||
},
|
||||
}
|
||||
assert _validate_response_format(rf) is None
|
||||
|
||||
|
||||
def test_validate_response_format_json_object_valid():
|
||||
"""{"type": "json_object"} returns None (no sub-fields required)."""
|
||||
assert _validate_response_format({"type": "json_object"}) is None
|
||||
|
||||
|
||||
def test_validate_response_format_text_valid():
|
||||
"""{"type": "text"} returns None."""
|
||||
assert _validate_response_format({"type": "text"}) is None
|
||||
|
||||
|
||||
def test_validate_response_format_invalid_type():
|
||||
"""An unknown type value returns a non-None error string."""
|
||||
result = _validate_response_format({"type": "yaml_schema"})
|
||||
assert result is not None
|
||||
assert isinstance(result, str)
|
||||
assert "yaml_schema" in result
|
||||
|
||||
|
||||
def test_validate_response_format_missing_json_schema_key():
|
||||
"""type='json_schema' but no 'json_schema' key → error string."""
|
||||
result = _validate_response_format({"type": "json_schema"})
|
||||
assert result is not None
|
||||
assert "json_schema" in result
|
||||
|
||||
|
||||
def test_validate_response_format_json_schema_schema_not_dict():
|
||||
"""json_schema.schema present but not a dict → error string."""
|
||||
rf = {
|
||||
"type": "json_schema",
|
||||
"json_schema": {"name": "s", "schema": "not-a-dict"},
|
||||
}
|
||||
result = _validate_response_format(rf)
|
||||
assert result is not None
|
||||
assert "schema" in result
|
||||
|
||||
|
||||
def test_validate_response_format_json_schema_missing_name():
|
||||
"""json_schema present but missing 'name' key → error string."""
|
||||
rf = {
|
||||
"type": "json_schema",
|
||||
"json_schema": {"schema": {"type": "object"}},
|
||||
}
|
||||
result = _validate_response_format(rf)
|
||||
assert result is not None
|
||||
assert "name" in result
|
||||
|
||||
|
||||
def test_constructor_response_format_stored():
|
||||
"""response_format kwarg is stored as _response_format attribute."""
|
||||
rf = {"type": "json_object"}
|
||||
executor = HermesA2AExecutor(
|
||||
model="hermes-4",
|
||||
response_format=rf,
|
||||
_client=MagicMock(),
|
||||
)
|
||||
assert executor._response_format is rf
|
||||
|
||||
|
||||
def test_constructor_no_response_format_is_none():
|
||||
"""Omitting response_format → _response_format is None."""
|
||||
executor = HermesA2AExecutor(model="hermes-4", _client=MagicMock())
|
||||
assert executor._response_format is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_execute_response_format_in_request():
|
||||
"""Valid response_format is forwarded as a kwarg to the API call."""
|
||||
rf = {"type": "json_object"}
|
||||
mock_client = MagicMock()
|
||||
mock_client.chat.completions.create = AsyncMock(
|
||||
return_value=_make_api_response('{"answer": 42}')
|
||||
)
|
||||
executor = HermesA2AExecutor(
|
||||
model="nousresearch/hermes-3-llama-3.1-70b",
|
||||
response_format=rf,
|
||||
_client=mock_client,
|
||||
)
|
||||
|
||||
await executor.execute(_make_context("hello"), AsyncMock())
|
||||
|
||||
call_kwargs = mock_client.chat.completions.create.call_args[1]
|
||||
assert call_kwargs.get("response_format") == rf
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_execute_response_format_omitted_when_none():
|
||||
"""When response_format is None, it is NOT present in the API call kwargs."""
|
||||
mock_client = MagicMock()
|
||||
mock_client.chat.completions.create = AsyncMock(
|
||||
return_value=_make_api_response("ok")
|
||||
)
|
||||
executor = HermesA2AExecutor(
|
||||
model="nousresearch/hermes-3-llama-3.1-70b",
|
||||
response_format=None,
|
||||
_client=mock_client,
|
||||
)
|
||||
|
||||
await executor.execute(_make_context("hello"), AsyncMock())
|
||||
|
||||
call_kwargs = mock_client.chat.completions.create.call_args[1]
|
||||
assert "response_format" not in call_kwargs
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_execute_invalid_response_format_returns_error_no_api_call():
|
||||
"""Invalid response_format → error enqueued, API create() NOT called."""
|
||||
rf = {"type": "unsupported_format"}
|
||||
mock_client = MagicMock()
|
||||
mock_client.chat.completions.create = AsyncMock()
|
||||
executor = HermesA2AExecutor(
|
||||
model="hermes-4",
|
||||
response_format=rf,
|
||||
_client=mock_client,
|
||||
)
|
||||
|
||||
eq = AsyncMock()
|
||||
await executor.execute(_make_context("hello"), eq)
|
||||
|
||||
# Should have enqueued an error message
|
||||
eq.enqueue_event.assert_called_once()
|
||||
enqueued = eq.enqueue_event.call_args[0][0]
|
||||
assert "Error: invalid response_format" in enqueued
|
||||
|
||||
# API must NOT have been called
|
||||
mock_client.chat.completions.create.assert_not_called()
|
||||
|
||||
Loading…
Reference in New Issue
Block a user