feat(hermes): plumb response_format=json_schema for structured output (#498)

Adds response_format support to HermesA2AExecutor so callers can request structured JSON output via the OpenAI-native response_format parameter. Changes: - _validate_response_format(): validates type (json_schema/json_object/text) and required sub-fields; returns None if valid, error message if invalid - HermesA2AExecutor.__init__: new response_format kwarg, stored as _response_format - execute(): validates before API call — invalid schema enqueues error and returns early without hitting Hermes API; valid and non-None adds response_format= to create_kwargs; None omits the field entirely Tests (12 new): - _validate_response_format: all valid types, invalid type, missing fields - constructor stores response_format correctly - valid response_format forwarded to API call - response_format omitted when None (no key in call kwargs) - invalid schema → error message enqueued, API not called Closes #498 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-17 01:19:51 +00:00 · 2026-04-17 01:19:51 +00:00 · 1d41f23ddd
commit 1d41f23ddd
parent 15f55f2fb0
2 changed files with 246 additions and 7 deletions
--- a/workspace-template/hermes_executor.py
+++ b/workspace-template/hermes_executor.py
@ -26,6 +26,22 @@ Hermes 3 / unknown models
 No ``extra_body`` is sent.  The response is processed identically to any
 other OpenAI-compat model call.  The Hermes 3 path is exercised by the
 existing adapter test suite and must remain unchanged.
+
+response_format / structured output (#498)
+------------------------------------------
+Pass ``response_format={"type": "json_schema", "json_schema": {...}}`` (or
+``{"type": "json_object"}`` / ``{"type": "text"}``) to request structured
+output from the upstream provider.  The value is forwarded verbatim as the
+``response_format=`` kwarg on ``chat.completions.create()``.
+
+Validation is performed **before** the API call via
+``_validate_response_format()``.  If the dict is invalid (unknown type,
+missing ``json_schema`` key for ``type="json_schema"``, etc.) the executor
+enqueues an error message and returns early without calling the API.
+
+When ``response_format`` is ``None`` (the default) the kwarg is omitted
+entirely from the API call so older / strict providers do not receive an
+unexpected field.
 """

 from __future__ import annotations
@ -77,6 +93,53 @@ def _reasoning_supported(model: str) -> bool:
    return any(pat in model_lower for pat in _HERMES4_PATTERNS)


+# ---------------------------------------------------------------------------
+# response_format validation (#498)
+# ---------------------------------------------------------------------------
+
+_VALID_RESPONSE_FORMAT_TYPES: frozenset[str] = frozenset(
+    {"json_schema", "json_object", "text"}
+)
+
+
+def _validate_response_format(rf: dict) -> "str | None":
+    """Validate a ``response_format`` dict before forwarding to the API.
+
+    Returns ``None`` if *rf* is valid, or an error message string describing
+    the first validation failure found.
+
+    Valid ``type`` values are ``"json_schema"``, ``"json_object"``, and
+    ``"text"``.  For ``type="json_schema"``, the dict must also contain a
+    ``"json_schema"`` key whose value is a dict with at least a ``"name"``
+    key (str).  If ``json_schema.schema`` is present it must be a dict.
+
+    Examples::
+
+        >>> _validate_response_format({"type": "json_object"}) is None
+        True
+        >>> _validate_response_format({"type": "bad"}) is not None
+        True
+    """
+    rf_type = rf.get("type")
+    if rf_type not in _VALID_RESPONSE_FORMAT_TYPES:
+        return (
+            f"type must be one of {sorted(_VALID_RESPONSE_FORMAT_TYPES)!r}, "
+            f"got {rf_type!r}"
+        )
+
+    if rf_type == "json_schema":
+        js = rf.get("json_schema")
+        if not isinstance(js, dict):
+            return "json_schema must be a dict when type='json_schema'"
+        if not isinstance(js.get("name"), str):
+            return "json_schema.name must be a string"
+        schema = js.get("schema")
+        if schema is not None and not isinstance(schema, dict):
+            return "json_schema.schema must be a dict if present"
+
+    return None
+
+
 # ---------------------------------------------------------------------------
 # ProviderConfig — per-provider / per-model capability flags
 # ---------------------------------------------------------------------------
@ -142,6 +205,16 @@ class HermesA2AExecutor(AgentExecutor):
    heartbeat:
        Optional ``HeartbeatLoop`` instance used to surface the current
        task description in the platform UI.
+    response_format:
+        Optional OpenAI-native ``response_format`` dict forwarded verbatim
+        to ``chat.completions.create()``.  Supported types:
+        ``{"type": "json_schema", "json_schema": {"name": ..., "schema": {...}}}
+``
+        ``{"type": "json_object"}``
+        ``{"type": "text"}``
+        When ``None`` (default) the parameter is omitted from the API call.
+        Invalid dicts cause ``execute()`` to enqueue an error and return
+        early without calling the API.
    _client:
        Inject a pre-built ``AsyncOpenAI`` (or compatible mock) — for
        testing only.  When provided, ``base_url`` and ``api_key`` are
@ -155,11 +228,13 @@ class HermesA2AExecutor(AgentExecutor):
        base_url: str | None = None,
        api_key: str | None = None,
        heartbeat: "HeartbeatLoop | None" = None,
+        response_format: "dict | None" = None,
        _client: Any = None,
    ) -> None:
        self.model = model
        self.system_prompt = system_prompt
        self._heartbeat = heartbeat
+        self._response_format = response_format
        self._provider = ProviderConfig(model)

        if _client is not None:
@ -262,18 +337,34 @@ class HermesA2AExecutor(AgentExecutor):

        messages = self._build_messages(user_input)

+        # Validate response_format before hitting the API — invalid dicts
+        # enqueue an error and return early without making an API call.
+        if self._response_format is not None:
+            detail = _validate_response_format(self._response_format)
+            if detail is not None:
+                await event_queue.enqueue_event(
+                    new_agent_text_message(f"Error: invalid response_format — {detail}")
+                )
+                return
+
        # Only Hermes 4 entries get extra_body — sending it to Hermes 3
        # or other models is a no-op at best; a 400 at worst.
        extra_body: dict | None = None
        if self._provider.reasoning_supported:
            extra_body = {"reasoning": {"enabled": True}}

+        # Build create() kwargs; omit response_format entirely when None so
+        # strict / older providers do not receive an unexpected field.
+        create_kwargs: dict = {
+            "model": self.model,
+            "messages": messages,
+            "extra_body": extra_body,
+        }
+        if self._response_format is not None:
+            create_kwargs["response_format"] = self._response_format
+
        try:
-            response = await self._client.chat.completions.create(
-                model=self.model,
-                messages=messages,
-                extra_body=extra_body,
-            )
+            response = await self._client.chat.completions.create(**create_kwargs)

            choice = response.choices[0]
            content: str = choice.message.content or ""
--- a/workspace-template/tests/test_hermes_executor.py
+++ b/workspace-template/tests/test_hermes_executor.py
@ -4,12 +4,15 @@ Coverage targets
 ----------------
 - _reasoning_supported()        — model name pattern detection
 - ProviderConfig                — capability flags derived from model name
- HermesA2AExecutor.__init__   — field assignment + client injection
+- _validate_response_format()  — valid types, invalid type, missing fields (#498)
+- HermesA2AExecutor.__init__   — field assignment + client injection,
+                                  response_format stored (#498)
 - HermesA2AExecutor._build_messages — system prompt + user turn assembly
 - HermesA2AExecutor._log_reasoning  — OTEL span emission + swallowed errors
 - HermesA2AExecutor.execute    — happy path, empty input, API error,
                                  Hermes 4 extra_body, Hermes 3 no extra_body,
-                                  reasoning not in reply, reasoning_details
+                                  reasoning not in reply, reasoning_details,
+                                  response_format forwarded / omitted / invalid (#498)
 - HermesA2AExecutor.cancel     — TaskStatusUpdateEvent emitted

 The ``openai`` module is stubbed in sys.modules so no real API call is made.
@ -70,6 +73,7 @@ from hermes_executor import (  # noqa: E402
    ProviderConfig,
    _HERMES4_PATTERNS,
    _reasoning_supported,
+    _validate_response_format,
 )


@ -699,3 +703,147 @@ async def test_no_system_prompt_only_user_message():
    msgs = mock_client.chat.completions.create.call_args[1]["messages"]
    assert len(msgs) == 1
    assert msgs[0]["role"] == "user"
+
+
+# ---------------------------------------------------------------------------
+# _validate_response_format — issue #498
+# ---------------------------------------------------------------------------
+
+
+def test_validate_response_format_json_schema_valid():
+    """Valid json_schema dict (with name and schema) returns None."""
+    rf = {
+        "type": "json_schema",
+        "json_schema": {
+            "name": "my_schema",
+            "schema": {"type": "object", "properties": {}},
+        },
+    }
+    assert _validate_response_format(rf) is None
+
+
+def test_validate_response_format_json_object_valid():
+    """{"type": "json_object"} returns None (no sub-fields required)."""
+    assert _validate_response_format({"type": "json_object"}) is None
+
+
+def test_validate_response_format_text_valid():
+    """{"type": "text"} returns None."""
+    assert _validate_response_format({"type": "text"}) is None
+
+
+def test_validate_response_format_invalid_type():
+    """An unknown type value returns a non-None error string."""
+    result = _validate_response_format({"type": "yaml_schema"})
+    assert result is not None
+    assert isinstance(result, str)
+    assert "yaml_schema" in result
+
+
+def test_validate_response_format_missing_json_schema_key():
+    """type='json_schema' but no 'json_schema' key → error string."""
+    result = _validate_response_format({"type": "json_schema"})
+    assert result is not None
+    assert "json_schema" in result
+
+
+def test_validate_response_format_json_schema_schema_not_dict():
+    """json_schema.schema present but not a dict → error string."""
+    rf = {
+        "type": "json_schema",
+        "json_schema": {"name": "s", "schema": "not-a-dict"},
+    }
+    result = _validate_response_format(rf)
+    assert result is not None
+    assert "schema" in result
+
+
+def test_validate_response_format_json_schema_missing_name():
+    """json_schema present but missing 'name' key → error string."""
+    rf = {
+        "type": "json_schema",
+        "json_schema": {"schema": {"type": "object"}},
+    }
+    result = _validate_response_format(rf)
+    assert result is not None
+    assert "name" in result
+
+
+def test_constructor_response_format_stored():
+    """response_format kwarg is stored as _response_format attribute."""
+    rf = {"type": "json_object"}
+    executor = HermesA2AExecutor(
+        model="hermes-4",
+        response_format=rf,
+        _client=MagicMock(),
+    )
+    assert executor._response_format is rf
+
+
+def test_constructor_no_response_format_is_none():
+    """Omitting response_format → _response_format is None."""
+    executor = HermesA2AExecutor(model="hermes-4", _client=MagicMock())
+    assert executor._response_format is None
+
+
+@pytest.mark.asyncio
+async def test_execute_response_format_in_request():
+    """Valid response_format is forwarded as a kwarg to the API call."""
+    rf = {"type": "json_object"}
+    mock_client = MagicMock()
+    mock_client.chat.completions.create = AsyncMock(
+        return_value=_make_api_response('{"answer": 42}')
+    )
+    executor = HermesA2AExecutor(
+        model="nousresearch/hermes-3-llama-3.1-70b",
+        response_format=rf,
+        _client=mock_client,
+    )
+
+    await executor.execute(_make_context("hello"), AsyncMock())
+
+    call_kwargs = mock_client.chat.completions.create.call_args[1]
+    assert call_kwargs.get("response_format") == rf
+
+
+@pytest.mark.asyncio
+async def test_execute_response_format_omitted_when_none():
+    """When response_format is None, it is NOT present in the API call kwargs."""
+    mock_client = MagicMock()
+    mock_client.chat.completions.create = AsyncMock(
+        return_value=_make_api_response("ok")
+    )
+    executor = HermesA2AExecutor(
+        model="nousresearch/hermes-3-llama-3.1-70b",
+        response_format=None,
+        _client=mock_client,
+    )
+
+    await executor.execute(_make_context("hello"), AsyncMock())
+
+    call_kwargs = mock_client.chat.completions.create.call_args[1]
+    assert "response_format" not in call_kwargs
+
+
+@pytest.mark.asyncio
+async def test_execute_invalid_response_format_returns_error_no_api_call():
+    """Invalid response_format → error enqueued, API create() NOT called."""
+    rf = {"type": "unsupported_format"}
+    mock_client = MagicMock()
+    mock_client.chat.completions.create = AsyncMock()
+    executor = HermesA2AExecutor(
+        model="hermes-4",
+        response_format=rf,
+        _client=mock_client,
+    )
+
+    eq = AsyncMock()
+    await executor.execute(_make_context("hello"), eq)
+
+    # Should have enqueued an error message
+    eq.enqueue_event.assert_called_once()
+    enqueued = eq.enqueue_event.call_args[0][0]
+    assert "Error: invalid response_format" in enqueued
+
+    # API must NOT have been called
+    mock_client.chat.completions.create.assert_not_called()