feat(hermes): plumb response_format=json_schema for structured output (#498)

2026-04-17 07:03:45 +00:00 · 2026-04-17 07:03:45 +00:00 · 7793edee33
commit 7793edee33
parent 816ea3e565 fad6aad734
2 changed files with 237 additions and 3 deletions
--- a/workspace-template/hermes_executor.py
+++ b/workspace-template/hermes_executor.py
@ -57,6 +57,22 @@ Hermes 3 / unknown models
 No ``extra_body`` is sent.  The response is processed identically to any
 other OpenAI-compat model call.  The Hermes 3 path is exercised by the
 existing adapter test suite and must remain unchanged.
+
+response_format / structured output (#498)
+------------------------------------------
+Pass ``response_format={"type": "json_schema", "json_schema": {...}}`` (or
+``{"type": "json_object"}`` / ``{"type": "text"}``) to request structured
+output from the upstream provider.  The value is forwarded verbatim as the
+``response_format=`` kwarg on ``chat.completions.create()``.
+
+Validation is performed **before** the API call via
+``_validate_response_format()``.  If the dict is invalid (unknown type,
+missing ``json_schema`` key for ``type="json_schema"``, etc.) the executor
+enqueues an error message and returns early without calling the API.
+
+When ``response_format`` is ``None`` (the default) the kwarg is omitted
+entirely from the API call so older / strict providers do not receive an
+unexpected field.
 """

 from __future__ import annotations
@ -108,6 +124,53 @@ def _reasoning_supported(model: str) -> bool:
    return any(pat in model_lower for pat in _HERMES4_PATTERNS)


+# ---------------------------------------------------------------------------
+# response_format validation (#498)
+# ---------------------------------------------------------------------------
+
+_VALID_RESPONSE_FORMAT_TYPES: frozenset[str] = frozenset(
+    {"json_schema", "json_object", "text"}
+)
+
+
+def _validate_response_format(rf: dict) -> "str | None":
+    """Validate a ``response_format`` dict before forwarding to the API.
+
+    Returns ``None`` if *rf* is valid, or an error message string describing
+    the first validation failure found.
+
+    Valid ``type`` values are ``"json_schema"``, ``"json_object"``, and
+    ``"text"``.  For ``type="json_schema"``, the dict must also contain a
+    ``"json_schema"`` key whose value is a dict with at least a ``"name"``
+    key (str).  If ``json_schema.schema`` is present it must be a dict.
+
+    Examples::
+
+        >>> _validate_response_format({"type": "json_object"}) is None
+        True
+        >>> _validate_response_format({"type": "bad"}) is not None
+        True
+    """
+    rf_type = rf.get("type")
+    if rf_type not in _VALID_RESPONSE_FORMAT_TYPES:
+        return (
+            f"type must be one of {sorted(_VALID_RESPONSE_FORMAT_TYPES)!r}, "
+            f"got {rf_type!r}"
+        )
+
+    if rf_type == "json_schema":
+        js = rf.get("json_schema")
+        if not isinstance(js, dict):
+            return "json_schema must be a dict when type='json_schema'"
+        if not isinstance(js.get("name"), str):
+            return "json_schema.name must be a string"
+        schema = js.get("schema")
+        if schema is not None and not isinstance(schema, dict):
+            return "json_schema.schema must be a dict if present"
+
+    return None
+
+
 # ---------------------------------------------------------------------------
 # ProviderConfig — per-provider / per-model capability flags
 # ---------------------------------------------------------------------------
@ -174,6 +237,15 @@ class HermesA2AExecutor(AgentExecutor):
    heartbeat:
        Optional ``HeartbeatLoop`` instance used to surface the current
        task description in the platform UI.
+    response_format:
+        Optional OpenAI-native ``response_format`` dict forwarded verbatim
+        to ``chat.completions.create()``.  Supported types:
+        ``{"type": "json_schema", "json_schema": {"name": ..., "schema": {...}}}``
+        ``{"type": "json_object"}``
+        ``{"type": "text"}``
+        When ``None`` (default) the parameter is omitted from the API call.
+        Invalid dicts cause ``execute()`` to enqueue an error and return
+        early without calling the API.
    tools:
        Optional list of OpenAI-format tool definitions to pass via the
        native ``tools`` parameter.  Each entry must have ``"type"`` and
@ -193,12 +265,14 @@ class HermesA2AExecutor(AgentExecutor):
        base_url: str | None = None,
        api_key: str | None = None,
        heartbeat: "HeartbeatLoop | None" = None,
+        response_format: "dict | None" = None,
        tools: list[dict] | None = None,
        _client: Any = None,
    ) -> None:
        self.model = model
        self.system_prompt = system_prompt
        self._heartbeat = heartbeat
+        self._response_format = response_format
        self._provider = ProviderConfig(model)
        # Empty list and None are treated identically: no tools → omit the
        # parameter from the API call rather than sending tools=[].
@ -309,19 +383,31 @@ class HermesA2AExecutor(AgentExecutor):

        messages = self._build_messages(user_input)

+        # Validate response_format before hitting the API — invalid dicts
+        # enqueue an error and return early without making an API call.
+        if self._response_format is not None:
+            detail = _validate_response_format(self._response_format)
+            if detail is not None:
+                await event_queue.enqueue_event(
+                    new_agent_text_message(f"Error: invalid response_format — {detail}")
+                )
+                return
+
        # Only Hermes 4 entries get extra_body — sending it to Hermes 3
        # or other models is a no-op at best; a 400 at worst.
        extra_body: dict | None = None
        if self._provider.reasoning_supported:
            extra_body = {"reasoning": {"enabled": True}}

-        # Build call kwargs — omit ``tools`` entirely when the list is empty
-        # so providers that reject tools=[] don't get a 400.
+        # Build create() kwargs; omit response_format and tools entirely when
+        # not set so strict / older providers do not receive unexpected fields.
        create_kwargs: dict = {
            "model": self.model,
            "messages": messages,
            "extra_body": extra_body,
        }
+        if self._response_format is not None:
+            create_kwargs["response_format"] = self._response_format
        if self._tools:
            create_kwargs["tools"] = self._tools

--- a/workspace-template/tests/test_hermes_executor.py
+++ b/workspace-template/tests/test_hermes_executor.py
@ -4,12 +4,15 @@ Coverage targets
 ----------------
 - _reasoning_supported()        — model name pattern detection
 - ProviderConfig                — capability flags derived from model name
- HermesA2AExecutor.__init__   — field assignment, client injection, tools (#497)
+- _validate_response_format()  — valid types, invalid type, missing fields (#498)
+- HermesA2AExecutor.__init__   — field assignment + client injection,
+                                  response_format stored (#498), tools (#497)
 - HermesA2AExecutor._build_messages — system prompt + user turn assembly
 - HermesA2AExecutor._log_reasoning  — OTEL span emission + swallowed errors
 - HermesA2AExecutor.execute    — happy path, empty input, API error,
                                  Hermes 4 extra_body, Hermes 3 no extra_body,
                                  reasoning not in reply, reasoning_details,
+                                  response_format forwarded / omitted / invalid (#498),
                                  tools serialized in request body (#497),
                                  empty tools → no tools field (#497),
                                  tool_call response → JSON text (#497)
@ -73,6 +76,7 @@ from hermes_executor import (  # noqa: E402
    ProviderConfig,
    _HERMES4_PATTERNS,
    _reasoning_supported,
+    _validate_response_format,
 )


@ -704,6 +708,150 @@ async def test_no_system_prompt_only_user_message():
    assert msgs[0]["role"] == "user"


+# ---------------------------------------------------------------------------
+# _validate_response_format — issue #498
+# ---------------------------------------------------------------------------
+
+
+def test_validate_response_format_json_schema_valid():
+    """Valid json_schema dict (with name and schema) returns None."""
+    rf = {
+        "type": "json_schema",
+        "json_schema": {
+            "name": "my_schema",
+            "schema": {"type": "object", "properties": {}},
+        },
+    }
+    assert _validate_response_format(rf) is None
+
+
+def test_validate_response_format_json_object_valid():
+    """{"type": "json_object"} returns None (no sub-fields required)."""
+    assert _validate_response_format({"type": "json_object"}) is None
+
+
+def test_validate_response_format_text_valid():
+    """{"type": "text"} returns None."""
+    assert _validate_response_format({"type": "text"}) is None
+
+
+def test_validate_response_format_invalid_type():
+    """An unknown type value returns a non-None error string."""
+    result = _validate_response_format({"type": "yaml_schema"})
+    assert result is not None
+    assert isinstance(result, str)
+    assert "yaml_schema" in result
+
+
+def test_validate_response_format_missing_json_schema_key():
+    """type='json_schema' but no 'json_schema' key → error string."""
+    result = _validate_response_format({"type": "json_schema"})
+    assert result is not None
+    assert "json_schema" in result
+
+
+def test_validate_response_format_json_schema_schema_not_dict():
+    """json_schema.schema present but not a dict → error string."""
+    rf = {
+        "type": "json_schema",
+        "json_schema": {"name": "s", "schema": "not-a-dict"},
+    }
+    result = _validate_response_format(rf)
+    assert result is not None
+    assert "schema" in result
+
+
+def test_validate_response_format_json_schema_missing_name():
+    """json_schema present but missing 'name' key → error string."""
+    rf = {
+        "type": "json_schema",
+        "json_schema": {"schema": {"type": "object"}},
+    }
+    result = _validate_response_format(rf)
+    assert result is not None
+    assert "name" in result
+
+
+def test_constructor_response_format_stored():
+    """response_format kwarg is stored as _response_format attribute."""
+    rf = {"type": "json_object"}
+    executor = HermesA2AExecutor(
+        model="hermes-4",
+        response_format=rf,
+        _client=MagicMock(),
+    )
+    assert executor._response_format is rf
+
+
+def test_constructor_no_response_format_is_none():
+    """Omitting response_format → _response_format is None."""
+    executor = HermesA2AExecutor(model="hermes-4", _client=MagicMock())
+    assert executor._response_format is None
+
+
+@pytest.mark.asyncio
+async def test_execute_response_format_in_request():
+    """Valid response_format is forwarded as a kwarg to the API call."""
+    rf = {"type": "json_object"}
+    mock_client = MagicMock()
+    mock_client.chat.completions.create = AsyncMock(
+        return_value=_make_api_response('{"answer": 42}')
+    )
+    executor = HermesA2AExecutor(
+        model="nousresearch/hermes-3-llama-3.1-70b",
+        response_format=rf,
+        _client=mock_client,
+    )
+
+    await executor.execute(_make_context("hello"), AsyncMock())
+
+    call_kwargs = mock_client.chat.completions.create.call_args[1]
+    assert call_kwargs.get("response_format") == rf
+
+
+@pytest.mark.asyncio
+async def test_execute_response_format_omitted_when_none():
+    """When response_format is None, it is NOT present in the API call kwargs."""
+    mock_client = MagicMock()
+    mock_client.chat.completions.create = AsyncMock(
+        return_value=_make_api_response("ok")
+    )
+    executor = HermesA2AExecutor(
+        model="nousresearch/hermes-3-llama-3.1-70b",
+        response_format=None,
+        _client=mock_client,
+    )
+
+    await executor.execute(_make_context("hello"), AsyncMock())
+
+    call_kwargs = mock_client.chat.completions.create.call_args[1]
+    assert "response_format" not in call_kwargs
+
+
+@pytest.mark.asyncio
+async def test_execute_invalid_response_format_returns_error_no_api_call():
+    """Invalid response_format → error enqueued, API create() NOT called."""
+    rf = {"type": "unsupported_format"}
+    mock_client = MagicMock()
+    mock_client.chat.completions.create = AsyncMock()
+    executor = HermesA2AExecutor(
+        model="hermes-4",
+        response_format=rf,
+        _client=mock_client,
+    )
+
+    eq = AsyncMock()
+    await executor.execute(_make_context("hello"), eq)
+
+    # Should have enqueued an error message
+    eq.enqueue_event.assert_called_once()
+    enqueued = eq.enqueue_event.call_args[0][0]
+    assert "Error: invalid response_format" in enqueued
+
+    # API must NOT have been called
+    mock_client.chat.completions.create.assert_not_called()
+
+
 # ---------------------------------------------------------------------------
 # Native tools parameter — issue #497
 # ---------------------------------------------------------------------------