diff --git a/workspace-template/hermes_executor.py b/workspace-template/hermes_executor.py index 953dc063..8fff95e3 100644 --- a/workspace-template/hermes_executor.py +++ b/workspace-template/hermes_executor.py @@ -57,6 +57,22 @@ Hermes 3 / unknown models No ``extra_body`` is sent. The response is processed identically to any other OpenAI-compat model call. The Hermes 3 path is exercised by the existing adapter test suite and must remain unchanged. + +response_format / structured output (#498) +------------------------------------------ +Pass ``response_format={"type": "json_schema", "json_schema": {...}}`` (or +``{"type": "json_object"}`` / ``{"type": "text"}``) to request structured +output from the upstream provider. The value is forwarded verbatim as the +``response_format=`` kwarg on ``chat.completions.create()``. + +Validation is performed **before** the API call via +``_validate_response_format()``. If the dict is invalid (unknown type, +missing ``json_schema`` key for ``type="json_schema"``, etc.) the executor +enqueues an error message and returns early without calling the API. + +When ``response_format`` is ``None`` (the default) the kwarg is omitted +entirely from the API call so older / strict providers do not receive an +unexpected field. """ from __future__ import annotations @@ -108,6 +124,53 @@ def _reasoning_supported(model: str) -> bool: return any(pat in model_lower for pat in _HERMES4_PATTERNS) +# --------------------------------------------------------------------------- +# response_format validation (#498) +# --------------------------------------------------------------------------- + +_VALID_RESPONSE_FORMAT_TYPES: frozenset[str] = frozenset( + {"json_schema", "json_object", "text"} +) + + +def _validate_response_format(rf: dict) -> "str | None": + """Validate a ``response_format`` dict before forwarding to the API. + + Returns ``None`` if *rf* is valid, or an error message string describing + the first validation failure found. 
+ + Valid ``type`` values are ``"json_schema"``, ``"json_object"``, and + ``"text"``. For ``type="json_schema"``, the dict must also contain a + ``"json_schema"`` key whose value is a dict with at least a ``"name"`` + key (str). If ``json_schema.schema`` is present it must be a dict. + + Examples:: + + >>> _validate_response_format({"type": "json_object"}) is None + True + >>> _validate_response_format({"type": "bad"}) is not None + True + """ + rf_type = rf.get("type") + if rf_type not in _VALID_RESPONSE_FORMAT_TYPES: + return ( + f"type must be one of {sorted(_VALID_RESPONSE_FORMAT_TYPES)!r}, " + f"got {rf_type!r}" + ) + + if rf_type == "json_schema": + js = rf.get("json_schema") + if not isinstance(js, dict): + return "json_schema must be a dict when type='json_schema'" + if not isinstance(js.get("name"), str): + return "json_schema.name must be a string" + schema = js.get("schema") + if schema is not None and not isinstance(schema, dict): + return "json_schema.schema must be a dict if present" + + return None + + # --------------------------------------------------------------------------- # ProviderConfig — per-provider / per-model capability flags # --------------------------------------------------------------------------- @@ -174,6 +237,15 @@ class HermesA2AExecutor(AgentExecutor): heartbeat: Optional ``HeartbeatLoop`` instance used to surface the current task description in the platform UI. + response_format: + Optional OpenAI-native ``response_format`` dict forwarded verbatim + to ``chat.completions.create()``. Supported types: + ``{"type": "json_schema", "json_schema": {"name": ..., "schema": {...}}}`` + ``{"type": "json_object"}`` + ``{"type": "text"}`` + When ``None`` (default) the parameter is omitted from the API call. + Invalid dicts cause ``execute()`` to enqueue an error and return + early without calling the API. tools: Optional list of OpenAI-format tool definitions to pass via the native ``tools`` parameter. 
Each entry must have ``"type"`` and @@ -193,12 +265,14 @@ class HermesA2AExecutor(AgentExecutor): base_url: str | None = None, api_key: str | None = None, heartbeat: "HeartbeatLoop | None" = None, + response_format: "dict | None" = None, tools: list[dict] | None = None, _client: Any = None, ) -> None: self.model = model self.system_prompt = system_prompt self._heartbeat = heartbeat + self._response_format = response_format self._provider = ProviderConfig(model) # Empty list and None are treated identically: no tools → omit the # parameter from the API call rather than sending tools=[]. @@ -309,19 +383,31 @@ class HermesA2AExecutor(AgentExecutor): messages = self._build_messages(user_input) + # Validate response_format before hitting the API — invalid dicts + # enqueue an error and return early without making an API call. + if self._response_format is not None: + detail = _validate_response_format(self._response_format) + if detail is not None: + await event_queue.enqueue_event( + new_agent_text_message(f"Error: invalid response_format — {detail}") + ) + return + # Only Hermes 4 entries get extra_body — sending it to Hermes 3 # or other models is a no-op at best; a 400 at worst. extra_body: dict | None = None if self._provider.reasoning_supported: extra_body = {"reasoning": {"enabled": True}} - # Build call kwargs — omit ``tools`` entirely when the list is empty - # so providers that reject tools=[] don't get a 400. + # Build create() kwargs; omit response_format and tools entirely when + # not set so strict / older providers do not receive unexpected fields. 
create_kwargs: dict = { "model": self.model, "messages": messages, "extra_body": extra_body, } + if self._response_format is not None: + create_kwargs["response_format"] = self._response_format if self._tools: create_kwargs["tools"] = self._tools diff --git a/workspace-template/tests/test_hermes_executor.py b/workspace-template/tests/test_hermes_executor.py index ad891de7..cd95158e 100644 --- a/workspace-template/tests/test_hermes_executor.py +++ b/workspace-template/tests/test_hermes_executor.py @@ -4,12 +4,15 @@ Coverage targets ---------------- - _reasoning_supported() — model name pattern detection - ProviderConfig — capability flags derived from model name -- HermesA2AExecutor.__init__ — field assignment, client injection, tools (#497) +- _validate_response_format() — valid types, invalid type, missing fields (#498) +- HermesA2AExecutor.__init__ — field assignment + client injection, + response_format stored (#498), tools (#497) - HermesA2AExecutor._build_messages — system prompt + user turn assembly - HermesA2AExecutor._log_reasoning — OTEL span emission + swallowed errors - HermesA2AExecutor.execute — happy path, empty input, API error, Hermes 4 extra_body, Hermes 3 no extra_body, reasoning not in reply, reasoning_details, + response_format forwarded / omitted / invalid (#498), tools serialized in request body (#497), empty tools → no tools field (#497), tool_call response → JSON text (#497) @@ -73,6 +76,7 @@ from hermes_executor import ( # noqa: E402 ProviderConfig, _HERMES4_PATTERNS, _reasoning_supported, + _validate_response_format, ) @@ -704,6 +708,150 @@ async def test_no_system_prompt_only_user_message(): assert msgs[0]["role"] == "user" +# --------------------------------------------------------------------------- +# _validate_response_format — issue #498 +# --------------------------------------------------------------------------- + + +def test_validate_response_format_json_schema_valid(): + """Valid json_schema dict (with name and schema) returns 
None.""" + rf = { + "type": "json_schema", + "json_schema": { + "name": "my_schema", + "schema": {"type": "object", "properties": {}}, + }, + } + assert _validate_response_format(rf) is None + + +def test_validate_response_format_json_object_valid(): + """{"type": "json_object"} returns None (no sub-fields required).""" + assert _validate_response_format({"type": "json_object"}) is None + + +def test_validate_response_format_text_valid(): + """{"type": "text"} returns None.""" + assert _validate_response_format({"type": "text"}) is None + + +def test_validate_response_format_invalid_type(): + """An unknown type value returns a non-None error string.""" + result = _validate_response_format({"type": "yaml_schema"}) + assert result is not None + assert isinstance(result, str) + assert "yaml_schema" in result + + +def test_validate_response_format_missing_json_schema_key(): + """type='json_schema' but no 'json_schema' key → error string.""" + result = _validate_response_format({"type": "json_schema"}) + assert result is not None + assert "json_schema" in result + + +def test_validate_response_format_json_schema_schema_not_dict(): + """json_schema.schema present but not a dict → error string.""" + rf = { + "type": "json_schema", + "json_schema": {"name": "s", "schema": "not-a-dict"}, + } + result = _validate_response_format(rf) + assert result is not None + assert "schema" in result + + +def test_validate_response_format_json_schema_missing_name(): + """json_schema present but missing 'name' key → error string.""" + rf = { + "type": "json_schema", + "json_schema": {"schema": {"type": "object"}}, + } + result = _validate_response_format(rf) + assert result is not None + assert "name" in result + + +def test_constructor_response_format_stored(): + """response_format kwarg is stored as _response_format attribute.""" + rf = {"type": "json_object"} + executor = HermesA2AExecutor( + model="hermes-4", + response_format=rf, + _client=MagicMock(), + ) + assert 
executor._response_format is rf + + +def test_constructor_no_response_format_is_none(): + """Omitting response_format → _response_format is None.""" + executor = HermesA2AExecutor(model="hermes-4", _client=MagicMock()) + assert executor._response_format is None + + +@pytest.mark.asyncio +async def test_execute_response_format_in_request(): + """Valid response_format is forwarded as a kwarg to the API call.""" + rf = {"type": "json_object"} + mock_client = MagicMock() + mock_client.chat.completions.create = AsyncMock( + return_value=_make_api_response('{"answer": 42}') + ) + executor = HermesA2AExecutor( + model="nousresearch/hermes-3-llama-3.1-70b", + response_format=rf, + _client=mock_client, + ) + + await executor.execute(_make_context("hello"), AsyncMock()) + + call_kwargs = mock_client.chat.completions.create.call_args[1] + assert call_kwargs.get("response_format") == rf + + +@pytest.mark.asyncio +async def test_execute_response_format_omitted_when_none(): + """When response_format is None, it is NOT present in the API call kwargs.""" + mock_client = MagicMock() + mock_client.chat.completions.create = AsyncMock( + return_value=_make_api_response("ok") + ) + executor = HermesA2AExecutor( + model="nousresearch/hermes-3-llama-3.1-70b", + response_format=None, + _client=mock_client, + ) + + await executor.execute(_make_context("hello"), AsyncMock()) + + call_kwargs = mock_client.chat.completions.create.call_args[1] + assert "response_format" not in call_kwargs + + +@pytest.mark.asyncio +async def test_execute_invalid_response_format_returns_error_no_api_call(): + """Invalid response_format → error enqueued, API create() NOT called.""" + rf = {"type": "unsupported_format"} + mock_client = MagicMock() + mock_client.chat.completions.create = AsyncMock() + executor = HermesA2AExecutor( + model="hermes-4", + response_format=rf, + _client=mock_client, + ) + + eq = AsyncMock() + await executor.execute(_make_context("hello"), eq) + + # Should have enqueued an error message 
+ eq.enqueue_event.assert_called_once() + enqueued = eq.enqueue_event.call_args[0][0] + assert "Error: invalid response_format" in str(enqueued) + + # API must NOT have been called + mock_client.chat.completions.create.assert_not_called() + + # --------------------------------------------------------------------------- # Native tools parameter — issue #497 # ---------------------------------------------------------------------------