diff --git a/workspace-template/hermes_executor.py b/workspace-template/hermes_executor.py index 07aa4648..06a2eea0 100644 --- a/workspace-template/hermes_executor.py +++ b/workspace-template/hermes_executor.py @@ -26,6 +26,22 @@ Hermes 3 / unknown models No ``extra_body`` is sent. The response is processed identically to any other OpenAI-compat model call. The Hermes 3 path is exercised by the existing adapter test suite and must remain unchanged. + +response_format / structured output (#498) +------------------------------------------ +Pass ``response_format={"type": "json_schema", "json_schema": {...}}`` (or +``{"type": "json_object"}`` / ``{"type": "text"}``) to request structured +output from the upstream provider. The value is forwarded verbatim as the +``response_format=`` kwarg on ``chat.completions.create()``. + +Validation is performed **before** the API call via +``_validate_response_format()``. If the dict is invalid (unknown type, +missing ``json_schema`` key for ``type="json_schema"``, etc.) the executor +enqueues an error message and returns early without calling the API. + +When ``response_format`` is ``None`` (the default) the kwarg is omitted +entirely from the API call so older / strict providers do not receive an +unexpected field. """ from __future__ import annotations @@ -77,6 +93,53 @@ def _reasoning_supported(model: str) -> bool: return any(pat in model_lower for pat in _HERMES4_PATTERNS) +# --------------------------------------------------------------------------- +# response_format validation (#498) +# --------------------------------------------------------------------------- + +_VALID_RESPONSE_FORMAT_TYPES: frozenset[str] = frozenset( + {"json_schema", "json_object", "text"} +) + + +def _validate_response_format(rf: dict) -> "str | None": + """Validate a ``response_format`` dict before forwarding to the API. + + Returns ``None`` if *rf* is valid, or an error message string describing + the first validation failure found. + + Valid ``type`` values are ``"json_schema"``, ``"json_object"``, and + ``"text"``. For ``type="json_schema"``, the dict must also contain a + ``"json_schema"`` key whose value is a dict with at least a ``"name"`` + key (str). If ``json_schema.schema`` is present it must be a dict. + + Examples:: + + >>> _validate_response_format({"type": "json_object"}) is None + True + >>> _validate_response_format({"type": "bad"}) is not None + True + """ + rf_type = rf.get("type") + if rf_type not in _VALID_RESPONSE_FORMAT_TYPES: + return ( + f"type must be one of {sorted(_VALID_RESPONSE_FORMAT_TYPES)!r}, " + f"got {rf_type!r}" + ) + + if rf_type == "json_schema": + js = rf.get("json_schema") + if not isinstance(js, dict): + return "json_schema must be a dict when type='json_schema'" + if not isinstance(js.get("name"), str): + return "json_schema.name must be a string" + schema = js.get("schema") + if schema is not None and not isinstance(schema, dict): + return "json_schema.schema must be a dict if present" + + return None + + # --------------------------------------------------------------------------- # ProviderConfig — per-provider / per-model capability flags # --------------------------------------------------------------------------- @@ -142,6 +205,16 @@ class HermesA2AExecutor(AgentExecutor): heartbeat: Optional ``HeartbeatLoop`` instance used to surface the current task description in the platform UI. + response_format: + Optional OpenAI-native ``response_format`` dict forwarded verbatim + to ``chat.completions.create()``. Supported types: + ``{"type": "json_schema", "json_schema": {"name": ..., "schema": {...}}} +`` + ``{"type": "json_object"}`` + ``{"type": "text"}`` + When ``None`` (default) the parameter is omitted from the API call. + Invalid dicts cause ``execute()`` to enqueue an error and return + early without calling the API. _client: Inject a pre-built ``AsyncOpenAI`` (or compatible mock) — for testing only. When provided, ``base_url`` and ``api_key`` are @@ -155,11 +228,13 @@ class HermesA2AExecutor(AgentExecutor): base_url: str | None = None, api_key: str | None = None, heartbeat: "HeartbeatLoop | None" = None, + response_format: "dict | None" = None, _client: Any = None, ) -> None: self.model = model self.system_prompt = system_prompt self._heartbeat = heartbeat + self._response_format = response_format self._provider = ProviderConfig(model) if _client is not None: @@ -262,18 +337,34 @@ class HermesA2AExecutor(AgentExecutor): messages = self._build_messages(user_input) + # Validate response_format before hitting the API — invalid dicts + # enqueue an error and return early without making an API call. + if self._response_format is not None: + detail = _validate_response_format(self._response_format) + if detail is not None: + await event_queue.enqueue_event( + new_agent_text_message(f"Error: invalid response_format — {detail}") + ) + return + # Only Hermes 4 entries get extra_body — sending it to Hermes 3 # or other models is a no-op at best; a 400 at worst. extra_body: dict | None = None if self._provider.reasoning_supported: extra_body = {"reasoning": {"enabled": True}} + # Build create() kwargs; omit response_format entirely when None so + # strict / older providers do not receive an unexpected field. + create_kwargs: dict = { + "model": self.model, + "messages": messages, + "extra_body": extra_body, + } + if self._response_format is not None: + create_kwargs["response_format"] = self._response_format + try: - response = await self._client.chat.completions.create( - model=self.model, - messages=messages, - extra_body=extra_body, - ) + response = await self._client.chat.completions.create(**create_kwargs) choice = response.choices[0] content: str = choice.message.content or "" diff --git a/workspace-template/tests/test_hermes_executor.py b/workspace-template/tests/test_hermes_executor.py index d6129c58..7e4ad603 100644 --- a/workspace-template/tests/test_hermes_executor.py +++ b/workspace-template/tests/test_hermes_executor.py @@ -4,12 +4,15 @@ Coverage targets ---------------- - _reasoning_supported() — model name pattern detection - ProviderConfig — capability flags derived from model name -- HermesA2AExecutor.__init__ — field assignment + client injection +- _validate_response_format() — valid types, invalid type, missing fields (#498) +- HermesA2AExecutor.__init__ — field assignment + client injection, + response_format stored (#498) - HermesA2AExecutor._build_messages — system prompt + user turn assembly - HermesA2AExecutor._log_reasoning — OTEL span emission + swallowed errors - HermesA2AExecutor.execute — happy path, empty input, API error, Hermes 4 extra_body, Hermes 3 no extra_body, - reasoning not in reply, reasoning_details + reasoning not in reply, reasoning_details, + response_format forwarded / omitted / invalid (#498) - HermesA2AExecutor.cancel — TaskStatusUpdateEvent emitted The ``openai`` module is stubbed in sys.modules so no real API call is made. @@ -70,6 +73,7 @@ from hermes_executor import ( # noqa: E402 ProviderConfig, _HERMES4_PATTERNS, _reasoning_supported, + _validate_response_format, ) @@ -699,3 +703,147 @@ async def test_no_system_prompt_only_user_message(): msgs = mock_client.chat.completions.create.call_args[1]["messages"] assert len(msgs) == 1 assert msgs[0]["role"] == "user" + + +# --------------------------------------------------------------------------- +# _validate_response_format — issue #498 +# --------------------------------------------------------------------------- + + +def test_validate_response_format_json_schema_valid(): + """Valid json_schema dict (with name and schema) returns None.""" + rf = { + "type": "json_schema", + "json_schema": { + "name": "my_schema", + "schema": {"type": "object", "properties": {}}, + }, + } + assert _validate_response_format(rf) is None + + +def test_validate_response_format_json_object_valid(): + """{"type": "json_object"} returns None (no sub-fields required).""" + assert _validate_response_format({"type": "json_object"}) is None + + +def test_validate_response_format_text_valid(): + """{"type": "text"} returns None.""" + assert _validate_response_format({"type": "text"}) is None + + +def test_validate_response_format_invalid_type(): + """An unknown type value returns a non-None error string.""" + result = _validate_response_format({"type": "yaml_schema"}) + assert result is not None + assert isinstance(result, str) + assert "yaml_schema" in result + + +def test_validate_response_format_missing_json_schema_key(): + """type='json_schema' but no 'json_schema' key → error string.""" + result = _validate_response_format({"type": "json_schema"}) + assert result is not None + assert "json_schema" in result + + +def test_validate_response_format_json_schema_schema_not_dict(): + """json_schema.schema present but not a dict → error string.""" + rf = { + "type": "json_schema", + "json_schema": {"name": "s", "schema": "not-a-dict"}, + } + result = _validate_response_format(rf) + assert result is not None + assert "schema" in result + + +def test_validate_response_format_json_schema_missing_name(): + """json_schema present but missing 'name' key → error string.""" + rf = { + "type": "json_schema", + "json_schema": {"schema": {"type": "object"}}, + } + result = _validate_response_format(rf) + assert result is not None + assert "name" in result + + +def test_constructor_response_format_stored(): + """response_format kwarg is stored as _response_format attribute.""" + rf = {"type": "json_object"} + executor = HermesA2AExecutor( + model="hermes-4", + response_format=rf, + _client=MagicMock(), + ) + assert executor._response_format is rf + + +def test_constructor_no_response_format_is_none(): + """Omitting response_format → _response_format is None.""" + executor = HermesA2AExecutor(model="hermes-4", _client=MagicMock()) + assert executor._response_format is None + + +@pytest.mark.asyncio +async def test_execute_response_format_in_request(): + """Valid response_format is forwarded as a kwarg to the API call.""" + rf = {"type": "json_object"} + mock_client = MagicMock() + mock_client.chat.completions.create = AsyncMock( + return_value=_make_api_response('{"answer": 42}') + ) + executor = HermesA2AExecutor( + model="nousresearch/hermes-3-llama-3.1-70b", + response_format=rf, + _client=mock_client, + ) + + await executor.execute(_make_context("hello"), AsyncMock()) + + call_kwargs = mock_client.chat.completions.create.call_args[1] + assert call_kwargs.get("response_format") == rf + + +@pytest.mark.asyncio +async def test_execute_response_format_omitted_when_none(): + """When response_format is None, it is NOT present in the API call kwargs.""" + mock_client = MagicMock() + mock_client.chat.completions.create = AsyncMock( + return_value=_make_api_response("ok") + ) + executor = HermesA2AExecutor( + model="nousresearch/hermes-3-llama-3.1-70b", + response_format=None, + _client=mock_client, + ) + + await executor.execute(_make_context("hello"), AsyncMock()) + + call_kwargs = mock_client.chat.completions.create.call_args[1] + assert "response_format" not in call_kwargs + + +@pytest.mark.asyncio +async def test_execute_invalid_response_format_returns_error_no_api_call(): + """Invalid response_format → error enqueued, API create() NOT called.""" + rf = {"type": "unsupported_format"} + mock_client = MagicMock() + mock_client.chat.completions.create = AsyncMock() + executor = HermesA2AExecutor( + model="hermes-4", + response_format=rf, + _client=mock_client, + ) + + eq = AsyncMock() + await executor.execute(_make_context("hello"), eq) + + # Should have enqueued an error message + eq.enqueue_event.assert_called_once() + enqueued = eq.enqueue_event.call_args[0][0] + assert "Error: invalid response_format" in enqueued + + # API must NOT have been called + mock_client.chat.completions.create.assert_not_called()