refactor(schema): consolidate nullable-union stripping in schema_sanitizer

Adds tools.schema_sanitizer.strip_nullable_unions as the single
implementation for collapsing anyOf/oneOf nullable unions.  Both the
MCP input-schema normalizer and the Anthropic tool-schema guard now
delegate to it instead of re-implementing the same walk three times.

The global sanitizer also gains a final pass so any tool that slips
past the two earlier hooks (plugin tools, non-MCP custom tools with
Pydantic-shaped schemas) still gets safe input_schemas on Anthropic.

- tools/schema_sanitizer.py:
    * New public strip_nullable_unions(schema, keep_nullable_hint=True).
    * _sanitize_single_tool() calls it as a final pass (hint preserved
      so coerce_tool_args can still map string "null" to None).
- tools/mcp_tool.py: _normalize_mcp_input_schema delegates.
- agent/anthropic_adapter.py: _normalize_tool_input_schema delegates
  with keep_nullable_hint=False (Anthropic does not recognize nullable).

No behavioral change for the fix itself; tests (73/73 targeted +
E2E across MCP→sanitizer→Anthropic paths) pass.
This commit is contained in:
Teknium 2026-04-28 04:49:04 -07:00 committed by Teknium
parent aa94883288
commit 0f473d643d
3 changed files with 102 additions and 79 deletions

View File

@ -1122,37 +1122,21 @@ def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]:
Anthropic's tool schema validator rejects nullable unions such as
``anyOf: [{"type": "string"}, {"type": "null"}]`` that Pydantic/MCP
commonly emits for optional fields. Tool optionality is represented by
the parent ``required`` array, so collapse nullable unions to the non-null
branch while preserving metadata like description/default.
commonly emits for optional fields. Tool optionality is represented by
the parent ``required`` array, so we delegate to the shared
``strip_nullable_unions`` helper to collapse nullable unions to the
non-null branch while preserving metadata like description/default.
``keep_nullable_hint=False`` because the Anthropic validator does not
recognize the OpenAPI-style ``nullable: true`` extension and strict
schema-to-grammar converters may reject unknown keywords.
"""
if not schema:
return {"type": "object", "properties": {}}
def _strip_nullable_union(node: Any) -> Any:
if isinstance(node, list):
return [_strip_nullable_union(item) for item in node]
if not isinstance(node, dict):
return node
from tools.schema_sanitizer import strip_nullable_unions
stripped = {k: _strip_nullable_union(v) for k, v in node.items()}
for key in ("anyOf", "oneOf"):
variants = stripped.get(key)
if not isinstance(variants, list):
continue
non_null = [
item for item in variants
if not (isinstance(item, dict) and item.get("type") == "null")
]
if len(non_null) == 1 and len(non_null) != len(variants):
replacement = dict(non_null[0]) if isinstance(non_null[0], dict) else {}
for meta_key in ("title", "description", "default", "examples"):
if meta_key in stripped and meta_key not in replacement:
replacement[meta_key] = stripped[meta_key]
return _strip_nullable_union(replacement)
return stripped
normalized = _strip_nullable_union(schema)
normalized = strip_nullable_unions(schema, keep_nullable_hint=False)
if not isinstance(normalized, dict):
return {"type": "object", "properties": {}}
if normalized.get("type") == "object" and not isinstance(normalized.get("properties"), dict):

View File

@ -167,22 +167,10 @@ _MCP_HTTP_AVAILABLE = False
_MCP_SAMPLING_TYPES = False
_MCP_NOTIFICATION_TYPES = False
_MCP_MESSAGE_HANDLER_SUPPORTED = False
_MCP_NEW_HTTP = False
streamablehttp_client = None
streamable_http_client = None
# Conservative fallback for SDK builds that don't export LATEST_PROTOCOL_VERSION.
# Streamable HTTP was introduced by 2025-03-26, so this remains valid for the
# HTTP transport path even on older-but-supported SDK versions.
LATEST_PROTOCOL_VERSION = "2025-03-26"
class _CompatType:
"""Minimal attribute bag for MCP SDK types missing in older/newer builds."""
def __init__(self, **kwargs):
for key, value in kwargs.items():
setattr(self, key, value)
try:
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
@ -203,28 +191,20 @@ try:
from mcp.types import LATEST_PROTOCOL_VERSION
except ImportError:
logger.debug("mcp.types.LATEST_PROTOCOL_VERSION not available -- using fallback protocol version")
# Sampling types -- import individually because SDK names changed across releases.
# Sampling types -- separated so older SDK versions don't break MCP support
try:
from mcp.types import CreateMessageResult, ErrorData, SamplingCapability, TextContent
try:
from mcp.types import CreateMessageResultWithTools
except ImportError:
CreateMessageResultWithTools = _CompatType
try:
from mcp.types import SamplingToolsCapability
except ImportError:
SamplingToolsCapability = _CompatType
try:
from mcp.types import ToolUseContent
except ImportError:
ToolUseContent = _CompatType
from mcp.types import (
CreateMessageResult,
CreateMessageResultWithTools,
ErrorData,
SamplingCapability,
SamplingToolsCapability,
TextContent,
ToolUseContent,
)
_MCP_SAMPLING_TYPES = True
except ImportError:
logger.debug("MCP sampling base types not available -- sampling disabled")
logger.debug("MCP sampling types not available -- sampling disabled")
# Notification types for dynamic tool discovery (tools/list_changed)
try:
from mcp.types import (
@ -2424,29 +2404,17 @@ def _normalize_mcp_input_schema(schema: dict | None) -> dict:
return node
def _strip_nullable_union(node):
"""Collapse JSON Schema nullable unions to provider-safe non-null schemas."""
if isinstance(node, list):
return [_strip_nullable_union(item) for item in node]
if not isinstance(node, dict):
return node
"""Collapse JSON Schema nullable unions to provider-safe non-null schemas.
stripped = {k: _strip_nullable_union(v) for k, v in node.items()}
for key in ("anyOf", "oneOf"):
variants = stripped.get(key)
if not isinstance(variants, list):
continue
non_null = [
item for item in variants
if not (isinstance(item, dict) and item.get("type") == "null")
]
if len(non_null) == 1 and len(non_null) != len(variants):
replacement = dict(non_null[0]) if isinstance(non_null[0], dict) else {}
replacement.setdefault("nullable", True)
for meta_key in ("title", "description", "default", "examples"):
if meta_key in stripped and meta_key not in replacement:
replacement[meta_key] = stripped[meta_key]
return _strip_nullable_union(replacement)
return stripped
Delegates to ``tools.schema_sanitizer.strip_nullable_unions`` so MCP
ingestion, the Anthropic guard, and the global sanitizer all share one
implementation. Keeps the ``nullable: true`` hint so runtime argument
coercion can still map a model-emitted ``"null"`` string to Python
``None`` for this optional field.
"""
from tools.schema_sanitizer import strip_nullable_unions
return strip_nullable_unions(node, keep_nullable_hint=True)
def _repair_object_shape(node):
"""Recursively repair object-shaped nodes: fill type, prune required."""

View File

@ -17,6 +17,9 @@ The failure modes we've seen in the wild:
(malformed MCP server output, e.g. ``additionalProperties: "object"``).
* ``"type": ["string", "null"]`` array types many converters only accept
single-string ``type``.
* ``anyOf`` / ``oneOf`` unions whose only purpose is to permit ``null`` for
optional fields (common Pydantic/MCP shape). Anthropic rejects these at
the top of ``input_schema``; collapse them to the non-null branch.
* Unconstrained ``additionalProperties`` on objects with empty properties.
This module walks the final tool schema tree (after MCP-level normalization
@ -75,9 +78,77 @@ def _sanitize_single_tool(tool: dict) -> dict:
top["type"] = "object"
if "properties" not in top or not isinstance(top.get("properties"), dict):
top["properties"] = {}
# Final pass: collapse nullable anyOf/oneOf unions that the recursive
# sanitizer above leaves intact (it only handles the array-form
# ``type: [X, "null"]``). Keep the ``nullable: true`` hint so runtime
# argument coercion (``model_tools._schema_allows_null``) can still
# map a model-emitted ``"null"`` string to Python ``None``.
fn["parameters"] = strip_nullable_unions(fn["parameters"], keep_nullable_hint=True)
return out
def strip_nullable_unions(
schema: Any,
*,
keep_nullable_hint: bool = True,
) -> Any:
"""Collapse ``anyOf`` / ``oneOf`` nullable unions to the non-null branch.
MCP / Pydantic optional fields commonly arrive as::
{"anyOf": [{"type": "string"}, {"type": "null"}], "default": null}
Anthropic's tool input-schema validator rejects the null branch. Tool
optionality is already represented by the parent object's ``required``
array, so we collapse the union to the single non-null variant.
Metadata (``title``, ``description``, ``default``, ``examples``) on the
outer union node is carried over to the replacement variant.
Args:
schema: JSON-Schema fragment (dict, list, or scalar).
keep_nullable_hint: If True, set ``nullable: true`` on the replacement
to preserve the "this field may be None" signal for downstream
consumers that care (e.g. runtime argument coercion that maps the
literal string ``"null"`` to Python ``None``). Anthropic's
validator accepts ``nullable: true`` but strict producers may
prefer False.
Returns:
The schema with nullable unions collapsed. Non-union nodes are
returned unchanged.
"""
if isinstance(schema, list):
return [strip_nullable_unions(item, keep_nullable_hint=keep_nullable_hint) for item in schema]
if not isinstance(schema, dict):
return schema
stripped = {
k: strip_nullable_unions(v, keep_nullable_hint=keep_nullable_hint)
for k, v in schema.items()
}
for key in ("anyOf", "oneOf"):
variants = stripped.get(key)
if not isinstance(variants, list):
continue
non_null = [
item for item in variants
if not (isinstance(item, dict) and item.get("type") == "null")
]
# Only collapse when we actually dropped a null branch AND exactly
# one non-null branch survives (otherwise the union is meaningful
# and we leave it alone).
if len(non_null) == 1 and len(non_null) != len(variants):
replacement = dict(non_null[0]) if isinstance(non_null[0], dict) else {}
if keep_nullable_hint:
replacement.setdefault("nullable", True)
for meta_key in ("title", "description", "default", "examples"):
if meta_key in stripped and meta_key not in replacement:
replacement[meta_key] = stripped[meta_key]
return strip_nullable_unions(replacement, keep_nullable_hint=keep_nullable_hint)
return stripped
def _sanitize_node(node: Any, path: str) -> Any:
"""Recursively sanitize a JSON-Schema fragment.