Merge pull request #2142 from Molecule-AI/feat/hermes-borrowed-quality-wins

feat(tools): hermes-borrowed quality wins — error/summary caps + sharper tool descriptions
2026-04-27 06:29:30 +00:00 · 2026-04-27 06:29:30 +00:00 · efc2c9d83e
commit efc2c9d83e
parent bc5b0f614f af664e3e87
3 changed files with 96 additions and 2 deletions
--- a/workspace/a2a_mcp_server.py
+++ b/workspace/a2a_mcp_server.py
@ -132,7 +132,7 @@ TOOLS = [
    },
    {
        "name": "commit_memory",
-        "description": "Save important information to persistent memory. Use this to remember decisions, conversation context, task results, and anything that should survive a restart. Scope: LOCAL (this workspace only), TEAM (parent + siblings), GLOBAL (entire org).",
+        "description": "Append a new memory row to persistent storage. Each call CREATES a row — does not overwrite existing memories with the same content. Use to remember decisions, task results, and context that should survive a restart. Scope: LOCAL (this workspace only), TEAM (parent + siblings), GLOBAL (entire org). GLOBAL writes require tier-0 (root) workspace; lower-tier callers get an RBAC error.",
        "inputSchema": {
            "type": "object",
            "properties": {
@ -151,7 +151,7 @@ TOOLS = [
    },
    {
        "name": "recall_memory",
-        "description": "Search persistent memory for previously saved information. Returns all matching memories. Use this at the start of conversations to recall prior context.",
+        "description": "Substring-search persistent memory and return ALL matching rows (no pagination). Empty query returns every memory accessible at the given scope. Server-side filter is case-insensitive substring match on `content`. Use at the start of conversations to recall prior context — calling once with empty query is cheap and avoids missing relevant memories that don't match a narrow keyword.",
        "inputSchema": {
            "type": "object",
            "properties": {
--- a/workspace/a2a_tools.py
+++ b/workspace/a2a_tools.py
@ -111,11 +111,33 @@ def _auth_headers_for_heartbeat() -> dict[str, str]:
        return {}


+# Per-field caps on the heartbeat / activity payload. Borrowed from
+# hermes-agent's design discipline: cap ONCE in the helper, not at every
+# call site, so a future caller adding error_detail can't accidentally
+# DoS activity_logs by pasting a 4MB stack trace + base64 image.
+#
+# Why these specific limits:
+#   - error_detail (4096): hermes' value. Long enough for a multi-frame
+#     stack trace, short enough that 100 errors in 5min is < 500KB total.
+#   - summary (256): summary is a one-liner shown in the canvas card +
+#     activity row. 256 code points (not bytes) fit a sentence + emoji.
+#   - response_text (NOT capped): this is the agent's actual reply
+#     content. Capping would silently truncate user-visible output.
+_MAX_ERROR_DETAIL_CHARS = 4096
+_MAX_SUMMARY_CHARS = 256
+
+
 async def report_activity(
    activity_type: str, target_id: str = "", summary: str = "", status: str = "ok",
    task_text: str = "", response_text: str = "", error_detail: str = "",
 ):
    """Report activity to the platform for live progress tracking."""
+    # Defensive caps in the helper itself so every caller benefits — see
+    # _MAX_ERROR_DETAIL_CHARS / _MAX_SUMMARY_CHARS comments above.
+    if error_detail and len(error_detail) > _MAX_ERROR_DETAIL_CHARS:
+        error_detail = error_detail[:_MAX_ERROR_DETAIL_CHARS]
+    if summary and len(summary) > _MAX_SUMMARY_CHARS:
+        summary = summary[:_MAX_SUMMARY_CHARS]
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            payload: dict = {
--- a/workspace/tests/test_a2a_tools_impl.py
+++ b/workspace/tests/test_a2a_tools_impl.py
@ -128,6 +128,78 @@ class TestReportActivity:
            # Must not raise
            await a2a_tools.report_activity("a2a_send", summary="test")

+    async def test_error_detail_capped_at_max(self):
+        """Hermes-borrowed pattern: error_detail is capped INSIDE the helper
+        so a careless caller pasting a 1MB stack trace can't DoS the
+        activity_logs table. Cap value (4096) is set in
+        a2a_tools._MAX_ERROR_DETAIL_CHARS — pin it here so a future change
+        that drops the cap (or moves it to the call site only) fails
+        this test loudly."""
+        import a2a_tools
+
+        huge = "X" * 50_000
+        mc = _make_http_mock()
+        with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
+            await a2a_tools.report_activity(
+                "a2a_receive",
+                target_id="ws-1",
+                summary="failed",
+                status="error",
+                error_detail=huge,
+            )
+        # Two POSTs (activity + heartbeat because summary is set); the
+        # error_detail rides the FIRST call (the activity one).
+        payload = mc.post.call_args_list[0].kwargs.get("json")
+        assert "error_detail" in payload
+        assert len(payload["error_detail"]) == a2a_tools._MAX_ERROR_DETAIL_CHARS
+        assert payload["error_detail"] == "X" * a2a_tools._MAX_ERROR_DETAIL_CHARS
+
+    async def test_error_detail_under_cap_passes_through(self):
+        """Defensive negative: short error_detail must NOT be padded or
+        truncated — only over-long values get clipped."""
+        import a2a_tools
+
+        short = "AssertionError: missing field"
+        mc = _make_http_mock()
+        with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
+            await a2a_tools.report_activity(
+                "a2a_receive", summary="x", status="error", error_detail=short
+            )
+        # First POST is the activity row; second is the heartbeat.
+        payload = mc.post.call_args_list[0].kwargs.get("json")
+        assert payload["error_detail"] == short
+
+    async def test_summary_capped_at_max(self):
+        """summary is shown verbatim in the canvas card and activity row;
+        cap at 256 so a giant string doesn't blow out the layout. Same
+        helper-side cap pattern as error_detail."""
+        import a2a_tools
+
+        huge = "Y" * 1000
+        mc = _make_http_mock()
+        with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
+            await a2a_tools.report_activity("a2a_send", summary=huge)
+        # Two POSTs (activity + heartbeat); inspect the first (activity).
+        first_payload = mc.post.call_args_list[0].kwargs.get("json")
+        assert len(first_payload["summary"]) == a2a_tools._MAX_SUMMARY_CHARS
+
+    async def test_response_text_NOT_capped(self):
+        """Negative pin: response_text is the agent's actual reply content.
+        Capping it would silently truncate user-visible output. Hermes'
+        cap discipline applies to error_detail + summary (telemetry
+        fields) only, not to the reply content itself."""
+        import a2a_tools
+
+        big_reply = "Z" * 20_000
+        mc = _make_http_mock()
+        with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
+            await a2a_tools.report_activity(
+                "a2a_receive", target_id="ws-1", response_text=big_reply
+            )
+        payload = mc.post.call_args.kwargs.get("json")
+        assert payload["response_body"]["result"] == big_reply
+        assert len(payload["response_body"]["result"]) == 20_000
+

 # ---------------------------------------------------------------------------
 # tool_delegate_task