Merge pull request #2142 from Molecule-AI/feat/hermes-borrowed-quality-wins

feat(tools): hermes-borrowed quality wins — error/summary caps + sharper tool descriptions
This commit is contained in:
hongmingwang-moleculeai 2026-04-27 06:29:30 +00:00 committed by GitHub
commit efc2c9d83e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 96 additions and 2 deletions

View File

@ -132,7 +132,7 @@ TOOLS = [
},
{
"name": "commit_memory",
"description": "Save important information to persistent memory. Use this to remember decisions, conversation context, task results, and anything that should survive a restart. Scope: LOCAL (this workspace only), TEAM (parent + siblings), GLOBAL (entire org).",
"description": "Append a new memory row to persistent storage. Each call CREATES a row — does not overwrite existing memories with the same content. Use to remember decisions, task results, and context that should survive a restart. Scope: LOCAL (this workspace only), TEAM (parent + siblings), GLOBAL (entire org). GLOBAL writes require tier-0 (root) workspace; lower-tier callers get an RBAC error.",
"inputSchema": {
"type": "object",
"properties": {
@ -151,7 +151,7 @@ TOOLS = [
},
{
"name": "recall_memory",
"description": "Search persistent memory for previously saved information. Returns all matching memories. Use this at the start of conversations to recall prior context.",
"description": "Substring-search persistent memory and return ALL matching rows (no pagination). Empty query returns every memory accessible at the given scope. Server-side filter is case-insensitive substring match on `content`. Use at the start of conversations to recall prior context — calling once with empty query is cheap and avoids missing relevant memories that don't match a narrow keyword.",
"inputSchema": {
"type": "object",
"properties": {

View File

@ -111,11 +111,33 @@ def _auth_headers_for_heartbeat() -> dict[str, str]:
return {}
# Per-field caps on the heartbeat / activity payload. Borrowed from
# hermes-agent's design discipline: cap ONCE in the helper, not at every
# call site, so a future caller adding error_detail can't accidentally
# DoS activity_logs by pasting a 4MB stack trace + base64 image.
#
# Both limits are counted in characters (``len()`` of a ``str``), not in
# encoded bytes, so the byte figures below are approximate for non-ASCII
# text.
#
# Why these specific limits:
#   - error_detail (4096): hermes' value. Long enough for a multi-frame
#     stack trace, short enough that 100 errors in 5min is < 500KB total
#     (100 * 4096 = 409,600 characters).
#   - summary (256): summary is a one-liner shown in the canvas card +
#     activity row. 256 covers UTF-8 emoji + a sentence.
#   - response_text (NOT capped): this is the agent's actual reply
#     content. Capping would silently truncate user-visible output.
_MAX_ERROR_DETAIL_CHARS = 4096
_MAX_SUMMARY_CHARS = 256
async def report_activity(
activity_type: str, target_id: str = "", summary: str = "", status: str = "ok",
task_text: str = "", response_text: str = "", error_detail: str = "",
):
"""Report activity to the platform for live progress tracking."""
# Defensive caps in the helper itself so every caller benefits — see
# _MAX_ERROR_DETAIL_CHARS / _MAX_SUMMARY_CHARS comments above.
if error_detail and len(error_detail) > _MAX_ERROR_DETAIL_CHARS:
error_detail = error_detail[:_MAX_ERROR_DETAIL_CHARS]
if summary and len(summary) > _MAX_SUMMARY_CHARS:
summary = summary[:_MAX_SUMMARY_CHARS]
try:
async with httpx.AsyncClient(timeout=5.0) as client:
payload: dict = {

View File

@ -128,6 +128,78 @@ class TestReportActivity:
# Must not raise
await a2a_tools.report_activity("a2a_send", summary="test")
async def test_error_detail_capped_at_max(self):
    """Pin the helper-side cap on ``error_detail``.

    Hermes-borrowed pattern: ``report_activity`` truncates ``error_detail``
    itself, so a careless caller pasting a huge stack trace cannot flood
    the activity log. The limit lives in
    ``a2a_tools._MAX_ERROR_DETAIL_CHARS`` — it is pinned here so that a
    future change which drops the cap (or moves it to the call sites
    only) fails loudly.
    """
    import a2a_tools

    cap = a2a_tools._MAX_ERROR_DETAIL_CHARS
    oversized_detail = "X" * 50_000
    http_mock = _make_http_mock()

    with patch("a2a_tools.httpx.AsyncClient", return_value=http_mock):
        await a2a_tools.report_activity(
            "a2a_receive",
            target_id="ws-1",
            summary="failed",
            status="error",
            error_detail=oversized_detail,
        )

    # Two POSTs are expected (activity + heartbeat because summary is
    # set); error_detail travels on the FIRST one, the activity POST.
    first_post = http_mock.post.call_args_list[0]
    activity_payload = first_post.kwargs.get("json")

    assert "error_detail" in activity_payload
    sent_detail = activity_payload["error_detail"]
    assert len(sent_detail) == cap
    assert sent_detail == "X" * cap
async def test_error_detail_under_cap_passes_through(self):
    """Defensive negative: a short ``error_detail`` is sent unchanged.

    Values under the cap must be neither padded nor truncated — only
    over-long values get clipped.
    """
    import a2a_tools

    brief_detail = "AssertionError: missing field"
    http_mock = _make_http_mock()

    with patch("a2a_tools.httpx.AsyncClient", return_value=http_mock):
        await a2a_tools.report_activity(
            "a2a_receive",
            summary="x",
            status="error",
            error_detail=brief_detail,
        )

    # The first POST carries the activity row; the second is the heartbeat.
    activity_call = http_mock.post.call_args_list[0]
    activity_payload = activity_call.kwargs.get("json")
    assert activity_payload["error_detail"] == brief_detail
async def test_summary_capped_at_max(self):
    """Pin the helper-side cap on ``summary``.

    ``summary`` is shown verbatim in the canvas card and activity row, so
    ``report_activity`` clips it to ``a2a_tools._MAX_SUMMARY_CHARS`` to
    keep a giant string from blowing out the layout. Same helper-side cap
    pattern as ``error_detail``.

    The input starts with a distinguishable prefix and the assertion
    compares content, not just length, so the test also fails if the
    helper keeps the wrong end of the string or substitutes other text.
    """
    import a2a_tools

    cap = a2a_tools._MAX_SUMMARY_CHARS
    huge = "head-" + "Y" * 1000
    mc = _make_http_mock()

    with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
        await a2a_tools.report_activity("a2a_send", summary=huge)

    # Two POSTs (activity + heartbeat); inspect the first (activity).
    first_payload = mc.post.call_args_list[0].kwargs.get("json")
    assert len(first_payload["summary"]) == cap
    # Truncation must keep the leading characters unchanged.
    assert first_payload["summary"] == huge[:cap]
async def test_response_text_NOT_capped(self):
    """Negative pin: ``response_text`` must pass through at full length.

    ``response_text`` is the agent's actual reply content, so capping it
    would silently truncate user-visible output. Hermes' cap discipline
    applies to the telemetry fields (``error_detail`` and ``summary``)
    only, not to the payload itself.
    """
    import a2a_tools

    full_reply = "Z" * 20_000
    http_mock = _make_http_mock()

    with patch("a2a_tools.httpx.AsyncClient", return_value=http_mock):
        await a2a_tools.report_activity(
            "a2a_receive",
            target_id="ws-1",
            response_text=full_reply,
        )

    # Inspect the most recent POST recorded on the mock.
    sent_payload = http_mock.post.call_args.kwargs.get("json")
    forwarded_reply = sent_payload["response_body"]["result"]
    assert forwarded_reply == full_reply
    assert len(forwarded_reply) == 20_000
# ---------------------------------------------------------------------------
# tool_delegate_task