2026-06-05 14:57:58 +00:00
1 changed files with 177 additions and 0 deletions
@@ -0,0 +1,177 @@
+package handlers
+
+// a2a_full_body_delivery_guard_test.go — regression guard for core#2175.
+//
+// core#2175 RCA: the long-believed "A2A truncation" was a MISDIAGNOSIS.
+// A2A message delivery preserves the FULL body on every agent-facing path.
+// Only HUMAN-facing DISPLAY previews are capped (activity title 80 runes,
+// broadcast 120, delegation summary 80, canvas response_preview 200 bytes).
+// Those caps live on display/broadcast fields, NOT on the bytes an agent
+// reads off the wire.
+//
+// This file locks in the correct behaviour so a FUTURE change cannot
+// silently reintroduce REAL truncation on the agent-facing delivery paths:
+//
+//   1. DequeueNext (a2a_queue.go) — the drain/read path does
+//      `SELECT ... body::text ...` and returns item.Body. The delivered
+//      body MUST equal the enqueued body byte-for-byte.
+//
+//   2. toolCheckTaskStatus (mcp_tools.go) — reads activity_logs.response_body
+//      and surfaces result["result"] = extractA2AText(responseBody). The
+//      returned text MUST be the COMPLETE response text, not a preview.
+//
+// All bodies used here are WELL over 200 chars (> the largest preview cap,
+// canvas response_preview at 200 bytes) so a regression that wired any
+// display cap into a delivery path would fail loudly.
+//
+// Style: matches the sibling a2a_queue_test.go / mcp_tools_test.go — sqlmock,
+// no integration build tag. These paths are deterministically exercisable
+// against the mock because the truncation guard is about what the Go code
+// does with the row value, not about Postgres-side text handling. CI's
+// real-PG integration arm (a2a_*_integration tests) additionally exercises
+// the live `body::text` round-trip.
+
+import (
+	"context"
+	"database/sql"
+	"strings"
+	"testing"
+
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
+	"github.com/DATA-DOG/go-sqlmock"
+)
+
+// largeA2ABody returns a well-formed A2A JSON-RPC message/send body whose
+// single text part is "A" repeated textLen times. Pass a textLen big enough
+// that the whole body outgrows the largest preview cap (200 bytes).
+func largeA2ABody(textLen int) string {
+	const head = `{"jsonrpc":"2.0","method":"message/send","params":{"message":{"role":"user","messageId":"guard-2175","parts":[{"type":"text","text":"`
+	return head + strings.Repeat("A", textLen) + `"}]}}}`
+}
+
+// TestDequeueNext_PreservesFullBody_NoTruncation guards the queue drain/read
+// path: the body DequeueNext hands back must be byte-for-byte the body stored
+// in the queue row, even when it is far longer than any display preview cap.
+func TestDequeueNext_PreservesFullBody_NoTruncation(t *testing.T) {
+	const (
+		wsID        = "ws-guard-2175"
+		itemID      = "qid-guard-2175"
+		selectQuery = "SELECT id, workspace_id, caller_id, priority, body::text, method, attempts FROM a2a_queue WHERE workspace_id = $1 AND status = 'queued' AND (expires_at IS NULL OR expires_at > now()) ORDER BY priority DESC, enqueued_at ASC FOR UPDATE SKIP LOCKED LIMIT 1"
+		updateQuery = "UPDATE a2a_queue SET status = 'dispatched', dispatched_at = now(), attempts = attempts + 1 WHERE id = $1"
+	)
+
+	// A 4000-character text part puts the body far above the 200-byte cap.
+	wantBody := largeA2ABody(4000)
+	if len(wantBody) <= 200 {
+		t.Fatalf("test setup error: body must exceed the largest preview cap (200); got %d", len(wantBody))
+	}
+
+	conn, mock, err := sqlmock.New(sqlmock.QueryMatcherOption(sqlmock.QueryMatcherEqual))
+	if err != nil {
+		t.Fatalf("failed to create sqlmock: %v", err)
+	}
+
+	// Swap the package-level handle for the mock; restore it when the test ends.
+	savedDB := db.DB
+	db.DB = conn
+	t.Cleanup(func() { db.DB = savedDB; conn.Close() })
+
+	// Expected statement sequence: BEGIN, the locking SELECT, the UPDATE that
+	// marks the row dispatched, COMMIT. The SELECT row carries the full body.
+	queued := sqlmock.NewRows([]string{
+		"id", "workspace_id", "caller_id", "priority", "body", "method", "attempts",
+	}).AddRow(
+		itemID, wsID, sql.NullString{Valid: false}, PriorityTask,
+		wantBody, sql.NullString{String: "message/send", Valid: true}, 0,
+	)
+	mock.ExpectBegin()
+	mock.ExpectQuery(selectQuery).WithArgs(wsID).WillReturnRows(queued)
+	mock.ExpectExec(updateQuery).WithArgs(itemID).WillReturnResult(sqlmock.NewResult(0, 1))
+	mock.ExpectCommit()
+
+	item, err := DequeueNext(context.Background(), wsID)
+	if err != nil {
+		t.Fatalf("DequeueNext returned error: %v", err)
+	}
+	if item == nil {
+		t.Fatal("DequeueNext returned nil item for a non-empty queue")
+	}
+
+	delivered := string(item.Body)
+	if delivered != wantBody {
+		t.Errorf("delivered body was truncated/altered.\n  enqueued len=%d\n  delivered len=%d\n  REGRESSION: a delivery path must NOT apply a display preview cap (core#2175)",
+			len(wantBody), len(delivered))
+	}
+
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
+
+// TestToolCheckTaskStatus_ReturnsFullResponseBody_NoTruncation guards the
+// check_task_status read path. The mocked row carries a response_body whose
+// text part is 3000 characters long; the test fails unless that complete
+// text appears in the string returned by toolCheckTaskStatus.
+func TestToolCheckTaskStatus_ReturnsFullResponseBody_NoTruncation(t *testing.T) {
+	const (
+		callerID = "ws-caller-2175"
+		targetID = "ws-target-2175"
+		taskID   = "del-guard-2175"
+	)
+
+	wantText := strings.Repeat("B", 3000)
+	rawResponse := []byte(`{"jsonrpc":"2.0","result":{"artifacts":[{"parts":[{"type":"text","text":"` + wantText + `"}]}]}}`)
+
+	conn, mock, err := sqlmock.New()
+	if err != nil {
+		t.Fatalf("failed to create sqlmock: %v", err)
+	}
+	t.Cleanup(func() { conn.Close() })
+	handler := &MCPHandler{database: conn}
+
+	statusRow := sqlmock.NewRows([]string{"status", "error_detail", "response_body"}).
+		AddRow("completed", sql.NullString{Valid: false}, rawResponse)
+	mock.ExpectQuery(`SELECT status, error_detail, response_body`).
+		WithArgs(callerID, targetID, taskID).
+		WillReturnRows(statusRow)
+
+	toolArgs := map[string]interface{}{
+		"workspace_id": targetID,
+		"task_id":      taskID,
+	}
+	out, err := handler.toolCheckTaskStatus(context.Background(), callerID, toolArgs)
+	if err != nil {
+		t.Fatalf("toolCheckTaskStatus returned error: %v", err)
+	}
+
+	// A preview cap applied to the agent-facing result would cut the run of
+	// characters short, so the complete text would no longer be a substring.
+	if !strings.Contains(out, wantText) {
+		t.Errorf("check_task_status result was truncated.\n  expected full %d-char response text in result\n  REGRESSION: the agent-facing check_task_status path must return the COMPLETE response_body, not a display preview (core#2175)",
+			len(wantText))
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
+
+// TestExtractA2AText_FullBodyNoCap guards the extractor in isolation. For
+// each of the two A2A response shapes below it checks that extractA2AText
+// yields the whole 2500-character text part rather than a shortened copy.
+func TestExtractA2AText_FullBodyNoCap(t *testing.T) {
+	want := strings.Repeat("C", 2500)
+	textPart := `{"parts":[{"type":"text","text":"` + want + `"}]}`
+
+	shapes := map[string]string{
+		"artifacts shape": `{"result":{"artifacts":[` + textPart + `]}}`,
+		"message shape":   `{"result":{"message":` + textPart + `}}`,
+	}
+	for shape, payload := range shapes {
+		t.Run(shape, func(t *testing.T) {
+			if got := extractA2AText([]byte(payload)); got != want {
+				t.Errorf("extractA2AText capped/altered the text.\n  want len=%d\n  got  len=%d\n  REGRESSION: extractor must not truncate (core#2175)",
+					len(want), len(got))
+			}
+		})
+	}
+}