test(core#2176): regression guard for A2A full-body delivery paths #2181

Closed
fullstack-engineer wants to merge 1 commits from fix/core-2176-a2a-full-body-guard into main
@@ -0,0 +1,225 @@
package handlers
// a2a_full_body_delivery_guard_test.go — REGRESSION GUARD for #2175.
//
// Pins the (currently correct) behavior that A2A delivery paths preserve
// the FULL message body all the way to the agent consumer. The only
// truncations in the system are clearly-labeled HUMAN-FACING display
// previews (activity title 80 runes, broadcast 120, delegation summary
// 80, canvas `response_preview` 200 bytes). NONE of those caps is on the
// bytes an agent reads.
//
// Any future change that wires a display-length cap into a delivery path
// will fail one of these three tests loudly. Bodies are deliberately
// chosen to exceed 200 bytes (the largest preview cap), so a regression
// cannot hide behind a smaller body.
//
// Matches the sibling `a2a_queue_test.go` / `mcp_tools_test.go` sqlmock
// style (no integration build tag).
import (
"context"
"database/sql"
"strings"
"testing"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
"github.com/DATA-DOG/go-sqlmock"
)
// longBody is the shared fixture for all three tests in this file:
// fourteen ASCII sentences, each followed by a single space, 546 bytes
// in total. That is well above the largest known preview cap (200 bytes
// for canvas response_preview), so any silent cap wired into a delivery
// path will surface as a mismatch in the assertions below.
//
// The text deliberately contains no double quotes, backslashes or
// control characters, because the tests splice it verbatim into
// hand-written JSON string literals. Keep it that way when editing, and
// keep the total above 200 bytes.
const longBody = "The quick brown fox jumps over the lazy dog. " +
	"Sphinx of black quartz, judge my vow. " +
	"Pack my box with five dozen liquor jugs. " +
	"How vexingly quick daft zebras jump! " +
	"The five boxing wizards jump quickly. " +
	"Jackdaws love my big sphinx of quartz. " +
	"Mr Jock TV quiz PhD bags few lynx. " +
	"Cwm fjord bank glyphs vext quiz. " +
	"Brisk goths vex daft Jim with zany quill. " +
	"Glib jocks quiz nymph to vex dwarf. " +
	"Few quips galvanized the mock jury box. " +
	"Quirky sphinx of black quartz judges my vow. " +
	"Waltz nymph for quick jigs vex Bud. " +
	"Fix problem: body must exceed 200 chars. "
// TestDequeueNext_PreservesFullBody_NoTruncation pins the drain/read
// path (DequeueNext) to hand back the enqueued body byte-for-byte. If a
// future change introduces a `LEFT(body, N)` projection or slices the
// body to a preview length on the way out, this test fails.
//
// The fixture precondition (longBody larger than the 200-byte preview
// cap) is checked first, so a shrunken fixture is reported up front
// instead of after the subtests have passed vacuously.
//
// Two sub-cases are exercised:
//  1. Empty queue (sql.ErrNoRows): the test expects Begin, the SELECT,
//     then Rollback, and a (nil, nil) result. The SELECT expectation
//     still names `body::text`, so the projection is pinned even though
//     no body comes back.
//  2. Happy-path round trip: the mocked SELECT returns longBody and the
//     returned item.Body must equal it exactly.
func TestDequeueNext_PreservesFullBody_NoTruncation(t *testing.T) {
	if len(longBody) <= 200 {
		t.Fatalf("longBody constant must exceed 200 bytes (largest preview cap); got %d", len(longBody))
	}

	// Expected column list of the DequeueNext SELECT. sqlmock's default
	// matcher treats this as a regular expression searched for in the
	// executed SQL, so reordering the columns or wrapping body::text in
	// another expression makes the expectation miss.
	const dequeueSelect = `SELECT id, workspace_id, caller_id, priority, body::text, method, attempts`

	t.Run("empty_queue_no_body_to_truncate", func(t *testing.T) {
		mock := installDequeueGuardMock(t)

		// No row available: the transaction is expected to be rolled
		// back without any UPDATE being issued.
		mock.ExpectBegin()
		mock.ExpectQuery(dequeueSelect).
			WithArgs("ws-empty-queue").
			WillReturnError(sql.ErrNoRows)
		mock.ExpectRollback()

		item, err := DequeueNext(context.Background(), "ws-empty-queue")
		if err != nil {
			t.Fatalf("DequeueNext on empty queue returned error: %v", err)
		}
		if item != nil {
			t.Fatalf("DequeueNext on empty queue should return (nil, nil), got item=%+v", item)
		}
		if err := mock.ExpectationsWereMet(); err != nil {
			t.Errorf("sqlmock expectations: %v", err)
		}
	})

	t.Run("happy_path_body_round_trip_no_truncation", func(t *testing.T) {
		mock := installDequeueGuardMock(t)

		// One queued row ("q-1") whose body is longBody; the follow-up
		// UPDATE on a2a_queue is keyed by that id, then the transaction
		// commits.
		mock.ExpectBegin()
		mock.ExpectQuery(dequeueSelect).
			WithArgs("ws-happy").
			WillReturnRows(sqlmock.NewRows([]string{
				"id", "workspace_id", "caller_id", "priority", "body", "method", "attempts",
			}).AddRow("q-1", "ws-happy", "caller-x", 100, []byte(longBody), "message/send", 0))
		mock.ExpectExec(`UPDATE a2a_queue`).
			WithArgs("q-1").
			WillReturnResult(sqlmock.NewResult(0, 1))
		mock.ExpectCommit()

		item, err := DequeueNext(context.Background(), "ws-happy")
		if err != nil {
			t.Fatalf("DequeueNext happy path returned error: %v", err)
		}
		if item == nil {
			t.Fatal("DequeueNext happy path returned nil item; want populated")
		}

		// The critical assertion: Body must equal longBody exactly. The
		// failure message separates a shortened body from one whose
		// bytes changed, so a same-length corruption is not reported as
		// a confusing "len=X, want len=X" truncation.
		switch got := string(item.Body); {
		case got == longBody:
			// Full body survived the round trip.
		case len(got) < len(longBody):
			t.Errorf("DequeueNext truncated body: got len=%d, want len=%d (longBody constant)", len(got), len(longBody))
		default:
			t.Errorf("DequeueNext altered body: got len=%d, want len=%d; content differs from longBody constant", len(got), len(longBody))
		}

		if err := mock.ExpectationsWereMet(); err != nil {
			t.Errorf("sqlmock expectations: %v", err)
		}
	})
}

// installDequeueGuardMock points the package-global db.DB at a fresh
// sqlmock handle for the lifetime of t and returns the mock controller
// used to script expectations. On cleanup the previous db.DB is restored
// before the mock is closed, so the global never references a closed
// handle.
func installDequeueGuardMock(t *testing.T) sqlmock.Sqlmock {
	t.Helper()

	mockDB, mock, err := sqlmock.New()
	if err != nil {
		t.Fatalf("sqlmock.New: %v", err)
	}

	prevDB := db.DB
	db.DB = mockDB
	t.Cleanup(func() {
		db.DB = prevDB
		// Close error deliberately ignored: expectations have already
		// been verified by the subtest at this point.
		_ = mockDB.Close()
	})
	return mock
}
// TestToolCheckTaskStatus_ReturnsFullResponseBody_NoTruncation guards
// the `check_task_status` MCP tool against preview-style caps. A stored
// response_body carrying longBody as its text part is served through the
// mocked query, and the string returned by toolCheckTaskStatus must
// still contain longBody verbatim. If a future change slices the text
// (for example to the 200-byte canvas preview cap), the substring check
// below fails.
func TestToolCheckTaskStatus_ReturnsFullResponseBody_NoTruncation(t *testing.T) {
	sqlDB, sqlMock, err := sqlmock.New()
	if err != nil {
		t.Fatalf("sqlmock.New: %v", err)
	}
	defer sqlDB.Close()

	// A2A-style response in the artifacts shape whose single text part
	// is longBody, spliced in unescaped.
	const storedResponse = `{"jsonrpc":"2.0","id":"x","result":{"artifacts":[{"parts":[{"text":"` + longBody + `"}]}]}}`

	statusRow := sqlmock.NewRows([]string{"status", "error_detail", "response_body"}).
		AddRow("completed", nil, []byte(storedResponse))
	sqlMock.ExpectQuery(`SELECT status, error_detail, response_body`).
		WithArgs("caller-ws", "target-ws", "task-123").
		WillReturnRows(statusRow)

	// The handler is built directly on the mock DB. The second
	// constructor argument is left nil; per the original author's note
	// it is the broadcaster and is not needed on this path.
	handler := NewMCPHandler(sqlDB, nil)
	toolArgs := map[string]interface{}{
		"workspace_id": "target-ws",
		"task_id":      "task-123",
	}

	out, err := handler.toolCheckTaskStatus(context.Background(), "caller-ws", toolArgs)
	if err != nil {
		t.Fatalf("toolCheckTaskStatus: %v", err)
	}
	if out == "" {
		t.Fatalf("toolCheckTaskStatus returned empty string")
	}

	// Searching for the whole of longBody means a truncated prefix
	// cannot satisfy the check.
	if !strings.Contains(out, longBody) {
		t.Errorf("toolCheckTaskStatus truncated response_body: longBody (len=%d) not found verbatim in returned JSON (len=%d)", len(longBody), len(out))
	}

	if unmet := sqlMock.ExpectationsWereMet(); unmet != nil {
		t.Errorf("sqlmock expectations: %v", unmet)
	}
}
// TestExtractA2AText_FullBodyNoCap exercises extractA2AText on both A2A
// response shapes used in this file (result.artifacts and
// result.message), each carrying longBody as its only text part. The
// extracted text must equal longBody exactly, so a length cap added to
// the extracter (e.g. `if len(s) > 200 { s = s[:200] }`) is caught here.
func TestExtractA2AText_FullBodyNoCap(t *testing.T) {
	type responseShape struct {
		label   string
		payload string
	}

	shapes := []responseShape{
		{
			label:   "artifacts_format",
			payload: `{"result":{"artifacts":[{"parts":[{"text":"` + longBody + `"}]}]}}`,
		},
		{
			label:   "message_format",
			payload: `{"result":{"message":{"parts":[{"text":"` + longBody + `"}]}}}`,
		},
	}

	for i := range shapes {
		shape := shapes[i]
		t.Run(shape.label, func(t *testing.T) {
			text := extractA2AText([]byte(shape.payload))
			if text == longBody {
				return
			}

			// Show at most the first 60 bytes of what came back to keep
			// the failure output readable.
			preview := text
			if len(preview) > 60 {
				preview = preview[:60]
			}
			t.Errorf("extractA2AText: returned len=%d, want len=%d (full body). got prefix=%q", len(text), len(longBody), preview)
		})
	}
}