Merge pull request #2964 from Molecule-AI/fix/delegation-ledger-utf8-truncate-2962

fix(delegation_ledger): rune-safe preview truncation (#2962)
This commit is contained in:
Hongming Wang 2026-05-05 23:34:57 +00:00 committed by GitHub
commit f1dc721eeb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 81 additions and 1 deletions

View File

@ -53,13 +53,35 @@ func NewDelegationLedger(handle *sql.DB) *DelegationLedger {
// truncatePreview caps stored preview at 4KB. The full prompt/response is
// already in activity_logs.{request,response}_body — this is the at-a-glance
// view for the dashboard, not a forensic record.
//
// Rune-safe: previous byte-slice form (s[:previewCap]) split on a byte
// boundary, which on a multi-byte codepoint at byte 4096 produced
// invalid UTF-8 — Postgres JSONB rejects → ledger row not inserted →
// audit gap. Issue #2962. Walks the string by rune, stops at the last
// rune-boundary index that fits inside the cap. ASCII-only strings hit
// the cap exactly; CJK/emoji strings stop slightly under the cap,
// never over.
//
// Mirrors the truncatePreviewRunes fix from agent_message_writer.go
// (#2959). Both call sites should consume a shared helper after both
// fixes have landed — followup deduplication tracked in #2962's body.
const previewCap = 4096
func truncatePreview(s string) string {
if len(s) <= previewCap {
return s
}
return s[:previewCap]
// Range over a string yields rune-boundary byte indices. Walk
// until the next index would exceed previewCap; the previous
// index is the safe truncation point.
end := 0
for i := range s {
if i > previewCap {
break
}
end = i
}
return s[:end]
}
// InsertOpts is the agent's record-of-intent. Caller, callee, task preview,

View File

@ -5,6 +5,7 @@ import (
"errors"
"strings"
"testing"
"unicode/utf8"
"github.com/DATA-DOG/go-sqlmock"
)
@ -121,6 +122,63 @@ func TestTruncatePreview_ExactlyAtCap(t *testing.T) {
}
}
// TestTruncatePreview_NeverProducesInvalidUTF8 — pins #2962. The old
// byte-slice implementation (s[:previewCap]) split on a byte boundary,
// so a multi-byte codepoint straddling byte 4096 produced invalid
// UTF-8 → Postgres JSONB rejects → ledger row not inserted → audit
// gap. Test feeds a CJK / emoji-padded string longer than previewCap
// and asserts utf8.ValidString on the result.
func TestTruncatePreview_NeverProducesInvalidUTF8(t *testing.T) {
// Build a string of '世' (3 bytes per rune in UTF-8) that's just
// past the cap. With the old implementation, the slice at byte
// previewCap would land mid-rune and ValidString would fail.
// With the rune-aware implementation, the result is always valid
// UTF-8 even if the byte length is < previewCap.
rune3 := "世" // U+4E16, 3 bytes
// Need at least previewCap/3 + 1 runes so we cross the cap with
// margin to spare.
in := strings.Repeat(rune3, (previewCap/3)+10)
if len(in) <= previewCap {
t.Fatalf("test setup: input too short (%d bytes) — must exceed previewCap=%d", len(in), previewCap)
}
got := truncatePreview(in)
if !utf8.ValidString(got) {
t.Errorf("truncatePreview produced invalid UTF-8 — JSONB will reject this row. len(got)=%d", len(got))
}
if len(got) > previewCap {
t.Errorf("truncatePreview exceeded cap: len(got)=%d > previewCap=%d", len(got), previewCap)
}
// Defense-in-depth: the result should also be a clean rune
// prefix of the input — not some garbled sequence.
if !strings.HasPrefix(in, got) {
t.Errorf("truncatePreview should return a prefix of the input")
}
}
// TestTruncatePreview_MultiByteAtBoundary — most-targeted regression.
// Feeds an input where the cap byte falls EXACTLY in the middle of a
// 3-byte codepoint. Pre-fix, this is the case that produces invalid
// UTF-8; post-fix, the truncate stops at the previous rune boundary.
func TestTruncatePreview_MultiByteAtBoundary(t *testing.T) {
// Build a string that's `previewCap-1` ASCII bytes followed by
// '世' (3 bytes). Total = previewCap + 2. The old impl would
// slice at byte previewCap, landing inside the '世' codepoint.
prefix := strings.Repeat("a", previewCap-1)
in := prefix + "世"
if len(in) != previewCap+2 {
t.Fatalf("test setup: expected len %d, got %d", previewCap+2, len(in))
}
got := truncatePreview(in)
if !utf8.ValidString(got) {
t.Errorf("truncatePreview produced invalid UTF-8 at the multi-byte boundary case")
}
// Result should be exactly the ASCII prefix — '世' was past
// the cap so it must be dropped entirely.
if got != prefix {
t.Errorf("expected exact ASCII prefix, got %q (len=%d)", got[len(got)-10:], len(got))
}
}
// ---------- SetStatus lifecycle ----------
func TestLedgerSetStatus_QueuedToDispatched(t *testing.T) {