diff --git a/workspace-server/internal/handlers/delegation_ledger.go b/workspace-server/internal/handlers/delegation_ledger.go index 9a783ece..621cdbd9 100644 --- a/workspace-server/internal/handlers/delegation_ledger.go +++ b/workspace-server/internal/handlers/delegation_ledger.go @@ -53,13 +53,35 @@ func NewDelegationLedger(handle *sql.DB) *DelegationLedger { // truncatePreview caps stored preview at 4KB. The full prompt/response is // already in activity_logs.{request,response}_body — this is the at-a-glance // view for the dashboard, not a forensic record. +// +// Rune-safe: previous byte-slice form (s[:previewCap]) split on a byte +// boundary, which on a multi-byte codepoint at byte 4096 produced +// invalid UTF-8 — Postgres JSONB rejects → ledger row not inserted → +// audit gap. Issue #2962. Walks the string by rune, stops at the last +// rune-boundary index that fits inside the cap. ASCII-only strings hit +// the cap exactly; CJK/emoji strings stop slightly under the cap, +// never over. +// +// Mirrors the truncatePreviewRunes fix from agent_message_writer.go +// (#2959). Both call sites should consume a shared helper after both +// fixes have landed — followup deduplication tracked in #2962's body. const previewCap = 4096 func truncatePreview(s string) string { if len(s) <= previewCap { return s } - return s[:previewCap] + // Range over a string yields rune-boundary byte indices. Walk + // until the next index would exceed previewCap; the previous + // index is the safe truncation point. + end := 0 + for i := range s { + if i > previewCap { + break + } + end = i + } + return s[:end] } // InsertOpts is the agent's record-of-intent. Caller, callee, task preview, diff --git a/workspace-server/internal/handlers/delegation_ledger_test.go b/workspace-server/internal/handlers/delegation_ledger_test.go index 78cb4b94..48f1a3cd 100644 --- a/workspace-server/internal/handlers/delegation_ledger_test.go +++ b/workspace-server/internal/handlers/delegation_ledger_test.go @@ -5,6 +5,7 @@ import ( "errors" "strings" "testing" + "unicode/utf8" "github.com/DATA-DOG/go-sqlmock" ) @@ -121,6 +122,63 @@ func TestTruncatePreview_ExactlyAtCap(t *testing.T) { } } +// TestTruncatePreview_NeverProducesInvalidUTF8 — pins #2962. The old +// byte-slice implementation (s[:previewCap]) split on a byte boundary, +// so a multi-byte codepoint straddling byte 4096 produced invalid +// UTF-8 → Postgres JSONB rejects → ledger row not inserted → audit +// gap. Test feeds a CJK / emoji-padded string longer than previewCap +// and asserts utf8.ValidString on the result. +func TestTruncatePreview_NeverProducesInvalidUTF8(t *testing.T) { + // Build a string of '世' (3 bytes per rune in UTF-8) that's just + // past the cap. With the old implementation, the slice at byte + // previewCap would land mid-rune and ValidString would fail. + // With the rune-aware implementation, the result is always valid + // UTF-8 even if the byte length is < previewCap. + rune3 := "世" // U+4E16, 3 bytes + // Need at least previewCap/3 + 1 runes so we cross the cap with + // margin to spare. + in := strings.Repeat(rune3, (previewCap/3)+10) + if len(in) <= previewCap { + t.Fatalf("test setup: input too short (%d bytes) — must exceed previewCap=%d", len(in), previewCap) + } + got := truncatePreview(in) + if !utf8.ValidString(got) { + t.Errorf("truncatePreview produced invalid UTF-8 — JSONB will reject this row. len(got)=%d", len(got)) + } + if len(got) > previewCap { + t.Errorf("truncatePreview exceeded cap: len(got)=%d > previewCap=%d", len(got), previewCap) + } + // Defense-in-depth: the result should also be a clean rune + // prefix of the input — not some garbled sequence. + if !strings.HasPrefix(in, got) { + t.Errorf("truncatePreview should return a prefix of the input") + } +} + +// TestTruncatePreview_MultiByteAtBoundary — most-targeted regression. +// Feeds an input where the cap byte falls EXACTLY in the middle of a +// 3-byte codepoint. Pre-fix, this is the case that produces invalid +// UTF-8; post-fix, the truncate stops at the previous rune boundary. +func TestTruncatePreview_MultiByteAtBoundary(t *testing.T) { + // Build a string that's `previewCap-1` ASCII bytes followed by + // '世' (3 bytes). Total = previewCap + 2. The old impl would + // slice at byte previewCap, landing inside the '世' codepoint. + prefix := strings.Repeat("a", previewCap-1) + in := prefix + "世" + if len(in) != previewCap+2 { + t.Fatalf("test setup: expected len %d, got %d", previewCap+2, len(in)) + } + got := truncatePreview(in) + if !utf8.ValidString(got) { + t.Errorf("truncatePreview produced invalid UTF-8 at the multi-byte boundary case") + } + // Result should be exactly the ASCII prefix — '世' was past + // the cap so it must be dropped entirely. + if got != prefix { + t.Errorf("expected exact ASCII prefix, got %q (len=%d)", got[len(got)-10:], len(got)) + } +} + // ---------- SetStatus lifecycle ---------- func TestLedgerSetStatus_QueuedToDispatched(t *testing.T) {