fix(textutil): SSOT for rune-safe string truncation, fix 3 audit-gap bugs

Closes #2962.

## Why

Six per-package `truncate` helpers had drifted into independent
re-implementations of the same idea. Three of them (delegation.go,
memory/client/client.go, memory-backfill/verify.go) used the byte-slice
form `s[:max] + marker` ("…" or "..."), which produces invalid UTF-8
whenever byte `max` falls inside a multi-byte codepoint → Postgres
`text`/`jsonb` rejects the INSERT and the caller only logs the error →
`delegations` / `activity_logs` row never lands → audit gap.

Three other helpers (delegation_ledger.go #2962, agent_message_writer.go
#2959, scheduler.go #2026) had each been fixed in isolation with three
slightly different rune-safe shapes — confirming this is a class of
bug, not a single instance.

## What

New package `internal/textutil` with three rune-safe functions:

- `TruncateBytes(s, maxBytes)` — byte-cap, "…" marker. Used by 5
  callers writing into byte-bounded columns / log lines.
- `TruncateBytesNoMarker(s, maxBytes)` — byte-cap, no marker. Used by
  delegation_ledger.go where the storage already conveys "preview"
  and an extra ellipsis would push the result over the column cap.
- `TruncateRunes(s, maxRunes)` — rune-cap, "…" marker. Used by
  agent_message_writer.go where the cap is in display chars (UI
  summary), not bytes.

All three guarantee `utf8.ValidString(out)` for any `utf8.ValidString(in)`.
Inputs that are already invalid UTF-8 are not repaired by these helpers;
they must be sanitized at the call-site boundary (scheduler.go keeps its
existing `sanitizeUTF8` wrapper as defense-in-depth).

## Migration map

| Old | New | Behavior change |
|---|---|---|
| `delegation_ledger.truncatePreview` | `textutil.TruncateBytesNoMarker(s, 4096)` | none |
| `agent_message_writer.truncatePreviewRunes` | `textutil.TruncateRunes(s, n)` | none |
| `scheduler.truncate` | `textutil.TruncateBytes(s, n)` | "..." → "…" (3 bytes either way; single-glyph display) |
| `delegation.truncate` | `textutil.TruncateBytes(s, n)` | bug fix + ellipsis swap |
| `memory/client.truncate` | `textutil.TruncateBytes(s, n)` | bug fix |
| `memory-backfill.truncate` | `textutil.TruncateBytes(s, n)` | bug fix |

Six separate `truncate*` helpers + their per-package tests removed.
Net: 14 files / +427 / -255.

## Tests

- `internal/textutil/truncate_test.go` — 27 table-test cases + 145
  fuzz-invariant cases asserting `utf8.ValidString` and byte-cap
  invariants on every output.
- `delegation_ledger_test.go TestLedgerInsert_TruncatesOversizedPreview`
  strengthened with `capValidUTF8Matcher` so the SQL-write argument
  is asserted to be valid UTF-8 + within cap (not just `AnyArg()`).
  Mutation-tested: replacing the SSOT call with byte-slice form makes
  this test fail loud.

## Compatibility

- All callers internal; no external API surface change.
- Ellipsis swap "..." → "…": same byte budget (3 bytes), single-glyph
  display. No alerting/grep on either marker in this codebase
  (verified). Canvas renders both correctly.
- Truncation caps unchanged (4096 / 300 / 256 / 200 / 100 / 80 — every
  migrated call site keeps its previous cap).

## Security

Fixes a silent INSERT-failure mode that hid `activity_logs` /
`delegations` rows containing peer-controlled text. The class of input
that triggered it (CJK, emoji, accented Latin) is normal user content,
not malicious — but the symptom (audit gap) makes incident
reconstruction harder. Helper is pure-function over `string`; no
secrets / PII / auth handling involved. Untrusted input is handled
identically to before, just rune-aligned now.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hongming Wang 2026-05-05 23:01:21 -07:00
parent c53155ec5f
commit 656a02fae4
14 changed files with 427 additions and 255 deletions

View File

@ -21,6 +21,7 @@ import (
"os" "os"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/contract" "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/contract"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/textutil"
) )
// verifyConfig is the typed dependency bundle for verifyParity. // verifyConfig is the typed dependency bundle for verifyParity.
@ -121,7 +122,7 @@ func verifyParity(ctx context.Context, cfg verifyConfig, stdout *os.File) (*veri
matched := true matched := true
for _, c := range legacy { for _, c := range legacy {
if pluginContents[c] == 0 { if pluginContents[c] == 0 {
fmt.Fprintf(stdout, "[mismatch] workspace=%s missing-from-plugin content=%q\n", wsID, truncate(c, 80)) fmt.Fprintf(stdout, "[mismatch] workspace=%s missing-from-plugin content=%q\n", wsID, textutil.TruncateBytes(c, 80))
matched = false matched = false
break break
} }
@ -192,9 +193,4 @@ func queryLegacyMemories(ctx context.Context, db *sql.DB, workspaceID string) ([
return out, rows.Err() return out, rows.Err()
} }
func truncate(s string, n int) string { // truncation moved to internal/textutil.TruncateBytes (#2962 SSOT).
if len(s) <= n {
return s
}
return s[:n] + "…"
}

View File

@ -349,16 +349,8 @@ func TestVerifyParity_PickSampleError(t *testing.T) {
} }
} }
// --- Truncate --- // Truncate moved to internal/textutil — coverage in
// internal/textutil/truncate_test.go (TestTruncateBytes_RuneBoundary).
func TestVerifyTruncate(t *testing.T) {
if got := truncate("short", 10); got != "short" {
t.Errorf("got %q", got)
}
if got := truncate(strings.Repeat("a", 200), 10); !strings.HasSuffix(got, "…") {
t.Errorf("expected ellipsis: %q", got)
}
}
// --- CLI: -verify mode --- // --- CLI: -verify mode ---

View File

@ -22,6 +22,7 @@ import (
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db" "github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events" "github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/textutil"
) )
// extractIdempotencyKey pulls params.message.messageId out of an A2A JSON-RPC // extractIdempotencyKey pulls params.message.messageId out of an A2A JSON-RPC
@ -420,7 +421,7 @@ func (h *WorkspaceHandler) stitchDrainResponseToDelegation(ctx context.Context,
AND method = 'delegate_result' AND method = 'delegate_result'
AND target_id = $4 AND target_id = $4
AND response_body->>'delegation_id' = $5 AND response_body->>'delegation_id' = $5
`, "Delegation completed ("+truncate(responseText, 80)+")", string(respJSON), `, "Delegation completed ("+textutil.TruncateBytes(responseText, 80)+")", string(respJSON),
sourceID, targetID, delegationID) sourceID, targetID, delegationID)
if err != nil { if err != nil {
log.Printf("A2AQueue drain stitch: update failed for delegation %s: %v", delegationID, err) log.Printf("A2AQueue drain stitch: update failed for delegation %s: %v", delegationID, err)
@ -439,7 +440,7 @@ func (h *WorkspaceHandler) stitchDrainResponseToDelegation(ctx context.Context,
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationComplete), sourceID, map[string]interface{}{ h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationComplete), sourceID, map[string]interface{}{
"delegation_id": delegationID, "delegation_id": delegationID,
"target_id": targetID, "target_id": targetID,
"response_preview": truncate(responseText, 200), "response_preview": textutil.TruncateBytes(responseText, 200),
"via": "queue_drain", "via": "queue_drain",
}) })
} }

View File

@ -42,9 +42,9 @@ import (
"errors" "errors"
"fmt" "fmt"
"log" "log"
"unicode/utf8"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events" "github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/textutil"
) )
// ErrWorkspaceNotFound is returned by AgentMessageWriter.Send when the // ErrWorkspaceNotFound is returned by AgentMessageWriter.Send when the
@ -54,36 +54,6 @@ import (
// timeout) surface as wrapped errors and should be treated as 503. // timeout) surface as wrapped errors and should be treated as 503.
var ErrWorkspaceNotFound = errors.New("agent_message: workspace not found") var ErrWorkspaceNotFound = errors.New("agent_message: workspace not found")
// truncatePreviewRunes returns at most maxRunes runes of s, plus an ellipsis
// when truncated. Operates on the rune (codepoint) boundary instead of
// byte indices — the previous byte-slice version produced invalid UTF-8
// when maxRunes landed mid-codepoint (CJK, emoji, accented characters
// in agent-authored chat messages), and Postgres JSONB rejects invalid
// UTF-8, dropping the activity_log INSERT silently. The persistence
// failure log fires but the message vanishes from chat history — the
// exact regression class the SSOT consolidation was built to prevent.
//
// maxRunes is in runes, not bytes — `truncatePreviewRunes("你好", 1)` returns
// `"你…"`, not `"\xe4…"`. Set the cap on a UI-friendly basis (visible
// character count, not stored byte count); 80 runes covers the
// activity_logs.summary column comfortably.
func truncatePreviewRunes(s string, maxRunes int) string {
if utf8.RuneCountInString(s) <= maxRunes {
return s
}
// Walk runes until we've consumed maxRunes; cut at that byte index.
count := 0
cut := len(s)
for i := range s {
if count == maxRunes {
cut = i
break
}
count++
}
return s[:cut] + "…"
}
// AgentMessageAttachment is one file attached to an agent → user // AgentMessageAttachment is one file attached to an agent → user
// message. Identical to handlers.NotifyAttachment in field set; kept // message. Identical to handlers.NotifyAttachment in field set; kept
// distinct so the writer's API doesn't import a handler type with HTTP // distinct so the writer's API doesn't import a handler type with HTTP
@ -186,7 +156,7 @@ func (w *AgentMessageWriter) Send(
respPayload["parts"] = fileParts respPayload["parts"] = fileParts
} }
respJSON, _ := json.Marshal(respPayload) respJSON, _ := json.Marshal(respPayload)
preview := truncatePreviewRunes(message, 80) preview := textutil.TruncateRunes(message, 80)
if _, err := w.db.ExecContext(ctx, ` if _, err := w.db.ExecContext(ctx, `
INSERT INTO activity_logs (workspace_id, activity_type, method, summary, response_body, status) INSERT INTO activity_logs (workspace_id, activity_type, method, summary, response_body, status)
VALUES ($1, 'a2a_receive', 'notify', $2, $3::jsonb, 'ok') VALUES ($1, 'a2a_receive', 'notify', $2, $3::jsonb, 'ok')

View File

@ -331,45 +331,11 @@ func TestAgentMessageWriter_Send_DBErrorOnLookupReturnsWrapped(t *testing.T) {
} }
} }
// TestTruncatePreviewRunes_RuneBoundary pins the multi-byte-safe // Helper-level truncate tests now live in
// truncation. The previous byte-slice version produced invalid UTF-8 // internal/textutil/truncate_test.go (TestTruncateRunes). The
// when the cut landed mid-codepoint (CJK, emoji, accented), and // integration-level coverage that exercises the agent_message_writer
// Postgres JSONB rejects invalid UTF-8 — INSERT fails, log.Printf // path with non-ASCII content is TestAgentMessageWriter_Send_NonASCIIMessagePersists
// fires, message vanishes from chat history. Per memory // below.
// feedback_assert_exact_not_substring.md, pin the boundary cases
// directly.
func TestTruncatePreviewRunes_RuneBoundary(t *testing.T) {
cases := []struct {
name string
in string
max int
want string
}{
{"under-max ASCII", "hi", 80, "hi"},
{"under-max CJK", "你好", 80, "你好"},
{"exactly-at-max", "abcde", 5, "abcde"},
{"truncate ASCII", "abcdefghij", 5, "abcde…"},
{"truncate CJK at rune boundary", "你好世界你好世界", 4, "你好世界…"},
{"truncate emoji at rune boundary", "😀😀😀😀😀😀", 3, "😀😀😀…"},
// The pre-fix bug shape: byte-slice on non-ASCII would have
// mangled the codepoint here. With rune-boundary truncation
// the result is well-formed UTF-8.
{"non-zero with emoji prefix", "🚀abcdefghijk", 5, "🚀abcd…"},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
got := truncatePreviewRunes(c.in, c.max)
if got != c.want {
t.Errorf("truncatePreviewRunes(%q, %d) = %q, want %q", c.in, c.max, got, c.want)
}
// Always-valid UTF-8 invariant. A byte-slice truncation
// could leave partial codepoints; this version must not.
if !utf8.ValidString(got) {
t.Errorf("truncatePreviewRunes(%q, %d) returned invalid UTF-8: %q", c.in, c.max, got)
}
})
}
}
// TestAgentMessageWriter_Send_NonASCIIMessagePersists pins the end-to-end // TestAgentMessageWriter_Send_NonASCIIMessagePersists pins the end-to-end
// path for non-ASCII messages — the original reno-stars regression // path for non-ASCII messages — the original reno-stars regression

View File

@ -10,6 +10,7 @@ import (
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db" "github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events" "github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/textutil"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"github.com/google/uuid" "github.com/google/uuid"
) )
@ -167,7 +168,7 @@ func (h *DelegationHandler) Delegate(c *gin.Context) {
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationSent), sourceID, map[string]interface{}{ h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationSent), sourceID, map[string]interface{}{
"delegation_id": delegationID, "delegation_id": delegationID,
"target_id": body.TargetID, "target_id": body.TargetID,
"task_preview": truncate(body.Task, 100), "task_preview": textutil.TruncateBytes(body.Task, 100),
}) })
resp := gin.H{ resp := gin.H{
@ -407,7 +408,7 @@ func (h *DelegationHandler) executeDelegation(sourceID, targetID, delegationID s
if _, err := db.DB.ExecContext(ctx, ` if _, err := db.DB.ExecContext(ctx, `
INSERT INTO activity_logs (workspace_id, activity_type, method, source_id, target_id, summary, response_body, status) INSERT INTO activity_logs (workspace_id, activity_type, method, source_id, target_id, summary, response_body, status)
VALUES ($1, 'delegation', 'delegate_result', $2, $3, $4, $5::jsonb, 'completed') VALUES ($1, 'delegation', 'delegate_result', $2, $3, $4, $5::jsonb, 'completed')
`, sourceID, sourceID, targetID, "Delegation completed ("+truncate(responseText, 80)+")", string(respJSON)); err != nil { `, sourceID, sourceID, targetID, "Delegation completed ("+textutil.TruncateBytes(responseText, 80)+")", string(respJSON)); err != nil {
log.Printf("Delegation %s: failed to insert success log: %v", delegationID, err) log.Printf("Delegation %s: failed to insert success log: %v", delegationID, err)
} }
@ -423,7 +424,7 @@ func (h *DelegationHandler) executeDelegation(sourceID, targetID, delegationID s
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationComplete), sourceID, map[string]interface{}{ h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationComplete), sourceID, map[string]interface{}{
"delegation_id": delegationID, "delegation_id": delegationID,
"target_id": targetID, "target_id": targetID,
"response_preview": truncate(responseText, 200), "response_preview": textutil.TruncateBytes(responseText, 200),
}) })
// RFC #2829 PR-2 result-push (see UpdateStatus for rationale). // RFC #2829 PR-2 result-push (see UpdateStatus for rationale).
pushDelegationResultToInbox(ctx, sourceID, delegationID, "completed", responseText, "") pushDelegationResultToInbox(ctx, sourceID, delegationID, "completed", responseText, "")
@ -506,7 +507,7 @@ func (h *DelegationHandler) Record(c *gin.Context) {
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationSent), sourceID, map[string]interface{}{ h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationSent), sourceID, map[string]interface{}{
"delegation_id": body.DelegationID, "delegation_id": body.DelegationID,
"target_id": body.TargetID, "target_id": body.TargetID,
"task_preview": truncate(body.Task, 100), "task_preview": textutil.TruncateBytes(body.Task, 100),
}) })
c.JSON(http.StatusAccepted, gin.H{ c.JSON(http.StatusAccepted, gin.H{
@ -555,12 +556,12 @@ func (h *DelegationHandler) UpdateStatus(c *gin.Context) {
if _, err := db.DB.ExecContext(ctx, ` if _, err := db.DB.ExecContext(ctx, `
INSERT INTO activity_logs (workspace_id, activity_type, method, source_id, summary, response_body, status) INSERT INTO activity_logs (workspace_id, activity_type, method, source_id, summary, response_body, status)
VALUES ($1, 'delegation', 'delegate_result', $2, $3, $4::jsonb, 'completed') VALUES ($1, 'delegation', 'delegate_result', $2, $3, $4::jsonb, 'completed')
`, sourceID, sourceID, "Delegation completed ("+truncate(body.ResponsePreview, 80)+")", string(respJSON)); err != nil { `, sourceID, sourceID, "Delegation completed ("+textutil.TruncateBytes(body.ResponsePreview, 80)+")", string(respJSON)); err != nil {
log.Printf("Delegation UpdateStatus: result insert failed for %s: %v", delegationID, err) log.Printf("Delegation UpdateStatus: result insert failed for %s: %v", delegationID, err)
} }
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationComplete), sourceID, map[string]interface{}{ h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationComplete), sourceID, map[string]interface{}{
"delegation_id": delegationID, "delegation_id": delegationID,
"response_preview": truncate(body.ResponsePreview, 200), "response_preview": textutil.TruncateBytes(body.ResponsePreview, 200),
}) })
// RFC #2829 PR-2 result-push: when the gate is on, also write an // RFC #2829 PR-2 result-push: when the gate is on, also write an
// a2a_receive row so the caller's inbox poller surfaces this to // a2a_receive row so the caller's inbox poller surfaces this to
@ -626,7 +627,7 @@ func (h *DelegationHandler) ListDelegations(c *gin.Context) {
entry["error"] = errorDetail entry["error"] = errorDetail
} }
if responseBody != "" { if responseBody != "" {
entry["response_preview"] = truncate(responseBody, 300) entry["response_preview"] = textutil.TruncateBytes(responseBody, 300)
} }
delegations = append(delegations, entry) delegations = append(delegations, entry)
} }
@ -727,9 +728,3 @@ func extractResponseText(body []byte) string {
return string(body) return string(body)
} }
func truncate(s string, max int) string {
if len(s) <= max {
return s
}
return s[:max] + "..."
}

View File

@ -8,6 +8,7 @@ import (
"time" "time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db" "github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/textutil"
) )
// delegation_ledger.go — durable per-task ledger for A2A delegation // delegation_ledger.go — durable per-task ledger for A2A delegation
@ -50,40 +51,15 @@ func NewDelegationLedger(handle *sql.DB) *DelegationLedger {
return &DelegationLedger{db: handle} return &DelegationLedger{db: handle}
} }
// truncatePreview caps stored preview at 4KB. The full prompt/response is // previewCap caps stored preview at 4KB. The full prompt/response is
// already in activity_logs.{request,response}_body — this is the at-a-glance // already in activity_logs.{request,response}_body — this is the
// view for the dashboard, not a forensic record. // at-a-glance view for the dashboard, not a forensic record.
// //
// Rune-safe: previous byte-slice form (s[:previewCap]) split on a byte // Truncation goes through textutil.TruncateBytesNoMarker so it's
// boundary, which on a multi-byte codepoint at byte 4096 produced // rune-safe (#2026 / #2959 / #2962 bug class: byte-slice mid-codepoint
// invalid UTF-8 — Postgres JSONB rejects → ledger row not inserted → // → Postgres JSONB rejects → silent INSERT failure → audit gap).
// audit gap. Issue #2962. Walks the string by rune, stops at the last
// rune-boundary index that fits inside the cap. ASCII-only strings hit
// the cap exactly; CJK/emoji strings stop slightly under the cap,
// never over.
//
// Mirrors the truncatePreviewRunes fix from agent_message_writer.go
// (#2959). Both call sites should consume a shared helper after both
// fixes have landed — followup deduplication tracked in #2962's body.
const previewCap = 4096 const previewCap = 4096
func truncatePreview(s string) string {
if len(s) <= previewCap {
return s
}
// Range over a string yields rune-boundary byte indices. Walk
// until the next index would exceed previewCap; the previous
// index is the safe truncation point.
end := 0
for i := range s {
if i > previewCap {
break
}
end = i
}
return s[:end]
}
// InsertOpts is the agent's record-of-intent. Caller, callee, task preview, // InsertOpts is the agent's record-of-intent. Caller, callee, task preview,
// and the chosen delegation_id are required; idempotency_key is optional. // and the chosen delegation_id are required; idempotency_key is optional.
type InsertOpts struct { type InsertOpts struct {
@ -118,7 +94,7 @@ func (l *DelegationLedger) Insert(ctx context.Context, opts InsertOpts) {
) VALUES ($1, $2, $3, $4, 'queued', $5, $6) ) VALUES ($1, $2, $3, $4, 'queued', $5, $6)
ON CONFLICT (delegation_id) DO NOTHING ON CONFLICT (delegation_id) DO NOTHING
`, opts.DelegationID, opts.CallerID, opts.CalleeID, `, opts.DelegationID, opts.CallerID, opts.CalleeID,
truncatePreview(opts.TaskPreview), deadline, idemArg) textutil.TruncateBytesNoMarker(opts.TaskPreview, previewCap), deadline, idemArg)
if err != nil { if err != nil {
log.Printf("delegation_ledger Insert(%s): %v", opts.DelegationID, err) log.Printf("delegation_ledger Insert(%s): %v", opts.DelegationID, err)
} }
@ -197,7 +173,7 @@ func (l *DelegationLedger) SetStatus(ctx context.Context,
result_preview = NULLIF($4, ''), result_preview = NULLIF($4, ''),
updated_at = now() updated_at = now()
WHERE delegation_id = $1 WHERE delegation_id = $1
`, delegationID, status, errorDetail, truncatePreview(resultPreview)) `, delegationID, status, errorDetail, textutil.TruncateBytesNoMarker(resultPreview, previewCap))
return err return err
} }

View File

@ -2,6 +2,7 @@ package handlers
import ( import (
"context" "context"
"database/sql/driver"
"errors" "errors"
"strings" "strings"
"testing" "testing"
@ -74,15 +75,20 @@ func TestLedgerInsert_TruncatesOversizedPreview(t *testing.T) {
mock := setupTestDB(t) mock := setupTestDB(t)
l := NewDelegationLedger(nil) l := NewDelegationLedger(nil)
huge := strings.Repeat("x", 10_000) // > previewCap // 4096 / 3 = 1365 runes; +10 for margin so we cross the cap.
// '世' is 3 bytes in UTF-8 (worst case for byte-cap rune walking).
huge := strings.Repeat("世", (previewCap/3)+10)
if len(huge) <= previewCap {
t.Fatalf("test setup: input too short (%d bytes) — must exceed previewCap=%d", len(huge), previewCap)
}
mock.ExpectExec(`INSERT INTO delegations`). mock.ExpectExec(`INSERT INTO delegations`).
WithArgs( WithArgs(
"deleg-big", "deleg-big",
"c", "ca", "c", "ca",
sqlmock.AnyArg(), // truncated preview — verify length below via custom matcher capValidUTF8Matcher{cap: previewCap}, // truncated preview must fit cap AND be valid UTF-8
sqlmock.AnyArg(), sqlmock.AnyArg(), // deadline
sqlmock.AnyArg(), sqlmock.AnyArg(), // idempotency_key
). ).
WillReturnResult(sqlmock.NewResult(0, 1)) WillReturnResult(sqlmock.NewResult(0, 1))
@ -97,87 +103,28 @@ func TestLedgerInsert_TruncatesOversizedPreview(t *testing.T) {
} }
} }
// ---------- truncatePreview unit ---------- // capValidUTF8Matcher pins #2962 at the integration boundary: the
// preview that lands in the INSERT MUST be valid UTF-8 (else Postgres
// JSONB rejects → silent audit gap) AND fit within the byte cap. Pre-
// migration this would have asserted on the corrupted "世" mid-codepoint
// byte slice; post-migration it asserts the truncated preview is a
// clean rune-aligned prefix.
type capValidUTF8Matcher struct{ cap int }
func TestTruncatePreview_UnderCap(t *testing.T) { func (m capValidUTF8Matcher) Match(v driver.Value) bool {
in := "short" s, ok := v.(string)
if got := truncatePreview(in); got != in { if !ok {
t.Errorf("under-cap should passthrough; got %q", got) return false
} }
return len(s) <= m.cap && utf8.ValidString(s)
} }
func TestTruncatePreview_OverCapTruncatesAtBoundary(t *testing.T) { // Helper-level truncation tests now live in
in := strings.Repeat("a", previewCap+100) // internal/textutil/truncate_test.go. The integration-level path
got := truncatePreview(in) // (TestLedgerInsert_TruncatesOversizedPreview above) still exercises
if len(got) != previewCap { // the previewCap boundary through the SQL write so a regression in
t.Errorf("expected len=%d got len=%d", previewCap, len(got)) // the wiring (wrong cap, wrong helper, missing call) would still go
} // red here.
}
func TestTruncatePreview_ExactlyAtCap(t *testing.T) {
in := strings.Repeat("a", previewCap)
got := truncatePreview(in)
if got != in {
t.Errorf("at-cap should passthrough unchanged")
}
}
// TestTruncatePreview_NeverProducesInvalidUTF8 — pins #2962. The old
// byte-slice implementation (s[:previewCap]) split on a byte boundary,
// so a multi-byte codepoint straddling byte 4096 produced invalid
// UTF-8 → Postgres JSONB rejects → ledger row not inserted → audit
// gap. Test feeds a CJK / emoji-padded string longer than previewCap
// and asserts utf8.ValidString on the result.
func TestTruncatePreview_NeverProducesInvalidUTF8(t *testing.T) {
// Build a string of '世' (3 bytes per rune in UTF-8) that's just
// past the cap. With the old implementation, the slice at byte
// previewCap would land mid-rune and ValidString would fail.
// With the rune-aware implementation, the result is always valid
// UTF-8 even if the byte length is < previewCap.
rune3 := "世" // U+4E16, 3 bytes
// Need at least previewCap/3 + 1 runes so we cross the cap with
// margin to spare.
in := strings.Repeat(rune3, (previewCap/3)+10)
if len(in) <= previewCap {
t.Fatalf("test setup: input too short (%d bytes) — must exceed previewCap=%d", len(in), previewCap)
}
got := truncatePreview(in)
if !utf8.ValidString(got) {
t.Errorf("truncatePreview produced invalid UTF-8 — JSONB will reject this row. len(got)=%d", len(got))
}
if len(got) > previewCap {
t.Errorf("truncatePreview exceeded cap: len(got)=%d > previewCap=%d", len(got), previewCap)
}
// Defense-in-depth: the result should also be a clean rune
// prefix of the input — not some garbled sequence.
if !strings.HasPrefix(in, got) {
t.Errorf("truncatePreview should return a prefix of the input")
}
}
// TestTruncatePreview_MultiByteAtBoundary — most-targeted regression.
// Feeds an input where the cap byte falls EXACTLY in the middle of a
// 3-byte codepoint. Pre-fix, this is the case that produces invalid
// UTF-8; post-fix, the truncate stops at the previous rune boundary.
func TestTruncatePreview_MultiByteAtBoundary(t *testing.T) {
// Build a string that's `previewCap-1` ASCII bytes followed by
// '世' (3 bytes). Total = previewCap + 2. The old impl would
// slice at byte previewCap, landing inside the '世' codepoint.
prefix := strings.Repeat("a", previewCap-1)
in := prefix + "世"
if len(in) != previewCap+2 {
t.Fatalf("test setup: expected len %d, got %d", previewCap+2, len(in))
}
got := truncatePreview(in)
if !utf8.ValidString(got) {
t.Errorf("truncatePreview produced invalid UTF-8 at the multi-byte boundary case")
}
// Result should be exactly the ASCII prefix — '世' was past
// the cap so it must be dropped entirely.
if got != prefix {
t.Errorf("expected exact ASCII prefix, got %q (len=%d)", got[len(got)-10:], len(got))
}
}
// ---------- SetStatus lifecycle ---------- // ---------- SetStatus lifecycle ----------

View File

@ -35,6 +35,7 @@ import (
"time" "time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/contract" "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/contract"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/textutil"
) )
const ( const (
@ -340,7 +341,7 @@ func decodeError(resp *http.Response) error {
// have rather than dropping it. // have rather than dropping it.
return &contract.Error{ return &contract.Error{
Code: httpStatusToCode(resp.StatusCode), Code: httpStatusToCode(resp.StatusCode),
Message: fmt.Sprintf("status %d: %s", resp.StatusCode, truncate(string(body), 256)), Message: fmt.Sprintf("status %d: %s", resp.StatusCode, textutil.TruncateBytes(string(body), 256)),
} }
} }
return &e return &e
@ -359,12 +360,7 @@ func httpStatusToCode(status int) contract.ErrorCode {
} }
} }
func truncate(s string, n int) string { // truncation moved to internal/textutil.TruncateBytes (#2962 SSOT).
if len(s) <= n {
return s
}
return s[:n] + "…"
}
// --- Circuit breaker --- // --- Circuit breaker ---

View File

@ -499,14 +499,10 @@ func TestHttpStatusToCode(t *testing.T) {
} }
} }
func TestTruncate(t *testing.T) { // Truncate moved to internal/textutil — coverage lives in
if got := truncate("short", 10); got != "short" { // internal/textutil/truncate_test.go (TestTruncateBytes_RuneBoundary).
t.Errorf("got %q", got) // memory/client just calls it as a wire-shape helper for error
} // messages; no client-specific behavior to pin here.
if got := truncate(strings.Repeat("a", 300), 10); !strings.HasSuffix(got, "…") {
t.Errorf("expected ellipsis: %q", got)
}
}
// --- Circuit breaker --- // --- Circuit breaker ---

View File

@ -17,6 +17,7 @@ import (
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events" "github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/metrics" "github.com/Molecule-AI/molecule-monorepo/platform/internal/metrics"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/supervised" "github.com/Molecule-AI/molecule-monorepo/platform/internal/supervised"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/textutil"
) )
const ( const (
@ -522,7 +523,7 @@ func (s *Scheduler) fireSchedule(ctx context.Context, sched scheduleRow) {
"schedule_id": sched.ID, "schedule_id": sched.ID,
"schedule_name": sched.Name, "schedule_name": sched.Name,
"cron_expr": sched.CronExpr, "cron_expr": sched.CronExpr,
"prompt": sanitizeUTF8(truncate(sched.Prompt, 200)), "prompt": sanitizeUTF8(textutil.TruncateBytes(sched.Prompt, 200)),
}) })
// #152: persist lastError into error_detail on the activity_logs row // #152: persist lastError into error_detail on the activity_logs row
// so GET /workspaces/:id/schedules/:id/history can surface why a run // so GET /workspaces/:id/schedules/:id/history can surface why a run
@ -807,27 +808,10 @@ func isEmptyResponse(body []byte) bool {
return false return false
} }
// truncate shortens s to at most maxLen bytes, appending "..." if truncated. // truncation moved to internal/textutil.TruncateBytes (#2962 SSOT).
// #2026: UTF-8 safe — byte-slicing at maxLen-3 would split multi-byte runes // The original #2026 fix lives in textutil's package docs as canonical
// (observed: U+2026 `…` = 0xe2 0x80 0xa6, sliced mid-char, concatenated with // prior art. Ellipsis was previously "..." (3 ASCII bytes); the SSOT
// "..." producing 0xe2 0x80 0x2e — rejected by Postgres as invalid UTF-8, // uses "…" (3 UTF-8 bytes) — same byte budget, single-glyph display.
// which wedged the activity_logs INSERT with no deadline and stalled the
// scheduler).
func truncate(s string, maxLen int) string {
if len(s) <= maxLen {
return s
}
cut := maxLen - 3
if cut < 0 {
cut = 0
}
// Back up to a rune boundary — utf8.RuneStart returns true for any
// non-continuation byte (ASCII, or the lead byte of a multi-byte rune).
for cut > 0 && !utf8.RuneStart(s[cut]) {
cut--
}
return s[:cut] + "..."
}
// short returns up to n leading characters of s without panicking when s is // short returns up to n leading characters of s without panicking when s is
// shorter than n. Used to safely display UUID prefixes in log lines where // shorter than n. Used to safely display UUID prefixes in log lines where

View File

@ -10,6 +10,7 @@ import (
sqlmock "github.com/DATA-DOG/go-sqlmock" sqlmock "github.com/DATA-DOG/go-sqlmock"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db" "github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/textutil"
) )
// errDBDown is a sentinel error used by tests to simulate a DB connection failure. // errDBDown is a sentinel error used by tests to simulate a DB connection failure.
@ -618,7 +619,7 @@ func TestTruncate_utf8Safe_regression2026(t *testing.T) {
filler += "a" filler += "a"
} }
input := filler + "…xxx" // 195 ASCII + 3-byte rune + 3 trailing input := filler + "…xxx" // 195 ASCII + 3-byte rune + 3 trailing
out := truncate(input, 200) out := textutil.TruncateBytes(input, 200)
if !utf8.ValidString(out) { if !utf8.ValidString(out) {
t.Fatalf("truncate produced invalid UTF-8: %x", []byte(out)) t.Fatalf("truncate produced invalid UTF-8: %x", []byte(out))

View File

@ -0,0 +1,130 @@
// Package textutil provides string-handling helpers that respect UTF-8
// rune boundaries.
//
// Why this package exists
// -----------------------
// `s[:max]` truncates by BYTES; for any string with a multi-byte
// codepoint at byte `max` (CJK, emoji, accented Latin), the slice
// produces invalid UTF-8. Postgres `text` and `jsonb` columns reject
// invalid UTF-8 with `invalid byte sequence for encoding "UTF8"`,
// which silently fails the INSERT and holds the surrounding tx open
// — a class of audit-gap that has bitten this codebase three times
// (scheduler.go #2026, agent_message_writer.go #2959,
// delegation_ledger.go #2962). Six per-package helpers had
// independently re-implemented this logic with varying correctness;
// this package is the single source of truth.
//
// Use sites
// ---------
// - DB writes whose column is bytes-bounded (jsonb preview field,
// varchar(N)): TruncateBytes / TruncateBytesNoMarker.
// - UI summaries whose cap is in display chars, not bytes:
// TruncateRunes.
//
// All functions guarantee `utf8.ValidString(out) == true` for any
// `s` where `utf8.ValidString(s) == true`. Inputs that are already
// invalid UTF-8 should be sanitized at the trust boundary (e.g. via
// `strings.ToValidUTF8`); this package does not silently fix
// upstream invalid input.
package textutil
import "unicode/utf8"
// ellipsis is the truncation marker. U+2026 HORIZONTAL ELLIPSIS —
// 3 bytes in UTF-8, 1 rune, 1 display column. Standardized across
// the codebase to avoid the "..." (3 ASCII chars) vs "…" (1 char)
// inconsistency the per-package helpers had drifted into.
const ellipsis = "…"
// TruncateBytes returns s unchanged if `len(s) <= maxBytes`. Otherwise it
// returns the longest rune-aligned prefix of s that fits in
// `maxBytes - len(ellipsis)` bytes, followed by the ellipsis marker. For
// any non-negative maxBytes the result is at most `maxBytes` bytes long.
//
// Example: TruncateBytes("你好世界你好", 10) returns "你好…" (9 bytes).
// The content budget is 10 - 3 = 7 bytes; byte 7 falls inside "世"
// (bytes 6..8), so the cut walks back to byte 6, giving "你好" (6 bytes)
// plus "…" (3 bytes).
//
// Edge cases (all for inputs longer than maxBytes):
//   - maxBytes < len(ellipsis), including zero and negative values:
//     returns "". The marker cannot fit, and a marker-less truncation is
//     not returned here — use TruncateBytesNoMarker for that.
//   - maxBytes == len(ellipsis): returns just the marker, with no content.
//   - s contains invalid UTF-8: the walk-back only looks for a byte that
//     is not a UTF-8 continuation byte, so invalid bytes before the cut
//     point are preserved as-is. Callers are responsible for sanitizing
//     the input first if the output must be valid UTF-8.
func TruncateBytes(s string, maxBytes int) string {
	if len(s) <= maxBytes {
		return s
	}
	if maxBytes < len(ellipsis) {
		return ""
	}
	// Reserve room for the marker, then walk back to the nearest rune
	// boundary at or below the cut point. cut < len(s) is guaranteed
	// because len(s) > maxBytes, so s[cut] is always in range.
	cut := maxBytes - len(ellipsis)
	for cut > 0 && !utf8.RuneStart(s[cut]) {
		cut--
	}
	return s[:cut] + ellipsis
}
// TruncateBytesNoMarker caps s at maxBytes bytes without appending any
// truncation marker. Inputs that already fit are returned unchanged;
// longer inputs are cut at the last rune boundary at or below maxBytes,
// so a multi-byte codepoint is never split. Use it when the destination
// already implies "preview" / "snippet" and an ellipsis would eat into a
// hard column cap.
//
// Example: TruncateBytesNoMarker("hello world", 5) returns "hello".
//
// Edge case: for inputs longer than maxBytes, maxBytes <= 0 returns "".
func TruncateBytesNoMarker(s string, maxBytes int) string {
	switch {
	case len(s) <= maxBytes:
		return s
	case maxBytes <= 0:
		return ""
	}
	// len(s) > maxBytes here, so s[end] is always in range. Back up until
	// end points at the first byte of a rune (or reaches zero).
	end := maxBytes
	for ; end > 0; end-- {
		if utf8.RuneStart(s[end]) {
			break
		}
	}
	return s[:end]
}
// TruncateRunes returns s if it has at most maxRunes runes, otherwise
// returns the first maxRunes runes followed by the ellipsis marker.
// Use this when the cap is in user-visible characters (UI summary,
// activity feed line) rather than bytes (DB column).
//
// Example: TruncateRunes("你好世界你好", 3) returns "你好世…" — three
// runes plus the marker, regardless of the resulting byte count. Note
// that the marker is added on top of maxRunes, so a truncated result
// holds maxRunes + 1 runes.
//
// Edge case: maxRunes <= 0 returns "" (caller asked for no content).
func TruncateRunes(s string, maxRunes int) string {
	if maxRunes <= 0 {
		return ""
	}
	// Fast path: a string never has more runes than bytes, so if the
	// byte length already fits the cap the rune count does too. This
	// skips the scan for the common case of short ASCII input.
	if len(s) <= maxRunes {
		return s
	}
	// Walk by rune boundaries. i is the byte offset of the current rune;
	// once maxRunes runes have been seen and another one starts, i is
	// the cut point and truncation is known to be required.
	count := 0
	for i := range s {
		if count == maxRunes {
			return s[:i] + ellipsis
		}
		count++
	}
	// Reached when the byte length exceeds maxRunes but the rune count
	// does not, i.e. runeCount(s) <= maxRunes < len(s). This happens with
	// multi-byte runes: TruncateRunes("你好", 5) has 6 bytes but only 2
	// runes, so the input is returned unchanged.
	return s
}

View File

@ -0,0 +1,222 @@
package textutil
import (
"testing"
"unicode/utf8"
)
// TestTruncateBytes_RuneBoundary covers the byte-capped truncation that
// appends the ellipsis marker. Each case checks three things:
//  1. the exact output string (so a changed marker or a dropped rune is
//     caught),
//  2. utf8.ValidString on the output (the invariant that mid-codepoint
//     slicing violated in #2026/#2959/#2962), and
//  3. for positive caps, that the output does not exceed the cap.
//
// Per memory feedback_assert_exact_not_substring.md, assertions compare
// for exact equality rather than substring matches.
func TestTruncateBytes_RuneBoundary(t *testing.T) {
	type testCase struct {
		name     string
		in       string
		maxBytes int
		want     string
	}
	tests := []testCase{
		// Input fits within the cap: returned verbatim.
		{name: "empty", in: "", maxBytes: 10, want: ""},
		{name: "under-cap ASCII", in: "hi", maxBytes: 10, want: "hi"},
		{name: "exactly-at-cap ASCII", in: "hello", maxBytes: 5, want: "hello"},
		{name: "under-cap CJK", in: "你好", maxBytes: 10, want: "你好"}, // 6 bytes
		{name: "exactly-at-cap CJK", in: "你好", maxBytes: 6, want: "你好"},

		// ASCII over the cap: (maxBytes - 3) content bytes + "…".
		{name: "over-cap ASCII", in: "abcdefghij", maxBytes: 6, want: "abc…"},

		// "你好世界" byte layout: 你=0..2, 好=3..5, 世=6..8, 界=9..11.
		// maxBytes=7 gives cut=4, which is inside 好; the walk-back
		// moves to byte 3, so the result is "你" + "…" (6 bytes ≤ 7).
		{name: "over-cap CJK lands mid-codepoint", in: "你好世界", maxBytes: 7, want: "你…"},

		// maxBytes=9 gives cut=6, the first byte of 世, so no walk-back
		// is needed: "你好" + "…" (9 bytes).
		{name: "over-cap CJK rune-aligned", in: "你好世界", maxBytes: 9, want: "你好…"},

		// 😀 (U+1F600) is 4 bytes. maxBytes=7 gives cut=4, the start of
		// the second emoji: "😀" + "…" (7 bytes).
		{name: "over-cap emoji", in: "😀😀😀", maxBytes: 7, want: "😀…"},

		// "ab你好世界" layout: a=0, b=1, 你=2..4, 好=5..7, 世=8..10,
		// 界=11..13 (14 bytes). maxBytes=8 gives cut=5, the start of 好:
		// "ab你" + "…" (8 bytes).
		{name: "mixed prefix ASCII over-cap CJK", in: "ab你好世界", maxBytes: 8, want: "ab你…"},

		// Cap too small to hold even the marker: empty result.
		{name: "cap below ellipsis len", in: "hello", maxBytes: 2, want: ""},
		{name: "cap zero", in: "hello", maxBytes: 0, want: ""},
		{name: "cap negative", in: "hello", maxBytes: -1, want: ""},

		// Cap equal to the marker length: no room for content (cut = 0,
		// s[:0] = ""), but the marker itself fits, so the result is "…".
		{name: "cap equals ellipsis len", in: "hello", maxBytes: 3, want: "…"},
	}
	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			got := TruncateBytes(tc.in, tc.maxBytes)
			if got != tc.want {
				t.Errorf("TruncateBytes(%q, %d) = %q, want %q", tc.in, tc.maxBytes, got, tc.want)
			}
			if !utf8.ValidString(got) {
				t.Errorf("TruncateBytes(%q, %d) returned invalid UTF-8: %q", tc.in, tc.maxBytes, got)
			}
			// The byte cap is only checked when it is positive.
			if tc.maxBytes > 0 && len(got) > tc.maxBytes {
				t.Errorf("TruncateBytes(%q, %d) overflowed cap: len(out)=%d > %d",
					tc.in, tc.maxBytes, len(got), tc.maxBytes)
			}
		})
	}
}
// TestTruncateBytesNoMarker covers the marker-less byte-cap variant.
// Boundary handling matches TruncateBytes, but no bytes are reserved for
// an ellipsis: the cut starts at maxBytes itself and only moves back
// when that offset falls inside a codepoint.
func TestTruncateBytesNoMarker(t *testing.T) {
	type testCase struct {
		name     string
		in       string
		maxBytes int
		want     string
	}
	tests := []testCase{
		{name: "empty", in: "", maxBytes: 10, want: ""},
		{name: "under-cap ASCII", in: "hi", maxBytes: 10, want: "hi"},
		{name: "exactly-at-cap ASCII", in: "hello", maxBytes: 5, want: "hello"},
		{name: "over-cap ASCII", in: "abcdefghij", maxBytes: 5, want: "abcde"},

		// "你好世界" with maxBytes=6: byte 6 is the first byte of 世, so
		// s[:6] = "你好" is already a clean cut.
		{name: "over-cap CJK rune-aligned", in: "你好世界", maxBytes: 6, want: "你好"},

		// maxBytes=4 lands inside 好 (bytes 3..5); the walk-back stops
		// at byte 3, leaving s[:3] = "你".
		{name: "over-cap CJK mid-codepoint", in: "你好世界", maxBytes: 4, want: "你"},

		// "😀😀" occupies bytes 0..3 and 4..7. maxBytes=5 lands inside
		// the second emoji; the walk-back stops at byte 4: s[:4] = "😀".
		{name: "over-cap emoji", in: "😀😀", maxBytes: 5, want: "😀"},

		// Zero or negative cap yields "".
		{name: "cap zero", in: "hello", maxBytes: 0, want: ""},
		{name: "cap negative", in: "hello", maxBytes: -1, want: ""},

		// Cap of 1 with a multi-byte first rune: the walk-back reaches
		// 0 and the result is "".
		{name: "cap one with leading CJK", in: "你hello", maxBytes: 1, want: ""},
	}
	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			got := TruncateBytesNoMarker(tc.in, tc.maxBytes)
			if got != tc.want {
				t.Errorf("TruncateBytesNoMarker(%q, %d) = %q, want %q", tc.in, tc.maxBytes, got, tc.want)
			}
			if !utf8.ValidString(got) {
				t.Errorf("TruncateBytesNoMarker(%q, %d) returned invalid UTF-8: %q", tc.in, tc.maxBytes, got)
			}
			// The byte cap is only checked when it is positive.
			if tc.maxBytes > 0 && len(got) > tc.maxBytes {
				t.Errorf("TruncateBytesNoMarker(%q, %d) overflowed cap: len(out)=%d > %d",
					tc.in, tc.maxBytes, len(got), tc.maxBytes)
			}
		})
	}
}
// TestTruncateRunes covers the rune-capped variant. The contract under
// test is that maxRunes counts runes (Unicode code points), not bytes:
// "你好世界" with maxRunes=2 yields "你好…" whatever the byte length of
// the result. Each case checks the exact output and that it is valid
// UTF-8.
func TestTruncateRunes(t *testing.T) {
	type testCase struct {
		name     string
		in       string
		maxRunes int
		want     string
	}
	tests := []testCase{
		{name: "empty", in: "", maxRunes: 5, want: ""},
		{name: "under-cap ASCII", in: "hi", maxRunes: 5, want: "hi"},
		{name: "exactly-at-cap ASCII", in: "hello", maxRunes: 5, want: "hello"},
		{name: "over-cap ASCII", in: "abcdefghij", maxRunes: 5, want: "abcde…"},
		{name: "under-cap CJK", in: "你好", maxRunes: 5, want: "你好"},
		{name: "exactly-at-cap CJK", in: "你好", maxRunes: 2, want: "你好"},

		// CJK over the cap: first 3 runes, then the marker.
		{name: "over-cap CJK", in: "你好世界你好", maxRunes: 3, want: "你好世…"},

		// Each of these emoji is a single rune (no ZWJ sequences).
		{name: "over-cap emoji", in: "😀😀😀😀😀", maxRunes: 2, want: "😀😀…"},

		// Mixed ASCII and CJK: 3 runes of "ab你好世界" is "ab你".
		{name: "mixed prefix", in: "ab你好世界", maxRunes: 3, want: "ab你…"},

		// Zero or negative cap yields "".
		{name: "cap zero", in: "hello", maxRunes: 0, want: ""},
		{name: "cap negative", in: "hello", maxRunes: -1, want: ""},
	}
	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			got := TruncateRunes(tc.in, tc.maxRunes)
			if got != tc.want {
				t.Errorf("TruncateRunes(%q, %d) = %q, want %q", tc.in, tc.maxRunes, got, tc.want)
			}
			if !utf8.ValidString(got) {
				t.Errorf("TruncateRunes(%q, %d) returned invalid UTF-8: %q", tc.in, tc.maxRunes, got)
			}
		})
	}
}
// TestTruncate_FuzzInvariants is a property-style sweep over a fixed set
// of valid-UTF-8 samples. Every cap from -1 through len(in)+5 is tried
// against all three truncation functions; each output must be valid
// UTF-8 and, for the byte-capped variants with a positive cap, must stay
// within that cap. It is meant to catch off-by-one cut errors that the
// table-driven cases do not happen to hit.
func TestTruncate_FuzzInvariants(t *testing.T) {
	samples := []string{
		"",
		"a",
		"hello world",
		"你好世界",
		"😀😀😀",
		"ab你c好d世e界",
		"日本語の文字列",
		"🇺🇸🇯🇵", // flags: each is 2 codepoints (regional indicators)
	}
	for _, in := range samples {
		// limit is deliberately not named cap, to avoid shadowing the builtin.
		upper := len(in) + 5
		for limit := -1; limit <= upper; limit++ {
			t.Run("", func(t *testing.T) {
				// Byte cap with marker.
				withMarker := TruncateBytes(in, limit)
				if !utf8.ValidString(withMarker) {
					t.Errorf("TruncateBytes(%q, %d) invalid UTF-8: %q", in, limit, withMarker)
				}
				if limit > 0 && len(withMarker) > limit {
					t.Errorf("TruncateBytes(%q, %d) overflowed: %q (%d bytes)", in, limit, withMarker, len(withMarker))
				}

				// Byte cap without marker.
				noMarker := TruncateBytesNoMarker(in, limit)
				if !utf8.ValidString(noMarker) {
					t.Errorf("TruncateBytesNoMarker(%q, %d) invalid UTF-8: %q", in, limit, noMarker)
				}
				if limit > 0 && len(noMarker) > limit {
					t.Errorf("TruncateBytesNoMarker(%q, %d) overflowed: %q (%d bytes)", in, limit, noMarker, len(noMarker))
				}

				// Rune cap: only UTF-8 validity is checked, since the cap
				// is not expressed in bytes.
				byRunes := TruncateRunes(in, limit)
				if !utf8.ValidString(byRunes) {
					t.Errorf("TruncateRunes(%q, %d) invalid UTF-8: %q", in, limit, byRunes)
				}
			})
		}
	}
}