fix(textutil): SSOT for rune-safe string truncation, fix 3 audit-gap bugs

Closes #2962. ## Why Six per-package `truncate` helpers had drifted into independent re-implementations of the same idea. Three of them (delegation.go, memory/client/client.go, memory-backfill/verify.go) used `s[:max] + "…"` byte-slice form, which on a multi-byte codepoint at byte `max` produces invalid UTF-8 → Postgres `text`/`jsonb` rejects the INSERT silently → `delegation` / `activity_logs` row never lands → audit gap. Three other helpers (delegation_ledger.go #2962, agent_message_writer.go #2959, scheduler.go #2026) had each been fixed in isolation with three slightly different rune-safe shapes — confirming this is a class of bug, not a single instance. ## What New package `internal/textutil` with three rune-safe functions: - `TruncateBytes(s, maxBytes)` — byte-cap, "…" marker. Used by 5 callers writing into byte-bounded columns / log lines. - `TruncateBytesNoMarker(s, maxBytes)` — byte-cap, no marker. Used by delegation_ledger.go where the storage already conveys "preview" and an extra ellipsis would push the result over the column cap. - `TruncateRunes(s, maxRunes)` — rune-cap, "…" marker. Used by agent_message_writer.go where the cap is in display chars (UI summary), not bytes. All three guarantee `utf8.ValidString(out)` for any `utf8.ValidString(in)`. Inputs already invalid go through `sanitizeUTF8` at the call site boundary (scheduler.go preserved this defense-in-depth). ## Migration map | Old | New | Behavior change | |---|---|---| | `delegation_ledger.truncatePreview` | `textutil.TruncateBytesNoMarker(s, 4096)` | none | | `agent_message_writer.truncatePreviewRunes` | `textutil.TruncateRunes(s, n)` | none | | `scheduler.truncate` | `textutil.TruncateBytes(s, n)` | "..." → "…" (3 bytes either way; single-glyph display) | | `delegation.truncate` | `textutil.TruncateBytes(s, n)` | bug fix + ellipsis swap | | `memory/client.truncate` | `textutil.TruncateBytes(s, n)` | bug fix | | `memory-backfill.truncate` | `textutil.TruncateBytes(s, n)` | bug fix | Five separate `truncate*` helpers + their per-package tests removed. Net: 12 files / +427 / -255. ## Tests - `internal/textutil/truncate_test.go` — 27 table-test cases + 145 fuzz-invariant cases asserting `utf8.ValidString` and byte-cap invariants on every output. - `delegation_ledger_test.go TestLedgerInsert_TruncatesOversizedPreview` strengthened with `capValidUTF8Matcher` so the SQL-write argument is asserted to be valid UTF-8 + within cap (not just `AnyArg()`). Mutation-tested: replacing the SSOT call with byte-slice form makes this test fail loud. ## Compatibility - All callers internal; no external API surface change. - Ellipsis swap "..." → "…": same byte budget (3 bytes), single-glyph display. No alerting/grep on either marker in this codebase (verified). Canvas renders both correctly. - DB column widths unchanged (4096 / 80 / 200 / 256 / 300 — all preserved in the migrations). ## Security Fixes a silent INSERT-failure mode that hid `activity_logs` / `delegations` rows containing peer-controlled text. The class of input that triggered it (CJK, emoji, accented Latin) is normal user content, not malicious — but the symptom (audit gap) makes incident reconstruction harder. Helper is pure-function over `string`; no secrets / PII / auth handling involved. Untrusted input is handled identically to before, just rune-aligned now. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-05 23:01:21 -07:00 · 2026-05-05 23:01:21 -07:00 · 656a02fae4
commit 656a02fae4
parent c53155ec5f
14 changed files with 427 additions and 255 deletions
--- a/workspace-server/cmd/memory-backfill/verify.go
+++ b/workspace-server/cmd/memory-backfill/verify.go
@ -21,6 +21,7 @@ import (
 	"os"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/contract"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/textutil"
 )
 // verifyConfig is the typed dependency bundle for verifyParity.
@ -121,7 +122,7 @@ func verifyParity(ctx context.Context, cfg verifyConfig, stdout *os.File) (*veri
 		matched := true
 		for _, c := range legacy {
 			if pluginContents[c] == 0 {
-				fmt.Fprintf(stdout, "[mismatch] workspace=%s missing-from-plugin content=%q\n", wsID, truncate(c, 80))
+				fmt.Fprintf(stdout, "[mismatch] workspace=%s missing-from-plugin content=%q\n", wsID, textutil.TruncateBytes(c, 80))
 				matched = false
 				break
 			}
@ -192,9 +193,4 @@ func queryLegacyMemories(ctx context.Context, db *sql.DB, workspaceID string) ([
 	return out, rows.Err()
 }
-func truncate(s string, n int) string {
+// truncation moved to internal/textutil.TruncateBytes (#2962 SSOT).
 	if len(s) <= n {
 		return s
 	}
 	return s[:n] + "…"
 }
--- a/workspace-server/cmd/memory-backfill/verify_test.go
+++ b/workspace-server/cmd/memory-backfill/verify_test.go
@ -349,16 +349,8 @@ func TestVerifyParity_PickSampleError(t *testing.T) {
 	}
 }
-// --- Truncate ---
+// Truncate moved to internal/textutil — coverage in
-
+// internal/textutil/truncate_test.go (TestTruncateBytes_RuneBoundary).
 func TestVerifyTruncate(t *testing.T) {
 	if got := truncate("short", 10); got != "short" {
 		t.Errorf("got %q", got)
 	}
 	if got := truncate(strings.Repeat("a", 200), 10); !strings.HasSuffix(got, "…") {
 		t.Errorf("expected ellipsis: %q", got)
 	}
 }
 // --- CLI: -verify mode ---
--- a/workspace-server/internal/handlers/a2a_queue.go
+++ b/workspace-server/internal/handlers/a2a_queue.go
@ -22,6 +22,7 @@ import (
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/textutil"
 )
 // extractIdempotencyKey pulls params.message.messageId out of an A2A JSON-RPC
@ -420,7 +421,7 @@ func (h *WorkspaceHandler) stitchDrainResponseToDelegation(ctx context.Context,
 		   AND method         = 'delegate_result'
 		   AND target_id      = $4
 		   AND response_body->>'delegation_id' = $5
-	`, "Delegation completed ("+truncate(responseText, 80)+")", string(respJSON),
+	`, "Delegation completed ("+textutil.TruncateBytes(responseText, 80)+")", string(respJSON),
 		sourceID, targetID, delegationID)
 	if err != nil {
 		log.Printf("A2AQueue drain stitch: update failed for delegation %s: %v", delegationID, err)
@ -439,7 +440,7 @@ func (h *WorkspaceHandler) stitchDrainResponseToDelegation(ctx context.Context,
 		h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationComplete), sourceID, map[string]interface{}{
 			"delegation_id":    delegationID,
 			"target_id":        targetID,
-			"response_preview": truncate(responseText, 200),
+			"response_preview": textutil.TruncateBytes(responseText, 200),
 			"via":              "queue_drain",
 		})
 	}
--- a/workspace-server/internal/handlers/agent_message_writer.go
+++ b/workspace-server/internal/handlers/agent_message_writer.go
@ -42,9 +42,9 @@ import (
 	"errors"
 	"fmt"
 	"log"
 	"unicode/utf8"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/textutil"
 )
 // ErrWorkspaceNotFound is returned by AgentMessageWriter.Send when the
@ -54,36 +54,6 @@ import (
 // timeout) surface as wrapped errors and should be treated as 503.
 var ErrWorkspaceNotFound = errors.New("agent_message: workspace not found")
 // truncatePreviewRunes returns at most maxRunes runes of s, plus an ellipsis
 // when truncated. Operates on the rune (codepoint) boundary instead of
 // byte indices — the previous byte-slice version produced invalid UTF-8
 // when maxRunes landed mid-codepoint (CJK, emoji, accented characters
 // in agent-authored chat messages), and Postgres JSONB rejects invalid
 // UTF-8, dropping the activity_log INSERT silently. The persistence
 // failure log fires but the message vanishes from chat history — the
 // exact regression class the SSOT consolidation was built to prevent.
 //
 // maxRunes is in runes, not bytes — `truncatePreviewRunes("你好", 1)` returns
 // `"你…"`, not `"\xe4…"`. Set the cap on a UI-friendly basis (visible
 // character count, not stored byte count); 80 runes covers the
 // activity_logs.summary column comfortably.
 func truncatePreviewRunes(s string, maxRunes int) string {
 	if utf8.RuneCountInString(s) <= maxRunes {
 		return s
 	}
 	// Walk runes until we've consumed maxRunes; cut at that byte index.
 	count := 0
 	cut := len(s)
 	for i := range s {
 		if count == maxRunes {
 			cut = i
 			break
 		}
 		count++
 	}
 	return s[:cut] + "…"
 }
 // AgentMessageAttachment is one file attached to an agent → user
 // message. Identical to handlers.NotifyAttachment in field set; kept
 // distinct so the writer's API doesn't import a handler type with HTTP
@ -186,7 +156,7 @@ func (w *AgentMessageWriter) Send(
 		respPayload["parts"] = fileParts
 	}
 	respJSON, _ := json.Marshal(respPayload)
-	preview := truncatePreviewRunes(message, 80)
+	preview := textutil.TruncateRunes(message, 80)
 	if _, err := w.db.ExecContext(ctx, `
 		INSERT INTO activity_logs (workspace_id, activity_type, method, summary, response_body, status)
 		VALUES ($1, 'a2a_receive', 'notify', $2, $3::jsonb, 'ok')
--- a/workspace-server/internal/handlers/agent_message_writer_test.go
+++ b/workspace-server/internal/handlers/agent_message_writer_test.go
@ -331,45 +331,11 @@ func TestAgentMessageWriter_Send_DBErrorOnLookupReturnsWrapped(t *testing.T) {
 	}
 }
-// TestTruncatePreviewRunes_RuneBoundary pins the multi-byte-safe
+// Helper-level truncate tests now live in
-// truncation. The previous byte-slice version produced invalid UTF-8
+// internal/textutil/truncate_test.go (TestTruncateRunes). The
-// when the cut landed mid-codepoint (CJK, emoji, accented), and
+// integration-level coverage that exercises the agent_message_writer
-// Postgres JSONB rejects invalid UTF-8 — INSERT fails, log.Printf
+// path with non-ASCII content is TestAgentMessageWriter_Send_NonASCIIMessagePersists
-// fires, message vanishes from chat history. Per memory
+// below.
 // feedback_assert_exact_not_substring.md, pin the boundary cases
 // directly.
 func TestTruncatePreviewRunes_RuneBoundary(t *testing.T) {
 	cases := []struct {
 		name     string
 		in       string
 		max      int
 		want     string
 	}{
 		{"under-max ASCII", "hi", 80, "hi"},
 		{"under-max CJK", "你好", 80, "你好"},
 		{"exactly-at-max", "abcde", 5, "abcde"},
 		{"truncate ASCII", "abcdefghij", 5, "abcde…"},
 		{"truncate CJK at rune boundary", "你好世界你好世界", 4, "你好世界…"},
 		{"truncate emoji at rune boundary", "😀😀😀😀😀😀", 3, "😀😀😀…"},
 		// The pre-fix bug shape: byte-slice on non-ASCII would have
 		// mangled the codepoint here. With rune-boundary truncation
 		// the result is well-formed UTF-8.
 		{"non-zero with emoji prefix", "🚀abcdefghijk", 5, "🚀abcd…"},
 	}
 	for _, c := range cases {
 		t.Run(c.name, func(t *testing.T) {
 			got := truncatePreviewRunes(c.in, c.max)
 			if got != c.want {
 				t.Errorf("truncatePreviewRunes(%q, %d) = %q, want %q", c.in, c.max, got, c.want)
 			}
 			// Always-valid UTF-8 invariant. A byte-slice truncation
 			// could leave partial codepoints; this version must not.
 			if !utf8.ValidString(got) {
 				t.Errorf("truncatePreviewRunes(%q, %d) returned invalid UTF-8: %q", c.in, c.max, got)
 			}
 		})
 	}
 }
 // TestAgentMessageWriter_Send_NonASCIIMessagePersists pins the end-to-end
 // path for non-ASCII messages — the original reno-stars regression
--- a/workspace-server/internal/handlers/delegation.go
+++ b/workspace-server/internal/handlers/delegation.go
@ -10,6 +10,7 @@ import (
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/textutil"
 	"github.com/gin-gonic/gin"
 	"github.com/google/uuid"
 )
@ -167,7 +168,7 @@ func (h *DelegationHandler) Delegate(c *gin.Context) {
 	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationSent), sourceID, map[string]interface{}{
 		"delegation_id": delegationID,
 		"target_id":     body.TargetID,
-		"task_preview":  truncate(body.Task, 100),
+		"task_preview":  textutil.TruncateBytes(body.Task, 100),
 	})
 	resp := gin.H{
@ -407,7 +408,7 @@ func (h *DelegationHandler) executeDelegation(sourceID, targetID, delegationID s
 	if _, err := db.DB.ExecContext(ctx, `
 		INSERT INTO activity_logs (workspace_id, activity_type, method, source_id, target_id, summary, response_body, status)
 		VALUES ($1, 'delegation', 'delegate_result', $2, $3, $4, $5::jsonb, 'completed')
-	`, sourceID, sourceID, targetID, "Delegation completed ("+truncate(responseText, 80)+")", string(respJSON)); err != nil {
+	`, sourceID, sourceID, targetID, "Delegation completed ("+textutil.TruncateBytes(responseText, 80)+")", string(respJSON)); err != nil {
 		log.Printf("Delegation %s: failed to insert success log: %v", delegationID, err)
 	}
@ -423,7 +424,7 @@ func (h *DelegationHandler) executeDelegation(sourceID, targetID, delegationID s
 	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationComplete), sourceID, map[string]interface{}{
 		"delegation_id":    delegationID,
 		"target_id":        targetID,
-		"response_preview": truncate(responseText, 200),
+		"response_preview": textutil.TruncateBytes(responseText, 200),
 	})
 	// RFC #2829 PR-2 result-push (see UpdateStatus for rationale).
 	pushDelegationResultToInbox(ctx, sourceID, delegationID, "completed", responseText, "")
@ -506,7 +507,7 @@ func (h *DelegationHandler) Record(c *gin.Context) {
 	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationSent), sourceID, map[string]interface{}{
 		"delegation_id": body.DelegationID,
 		"target_id":     body.TargetID,
-		"task_preview":  truncate(body.Task, 100),
+		"task_preview":  textutil.TruncateBytes(body.Task, 100),
 	})
 	c.JSON(http.StatusAccepted, gin.H{
@ -555,12 +556,12 @@ func (h *DelegationHandler) UpdateStatus(c *gin.Context) {
 		if _, err := db.DB.ExecContext(ctx, `
 			INSERT INTO activity_logs (workspace_id, activity_type, method, source_id, summary, response_body, status)
 			VALUES ($1, 'delegation', 'delegate_result', $2, $3, $4::jsonb, 'completed')
-		`, sourceID, sourceID, "Delegation completed ("+truncate(body.ResponsePreview, 80)+")", string(respJSON)); err != nil {
+		`, sourceID, sourceID, "Delegation completed ("+textutil.TruncateBytes(body.ResponsePreview, 80)+")", string(respJSON)); err != nil {
 			log.Printf("Delegation UpdateStatus: result insert failed for %s: %v", delegationID, err)
 		}
 		h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationComplete), sourceID, map[string]interface{}{
 			"delegation_id":    delegationID,
-			"response_preview": truncate(body.ResponsePreview, 200),
+			"response_preview": textutil.TruncateBytes(body.ResponsePreview, 200),
 		})
 		// RFC #2829 PR-2 result-push: when the gate is on, also write an
 		// a2a_receive row so the caller's inbox poller surfaces this to
@ -626,7 +627,7 @@ func (h *DelegationHandler) ListDelegations(c *gin.Context) {
 			entry["error"] = errorDetail
 		}
 		if responseBody != "" {
-			entry["response_preview"] = truncate(responseBody, 300)
+			entry["response_preview"] = textutil.TruncateBytes(responseBody, 300)
 		}
 		delegations = append(delegations, entry)
 	}
@ -727,9 +728,3 @@ func extractResponseText(body []byte) string {
 	return string(body)
 }
 func truncate(s string, max int) string {
 	if len(s) <= max {
 		return s
 	}
 	return s[:max] + "..."
 }
--- a/workspace-server/internal/handlers/delegation_ledger.go
+++ b/workspace-server/internal/handlers/delegation_ledger.go
@ -8,6 +8,7 @@ import (
 	"time"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/textutil"
 )
 // delegation_ledger.go — durable per-task ledger for A2A delegation
@ -50,40 +51,15 @@ func NewDelegationLedger(handle *sql.DB) *DelegationLedger {
 	return &DelegationLedger{db: handle}
 }
-// truncatePreview caps stored preview at 4KB. The full prompt/response is
+// previewCap caps stored preview at 4KB. The full prompt/response is
-// already in activity_logs.{request,response}_body — this is the at-a-glance
+// already in activity_logs.{request,response}_body — this is the
-// view for the dashboard, not a forensic record.
+// at-a-glance view for the dashboard, not a forensic record.
 //
-// Rune-safe: previous byte-slice form (s[:previewCap]) split on a byte
+// Truncation goes through textutil.TruncateBytesNoMarker so it's
-// boundary, which on a multi-byte codepoint at byte 4096 produced
+// rune-safe (#2026 / #2959 / #2962 bug class: byte-slice mid-codepoint
-// invalid UTF-8 — Postgres JSONB rejects → ledger row not inserted →
+// → Postgres JSONB rejects → silent INSERT failure → audit gap).
 // audit gap. Issue #2962. Walks the string by rune, stops at the last
 // rune-boundary index that fits inside the cap. ASCII-only strings hit
 // the cap exactly; CJK/emoji strings stop slightly under the cap,
 // never over.
 //
 // Mirrors the truncatePreviewRunes fix from agent_message_writer.go
 // (#2959). Both call sites should consume a shared helper after both
 // fixes have landed — followup deduplication tracked in #2962's body.
 const previewCap = 4096
 func truncatePreview(s string) string {
 	if len(s) <= previewCap {
 		return s
 	}
 	// Range over a string yields rune-boundary byte indices. Walk
 	// until the next index would exceed previewCap; the previous
 	// index is the safe truncation point.
 	end := 0
 	for i := range s {
 		if i > previewCap {
 			break
 		}
 		end = i
 	}
 	return s[:end]
 }
 // InsertOpts is the agent's record-of-intent. Caller, callee, task preview,
 // and the chosen delegation_id are required; idempotency_key is optional.
 type InsertOpts struct {
@ -118,7 +94,7 @@ func (l *DelegationLedger) Insert(ctx context.Context, opts InsertOpts) {
 		) VALUES ($1, $2, $3, $4, 'queued', $5, $6)
 		ON CONFLICT (delegation_id) DO NOTHING
 	`, opts.DelegationID, opts.CallerID, opts.CalleeID,
-		truncatePreview(opts.TaskPreview), deadline, idemArg)
+		textutil.TruncateBytesNoMarker(opts.TaskPreview, previewCap), deadline, idemArg)
 	if err != nil {
 		log.Printf("delegation_ledger Insert(%s): %v", opts.DelegationID, err)
 	}
@ -197,7 +173,7 @@ func (l *DelegationLedger) SetStatus(ctx context.Context,
 		    result_preview = NULLIF($4, ''),
 		    updated_at = now()
 		WHERE delegation_id = $1
-	`, delegationID, status, errorDetail, truncatePreview(resultPreview))
+	`, delegationID, status, errorDetail, textutil.TruncateBytesNoMarker(resultPreview, previewCap))
 	return err
 }
--- a/workspace-server/internal/handlers/delegation_ledger_test.go
+++ b/workspace-server/internal/handlers/delegation_ledger_test.go
@ -2,6 +2,7 @@ package handlers
 import (
 	"context"
 	"database/sql/driver"
 	"errors"
 	"strings"
 	"testing"
@ -74,15 +75,20 @@ func TestLedgerInsert_TruncatesOversizedPreview(t *testing.T) {
 	mock := setupTestDB(t)
 	l := NewDelegationLedger(nil)
-	huge := strings.Repeat("x", 10_000) // > previewCap
+	// 4096 / 3 = 1365 runes; +10 for margin so we cross the cap.
 	// '世' is 3 bytes in UTF-8 (worst case for byte-cap rune walking).
 	huge := strings.Repeat("世", (previewCap/3)+10)
 	if len(huge) <= previewCap {
 		t.Fatalf("test setup: input too short (%d bytes) — must exceed previewCap=%d", len(huge), previewCap)
 	}
 	mock.ExpectExec(`INSERT INTO delegations`).
 		WithArgs(
 			"deleg-big",
 			"c", "ca",
-			sqlmock.AnyArg(), // truncated preview — verify length below via custom matcher
+			capValidUTF8Matcher{cap: previewCap}, // truncated preview must fit cap AND be valid UTF-8
-			sqlmock.AnyArg(),
+			sqlmock.AnyArg(),                     // deadline
-			sqlmock.AnyArg(),
+			sqlmock.AnyArg(),                     // idempotency_key
 		).
 		WillReturnResult(sqlmock.NewResult(0, 1))
@ -97,87 +103,28 @@ func TestLedgerInsert_TruncatesOversizedPreview(t *testing.T) {
 	}
 }
-// ---------- truncatePreview unit ----------
+// capValidUTF8Matcher pins #2962 at the integration boundary: the
 // preview that lands in the INSERT MUST be valid UTF-8 (else Postgres
 // JSONB rejects → silent audit gap) AND fit within the byte cap. Pre-
 // migration this would have asserted on the corrupted "世" mid-codepoint
 // byte slice; post-migration it asserts the truncated preview is a
 // clean rune-aligned prefix.
 type capValidUTF8Matcher struct{ cap int }
-func TestTruncatePreview_UnderCap(t *testing.T) {
+func (m capValidUTF8Matcher) Match(v driver.Value) bool {
-	in := "short"
+	s, ok := v.(string)
-	if got := truncatePreview(in); got != in {
+	if !ok {
-		t.Errorf("under-cap should passthrough; got %q", got)
+		return false
 	}
 	return len(s) <= m.cap && utf8.ValidString(s)
 }
-func TestTruncatePreview_OverCapTruncatesAtBoundary(t *testing.T) {
+// Helper-level truncation tests now live in
-	in := strings.Repeat("a", previewCap+100)
+// internal/textutil/truncate_test.go. The integration-level path
-	got := truncatePreview(in)
+// (TestLedgerInsert_TruncatesOversizedPreview above) still exercises
-	if len(got) != previewCap {
+// the previewCap boundary through the SQL write so a regression in
-		t.Errorf("expected len=%d got len=%d", previewCap, len(got))
+// the wiring (wrong cap, wrong helper, missing call) would still go
-	}
+// red here.
 }
 func TestTruncatePreview_ExactlyAtCap(t *testing.T) {
 	in := strings.Repeat("a", previewCap)
 	got := truncatePreview(in)
 	if got != in {
 		t.Errorf("at-cap should passthrough unchanged")
 	}
 }
 // TestTruncatePreview_NeverProducesInvalidUTF8 — pins #2962. The old
 // byte-slice implementation (s[:previewCap]) split on a byte boundary,
 // so a multi-byte codepoint straddling byte 4096 produced invalid
 // UTF-8 → Postgres JSONB rejects → ledger row not inserted → audit
 // gap. Test feeds a CJK / emoji-padded string longer than previewCap
 // and asserts utf8.ValidString on the result.
 func TestTruncatePreview_NeverProducesInvalidUTF8(t *testing.T) {
 	// Build a string of '世' (3 bytes per rune in UTF-8) that's just
 	// past the cap. With the old implementation, the slice at byte
 	// previewCap would land mid-rune and ValidString would fail.
 	// With the rune-aware implementation, the result is always valid
 	// UTF-8 even if the byte length is < previewCap.
 	rune3 := "世" // U+4E16, 3 bytes
 	// Need at least previewCap/3 + 1 runes so we cross the cap with
 	// margin to spare.
 	in := strings.Repeat(rune3, (previewCap/3)+10)
 	if len(in) <= previewCap {
 		t.Fatalf("test setup: input too short (%d bytes) — must exceed previewCap=%d", len(in), previewCap)
 	}
 	got := truncatePreview(in)
 	if !utf8.ValidString(got) {
 		t.Errorf("truncatePreview produced invalid UTF-8 — JSONB will reject this row. len(got)=%d", len(got))
 	}
 	if len(got) > previewCap {
 		t.Errorf("truncatePreview exceeded cap: len(got)=%d > previewCap=%d", len(got), previewCap)
 	}
 	// Defense-in-depth: the result should also be a clean rune
 	// prefix of the input — not some garbled sequence.
 	if !strings.HasPrefix(in, got) {
 		t.Errorf("truncatePreview should return a prefix of the input")
 	}
 }
 // TestTruncatePreview_MultiByteAtBoundary — most-targeted regression.
 // Feeds an input where the cap byte falls EXACTLY in the middle of a
 // 3-byte codepoint. Pre-fix, this is the case that produces invalid
 // UTF-8; post-fix, the truncate stops at the previous rune boundary.
 func TestTruncatePreview_MultiByteAtBoundary(t *testing.T) {
 	// Build a string that's `previewCap-1` ASCII bytes followed by
 	// '世' (3 bytes). Total = previewCap + 2. The old impl would
 	// slice at byte previewCap, landing inside the '世' codepoint.
 	prefix := strings.Repeat("a", previewCap-1)
 	in := prefix + "世"
 	if len(in) != previewCap+2 {
 		t.Fatalf("test setup: expected len %d, got %d", previewCap+2, len(in))
 	}
 	got := truncatePreview(in)
 	if !utf8.ValidString(got) {
 		t.Errorf("truncatePreview produced invalid UTF-8 at the multi-byte boundary case")
 	}
 	// Result should be exactly the ASCII prefix — '世' was past
 	// the cap so it must be dropped entirely.
 	if got != prefix {
 		t.Errorf("expected exact ASCII prefix, got %q (len=%d)", got[len(got)-10:], len(got))
 	}
 }
 // ---------- SetStatus lifecycle ----------
--- a/workspace-server/internal/memory/client/client.go
+++ b/workspace-server/internal/memory/client/client.go
@ -35,6 +35,7 @@ import (
 	"time"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/contract"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/textutil"
 )
 const (
@ -340,7 +341,7 @@ func decodeError(resp *http.Response) error {
 		// have rather than dropping it.
 		return &contract.Error{
 			Code:    httpStatusToCode(resp.StatusCode),
-			Message: fmt.Sprintf("status %d: %s", resp.StatusCode, truncate(string(body), 256)),
+			Message: fmt.Sprintf("status %d: %s", resp.StatusCode, textutil.TruncateBytes(string(body), 256)),
 		}
 	}
 	return &e
@ -359,12 +360,7 @@ func httpStatusToCode(status int) contract.ErrorCode {
 	}
 }
-func truncate(s string, n int) string {
+// truncation moved to internal/textutil.TruncateBytes (#2962 SSOT).
 	if len(s) <= n {
 		return s
 	}
 	return s[:n] + "…"
 }
 // --- Circuit breaker ---
--- a/workspace-server/internal/memory/client/client_test.go
+++ b/workspace-server/internal/memory/client/client_test.go
@ -499,14 +499,10 @@ func TestHttpStatusToCode(t *testing.T) {
 	}
 }
-func TestTruncate(t *testing.T) {
+// Truncate moved to internal/textutil — coverage lives in
-	if got := truncate("short", 10); got != "short" {
+// internal/textutil/truncate_test.go (TestTruncateBytes_RuneBoundary).
-		t.Errorf("got %q", got)
+// memory/client just calls it as a wire-shape helper for error
-	}
+// messages; no client-specific behavior to pin here.
 	if got := truncate(strings.Repeat("a", 300), 10); !strings.HasSuffix(got, "…") {
 		t.Errorf("expected ellipsis: %q", got)
 	}
 }
 // --- Circuit breaker ---
--- a/workspace-server/internal/scheduler/scheduler.go
+++ b/workspace-server/internal/scheduler/scheduler.go
@ -17,6 +17,7 @@ import (
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/metrics"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/supervised"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/textutil"
 )
 const (
@ -522,7 +523,7 @@ func (s *Scheduler) fireSchedule(ctx context.Context, sched scheduleRow) {
 		"schedule_id":   sched.ID,
 		"schedule_name": sched.Name,
 		"cron_expr":     sched.CronExpr,
-		"prompt":        sanitizeUTF8(truncate(sched.Prompt, 200)),
+		"prompt":        sanitizeUTF8(textutil.TruncateBytes(sched.Prompt, 200)),
 	})
 	// #152: persist lastError into error_detail on the activity_logs row
 	// so GET /workspaces/:id/schedules/:id/history can surface why a run
@ -807,27 +808,10 @@ func isEmptyResponse(body []byte) bool {
 	return false
 }
-// truncate shortens s to at most maxLen bytes, appending "..." if truncated.
+// truncation moved to internal/textutil.TruncateBytes (#2962 SSOT).
-// #2026: UTF-8 safe — byte-slicing at maxLen-3 would split multi-byte runes
+// The original #2026 fix lives in textutil's package docs as canonical
-// (observed: U+2026 `…` = 0xe2 0x80 0xa6, sliced mid-char, concatenated with
+// prior art. Ellipsis was previously "..." (3 ASCII bytes); the SSOT
-// "..." producing 0xe2 0x80 0x2e — rejected by Postgres as invalid UTF-8,
+// uses "…" (3 UTF-8 bytes) — same byte budget, single-glyph display.
 // which wedged the activity_logs INSERT with no deadline and stalled the
 // scheduler).
 func truncate(s string, maxLen int) string {
 	if len(s) <= maxLen {
 		return s
 	}
 	cut := maxLen - 3
 	if cut < 0 {
 		cut = 0
 	}
 	// Back up to a rune boundary — utf8.RuneStart returns true for any
 	// non-continuation byte (ASCII, or the lead byte of a multi-byte rune).
 	for cut > 0 && !utf8.RuneStart(s[cut]) {
 		cut--
 	}
 	return s[:cut] + "..."
 }
 // short returns up to n leading characters of s without panicking when s is
 // shorter than n. Used to safely display UUID prefixes in log lines where
--- a/workspace-server/internal/scheduler/scheduler_test.go
+++ b/workspace-server/internal/scheduler/scheduler_test.go
@ -10,6 +10,7 @@ import (
 	sqlmock "github.com/DATA-DOG/go-sqlmock"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/textutil"
 )
 // errDBDown is a sentinel error used by tests to simulate a DB connection failure.
@ -618,7 +619,7 @@ func TestTruncate_utf8Safe_regression2026(t *testing.T) {
 		filler += "a"
 	}
 	input := filler + "…xxx" // 195 ASCII + 3-byte rune + 3 trailing
-	out := truncate(input, 200)
+	out := textutil.TruncateBytes(input, 200)
 	if !utf8.ValidString(out) {
 		t.Fatalf("truncate produced invalid UTF-8: %x", []byte(out))
--- a/workspace-server/internal/textutil/truncate.go
+++ b/workspace-server/internal/textutil/truncate.go
@ -0,0 +1,130 @@
 // Package textutil provides string-handling helpers that respect UTF-8
 // rune boundaries.
 //
 // Why this package exists
 // -----------------------
 // `s[:max]` truncates by BYTES; for any string with a multi-byte
 // codepoint at byte `max` (CJK, emoji, accented Latin), the slice
 // produces invalid UTF-8. Postgres `text` and `jsonb` columns reject
 // invalid UTF-8 with `invalid byte sequence for encoding "UTF8"`,
 // which silently fails the INSERT and holds the surrounding tx open
 // — a class of audit-gap that has bitten this codebase three times
 // (scheduler.go #2026, agent_message_writer.go #2959,
 // delegation_ledger.go #2962). Six per-package helpers had
 // independently re-implemented this logic with varying correctness;
 // this package is the single source of truth.
 //
 // Use sites
 // ---------
 //   - DB writes whose column is bytes-bounded (jsonb preview field,
 //     varchar(N)): TruncateBytes / TruncateBytesNoMarker.
 //   - UI summaries whose cap is in display chars, not bytes:
 //     TruncateRunes.
 //
 // All functions guarantee `utf8.ValidString(out) == true` for any
 // `s` where `utf8.ValidString(s) == true`. Inputs that are already
 // invalid UTF-8 should be sanitized at the trust boundary (e.g. via
 // `strings.ToValidUTF8`); this package does not silently fix
 // upstream invalid input.
 package textutil
 import "unicode/utf8"
 // ellipsis is the truncation marker. U+2026 HORIZONTAL ELLIPSIS —
 // 3 bytes in UTF-8, 1 rune, 1 display column. Standardized across
 // the codebase to avoid the "..." (3 ASCII chars) vs "…" (1 char)
 // inconsistency the per-package helpers had drifted into.
 const ellipsis = "…"
 // TruncateBytes returns s if `len(s) <= maxBytes`, otherwise returns
 // the longest rune-aligned prefix of s that fits in `maxBytes - 3`
 // bytes followed by the ellipsis marker. The returned string is
 // always at most `maxBytes` bytes long.
 //
 // Example: TruncateBytes("你好世界你好", 10) returns "你好世…" (9 bytes)
 // — three "你好" runes (each 3 bytes = 9 bytes) plus "…" (3 bytes)
 // would be 12 bytes, so we walk back to "你好" (6 bytes) + "…" (3) = 9.
 //
 // Edge cases:
 //   - maxBytes <= 0: returns "" (no room even for input or marker)
 //   - maxBytes < len(ellipsis): returns "" (can't add marker without
 //     exceeding cap, and we won't return a marker-less truncation
 //     here — caller wanted a marker; use TruncateBytesNoMarker if
 //     they don't)
 //   - s contains invalid UTF-8: continuation bytes are walked over
 //     same as valid runes; the result preserves the (invalid) input
 //     bytes up to the truncation point. Caller is responsible for
 //     pre-sanitizing if Postgres validity is required.
 func TruncateBytes(s string, maxBytes int) string {
 	if len(s) <= maxBytes {
 		return s
 	}
 	if maxBytes < len(ellipsis) {
 		return ""
 	}
 	// Reserve room for the marker, then walk back to the nearest
 	// rune boundary at or below the cut point.
 	cut := maxBytes - len(ellipsis)
 	for cut > 0 && !utf8.RuneStart(s[cut]) {
 		cut--
 	}
 	return s[:cut] + ellipsis
 }
 // TruncateBytesNoMarker returns s if `len(s) <= maxBytes`, otherwise
 // returns the longest rune-aligned prefix of s that fits in
 // `maxBytes` bytes. No marker is appended — useful when the caller's
 // storage already conveys "preview" / "snippet" semantics and an
 // extra ellipsis would push the result over a hard column cap.
 //
 // Example: TruncateBytesNoMarker("hello world", 5) returns "hello".
 //
 // Edge case: maxBytes <= 0 returns "".
 func TruncateBytesNoMarker(s string, maxBytes int) string {
 	if len(s) <= maxBytes {
 		return s
 	}
 	if maxBytes <= 0 {
 		return ""
 	}
 	cut := maxBytes
 	for cut > 0 && !utf8.RuneStart(s[cut]) {
 		cut--
 	}
 	return s[:cut]
 }
 // TruncateRunes returns s if it has at most maxRunes runes, otherwise
 // returns the first maxRunes runes followed by the ellipsis marker.
 // Use this when the cap is in user-visible characters (UI summary,
 // activity feed line) rather than bytes (DB column).
 //
 // Example: TruncateRunes("你好世界你好", 3) returns "你好世…" — three
 // runes plus the marker, regardless of the resulting byte count.
 //
 // Edge case: maxRunes <= 0 returns "" (caller asked for no content).
 func TruncateRunes(s string, maxRunes int) string {
 	if maxRunes <= 0 {
 		return ""
 	}
 	// Fast path: if every byte is a single-byte rune, the byte-length
 	// upper-bounds the rune count. This avoids a runes alloc for the
 	// common ASCII case where the input fits.
 	if len(s) <= maxRunes {
 		return s
 	}
 	// Walk by rune boundaries; stop at the (maxRunes+1)-th rune so we
 	// know the cut point and that truncation is needed.
 	count := 0
 	for i := range s {
 		if count == maxRunes {
 			return s[:i] + ellipsis
 		}
 		count++
 	}
 	// Reachable when the byte count exceeded maxRunes but the actual
 	// rune count didn't (e.g. all single-byte runes that just happen
 	// to be more than maxRunes). The fast path catches len(s) <=
 	// maxRunes; this catches maxRunes < runeCount(s) <= len(s).
 	return s
 }
--- a/workspace-server/internal/textutil/truncate_test.go
+++ b/workspace-server/internal/textutil/truncate_test.go
@ -0,0 +1,222 @@
 package textutil
 import (
 	"testing"
 	"unicode/utf8"
 )
 // TestTruncateBytes_RuneBoundary pins the byte-cap, marker-bearing
 // truncation path. Every case asserts both:
 //  1. the exact expected output (so a refactor that flips ellipsis or
 //     drops a rune is caught), and
 //  2. utf8.ValidString on the output (the invariant that the bug class
 //     in #2026/#2959/#2962 violated by slicing mid-codepoint).
 //
 // Per memory feedback_assert_exact_not_substring.md, asserts are exact
 // equality, not substring matches.
 func TestTruncateBytes_RuneBoundary(t *testing.T) {
 	cases := []struct {
 		name     string
 		in       string
 		maxBytes int
 		want     string
 	}{
 		// Under-cap: returns input verbatim.
 		{"empty", "", 10, ""},
 		{"under-cap ASCII", "hi", 10, "hi"},
 		{"exactly-at-cap ASCII", "hello", 5, "hello"},
 		{"under-cap CJK", "你好", 10, "你好"}, // 6 bytes
 		{"exactly-at-cap CJK", "你好", 6, "你好"},
 		// Over-cap ASCII: trims to (maxBytes - 3) bytes + "…".
 		{"over-cap ASCII", "abcdefghij", 6, "abc…"},
 		// Over-cap CJK where cut would land mid-codepoint. The
 		// pre-fix bug shape: 7 - 3 = 4, but byte 4 is mid-"好"
 		// (好 is bytes 3..5 of "你好世界"). Walking back to byte 3
 		// (start of 好 — wait, that IS the start). Actually 你=0..2,
 		// 好=3..5, 世=6..8, 界=9..11. Cut=4, walk back to 3 (start
 		// of 好), then s[:3]="你", + "…" = "你…" (3+3=6 bytes ≤ 7).
 		{"over-cap CJK lands mid-codepoint", "你好世界", 7, "你…"},
 		// Over-cap CJK where cut lands exactly on rune boundary.
 		// 9 - 3 = 6, byte 6 is start of 世. Walk-back is no-op.
 		// s[:6]="你好" + "…" = "你好…" (9 bytes).
 		{"over-cap CJK rune-aligned", "你好世界", 9, "你好…"},
 		// Emoji: 😀 is 4 bytes (U+1F600). 7 - 3 = 4, byte 4 is start
 		// of second 😀 — walk-back no-op. s[:4]="😀" + "…" = "😀…".
 		{"over-cap emoji", "😀😀😀", 7, "😀…"},
 		// Mixed ASCII + CJK. "ab你好世界": a(1) b(1) 你(3) 好(3) 世(3) 界(3) = 14 bytes.
 		// maxBytes=8, 8-3=5. byte 5 is mid-好. Walk back to start of 好 = byte 5? Let me
 		// recompute: a=0, b=1, 你=2..4, 好=5..7, 世=8..10. Byte 5 IS start of 好.
 		// Walk-back keeps cut at 5. s[:5] = "ab你" + "…" = "ab你…" (8 bytes).
 		{"mixed prefix ASCII over-cap CJK", "ab你好世界", 8, "ab你…"},
 		// Pathological: maxBytes too small to even fit the marker.
 		{"cap below ellipsis len", "hello", 2, ""},
 		{"cap zero", "hello", 0, ""},
 		{"cap negative", "hello", -1, ""},
 		// Cap exactly == ellipsis len: no room for content, but
 		// the marker fits. This returns "" (cut = 0, s[:0] = "").
 		{"cap equals ellipsis len", "hello", 3, "…"},
 	}
 	for _, c := range cases {
 		t.Run(c.name, func(t *testing.T) {
 			got := TruncateBytes(c.in, c.maxBytes)
 			if got != c.want {
 				t.Errorf("TruncateBytes(%q, %d) = %q, want %q", c.in, c.maxBytes, got, c.want)
 			}
 			if !utf8.ValidString(got) {
 				t.Errorf("TruncateBytes(%q, %d) returned invalid UTF-8: %q", c.in, c.maxBytes, got)
 			}
 			// Output never exceeds the byte cap (when one is set).
 			if c.maxBytes > 0 && len(got) > c.maxBytes {
 				t.Errorf("TruncateBytes(%q, %d) overflowed cap: len(out)=%d > %d",
 					c.in, c.maxBytes, len(got), c.maxBytes)
 			}
 		})
 	}
 }
 // TestTruncateBytesNoMarker pins the marker-less variant. Same
 // boundary handling as TruncateBytes but no ellipsis cost — the cut
 // happens at maxBytes itself, walking back only if that lands
 // mid-codepoint.
 func TestTruncateBytesNoMarker(t *testing.T) {
 	cases := []struct {
 		name     string
 		in       string
 		maxBytes int
 		want     string
 	}{
 		{"empty", "", 10, ""},
 		{"under-cap ASCII", "hi", 10, "hi"},
 		{"exactly-at-cap ASCII", "hello", 5, "hello"},
 		{"over-cap ASCII", "abcdefghij", 5, "abcde"},
 		// Over-cap CJK rune-aligned: "你好世界", maxBytes=6, byte 6 is start of 世.
 		// s[:6]="你好" — perfect cut.
 		{"over-cap CJK rune-aligned", "你好世界", 6, "你好"},
 		// Over-cap CJK mid-codepoint: maxBytes=4, byte 4 is mid-好.
 		// Walk back to byte 3 (start of 好), s[:3]="你".
 		{"over-cap CJK mid-codepoint", "你好世界", 4, "你"},
 		// Emoji: maxBytes=5, "😀😀" is bytes 0..3 then 4..7. byte 5 is mid-second-😀.
 		// Walk back to byte 4 (start of second 😀), s[:4]="😀".
 		{"over-cap emoji", "😀😀", 5, "😀"},
 		// Edge: cap zero or negative → "".
 		{"cap zero", "hello", 0, ""},
 		{"cap negative", "hello", -1, ""},
 		// Cap = 1 and first rune is multi-byte: walk-back to 0, return "".
 		{"cap one with leading CJK", "你hello", 1, ""},
 	}
 	for _, c := range cases {
 		t.Run(c.name, func(t *testing.T) {
 			got := TruncateBytesNoMarker(c.in, c.maxBytes)
 			if got != c.want {
 				t.Errorf("TruncateBytesNoMarker(%q, %d) = %q, want %q", c.in, c.maxBytes, got, c.want)
 			}
 			if !utf8.ValidString(got) {
 				t.Errorf("TruncateBytesNoMarker(%q, %d) returned invalid UTF-8: %q", c.in, c.maxBytes, got)
 			}
 			if c.maxBytes > 0 && len(got) > c.maxBytes {
 				t.Errorf("TruncateBytesNoMarker(%q, %d) overflowed cap: len(out)=%d > %d",
 					c.in, c.maxBytes, len(got), c.maxBytes)
 			}
 		})
 	}
 }
 // TestTruncateRunes pins the rune-cap variant. The key contract is
 // that maxRunes counts user-visible characters (Go runes, which line
 // up with Unicode codepoints), not bytes — so "你好世界" with
 // maxRunes=2 returns "你好…", regardless of the resulting byte count.
 func TestTruncateRunes(t *testing.T) {
 	cases := []struct {
 		name     string
 		in       string
 		maxRunes int
 		want     string
 	}{
 		{"empty", "", 5, ""},
 		{"under-cap ASCII", "hi", 5, "hi"},
 		{"exactly-at-cap ASCII", "hello", 5, "hello"},
 		{"over-cap ASCII", "abcdefghij", 5, "abcde…"},
 		{"under-cap CJK", "你好", 5, "你好"},
 		{"exactly-at-cap CJK", "你好", 2, "你好"},
 		// Over-cap CJK: maxRunes=3, expect first 3 runes + marker.
 		{"over-cap CJK", "你好世界你好", 3, "你好世…"},
 		// Emoji is one rune per glyph in Go (no ZWJ here).
 		{"over-cap emoji", "😀😀😀😀😀", 2, "😀😀…"},
 		// Mixed: maxRunes=3 of "ab你好世界" → "ab你…".
 		{"mixed prefix", "ab你好世界", 3, "ab你…"},
 		// Edge: maxRunes 0 / negative → "".
 		{"cap zero", "hello", 0, ""},
 		{"cap negative", "hello", -1, ""},
 	}
 	for _, c := range cases {
 		t.Run(c.name, func(t *testing.T) {
 			got := TruncateRunes(c.in, c.maxRunes)
 			if got != c.want {
 				t.Errorf("TruncateRunes(%q, %d) = %q, want %q", c.in, c.maxRunes, got, c.want)
 			}
 			if !utf8.ValidString(got) {
 				t.Errorf("TruncateRunes(%q, %d) returned invalid UTF-8: %q", c.in, c.maxRunes, got)
 			}
 		})
 	}
 }
 // TestTruncate_FuzzInvariants stays as a property-style sanity check:
 // for any rune-valid input and any cap, the output is rune-valid and
 // (for byte-cap variants) within the cap. This catches off-by-one
 // regressions in cuts that slip past the table-test cases above.
 func TestTruncate_FuzzInvariants(t *testing.T) {
 	inputs := []string{
 		"",
 		"a",
 		"hello world",
 		"你好世界",
 		"😀😀😀",
 		"ab你c好d世e界",
 		"日本語の文字列",
 		"🇺🇸🇯🇵", // flags: each is 2 codepoints (regional indicators)
 	}
 	for _, in := range inputs {
 		for cap := -1; cap <= len(in)+5; cap++ {
 			t.Run("", func(t *testing.T) {
 				gotB := TruncateBytes(in, cap)
 				if !utf8.ValidString(gotB) {
 					t.Errorf("TruncateBytes(%q, %d) invalid UTF-8: %q", in, cap, gotB)
 				}
 				if cap > 0 && len(gotB) > cap {
 					t.Errorf("TruncateBytes(%q, %d) overflowed: %q (%d bytes)", in, cap, gotB, len(gotB))
 				}
 				gotN := TruncateBytesNoMarker(in, cap)
 				if !utf8.ValidString(gotN) {
 					t.Errorf("TruncateBytesNoMarker(%q, %d) invalid UTF-8: %q", in, cap, gotN)
 				}
 				if cap > 0 && len(gotN) > cap {
 					t.Errorf("TruncateBytesNoMarker(%q, %d) overflowed: %q (%d bytes)", in, cap, gotN, len(gotN))
 				}
 				gotR := TruncateRunes(in, cap)
 				if !utf8.ValidString(gotR) {
 					t.Errorf("TruncateRunes(%q, %d) invalid UTF-8: %q", in, cap, gotR)
 				}
 			})
 		}
 	}
 }