diff --git a/workspace-server/internal/handlers/agent_message_writer.go b/workspace-server/internal/handlers/agent_message_writer.go index b6856847..a0f25588 100644 --- a/workspace-server/internal/handlers/agent_message_writer.go +++ b/workspace-server/internal/handlers/agent_message_writer.go @@ -40,7 +40,9 @@ import ( "database/sql" "encoding/json" "errors" + "fmt" "log" + "unicode/utf8" "github.com/Molecule-AI/molecule-monorepo/platform/internal/events" ) @@ -48,9 +50,40 @@ import ( // ErrWorkspaceNotFound is returned by AgentMessageWriter.Send when the // workspace lookup turns up nothing (or the workspace is in // status='removed'). Callers translate to HTTP 404 / JSON-RPC error / -// whatever surface they expose. +// whatever surface they expose. Real DB errors (connection drop, query +// timeout) surface as wrapped errors and should be treated as 503. var ErrWorkspaceNotFound = errors.New("agent_message: workspace not found") +// truncatePreviewRunes returns at most maxRunes runes of s, plus an ellipsis +// when truncated. Operates on the rune (codepoint) boundary instead of +// byte indices — the previous byte-slice version produced invalid UTF-8 +// when maxRunes landed mid-codepoint (CJK, emoji, accented characters +// in agent-authored chat messages), and Postgres JSONB rejects invalid +// UTF-8, dropping the activity_log INSERT silently. The persistence +// failure log fires but the message vanishes from chat history — the +// exact regression class the SSOT consolidation was built to prevent. +// +// maxRunes is in runes, not bytes — `truncatePreviewRunes("你好", 1)` returns +// `"你…"`, not `"\xe4…"`. Set the cap on a UI-friendly basis (visible +// character count, not stored byte count); 80 runes covers the +// activity_logs.summary column comfortably. +func truncatePreviewRunes(s string, maxRunes int) string { + if utf8.RuneCountInString(s) <= maxRunes { + return s + } + // Walk runes until we've consumed maxRunes; cut at that byte index. + count := 0 + cut := len(s) + for i := range s { + if count == maxRunes { + cut = i + break + } + count++ + } + return s[:cut] + "…" +} + // AgentMessageAttachment is one file attached to an agent → user // message. Identical to handlers.NotifyAttachment in field set; kept // distinct so the writer's API doesn't import a handler type with HTTP @@ -96,15 +129,24 @@ func (w *AgentMessageWriter) Send( ) error { // 1. Workspace lookup. status='removed' filter is the same shape /notify // used pre-consolidation; deleted workspaces don't get notifications. + // + // Distinguish sql.ErrNoRows ("workspace genuinely not present" — caller + // should 404) from real DB errors (connection drop, statement timeout, + // pool exhaustion — caller should 503). Pre-fix this branch returned + // ErrWorkspaceNotFound for any error, so during a DB outage every + // notify call surfaced as "workspace not found" and masked real + // incidents in the alert path. var wsName string - if err := w.db.QueryRowContext(ctx, + err := w.db.QueryRowContext(ctx, `SELECT name FROM workspaces WHERE id = $1 AND status != 'removed'`, workspaceID, - ).Scan(&wsName); err != nil { - // Includes sql.ErrNoRows; canonicalize so callers don't have to - // import database/sql to compare. + ).Scan(&wsName) + if errors.Is(err, sql.ErrNoRows) { return ErrWorkspaceNotFound } + if err != nil { + return fmt.Errorf("agent_message: workspace lookup: %w", err) + } // 2. Build broadcast payload + WS-emit. Same shape that ChatTab's // AGENT_MESSAGE handler in canvas/src/store/canvas-events.ts has @@ -144,10 +186,7 @@ func (w *AgentMessageWriter) Send( respPayload["parts"] = fileParts } respJSON, _ := json.Marshal(respPayload) - preview := message - if len(preview) > 80 { - preview = preview[:80] + "…" - } + preview := truncatePreviewRunes(message, 80) if _, err := w.db.ExecContext(ctx, ` INSERT INTO activity_logs (workspace_id, activity_type, method, summary, response_body, status) VALUES ($1, 'a2a_receive', 'notify', $2, $3::jsonb, 'ok') diff --git a/workspace-server/internal/handlers/agent_message_writer_test.go b/workspace-server/internal/handlers/agent_message_writer_test.go index d311c926..880d2273 100644 --- a/workspace-server/internal/handlers/agent_message_writer_test.go +++ b/workspace-server/internal/handlers/agent_message_writer_test.go @@ -7,6 +7,7 @@ import ( "errors" "strings" "testing" + "unicode/utf8" "github.com/DATA-DOG/go-sqlmock" "github.com/Molecule-AI/molecule-monorepo/platform/internal/db" @@ -303,6 +304,122 @@ func TestAgentMessageWriter_Send_BroadcastsAgentMessageEvent(t *testing.T) { } } +// TestAgentMessageWriter_Send_DBErrorOnLookupReturnsWrapped pins the +// distinction between sql.ErrNoRows (legit not-found → 404) and real +// DB errors (connection drop → 503). Pre-followup the lookup branch +// returned ErrWorkspaceNotFound for ANY error, so during a DB outage +// every notify call surfaced as "workspace not found" and masked +// real incidents in alerting. +func TestAgentMessageWriter_Send_DBErrorOnLookupReturnsWrapped(t *testing.T) { + mock := setupTestDB(t) + w := NewAgentMessageWriter(db.DB, newTestBroadcaster()) + + transientErr := errors.New("connection refused") + mock.ExpectQuery("SELECT name FROM workspaces"). + WithArgs("ws-dbdown"). + WillReturnError(transientErr) + + err := w.Send(context.Background(), "ws-dbdown", "hi", nil) + if err == nil { + t.Fatal("expected wrapped DB error, got nil") + } + if errors.Is(err, ErrWorkspaceNotFound) { + t.Errorf("DB outage MUST NOT surface as ErrWorkspaceNotFound (masks incidents in alerting); got %v", err) + } + if !errors.Is(err, transientErr) { + t.Errorf("expected wrapped %v, got %v", transientErr, err) + } +} + +// TestTruncatePreviewRunes_RuneBoundary pins the multi-byte-safe +// truncation. The previous byte-slice version produced invalid UTF-8 +// when the cut landed mid-codepoint (CJK, emoji, accented), and +// Postgres JSONB rejects invalid UTF-8 — INSERT fails, log.Printf +// fires, message vanishes from chat history. Per memory +// feedback_assert_exact_not_substring.md, pin the boundary cases +// directly. +func TestTruncatePreviewRunes_RuneBoundary(t *testing.T) { + cases := []struct { + name string + in string + max int + want string + }{ + {"under-max ASCII", "hi", 80, "hi"}, + {"under-max CJK", "你好", 80, "你好"}, + {"exactly-at-max", "abcde", 5, "abcde"}, + {"truncate ASCII", "abcdefghij", 5, "abcde…"}, + {"truncate CJK at rune boundary", "你好世界你好世界", 4, "你好世界…"}, + {"truncate emoji at rune boundary", "😀😀😀😀😀😀", 3, "😀😀😀…"}, + // The pre-fix bug shape: byte-slice on non-ASCII would have + // mangled the codepoint here. With rune-boundary truncation + // the result is well-formed UTF-8. + {"non-zero with emoji prefix", "🚀abcdefghijk", 5, "🚀abcd…"}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + got := truncatePreviewRunes(c.in, c.max) + if got != c.want { + t.Errorf("truncatePreviewRunes(%q, %d) = %q, want %q", c.in, c.max, got, c.want) + } + // Always-valid UTF-8 invariant. A byte-slice truncation + // could leave partial codepoints; this version must not. + if !utf8.ValidString(got) { + t.Errorf("truncatePreviewRunes(%q, %d) returned invalid UTF-8: %q", c.in, c.max, got) + } + }) + } +} + +// TestAgentMessageWriter_Send_NonASCIIMessagePersists pins the end-to-end +// path for non-ASCII messages — the original reno-stars regression +// surfaced via byte-slice truncation breaking JSONB INSERT. Every +// handler-level test had ASCII content, so this branch had no +// coverage. Now it does. +func TestAgentMessageWriter_Send_NonASCIIMessagePersists(t *testing.T) { + mock := setupTestDB(t) + w := NewAgentMessageWriter(db.DB, newTestBroadcaster()) + + // 200-rune CJK message — exceeds the 80-rune cap, would have hit + // the byte-slice bug. + msg := strings.Repeat("你", 200) + + mock.ExpectQuery("SELECT name FROM workspaces"). + WithArgs("ws-cjk"). + WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("CEO Ryan PC")) + + mock.ExpectExec(`INSERT INTO activity_logs`). + WithArgs( + "ws-cjk", + stringMatcher(func(s string) bool { + if !strings.HasPrefix(s, "Agent message: ") { + return false + } + preview := strings.TrimPrefix(s, "Agent message: ") + if !strings.HasSuffix(preview, "…") { + return false + } + body := strings.TrimSuffix(preview, "…") + // 80 runes of 你 = 80 codepoints. Each is 3 bytes UTF-8. + if utf8.RuneCountInString(body) != 80 { + return false + } + // MUST be valid UTF-8 — pre-fix byte-slice would have + // returned half a codepoint here. + return utf8.ValidString(body) + }), + sqlmock.AnyArg(), + ). + WillReturnResult(sqlmock.NewResult(1, 1)) + + if err := w.Send(context.Background(), "ws-cjk", msg, nil); err != nil { + t.Fatalf("Send: %v", err) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("non-ASCII path drift: %v", err) + } +} + // TestAgentMessageWriter_Send_OmitsAttachmentsKeyWhenEmpty pins the // "no key when nil" wire contract — extra empty fields would force // canvas consumers to defensively check for [] vs undefined; the