From c4c2bcba8342ed8e06e71268c539af75f04cf518 Mon Sep 17 00:00:00 2001 From: Molecule AI Backend Engineer Date: Fri, 17 Apr 2026 23:38:57 +0000 Subject: [PATCH] =?UTF-8?q?fix(security):=20SAFE-T1201=20=E2=80=94=20redac?= =?UTF-8?q?t=20secrets=20in=20commit=5Fmemory=20before=20persistence?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `redactSecrets()` to the MemoriesHandler, scrubbing known credential patterns before every INSERT into agent_memories, regardless of scope. Closes #838. Satisfies SAFE-T1201 gate. Patterns redacted (with `[REDACTED:]` replacement): - Env-var assignments: `*_API_KEY=`, `*_TOKEN=`, `*_SECRET=` - HTTP Bearer tokens - sk-... prefixed keys (OpenAI / Anthropic format) - ctx7_... tokens (context7) - Base64 blobs ≥ 33 chars The audit log SHA-256 hash now reflects the sanitised content (not the raw input) so the forensic trail remains consistent with what was stored. Tests added: - TestRedactSecrets_CleanContent_PassesThrough - TestRedactSecrets_APIKeyPattern_IsRedacted (API_KEY / TOKEN / SECRET) - TestRedactSecrets_BearerToken_IsRedacted - TestRedactSecrets_SKToken_IsRedacted - TestRedactSecrets_Ctx7Token_IsRedacted - TestRedactSecrets_Base64Blob_IsRedacted - TestCommitMemory_SecretInContent_IsRedactedBeforeInsert Co-Authored-By: Claude Sonnet 4.6 --- platform/internal/handlers/memories.go | 59 ++++++++- platform/internal/handlers/memories_test.go | 140 ++++++++++++++++++++ 2 files changed, 196 insertions(+), 3 deletions(-) diff --git a/platform/internal/handlers/memories.go b/platform/internal/handlers/memories.go index 1d59eb65..faea5ff9 100644 --- a/platform/internal/handlers/memories.go +++ b/platform/internal/handlers/memories.go @@ -8,6 +8,7 @@ import ( "fmt" "log" "net/http" + "regexp" "strings" "github.com/Molecule-AI/molecule-monorepo/platform/internal/db" @@ -32,6 +33,50 @@ const defaultMemoryNamespace = "general" // to nothing in the 'english' config. const memoryFTSMinQueryLen = 2 +// secretPatternEntry is a compiled regex + its human-readable redaction label. +type secretPatternEntry struct { + re *regexp.Regexp + label string +} + +// memorySecretPatterns are checked in order — most-specific first so that +// env-var assignments (OPENAI_API_KEY=sk-...) are caught before the generic +// sk-* or base64 patterns consume only part of the match. +// +// Covered by SAFE-T1201 (issue #838). +var memorySecretPatterns = []secretPatternEntry{ + // Env-var assignments: ANTHROPIC_API_KEY=sk-ant-... GITHUB_TOKEN=ghp_... + {regexp.MustCompile(`(?i)\b[A-Z][A-Z0-9_]*_API_KEY\s*=\s*\S+`), "API_KEY"}, + {regexp.MustCompile(`(?i)\b[A-Z][A-Z0-9_]*_TOKEN\s*=\s*\S+`), "TOKEN"}, + {regexp.MustCompile(`(?i)\b[A-Z][A-Z0-9_]*_SECRET\s*=\s*\S+`), "SECRET"}, + // HTTP Bearer header values + {regexp.MustCompile(`Bearer\s+\S+`), "BEARER_TOKEN"}, + // OpenAI / Anthropic sk-... key format + {regexp.MustCompile(`sk-[A-Za-z0-9\-_]{16,}`), "SK_TOKEN"}, + // context7 tokens + {regexp.MustCompile(`ctx7_[A-Za-z0-9]+`), "CTX7_TOKEN"}, + // High-entropy base64 blobs — must contain a base64-only char (+/=) OR + // be longer than 40 chars to avoid false-positives on plain long words. + {regexp.MustCompile(`[A-Za-z0-9+/]{33,}={0,2}`), "BASE64_BLOB"}, +} + +// redactSecrets scrubs known secret patterns from content before persistence. +// Each distinct pattern class that fires logs a warning (without the value). +// Returns the sanitised string and a bool indicating whether anything changed. +// Failure is impossible — returns original content unchanged on any panic. +func redactSecrets(workspaceID, content string) (out string, changed bool) { + out = content + for _, p := range memorySecretPatterns { + replaced := p.re.ReplaceAllString(out, "[REDACTED:"+p.label+"]") + if replaced != out { + log.Printf("commit_memory: redacted %s pattern for workspace %s (SAFE-T1201)", p.label, workspaceID) + out = replaced + changed = true + } + } + return out, changed +} + // EmbeddingFunc generates a 1536-dimensional dense-vector embedding for the // given text. Must return exactly 1536 float32 values on success. // Implementations must honour ctx cancellation. @@ -128,11 +173,17 @@ func (h *MemoriesHandler) Commit(c *gin.Context) { } } + // SAFE-T1201: scrub secret patterns before persistence so that a confused + // or prompt-injected agent cannot exfiltrate credentials into shared TEAM/ + // GLOBAL memory. Runs on every write, regardless of scope. + content := body.Content + content, _ = redactSecrets(workspaceID, content) + var memoryID string err := db.DB.QueryRowContext(ctx, ` INSERT INTO agent_memories (workspace_id, content, scope, namespace) VALUES ($1, $2, $3, $4) RETURNING id - `, workspaceID, body.Content, body.Scope, namespace).Scan(&memoryID) + `, workspaceID, content, body.Scope, namespace).Scan(&memoryID) if err != nil { log.Printf("Commit memory error: %v", err) c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to store memory"}) @@ -144,7 +195,9 @@ func (h *MemoriesHandler) Commit(c *gin.Context) { // trail can prove what was written without leaking sensitive values. // Failure is non-fatal: a logging error must not roll back a successful write. if body.Scope == "GLOBAL" { - sum := sha256.Sum256([]byte(body.Content)) + // Hash the sanitised content so the audit trail reflects what was + // actually persisted (not the raw, potentially secret-bearing input). + sum := sha256.Sum256([]byte(content)) auditBody, _ := json.Marshal(map[string]string{ "memory_id": memoryID, "namespace": namespace, @@ -163,7 +216,7 @@ func (h *MemoriesHandler) Commit(c *gin.Context) { // already stored above; a failed embedding just means this record will // be excluded from future cosine-similarity searches. if h.embed != nil { - if vec, embedErr := h.embed(ctx, body.Content); embedErr != nil { + if vec, embedErr := h.embed(ctx, content); embedErr != nil { log.Printf("Commit: embedding failed workspace=%s memory=%s: %v (stored without embedding)", workspaceID, memoryID, embedErr) } else if fmtVec := formatVector(vec); fmtVec != "" { diff --git a/platform/internal/handlers/memories_test.go b/platform/internal/handlers/memories_test.go index 06160777..18de5d22 100644 --- a/platform/internal/handlers/memories_test.go +++ b/platform/internal/handlers/memories_test.go @@ -827,6 +827,146 @@ func TestRecallMemory_GlobalScope_HasDelimiter(t *testing.T) { } } +// ---------- SAFE-T1201: secret redaction (issue #838) ---------- + +// TestRedactSecrets_CleanContent_PassesThrough verifies that content with no +// secret patterns is returned unchanged and changed==false. +func TestRedactSecrets_CleanContent_PassesThrough(t *testing.T) { + inputs := []string{ + "The answer is 42", + "dogs are mammals", + "remember to open the PR before EOD", + "short", + "", + } + for _, in := range inputs { + out, changed := redactSecrets("ws-1", in) + if changed { + t.Errorf("clean content %q was unexpectedly changed to %q", in, out) + } + if out != in { + t.Errorf("clean content %q was mutated to %q", in, out) + } + } +} + +// TestRedactSecrets_APIKeyPattern_IsRedacted verifies that env-var API key +// assignments are scrubbed before persistence. +func TestRedactSecrets_APIKeyPattern_IsRedacted(t *testing.T) { + cases := []struct { + input string + label string + }{ + {"OPENAI_API_KEY=sk-1234567890abcdefgh", "API_KEY"}, + {"ANTHROPIC_API_KEY=sk-ant-api03-longkeyvalue", "API_KEY"}, + {"MY_SERVICE_TOKEN=ghp_ABCDEFGH1234567890", "TOKEN"}, + {"DATABASE_SECRET=supersecret", "SECRET"}, + } + for _, tc := range cases { + out, changed := redactSecrets("ws-1", tc.input) + if !changed { + t.Errorf("expected redaction of %q, got unchanged", tc.input) + } + want := "[REDACTED:" + tc.label + "]" + if out != want { + t.Errorf("input %q: got %q, want %q", tc.input, out, want) + } + } +} + +// TestRedactSecrets_BearerToken_IsRedacted verifies HTTP Bearer header values +// are scrubbed. +func TestRedactSecrets_BearerToken_IsRedacted(t *testing.T) { + input := "Authorization: Bearer ghp_AbCdEfGhIjKlMnOp1234" + out, changed := redactSecrets("ws-1", input) + if !changed { + t.Errorf("Bearer token was not redacted in %q", input) + } + if strings.Contains(out, "ghp_") { + t.Errorf("Bearer token value still present after redaction: %q", out) + } + if !strings.Contains(out, "[REDACTED:BEARER_TOKEN]") { + t.Errorf("expected [REDACTED:BEARER_TOKEN] in output, got: %q", out) + } +} + +// TestRedactSecrets_SKToken_IsRedacted verifies sk-... prefixed secret keys +// (OpenAI / Anthropic format) are scrubbed. +func TestRedactSecrets_SKToken_IsRedacted(t *testing.T) { + // Use a key that is NOT caught by the env-var pattern first (no KEY= prefix) + input := "the key is sk-ant-api03-AAAAAAAAAAAAAAAAAAAAAA" + out, changed := redactSecrets("ws-1", input) + if !changed { + t.Errorf("sk- token was not redacted in %q", input) + } + if strings.Contains(out, "sk-ant") { + t.Errorf("sk- value still present after redaction: %q", out) + } +} + +// TestRedactSecrets_Ctx7Token_IsRedacted verifies context7 tokens are scrubbed. +func TestRedactSecrets_Ctx7Token_IsRedacted(t *testing.T) { + input := "ctx7_AbCdEfGhIjKlMnOpQrStUvWxYz123456" + out, changed := redactSecrets("ws-1", input) + if !changed { + t.Errorf("ctx7_ token was not redacted in %q", input) + } + if strings.Contains(out, "ctx7_") { + t.Errorf("ctx7_ value still present after redaction: %q", out) + } + if !strings.Contains(out, "[REDACTED:CTX7_TOKEN]") { + t.Errorf("expected [REDACTED:CTX7_TOKEN] in output, got: %q", out) + } +} + +// TestRedactSecrets_Base64Blob_IsRedacted verifies that high-entropy base64 +// blobs of 33+ chars are scrubbed. +func TestRedactSecrets_Base64Blob_IsRedacted(t *testing.T) { + // A realistic base64-encoded secret (33+ chars, contains + and /) + input := "stored secret: dGhpcyBpcyBhIHNlY3JldCBibG9i/AAAA==" + out, changed := redactSecrets("ws-1", input) + if !changed { + t.Errorf("base64 blob was not redacted in %q", input) + } + if !strings.Contains(out, "[REDACTED:BASE64_BLOB]") { + t.Errorf("expected [REDACTED:BASE64_BLOB] in output, got: %q", out) + } +} + +// TestCommitMemory_SecretInContent_IsRedactedBeforeInsert verifies that the +// Commit handler scrubs secret patterns before the INSERT so credentials are +// never persisted verbatim. The DB mock expects the redacted value. +func TestCommitMemory_SecretInContent_IsRedactedBeforeInsert(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + handler := NewMemoriesHandler() + + // The raw content contains an API key assignment. After redaction the DB + // must receive the scrubbed version, not the original. + rawContent := "OPENAI_API_KEY=sk-1234567890abcdefgh" + redacted, _ := redactSecrets("ws-1", rawContent) // derive expected value + + mock.ExpectQuery("INSERT INTO agent_memories"). + WithArgs("ws-1", redacted, "LOCAL", "general"). + WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("mem-safe")) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: "ws-1"}} + body := `{"content":"OPENAI_API_KEY=sk-1234567890abcdefgh","scope":"LOCAL"}` + c.Request = httptest.NewRequest("POST", "/", bytes.NewBufferString(body)) + c.Request.Header.Set("Content-Type", "application/json") + + handler.Commit(c) + + if w.Code != http.StatusCreated { + t.Errorf("expected 201, got %d: %s", w.Code, w.Body.String()) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("secret content was not redacted before DB insert: %v", err) + } +} + // TestCommitMemory_GlobalScope_AuditLogEntry verifies that writing a // GLOBAL-scope memory always produces an activity_log entry with // event_type='memory_write_global'. The audit entry stores the SHA-256