fix(security): SAFE-T1201 — redact secrets in commit_memory before persistence

Adds `redactSecrets()` to the MemoriesHandler, scrubbing known credential
patterns before every INSERT into agent_memories, regardless of scope.

Closes #838. Satisfies SAFE-T1201 gate.

Patterns redacted (with `[REDACTED:<CLASS>]` replacement):
- Env-var assignments: `*_API_KEY=`, `*_TOKEN=`, `*_SECRET=`
- HTTP Bearer tokens
- sk-... prefixed keys (OpenAI / Anthropic format)
- ctx7_... tokens (context7)
- Base64 blobs ≥ 33 chars

The audit log SHA-256 hash now reflects the sanitised content (not the
raw input) so the forensic trail remains consistent with what was stored.

Tests added:
- TestRedactSecrets_CleanContent_PassesThrough
- TestRedactSecrets_APIKeyPattern_IsRedacted (API_KEY / TOKEN / SECRET)
- TestRedactSecrets_BearerToken_IsRedacted
- TestRedactSecrets_SKToken_IsRedacted
- TestRedactSecrets_Ctx7Token_IsRedacted
- TestRedactSecrets_Base64Blob_IsRedacted
- TestCommitMemory_SecretInContent_IsRedactedBeforeInsert

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Molecule AI Backend Engineer 2026-04-17 23:38:57 +00:00
parent a8fcff947d
commit c4c2bcba83
2 changed files with 196 additions and 3 deletions

View File

@ -8,6 +8,7 @@ import (
"fmt"
"log"
"net/http"
"regexp"
"strings"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
@ -32,6 +33,50 @@ const defaultMemoryNamespace = "general"
// to nothing in the 'english' config.
const memoryFTSMinQueryLen = 2
// secretPatternEntry is a compiled regex + its human-readable redaction label.
type secretPatternEntry struct {
re *regexp.Regexp
label string
}
// memorySecretPatterns are checked in order — most-specific first so that
// env-var assignments (OPENAI_API_KEY=sk-...) are caught before the generic
// sk-* or base64 patterns consume only part of the match.
//
// Covered by SAFE-T1201 (issue #838).
var memorySecretPatterns = []secretPatternEntry{
// Env-var assignments: ANTHROPIC_API_KEY=sk-ant-... GITHUB_TOKEN=ghp_...
{regexp.MustCompile(`(?i)\b[A-Z][A-Z0-9_]*_API_KEY\s*=\s*\S+`), "API_KEY"},
{regexp.MustCompile(`(?i)\b[A-Z][A-Z0-9_]*_TOKEN\s*=\s*\S+`), "TOKEN"},
{regexp.MustCompile(`(?i)\b[A-Z][A-Z0-9_]*_SECRET\s*=\s*\S+`), "SECRET"},
// HTTP Bearer header values
{regexp.MustCompile(`Bearer\s+\S+`), "BEARER_TOKEN"},
// OpenAI / Anthropic sk-... key format
{regexp.MustCompile(`sk-[A-Za-z0-9\-_]{16,}`), "SK_TOKEN"},
// context7 tokens
{regexp.MustCompile(`ctx7_[A-Za-z0-9]+`), "CTX7_TOKEN"},
// High-entropy base64 blobs — must contain a base64-only char (+/=) OR
// be longer than 40 chars to avoid false-positives on plain long words.
{regexp.MustCompile(`[A-Za-z0-9+/]{33,}={0,2}`), "BASE64_BLOB"},
}
// redactSecrets scrubs known secret patterns from content before persistence.
// Each distinct pattern class that fires logs a warning (without the value).
// Returns the sanitised string and a bool indicating whether anything changed.
// Failure is impossible — returns original content unchanged on any panic.
func redactSecrets(workspaceID, content string) (out string, changed bool) {
out = content
for _, p := range memorySecretPatterns {
replaced := p.re.ReplaceAllString(out, "[REDACTED:"+p.label+"]")
if replaced != out {
log.Printf("commit_memory: redacted %s pattern for workspace %s (SAFE-T1201)", p.label, workspaceID)
out = replaced
changed = true
}
}
return out, changed
}
// EmbeddingFunc generates a 1536-dimensional dense-vector embedding for the
// given text. Must return exactly 1536 float32 values on success.
// Implementations must honour ctx cancellation.
@ -128,11 +173,17 @@ func (h *MemoriesHandler) Commit(c *gin.Context) {
}
}
// SAFE-T1201: scrub secret patterns before persistence so that a confused
// or prompt-injected agent cannot exfiltrate credentials into shared TEAM/
// GLOBAL memory. Runs on every write, regardless of scope.
content := body.Content
content, _ = redactSecrets(workspaceID, content)
var memoryID string
err := db.DB.QueryRowContext(ctx, `
INSERT INTO agent_memories (workspace_id, content, scope, namespace)
VALUES ($1, $2, $3, $4) RETURNING id
`, workspaceID, body.Content, body.Scope, namespace).Scan(&memoryID)
`, workspaceID, content, body.Scope, namespace).Scan(&memoryID)
if err != nil {
log.Printf("Commit memory error: %v", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to store memory"})
@ -144,7 +195,9 @@ func (h *MemoriesHandler) Commit(c *gin.Context) {
// trail can prove what was written without leaking sensitive values.
// Failure is non-fatal: a logging error must not roll back a successful write.
if body.Scope == "GLOBAL" {
sum := sha256.Sum256([]byte(body.Content))
// Hash the sanitised content so the audit trail reflects what was
// actually persisted (not the raw, potentially secret-bearing input).
sum := sha256.Sum256([]byte(content))
auditBody, _ := json.Marshal(map[string]string{
"memory_id": memoryID,
"namespace": namespace,
@ -163,7 +216,7 @@ func (h *MemoriesHandler) Commit(c *gin.Context) {
// already stored above; a failed embedding just means this record will
// be excluded from future cosine-similarity searches.
if h.embed != nil {
if vec, embedErr := h.embed(ctx, body.Content); embedErr != nil {
if vec, embedErr := h.embed(ctx, content); embedErr != nil {
log.Printf("Commit: embedding failed workspace=%s memory=%s: %v (stored without embedding)",
workspaceID, memoryID, embedErr)
} else if fmtVec := formatVector(vec); fmtVec != "" {

View File

@ -827,6 +827,146 @@ func TestRecallMemory_GlobalScope_HasDelimiter(t *testing.T) {
}
}
// ---------- SAFE-T1201: secret redaction (issue #838) ----------
// TestRedactSecrets_CleanContent_PassesThrough verifies that content with no
// secret patterns is returned unchanged and changed==false.
func TestRedactSecrets_CleanContent_PassesThrough(t *testing.T) {
inputs := []string{
"The answer is 42",
"dogs are mammals",
"remember to open the PR before EOD",
"short",
"",
}
for _, in := range inputs {
out, changed := redactSecrets("ws-1", in)
if changed {
t.Errorf("clean content %q was unexpectedly changed to %q", in, out)
}
if out != in {
t.Errorf("clean content %q was mutated to %q", in, out)
}
}
}
// TestRedactSecrets_APIKeyPattern_IsRedacted verifies that env-var API key
// assignments are scrubbed before persistence.
func TestRedactSecrets_APIKeyPattern_IsRedacted(t *testing.T) {
cases := []struct {
input string
label string
}{
{"OPENAI_API_KEY=sk-1234567890abcdefgh", "API_KEY"},
{"ANTHROPIC_API_KEY=sk-ant-api03-longkeyvalue", "API_KEY"},
{"MY_SERVICE_TOKEN=ghp_ABCDEFGH1234567890", "TOKEN"},
{"DATABASE_SECRET=supersecret", "SECRET"},
}
for _, tc := range cases {
out, changed := redactSecrets("ws-1", tc.input)
if !changed {
t.Errorf("expected redaction of %q, got unchanged", tc.input)
}
want := "[REDACTED:" + tc.label + "]"
if out != want {
t.Errorf("input %q: got %q, want %q", tc.input, out, want)
}
}
}
// TestRedactSecrets_BearerToken_IsRedacted verifies HTTP Bearer header values
// are scrubbed.
func TestRedactSecrets_BearerToken_IsRedacted(t *testing.T) {
input := "Authorization: Bearer ghp_AbCdEfGhIjKlMnOp1234"
out, changed := redactSecrets("ws-1", input)
if !changed {
t.Errorf("Bearer token was not redacted in %q", input)
}
if strings.Contains(out, "ghp_") {
t.Errorf("Bearer token value still present after redaction: %q", out)
}
if !strings.Contains(out, "[REDACTED:BEARER_TOKEN]") {
t.Errorf("expected [REDACTED:BEARER_TOKEN] in output, got: %q", out)
}
}
// TestRedactSecrets_SKToken_IsRedacted verifies sk-... prefixed secret keys
// (OpenAI / Anthropic format) are scrubbed.
func TestRedactSecrets_SKToken_IsRedacted(t *testing.T) {
// Use a key that is NOT caught by the env-var pattern first (no KEY= prefix)
input := "the key is sk-ant-api03-AAAAAAAAAAAAAAAAAAAAAA"
out, changed := redactSecrets("ws-1", input)
if !changed {
t.Errorf("sk- token was not redacted in %q", input)
}
if strings.Contains(out, "sk-ant") {
t.Errorf("sk- value still present after redaction: %q", out)
}
}
// TestRedactSecrets_Ctx7Token_IsRedacted verifies context7 tokens are scrubbed.
func TestRedactSecrets_Ctx7Token_IsRedacted(t *testing.T) {
input := "ctx7_AbCdEfGhIjKlMnOpQrStUvWxYz123456"
out, changed := redactSecrets("ws-1", input)
if !changed {
t.Errorf("ctx7_ token was not redacted in %q", input)
}
if strings.Contains(out, "ctx7_") {
t.Errorf("ctx7_ value still present after redaction: %q", out)
}
if !strings.Contains(out, "[REDACTED:CTX7_TOKEN]") {
t.Errorf("expected [REDACTED:CTX7_TOKEN] in output, got: %q", out)
}
}
// TestRedactSecrets_Base64Blob_IsRedacted verifies that high-entropy base64
// blobs of 33+ chars are scrubbed.
func TestRedactSecrets_Base64Blob_IsRedacted(t *testing.T) {
// A realistic base64-encoded secret (33+ chars, contains + and /)
input := "stored secret: dGhpcyBpcyBhIHNlY3JldCBibG9i/AAAA=="
out, changed := redactSecrets("ws-1", input)
if !changed {
t.Errorf("base64 blob was not redacted in %q", input)
}
if !strings.Contains(out, "[REDACTED:BASE64_BLOB]") {
t.Errorf("expected [REDACTED:BASE64_BLOB] in output, got: %q", out)
}
}
// TestCommitMemory_SecretInContent_IsRedactedBeforeInsert verifies that the
// Commit handler scrubs secret patterns before the INSERT so credentials are
// never persisted verbatim. The DB mock expects the redacted value.
func TestCommitMemory_SecretInContent_IsRedactedBeforeInsert(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
handler := NewMemoriesHandler()
// The raw content contains an API key assignment. After redaction the DB
// must receive the scrubbed version, not the original.
rawContent := "OPENAI_API_KEY=sk-1234567890abcdefgh"
redacted, _ := redactSecrets("ws-1", rawContent) // derive expected value
mock.ExpectQuery("INSERT INTO agent_memories").
WithArgs("ws-1", redacted, "LOCAL", "general").
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("mem-safe"))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
body := `{"content":"OPENAI_API_KEY=sk-1234567890abcdefgh","scope":"LOCAL"}`
c.Request = httptest.NewRequest("POST", "/", bytes.NewBufferString(body))
c.Request.Header.Set("Content-Type", "application/json")
handler.Commit(c)
if w.Code != http.StatusCreated {
t.Errorf("expected 201, got %d: %s", w.Code, w.Body.String())
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("secret content was not redacted before DB insert: %v", err)
}
}
// TestCommitMemory_GlobalScope_AuditLogEntry verifies that writing a
// GLOBAL-scope memory always produces an activity_log entry with
// event_type='memory_write_global'. The audit entry stores the SHA-256