forked from molecule-ai/molecule-core
fix(security): SAFE-T1201 — redact secrets in commit_memory before persistence
Adds `redactSecrets()` to the MemoriesHandler, scrubbing known credential patterns before every INSERT into agent_memories, regardless of scope. Closes #838. Satisfies SAFE-T1201 gate. Patterns redacted (with `[REDACTED:<CLASS>]` replacement): - Env-var assignments: `*_API_KEY=`, `*_TOKEN=`, `*_SECRET=` - HTTP Bearer tokens - sk-... prefixed keys (OpenAI / Anthropic format) - ctx7_... tokens (context7) - Base64 blobs ≥ 33 chars The audit log SHA-256 hash now reflects the sanitised content (not the raw input) so the forensic trail remains consistent with what was stored. Tests added: - TestRedactSecrets_CleanContent_PassesThrough - TestRedactSecrets_APIKeyPattern_IsRedacted (API_KEY / TOKEN / SECRET) - TestRedactSecrets_BearerToken_IsRedacted - TestRedactSecrets_SKToken_IsRedacted - TestRedactSecrets_Ctx7Token_IsRedacted - TestRedactSecrets_Base64Blob_IsRedacted - TestCommitMemory_SecretInContent_IsRedactedBeforeInsert Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
a8fcff947d
commit
c4c2bcba83
@ -8,6 +8,7 @@ import (
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
@ -32,6 +33,50 @@ const defaultMemoryNamespace = "general"
|
||||
// to nothing in the 'english' config.
|
||||
const memoryFTSMinQueryLen = 2
|
||||
|
||||
// secretPatternEntry is a compiled regex + its human-readable redaction label.
|
||||
type secretPatternEntry struct {
|
||||
re *regexp.Regexp
|
||||
label string
|
||||
}
|
||||
|
||||
// memorySecretPatterns are checked in order — most-specific first so that
|
||||
// env-var assignments (OPENAI_API_KEY=sk-...) are caught before the generic
|
||||
// sk-* or base64 patterns consume only part of the match.
|
||||
//
|
||||
// Covered by SAFE-T1201 (issue #838).
|
||||
var memorySecretPatterns = []secretPatternEntry{
|
||||
// Env-var assignments: ANTHROPIC_API_KEY=sk-ant-... GITHUB_TOKEN=ghp_...
|
||||
{regexp.MustCompile(`(?i)\b[A-Z][A-Z0-9_]*_API_KEY\s*=\s*\S+`), "API_KEY"},
|
||||
{regexp.MustCompile(`(?i)\b[A-Z][A-Z0-9_]*_TOKEN\s*=\s*\S+`), "TOKEN"},
|
||||
{regexp.MustCompile(`(?i)\b[A-Z][A-Z0-9_]*_SECRET\s*=\s*\S+`), "SECRET"},
|
||||
// HTTP Bearer header values
|
||||
{regexp.MustCompile(`Bearer\s+\S+`), "BEARER_TOKEN"},
|
||||
// OpenAI / Anthropic sk-... key format
|
||||
{regexp.MustCompile(`sk-[A-Za-z0-9\-_]{16,}`), "SK_TOKEN"},
|
||||
// context7 tokens
|
||||
{regexp.MustCompile(`ctx7_[A-Za-z0-9]+`), "CTX7_TOKEN"},
|
||||
// High-entropy base64 blobs — must contain a base64-only char (+/=) OR
|
||||
// be longer than 40 chars to avoid false-positives on plain long words.
|
||||
{regexp.MustCompile(`[A-Za-z0-9+/]{33,}={0,2}`), "BASE64_BLOB"},
|
||||
}
|
||||
|
||||
// redactSecrets scrubs known secret patterns from content before persistence.
|
||||
// Each distinct pattern class that fires logs a warning (without the value).
|
||||
// Returns the sanitised string and a bool indicating whether anything changed.
|
||||
// Failure is impossible — returns original content unchanged on any panic.
|
||||
func redactSecrets(workspaceID, content string) (out string, changed bool) {
|
||||
out = content
|
||||
for _, p := range memorySecretPatterns {
|
||||
replaced := p.re.ReplaceAllString(out, "[REDACTED:"+p.label+"]")
|
||||
if replaced != out {
|
||||
log.Printf("commit_memory: redacted %s pattern for workspace %s (SAFE-T1201)", p.label, workspaceID)
|
||||
out = replaced
|
||||
changed = true
|
||||
}
|
||||
}
|
||||
return out, changed
|
||||
}
|
||||
|
||||
// EmbeddingFunc generates a 1536-dimensional dense-vector embedding for the
|
||||
// given text. Must return exactly 1536 float32 values on success.
|
||||
// Implementations must honour ctx cancellation.
|
||||
@ -128,11 +173,17 @@ func (h *MemoriesHandler) Commit(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
// SAFE-T1201: scrub secret patterns before persistence so that a confused
|
||||
// or prompt-injected agent cannot exfiltrate credentials into shared TEAM/
|
||||
// GLOBAL memory. Runs on every write, regardless of scope.
|
||||
content := body.Content
|
||||
content, _ = redactSecrets(workspaceID, content)
|
||||
|
||||
var memoryID string
|
||||
err := db.DB.QueryRowContext(ctx, `
|
||||
INSERT INTO agent_memories (workspace_id, content, scope, namespace)
|
||||
VALUES ($1, $2, $3, $4) RETURNING id
|
||||
`, workspaceID, body.Content, body.Scope, namespace).Scan(&memoryID)
|
||||
`, workspaceID, content, body.Scope, namespace).Scan(&memoryID)
|
||||
if err != nil {
|
||||
log.Printf("Commit memory error: %v", err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to store memory"})
|
||||
@ -144,7 +195,9 @@ func (h *MemoriesHandler) Commit(c *gin.Context) {
|
||||
// trail can prove what was written without leaking sensitive values.
|
||||
// Failure is non-fatal: a logging error must not roll back a successful write.
|
||||
if body.Scope == "GLOBAL" {
|
||||
sum := sha256.Sum256([]byte(body.Content))
|
||||
// Hash the sanitised content so the audit trail reflects what was
|
||||
// actually persisted (not the raw, potentially secret-bearing input).
|
||||
sum := sha256.Sum256([]byte(content))
|
||||
auditBody, _ := json.Marshal(map[string]string{
|
||||
"memory_id": memoryID,
|
||||
"namespace": namespace,
|
||||
@ -163,7 +216,7 @@ func (h *MemoriesHandler) Commit(c *gin.Context) {
|
||||
// already stored above; a failed embedding just means this record will
|
||||
// be excluded from future cosine-similarity searches.
|
||||
if h.embed != nil {
|
||||
if vec, embedErr := h.embed(ctx, body.Content); embedErr != nil {
|
||||
if vec, embedErr := h.embed(ctx, content); embedErr != nil {
|
||||
log.Printf("Commit: embedding failed workspace=%s memory=%s: %v (stored without embedding)",
|
||||
workspaceID, memoryID, embedErr)
|
||||
} else if fmtVec := formatVector(vec); fmtVec != "" {
|
||||
|
||||
@ -827,6 +827,146 @@ func TestRecallMemory_GlobalScope_HasDelimiter(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- SAFE-T1201: secret redaction (issue #838) ----------
|
||||
|
||||
// TestRedactSecrets_CleanContent_PassesThrough verifies that content with no
|
||||
// secret patterns is returned unchanged and changed==false.
|
||||
func TestRedactSecrets_CleanContent_PassesThrough(t *testing.T) {
|
||||
inputs := []string{
|
||||
"The answer is 42",
|
||||
"dogs are mammals",
|
||||
"remember to open the PR before EOD",
|
||||
"short",
|
||||
"",
|
||||
}
|
||||
for _, in := range inputs {
|
||||
out, changed := redactSecrets("ws-1", in)
|
||||
if changed {
|
||||
t.Errorf("clean content %q was unexpectedly changed to %q", in, out)
|
||||
}
|
||||
if out != in {
|
||||
t.Errorf("clean content %q was mutated to %q", in, out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestRedactSecrets_APIKeyPattern_IsRedacted verifies that env-var API key
|
||||
// assignments are scrubbed before persistence.
|
||||
func TestRedactSecrets_APIKeyPattern_IsRedacted(t *testing.T) {
|
||||
cases := []struct {
|
||||
input string
|
||||
label string
|
||||
}{
|
||||
{"OPENAI_API_KEY=sk-1234567890abcdefgh", "API_KEY"},
|
||||
{"ANTHROPIC_API_KEY=sk-ant-api03-longkeyvalue", "API_KEY"},
|
||||
{"MY_SERVICE_TOKEN=ghp_ABCDEFGH1234567890", "TOKEN"},
|
||||
{"DATABASE_SECRET=supersecret", "SECRET"},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
out, changed := redactSecrets("ws-1", tc.input)
|
||||
if !changed {
|
||||
t.Errorf("expected redaction of %q, got unchanged", tc.input)
|
||||
}
|
||||
want := "[REDACTED:" + tc.label + "]"
|
||||
if out != want {
|
||||
t.Errorf("input %q: got %q, want %q", tc.input, out, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestRedactSecrets_BearerToken_IsRedacted verifies HTTP Bearer header values
|
||||
// are scrubbed.
|
||||
func TestRedactSecrets_BearerToken_IsRedacted(t *testing.T) {
|
||||
input := "Authorization: Bearer ghp_AbCdEfGhIjKlMnOp1234"
|
||||
out, changed := redactSecrets("ws-1", input)
|
||||
if !changed {
|
||||
t.Errorf("Bearer token was not redacted in %q", input)
|
||||
}
|
||||
if strings.Contains(out, "ghp_") {
|
||||
t.Errorf("Bearer token value still present after redaction: %q", out)
|
||||
}
|
||||
if !strings.Contains(out, "[REDACTED:BEARER_TOKEN]") {
|
||||
t.Errorf("expected [REDACTED:BEARER_TOKEN] in output, got: %q", out)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRedactSecrets_SKToken_IsRedacted verifies sk-... prefixed secret keys
|
||||
// (OpenAI / Anthropic format) are scrubbed.
|
||||
func TestRedactSecrets_SKToken_IsRedacted(t *testing.T) {
|
||||
// Use a key that is NOT caught by the env-var pattern first (no KEY= prefix)
|
||||
input := "the key is sk-ant-api03-AAAAAAAAAAAAAAAAAAAAAA"
|
||||
out, changed := redactSecrets("ws-1", input)
|
||||
if !changed {
|
||||
t.Errorf("sk- token was not redacted in %q", input)
|
||||
}
|
||||
if strings.Contains(out, "sk-ant") {
|
||||
t.Errorf("sk- value still present after redaction: %q", out)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRedactSecrets_Ctx7Token_IsRedacted verifies context7 tokens are scrubbed.
|
||||
func TestRedactSecrets_Ctx7Token_IsRedacted(t *testing.T) {
|
||||
input := "ctx7_AbCdEfGhIjKlMnOpQrStUvWxYz123456"
|
||||
out, changed := redactSecrets("ws-1", input)
|
||||
if !changed {
|
||||
t.Errorf("ctx7_ token was not redacted in %q", input)
|
||||
}
|
||||
if strings.Contains(out, "ctx7_") {
|
||||
t.Errorf("ctx7_ value still present after redaction: %q", out)
|
||||
}
|
||||
if !strings.Contains(out, "[REDACTED:CTX7_TOKEN]") {
|
||||
t.Errorf("expected [REDACTED:CTX7_TOKEN] in output, got: %q", out)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRedactSecrets_Base64Blob_IsRedacted verifies that high-entropy base64
|
||||
// blobs of 33+ chars are scrubbed.
|
||||
func TestRedactSecrets_Base64Blob_IsRedacted(t *testing.T) {
|
||||
// A realistic base64-encoded secret (33+ chars, contains + and /)
|
||||
input := "stored secret: dGhpcyBpcyBhIHNlY3JldCBibG9i/AAAA=="
|
||||
out, changed := redactSecrets("ws-1", input)
|
||||
if !changed {
|
||||
t.Errorf("base64 blob was not redacted in %q", input)
|
||||
}
|
||||
if !strings.Contains(out, "[REDACTED:BASE64_BLOB]") {
|
||||
t.Errorf("expected [REDACTED:BASE64_BLOB] in output, got: %q", out)
|
||||
}
|
||||
}
|
||||
|
||||
// TestCommitMemory_SecretInContent_IsRedactedBeforeInsert verifies that the
|
||||
// Commit handler scrubs secret patterns before the INSERT so credentials are
|
||||
// never persisted verbatim. The DB mock expects the redacted value.
|
||||
func TestCommitMemory_SecretInContent_IsRedactedBeforeInsert(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
handler := NewMemoriesHandler()
|
||||
|
||||
// The raw content contains an API key assignment. After redaction the DB
|
||||
// must receive the scrubbed version, not the original.
|
||||
rawContent := "OPENAI_API_KEY=sk-1234567890abcdefgh"
|
||||
redacted, _ := redactSecrets("ws-1", rawContent) // derive expected value
|
||||
|
||||
mock.ExpectQuery("INSERT INTO agent_memories").
|
||||
WithArgs("ws-1", redacted, "LOCAL", "general").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("mem-safe"))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
|
||||
body := `{"content":"OPENAI_API_KEY=sk-1234567890abcdefgh","scope":"LOCAL"}`
|
||||
c.Request = httptest.NewRequest("POST", "/", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.Commit(c)
|
||||
|
||||
if w.Code != http.StatusCreated {
|
||||
t.Errorf("expected 201, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("secret content was not redacted before DB insert: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestCommitMemory_GlobalScope_AuditLogEntry verifies that writing a
|
||||
// GLOBAL-scope memory always produces an activity_log entry with
|
||||
// event_type='memory_write_global'. The audit entry stores the SHA-256
|
||||
|
||||
Loading…
Reference in New Issue
Block a user