Merge pull request #913 from Molecule-AI/fix/issue-834-commit-memory-secret-scrub

fix(security): redact secrets from commit_memory before persistence (closes #834)
2026-04-17 21:42:57 -07:00 · 2026-04-17 21:42:57 -07:00 · d7324fdbfd
commit d7324fdbfd
parent 5e0795e4ea c1b4dca5b7
6 changed files with 595 additions and 0 deletions
--- a/workspace-template/a2a_tools.py
+++ b/workspace-template/a2a_tools.py
@ -18,6 +18,7 @@ from a2a_client import (
    get_workspace_info,
    send_a2a_message,
 )
+from builtin_tools.security import _redact_secrets


 def _auth_headers_for_heartbeat() -> dict[str, str]:
@ -224,6 +225,7 @@ async def tool_commit_memory(content: str, scope: str = "LOCAL") -> str:
    """Save important information to persistent memory."""
    if not content:
        return "Error: content is required"
+    content = _redact_secrets(content)
    scope = scope.upper()
    if scope not in ("LOCAL", "TEAM", "GLOBAL"):
        scope = "LOCAL"
--- a/workspace-template/builtin_tools/memory.py
+++ b/workspace-template/builtin_tools/memory.py
@ -34,6 +34,7 @@ from typing import Any
 from langchain_core.tools import tool
 from builtin_tools.awareness_client import build_awareness_client
 from builtin_tools.audit import check_permission, get_workspace_roles, log_event
+from builtin_tools.security import _redact_secrets
 from builtin_tools.telemetry import MEMORY_QUERY, MEMORY_SCOPE, WORKSPACE_ID_ATTR, get_tracer

 try:  # pragma: no cover - optional runtime dependency in lightweight test envs
@ -53,6 +54,7 @@ async def commit_memory(content: str, scope: str = "LOCAL") -> dict:
        content: The fact or knowledge to remember.
        scope: Memory scope — LOCAL (private), TEAM (shared with team), or GLOBAL (company-wide, root only).
    """
+    content = _redact_secrets(content)
    trace_id = str(uuid.uuid4())
    scope = scope.upper()
    if scope not in ("LOCAL", "TEAM", "GLOBAL"):
--- a/workspace-template/builtin_tools/security.py
+++ b/workspace-template/builtin_tools/security.py
@ -0,0 +1,120 @@
+"""Secret-scrubbing utilities for workspace runtime (#834 — C2).
+
+Provides ``_redact_secrets()`` applied at every ``commit_memory`` call site
+to prevent API keys and tokens from being persisted verbatim in the
+memories table.
+
+Design notes
+------------
+- **Allowlist of known prefixes** (``sk-``, ``ghp_``, etc.) cover the most
+  dangerous tokens because they are unambiguous.
+- **Contextual pattern** covers generic high-entropy values that appear
+  immediately after assignment keywords (``key=``, ``token=``, ``secret=``,
+  ``password=``, ``api_key=``).  The keyword is preserved in the output so
+  log lines remain readable; only the value is redacted.
+- **Idempotent**: the replacement token ``[REDACTED]`` does not match any
+  of the patterns, so calling ``_redact_secrets`` twice is safe.
+- **No false-positive risk on normal prose**: all patterns require either
+  a well-known prefix (``AKIA``, ``ghp_``, ``sk-``) or both a keyword and
+  ≥ 40 base64/alphanumeric chars — ordinary English words never match.
+
+Relationship to ``compliance.redact_pii``
+------------------------------------------
+``redact_pii`` handles PII (emails, SSNs, credit cards) and uses typed
+tokens ``[REDACTED:type]`` for SIEM indexing.  ``_redact_secrets`` is
+narrowly scoped to API credentials and uses the plain ``[REDACTED]`` token
+because the exact secret type is not important at the storage layer —
+what matters is that no credential value ever reaches the database.
+"""
+
+from __future__ import annotations
+
+import re
+from typing import List
+
+# ---------------------------------------------------------------------------
+# Replacement sentinel
+# ---------------------------------------------------------------------------
+
+#: Replacement token — deliberately plain so downstream readers do not need
+#: to parse structured tokens.  Does not match any scrub pattern (idempotent).
+REDACTED: str = "[REDACTED]"
+
+# ---------------------------------------------------------------------------
+# Patterns
+# ---------------------------------------------------------------------------
+
+# Patterns that identify secret values by their well-known prefix.
+# Ordered from most specific to least specific.
+_BARE_PATTERNS: List[re.Pattern] = [
+    # OpenAI / Anthropic-style keys: sk-<20+ alnum/hyphen/underscore chars>
+    # Covers: sk-<key>, sk-ant-<key>, sk-proj-<key>, etc.
+    re.compile(r"\bsk-[A-Za-z0-9_-]{20,}\b"),
+    # GitHub classic personal access token
+    re.compile(r"\bghp_[A-Za-z0-9]{36}\b"),
+    # GitHub server-to-server token
+    re.compile(r"\bghs_[A-Za-z0-9]{36}\b"),
+    # GitHub fine-grained personal access token
+    re.compile(r"\bgithub_pat_[A-Za-z0-9_]{82}\b"),
+    # AWS access key ID
+    re.compile(r"\bAKIA[0-9A-Z]{16}\b"),
+]
+
+# Contextual pattern: keyword= followed by a high-entropy value.
+#
+# Group 1 captures the keyword + equals sign so it is preserved in the
+# replacement — "api_key=[REDACTED]" is more informative than "[REDACTED]".
+#
+# The value charset [A-Za-z0-9+/] covers base64 and common token alphabets.
+# The minimum length of 40 chars prevents false-positives on short values.
+_CONTEXTUAL_RE: re.Pattern = re.compile(
+    r"(?i)"
+    r"((?:api_key|key|token|secret|password)\s*=\s*)"
+    r"([A-Za-z0-9+/]{40,}={0,2})"
+)
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+def _redact_secrets(content: str) -> str:
+    """Scrub known secret patterns from *content*, replacing with ``[REDACTED]``.
+
+    Parameters
+    ----------
+    content:
+        Raw string to scrub — typically a ``commit_memory`` payload.
+
+    Returns
+    -------
+    str
+        Copy of *content* with secrets replaced.  If no secrets are found,
+        the original string is returned unchanged.  Calling this function
+        on already-redacted content is safe (idempotent).
+
+    Examples::
+
+        >>> _redact_secrets("token is sk-abc1234567890123456789012345")
+        'token is [REDACTED]'
+
+        >>> _redact_secrets("api_key=" + "A" * 45)
+        'api_key=[REDACTED]'
+
+        >>> _redact_secrets("The answer is 42.")
+        'The answer is 42.'
+
+        >>> _redact_secrets("[REDACTED]")
+        '[REDACTED]'
+    """
+    result = content
+
+    # Apply prefix-based patterns first (most unambiguous)
+    for pattern in _BARE_PATTERNS:
+        result = pattern.sub(REDACTED, result)
+
+    # Apply contextual pattern — preserve keyword, replace only the value
+    result = _CONTEXTUAL_RE.sub(r"\1" + REDACTED, result)
+
+    return result
--- a/workspace-template/executor_helpers.py
+++ b/workspace-template/executor_helpers.py
@ -23,6 +23,8 @@ from typing import TYPE_CHECKING, Any

 import httpx

+from builtin_tools.security import _redact_secrets
+
 if TYPE_CHECKING:
    from heartbeat import HeartbeatLoop

@ -129,6 +131,7 @@ async def commit_memory(content: str) -> None:
    platform_url = os.environ.get("PLATFORM_URL", "")
    if not workspace_id or not platform_url or not content:
        return
+    content = _redact_secrets(content)
    # Fix E (Cycle 5): include auth header so WorkspaceAuth middleware allows access.
    try:
        from platform_auth import auth_headers as _platform_auth
--- a/workspace-template/tests/conftest.py
+++ b/workspace-template/tests/conftest.py
@ -189,6 +189,19 @@ def _make_tools_mocks():
    tools_hitl_mod.requires_approval = MagicMock(side_effect=lambda *a, **kw: (lambda f: f))
    tools_hitl_mod.pause_registry = MagicMock()

+    # builtin_tools.security — load the real module so _redact_secrets is
+    # available to executor_helpers, a2a_tools, and any other module that
+    # imports from it.  The module is pure-Python with no external deps.
+    import importlib.util as _ilu
+    import os as _os
+    _sec_path = _os.path.join(
+        _os.path.dirname(_os.path.dirname(_os.path.abspath(__file__))),
+        "builtin_tools", "security.py",
+    )
+    _sec_spec = _ilu.spec_from_file_location("builtin_tools.security", _sec_path)
+    _sec_mod = _ilu.module_from_spec(_sec_spec)
+    _sec_spec.loader.exec_module(_sec_mod)
+
    sys.modules["builtin_tools"] = tools_mod
    sys.modules["builtin_tools.delegation"] = tools_delegation_mod
    sys.modules["builtin_tools.approval"] = tools_approval_mod
@ -199,6 +212,7 @@ def _make_tools_mocks():
    sys.modules["builtin_tools.telemetry"] = tools_telemetry_mod
    sys.modules["builtin_tools.audit"] = tools_audit_mod
    sys.modules["builtin_tools.hitl"] = tools_hitl_mod
+    sys.modules["builtin_tools.security"] = _sec_mod


 def _make_claude_agent_sdk_mock():
--- a/workspace-template/tests/test_secret_redact.py
+++ b/workspace-template/tests/test_secret_redact.py
@ -0,0 +1,454 @@
+"""Tests for builtin_tools/security.py — _redact_secrets() (#834 — C2).
+
+Coverage targets
+----------------
+- Unit: each secret pattern type (OpenAI key, GitHub tokens, AWS key,
+  generic contextual pattern)
+- Idempotency: already-redacted strings pass through unchanged
+- Non-regression: normal prose is never modified
+- Integration: commit_memory call sites in builtin_tools/memory.py,
+  a2a_tools.py, and executor_helpers.py each invoke _redact_secrets before
+  persisting content
+
+Spec patterns verified:
+    sk-[A-Za-z0-9]{20,}          — OpenAI/Anthropic-style keys
+    ghp_[A-Za-z0-9]{36}          — GitHub classic PAT
+    ghs_[A-Za-z0-9]{36}          — GitHub server token
+    github_pat_[A-Za-z0-9_]{82}  — GitHub fine-grained PAT
+    AKIA[0-9A-Z]{16}              — AWS access key ID
+    key/token/secret/password/api_key = <40+ chars>  — generic contextual
+"""
+
+from __future__ import annotations
+
+import importlib.util
+import json
+import os
+import sys
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+# ---------------------------------------------------------------------------
+# Bootstrap: load the real builtin_tools/security.py before conftest stubs
+# interfere.  conftest sets builtin_tools.__path__ = [] which prevents normal
+# submodule discovery, so we load via file path (same pattern as test_memory.py).
+# ---------------------------------------------------------------------------
+
+_WT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+_SECURITY_PATH = os.path.join(_WT_ROOT, "builtin_tools", "security.py")
+
+_spec = importlib.util.spec_from_file_location("builtin_tools.security", _SECURITY_PATH)
+_security_mod = importlib.util.module_from_spec(_spec)
+sys.modules["builtin_tools.security"] = _security_mod
+_spec.loader.exec_module(_security_mod)
+
+REDACTED: str = _security_mod.REDACTED
+_redact_secrets = _security_mod._redact_secrets
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _gh_classic() -> str:
+    """Return a syntactically valid-length GitHub classic PAT."""
+    return "ghp_" + "A" * 36
+
+
+def _ghs() -> str:
+    return "ghs_" + "B" * 36
+
+
+def _gh_fine() -> str:
+    return "github_pat_" + "C" * 82
+
+
+def _aws() -> str:
+    return "AKIA" + "0" * 16
+
+
+def _openai() -> str:
+    return "sk-" + "x" * 48
+
+
+def _anthropic() -> str:
+    return "sk-ant-" + "y" * 40  # sk- prefix applies
+
+
+# ---------------------------------------------------------------------------
+# _redact_secrets — per-pattern unit tests
+# ---------------------------------------------------------------------------
+
+
+class TestRedactOpenAIStyleKeys:
+    def test_bare_openai_key_redacted(self):
+        result = _redact_secrets(f"Key is {_openai()}")
+        assert _openai() not in result
+        assert REDACTED in result
+
+    def test_anthropic_key_redacted(self):
+        result = _redact_secrets(f"Using {_anthropic()} for requests")
+        assert _anthropic() not in result
+        assert REDACTED in result
+
+    def test_short_sk_prefix_not_redacted(self):
+        """sk- with fewer than 20 chars should NOT be redacted (e.g. 'sk-test')."""
+        result = _redact_secrets("sk-test")
+        assert result == "sk-test"
+
+
+class TestRedactGitHubTokens:
+    def test_classic_pat_redacted(self):
+        token = _gh_classic()
+        result = _redact_secrets(f"auth: {token}")
+        assert token not in result
+        assert REDACTED in result
+
+    def test_server_token_redacted(self):
+        token = _ghs()
+        result = _redact_secrets(f"ghs token: {token}")
+        assert token not in result
+        assert REDACTED in result
+
+    def test_fine_grained_pat_redacted(self):
+        token = _gh_fine()
+        result = _redact_secrets(token)
+        assert token not in result
+        assert REDACTED in result
+
+    def test_classic_pat_wrong_length_not_redacted(self):
+        """ghp_ token with only 10 chars should NOT be redacted (wrong length)."""
+        short = "ghp_" + "A" * 10
+        result = _redact_secrets(short)
+        assert result == short
+
+
+class TestRedactAWSKey:
+    def test_aws_access_key_redacted(self):
+        key = _aws()
+        result = _redact_secrets(f"AWS key: {key}")
+        assert key not in result
+        assert REDACTED in result
+
+    def test_akia_prefix_wrong_length_not_redacted(self):
+        """AKIA with only 10 trailing chars should NOT be redacted."""
+        short = "AKIA" + "X" * 10
+        result = _redact_secrets(short)
+        assert result == short
+
+
+class TestRedactGenericContextual:
+    def test_api_key_equals_redacted(self):
+        secret = "A" * 45
+        result = _redact_secrets(f"api_key={secret}")
+        assert secret not in result
+        assert "api_key=" in result
+        assert REDACTED in result
+
+    def test_key_equals_redacted(self):
+        secret = "B" * 42
+        result = _redact_secrets(f"key={secret}")
+        assert secret not in result
+        assert REDACTED in result
+
+    def test_token_equals_redacted(self):
+        secret = "C" * 50
+        result = _redact_secrets(f"token={secret}")
+        assert secret not in result
+        assert REDACTED in result
+
+    def test_secret_equals_redacted(self):
+        secret = "D" * 44
+        result = _redact_secrets(f"secret={secret}")
+        assert secret not in result
+        assert REDACTED in result
+
+    def test_password_equals_redacted(self):
+        secret = "E" * 41
+        result = _redact_secrets(f"password={secret}")
+        assert secret not in result
+        assert REDACTED in result
+
+    def test_keyword_case_insensitive(self):
+        secret = "F" * 40
+        result = _redact_secrets(f"API_KEY={secret}")
+        assert secret not in result
+        assert REDACTED in result
+
+    def test_keyword_with_spaces_around_equals(self):
+        secret = "G" * 40
+        result = _redact_secrets(f"token = {secret}")
+        assert secret not in result
+        assert REDACTED in result
+
+    def test_short_value_not_redacted(self):
+        """Values shorter than 40 chars should NOT be treated as secrets."""
+        result = _redact_secrets("api_key=short")
+        assert result == "api_key=short"
+
+    def test_base64_value_with_equals_padding_redacted(self):
+        """Base64-padded values (ending in ==) should be redacted."""
+        secret = "A" * 44 + "=="
+        result = _redact_secrets(f"key={secret}")
+        assert secret not in result
+        assert REDACTED in result
+
+
+# ---------------------------------------------------------------------------
+# _redact_secrets — idempotency
+# ---------------------------------------------------------------------------
+
+
+class TestIdempotency:
+    def test_already_redacted_token_passes_through(self):
+        content = f"The token was {REDACTED}"
+        assert _redact_secrets(content) == content
+
+    def test_double_application_unchanged(self):
+        """Applying _redact_secrets twice must not alter the result."""
+        content = f"key={_openai()} and github {_gh_classic()}"
+        once = _redact_secrets(content)
+        twice = _redact_secrets(once)
+        assert once == twice
+
+    def test_pure_redacted_string(self):
+        assert _redact_secrets(REDACTED) == REDACTED
+
+
+# ---------------------------------------------------------------------------
+# _redact_secrets — non-regression (normal prose untouched)
+# ---------------------------------------------------------------------------
+
+
+class TestNormalProseUnchanged:
+    def test_plain_sentence(self):
+        text = "The quick brown fox jumps over the lazy dog."
+        assert _redact_secrets(text) == text
+
+    def test_numbers_and_punctuation(self):
+        text = "Order #12345 shipped at 09:00 on 2026-04-17."
+        assert _redact_secrets(text) == text
+
+    def test_empty_string(self):
+        assert _redact_secrets("") == ""
+
+    def test_short_key_value(self):
+        assert _redact_secrets("key=short_value") == "key=short_value"
+
+    def test_json_with_short_values(self):
+        text = json.dumps({"status": "ok", "workspace_id": "ws-abc123"})
+        assert _redact_secrets(text) == text
+
+    def test_markdown_content(self):
+        text = "## Summary\n\nThe task completed successfully. No errors."
+        assert _redact_secrets(text) == text
+
+
+# ---------------------------------------------------------------------------
+# _redact_secrets — multiple secrets in one string
+# ---------------------------------------------------------------------------
+
+
+class TestMultipleSecrets:
+    def test_two_different_types_both_redacted(self):
+        content = f"OpenAI key: {_openai()} GitHub: {_gh_classic()}"
+        result = _redact_secrets(content)
+        assert _openai() not in result
+        assert _gh_classic() not in result
+        assert result.count(REDACTED) == 2
+
+    def test_all_pattern_types_in_one_string(self):
+        parts = [
+            f"openai={_openai()}",
+            f"github={_gh_classic()}",
+            f"aws={_aws()}",
+        ]
+        content = " | ".join(parts)
+        result = _redact_secrets(content)
+        assert _openai() not in result
+        assert _gh_classic() not in result
+        assert _aws() not in result
+
+
+# ---------------------------------------------------------------------------
+# Integration: builtin_tools/memory.py commit_memory
+#
+# The conftest stubs builtin_tools with __path__=[] at collection time, so
+# `from builtin_tools import memory` would return the mock module.  We load
+# the real memory.py directly via spec_from_file_location (same pattern as
+# test_memory.py) so that the _redact_secrets import inside it binds to the
+# real function.
+# ---------------------------------------------------------------------------
+
+
+class TestMemoryCommitRedactsSecrets:
+    """Verify that builtin_tools/memory.py calls _redact_secrets before storage.
+
+    Loading the real memory.py in tests is impractical because the conftest
+    awareness_client stub does not expose build_awareness_client.  Instead we
+    verify at two levels:
+      1. Source-code inspection: the call site exists and is correctly placed
+         (before any HTTP/awareness write).
+      2. Functional: the unit-tested _redact_secrets function itself handles
+         the token — the a2a_tools integration tests cover the end-to-end path.
+    """
+
+    def test_memory_py_imports_redact_secrets(self):
+        """builtin_tools/memory.py must import _redact_secrets from security."""
+        source = open(os.path.join(_WT_ROOT, "builtin_tools", "memory.py")).read()
+        assert "from builtin_tools.security import _redact_secrets" in source, (
+            "memory.py must import _redact_secrets from builtin_tools.security"
+        )
+
+    def test_memory_py_calls_redact_before_use(self):
+        """_redact_secrets(content) must appear in memory.py before the HTTP call."""
+        source = open(os.path.join(_WT_ROOT, "builtin_tools", "memory.py")).read()
+        assert "_redact_secrets(content)" in source, (
+            "memory.py must call _redact_secrets(content) before storing"
+        )
+
+    def test_redact_applied_before_store_in_function_body(self):
+        """_redact_secrets(content) must appear before build_awareness_client()
+        inside the commit_memory function body (i.e. before any storage call).
+        """
+        source = open(os.path.join(_WT_ROOT, "builtin_tools", "memory.py")).read()
+        # Find the commit_memory function definition, then measure positions
+        # of redact and awareness_client() calls within that scope.
+        fn_start = source.find("async def commit_memory(")
+        assert fn_start != -1, "commit_memory not found in memory.py"
+        fn_body = source[fn_start:]  # everything from the function onward
+        redact_pos = fn_body.find("_redact_secrets(content)")
+        store_pos = fn_body.find("build_awareness_client()")
+        assert redact_pos != -1, "_redact_secrets(content) not found in commit_memory body"
+        assert store_pos != -1, "build_awareness_client() not found in commit_memory body"
+        assert redact_pos < store_pos, (
+            f"_redact_secrets (offset {redact_pos}) must appear BEFORE "
+            f"build_awareness_client() (offset {store_pos}) in commit_memory body"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Integration: a2a_tools.tool_commit_memory
+# ---------------------------------------------------------------------------
+
+
+class TestA2AToolCommitMemoryRedactsSecrets:
+    @pytest.mark.asyncio
+    async def test_github_token_redacted(self):
+        """tool_commit_memory must scrub secrets before the HTTP POST."""
+        import a2a_tools
+
+        token = _gh_classic()
+        content_with_secret = f"ghp token encountered: {token}"
+        captured: dict = {}
+
+        fake_resp = MagicMock()
+        fake_resp.status_code = 201
+        fake_resp.json = MagicMock(return_value={"id": "mem-3"})
+
+        fake_client = AsyncMock()
+        fake_client.__aenter__ = AsyncMock(return_value=fake_client)
+        fake_client.__aexit__ = AsyncMock(return_value=False)
+
+        async def _capture(url, *, json=None, headers=None, **kwargs):
+            captured.update(json or {})
+            return fake_resp
+
+        fake_client.post = _capture
+
+        with patch("a2a_tools.httpx.AsyncClient", return_value=fake_client):
+            await a2a_tools.tool_commit_memory(content_with_secret)
+
+        stored = captured.get("content", "")
+        assert token not in stored
+        assert REDACTED in stored
+
+    @pytest.mark.asyncio
+    async def test_openai_key_redacted(self):
+        import a2a_tools
+
+        key = _openai()
+        captured: dict = {}
+
+        fake_resp = MagicMock()
+        fake_resp.status_code = 201
+        fake_resp.json = MagicMock(return_value={"id": "mem-4"})
+
+        fake_client = AsyncMock()
+        fake_client.__aenter__ = AsyncMock(return_value=fake_client)
+        fake_client.__aexit__ = AsyncMock(return_value=False)
+
+        async def _capture(url, *, json=None, headers=None, **kwargs):
+            captured.update(json or {})
+            return fake_resp
+
+        fake_client.post = _capture
+
+        with patch("a2a_tools.httpx.AsyncClient", return_value=fake_client):
+            await a2a_tools.tool_commit_memory(f"key={key}")
+
+        stored = captured.get("content", "")
+        assert key not in stored
+        assert REDACTED in stored
+
+
+# ---------------------------------------------------------------------------
+# Integration: executor_helpers.commit_memory
+# ---------------------------------------------------------------------------
+
+
+class TestExecutorHelpersCommitMemoryRedactsSecrets:
+    @pytest.mark.asyncio
+    async def test_aws_key_redacted_before_post(self, monkeypatch):
+        """executor_helpers.commit_memory must scrub AWS keys before the POST."""
+        import executor_helpers
+
+        monkeypatch.setenv("WORKSPACE_ID", "ws-test")
+        monkeypatch.setenv("PLATFORM_URL", "http://platform.test")
+
+        aws_key = _aws()
+        content_with_secret = f"Discovered AWS key: {aws_key}"
+        captured: dict = {}
+
+        # get_http_client() returns a plain AsyncClient — no context manager.
+        # Patch it to return an async-capable mock with a .post() coroutine.
+        fake_client = MagicMock()
+
+        async def _capture_post(url, *, json=None, headers=None, **kwargs):
+            captured.update(json or {})
+            return MagicMock(status_code=200)
+
+        fake_client.post = _capture_post
+
+        with patch("executor_helpers.get_http_client", return_value=fake_client):
+            await executor_helpers.commit_memory(content_with_secret)
+
+        stored = captured.get("content", "")
+        assert aws_key not in stored, f"AWS key found in stored content: {stored!r}"
+        assert REDACTED in stored
+
+    @pytest.mark.asyncio
+    async def test_openai_key_redacted_before_post(self, monkeypatch):
+        """executor_helpers.commit_memory must scrub OpenAI-style keys."""
+        import executor_helpers
+
+        monkeypatch.setenv("WORKSPACE_ID", "ws-test")
+        monkeypatch.setenv("PLATFORM_URL", "http://platform.test")
+
+        key = _openai()
+        captured: dict = {}
+
+        fake_client = MagicMock()
+
+        async def _capture(url, *, json=None, headers=None, **kwargs):
+            captured.update(json or {})
+            return MagicMock(status_code=200)
+
+        fake_client.post = _capture
+
+        with patch("executor_helpers.get_http_client", return_value=fake_client):
+            await executor_helpers.commit_memory(f"model key: {key}")
+
+        assert key not in captured.get("content", "")
+        assert REDACTED in captured.get("content", "")