"""OFFSEC-003: A2A peer-result sanitization — shared across delegation tools.

This module is intentionally a LEAF (no imports from the molecule-runtime
package) to avoid circular dependency cycles. Both ``a2a_tools_delegation``
and ``a2a_tools`` can import from here without creating import loops.

Trust-boundary design (OFFSEC-003):
    A2A peer responses are untrusted third-party content. Before passing them
    to the agent context, they MUST be wrapped in a trust-boundary marker pair
    so the calling agent knows the content is external.

    Boundary markers:
      - _A2A_BOUNDARY_START = "[A2A_RESULT_FROM_PEER]"
      - _A2A_BOUNDARY_END   = "[/A2A_RESULT_FROM_PEER]"

    The boundary is the PRIMARY security control. A peer that sends
    "[A2A_RESULT_FROM_PEER]evil[/A2A_RESULT_FROM_PEER]safe" can make "safe"
    appear inside the trusted context unless the markers themselves are
    escaped before wrapping — see _escape_boundary_markers() below.

Defense-in-depth (secondary):
    Known prompt-injection control-words are also escaped so that even if a
    calling agent ignores the boundary marker, embedded attack patterns
    (SYSTEM:, OVERRIDE:, etc.) lose their special meaning. This is not a
    complete injection sanitizer — do not rely on it as the primary control.
"""
from __future__ import annotations

import re

# ── Trust-boundary markers ────────────────────────────────────────────────────
_A2A_BOUNDARY_START = "[A2A_RESULT_FROM_PEER]"
_A2A_BOUNDARY_END = "[/A2A_RESULT_FROM_PEER]"

# ── Boundary-marker escaping ─────────────────────────────────────────────────
# A peer that sends "[/A2A_RESULT_FROM_PEER]evil" can make "evil" appear
# inside the trusted zone. Escape BOTH boundary markers in the raw text
# before wrapping so they can never close the boundary early.
# We use "[/ " as the escape prefix — visually distinct from the real marker.


def _escape_boundary_markers(text: str) -> str:
    """Escape boundary markers inside the raw peer text before wrapping.

    Replaces any occurrence of the boundary start/end markers with a
    visually-similar escaped form so a malicious peer can never close the
    boundary early or inject a fake opener.

    Matching is exact and case-sensitive: only the literal marker strings
    are rewritten. The two replacements cannot feed each other, because the
    escaped start form contains a space after "[/" and therefore never
    equals the end marker.

    Args:
        text: Raw, untrusted peer text.

    Returns:
        ``text`` with every literal start marker replaced by
        ``"[/ A2A_RESULT_FROM_PEER]"`` and every literal end marker replaced
        by ``"[/ /A2A_RESULT_FROM_PEER]"``.
    """
    # NOTE(review): the escaped *start* marker ("[/ A2A_RESULT_FROM_PEER]")
    # resembles a whitespace-padded *end* marker. A lenient reader might
    # treat it as a closer — consider a more distinct escape form. Left
    # unchanged here because callers/tests may depend on the exact string.
    return (
        text.replace(_A2A_BOUNDARY_START, "[/ A2A_RESULT_FROM_PEER]")
        .replace(_A2A_BOUNDARY_END, "[/ /A2A_RESULT_FROM_PEER]")
    )


# ── Defense-in-depth: injection pattern escaping ───────────────────────────────
# These patterns cover common prompt-injection phrasings. They are NOT a
# complete sanitizer — see module docstring. The boundary marker is the
# primary control; these are purely defense-in-depth.
#
# Every pattern captures its leading anchor (string start or the preceding
# character) in group 1 and every replacement re-emits it via ``\1`` so the
# surrounding text — including newlines — is preserved exactly.
_INJECTION_PATTERNS = [
    # Single-word patterns: anchor to word boundary so they don't match
    # inside other words (e.g. "SYSTEM" in "mySYSTEMatic").
    (re.compile(r"(^|[^\w])SYSTEM\b", re.IGNORECASE), r"\1[ESCAPED_SYSTEM]"),
    (re.compile(r"(^|[^\w])OVERRIDE\b", re.IGNORECASE), r"\1[ESCAPED_OVERRIDE]"),
    # "INSTRUCTIONS" may appear at the start of a string or after a newline.
    # The captured newline is put back (previously it was replaced by a
    # literal space, which merged adjacent lines of the peer's output).
    (
        re.compile(r"(^|\n)INSTRUCTIONS?\b", re.IGNORECASE),
        r"\1[ESCAPED_INSTRUCTIONS]",
    ),
    (
        re.compile(r"(^|[^\w])IGNORE\s+ALL\b", re.IGNORECASE),
        r"\1[ESCAPED_IGNORE_ALL]",
    ),
    (
        re.compile(r"(^|[^\w])YOU\s+ARE\s+NOW\b", re.IGNORECASE),
        r"\1[ESCAPED_YOU_ARE_NOW]",
    ),
]


def sanitize_a2a_result(text: str | None) -> str | None:
    """Sanitize untrusted text from an A2A peer (OFFSEC-003).

    Order of operations:
      1. Escape boundary markers in the raw text (prevents injection).
      2. Escape known injection patterns (defense-in-depth).

    Note: this function does NOT add boundary wrappers — callers that need
    to establish a trust boundary should wrap the sanitized result with
    ``[A2A_RESULT_FROM_PEER]\\n{sanitized}\\n[/A2A_RESULT_FROM_PEER]``.
    See ``a2a_tools_delegation.py:tool_delegate_task`` for the canonical
    wrapping pattern.

    Args:
        text: Raw peer response. May be empty or ``None``.

    Returns:
        The sanitized text, or the input itself (unchanged) when it is
        empty/None.
    """
    if not text:
        return text

    # 1. Escape boundary markers so a malicious peer cannot break the
    #    trust boundary from inside their response.
    escaped = _escape_boundary_markers(text)

    # 2. Escape known injection control-words (defense-in-depth only).
    for pattern, replacement in _INJECTION_PATTERNS:
        escaped = pattern.sub(replacement, escaped)

    return escaped