fix(file-tools): broaden dedup-status write guard to cover small wrappers

The write_file guard added in #16223 used strict equality against the internal dedup status message. In practice, the model sometimes prepends a short note or appends a trailing comment before calling write_file, which slipped past the strict check. Broaden the heuristic: reject writes whose stripped content equals the status message OR contains it and is <=2x its length. Short, status-dominated writes are always corruption; legitimate docs that quote the message verbatim are always much longer. Adds two tests: one for the small-wrapper corruption shape, one confirming large legitimate files that quote the status still write.
2026-04-26 19:03:32 -07:00 · 2026-04-26 19:03:32 -07:00 · ced8f44cd2
commit ced8f44cd2
parent 977d5f56c9
2 changed files with 82 additions and 2 deletions
--- a/tests/tools/test_file_read_guards.py
+++ b/tests/tools/test_file_read_guards.py
@ -197,6 +197,62 @@ class TestFileDedup(unittest.TestCase):
        self.assertIn("internal read_file status text", result["error"])
        fake.write_file.assert_not_called()

+    @patch("tools.file_tools._get_file_ops")
+    def test_write_rejects_status_text_with_small_framing(self, mock_ops):
+        """write_file rejects small wrappers around the status text too.
+
+        Real-world corruption shapes aren't always the verbatim message — the
+        model sometimes prepends a short note or appends a trailing comment
+        before calling write_file.  A short, status-dominated write is still
+        corruption, not legitimate file content.
+        """
+        fake = MagicMock()
+        fake.write_file = MagicMock()
+        mock_ops.return_value = fake
+
+        wrapped = "Note: " + _READ_DEDUP_STATUS_MESSAGE + "\n\n(continuing.)"
+        result = json.loads(write_file_tool(
+            self._tmpfile,
+            wrapped,
+            task_id="guard",
+        ))
+
+        self.assertIn("error", result)
+        self.assertIn("internal read_file status text", result["error"])
+        fake.write_file.assert_not_called()
+
+    @patch("tools.file_tools._get_file_ops")
+    def test_write_allows_large_file_that_quotes_status_text(self, mock_ops):
+        """Legitimate large content that happens to quote the status is allowed.
+
+        Hermes' own docs / SKILL.md files may legitimately mention the dedup
+        message verbatim.  Only short, status-dominated writes are rejected —
+        a normal file that contains the message as one line out of many must
+        still write successfully.
+        """
+        fake = MagicMock()
+        fake.write_file = lambda path, content: MagicMock(
+            to_dict=lambda: {"success": True, "path": path}
+        )
+        mock_ops.return_value = fake
+
+        # Build content that contains the status text but is much larger,
+        # so the status doesn't "dominate" — this is a legitimate file.
+        large_content = (
+            "# Skill reference\n\n"
+            "Example internal message (do not write back):\n\n"
+            f"    {_READ_DEDUP_STATUS_MESSAGE}\n\n"
+            + ("This is documentation content. " * 200)
+        )
+        result = json.loads(write_file_tool(
+            self._tmpfile,
+            large_content,
+            task_id="guard",
+        ))
+
+        self.assertNotIn("error", result)
+        self.assertTrue(result.get("success"))
+
    @patch("tools.file_tools._get_file_ops")
    def test_modified_file_not_deduped(self, mock_ops):
        """After the file is modified, dedup returns full content."""
--- a/tools/file_tools.py
+++ b/tools/file_tools.py
@ -264,10 +264,34 @@ def _cap_read_tracker_data(task_data: dict) -> None:


 def _is_internal_file_status_text(content: str) -> bool:
-    """Return True when content is an internal file-tool status, not file bytes."""
+    """Return True when content looks like an internal file-tool status, not real file bytes.
+
+    The read_file dedup status message must never be persisted as file
+    content.  The obvious shape is the model echoing the message verbatim,
+    but in practice it also wraps it with small framing text (a leading
+    "Note:", a trailing newline + short comment, etc.) before calling
+    write_file.  We treat any short-ish write whose body is dominated by
+    the status message as the same class of corruption.
+
+    Heuristic:
+      * Strict equality (after strip) — the verbatim shape.
+      * OR the stripped content contains the full status message AND is
+        short enough that the status dominates it (<=2x the message length).
+        Short, status-dominated writes can't plausibly be real files —
+        legitimate docs/notes that happen to quote this internal message
+        are always dramatically longer.
+    """
    if not isinstance(content, str):
        return False
-    return content.strip() == _READ_DEDUP_STATUS_MESSAGE
+    stripped = content.strip()
+    if not stripped:
+        return False
+    if stripped == _READ_DEDUP_STATUS_MESSAGE:
+        return True
+    if _READ_DEDUP_STATUS_MESSAGE in stripped and \
+            len(stripped) <= 2 * len(_READ_DEDUP_STATUS_MESSAGE):
+        return True
+    return False


 def _get_file_ops(task_id: str = "default") -> ShellFileOperations: