Merge pull request #36 from Molecule-AI/fix/watcher-sha256

fix(security): H1 — replace MD5 with SHA-256 in watcher file-integrity checks
This commit is contained in:
Hongming Wang 2026-04-14 01:25:29 -07:00 committed by GitHub
commit 03c5dfd410
4 changed files with 18 additions and 13 deletions

View File

@ -7,7 +7,7 @@ Architecture
------------
``SkillsWatcher`` runs as a background asyncio task alongside the agent. It
polls the skill directories every ``POLL_INTERVAL`` seconds (default 3 s),
computes MD5 hashes of every file, and fires ``_reload_skill()`` when any
computes SHA-256 hashes of every file, and fires ``_reload_skill()`` when any
file inside a skill's folder changes.
``_reload_skill()`` calls ``load_skills()`` from ``skills.loader`` for the
@ -71,7 +71,7 @@ class SkillsWatcher:
self.config_path = config_path
self.skill_names = list(skill_names)
self.on_reload = on_reload
self._hashes: dict[str, str] = {} # rel_path → md5 hex
self._hashes: dict[str, str] = {} # rel_path → sha256 hex
self._running = False
# ------------------------------------------------------------------
@ -103,12 +103,13 @@ class SkillsWatcher:
def _hash_file(self, path: Path) -> str:
try:
return hashlib.md5(path.read_bytes()).hexdigest()
# H1: SHA-256 replaces MD5 for file-integrity change detection.
return hashlib.sha256(path.read_bytes()).hexdigest()
except OSError:
return ""
def _scan(self) -> dict[str, str]:
"""Return {relative_path: md5} for every file in watched skill dirs."""
"""Return {relative_path: sha256} for every file in watched skill dirs."""
hashes: dict[str, str] = {}
root = self._skills_root()
for skill_name in self.skill_names:

View File

@ -40,8 +40,8 @@ def _write(path: Path, content: str) -> None:
path.write_text(content, encoding="utf-8")
def _md5(content: str) -> str:
return hashlib.md5(content.encode()).hexdigest()
def _sha256(content: str) -> str:
return hashlib.sha256(content.encode()).hexdigest()
# ============================================================================
@ -149,7 +149,7 @@ class TestChangedSkills:
_write(tmp_path / "skills" / "unwatched" / "SKILL.md", "v2")
# Also add path for unwatched to new_hashes manually (shouldn't matter)
new_hashes = w._scan()
new_hashes["unwatched/SKILL.md"] = _md5("v2")
new_hashes["unwatched/SKILL.md"] = _sha256("v2")
changed = w._changed_skills(new_hashes)
assert "unwatched" not in changed
@ -351,13 +351,13 @@ class TestHashFile:
result = w._hash_file(missing)
assert result == ""
def test_hash_file_returns_md5_for_existing_file(self, tmp_path):
"""_hash_file returns a non-empty hex digest for a readable file."""
def test_hash_file_returns_sha256_for_existing_file(self, tmp_path):
"""_hash_file returns a non-empty SHA-256 hex digest for a readable file."""
f = tmp_path / "real_file.txt"
f.write_text("hello")
w = SkillsWatcher(str(tmp_path), [])
result = w._hash_file(f)
assert len(result) == 32 # MD5 hex digest length
assert len(result) == 64 # SHA-256 hex digest length
assert result != ""

View File

@ -64,10 +64,10 @@ def test_init_defaults_on_reload(tmp_config):
# ---------------------------------------------------------------------------
def test_hash_file_real_file(tmp_path, watcher):
"""_hash_file returns md5 hex digest of the file's bytes."""
"""_hash_file returns sha256 hex digest of the file's bytes."""
f = tmp_path / "sample.txt"
f.write_bytes(b"hello world")
expected = hashlib.md5(b"hello world").hexdigest()
expected = hashlib.sha256(b"hello world").hexdigest()
assert watcher._hash_file(str(f)) == expected

View File

@ -37,7 +37,11 @@ class ConfigWatcher:
def _hash_file(self, path: str) -> str:
try:
return hashlib.md5(Path(path).read_bytes()).hexdigest()
# H1: SHA-256 replaces MD5 for file-integrity change detection.
# MD5 is collision-prone; using SHA-256 prevents a crafted config
# file from producing the same hash as a benign one, which would
# silently suppress the hot-reload callback.
return hashlib.sha256(Path(path).read_bytes()).hexdigest()
except (OSError, IOError):
return ""