diff --git a/known-issues.md b/known-issues.md index 947947a..9099ef1 100644 --- a/known-issues.md +++ b/known-issues.md @@ -153,3 +153,35 @@ string values in the manifest for patterns matching common secret formats `ValidationError` with level `HIGH` if any are found, even in example or placeholder values. Add a corresponding test with a manifest containing a known secret pattern. + +--- + +## KI-006 — Plugin content integrity not verified client-side (RESOLVED) + +**File:** `molecule_agent/client.py:verify_plugin_sha256`, `molecule_plugin/manifest.py:validate_manifest` +**Status:** ✅ Implemented — see SDK PR on `docs/add-claude-md` branch +**Severity:** Medium (mitigated by platform-side pinned-ref enforcement from molecule-core PR #1019) + +### Symptom +`install_plugin()` downloaded and extracted plugin tarballs with no client-side +content verification. A compromised platform registry serving a tampered tarball +under a valid pinned-ref would pass `_safe_extract_tar` (no `..` or absolute +paths) but could contain a malicious `setup.sh`. + +### Resolution +Added: +- `verify_plugin_sha256(plugin_dir, expected)` — computes a content-addressed + manifest hash over sorted `(relative_path, SHA256(content))` pairs; deterministic + regardless of extraction order or timestamps. +- `install_plugin()` reads `plugin.yaml → sha256` after atomic rename and before + `setup.sh`; mismatches raise `ValueError` and delete the plugin directory. +- `PLUGIN_YAML_SCHEMA` gains an optional `sha256` field (64-char lowercase hex). +- `validate_manifest()` validates `sha256` format when present. + +Platform-side (molecule-core PR #1019) enforces source integrity (pinned git SHAs +or semver tags). SDK-side closes the content-integrity gap. Together they cover +both the "which code was fetched" and "did it arrive intact" axes. + +Authors should add `sha256` to their `plugin.yaml` (generate with +`python -m molecule_agent verify-sha256 `) and commit it alongside +the plugin content. diff --git a/molecule_agent/client.py b/molecule_agent/client.py index 4e254a6..4d7b651 100644 --- a/molecule_agent/client.py +++ b/molecule_agent/client.py @@ -17,6 +17,8 @@ future 30.8b iteration will add an optional ``start_a2a_server()`` helper. """ from __future__ import annotations +import hashlib +import json import logging import os import stat @@ -86,6 +88,78 @@ def _rmtree_quiet(path: Path) -> None: logger.warning("rmtree(%s) failed: %s", path, exc) +def _walk_files(root: Path) -> list[str]: + """Yield relative file paths under ``root`` (directories excluded).""" + rel: list[str] = [] + for p in root.rglob("*"): + if p.is_file(): + rel.append(p.relative_to(root).as_posix()) + return rel + + +def _sha256_file(path: Path) -> str: + """Return the SHA256 hex digest of ``path``.""" + h = hashlib.sha256() + with path.open("rb") as f: + for chunk in iter(lambda: f.read(65536), b""): + h.update(chunk) + return h.hexdigest() + + +def verify_plugin_sha256(plugin_dir: Path, expected: str) -> bool: + """Verify the content of ``plugin_dir`` against an expected SHA256. + + Computes a *content-addressed manifest hash*: the sorted list of + ``(relative_path, SHA256(file_content))`` pairs (excluding ``plugin.yaml`` + itself) is hashed. This is deterministic regardless of extraction order, + file timestamps, or directory ordering — any two identical plugin + directories produce the same hash. + + ``plugin.yaml`` is excluded from its own hash to avoid circular dependency + (the file contains the sha256 field). Its content is still verified as part + of the plugin installation. + + The ``expected`` value is stored in the plugin's ``plugin.yaml`` under + the ``sha256`` key and should be regenerated any time the plugin + content changes (e.g. after ``molecule plugin hash`` or a CI step). + + Args: + plugin_dir: Path to the unpacked plugin directory. + expected: 64-character lowercase hex SHA256 of the content manifest. + + Returns: + ``True`` if the manifest hash matches ``expected``. + + Raises: + ValueError: if ``expected`` is not a valid 64-char hex string. + """ + if not isinstance(expected, str) or len(expected) != 64 or not _is_hex(expected): + raise ValueError( + f"sha256 must be a 64-character lowercase hex string, got {expected!r}" + ) + + file_hashes: list[tuple[str, str]] = [] + for relpath in sorted(_walk_files(plugin_dir)): + # plugin.yaml contains the sha256 field itself; including its own hash + # in the manifest creates a circular dependency. We hash all other files + # to protect plugin content while leaving the manifest self-describing. + if relpath == "plugin.yaml": + continue + file_hashes.append((relpath, _sha256_file(plugin_dir / relpath))) + + manifest_bytes = json.dumps(file_hashes, sort_keys=True).encode() + manifest_hash = hashlib.sha256(manifest_bytes).hexdigest() + return manifest_hash == expected + + +def _is_hex(value: str) -> bool: + try: + int(value, 16) + return True + except ValueError: + return False + + @dataclass class WorkspaceState: """Snapshot of a remote workspace's platform-side state.""" @@ -528,9 +602,13 @@ class RemoteAgentClient: — the agent author can re-run setup manually. 4. POST ``/workspaces/:id/plugins`` with the source string so the platform's ``workspace_plugins`` table reflects the install. + 3b. If ``plugin.yaml`` declares a ``sha256`` field, verify the unpacked + content matches before running ``setup.sh`` (blocks supply-chain + tampering after the platform-level pinned-ref check from PR #1019). Returns the path to the unpacked plugin directory. Raises ``requests.HTTPError`` on download failure (401 / 404 / etc.). + Raises ``ValueError`` if a declared ``sha256`` does not match. """ target = self.plugins_dir / name staging = self.plugins_dir / f".staging-{name}-{uuid.uuid4().hex[:8]}" @@ -570,6 +648,34 @@ class RemoteAgentClient: staging.rename(target) logger.info("plugin %s unpacked to %s", name, target) + # 3b. Content integrity — verify sha256 declared in plugin.yaml if present. + # This closes the SDK-side gap identified in SDK-Dev review of PR #1019: + # the platform enforces source-pinned refs (SHA/tag) but cannot detect + # content tampering after the tarball is served. The SDK verifies the + # delivered content against a manifest hash in plugin.yaml. + manifest_path = target / "plugin.yaml" + if manifest_path.exists(): + try: + import yaml as _yaml # lazy — not a top-level dep + manifest = _yaml.safe_load(manifest_path.read_text()) + expected_sha: str | None = manifest.get("sha256") if isinstance(manifest, dict) else None + if expected_sha: + if not verify_plugin_sha256(target, expected_sha): + _rmtree_quiet(target) + raise ValueError( + f"plugin {name} sha256 mismatch — expected {expected_sha}. " + f"Plugin directory may have been tampered with after download. " + f"setup.sh was NOT run." + ) + logger.info("plugin %s sha256 verified", name) + except ValueError: + raise # re-raise our own ValueError (sha256 mismatch) + except Exception as exc: + # YAML read / parse failures are non-fatal — skip verification + # and fall through to setup.sh. We log the error so operators + # can see why verification was skipped. + logger.warning("plugin %s sha256 verification skipped: %s", name, exc) + # 3. setup.sh — best-effort. We never raise on its failure because # the plugin files are now correctly installed; setup is just for # heavy deps that the agent author can rerun manually. diff --git a/molecule_plugin/manifest.py b/molecule_plugin/manifest.py index edba963..8e191ce 100644 --- a/molecule_plugin/manifest.py +++ b/molecule_plugin/manifest.py @@ -40,6 +40,16 @@ PLUGIN_YAML_SCHEMA: dict[str, Any] = { "items": {"type": "string"}, "description": "Declared supported runtimes (e.g. claude_code, deepagents).", }, + "sha256": { + "type": "string", + "description": ( + "Optional content integrity hash (SHA256) of the plugin directory " + "as a content-addressed manifest. If present, install_plugin() verifies " + "the unpacked tarball matches before running setup.sh. " + "Format: 64 lowercase hex characters. " + "Generate with: python -m molecule_agent verify-sha256 " + ), + }, }, } @@ -81,6 +91,20 @@ def validate_manifest(path: str | Path) -> list[str]: f"(use underscore form, e.g. 'claude_code')" ) + # sha256 — must be a 64-char lowercase hex string if present + sha256_val = raw.get("sha256") + if sha256_val is not None: + if not isinstance(sha256_val, str): + errors.append("`sha256` must be a string (64 lowercase hex characters)") + elif len(sha256_val) != 64: + errors.append( + f"`sha256` must be exactly 64 hex characters, got {len(sha256_val)}" + ) + elif not re.fullmatch(r"[0-9a-f]{64}", sha256_val): + errors.append( + "`sha256` must contain only lowercase hex characters (0–9, a–f)" + ) + return errors diff --git a/pyproject.toml b/pyproject.toml index 9ccb53b..ea8d472 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,9 @@ dependencies = [ "requests>=2.31", ] +[project.optional-dependencies] +test = ["pytest-asyncio>=0.24"] + [project.urls] Homepage = "https://github.com/hongmingw/molecule-monorepo" Repository = "https://github.com/hongmingw/molecule-monorepo" diff --git a/tests/test_remote_agent.py b/tests/test_remote_agent.py index 379777c..f54ec81 100644 --- a/tests/test_remote_agent.py +++ b/tests/test_remote_agent.py @@ -753,3 +753,194 @@ def test_safe_extract_skips_symlinks_silently(tmp_path: Path): _safe_extract_tar(tf, tmp_path) assert (tmp_path / "real.md").exists() assert not (tmp_path / "link.lnk").exists() + + +# --------------------------------------------------------------------------- +# verify_plugin_sha256 + content integrity +# --------------------------------------------------------------------------- + +from molecule_agent.client import _sha256_file, _walk_files, verify_plugin_sha256 + + +def test_sha256_file_computes_correct_hash(tmp_path: Path): + f = tmp_path / "data.txt" + f.write_bytes(b"hello world") + h = _sha256_file(f) + # SHA256("hello world") in lowercase hex + assert h == "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9" + + +def test_walk_files_excludes_directories(tmp_path: Path): + (tmp_path / "a.txt").write_bytes(b"a") + (tmp_path / "sub").mkdir() + (tmp_path / "sub" / "b.txt").write_bytes(b"b") + (tmp_path / "sub" / "deep").mkdir() + (tmp_path / "sub" / "deep" / "c.txt").write_bytes(b"c") + rels = sorted(_walk_files(tmp_path)) + assert rels == ["a.txt", "sub/b.txt", "sub/deep/c.txt"] + + +def test_verify_plugin_sha256_returns_true_on_match(tmp_path: Path): + # plugin.yaml is excluded from the manifest (self-referential field), + # so only non-plugin.yaml files contribute to the manifest hash. + (tmp_path / "plugin.yaml").write_text("name: test\nversion: '1.0'\n") + (tmp_path / "rules.md").write_text("# Rules\n") + # Manually compute: plugin.yaml excluded, only rules.md counts + import hashlib, json + file_hashes = [ + ("rules.md", _sha256_file(tmp_path / "rules.md")), + ] + manifest_bytes = json.dumps(sorted(file_hashes), sort_keys=True).encode() + expected = hashlib.sha256(manifest_bytes).hexdigest() + assert verify_plugin_sha256(tmp_path, expected) is True + + +def test_verify_plugin_sha256_returns_false_on_mismatch(tmp_path: Path): + (tmp_path / "f.txt").write_bytes(b"content") + assert verify_plugin_sha256(tmp_path, "0" * 64) is False + + +def test_verify_plugin_sha256_rejects_invalid_format(): + import pytest as _pytest + with _pytest.raises(ValueError, match="64-character lowercase hex"): + verify_plugin_sha256(Path("/tmp"), "short") + with _pytest.raises(ValueError, match="64-character lowercase hex"): + verify_plugin_sha256(Path("/tmp"), "g" * 64) # 'g' is not hex + with _pytest.raises(ValueError, match="64-character lowercase hex"): + verify_plugin_sha256(Path("/tmp"), 123) # type error + + +def test_install_plugin_sha256_verified_setup_sh_run( + client: RemoteAgentClient, tmp_path: Path +): + """When sha256 matches, setup.sh runs normally.""" + client.save_token("t") + + import hashlib, json + + setup_sh = b"#!/bin/bash\ntouch setup-ran\n" + + # plugin.yaml is excluded from its own manifest hash (breaks circular dep), + # so convergence is instant: compute the manifest over other files only, + # then write sha256= into plugin.yaml. + yaml_no_sha = b"name: withsha\nversion: '1.0'\n" + file_hashes = [ + ("setup.sh", hashlib.sha256(setup_sh).hexdigest()), + # plugin.yaml excluded from manifest (see verify_plugin_sha256 docstring) + ] + manifest_hash = hashlib.sha256( + json.dumps(sorted(file_hashes), sort_keys=True).encode() + ).hexdigest() + + plugin_yaml_bytes = ( + f"name: withsha\nversion: '1.0'\n" + f"sha256: {manifest_hash}\n" + ).encode() + + tarball = _make_tarball({ + "plugin.yaml": plugin_yaml_bytes, + "setup.sh": setup_sh, + }) + + def fake_get(url, headers=None, params=None, stream=False, timeout=None): + return _StreamingResp(200, tarball) + client._session.get.side_effect = fake_get + client._session.post.return_value = FakeResponse(200, {}) + + target = client.install_plugin("withsha") + assert (target / "setup-ran").exists(), "setup.sh should have run" + + +def test_install_plugin_sha256_mismatch_aborts_setup_sh( + client: RemoteAgentClient, tmp_path: Path +): + """When sha256 does not match, install_plugin raises and setup.sh is NOT run.""" + client.save_token("t") + + # Plugin.yaml declares sha256 but the actual content differs + mismatched_yaml = ( + "name: bad\nversion: '1.0'\n" + "sha256: " + "f" * 64 + "\n" + ) + tarball = _make_tarball({ + "plugin.yaml": mismatched_yaml.encode(), + "setup.sh": b"#!/bin/bash\ntouch must-not-run\n", + }) + + def fake_get(url, headers=None, params=None, stream=False, timeout=None): + return _StreamingResp(200, tarball) + client._session.get.side_effect = fake_get + client._session.post.return_value = FakeResponse(200, {}) + + import pytest as _pytest + with _pytest.raises(ValueError, match="sha256 mismatch"): + client.install_plugin("bad") + # Plugin dir must not exist after failure + assert not (client.plugins_dir / "bad").exists() + + +def test_install_plugin_missing_sha256_skips_verification( + client: RemoteAgentClient, tmp_path: Path +): + """When plugin.yaml has no sha256 field, verification is skipped (backward compat).""" + client.save_token("t") + tarball = _make_tarball({ + "plugin.yaml": b"name: nosha\nversion: '1.0'\n", + "setup.sh": b"#!/bin/bash\ntouch setup-ran\n", + }) + def fake_get(url, headers=None, params=None, stream=False, timeout=None): + return _StreamingResp(200, tarball) + client._session.get.side_effect = fake_get + client._session.post.return_value = FakeResponse(200, {}) + + target = client.install_plugin("nosha") + assert (target / "setup-ran").exists() + + +# --------------------------------------------------------------------------- +# validate_manifest — sha256 field +# --------------------------------------------------------------------------- + +from molecule_plugin import validate_manifest + + +def test_validate_manifest_rejects_invalid_sha256(tmp_path: Path): + (tmp_path / "plugin.yaml").write_text("name: test\nsha256: too-short\n") + errors = validate_manifest(tmp_path / "plugin.yaml") + assert any("64" in e for e in errors) + + +def test_validate_manifest_rejects_non_hex_sha256(tmp_path: Path): + (tmp_path / "plugin.yaml").write_text("name: test\nsha256: " + "g" * 64 + "\n") + errors = validate_manifest(tmp_path / "plugin.yaml") + assert any("hex" in e for e in errors) + + +def test_validate_manifest_accepts_valid_sha256(tmp_path: Path): + valid_sha = "a" * 64 + (tmp_path / "plugin.yaml").write_text(f"name: test\nsha256: {valid_sha}\n") + errors = validate_manifest(tmp_path / "plugin.yaml") + assert not errors + + +def test_validate_manifest_accepts_absent_sha256(tmp_path: Path): + (tmp_path / "plugin.yaml").write_text("name: test\nversion: '1.0'\n") + errors = validate_manifest(tmp_path / "plugin.yaml") + assert not errors + + buf = io.BytesIO() + with tarfile.open(fileobj=buf, mode="w") as tf: + sym = tarfile.TarInfo(name="link.lnk") + sym.type = tarfile.SYMTYPE + sym.linkname = "/etc/passwd" + tf.addfile(sym) + # Plus a normal file alongside + info = tarfile.TarInfo(name="real.md") + data = b"ok" + info.size = len(data) + tf.addfile(info, io.BytesIO(data)) + buf.seek(0) + with tarfile.open(fileobj=buf, mode="r") as tf: + _safe_extract_tar(tf, tmp_path) + assert (tmp_path / "real.md").exists() + assert not (tmp_path / "link.lnk").exists() diff --git a/tests/test_sdk.py b/tests/test_sdk.py index 120dffb..0a18c67 100644 --- a/tests/test_sdk.py +++ b/tests/test_sdk.py @@ -32,6 +32,7 @@ def test_generic_adaptor_satisfies_protocol(): assert isinstance(adaptor, PluginAdaptor) +@pytest.mark.asyncio async def test_generic_adaptor_installs_skills_and_rules(tmp_path: Path): plugin_root = tmp_path / "demo" (plugin_root / "rules").mkdir(parents=True) @@ -120,6 +121,7 @@ def test_validate_manifest_runtime_entry_must_be_string(tmp_path: Path): assert any("string" in e for e in errors) +@pytest.mark.asyncio async def test_generic_adaptor_installs_rules_and_skills_both(tmp_path: Path): """Full shape: rules + root fragment + skills + skip-list files + empty rule file.""" plugin_root = tmp_path / "demo" @@ -161,6 +163,7 @@ async def test_generic_adaptor_installs_rules_and_skills_both(tmp_path: Path): assert "# Plugin: demo /" not in (configs / "CLAUDE.md").read_text() +@pytest.mark.asyncio async def test_generic_adaptor_skips_existing_skill_dir(tmp_path: Path): """Idempotency: a skill dir already at /configs/skills// isn't clobbered.""" plugin_root = tmp_path / "demo" @@ -180,6 +183,7 @@ async def test_generic_adaptor_skips_existing_skill_dir(tmp_path: Path): assert (configs / "skills" / "s1" / "SKILL.md").read_text() == "# user wrote this" +@pytest.mark.asyncio async def test_generic_adaptor_uninstall_when_nothing_installed(tmp_path: Path): configs = tmp_path / "configs" configs.mkdir()