diff --git a/known-issues.md b/known-issues.md index cf30fb8..af96770 100644 --- a/known-issues.md +++ b/known-issues.md @@ -130,10 +130,11 @@ and only write to disk as a recovery fallback. --- -## KI-005 — `validate_plugin` does not check for secrets in bundle manifests +## KI-005 — `validate_manifest` does not check for secrets in bundle manifests **File:** `molecule_plugin/manifest.py:validate_manifest` -**Status:** Not yet implemented +**Status:** ✅ Fixed — `_scan_for_secrets()` added; called from `validate_manifest` +**Resolved in:** `fix/ki-005-ki-007` branch **Severity:** High ### Symptom @@ -191,7 +192,8 @@ the plugin content. ## KI-007 — `_is_hex` raises `TypeError` on non-string arguments instead of returning `False` **File:** `molecule_agent/client.py:_is_hex` -**Status:** Identified +**Status:** ✅ Fixed — isinstance guard added +**Resolved in:** `fix/ki-005-ki-007` branch **Severity:** Low ### Symptom diff --git a/molecule_agent/client.py b/molecule_agent/client.py index d378878..e0ee142 100644 --- a/molecule_agent/client.py +++ b/molecule_agent/client.py @@ -162,6 +162,8 @@ def verify_plugin_sha256(plugin_dir: Path, expected: str) -> bool: def _is_hex(value: str) -> bool: + if not isinstance(value, str): + return False try: int(value, 16) return True diff --git a/molecule_plugin/manifest.py b/molecule_plugin/manifest.py index 8e191ce..50a53cc 100644 --- a/molecule_plugin/manifest.py +++ b/molecule_plugin/manifest.py @@ -105,9 +105,68 @@ def validate_manifest(path: str | Path) -> list[str]: "`sha256` must contain only lowercase hex characters (0–9, a–f)" ) + # Secrets scan — no credentials may appear in any string value + for field_name, field_value in raw.items(): + _scan_for_secrets(field_name, field_value, errors) + return errors +# --------------------------------------------------------------------------- +# Secrets scanning +# --------------------------------------------------------------------------- + +# Patterns matching common credential formats. +# Anchored with word boundaries where possible to avoid false positives +# in legitimate content (e.g. "sk" inside "ask"). +_SECRET_PATTERNS: list[tuple[str, re.Pattern[str]]] = [ + # AWS / GCP / Azure keys + ("AWS key pattern", re.compile(r"AKIA[0-9A-Z]{16}")), + # GitHub fine-grained / classic tokens + ("GitHub token", re.compile(r"gh[pousr]_[A-Za-z0-9_]{36,}")), + # GitHub app token + ("GitHub app token", re.compile(r"gho_[A-Za-z0-9_]{36}")), + # OpenAI / Anthropic / generic sk- keys + ("OpenAI/Anthropic key", re.compile(r"\bsk-[A-Za-z0-9_-]{20,}\b")), + # Bearer tokens in JSON/YAML values + ("Bearer token", re.compile(r'"[Bb]earer\s+[A-Za-z0-9_.-]+"')), + # Long hex strings that look like cryptographic keys (32+ bytes) + ("long hex secret", re.compile(r"\b[0-9a-fA-F]{64,}\b")), + # Password assignment patterns in YAML values + ("password assignment", re.compile(r"(?i)password\s*[=:]\s*['\"]?[A-Za-z0-9_/+=.-]{8,}")), + # API key assignment patterns + ("api_key assignment", re.compile(r"(?i)api[_-]?key\s*[=:]\s*['\"]?[A-Za-z0-9_/+=.-]{16,}")), + # Secret / token assignment patterns + ("secret assignment", re.compile(r"(?i)secret\s*[=:]\s*['\"]?[A-Za-z0-9_/+=.-]{16,}")), + # Generic bearer string in raw text + ("raw bearer", re.compile(r"Bearer [A-Za-z0-9_.-]{20,}")), +] + + +def _scan_for_secrets(key: str, value: Any, errors: list[str]) -> None: + """Recursively scan `value` (and all nested values) for secret patterns. + + Appends error messages to `errors` when a match is found. + Skips `sha256` field since it's a content-addressed hash, not a secret. + """ + if key in ("sha256",): + return + if isinstance(value, str): + for description, pattern in _SECRET_PATTERNS: + if pattern.search(value): + errors.append( + f"possible secret detected in `{key}`: {description} — " + "bundles must not contain credentials; use platform secrets instead" + ) + break # report first match only to keep noise minimal + elif isinstance(value, dict): + for k, v in value.items(): + _scan_for_secrets(k, v, errors) + elif isinstance(value, list): + for i, item in enumerate(value): + _scan_for_secrets(f"{key}[{i}]", item, errors) + + # --------------------------------------------------------------------------- # agentskills.io spec — SKILL.md validation # --------------------------------------------------------------------------- diff --git a/tests/test_sha256_verification.py b/tests/test_sha256_verification.py index 4312088..eb1d6b2 100644 --- a/tests/test_sha256_verification.py +++ b/tests/test_sha256_verification.py @@ -52,17 +52,15 @@ def test_is_hex_invalid_char(): def test_is_hex_non_string(): - """Non-strings fed to _is_hex must not raise unhandled TypeError. + """Non-strings fed to _is_hex return False cleanly, not raise TypeError. - Python's int(None, 16) raises TypeError. The SDK implementation does not - guard with isinstance(value, str) first, so non-string values raise TypeError. - This test documents the current (unguarded) behaviour. A follow-up should add - isinstance(value, str) guard to _is_hex so it returns False cleanly. + Python's int(None, 16) raises TypeError. The SDK implementation guards + with isinstance(value, str) first, so non-string values return False + rather than surfacing a confusing TypeError. """ for val in (None, 123, [], {}): - # Currently raises TypeError — documents known gap - with pytest.raises(TypeError): - _is_hex(val) + # After the isinstance guard, non-strings return False cleanly + assert _is_hex(val) is False # ---------------------------------------------------------------------------