Merge pull request #12 from Molecule-AI/fix/ki-005-ki-007

fix(sdk): resolve KI-005 (secrets scan) and KI-007 (_is_hex guard)
This commit is contained in:
molecule-ai[bot] 2026-04-21 08:13:04 +00:00 committed by GitHub
commit b65867d6a5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 72 additions and 11 deletions

View File

@ -130,10 +130,11 @@ and only write to disk as a recovery fallback.
---
## KI-005 — `validate_plugin` does not check for secrets in bundle manifests
## KI-005 — `validate_manifest` does not check for secrets in bundle manifests
**File:** `molecule_plugin/manifest.py:validate_manifest`
**Status:** Not yet implemented
**Status:** ✅ Fixed — `_scan_for_secrets()` added; called from `validate_manifest`
**Resolved in:** `fix/ki-005-ki-007` branch
**Severity:** High
### Symptom
@ -191,7 +192,8 @@ the plugin content.
## KI-007 — `_is_hex` raises `TypeError` on non-string arguments instead of returning `False`
**File:** `molecule_agent/client.py:_is_hex`
**Status:** Identified
**Status:** ✅ Fixed — isinstance guard added
**Resolved in:** `fix/ki-005-ki-007` branch
**Severity:** Low
### Symptom

View File

@ -162,6 +162,8 @@ def verify_plugin_sha256(plugin_dir: Path, expected: str) -> bool:
def _is_hex(value: str) -> bool:
if not isinstance(value, str):
return False
try:
int(value, 16)
return True

View File

@ -105,9 +105,68 @@ def validate_manifest(path: str | Path) -> list[str]:
"`sha256` must contain only lowercase hex characters (09, af)"
)
# Secrets scan — no credentials may appear in any string value
for field_name, field_value in raw.items():
_scan_for_secrets(field_name, field_value, errors)
return errors
# ---------------------------------------------------------------------------
# Secrets scanning
# ---------------------------------------------------------------------------
# Patterns matching common credential formats.
# Anchored with word boundaries where possible to avoid false positives
# in legitimate content (e.g. "sk" inside "ask").
_SECRET_PATTERNS: list[tuple[str, re.Pattern[str]]] = [
# AWS / GCP / Azure keys
("AWS key pattern", re.compile(r"AKIA[0-9A-Z]{16}")),
# GitHub fine-grained / classic tokens
("GitHub token", re.compile(r"gh[pousr]_[A-Za-z0-9_]{36,}")),
# GitHub app token
("GitHub app token", re.compile(r"gho_[A-Za-z0-9_]{36}")),
# OpenAI / Anthropic / generic sk- keys
("OpenAI/Anthropic key", re.compile(r"\bsk-[A-Za-z0-9_-]{20,}\b")),
# Bearer tokens in JSON/YAML values
("Bearer token", re.compile(r'"[Bb]earer\s+[A-Za-z0-9_.-]+"')),
# Long hex strings that look like cryptographic keys (32+ bytes)
("long hex secret", re.compile(r"\b[0-9a-fA-F]{64,}\b")),
# Password assignment patterns in YAML values
("password assignment", re.compile(r"(?i)password\s*[=:]\s*['\"]?[A-Za-z0-9_/+=.-]{8,}")),
# API key assignment patterns
("api_key assignment", re.compile(r"(?i)api[_-]?key\s*[=:]\s*['\"]?[A-Za-z0-9_/+=.-]{16,}")),
# Secret / token assignment patterns
("secret assignment", re.compile(r"(?i)secret\s*[=:]\s*['\"]?[A-Za-z0-9_/+=.-]{16,}")),
# Generic bearer string in raw text
("raw bearer", re.compile(r"Bearer [A-Za-z0-9_.-]{20,}")),
]
def _scan_for_secrets(key: str, value: Any, errors: list[str]) -> None:
"""Recursively scan `value` (and all nested values) for secret patterns.
Appends error messages to `errors` when a match is found.
Skips `sha256` field since it's a content-addressed hash, not a secret.
"""
if key in ("sha256",):
return
if isinstance(value, str):
for description, pattern in _SECRET_PATTERNS:
if pattern.search(value):
errors.append(
f"possible secret detected in `{key}`: {description}"
"bundles must not contain credentials; use platform secrets instead"
)
break # report first match only to keep noise minimal
elif isinstance(value, dict):
for k, v in value.items():
_scan_for_secrets(k, v, errors)
elif isinstance(value, list):
for i, item in enumerate(value):
_scan_for_secrets(f"{key}[{i}]", item, errors)
# ---------------------------------------------------------------------------
# agentskills.io spec — SKILL.md validation
# ---------------------------------------------------------------------------

View File

@ -52,17 +52,15 @@ def test_is_hex_invalid_char():
def test_is_hex_non_string():
"""Non-strings fed to _is_hex must not raise unhandled TypeError.
"""Non-strings fed to _is_hex return False cleanly, not raise TypeError.
Python's int(None, 16) raises TypeError. The SDK implementation does not
guard with isinstance(value, str) first, so non-string values raise TypeError.
This test documents the current (unguarded) behaviour. A follow-up should add
isinstance(value, str) guard to _is_hex so it returns False cleanly.
Python's int(None, 16) raises TypeError. The SDK implementation guards
with isinstance(value, str) first, so non-string values return False
rather than surfacing a confusing TypeError.
"""
for val in (None, 123, [], {}):
# Currently raises TypeError — documents known gap
with pytest.raises(TypeError):
_is_hex(val)
# After the isinstance guard, non-strings return False cleanly
assert _is_hex(val) is False
# ---------------------------------------------------------------------------