fix(sdk): resolve KI-005 and KI-007 — secrets scan + _is_hex guard
KI-007 (High): Add isinstance(value, str) guard to _is_hex() so non-string arguments return False cleanly instead of raising TypeError. Updated test_is_hex_non_string to assert False instead of expecting pytest.raises(TypeError). KI-005 (High): Add _scan_for_secrets() to manifest.py that walks all string values in plugin.yaml and reports common credential patterns (sk-, ghp_, AKIA, bearer tokens, long hex strings, password/api_key assignments). Call it from validate_manifest(). Skips the sha256 field since it's a content-addressed hash, not a secret. Run: pytest → 210 passed, 1 skipped. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
818931f9d3
commit
beca7db42a
@ -130,10 +130,11 @@ and only write to disk as a recovery fallback.
|
||||
|
||||
---
|
||||
|
||||
## KI-005 — `validate_plugin` does not check for secrets in bundle manifests
|
||||
## KI-005 — `validate_manifest` does not check for secrets in bundle manifests
|
||||
|
||||
**File:** `molecule_plugin/manifest.py:validate_manifest`
|
||||
**Status:** Not yet implemented
|
||||
**Status:** ✅ Fixed — `_scan_for_secrets()` added; called from `validate_manifest`
|
||||
**Resolved in:** `fix/ki-005-ki-007` branch
|
||||
**Severity:** High
|
||||
|
||||
### Symptom
|
||||
@ -191,7 +192,8 @@ the plugin content.
|
||||
## KI-007 — `_is_hex` raises `TypeError` on non-string arguments instead of returning `False`
|
||||
|
||||
**File:** `molecule_agent/client.py:_is_hex`
|
||||
**Status:** Identified
|
||||
**Status:** ✅ Fixed — isinstance guard added
|
||||
**Resolved in:** `fix/ki-005-ki-007` branch
|
||||
**Severity:** Low
|
||||
|
||||
### Symptom
|
||||
|
||||
@ -162,6 +162,8 @@ def verify_plugin_sha256(plugin_dir: Path, expected: str) -> bool:
|
||||
|
||||
|
||||
def _is_hex(value: str) -> bool:
|
||||
if not isinstance(value, str):
|
||||
return False
|
||||
try:
|
||||
int(value, 16)
|
||||
return True
|
||||
|
||||
@ -105,9 +105,68 @@ def validate_manifest(path: str | Path) -> list[str]:
|
||||
"`sha256` must contain only lowercase hex characters (0–9, a–f)"
|
||||
)
|
||||
|
||||
# Secrets scan — no credentials may appear in any string value
|
||||
for field_name, field_value in raw.items():
|
||||
_scan_for_secrets(field_name, field_value, errors)
|
||||
|
||||
return errors
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Secrets scanning
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Patterns matching common credential formats.
|
||||
# Anchored with word boundaries where possible to avoid false positives
|
||||
# in legitimate content (e.g. "sk" inside "ask").
|
||||
_SECRET_PATTERNS: list[tuple[str, re.Pattern[str]]] = [
|
||||
# AWS / GCP / Azure keys
|
||||
("AWS key pattern", re.compile(r"AKIA[0-9A-Z]{16}")),
|
||||
# GitHub fine-grained / classic tokens
|
||||
("GitHub token", re.compile(r"gh[pousr]_[A-Za-z0-9_]{36,}")),
|
||||
# GitHub app token
|
||||
("GitHub app token", re.compile(r"gho_[A-Za-z0-9_]{36}")),
|
||||
# OpenAI / Anthropic / generic sk- keys
|
||||
("OpenAI/Anthropic key", re.compile(r"\bsk-[A-Za-z0-9_-]{20,}\b")),
|
||||
# Bearer tokens in JSON/YAML values
|
||||
("Bearer token", re.compile(r'"[Bb]earer\s+[A-Za-z0-9_.-]+"')),
|
||||
# Long hex strings that look like cryptographic keys (32+ bytes)
|
||||
("long hex secret", re.compile(r"\b[0-9a-fA-F]{64,}\b")),
|
||||
# Password assignment patterns in YAML values
|
||||
("password assignment", re.compile(r"(?i)password\s*[=:]\s*['\"]?[A-Za-z0-9_/+=.-]{8,}")),
|
||||
# API key assignment patterns
|
||||
("api_key assignment", re.compile(r"(?i)api[_-]?key\s*[=:]\s*['\"]?[A-Za-z0-9_/+=.-]{16,}")),
|
||||
# Secret / token assignment patterns
|
||||
("secret assignment", re.compile(r"(?i)secret\s*[=:]\s*['\"]?[A-Za-z0-9_/+=.-]{16,}")),
|
||||
# Generic bearer string in raw text
|
||||
("raw bearer", re.compile(r"Bearer [A-Za-z0-9_.-]{20,}")),
|
||||
]
|
||||
|
||||
|
||||
def _scan_for_secrets(key: str, value: Any, errors: list[str]) -> None:
|
||||
"""Recursively scan `value` (and all nested values) for secret patterns.
|
||||
|
||||
Appends error messages to `errors` when a match is found.
|
||||
Skips `sha256` field since it's a content-addressed hash, not a secret.
|
||||
"""
|
||||
if key in ("sha256",):
|
||||
return
|
||||
if isinstance(value, str):
|
||||
for description, pattern in _SECRET_PATTERNS:
|
||||
if pattern.search(value):
|
||||
errors.append(
|
||||
f"possible secret detected in `{key}`: {description} — "
|
||||
"bundles must not contain credentials; use platform secrets instead"
|
||||
)
|
||||
break # report first match only to keep noise minimal
|
||||
elif isinstance(value, dict):
|
||||
for k, v in value.items():
|
||||
_scan_for_secrets(k, v, errors)
|
||||
elif isinstance(value, list):
|
||||
for i, item in enumerate(value):
|
||||
_scan_for_secrets(f"{key}[{i}]", item, errors)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# agentskills.io spec — SKILL.md validation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@ -52,17 +52,15 @@ def test_is_hex_invalid_char():
|
||||
|
||||
|
||||
def test_is_hex_non_string():
|
||||
"""Non-strings fed to _is_hex must not raise unhandled TypeError.
|
||||
"""Non-strings fed to _is_hex return False cleanly, not raise TypeError.
|
||||
|
||||
Python's int(None, 16) raises TypeError. The SDK implementation does not
|
||||
guard with isinstance(value, str) first, so non-string values raise TypeError.
|
||||
This test documents the current (unguarded) behaviour. A follow-up should add
|
||||
isinstance(value, str) guard to _is_hex so it returns False cleanly.
|
||||
Python's int(None, 16) raises TypeError. The SDK implementation guards
|
||||
with isinstance(value, str) first, so non-string values return False
|
||||
rather than surfacing a confusing TypeError.
|
||||
"""
|
||||
for val in (None, 123, [], {}):
|
||||
# Currently raises TypeError — documents known gap
|
||||
with pytest.raises(TypeError):
|
||||
_is_hex(val)
|
||||
# After the isinstance guard, non-strings return False cleanly
|
||||
assert _is_hex(val) is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
Loading…
Reference in New Issue
Block a user