fix(security): hitl task-id ownership + wire fail_open_if_no_scanner in loader (closes #265, #268)

Security audit cycle 13: hitl.py LGTM (workspace-scoped task IDs). Loader.py fix applied (commit 0557f73): fail_open_if_no_scanner now read from config and forwarded to scan_skill_dependencies(); regression test added. CI 5/6 pass (E2E cancel = run-supersession pattern). Closes #265. Closes #268.
This commit is contained in:
Hongming Wang 2026-04-15 21:18:52 -07:00 committed by GitHub
parent 2eec33a279
commit 0e46afa4b9
7 changed files with 189 additions and 16 deletions

View File

@ -99,15 +99,43 @@ class _TaskPauseRegistry:
def __init__(self) -> None:
self._events: dict[str, asyncio.Event] = {}
self._results: dict[str, dict] = {}
# #265: owner map — workspace_id that created each task.
# Empty string means "no owner / legacy" (bypasses ownership check).
self._owners: dict[str, str] = {}
def register(self, task_id: str) -> asyncio.Event:
"""Create and store an Event for *task_id*. Returns the event."""
def register(self, task_id: str, owner: str = "") -> asyncio.Event:
"""Create and store an Event for *task_id*. Returns the event.
Args:
task_id: Unique task identifier.
owner: Workspace ID that owns this task. When set, ``resume``
will reject callers from a different workspace.
"""
ev = asyncio.Event()
self._events[task_id] = ev
self._owners[task_id] = owner
return ev
def resume(self, task_id: str, result: dict | None = None) -> bool:
"""Signal the Event for *task_id*. Returns False if not registered."""
def resume(self, task_id: str, result: dict | None = None, owner: str = "") -> bool:
"""Signal the Event for *task_id*. Returns False if not registered.
Args:
task_id: The identifier used in ``register``.
result: Optional result payload forwarded to the waiting coroutine.
owner: Caller's workspace ID. When both the stored owner and
*owner* are non-empty and they differ, the call is rejected
(returns False) prevents cross-workspace prompt injection
(#265). Passing ``owner=""`` bypasses the check (used in
direct registry calls from tests and platform code).
"""
# #265 ownership check
stored_owner = self._owners.get(task_id, "")
if owner and stored_owner and owner != stored_owner:
logger.warning(
"HITL: resume rejected for task %s — caller workspace %r != owner %r",
task_id, owner, stored_owner,
)
return False
ev = self._events.get(task_id)
if ev is None:
return False
@ -120,9 +148,10 @@ class _TaskPauseRegistry:
return self._results.pop(task_id, {})
def cleanup(self, task_id: str) -> None:
"""Remove *task_id* from both dicts."""
"""Remove *task_id* from all dicts."""
self._events.pop(task_id, None)
self._results.pop(task_id, None)
self._owners.pop(task_id, None)
def list_paused(self) -> list[str]:
"""Return IDs of tasks whose events have not yet been set."""
@ -390,6 +419,12 @@ async def pause_task(task_id: str, reason: str = "") -> dict:
or any stable string that the caller can reference later).
reason: Human-readable description of why the task is pausing.
"""
# #265: record workspace ownership on registration so resume_task can
# reject callers from a different workspace (cross-workspace prompt-injection
# prevention). External task_id is unchanged — only internal ownership
# metadata is added, so no tests or callers need to update their task IDs.
_ws = os.environ.get("WORKSPACE_ID", "")
try:
from builtin_tools.audit import log_event
log_event(
@ -403,7 +438,7 @@ async def pause_task(task_id: str, reason: str = "") -> dict:
except Exception:
pass
event = pause_registry.register(task_id)
event = pause_registry.register(task_id, owner=_ws)
timeout = _load_hitl_config().default_timeout
logger.info("HITL: task %s paused — %s", task_id, reason or "(no reason given)")
@ -459,8 +494,12 @@ async def resume_task(task_id: str, message: str = "") -> dict:
task_id: The identifier passed to ``pause_task``.
message: Optional message forwarded to the resumed task.
"""
# #265: pass caller's workspace ID so the registry can reject a resume
# from a different workspace (ownership check in _TaskPauseRegistry.resume).
_ws = os.environ.get("WORKSPACE_ID", "")
result_payload = {"message": message} if message else {}
success = pause_registry.resume(task_id, result_payload)
success = pause_registry.resume(task_id, result_payload, owner=_ws)
if success:
logger.info("HITL: resume signal sent for task %s", task_id)

View File

@ -207,6 +207,7 @@ def scan_skill_dependencies(
skill_name: str,
skill_path: Path,
mode: str,
fail_open_if_no_scanner: bool = True,
) -> ScanResult:
"""Scan a skill's dependency file for known CVEs.
@ -214,13 +215,21 @@ def scan_skill_dependencies(
skill_name: Name of the skill (used in log messages and audit events).
skill_path: Absolute path to the skill's root directory.
mode: ``"block"`` | ``"warn"`` | ``"off"``
fail_open_if_no_scanner:
When *True* (default) silently skip scanning if neither snyk nor
pip-audit is in PATH. When *False* and ``mode="block"``, raise
:class:`SkillSecurityError` so operators know the gate is absent.
Corresponds to ``security_scan.fail_open_if_no_scanner`` in
config.yaml. Closes #268.
Returns:
A :class:`ScanResult` describing what was found.
Raises:
:class:`SkillSecurityError`: Only when ``mode="block"`` and one or
more critical/high severity CVEs are found.
:class:`SkillSecurityError`: When ``mode="block"`` and one or more
critical/high severity CVEs are found OR when
``mode="block"`` and ``fail_open_if_no_scanner=False`` and no
scanner is available.
"""
if mode == "off":
return ScanResult(skill_name=skill_name, scanner="none", requirements_file=None)
@ -269,6 +278,14 @@ def scan_skill_dependencies(
requirements_file=str(req_file),
mode=mode,
)
# #268: if fail_open_if_no_scanner=False and mode=block, the operator
# explicitly opted in to "fail closed" — raise so the missing scanner
# is visible rather than silently skipped.
if not fail_open_if_no_scanner and mode == "block":
raise SkillSecurityError(
f"Skill '{skill_name}' blocked: no scanner (snyk or pip-audit) "
f"found in PATH and fail_open_if_no_scanner=false"
)
return ScanResult(
skill_name=skill_name,
scanner="none",

View File

@ -156,6 +156,11 @@ class SecurityScanConfig:
mode: str = "warn"
"""One of: block | warn | off."""
fail_open_if_no_scanner: bool = True
"""When True (default), silently skip scanning if no scanner (snyk/pip-audit)
is in PATH. When False and mode='block', raise SkillSecurityError so that
operators who require a CVE gate know the gate is absent. Closes #268."""
@dataclass
class ComplianceConfig:
@ -332,6 +337,7 @@ def load_config(config_path: Optional[str] = None) -> WorkspaceConfig:
),
security_scan=SecurityScanConfig(
mode=security_scan_raw.get("mode", "warn"),
fail_open_if_no_scanner=security_scan_raw.get("fail_open_if_no_scanner", True),
),
compliance=ComplianceConfig(
mode=compliance_raw.get("mode", ""),

View File

@ -140,13 +140,15 @@ def load_skills(config_path: str, skill_names: list[str]) -> list[LoadedSkill]:
# Resolve security scan mode once before the loop
scan_mode = "warn"
fail_open_if_no_scanner = True # safe default matches security_scan.py default
if _SECURITY_SCAN_AVAILABLE:
try:
from config import load_config
_cfg = load_config(config_path)
scan_mode = _cfg.security_scan.mode
fail_open_if_no_scanner = _cfg.security_scan.fail_open_if_no_scanner
except Exception:
pass # use default "warn" — never block on config error
pass # use defaults — never block on config error
for skill_name in skill_names:
skill_path = skills_dir / skill_name
@ -159,7 +161,10 @@ def load_skills(config_path: str, skill_names: list[str]) -> list[LoadedSkill]:
# --- Security scan before loading any code from the skill ------------
if _SECURITY_SCAN_AVAILABLE and scan_mode != "off":
try:
scan_skill_dependencies(skill_name, skill_path, scan_mode)
scan_skill_dependencies(
skill_name, skill_path, scan_mode,
fail_open_if_no_scanner=fail_open_if_no_scanner,
)
except SkillSecurityError as exc:
logger.warning("Skipping skill '%s': blocked by security scan — %s", skill_name, exc)
continue

View File

@ -183,6 +183,26 @@ class TestPauseResumeTool:
assert result["success"] is False
assert "error" in result
@pytest.mark.asyncio
async def test_resume_task_from_different_workspace_rejected(self, monkeypatch):
# #265 regression: a task paused in workspace A must not be resumable
# from workspace B even when the attacker guesses task_id. Ownership
# is tracked as registry metadata; resume_task passes WORKSPACE_ID as
# owner and the registry rejects a mismatch.
mod = _load_hitl(monkeypatch)
reg = mod._TaskPauseRegistry()
monkeypatch.setattr(mod, "pause_registry", reg)
# Workspace A owns the task.
reg.register("secret-task", owner="ws-A")
# Switch process env to workspace B — resume_task will pass owner=ws-B.
monkeypatch.setenv("WORKSPACE_ID", "ws-B")
result = await mod.resume_task("secret-task", "pwned")
assert result["success"] is False
# Task is still registered; the legitimate owner can still resume it.
assert "secret-task" in reg.list_paused()
@pytest.mark.asyncio
async def test_list_paused_tasks_empty(self, monkeypatch):
mod = _load_hitl(monkeypatch)

View File

@ -364,6 +364,48 @@ class TestScanSkillDependencies:
call_kwargs = str(mock_audit.log_event.call_args)
assert "skipped" in call_kwargs
def test_scan_no_scanner_fail_closed_block_raises(
self, real_security_scan, monkeypatch, tmp_path
):
"""#268 regression: fail_open_if_no_scanner=False + mode='block' must
raise SkillSecurityError when neither snyk nor pip-audit is in PATH,
instead of silently skipping. The default fail_open=True path is
covered by test_scan_no_scanner_in_path above."""
mod, _mock_audit = real_security_scan
skill_path = tmp_path / "myskill"
skill_path.mkdir()
req = skill_path / "requirements.txt"
req.write_text("requests==2.28.0\n")
monkeypatch.setattr(mod.shutil, "which", lambda name: None)
with pytest.raises(mod.SkillSecurityError) as exc_info:
mod.scan_skill_dependencies(
"myskill", skill_path, "block", fail_open_if_no_scanner=False,
)
assert "fail_open_if_no_scanner=false" in str(exc_info.value)
assert "myskill" in str(exc_info.value)
def test_scan_no_scanner_fail_closed_warn_does_not_raise(
self, real_security_scan, monkeypatch, tmp_path
):
"""#268: fail_open_if_no_scanner=False should only raise in block mode.
In warn mode it must still return a skipped ScanResult so operators get
a warning without breaking boot."""
mod, _mock_audit = real_security_scan
skill_path = tmp_path / "myskill"
skill_path.mkdir()
req = skill_path / "requirements.txt"
req.write_text("requests==2.28.0\n")
monkeypatch.setattr(mod.shutil, "which", lambda name: None)
result = mod.scan_skill_dependencies(
"myskill", skill_path, "warn", fail_open_if_no_scanner=False,
)
assert result.scanner == "none"
assert result.scan_error is not None
def test_scan_snyk_clean(self, real_security_scan, monkeypatch, tmp_path):
"""shutil.which('snyk') → truthy, scanner returns clean output → clean result, audit logged."""
mod, mock_audit = real_security_scan

View File

@ -382,8 +382,8 @@ def test_load_skills_with_security_scan_available_warn_mode(tmp_path, monkeypatc
monkeypatch.setattr(loader_module, "_SECURITY_SCAN_AVAILABLE", True)
# Fake scan_skill_dependencies that just records calls
def fake_scan(skill_name, skill_path, mode):
scan_calls.append((skill_name, mode))
def fake_scan(skill_name, skill_path, mode, fail_open_if_no_scanner=True):
scan_calls.append((skill_name, mode, fail_open_if_no_scanner))
# Fake SkillSecurityError
class FakeSkillSecurityError(Exception):
@ -405,6 +405,7 @@ def test_load_skills_with_security_scan_available_warn_mode(tmp_path, monkeypatc
assert len(scan_calls) == 1
assert scan_calls[0][0] == "my-skill"
assert scan_calls[0][1] == "warn"
assert scan_calls[0][2] is True # default fail_open_if_no_scanner from SecurityScanConfig
def test_load_skills_security_scan_block_mode_skips_skill(tmp_path, monkeypatch):
@ -422,7 +423,7 @@ def test_load_skills_security_scan_block_mode_skips_skill(tmp_path, monkeypatch)
class FakeSkillSecurityError(Exception):
pass
def blocking_scan(skill_name, skill_path, mode):
def blocking_scan(skill_name, skill_path, mode, fail_open_if_no_scanner=True):
raise FakeSkillSecurityError("critical CVE found")
monkeypatch.setattr(loader_module, "scan_skill_dependencies", blocking_scan, raising=False)
@ -453,7 +454,7 @@ def test_load_skills_security_scan_off_mode_skips_scan(tmp_path, monkeypatch):
import skill_loader.loader as loader_module
monkeypatch.setattr(loader_module, "_SECURITY_SCAN_AVAILABLE", True)
def tracking_scan(skill_name, skill_path, mode):
def tracking_scan(skill_name, skill_path, mode, fail_open_if_no_scanner=True):
scan_calls.append(skill_name)
class FakeSkillSecurityError(Exception):
@ -488,7 +489,7 @@ def test_load_skills_config_load_error_defaults_to_warn(tmp_path, monkeypatch):
import skill_loader.loader as loader_module
monkeypatch.setattr(loader_module, "_SECURITY_SCAN_AVAILABLE", True)
def tracking_scan(skill_name, skill_path, mode):
def tracking_scan(skill_name, skill_path, mode, fail_open_if_no_scanner=True):
scan_modes.append(mode)
class FakeSkillSecurityError(Exception):
@ -597,3 +598,46 @@ def test_load_skills_missing_skill_md_logs_warning(tmp_path, caplog):
assert loaded == []
assert any("SKILL.md not found" in rec.message for rec in caplog.records)
def test_load_skills_fail_open_if_no_scanner_wiring(tmp_path, monkeypatch):
"""#268 regression: fail_open_if_no_scanner from config is forwarded to scan_skill_dependencies.
Previously load_skills read scan_mode from config but never read or passed
fail_open_if_no_scanner, so setting fail_open_if_no_scanner=false in
config.yaml had zero runtime effect.
"""
skill_dir = tmp_path / "skills" / "my-skill"
skill_dir.mkdir(parents=True)
(skill_dir / "SKILL.md").write_text(
"---\nname: My Skill\ndescription: Test\n---\nInstructions."
)
scan_kwargs: list[dict] = []
import skill_loader.loader as loader_module
monkeypatch.setattr(loader_module, "_SECURITY_SCAN_AVAILABLE", True)
def capturing_scan(skill_name, skill_path, mode, fail_open_if_no_scanner=True):
scan_kwargs.append({"mode": mode, "fail_open": fail_open_if_no_scanner})
class FakeSkillSecurityError(Exception):
pass
monkeypatch.setattr(loader_module, "scan_skill_dependencies", capturing_scan, raising=False)
monkeypatch.setattr(loader_module, "SkillSecurityError", FakeSkillSecurityError, raising=False)
from config import WorkspaceConfig, SecurityScanConfig
fake_cfg = WorkspaceConfig()
fake_cfg.security_scan = SecurityScanConfig(mode="block", fail_open_if_no_scanner=False)
with patch("skill_loader.loader.load_skill_tools", return_value=[]):
with patch("config.load_config", return_value=fake_cfg):
loader_module.load_skills(str(tmp_path), ["my-skill"])
assert len(scan_kwargs) == 1, "scan_skill_dependencies should have been called once"
assert scan_kwargs[0]["mode"] == "block"
assert scan_kwargs[0]["fail_open"] is False, (
"fail_open_if_no_scanner=False from config must be forwarded to scan_skill_dependencies"
)