From c11d8f3ec3ca88726b141047768a4990e793a3f5 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Wed, 15 Apr 2026 21:18:52 -0700 Subject: [PATCH] fix(security): hitl task-id ownership + wire fail_open_if_no_scanner in loader (closes #265, #268) Security audit cycle 13: hitl.py LGTM (workspace-scoped task IDs). Loader.py fix applied (commit 0557f73): fail_open_if_no_scanner now read from config and forwarded to scan_skill_dependencies(); regression test added. CI 5/6 pass (E2E cancel = run-supersession pattern). Closes #265. Closes #268. --- workspace-template/builtin_tools/hitl.py | 53 +++++++++++++++--- .../builtin_tools/security_scan.py | 21 +++++++- workspace-template/config.py | 6 +++ workspace-template/skill_loader/loader.py | 9 +++- workspace-template/tests/test_hitl.py | 20 +++++++ .../tests/test_security_scan.py | 42 +++++++++++++++ .../tests/test_skills_loader.py | 54 +++++++++++++++++-- 7 files changed, 189 insertions(+), 16 deletions(-) diff --git a/workspace-template/builtin_tools/hitl.py b/workspace-template/builtin_tools/hitl.py index 8b056051..d7bccc2d 100644 --- a/workspace-template/builtin_tools/hitl.py +++ b/workspace-template/builtin_tools/hitl.py @@ -99,15 +99,43 @@ class _TaskPauseRegistry: def __init__(self) -> None: self._events: dict[str, asyncio.Event] = {} self._results: dict[str, dict] = {} + # #265: owner map — workspace_id that created each task. + # Empty string means "no owner / legacy" (bypasses ownership check). + self._owners: dict[str, str] = {} - def register(self, task_id: str) -> asyncio.Event: - """Create and store an Event for *task_id*. Returns the event.""" + def register(self, task_id: str, owner: str = "") -> asyncio.Event: + """Create and store an Event for *task_id*. Returns the event. + + Args: + task_id: Unique task identifier. + owner: Workspace ID that owns this task. When set, ``resume`` + will reject callers from a different workspace. + """ ev = asyncio.Event() self._events[task_id] = ev + self._owners[task_id] = owner return ev - def resume(self, task_id: str, result: dict | None = None) -> bool: - """Signal the Event for *task_id*. Returns False if not registered.""" + def resume(self, task_id: str, result: dict | None = None, owner: str = "") -> bool: + """Signal the Event for *task_id*. Returns False if not registered. + + Args: + task_id: The identifier used in ``register``. + result: Optional result payload forwarded to the waiting coroutine. + owner: Caller's workspace ID. When both the stored owner and + *owner* are non-empty and they differ, the call is rejected + (returns False) — prevents cross-workspace prompt injection + (#265). Passing ``owner=""`` bypasses the check (used in + direct registry calls from tests and platform code). + """ + # #265 ownership check + stored_owner = self._owners.get(task_id, "") + if owner and stored_owner and owner != stored_owner: + logger.warning( + "HITL: resume rejected for task %s — caller workspace %r != owner %r", + task_id, owner, stored_owner, + ) + return False ev = self._events.get(task_id) if ev is None: return False @@ -120,9 +148,10 @@ class _TaskPauseRegistry: return self._results.pop(task_id, {}) def cleanup(self, task_id: str) -> None: - """Remove *task_id* from both dicts.""" + """Remove *task_id* from all dicts.""" self._events.pop(task_id, None) self._results.pop(task_id, None) + self._owners.pop(task_id, None) def list_paused(self) -> list[str]: """Return IDs of tasks whose events have not yet been set.""" @@ -390,6 +419,12 @@ async def pause_task(task_id: str, reason: str = "") -> dict: or any stable string that the caller can reference later). reason: Human-readable description of why the task is pausing. """ + # #265: record workspace ownership on registration so resume_task can + # reject callers from a different workspace (cross-workspace prompt-injection + # prevention). External task_id is unchanged — only internal ownership + # metadata is added, so no tests or callers need to update their task IDs. + _ws = os.environ.get("WORKSPACE_ID", "") + try: from builtin_tools.audit import log_event log_event( @@ -403,7 +438,7 @@ async def pause_task(task_id: str, reason: str = "") -> dict: except Exception: pass - event = pause_registry.register(task_id) + event = pause_registry.register(task_id, owner=_ws) timeout = _load_hitl_config().default_timeout logger.info("HITL: task %s paused — %s", task_id, reason or "(no reason given)") @@ -459,8 +494,12 @@ async def resume_task(task_id: str, message: str = "") -> dict: task_id: The identifier passed to ``pause_task``. message: Optional message forwarded to the resumed task. """ + # #265: pass caller's workspace ID so the registry can reject a resume + # from a different workspace (ownership check in _TaskPauseRegistry.resume). + _ws = os.environ.get("WORKSPACE_ID", "") + result_payload = {"message": message} if message else {} - success = pause_registry.resume(task_id, result_payload) + success = pause_registry.resume(task_id, result_payload, owner=_ws) if success: logger.info("HITL: resume signal sent for task %s", task_id) diff --git a/workspace-template/builtin_tools/security_scan.py b/workspace-template/builtin_tools/security_scan.py index 6698f330..214e5fb3 100644 --- a/workspace-template/builtin_tools/security_scan.py +++ b/workspace-template/builtin_tools/security_scan.py @@ -207,6 +207,7 @@ def scan_skill_dependencies( skill_name: str, skill_path: Path, mode: str, + fail_open_if_no_scanner: bool = True, ) -> ScanResult: """Scan a skill's dependency file for known CVEs. @@ -214,13 +215,21 @@ def scan_skill_dependencies( skill_name: Name of the skill (used in log messages and audit events). skill_path: Absolute path to the skill's root directory. mode: ``"block"`` | ``"warn"`` | ``"off"`` + fail_open_if_no_scanner: + When *True* (default) silently skip scanning if neither snyk nor + pip-audit is in PATH. When *False* and ``mode="block"``, raise + :class:`SkillSecurityError` so operators know the gate is absent. + Corresponds to ``security_scan.fail_open_if_no_scanner`` in + config.yaml. Closes #268. Returns: A :class:`ScanResult` describing what was found. Raises: - :class:`SkillSecurityError`: Only when ``mode="block"`` and one or - more critical/high severity CVEs are found. + :class:`SkillSecurityError`: When ``mode="block"`` and one or more + critical/high severity CVEs are found — OR when + ``mode="block"`` and ``fail_open_if_no_scanner=False`` and no + scanner is available. """ if mode == "off": return ScanResult(skill_name=skill_name, scanner="none", requirements_file=None) @@ -269,6 +278,14 @@ def scan_skill_dependencies( requirements_file=str(req_file), mode=mode, ) + # #268: if fail_open_if_no_scanner=False and mode=block, the operator + # explicitly opted in to "fail closed" — raise so the missing scanner + # is visible rather than silently skipped. + if not fail_open_if_no_scanner and mode == "block": + raise SkillSecurityError( + f"Skill '{skill_name}' blocked: no scanner (snyk or pip-audit) " + f"found in PATH and fail_open_if_no_scanner=false" + ) return ScanResult( skill_name=skill_name, scanner="none", diff --git a/workspace-template/config.py b/workspace-template/config.py index 19f34d62..6f7dbc53 100644 --- a/workspace-template/config.py +++ b/workspace-template/config.py @@ -156,6 +156,11 @@ class SecurityScanConfig: mode: str = "warn" """One of: block | warn | off.""" + fail_open_if_no_scanner: bool = True + """When True (default), silently skip scanning if no scanner (snyk/pip-audit) + is in PATH. When False and mode='block', raise SkillSecurityError so that + operators who require a CVE gate know the gate is absent. Closes #268.""" + @dataclass class ComplianceConfig: @@ -332,6 +337,7 @@ def load_config(config_path: Optional[str] = None) -> WorkspaceConfig: ), security_scan=SecurityScanConfig( mode=security_scan_raw.get("mode", "warn"), + fail_open_if_no_scanner=security_scan_raw.get("fail_open_if_no_scanner", True), ), compliance=ComplianceConfig( mode=compliance_raw.get("mode", ""), diff --git a/workspace-template/skill_loader/loader.py b/workspace-template/skill_loader/loader.py index 99f353a2..05333564 100644 --- a/workspace-template/skill_loader/loader.py +++ b/workspace-template/skill_loader/loader.py @@ -140,13 +140,15 @@ def load_skills(config_path: str, skill_names: list[str]) -> list[LoadedSkill]: # Resolve security scan mode once before the loop scan_mode = "warn" + fail_open_if_no_scanner = True # safe default matches security_scan.py default if _SECURITY_SCAN_AVAILABLE: try: from config import load_config _cfg = load_config(config_path) scan_mode = _cfg.security_scan.mode + fail_open_if_no_scanner = _cfg.security_scan.fail_open_if_no_scanner except Exception: - pass # use default "warn" — never block on config error + pass # use defaults — never block on config error for skill_name in skill_names: skill_path = skills_dir / skill_name @@ -159,7 +161,10 @@ def load_skills(config_path: str, skill_names: list[str]) -> list[LoadedSkill]: # --- Security scan before loading any code from the skill ------------ if _SECURITY_SCAN_AVAILABLE and scan_mode != "off": try: - scan_skill_dependencies(skill_name, skill_path, scan_mode) + scan_skill_dependencies( + skill_name, skill_path, scan_mode, + fail_open_if_no_scanner=fail_open_if_no_scanner, + ) except SkillSecurityError as exc: logger.warning("Skipping skill '%s': blocked by security scan — %s", skill_name, exc) continue diff --git a/workspace-template/tests/test_hitl.py b/workspace-template/tests/test_hitl.py index 3469f222..78fe49ce 100644 --- a/workspace-template/tests/test_hitl.py +++ b/workspace-template/tests/test_hitl.py @@ -183,6 +183,26 @@ class TestPauseResumeTool: assert result["success"] is False assert "error" in result + @pytest.mark.asyncio + async def test_resume_task_from_different_workspace_rejected(self, monkeypatch): + # #265 regression: a task paused in workspace A must not be resumable + # from workspace B even when the attacker guesses task_id. Ownership + # is tracked as registry metadata; resume_task passes WORKSPACE_ID as + # owner and the registry rejects a mismatch. + mod = _load_hitl(monkeypatch) + reg = mod._TaskPauseRegistry() + monkeypatch.setattr(mod, "pause_registry", reg) + # Workspace A owns the task. + reg.register("secret-task", owner="ws-A") + + # Switch process env to workspace B — resume_task will pass owner=ws-B. + monkeypatch.setenv("WORKSPACE_ID", "ws-B") + result = await mod.resume_task("secret-task", "pwned") + + assert result["success"] is False + # Task is still registered; the legitimate owner can still resume it. + assert "secret-task" in reg.list_paused() + @pytest.mark.asyncio async def test_list_paused_tasks_empty(self, monkeypatch): mod = _load_hitl(monkeypatch) diff --git a/workspace-template/tests/test_security_scan.py b/workspace-template/tests/test_security_scan.py index 135d3e21..e28937f2 100644 --- a/workspace-template/tests/test_security_scan.py +++ b/workspace-template/tests/test_security_scan.py @@ -364,6 +364,48 @@ class TestScanSkillDependencies: call_kwargs = str(mock_audit.log_event.call_args) assert "skipped" in call_kwargs + def test_scan_no_scanner_fail_closed_block_raises( + self, real_security_scan, monkeypatch, tmp_path + ): + """#268 regression: fail_open_if_no_scanner=False + mode='block' must + raise SkillSecurityError when neither snyk nor pip-audit is in PATH, + instead of silently skipping. The default fail_open=True path is + covered by test_scan_no_scanner_in_path above.""" + mod, _mock_audit = real_security_scan + skill_path = tmp_path / "myskill" + skill_path.mkdir() + req = skill_path / "requirements.txt" + req.write_text("requests==2.28.0\n") + + monkeypatch.setattr(mod.shutil, "which", lambda name: None) + + with pytest.raises(mod.SkillSecurityError) as exc_info: + mod.scan_skill_dependencies( + "myskill", skill_path, "block", fail_open_if_no_scanner=False, + ) + assert "fail_open_if_no_scanner=false" in str(exc_info.value) + assert "myskill" in str(exc_info.value) + + def test_scan_no_scanner_fail_closed_warn_does_not_raise( + self, real_security_scan, monkeypatch, tmp_path + ): + """#268: fail_open_if_no_scanner=False should only raise in block mode. + In warn mode it must still return a skipped ScanResult so operators get + a warning without breaking boot.""" + mod, _mock_audit = real_security_scan + skill_path = tmp_path / "myskill" + skill_path.mkdir() + req = skill_path / "requirements.txt" + req.write_text("requests==2.28.0\n") + + monkeypatch.setattr(mod.shutil, "which", lambda name: None) + + result = mod.scan_skill_dependencies( + "myskill", skill_path, "warn", fail_open_if_no_scanner=False, + ) + assert result.scanner == "none" + assert result.scan_error is not None + def test_scan_snyk_clean(self, real_security_scan, monkeypatch, tmp_path): """shutil.which('snyk') → truthy, scanner returns clean output → clean result, audit logged.""" mod, mock_audit = real_security_scan diff --git a/workspace-template/tests/test_skills_loader.py b/workspace-template/tests/test_skills_loader.py index 3e07f218..7f2cae98 100644 --- a/workspace-template/tests/test_skills_loader.py +++ b/workspace-template/tests/test_skills_loader.py @@ -382,8 +382,8 @@ def test_load_skills_with_security_scan_available_warn_mode(tmp_path, monkeypatc monkeypatch.setattr(loader_module, "_SECURITY_SCAN_AVAILABLE", True) # Fake scan_skill_dependencies that just records calls - def fake_scan(skill_name, skill_path, mode): - scan_calls.append((skill_name, mode)) + def fake_scan(skill_name, skill_path, mode, fail_open_if_no_scanner=True): + scan_calls.append((skill_name, mode, fail_open_if_no_scanner)) # Fake SkillSecurityError class FakeSkillSecurityError(Exception): @@ -405,6 +405,7 @@ def test_load_skills_with_security_scan_available_warn_mode(tmp_path, monkeypatc assert len(scan_calls) == 1 assert scan_calls[0][0] == "my-skill" assert scan_calls[0][1] == "warn" + assert scan_calls[0][2] is True # default fail_open_if_no_scanner from SecurityScanConfig def test_load_skills_security_scan_block_mode_skips_skill(tmp_path, monkeypatch): @@ -422,7 +423,7 @@ def test_load_skills_security_scan_block_mode_skips_skill(tmp_path, monkeypatch) class FakeSkillSecurityError(Exception): pass - def blocking_scan(skill_name, skill_path, mode): + def blocking_scan(skill_name, skill_path, mode, fail_open_if_no_scanner=True): raise FakeSkillSecurityError("critical CVE found") monkeypatch.setattr(loader_module, "scan_skill_dependencies", blocking_scan, raising=False) @@ -453,7 +454,7 @@ def test_load_skills_security_scan_off_mode_skips_scan(tmp_path, monkeypatch): import skill_loader.loader as loader_module monkeypatch.setattr(loader_module, "_SECURITY_SCAN_AVAILABLE", True) - def tracking_scan(skill_name, skill_path, mode): + def tracking_scan(skill_name, skill_path, mode, fail_open_if_no_scanner=True): scan_calls.append(skill_name) class FakeSkillSecurityError(Exception): @@ -488,7 +489,7 @@ def test_load_skills_config_load_error_defaults_to_warn(tmp_path, monkeypatch): import skill_loader.loader as loader_module monkeypatch.setattr(loader_module, "_SECURITY_SCAN_AVAILABLE", True) - def tracking_scan(skill_name, skill_path, mode): + def tracking_scan(skill_name, skill_path, mode, fail_open_if_no_scanner=True): scan_modes.append(mode) class FakeSkillSecurityError(Exception): @@ -597,3 +598,46 @@ def test_load_skills_missing_skill_md_logs_warning(tmp_path, caplog): assert loaded == [] assert any("SKILL.md not found" in rec.message for rec in caplog.records) + + +def test_load_skills_fail_open_if_no_scanner_wiring(tmp_path, monkeypatch): + """#268 regression: fail_open_if_no_scanner from config is forwarded to scan_skill_dependencies. + + Previously load_skills read scan_mode from config but never read or passed + fail_open_if_no_scanner, so setting fail_open_if_no_scanner=false in + config.yaml had zero runtime effect. + """ + skill_dir = tmp_path / "skills" / "my-skill" + skill_dir.mkdir(parents=True) + (skill_dir / "SKILL.md").write_text( + "---\nname: My Skill\ndescription: Test\n---\nInstructions." + ) + + scan_kwargs: list[dict] = [] + + import skill_loader.loader as loader_module + + monkeypatch.setattr(loader_module, "_SECURITY_SCAN_AVAILABLE", True) + + def capturing_scan(skill_name, skill_path, mode, fail_open_if_no_scanner=True): + scan_kwargs.append({"mode": mode, "fail_open": fail_open_if_no_scanner}) + + class FakeSkillSecurityError(Exception): + pass + + monkeypatch.setattr(loader_module, "scan_skill_dependencies", capturing_scan, raising=False) + monkeypatch.setattr(loader_module, "SkillSecurityError", FakeSkillSecurityError, raising=False) + + from config import WorkspaceConfig, SecurityScanConfig + fake_cfg = WorkspaceConfig() + fake_cfg.security_scan = SecurityScanConfig(mode="block", fail_open_if_no_scanner=False) + + with patch("skill_loader.loader.load_skill_tools", return_value=[]): + with patch("config.load_config", return_value=fake_cfg): + loader_module.load_skills(str(tmp_path), ["my-skill"]) + + assert len(scan_kwargs) == 1, "scan_skill_dependencies should have been called once" + assert scan_kwargs[0]["mode"] == "block" + assert scan_kwargs[0]["fail_open"] is False, ( + "fail_open_if_no_scanner=False from config must be forwarded to scan_skill_dependencies" + )