diff --git a/agent/curator.py b/agent/curator.py index 2eebe10e..cce3d8c1 100644 --- a/agent/curator.py +++ b/agent/curator.py @@ -387,6 +387,11 @@ CURATOR_REVIEW_PROMPT = ( " - skill_manage action=write_file — add a references/, templates/, " "or scripts/ file under an existing skill (the skill must already " "exist)\n" + " - skill_manage action=delete — archive a skill. MUST pass " + "`absorbed_into=` when you've merged its content into another " + "skill, or `absorbed_into=\"\"` when you're truly pruning with no " + "forwarding target. This drives cron-job skill-reference migration — " + "guessing from your YAML summary after the fact is fragile.\n" " - terminal — mv a sibling into the archive " "OR move its content into a support subfile\n\n" "'keep' is a legitimate decision ONLY when the skill is already a " @@ -637,15 +642,76 @@ def _parse_structured_summary( return out +def _extract_absorbed_into_declarations( + tool_calls: List[Dict[str, Any]], +) -> Dict[str, Dict[str, Any]]: + """Walk this run's tool calls and extract model-declared absorption targets. + + The curator prompt requires every ``skill_manage(action='delete')`` call + to pass ``absorbed_into=`` when consolidating, or + ``absorbed_into=""`` when truly pruning. This is the single authoritative + signal for classification — the model's own declaration at the moment of + deletion, which beats both post-hoc YAML summary parsing and substring + heuristics on other tool calls. + + Returns ``{skill_name: {"into": "" | "", "declared": True}}``. + Entries with ``into == ""`` are explicit prunings. + Skills without a ``skill_manage(delete)`` call, or with one that omitted + ``absorbed_into``, are not in the returned dict — caller falls back to + the existing heuristic/YAML logic for those (backward compat with older + curator runs and any callers that don't populate the arg). 
+ """ + out: Dict[str, Dict[str, Any]] = {} + for tc in tool_calls or []: + if not isinstance(tc, dict): + continue + if tc.get("name") != "skill_manage": + continue + raw = tc.get("arguments") or "" + args: Dict[str, Any] = {} + if isinstance(raw, dict): + args = raw + elif isinstance(raw, str): + try: + args = json.loads(raw) + except Exception: + continue + if not isinstance(args, dict): + continue + if args.get("action") != "delete": + continue + name = args.get("name") + if not isinstance(name, str) or not name.strip(): + continue + # absorbed_into must be present (even empty string is meaningful); + # missing key means the model didn't declare intent. + if "absorbed_into" not in args: + continue + target = args.get("absorbed_into") + if target is None: + continue + if not isinstance(target, str): + continue + out[name.strip()] = {"into": target.strip(), "declared": True} + return out + + def _reconcile_classification( removed: List[str], heuristic: Dict[str, List[Dict[str, Any]]], model_block: Dict[str, List[Dict[str, str]]], destinations: Set[str], + absorbed_declarations: Optional[Dict[str, Dict[str, Any]]] = None, ) -> Dict[str, List[Dict[str, Any]]]: """Merge heuristic (tool-call evidence) with the model's structured block. - Rules: + Rules (evaluated in order; first match wins): + - **Model-declared `absorbed_into` at delete time is authoritative.** Any + entry in ``absorbed_declarations`` beats every other signal. This is + the model telling us directly, at the moment of deletion, what it did. + ``into != ""`` and target exists → consolidated. ``into == ""`` → + pruned. ``into != ""`` but target doesn't exist → hallucination; fall + through to the usual signals. - Model-declared consolidation wins when its ``into`` target exists in ``destinations`` (survived or newly-created). This gives the model authority over intent + rationale. 
@@ -666,6 +732,8 @@ def _reconcile_classification( model_cons = {e["from"]: e for e in model_block.get("consolidations", [])} model_pruned = {e["name"]: e for e in model_block.get("prunings", [])} + declared = absorbed_declarations or {} + consolidated: List[Dict[str, Any]] = [] pruned: List[Dict[str, Any]] = [] @@ -673,6 +741,36 @@ def _reconcile_classification( mc = model_cons.get(name) mp = model_pruned.get(name) hc = heur_cons.get(name) + dec = declared.get(name) + + # Authoritative: model declared `absorbed_into` at the delete call. + if dec is not None: + into_claim = dec.get("into", "") + if into_claim and into_claim in destinations: + entry: Dict[str, Any] = { + "name": name, + "into": into_claim, + "source": "absorbed_into (model-declared at delete)", + "reason": (mc.get("reason") or "") if mc else "", + } + if hc and hc.get("evidence"): + entry["evidence"] = hc["evidence"] + consolidated.append(entry) + continue + if into_claim == "": + # Explicit prune declaration + pruned.append({ + "name": name, + "source": "absorbed_into=\"\" (model-declared prune)", + "reason": (mp.get("reason") or "") if mp else "", + }) + continue + # into_claim is non-empty but target doesn't exist: the model + # named a nonexistent umbrella at delete time. The tool already + # rejects this at the skill_manage layer, so we shouldn't see it + # in practice — but if it slips through (e.g. the umbrella was + # deleted LATER in the same run), fall through to the usual + # signals rather than trusting a broken reference. # Model says consolidated — trust it if the destination is real. if mc and mc.get("into") in destinations: @@ -808,11 +906,20 @@ def _write_run_report( ) model_block = _parse_structured_summary(llm_meta.get("final", "") or "") destinations = set(after_names) | set(added or []) + # Authoritative signal: extract per-delete `absorbed_into` declarations + # from this run's tool calls. 
These beat both the YAML summary block and + # the substring heuristic — the model is telling us directly, at the + # moment of deletion, whether each archived skill was consolidated + # (into=) or pruned (into=""). + absorbed_declarations = _extract_absorbed_into_declarations( + llm_meta.get("tool_calls", []) or [] + ) classification = _reconcile_classification( removed=removed, heuristic=heuristic, model_block=model_block, destinations=destinations, + absorbed_declarations=absorbed_declarations, ) consolidated = classification["consolidated"] pruned = classification["pruned"] diff --git a/agent/curator_backup.py b/agent/curator_backup.py index 268de64f..fe749205 100644 --- a/agent/curator_backup.py +++ b/agent/curator_backup.py @@ -21,6 +21,18 @@ It DOES include: pointer — otherwise the curator would immediately re-fire on the next tick) - ``.bundled_manifest`` (so protection markers stay consistent) + +Alongside the skills tarball, each snapshot also captures a copy of +``~/.hermes/cron/jobs.json`` as ``cron-jobs.json`` when it exists. Cron +jobs reference skills by name in their ``skills``/``skill`` fields; the +curator's consolidation pass rewrites those in place via +``cron.jobs.rewrite_skill_refs()``. Without capturing the pre-run state, +rolling back the skills tree would leave cron jobs pointing at the +umbrella skills even though the narrow skills they were originally +configured with have been restored. We store the whole jobs.json for +fidelity but rollback only touches the ``skills``/``skill`` fields — the +rest (schedule, next_run_at, enabled, prompt, etc.) is live state and +we leave it alone. 
""" from __future__ import annotations @@ -63,6 +75,60 @@ def _skills_dir() -> Path: return get_hermes_home() / "skills" +def _cron_jobs_file() -> Path: + """Source path for the live cron jobs store (``~/.hermes/cron/jobs.json``).""" + return get_hermes_home() / "cron" / "jobs.json" + + +CRON_JOBS_FILENAME = "cron-jobs.json" + + +def _backup_cron_jobs_into(dest: Path) -> Dict[str, Any]: + """Copy the live cron jobs.json into ``dest`` as ``cron-jobs.json``. + + Returns a small dict describing what was captured so the caller can + fold it into the manifest. Never raises — if the cron file is missing + or unreadable, the return dict has ``backed_up=False`` and the reason, + and the snapshot proceeds without cron data (the snapshot is still + useful for rolling back skills). + """ + src = _cron_jobs_file() + info: Dict[str, Any] = {"backed_up": False, "jobs_count": 0} + if not src.exists(): + info["reason"] = "no cron/jobs.json present" + return info + try: + raw = src.read_text(encoding="utf-8") + except OSError as e: + logger.debug("Failed to read cron/jobs.json for backup: %s", e) + info["reason"] = f"read error: {e}" + return info + # Count jobs as a nice diagnostic — but don't fail the snapshot if the + # file is unparseable; just store the raw text and let rollback deal + # with it (or not, if it's corrupted). jobs.json wraps the list as + # `{"jobs": [...], "updated_at": ...}` — we count via that shape, and + # fall back to bare-list shape just in case the format ever changes. 
+ try: + parsed = json.loads(raw) + if isinstance(parsed, dict): + inner = parsed.get("jobs") + if isinstance(inner, list): + info["jobs_count"] = len(inner) + elif isinstance(parsed, list): + info["jobs_count"] = len(parsed) + except (json.JSONDecodeError, TypeError): + info["jobs_count"] = 0 + info["parse_warning"] = "jobs.json was not valid JSON at snapshot time" + try: + (dest / CRON_JOBS_FILENAME).write_text(raw, encoding="utf-8") + except OSError as e: + logger.debug("Failed to write cron backup file: %s", e) + info["reason"] = f"write error: {e}" + return info + info["backed_up"] = True + return info + + def _utc_id(now: Optional[datetime] = None) -> str: """UTC ISO-ish filesystem-safe timestamp: ``2026-05-01T13-05-42Z``.""" if now is None: @@ -116,7 +182,8 @@ def _count_skill_files(base: Path) -> int: def _write_manifest(dest: Path, reason: str, archive_path: Path, - skills_counted: int) -> None: + skills_counted: int, + cron_info: Optional[Dict[str, Any]] = None) -> None: manifest = { "id": dest.name, "reason": reason, @@ -125,6 +192,15 @@ def _write_manifest(dest: Path, reason: str, archive_path: Path, "archive_bytes": archive_path.stat().st_size, "skill_files": skills_counted, } + if cron_info is not None: + manifest["cron_jobs"] = { + "backed_up": bool(cron_info.get("backed_up", False)), + "jobs_count": int(cron_info.get("jobs_count", 0)), + } + if not cron_info.get("backed_up"): + manifest["cron_jobs"]["reason"] = cron_info.get("reason", "not captured") + if cron_info.get("parse_warning"): + manifest["cron_jobs"]["parse_warning"] = cron_info["parse_warning"] (dest / "manifest.json").write_text( json.dumps(manifest, indent=2, sort_keys=True), encoding="utf-8" ) @@ -181,7 +257,14 @@ def snapshot_skills(reason: str = "manual") -> Optional[Path]: # arcname: store paths relative to skills/ so extraction # drops cleanly back into the skills dir. 
tf.add(str(entry), arcname=entry.name, recursive=True) - _write_manifest(dest, reason, archive, _count_skill_files(skills)) + # Capture cron/jobs.json alongside the tarball. Never fails the + # snapshot — the skills side is the core guarantee; cron is + # additive. We still record in the manifest whether it was + # captured so rollback can surface "no cron data in this snapshot". + cron_info = _backup_cron_jobs_into(dest) + _write_manifest(dest, reason, archive, + _count_skill_files(skills), + cron_info=cron_info) except (OSError, tarfile.TarError) as e: logger.debug("Curator snapshot failed: %s", e, exc_info=True) # Clean up partial snapshot @@ -298,6 +381,149 @@ def _resolve_backup(backup_id: Optional[str]) -> Optional[Path]: return candidates[0] if candidates else None +def _restore_cron_skill_links(snapshot_dir: Path) -> Dict[str, Any]: + """Reconcile backed-up cron skill links into the live ``cron/jobs.json``. + + We do NOT overwrite the whole cron file. Only the ``skills`` and + ``skill`` fields are restored, and only on jobs that still exist in the + current file (matched by ``id``). Everything else about the job — + schedule, next_run_at, last_run_at, enabled, prompt, workdir, hooks — + is live state that the user/scheduler has modified since the snapshot; + overwriting it would regress unrelated cron activity. + + Rules: + - Jobs present in backup AND live, with differing skills → skills restored. + - Jobs present in backup AND live, with matching skills → no-op. + - Jobs present in backup but gone from live (user deleted the job + after the snapshot) → skipped, noted in the return report. + - Jobs present in live but not in backup (user created a new cron + job after the snapshot) → left untouched. + + Never raises; failures are captured in the return dict. Writes through + ``cron.jobs`` to pick up the same lock + atomic-write path that tick() + uses, so we don't race the scheduler. 
+ """ + report: Dict[str, Any] = { + "attempted": False, + "restored": [], + "skipped_missing": [], + "unchanged": 0, + "error": None, + } + backup_file = snapshot_dir / CRON_JOBS_FILENAME + if not backup_file.exists(): + report["error"] = f"snapshot has no {CRON_JOBS_FILENAME}" + return report + + try: + backup_text = backup_file.read_text(encoding="utf-8") + backup_parsed = json.loads(backup_text) + except (OSError, json.JSONDecodeError) as e: + report["error"] = f"failed to load backed-up jobs: {e}" + return report + # jobs.json on disk is `{"jobs": [...], "updated_at": ...}`; accept both + # that shape and a bare list for forward compat. + if isinstance(backup_parsed, dict): + backup_jobs = backup_parsed.get("jobs") + elif isinstance(backup_parsed, list): + backup_jobs = backup_parsed + else: + backup_jobs = None + if not isinstance(backup_jobs, list): + report["error"] = "backed-up cron-jobs.json has no jobs list" + return report + + # Build a lookup of the backed-up skill state keyed by job id. + # We only need the two skill-ish fields (legacy single and modern list). + backup_by_id: Dict[str, Dict[str, Any]] = {} + for job in backup_jobs: + if not isinstance(job, dict): + continue + jid = job.get("id") + if not isinstance(jid, str) or not jid: + continue + backup_by_id[jid] = { + "skills": job.get("skills"), + "skill": job.get("skill"), + "name": job.get("name") or jid, + } + + if not backup_by_id: + report["attempted"] = True # we tried but there was nothing to do + return report + + # Load and rewrite the live jobs under the scheduler's lock. 
+ try: + from cron.jobs import load_jobs, save_jobs, _jobs_file_lock + except ImportError as e: + report["error"] = f"cron module unavailable: {e}" + return report + + report["attempted"] = True + try: + with _jobs_file_lock: + live_jobs = load_jobs() + changed = False + + live_ids = set() + for live in live_jobs: + if not isinstance(live, dict): + continue + jid = live.get("id") + if not isinstance(jid, str) or not jid: + continue + live_ids.add(jid) + + backup = backup_by_id.get(jid) + if backup is None: + continue # live job didn't exist at snapshot time + + cur_skills = live.get("skills") + cur_skill = live.get("skill") + bkp_skills = backup.get("skills") + bkp_skill = backup.get("skill") + + if cur_skills == bkp_skills and cur_skill == bkp_skill: + report["unchanged"] += 1 + continue + + # Restore. Preserve absence (don't force the key to appear + # if the backup didn't have it either). + if bkp_skills is None: + live.pop("skills", None) + else: + live["skills"] = bkp_skills + if bkp_skill is None: + live.pop("skill", None) + else: + live["skill"] = bkp_skill + + report["restored"].append({ + "job_id": jid, + "job_name": backup.get("name") or jid, + "from": {"skills": cur_skills, "skill": cur_skill}, + "to": {"skills": bkp_skills, "skill": bkp_skill}, + }) + changed = True + + # Jobs in backup but not in live = user deleted them after snapshot + for jid, backup in backup_by_id.items(): + if jid not in live_ids: + report["skipped_missing"].append({ + "job_id": jid, + "job_name": backup.get("name") or jid, + }) + + if changed: + save_jobs(live_jobs) + except Exception as e: # noqa: BLE001 — rollback must not die mid-restore + logger.debug("Cron skill-link restore failed: %s", e, exc_info=True) + report["error"] = f"restore failed mid-flight: {e}" + + return report + + + def rollback(backup_id: Optional[str] = None) -> Tuple[bool, str, Optional[Path]]: """Restore ``~/.hermes/skills/`` from a snapshot. 
@@ -408,8 +634,35 @@ def rollback(backup_id: Optional[str] = None) -> Tuple[bool, str, Optional[Path] except OSError: pass - logger.info("Curator rollback: restored from %s", target.name) - return (True, f"restored from snapshot {target.name}", target) + # Reconcile cron skill-links. Surgical: only the skills/skill fields + # on jobs matched by id. Everything else in jobs.json is live state + # (schedule, next_run_at, enabled, prompt, etc.) and we leave it + # alone. Failures here don't fail the overall rollback — the skills + # tree is already restored, which is the main guarantee. + cron_report = _restore_cron_skill_links(target) + + summary_bits = [f"restored from snapshot {target.name}"] + if cron_report.get("attempted"): + restored_n = len(cron_report.get("restored") or []) + skipped_n = len(cron_report.get("skipped_missing") or []) + if cron_report.get("error"): + summary_bits.append(f"cron links: error — {cron_report['error']}") + elif restored_n == 0 and skipped_n == 0 and cron_report.get("unchanged", 0) == 0: + # Attempted but nothing matched — empty snapshot or no overlapping ids. 
+ pass + else: + parts = [] + if restored_n: + parts.append(f"{restored_n} job(s) had skill links restored") + if skipped_n: + parts.append(f"{skipped_n} backed-up job(s) no longer exist (skipped)") + if cron_report.get("unchanged"): + parts.append(f"{cron_report['unchanged']} already matched") + summary_bits.append("cron links: " + ", ".join(parts)) + + logger.info("Curator rollback: restored from %s (cron_report=%s)", + target.name, cron_report) + return (True, "; ".join(summary_bits), target) # --------------------------------------------------------------------------- diff --git a/hermes_cli/curator.py b/hermes_cli/curator.py index b6646d72..df69aa7d 100644 --- a/hermes_cli/curator.py +++ b/hermes_cli/curator.py @@ -302,9 +302,21 @@ def _cmd_rollback(args) -> int: print(f" reason: {manifest.get('reason', '?')}") print(f" created_at: {manifest.get('created_at', '?')}") print(f" skill files: {manifest.get('skill_files', '?')}") + cron = manifest.get("cron_jobs") or {} + if isinstance(cron, dict): + if cron.get("backed_up"): + print( + f" cron jobs: {cron.get('jobs_count', 0)} " + f"(will be restored for skill-link fields only)" + ) + else: + reason = cron.get("reason", "not captured") + print(f" cron jobs: not in snapshot ({reason})") print( "\nThis will replace the current ~/.hermes/skills/ tree (a safety " - "snapshot of the current state is taken first so this is undoable)." + "snapshot of the current state is taken first so this is undoable). " + "Cron jobs that still exist will have their skills/skill fields " + "restored from the snapshot; all other cron fields are left alone." 
) if not getattr(args, "yes", False): diff --git a/tests/agent/test_curator_backup.py b/tests/agent/test_curator_backup.py index 1d906ed7..b375f986 100644 --- a/tests/agent/test_curator_backup.py +++ b/tests/agent/test_curator_backup.py @@ -314,3 +314,281 @@ def test_dry_run_skips_snapshot(backup_env, monkeypatch): assert not any(r.get("reason") == "pre-curator-run" for r in rows), ( "dry-run must not create a pre-run snapshot" ) + + +# --------------------------------------------------------------------------- +# cron-jobs backup + rollback (the part issue #18671's follow-up adds) +# --------------------------------------------------------------------------- + + +def _write_cron_jobs(home: Path, jobs: list) -> Path: + """Write a synthetic cron/jobs.json under HERMES_HOME. Returns the path. + Mirrors cron.jobs.save_jobs() wrapper shape: `{"jobs": [...], "updated_at": ...}`. + """ + cron_dir = home / "cron" + cron_dir.mkdir(parents=True, exist_ok=True) + path = cron_dir / "jobs.json" + path.write_text( + json.dumps({"jobs": jobs, "updated_at": "2026-05-01T00:00:00Z"}, indent=2), + encoding="utf-8", + ) + return path + + +def _reload_cron_jobs(home: Path): + """Reload cron.jobs so its module-level HERMES_DIR picks up the tmp HOME.""" + import hermes_constants + importlib.reload(hermes_constants) + if "cron.jobs" in sys.modules: + import cron.jobs as _cj + importlib.reload(_cj) + else: + import cron.jobs as _cj # noqa: F401 + import cron.jobs as cj + return cj + + +def test_snapshot_includes_cron_jobs(backup_env): + """With a cron/jobs.json present, snapshot writes cron-jobs.json and records it in manifest.""" + cb = backup_env["cb"] + _write_skill(backup_env["skills"], "alpha") + _write_cron_jobs(backup_env["home"], [ + {"id": "job-a", "name": "a", "schedule": "every 1h", "skills": ["alpha"]}, + {"id": "job-b", "name": "b", "schedule": "every 2h", "skill": "alpha"}, + ]) + + snap = cb.snapshot_skills(reason="test") + assert snap is not None + assert (snap / 
cb.CRON_JOBS_FILENAME).exists() + + mf = json.loads((snap / "manifest.json").read_text(encoding="utf-8")) + assert mf["cron_jobs"]["backed_up"] is True + assert mf["cron_jobs"]["jobs_count"] == 2 + + +def test_snapshot_without_cron_jobs_file_still_succeeds(backup_env): + """No cron/jobs.json on disk → snapshot succeeds, manifest records absence.""" + cb = backup_env["cb"] + _write_skill(backup_env["skills"], "alpha") + # Deliberately do not create ~/.hermes/cron/jobs.json + + snap = cb.snapshot_skills(reason="test") + assert snap is not None + assert not (snap / cb.CRON_JOBS_FILENAME).exists() + + mf = json.loads((snap / "manifest.json").read_text(encoding="utf-8")) + assert mf["cron_jobs"]["backed_up"] is False + assert "cron/jobs.json" in mf["cron_jobs"]["reason"] + + +def test_snapshot_cron_jobs_malformed_json_still_captured(backup_env): + """Malformed jobs.json is still copied to the snapshot (fidelity over + validation); the manifest notes the parse warning.""" + cb = backup_env["cb"] + _write_skill(backup_env["skills"], "alpha") + (backup_env["home"] / "cron").mkdir() + (backup_env["home"] / "cron" / "jobs.json").write_text("{oh no", encoding="utf-8") + + snap = cb.snapshot_skills(reason="test") + assert snap is not None + # Raw file was copied even though we couldn't parse it + assert (snap / cb.CRON_JOBS_FILENAME).read_text() == "{oh no" + + mf = json.loads((snap / "manifest.json").read_text(encoding="utf-8")) + assert mf["cron_jobs"]["backed_up"] is True + assert mf["cron_jobs"]["jobs_count"] == 0 + assert "parse_warning" in mf["cron_jobs"] + + +def test_rollback_restores_cron_skill_links(backup_env): + """End-to-end: snapshot with job [alpha,beta], curator-style in-place + rewrite to [umbrella], then rollback → skills restored to [alpha,beta].""" + cb = backup_env["cb"] + home = backup_env["home"] + _write_skill(backup_env["skills"], "alpha") + _write_skill(backup_env["skills"], "beta") + _write_skill(backup_env["skills"], "umbrella") + + cj = 
_reload_cron_jobs(home) + cj.create_job(name="weekly", prompt="p", schedule="every 7d", + skills=["alpha", "beta"]) + + snap = cb.snapshot_skills(reason="pre-curator-run") + assert snap is not None + + # Simulate the curator's in-place cron rewrite after consolidation + cj.rewrite_skill_refs( + consolidated={"alpha": "umbrella", "beta": "umbrella"}, + pruned=[], + ) + live_after_curator = cj.load_jobs() + assert live_after_curator[0]["skills"] == ["umbrella"] + + # Now roll back + ok, msg, _ = cb.rollback(backup_id=snap.name) + assert ok, msg + assert "cron links" in msg + + live_after_rollback = cj.load_jobs() + # skills restored; legacy `skill` mirror follows first element + assert live_after_rollback[0]["skills"] == ["alpha", "beta"] + + +def test_rollback_only_touches_skill_fields(backup_env): + """Every field other than skills/skill must remain untouched across rollback. + Schedule, enabled, prompt, timestamps — all live state, hands off.""" + cb = backup_env["cb"] + home = backup_env["home"] + _write_skill(backup_env["skills"], "alpha") + + # Hand-rolled jobs.json with varied fields (no real create_job — we want + # exact field control). + _write_cron_jobs(home, [{ + "id": "stable-id", + "name": "original-name", + "prompt": "original prompt", + "schedule": "every 1h", + "skills": ["alpha"], + "enabled": True, + "last_run_at": "2026-04-01T00:00:00Z", + }]) + snap = cb.snapshot_skills(reason="pre-curator-run") + assert snap is not None + + # User/scheduler activity AFTER the snapshot: rename the job, change + # the schedule, update timestamps, and (curator) rewrite the skills list. 
+ cj = _reload_cron_jobs(home) + jobs = cj.load_jobs() + jobs[0]["name"] = "renamed-since-snapshot" + jobs[0]["schedule"] = "every 30m" + jobs[0]["last_run_at"] = "2026-05-01T12:00:00Z" + jobs[0]["skills"] = ["umbrella"] # pretend curator did this + cj.save_jobs(jobs) + + ok, _, _ = cb.rollback(backup_id=snap.name) + assert ok + + after = cj.load_jobs() + job = after[0] + # skills: restored + assert job["skills"] == ["alpha"] + # everything else: untouched (live state preserved) + assert job["name"] == "renamed-since-snapshot" + assert job["schedule"] == "every 30m" + assert job["last_run_at"] == "2026-05-01T12:00:00Z" + assert job["prompt"] == "original prompt" + + +def test_rollback_skips_jobs_the_user_deleted(backup_env): + """If the user deleted a cron job after the snapshot, rollback must + NOT resurrect it — the user's delete is a later, explicit choice.""" + cb = backup_env["cb"] + home = backup_env["home"] + _write_skill(backup_env["skills"], "alpha") + + _write_cron_jobs(home, [ + {"id": "keep-me", "name": "keep", "schedule": "every 1h", "skills": ["alpha"]}, + {"id": "delete-me", "name": "gone", "schedule": "every 1h", "skills": ["alpha"]}, + ]) + snap = cb.snapshot_skills(reason="pre-curator-run") + + # User deletes one job after the snapshot + cj = _reload_cron_jobs(home) + cj.save_jobs([j for j in cj.load_jobs() if j["id"] != "delete-me"]) + + ok, _, _ = cb.rollback(backup_id=snap.name) + assert ok + + live_after = cj.load_jobs() + live_ids = {j["id"] for j in live_after} + assert "keep-me" in live_ids + assert "delete-me" not in live_ids # not resurrected + + +def test_rollback_leaves_new_jobs_untouched(backup_env): + """Jobs created AFTER the snapshot must pass through rollback unchanged.""" + cb = backup_env["cb"] + home = backup_env["home"] + _write_skill(backup_env["skills"], "alpha") + _write_cron_jobs(home, [ + {"id": "original", "name": "o", "schedule": "every 1h", "skills": ["alpha"]}, + ]) + snap = cb.snapshot_skills(reason="pre-curator-run") 
+ + cj = _reload_cron_jobs(home) + jobs = cj.load_jobs() + jobs.append({"id": "new-after-snapshot", "name": "new", + "schedule": "every 15m", "skills": ["brand-new-skill"]}) + cj.save_jobs(jobs) + + ok, _, _ = cb.rollback(backup_id=snap.name) + assert ok + + live = cj.load_jobs() + by_id = {j["id"]: j for j in live} + assert "new-after-snapshot" in by_id + # New job's fields completely preserved + assert by_id["new-after-snapshot"]["skills"] == ["brand-new-skill"] + assert by_id["new-after-snapshot"]["schedule"] == "every 15m" + + +def test_rollback_with_snapshot_missing_cron_succeeds(backup_env): + """Older snapshots (created before this feature shipped) have no + cron-jobs.json. Rollback must still restore the skills tree and not + error out.""" + cb = backup_env["cb"] + home = backup_env["home"] + _write_skill(backup_env["skills"], "alpha") + + # No cron/jobs.json at snapshot time — simulates a pre-feature snapshot + snap = cb.snapshot_skills(reason="test") + assert snap is not None + assert not (snap / cb.CRON_JOBS_FILENAME).exists() + + # Later the user created a cron job + _write_cron_jobs(home, [ + {"id": "later-job", "name": "l", "schedule": "every 1h", "skills": ["x"]}, + ]) + + ok, msg, _ = cb.rollback(backup_id=snap.name) + # Main rollback still succeeds; cron report notes the missing file. + assert ok, msg + # Jobs.json untouched (nothing to restore from) + cj = _reload_cron_jobs(home) + jobs = cj.load_jobs() + assert jobs[0]["id"] == "later-job" + assert jobs[0]["skills"] == ["x"] + + +def test_restore_cron_skill_links_standalone(backup_env): + """Unit-level test on _restore_cron_skill_links without the full rollback. 
+ Verifies the report structure carefully.""" + cb = backup_env["cb"] + home = backup_env["home"] + + # Prime a snapshot dir manually with cron-jobs.json + backups_dir = home / "skills" / ".curator_backups" / "fake-id" + backups_dir.mkdir(parents=True) + (backups_dir / cb.CRON_JOBS_FILENAME).write_text(json.dumps([ + {"id": "job-1", "name": "one", "skills": ["narrow-a", "narrow-b"]}, + {"id": "job-2", "name": "two", "skill": "legacy-single"}, + {"id": "job-gone", "name": "deleted", "skills": ["whatever"]}, + ]), encoding="utf-8") + + # Live jobs: job-1 got rewritten, job-2 unchanged, job-gone deleted + _write_cron_jobs(home, [ + {"id": "job-1", "name": "one", "skills": ["umbrella"], "schedule": "every 1h"}, + {"id": "job-2", "name": "two", "skill": "legacy-single", "schedule": "every 1h"}, + {"id": "job-new", "name": "new", "skills": ["x"], "schedule": "every 1h"}, + ]) + _reload_cron_jobs(home) + + report = cb._restore_cron_skill_links(backups_dir) + assert report["attempted"] is True + assert report["error"] is None + assert report["unchanged"] == 1 # job-2 matched + assert len(report["restored"]) == 1 # job-1 got restored + assert report["restored"][0]["job_id"] == "job-1" + assert report["restored"][0]["to"]["skills"] == ["narrow-a", "narrow-b"] + assert len(report["skipped_missing"]) == 1 + assert report["skipped_missing"][0]["job_id"] == "job-gone" diff --git a/tests/agent/test_curator_classification.py b/tests/agent/test_curator_classification.py index edf7394f..031d6652 100644 --- a/tests/agent/test_curator_classification.py +++ b/tests/agent/test_curator_classification.py @@ -548,3 +548,266 @@ def test_reconcile_model_block_visible_in_full_report(curator_env): md = (run_dir / "REPORT.md").read_text() assert "duplicate content, now a subsection" in md assert "pre-curator junk" in md + + +# --------------------------------------------------------------------------- +# _extract_absorbed_into_declarations — authoritative signal from delete calls +# 
--------------------------------------------------------------------------- + + +def test_extract_absorbed_into_picks_up_consolidation(curator_env): + """Delete call with absorbed_into= yields a declaration.""" + declarations = curator_env._extract_absorbed_into_declarations([ + { + "name": "skill_manage", + "arguments": json.dumps({ + "action": "delete", + "name": "narrow-skill", + "absorbed_into": "umbrella", + }), + }, + ]) + assert declarations == { + "narrow-skill": {"into": "umbrella", "declared": True}, + } + + +def test_extract_absorbed_into_empty_string_is_explicit_prune(curator_env): + """absorbed_into='' is recorded as an explicit prune declaration.""" + declarations = curator_env._extract_absorbed_into_declarations([ + { + "name": "skill_manage", + "arguments": json.dumps({ + "action": "delete", + "name": "stale", + "absorbed_into": "", + }), + }, + ]) + assert declarations == {"stale": {"into": "", "declared": True}} + + +def test_extract_absorbed_into_missing_arg_ignored(curator_env): + """Delete call without absorbed_into is skipped — fallback to heuristic.""" + declarations = curator_env._extract_absorbed_into_declarations([ + { + "name": "skill_manage", + "arguments": json.dumps({ + "action": "delete", + "name": "legacy-skill", + }), + }, + ]) + assert declarations == {} + + +def test_extract_absorbed_into_ignores_non_delete_actions(curator_env): + """Patch, create, write_file etc. 
must not leak into declarations.""" + declarations = curator_env._extract_absorbed_into_declarations([ + { + "name": "skill_manage", + "arguments": json.dumps({ + "action": "patch", + "name": "umbrella", + "old_string": "...", + "new_string": "...", + "absorbed_into": "something", # bogus on non-delete, must be ignored + }), + }, + ]) + assert declarations == {} + + +def test_extract_absorbed_into_accepts_dict_arguments(curator_env): + """arguments can arrive as a dict (defensive path) — still works.""" + declarations = curator_env._extract_absorbed_into_declarations([ + { + "name": "skill_manage", + "arguments": { + "action": "delete", + "name": "narrow", + "absorbed_into": "umbrella", + }, + }, + ]) + assert declarations == {"narrow": {"into": "umbrella", "declared": True}} + + +def test_extract_absorbed_into_strips_whitespace(curator_env): + declarations = curator_env._extract_absorbed_into_declarations([ + { + "name": "skill_manage", + "arguments": json.dumps({ + "action": "delete", + "name": " narrow ", + "absorbed_into": " umbrella ", + }), + }, + ]) + assert declarations == {"narrow": {"into": "umbrella", "declared": True}} + + +def test_extract_absorbed_into_ignores_non_skill_manage_calls(curator_env): + declarations = curator_env._extract_absorbed_into_declarations([ + {"name": "terminal", "arguments": json.dumps({"command": "ls"})}, + {"name": "read_file", "arguments": json.dumps({"path": "/tmp/x"})}, + ]) + assert declarations == {} + + +def test_extract_absorbed_into_handles_malformed_arguments(curator_env): + """Garbage JSON in arguments must not crash the extractor.""" + declarations = curator_env._extract_absorbed_into_declarations([ + {"name": "skill_manage", "arguments": "{not json"}, + {"name": "skill_manage", "arguments": None}, + {"name": "skill_manage"}, # no arguments key at all + ]) + assert declarations == {} + + +# --------------------------------------------------------------------------- +# _reconcile_classification with absorbed_into 
declarations (authoritative) +# --------------------------------------------------------------------------- + + +def test_reconcile_absorbed_into_beats_everything_else(curator_env): + """Model declared absorbed_into at delete; YAML/heuristic disagree — declaration wins. + + This is the exact #18671 regression: the model forgets to emit the YAML + summary block, the heuristic's substring match misses because the + umbrella's patch content doesn't literally contain the old skill's + slug. Previously this fell through to 'no-evidence fallback' prune, + which dropped the cron ref instead of rewriting. With absorbed_into + declared, the model tells us directly. + """ + out = curator_env._reconcile_classification( + removed=["pr-review-format"], + heuristic={"consolidated": [], "pruned": [{"name": "pr-review-format"}]}, + model_block={"consolidations": [], "prunings": []}, # model forgot YAML block + destinations={"hermes-agent-dev"}, + absorbed_declarations={ + "pr-review-format": {"into": "hermes-agent-dev", "declared": True}, + }, + ) + assert len(out["consolidated"]) == 1 + assert out["pruned"] == [] + e = out["consolidated"][0] + assert e["name"] == "pr-review-format" + assert e["into"] == "hermes-agent-dev" + assert "absorbed_into" in e["source"] + + +def test_reconcile_absorbed_into_empty_is_explicit_prune(curator_env): + """absorbed_into='' takes precedence and routes to pruned, not fallback.""" + out = curator_env._reconcile_classification( + removed=["stale"], + heuristic={"consolidated": [], "pruned": [{"name": "stale"}]}, + model_block={"consolidations": [], "prunings": []}, + destinations=set(), + absorbed_declarations={ + "stale": {"into": "", "declared": True}, + }, + ) + assert out["consolidated"] == [] + assert len(out["pruned"]) == 1 + assert "model-declared prune" in out["pruned"][0]["source"] + + +def test_reconcile_absorbed_into_nonexistent_target_falls_through(curator_env): + """If the declared umbrella doesn't exist in destinations, fall through to 
+ heuristic/YAML logic. Shouldn't happen in practice (the tool validates at + delete time) but the reconciler is defensive.""" + out = curator_env._reconcile_classification( + removed=["thing"], + heuristic={ + "consolidated": [{"name": "thing", "into": "real-umbrella", "evidence": "..."}], + "pruned": [], + }, + model_block={"consolidations": [], "prunings": []}, + destinations={"real-umbrella"}, + absorbed_declarations={ + "thing": {"into": "ghost-umbrella", "declared": True}, + }, + ) + assert len(out["consolidated"]) == 1 + assert out["consolidated"][0]["into"] == "real-umbrella" + assert "tool-call audit" in out["consolidated"][0]["source"] + + +def test_reconcile_declaration_preserves_yaml_reason(curator_env): + """When the model both declared absorbed_into AND emitted YAML with reason, + the reason carries through so REPORT.md still has it.""" + out = curator_env._reconcile_classification( + removed=["narrow"], + heuristic={"consolidated": [], "pruned": []}, + model_block={ + "consolidations": [{ + "from": "narrow", + "into": "umbrella", + "reason": "duplicate of umbrella's main content", + }], + "prunings": [], + }, + destinations={"umbrella"}, + absorbed_declarations={ + "narrow": {"into": "umbrella", "declared": True}, + }, + ) + assert len(out["consolidated"]) == 1 + e = out["consolidated"][0] + assert e["into"] == "umbrella" + assert "absorbed_into" in e["source"] + assert e["reason"] == "duplicate of umbrella's main content" + + +def test_reconcile_without_declarations_preserves_legacy_behavior(curator_env): + """Backward compat: no absorbed_declarations arg → all existing logic intact.""" + out = curator_env._reconcile_classification( + removed=["thing"], + heuristic={ + "consolidated": [{"name": "thing", "into": "umbrella", "evidence": "..."}], + "pruned": [], + }, + model_block={"consolidations": [], "prunings": []}, + destinations={"umbrella"}, + # no absorbed_declarations — defaults to None → behaves identically to pre-change + ) + assert 
len(out["consolidated"]) == 1 + assert out["consolidated"][0]["into"] == "umbrella" + + +def test_reconcile_mixed_declarations_and_legacy_calls(curator_env): + """Real-world run: some deletes declared absorbed_into, some didn't. + Declared ones use the authoritative path; others fall through to YAML/heuristic. + """ + out = curator_env._reconcile_classification( + removed=["declared-cons", "declared-prune", "legacy-cons", "legacy-prune"], + heuristic={ + "consolidated": [ + {"name": "legacy-cons", "into": "umbrella-a", "evidence": "..."}, + ], + "pruned": [{"name": "legacy-prune"}], + }, + model_block={"consolidations": [], "prunings": []}, + destinations={"umbrella-a", "umbrella-b"}, + absorbed_declarations={ + "declared-cons": {"into": "umbrella-b", "declared": True}, + "declared-prune": {"into": "", "declared": True}, + }, + ) + cons_by_name = {e["name"]: e for e in out["consolidated"]} + pruned_by_name = {e["name"]: e for e in out["pruned"]} + + assert "declared-cons" in cons_by_name + assert cons_by_name["declared-cons"]["into"] == "umbrella-b" + assert "absorbed_into" in cons_by_name["declared-cons"]["source"] + + assert "legacy-cons" in cons_by_name + assert cons_by_name["legacy-cons"]["into"] == "umbrella-a" + assert "tool-call audit" in cons_by_name["legacy-cons"]["source"] + + assert "declared-prune" in pruned_by_name + assert "model-declared prune" in pruned_by_name["declared-prune"]["source"] + + assert "legacy-prune" in pruned_by_name + assert "no-evidence fallback" in pruned_by_name["legacy-prune"]["source"] diff --git a/tests/tools/test_skill_manager_tool.py b/tests/tools/test_skill_manager_tool.py index 00eaf51e..004924b9 100644 --- a/tests/tools/test_skill_manager_tool.py +++ b/tests/tools/test_skill_manager_tool.py @@ -371,6 +371,57 @@ class TestDeleteSkill: _delete_skill("my-skill") assert not (tmp_path / "devops").exists() + def test_delete_with_absorbed_into_valid_target(self, tmp_path): + with _skill_dir(tmp_path): + _create_skill("umbrella", 
VALID_SKILL_CONTENT) + _create_skill("narrow", VALID_SKILL_CONTENT) + result = _delete_skill("narrow", absorbed_into="umbrella") + assert result["success"] is True + assert "absorbed into 'umbrella'" in result["message"] + assert not (tmp_path / "narrow").exists() + assert (tmp_path / "umbrella").exists() + + def test_delete_with_absorbed_into_empty_string_means_pruned(self, tmp_path): + with _skill_dir(tmp_path): + _create_skill("stale-skill", VALID_SKILL_CONTENT) + result = _delete_skill("stale-skill", absorbed_into="") + assert result["success"] is True + # Empty absorbed_into is explicit prune — no "absorbed into" suffix in message + assert "absorbed into" not in result["message"] + + def test_delete_with_absorbed_into_nonexistent_target_rejected(self, tmp_path): + with _skill_dir(tmp_path): + _create_skill("narrow", VALID_SKILL_CONTENT) + result = _delete_skill("narrow", absorbed_into="ghost-umbrella") + assert result["success"] is False + assert "does not exist" in result["error"] + # Skill must NOT have been deleted on validation failure + assert (tmp_path / "narrow").exists() + + def test_delete_with_absorbed_into_equals_self_rejected(self, tmp_path): + with _skill_dir(tmp_path): + _create_skill("narrow", VALID_SKILL_CONTENT) + result = _delete_skill("narrow", absorbed_into="narrow") + assert result["success"] is False + assert "cannot equal" in result["error"] + assert (tmp_path / "narrow").exists() + + def test_delete_with_absorbed_into_whitespace_only_treated_as_prune(self, tmp_path): + # Leading/trailing whitespace only: .strip() → "" → pruned path + with _skill_dir(tmp_path): + _create_skill("narrow", VALID_SKILL_CONTENT) + result = _delete_skill("narrow", absorbed_into=" ") + assert result["success"] is True + assert "absorbed into" not in result["message"] + + def test_delete_without_absorbed_into_backward_compat(self, tmp_path): + # Legacy callers that don't pass the arg still work — the curator + # reconciler falls back to its heuristic+YAML logic 
for such deletes. + with _skill_dir(tmp_path): + _create_skill("my-skill", VALID_SKILL_CONTENT) + result = _delete_skill("my-skill") + assert result["success"] is True + # --------------------------------------------------------------------------- # write_file / remove_file @@ -485,6 +536,25 @@ class TestSkillManageDispatcher: result = json.loads(raw) assert result["success"] is True + def test_delete_via_dispatcher_threads_absorbed_into(self, tmp_path): + # Dispatcher must plumb absorbed_into through to _delete_skill so the + # validation + message suffix paths are exercised end-to-end. + with _skill_dir(tmp_path): + skill_manage(action="create", name="umbrella", content=VALID_SKILL_CONTENT) + skill_manage(action="create", name="narrow", content=VALID_SKILL_CONTENT) + raw = skill_manage(action="delete", name="narrow", absorbed_into="umbrella") + result = json.loads(raw) + assert result["success"] is True + assert "absorbed into 'umbrella'" in result["message"] + + def test_delete_via_dispatcher_rejects_missing_absorbed_target(self, tmp_path): + with _skill_dir(tmp_path): + skill_manage(action="create", name="narrow", content=VALID_SKILL_CONTENT) + raw = skill_manage(action="delete", name="narrow", absorbed_into="ghost") + result = json.loads(raw) + assert result["success"] is False + assert "does not exist" in result["error"] + class TestSecurityScanGate: """_security_scan_skill is gated by skills.guard_agent_created config flag.""" diff --git a/tools/skill_manager_tool.py b/tools/skill_manager_tool.py index e1b9a5f0..d8d44f1a 100644 --- a/tools/skill_manager_tool.py +++ b/tools/skill_manager_tool.py @@ -560,8 +560,18 @@ def _patch_skill( } -def _delete_skill(name: str) -> Dict[str, Any]: - """Delete a skill.""" +def _delete_skill(name: str, absorbed_into: Optional[str] = None) -> Dict[str, Any]: + """Delete a skill. 
+ + ``absorbed_into`` declares intent: + - ``None`` / missing → caller didn't declare (legacy / non-curator path); + accepted for backward compat, though the curator + classification pipeline can't tell consolidation from pruning without it. + - ``""`` (empty) → explicit "truly pruned, no forwarding target". + - ``"<target>"`` → content was absorbed into that umbrella; the + target must exist on disk. Validated here so the model can't claim an + umbrella that doesn't exist. + """ existing = _find_skill(name) if not existing: return {"success": False, "error": f"Skill '{name}' not found."} @@ -570,6 +580,24 @@ def _delete_skill(name: str) -> Dict[str, Any]: if pinned_err: return {"success": False, "error": pinned_err} + # Validate absorbed_into target when declared non-empty + if absorbed_into is not None and isinstance(absorbed_into, str) and absorbed_into.strip(): + target_name = absorbed_into.strip() + if target_name == name: + return { + "success": False, + "error": f"absorbed_into='{target_name}' cannot equal the skill being deleted.", + } + target = _find_skill(target_name) + if not target: + return { + "success": False, + "error": ( + f"absorbed_into='{target_name}' does not exist. " + f"Create or patch the umbrella skill first, then retry the delete." + ), + } + skill_dir = existing["path"] skills_root = _containing_skills_root(skill_dir) shutil.rmtree(skill_dir) @@ -579,9 +607,13 @@ def _delete_skill(name: str) -> Dict[str, Any]: if parent != skills_root and parent.exists() and not any(parent.iterdir()): parent.rmdir() + message = f"Skill '{name}' deleted." + if absorbed_into is not None and isinstance(absorbed_into, str) and absorbed_into.strip(): + message += f" Content absorbed into '{absorbed_into.strip()}'." 
+ return { "success": True, - "message": f"Skill '{name}' deleted.", + "message": message, } @@ -702,6 +734,7 @@ def skill_manage( old_string: str = None, new_string: str = None, replace_all: bool = False, + absorbed_into: str = None, ) -> str: """ Manage user-created skills. Dispatches to the appropriate action handler. @@ -726,7 +759,7 @@ def skill_manage( result = _patch_skill(name, old_string, new_string, file_path, replace_all) elif action == "delete": - result = _delete_skill(name) + result = _delete_skill(name, absorbed_into=absorbed_into) elif action == "write_file": if not file_path: @@ -778,6 +811,13 @@ SKILL_MANAGE_SCHEMA = { "patch (old_string/new_string — preferred for fixes), " "edit (full SKILL.md rewrite — major overhauls only), " "delete, write_file, remove_file.\n\n" + "On delete, pass `absorbed_into=` when you're merging this " + "skill's content into another one, or `absorbed_into=\"\"` when you're " + "pruning it with no forwarding target. This lets the curator tell " + "consolidation from pruning without guessing, so downstream consumers " + "(cron jobs that reference the old skill name, etc.) get updated " + "correctly. The target you name in `absorbed_into` must already " + "exist — create/patch the umbrella first, then delete.\n\n" "Create when: complex task succeeded (5+ calls), errors overcome, " "user-corrected approach worked, non-trivial workflow discovered, " "or user asks you to remember a procedure.\n" @@ -855,6 +895,20 @@ SKILL_MANAGE_SCHEMA = { "type": "string", "description": "Content for the file. Required for 'write_file'." }, + "absorbed_into": { + "type": "string", + "description": ( + "For 'delete' only — declares intent so the curator can " + "tell consolidation from pruning without guessing. " + "Pass the umbrella skill name when this skill's content " + "was merged into another (the target must already exist). " + "Pass an empty string when the skill is truly stale and " + "being pruned with no forwarding target. 
Omitting the arg " + "on delete is supported for backward compatibility but " + "downstream tooling (e.g. cron-job skill reference " + "rewriting) will have to guess at intent." + ) + }, }, "required": ["action", "name"], }, @@ -877,6 +931,7 @@ registry.register( file_content=args.get("file_content"), old_string=args.get("old_string"), new_string=args.get("new_string"), - replace_all=args.get("replace_all", False)), + replace_all=args.get("replace_all", False), + absorbed_into=args.get("absorbed_into")), emoji="📝", )