diff --git a/CLAUDE.md b/CLAUDE.md index aa48db12..b5cd0706 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -109,6 +109,28 @@ Shared plugins in `plugins/` are auto-loaded by every workspace: - **`ecc`**: General Claude Code guardrails - **`browser-automation`**: Puppeteer/CDP-based web scraping and live canvas screenshots (opt-in per workspace — wired into Research + UIUX roles in `org-templates/molecule-dev/org.yaml`) +**Modular guardrails** (Claude Code only — pick what you need, or install several): + +*Hook plugins (ambient enforcement at the harness layer)* +- **`molecule-careful-bash`** — REFUSES `git push --force` to main, `rm -rf` at root, `DROP TABLE` against prod schema. Ships the `careful-mode` skill as documentation. +- **`molecule-freeze-scope`** — locks edits to a single path glob via `.claude/freeze`. Useful while debugging. +- **`molecule-audit-trail`** — appends every Edit/Write to `.claude/audit.jsonl` for accountability. +- **`molecule-session-context`** — auto-loads recent cron-learnings + open PR/issue counts at session start. Pairs with `molecule-skill-cron-learnings`. +- **`molecule-prompt-watchdog`** — injects warning context when the user prompt mentions destructive keywords ("force push", "drop table", "delete all", etc). + +*Skill plugins (on-demand, via the `Skill` tool)* +- **`molecule-skill-code-review`** — 16-criteria multi-axis review. +- **`molecule-skill-cross-vendor-review`** — adversarial second-model review (use for noteworthy PRs). +- **`molecule-skill-llm-judge`** — score whether a deliverable addresses the request. +- **`molecule-skill-update-docs`** — sync repo docs after merges. +- **`molecule-skill-cron-learnings`** — defines the operational-memory JSONL format consumed by `molecule-session-context`. + +*Workflow plugins (slash commands that compose skills)* +- **`molecule-workflow-triage`** — `/triage` runs a full PR-triage cycle (gates 1–7 + code-review + merge if green). 
Recommends installing `molecule-skill-code-review` + `molecule-skill-cron-learnings` first. +- **`molecule-workflow-retro`** — `/retro` posts a weekly retrospective issue. Recommends `molecule-skill-cron-learnings` first. + +These are distilled from the harness-level guardrails the orchestrator uses on itself. A workspace can install one (e.g., just `molecule-careful-bash` for safety) or stack the full set for the same posture as the Molecule AI orchestrator. + ### Scripts ```bash bash scripts/setup-default-org.sh # Create PM + 3 teams (Marketing/Research/Dev) via API diff --git a/plugins/molecule-audit-trail/adapters/__init__.py b/plugins/molecule-audit-trail/adapters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/plugins/molecule-audit-trail/adapters/claude_code.py b/plugins/molecule-audit-trail/adapters/claude_code.py new file mode 100644 index 00000000..cc589931 --- /dev/null +++ b/plugins/molecule-audit-trail/adapters/claude_code.py @@ -0,0 +1,2 @@ +"""Claude Code adaptor — uses the generic rule+skill+hooks installer.""" +from plugins_registry.builtins import AgentskillsAdaptor as Adaptor # noqa: F401 diff --git a/plugins/molecule-audit-trail/hooks/_lib.py b/plugins/molecule-audit-trail/hooks/_lib.py new file mode 100755 index 00000000..1d0555ac --- /dev/null +++ b/plugins/molecule-audit-trail/hooks/_lib.py @@ -0,0 +1,46 @@ +"""Common helpers for Claude Code hooks. Imported by the .py hook scripts. + +Hooks receive JSON on stdin per the Claude Code hook spec, and may emit +JSON on stdout or exit with code 2 to block. This module wraps both. +""" +import json +import sys + + +def read_input() -> dict: + """Parse stdin JSON. 
def main() -> None:
    """Append one JSON line describing the finished Edit/Write to the audit log.

    The record holds a UTC timestamp (``ts``), the tool name (``tool``), the
    edited path made repo-relative when it lives under ``REPO`` (``file``) and
    the ``success`` flag of the tool response (``ok``, defaulting to True).

    Auditing is best-effort: a write failure is reported on stderr and never
    raised, so the hook cannot interfere with the tool call it observes.
    """
    data = read_input()
    if not isinstance(data, dict):
        data = {}

    # Payload sections are only trusted when they are JSON objects; anything
    # else is treated as absent instead of crashing on ``.get``.
    tool_input = data.get("tool_input")
    if not isinstance(tool_input, dict):
        tool_input = {}
    tool_response = data.get("tool_response")
    if not isinstance(tool_response, dict):
        tool_response = {}

    target = tool_input.get("file_path") or tool_input.get("notebook_path") or ""
    if not isinstance(target, str):
        target = str(target)
    repo_prefix = REPO + "/"
    if target.startswith(repo_prefix):
        target = target[len(repo_prefix):]

    record = {
        "ts": dt.datetime.now(dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "tool": data.get("tool_name", "unknown"),
        "file": target,
        "ok": tool_response.get("success", True),
    }
    try:
        # The .claude directory may not exist yet; without this the append
        # fails and the record is lost.
        os.makedirs(os.path.dirname(AUDIT), exist_ok=True)
        with open(AUDIT, "a", encoding="utf-8") as log:
            log.write(json.dumps(record) + "\n")
    except OSError as exc:
        # Never block tool execution on audit-write failure, but say so.
        warn_to_stderr(f"[audit hook] could not write {AUDIT}: {exc}")
+author: Molecule AI +tags: [molecule, guardrails] + +runtimes: + - claude_code + +hooks: + - post-edit-audit diff --git a/plugins/molecule-audit-trail/settings-fragment.json b/plugins/molecule-audit-trail/settings-fragment.json new file mode 100644 index 00000000..9efdcf9c --- /dev/null +++ b/plugins/molecule-audit-trail/settings-fragment.json @@ -0,0 +1 @@ +{"hooks":{"PostToolUse":[{"matcher":"Edit|Write|NotebookEdit","hooks":[{"type":"command","command":"bash ${CLAUDE_DIR}/hooks/post-edit-audit.sh"}]}]}} diff --git a/plugins/molecule-careful-bash/adapters/__init__.py b/plugins/molecule-careful-bash/adapters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/plugins/molecule-careful-bash/adapters/claude_code.py b/plugins/molecule-careful-bash/adapters/claude_code.py new file mode 100644 index 00000000..cc589931 --- /dev/null +++ b/plugins/molecule-careful-bash/adapters/claude_code.py @@ -0,0 +1,2 @@ +"""Claude Code adaptor — uses the generic rule+skill+hooks installer.""" +from plugins_registry.builtins import AgentskillsAdaptor as Adaptor # noqa: F401 diff --git a/plugins/molecule-careful-bash/hooks/_lib.py b/plugins/molecule-careful-bash/hooks/_lib.py new file mode 100755 index 00000000..1d0555ac --- /dev/null +++ b/plugins/molecule-careful-bash/hooks/_lib.py @@ -0,0 +1,46 @@ +"""Common helpers for Claude Code hooks. Imported by the .py hook scripts. + +Hooks receive JSON on stdin per the Claude Code hook spec, and may emit +JSON on stdout or exit with code 2 to block. This module wraps both. +""" +import json +import sys + + +def read_input() -> dict: + """Parse stdin JSON. 
def _git_push_args(cmd: str) -> list:
    """Return the argument tokens of every ``git push`` invocation in *cmd*."""
    import re

    invocations = []
    # Inspect each simple command of a compound line (&&, ||, ;, |, &, newline).
    for piece in re.split(r"&&|\|\||[;|&\n]", cmd):
        # Tolerate global options between "git" and "push" (git -C repo push).
        hit = re.search(r"\bgit\s+(?:-\S+\s+(?:[^-\s]\S*\s+)?)*push\b", piece)
        if hit:
            invocations.append(
                [tok.strip("'\"`()") for tok in piece[hit.end():].split()]
            )
    return invocations


def _is_forced_push(args: list) -> bool:
    """Tell whether push arguments force the update.

    Matches ``--force`` exactly, a short-flag cluster containing ``f`` and a
    ``+refspec``. ``--force-with-lease`` is deliberately not matched.
    """
    import re

    return any(
        tok == "--force"
        or re.fullmatch(r"-[A-Za-z]*f[A-Za-z]*", tok) is not None
        or (tok.startswith("+") and len(tok) > 1)
        for tok in args
    )


def _protected_branch_in(cmd: str) -> str:
    """Return "main"/"master" if *cmd* names it as a whole ref word, else ""."""
    import re

    for branch in ("main", "master"):
        # "origin/main" and "HEAD:main" count; "maintenance" and "domain" do not.
        if re.search(rf"(?<![\w.-]){branch}(?![\w.-])", cmd):
            return branch
    return ""


def _rm_rf_targets(cmd: str) -> list:
    """Return the path arguments of every recursive *and* forced ``rm`` in *cmd*."""
    import re

    targets = []
    for piece in re.split(r"&&|\|\||[;|&\n]", cmd):
        tokens = [tok.strip("'\"`()") for tok in piece.split()]
        for idx, tok in enumerate(tokens):
            if tok != "rm" and not tok.endswith("/rm"):
                continue
            recursive = force = only_paths = False
            paths = []
            for arg in tokens[idx + 1:]:
                if not arg:
                    continue
                if only_paths or arg == "-" or not arg.startswith("-"):
                    paths.append(arg)
                elif arg == "--":
                    only_paths = True
                elif arg.startswith("--"):
                    recursive = recursive or arg == "--recursive"
                    force = force or arg == "--force"
                else:
                    # Short cluster in any order: -rf, -fr, -Rf, or -r ... -f.
                    recursive = recursive or "r" in arg or "R" in arg
                    force = force or "f" in arg
            if recursive and force:
                targets.extend(paths)
            break
    return targets


def _rm_target_reason(target: str, cmd: str) -> str:
    """Return why deleting *target* recursively is refused, or "" if allowed."""
    import posixpath

    raw_parts = [part for part in target.split("/") if part]
    if ".git" in raw_parts:
        return "rm -rf .git is REFUSED. Re-clone if you need a fresh repo."

    # Scratch status is judged on the normalised path, so "/tmp/../etc" is not
    # mistaken for something under /tmp.
    resolved = posixpath.normpath(target)
    scratch_names = {
        "node_modules", "dist", ".next", "__pycache__", ".pytest_cache", "coverage",
    }
    if "/tmp/" in resolved or any(p in scratch_names for p in resolved.split("/")):
        return ""

    if "migrations" in raw_parts:
        return "rm -rf inside a migrations dir is REFUSED."
    if target.startswith(("/", "~", "$HOME", "${HOME}")):
        return f"rm -rf at filesystem root, HOME, or .git is REFUSED. Command: {cmd[:200]}"
    return ""


def _refusal_reason(cmd: str) -> str:
    """Return the REFUSE message for *cmd* (without prefix), or "" to allow it."""
    import re

    pushes = _git_push_args(cmd)
    protected = _protected_branch_in(cmd)

    # Conservative on purpose: the branch may be named anywhere in the command
    # (e.g. "git checkout main && git push -f"), not only as a push argument.
    if protected and any(_is_forced_push(args) for args in pushes):
        return (
            f"git push --force to {protected} is REFUSED. "
            "Use --force-with-lease on a feature branch only."
        )

    if protected and re.search(r"\bgit\s+reset\b[^;&|\n]*--hard\b", cmd):
        return f"git reset --hard against {protected} is REFUSED. Stash, branch, then reset."

    # SQL keywords are case-insensitive; the sandbox allow-list is kept exact.
    allow_substrings = ["_test", "sandbox", "/tmp/", "_dev", "test_"]
    sql_destructive = [
        ("DROP TABLE", r"\bDROP\s+TABLE\b"),
        ("DROP DATABASE", r"\bDROP\s+DATABASE\b"),
        ("TRUNCATE TABLE", r"\bTRUNCATE\s+TABLE\b"),
    ]
    for label, pattern in sql_destructive:
        if re.search(pattern, cmd, re.IGNORECASE) and not any(
            allowed in cmd for allowed in allow_substrings
        ):
            return (
                f"'{label}' against production-like schema is REFUSED. "
                "Use a migration with explicit review."
            )

    # Every rm target is judged on its own, so a scratch path elsewhere in the
    # command cannot excuse a dangerous one.
    for target in _rm_rf_targets(cmd):
        reason = _rm_target_reason(target, cmd)
        if reason:
            return reason
    return ""


def main() -> None:
    """Apply careful-mode to the Bash command in the hook payload.

    Reads ``tool_input.command`` from stdin. When the command falls on the
    REFUSE list the hook emits a PreToolUse denial via ``deny_pretooluse``
    (which exits the process); WARN-list commands only produce a stderr note.

    REFUSE: forced ``git push`` or ``git reset --hard`` when main/master is
    named in the command, ``DROP TABLE`` / ``DROP DATABASE`` /
    ``TRUNCATE TABLE`` without a test/sandbox marker, and recursive forced
    ``rm`` aimed at an absolute or HOME path, a ``.git`` directory or a
    ``migrations`` directory (scratch paths such as /tmp/... and
    node_modules are exempt, except for ``.git``).
    """
    data = read_input()
    tool_input = data.get("tool_input") if isinstance(data, dict) else None
    cmd = tool_input.get("command", "") if isinstance(tool_input, dict) else ""
    if not cmd or not isinstance(cmd, str):
        return

    # REFUSE list — hard stops
    reason = _refusal_reason(cmd)
    if reason:
        deny_pretooluse(f"careful-mode: {reason}")

    # WARN list — log but allow
    if any(
        tok.startswith("--force-with-lease")
        for args in _git_push_args(cmd)
        for tok in args
    ):
        warn_to_stderr("[careful-mode WARN] force-with-lease: safer than --force but still rewrites remote history.")
    if "gh pr close" in cmd or "gh issue close" in cmd:
        warn_to_stderr("[careful-mode WARN] closing a PR/issue is irreversible from this bot's standpoint. Confirm intent.")
+author: Molecule AI +tags: [molecule, guardrails] + +runtimes: + - claude_code + +skills: + - careful-mode + +hooks: + - pre-bash-careful diff --git a/plugins/molecule-careful-bash/settings-fragment.json b/plugins/molecule-careful-bash/settings-fragment.json new file mode 100644 index 00000000..f7492fbe --- /dev/null +++ b/plugins/molecule-careful-bash/settings-fragment.json @@ -0,0 +1 @@ +{"hooks":{"PreToolUse":[{"matcher":"Bash","hooks":[{"type":"command","command":"bash ${CLAUDE_DIR}/hooks/pre-bash-careful.sh"}]}]}} diff --git a/plugins/molecule-careful-bash/skills/careful-mode/SKILL.md b/plugins/molecule-careful-bash/skills/careful-mode/SKILL.md new file mode 100644 index 00000000..f336478c --- /dev/null +++ b/plugins/molecule-careful-bash/skills/careful-mode/SKILL.md @@ -0,0 +1,74 @@ +--- +name: careful-mode +description: Refuse or warn before destructive irreversible commands (rm -rf, force push, DROP TABLE, gh pr close, gh issue close, mass DELETE). Inspired by gstack's /careful and /freeze. Activate at the start of any cron tick or when about to write to shared resources. +--- + +# careful-mode + +Cron has merge authority + commit authority. That is enough rope to do permanent damage. This skill is the seatbelt. + +## Activate when + +- The hourly cron tick starts +- About to call `gh pr merge` / `gh pr close` / `gh issue close` +- About to push to a branch other than your own draft +- About to run `git push --force` for any reason +- About to run `rm -rf` on anything inside the repo +- About to issue `DROP TABLE` / `TRUNCATE` / `DELETE FROM ... 
WHERE` without a known small WHERE + +## Categories + +### REFUSE — hard stop + +- `git push --force` to `main`, `master`, or any protected branch +- `gh pr merge` on a PR that: + - has CI failing + - has `state: draft` + - has unresolved review comments from a non-bot author + - was created in the same conversation context (need 1 tick of distance) +- `git reset --hard` against a branch that has commits I haven't seen pushed to a remote +- `rm -rf` against any path matching `**/migrations/**`, `.git/`, `~/.molecule/`, or repo root +- `DROP TABLE`, `TRUNCATE TABLE` against any table in the molecule schema +- `DELETE FROM workspaces` without a `WHERE id = $known_uuid` clause + +### WARN — proceed only with explicit confirmation in the prompt + +- `gh pr close` on a PR not authored by me +- `gh issue close` on any issue +- `git push --force-with-lease` (safer than `--force`, still requires care) +- `rm -rf node_modules / dist /` (safe, but worth a one-line "yes I meant this") +- `chmod -R` on anything outside the current PR's diff +- Mass curl-DELETE loops over `/workspaces` (the cleanup-rogue-workspaces.sh pattern is OK but document the prefix) + +### ALLOW + +- Anything against `/tmp/`, the agent's own scratch dir, or test artifacts +- Reads of any kind +- Standard merges via `gh pr merge --merge --delete-branch` once the gates pass +- Single-row updates / deletes with explicit WHERE on a known-uuid + +## Freeze mode + +When debugging a tricky issue, lock edits to one directory. Example invocation: + +``` +careful-mode freeze platform/internal/handlers/ +# now any Edit/Write outside that path refuses +careful-mode unfreeze +``` + +This is conceptually like gstack's `/freeze` — prevents accidental scope creep when an agent is spelunking. + +## How to honor this skill + +The skill is enforced by the AGENT, not by the harness. When making a tool call that lands in the REFUSE / WARN list, the agent must: + +1. Stop +2. 
State the exact command + which list it falls under +3. Explain why this case is or isn't safe +4. For WARN, ask for explicit user confirmation +5. For REFUSE, decline and propose a safer alternative + +## Why this exists + +The cron has merge authority. gstack documented several near-misses where Claude wiped working directories or force-pushed to main. We avoid those by making the rules explicit and machine-readable, applied at the start of every tick. diff --git a/plugins/molecule-freeze-scope/adapters/__init__.py b/plugins/molecule-freeze-scope/adapters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/plugins/molecule-freeze-scope/adapters/claude_code.py b/plugins/molecule-freeze-scope/adapters/claude_code.py new file mode 100644 index 00000000..cc589931 --- /dev/null +++ b/plugins/molecule-freeze-scope/adapters/claude_code.py @@ -0,0 +1,2 @@ +"""Claude Code adaptor — uses the generic rule+skill+hooks installer.""" +from plugins_registry.builtins import AgentskillsAdaptor as Adaptor # noqa: F401 diff --git a/plugins/molecule-freeze-scope/hooks/_lib.py b/plugins/molecule-freeze-scope/hooks/_lib.py new file mode 100755 index 00000000..1d0555ac --- /dev/null +++ b/plugins/molecule-freeze-scope/hooks/_lib.py @@ -0,0 +1,46 @@ +"""Common helpers for Claude Code hooks. Imported by the .py hook scripts. + +Hooks receive JSON on stdin per the Claude Code hook spec, and may emit +JSON on stdout or exit with code 2 to block. This module wraps both. +""" +import json +import sys + + +def read_input() -> dict: + """Parse stdin JSON. 
def main() -> None:
    """Refuse Edit/Write calls outside the scope named in ``.claude/freeze``.

    The first line of the freeze file is the allowed scope. A target passes
    when the scope text occurs in its path (the original substring rule) or,
    if the scope contains glob characters, when it matches the absolute or
    repo-relative path as an ``fnmatch`` pattern. Anything else is denied via
    ``deny_pretooluse``. Nothing is enforced when the freeze file is missing
    or empty, or when the payload names no target path.
    """
    import fnmatch

    if not os.path.isfile(FREEZE):
        return
    with open(FREEZE, encoding="utf-8") as handle:
        allowed = handle.readline().strip()
    if not allowed:
        return

    data = read_input()
    tool_input = data.get("tool_input") if isinstance(data, dict) else None
    if not isinstance(tool_input, dict):
        return
    target = tool_input.get("file_path") or tool_input.get("notebook_path") or ""
    if not isinstance(target, str) or not target:
        return

    # Always allow .claude/ writes (so unfreeze still works). Compare whole
    # path components: a substring test would also exempt ".claude-backup/".
    if ".claude" in target.replace("\\", "/").split("/"):
        return

    if allowed in target:
        return

    if any(ch in allowed for ch in "*?["):
        repo_prefix = REPO + "/"
        relative = target[len(repo_prefix):] if target.startswith(repo_prefix) else target
        if fnmatch.fnmatchcase(target, allowed) or fnmatch.fnmatchcase(relative, allowed):
            return

    deny_pretooluse(
        f"freeze: edit to {target} refused — scope locked to '{allowed}'. "
        f"Remove .claude/freeze to unlock."
    )
def read_input() -> dict:
    """Parse the hook payload from stdin.

    Returns:
        The decoded JSON object, or an empty dict when stdin is empty, is not
        valid JSON, or holds JSON that is not an object (list, string,
        number, ...), so callers can always use ``.get`` on the result.
    """
    raw = sys.stdin.read().strip()
    if not raw:
        return {}
    try:
        payload = json.loads(raw)
    except json.JSONDecodeError:
        return {}
    # Valid JSON is not necessarily an object; honour the ``-> dict`` contract.
    return payload if isinstance(payload, dict) else {}
def main() -> None:
    """Attach watchdog warnings to prompts that mention destructive actions.

    Lower-cases the ``prompt`` field of the hook payload, collects the message
    of every ``PATTERNS`` entry with at least one needle in the prompt, adds a
    bulk-close warning when a ``CLOSE_BULK`` phrase appears together with a
    ``CLOSE_OBJ`` noun, and hands the combined text to ``add_context``.
    Nothing is emitted when no rule fires or the prompt is empty.
    """
    import re

    data = read_input()
    raw_prompt = data.get("prompt") if isinstance(data, dict) else None
    if not isinstance(raw_prompt, str) or not raw_prompt:
        return
    prompt = raw_prompt.lower()

    warnings = [
        f"• {msg}"
        for needles, msg in PATTERNS
        if any(needle in prompt for needle in needles)
    ]

    # Match the object nouns as whole words (optionally plural): a plain
    # substring test finds "pr" inside "prompt", "approve", "process", ...
    nouns = "|".join(re.escape(obj) for obj in CLOSE_OBJ)
    names_object = re.search(rf"\b(?:{nouns})(?:s|es)?\b", prompt) is not None
    if names_object and any(phrase in prompt for phrase in CLOSE_BULK):
        warnings.append("• Bulk close requested. List the targets first; do NOT loop a close command.")

    if warnings:
        add_context(
            "## ⚠ Prompt-watchdog warnings\n\n"
            + "\n".join(warnings)
            + "\n\ncareful-mode applies — re-confirm scope before any destructive tool call."
        )
+author: Molecule AI +tags: [molecule, guardrails] + +runtimes: + - claude_code + +hooks: + - user-prompt-tag diff --git a/plugins/molecule-prompt-watchdog/settings-fragment.json b/plugins/molecule-prompt-watchdog/settings-fragment.json new file mode 100644 index 00000000..796739e2 --- /dev/null +++ b/plugins/molecule-prompt-watchdog/settings-fragment.json @@ -0,0 +1 @@ +{"hooks":{"UserPromptSubmit":[{"hooks":[{"type":"command","command":"bash ${CLAUDE_DIR}/hooks/user-prompt-tag.sh"}]}]}} diff --git a/plugins/molecule-session-context/adapters/__init__.py b/plugins/molecule-session-context/adapters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/plugins/molecule-session-context/adapters/claude_code.py b/plugins/molecule-session-context/adapters/claude_code.py new file mode 100644 index 00000000..cc589931 --- /dev/null +++ b/plugins/molecule-session-context/adapters/claude_code.py @@ -0,0 +1,2 @@ +"""Claude Code adaptor — uses the generic rule+skill+hooks installer.""" +from plugins_registry.builtins import AgentskillsAdaptor as Adaptor # noqa: F401 diff --git a/plugins/molecule-session-context/hooks/_lib.py b/plugins/molecule-session-context/hooks/_lib.py new file mode 100755 index 00000000..1d0555ac --- /dev/null +++ b/plugins/molecule-session-context/hooks/_lib.py @@ -0,0 +1,46 @@ +"""Common helpers for Claude Code hooks. Imported by the .py hook scripts. + +Hooks receive JSON on stdin per the Claude Code hook spec, and may emit +JSON on stdout or exit with code 2 to block. This module wraps both. +""" +import json +import sys + + +def read_input() -> dict: + """Parse stdin JSON. 
def emit(payload: dict) -> None:
    """Print *payload* as a single JSON document on stdout for the harness."""
    print(json.dumps(payload))


def deny_pretooluse(reason: str) -> None:
    """Emit a PreToolUse denial carrying *reason*, then exit with status 0.

    This function does not return: it ends the hook process via ``sys.exit``.
    """
    emit({
        "hookSpecificOutput": {
            "hookEventName": "PreToolUse",
            "permissionDecision": "deny",
            "permissionDecisionReason": reason,
        }
    })
    sys.exit(0)


def add_context(text: str, event_name: str = "") -> None:
    """Emit additionalContext for SessionStart / UserPromptSubmit hooks.

    Args:
        text: Context to hand to the model. Empty or whitespace-only text
            emits nothing.
        event_name: Optional hook event name (e.g. "SessionStart"). When
            given, the context is nested under ``hookSpecificOutput`` together
            with ``hookEventName``; when omitted, the original top-level
            ``additionalContext`` payload is emitted unchanged.
    """
    if not (text and text.strip()):
        return
    if event_name:
        emit({
            "hookSpecificOutput": {
                "hookEventName": event_name,
                "additionalContext": text,
            }
        })
    else:
        # NOTE(review): the Claude Code hooks reference shows additionalContext
        # nested under hookSpecificOutput — confirm the harness still honours
        # this top-level form, or pass event_name from the callers.
        emit({"additionalContext": text})


def warn_to_stderr(msg: str) -> None:
    """Write *msg* to stderr; stdout and the exit status are left untouched."""
    print(msg, file=sys.stderr)
def tail(path: str, n: int) -> str:
    """Return the last *n* lines of *path* as one string, trailing whitespace removed.

    Returns "" when *n* is not positive, the file does not exist or it cannot
    be read. Only the final *n* lines are kept in memory while reading.
    """
    from collections import deque

    if n <= 0 or not os.path.isfile(path):
        return ""
    try:
        with open(path, encoding="utf-8", errors="replace") as f:
            return "".join(deque(f, maxlen=n)).rstrip()
    except OSError:
        return ""


def gh_count(args: list, limit: int = 200) -> str:
    """Count the items a ``gh ... list`` command returns.

    Args:
        args: Arguments placed after ``gh`` (e.g. ``["pr", "list", ...]``).
        limit: Maximum number of items requested. ``gh`` list commands return
            only 30 items unless ``--limit`` is passed, which would silently
            cap the count. Ignored when *args* already carries a limit.

    Returns:
        The count as a string, ``"<limit>+"`` when the requested limit was
        reached (the real number may be higher), or ``"?"`` when ``gh`` is
        unavailable, fails, times out or prints something unexpected.
    """
    import json

    has_limit = any(a in ("--limit", "-L") or a.startswith("--limit=") for a in args)
    cmd = ["gh", *args, "--json", "number"]
    if not has_limit:
        cmd += ["--limit", str(limit)]
    try:
        out = subprocess.run(cmd, capture_output=True, text=True, timeout=4)
    except (OSError, subprocess.SubprocessError):
        return "?"
    if out.returncode != 0:
        return "?"
    try:
        items = json.loads(out.stdout or "[]")
    except ValueError:
        return "?"
    if not isinstance(items, list):
        return "?"
    if not has_limit and len(items) >= limit:
        return f"{limit}+"
    return str(len(items))


def main() -> None:
    """Assemble the session-start context and emit it via ``add_context``.

    Sections, in order: the last 20 lines of the cron-learnings file (when it
    has content), a freeze banner when ``.claude/freeze`` exists, and the
    open PR / issue counts reported by ``gh``.
    """
    parts = []

    learnings = tail(LEARNINGS, 20)
    if learnings:
        parts.append(f"## Recent cron learnings (last 20)\n{learnings}")

    if os.path.isfile(FREEZE):
        try:
            with open(FREEZE) as f:
                frozen = f.readline().strip()
        except (OSError, ValueError):
            pass  # an unreadable freeze file must not abort session start
        else:
            parts.append(f"## ⚠ FREEZE ACTIVE\nEdits restricted to: {frozen}\nRemove .claude/freeze to unlock.")

    # NOTE(review): the repository slug is fixed here, so a workspace in
    # another repo still reports Molecule-AI/molecule-monorepo counts.
    pr = gh_count(["pr", "list", "--repo", "Molecule-AI/molecule-monorepo", "--state", "open"])
    iss = gh_count(["issue", "list", "--repo", "Molecule-AI/molecule-monorepo", "--state", "open"])
    parts.append(f"## Repo state\nOpen PRs: {pr} · Open issues: {iss}")

    add_context("\n\n".join(parts))
b/plugins/molecule-session-context/hooks/session-start-context.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +exec python3 "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/session-start-context.py" diff --git a/plugins/molecule-session-context/plugin.yaml b/plugins/molecule-session-context/plugin.yaml new file mode 100644 index 00000000..d1968245 --- /dev/null +++ b/plugins/molecule-session-context/plugin.yaml @@ -0,0 +1,11 @@ +name: molecule-session-context +version: 1.0.0 +description: Auto-load recent cron-learnings + repo PR/issue counts at SessionStart. Pairs well with molecule-skill-cron-learnings. +author: Molecule AI +tags: [molecule, guardrails] + +runtimes: + - claude_code + +hooks: + - session-start-context diff --git a/plugins/molecule-session-context/settings-fragment.json b/plugins/molecule-session-context/settings-fragment.json new file mode 100644 index 00000000..1f560a18 --- /dev/null +++ b/plugins/molecule-session-context/settings-fragment.json @@ -0,0 +1 @@ +{"hooks":{"SessionStart":[{"hooks":[{"type":"command","command":"bash ${CLAUDE_DIR}/hooks/session-start-context.sh"}]}]}} diff --git a/plugins/molecule-skill-code-review/adapters/__init__.py b/plugins/molecule-skill-code-review/adapters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/plugins/molecule-skill-code-review/adapters/claude_code.py b/plugins/molecule-skill-code-review/adapters/claude_code.py new file mode 100644 index 00000000..cc589931 --- /dev/null +++ b/plugins/molecule-skill-code-review/adapters/claude_code.py @@ -0,0 +1,2 @@ +"""Claude Code adaptor — uses the generic rule+skill+hooks installer.""" +from plugins_registry.builtins import AgentskillsAdaptor as Adaptor # noqa: F401 diff --git a/plugins/molecule-skill-code-review/plugin.yaml b/plugins/molecule-skill-code-review/plugin.yaml new file mode 100644 index 00000000..33e7c7a5 --- /dev/null +++ b/plugins/molecule-skill-code-review/plugin.yaml @@ -0,0 +1,11 @@ +name: molecule-skill-code-review +version: 1.0.0 
+description: Multi-criteria code review skill — best practices, modularity, scalability, abstraction, test coverage, redundancy, hardcoded values, type safety, performance, naming, API design, async patterns, config/env sync, template consistency, documentation alignment. +author: Molecule AI +tags: [molecule, guardrails, code-review] + +runtimes: + - claude_code + +skills: + - code-review diff --git a/plugins/molecule-skill-code-review/skills/code-review/SKILL.md b/plugins/molecule-skill-code-review/skills/code-review/SKILL.md new file mode 100644 index 00000000..a6954b04 --- /dev/null +++ b/plugins/molecule-skill-code-review/skills/code-review/SKILL.md @@ -0,0 +1,172 @@ +--- +name: code-review +description: "Review code for best practices, modularity, scalability, abstraction, test coverage, redundancy, hardcoded values, type safety, performance, naming, API design, async patterns, config/env sync, template consistency, and documentation alignment. Generates detailed report with issues and recommendations." +--- + +# Code Review + +Perform a comprehensive code review of recent changes or specified files to ensure quality standards. + +## Review Criteria + +### 1. Best Practices +- Follows TypeScript strict mode conventions +- Proper error handling (try/catch, error types, no silent failures) +- No hardcoded values (use environment variables or constants) +- Proper logging with appropriate log levels +- Security best practices (input validation, no SQL injection, XSS prevention) +- No console.log in production code (use logger) + +### 2. Modularity +- Single responsibility principle (each function/class does one thing) +- Functions are small and focused (< 50 lines ideally) +- No code duplication (DRY principle) +- Clear separation of concerns (routes, services, utilities) + +### 3. 
Scalability +- Efficient database queries (proper indexing, no N+1 queries) +- Connection pooling used correctly +- Async operations handled properly +- No blocking operations in hot paths + +### 4. Abstraction +- Interfaces/types defined for all public APIs +- Implementation details hidden behind abstractions +- Adapter pattern used for external services (LLM, database) +- Configuration externalized (not hardcoded) + +### 5. Test Coverage +- Unit tests exist for all utility functions and service functions +- Service layer has integration tests +- Edge cases are covered +- Test files go in `tests/unit/` or `tests/integration/`, named `*.test.ts` +- All exported functions have at least one test + +### 6. No Redundancy +- No duplicate code blocks (extract to shared functions/utilities) +- No repeated logic across files (consolidate into services) +- No redundant imports or unused variables +- No copy-pasted code with minor variations (use parameters/generics) +- No redundant API calls (cache or batch where appropriate) +- No repeated validation logic (create reusable validators) +- No duplicate helper logic in test files (extract shared test utilities) + +### 7. No Hardcoded Values +- No hardcoded URLs, API endpoints, or hostnames (use env vars) +- No hardcoded credentials, keys, or secrets (use env vars) +- No magic numbers without named constants +- No hardcoded file paths (use configuration or path utilities) +- No hardcoded timeouts/limits (externalize to config) +- No hardcoded error messages (use constants or i18n) +- No hardcoded feature flags (use configuration system) +- No hardcoded tenant/user IDs in business logic + +### 8. Type Safety +- No usage of `any` type (use `unknown` or proper types) +- Proper null/undefined handling (optional chaining, nullish coalescing) +- Generic types used appropriately +- Return types explicitly declared for public functions +- No type assertions (`as`) without validation + +### 9. 
Performance +- No memory leaks (cleanup subscriptions, timers, event listeners) +- Proper memoization for expensive computations +- Lazy loading for heavy components/modules +- Efficient data structures for the use case +- No synchronous operations blocking the event loop +- Batch API calls where possible (e.g., single `messages.modify` with multiple label IDs) + +### 10. Naming & Readability +- Descriptive variable/function names (no `x`, `temp`, `data`) +- Consistent naming conventions (camelCase, PascalCase) +- No misleading names (function does what name suggests) +- Boolean variables prefixed appropriately (`is`, `has`, `should`) +- No excessive abbreviations +- Code is self-documenting where possible + +### 11. API Design +- Consistent response formats across endpoints +- Proper HTTP status codes used +- Input validation at API boundaries +- Proper error response structure +- RESTful conventions followed +- API versioning considered for breaking changes + +### 12. Async & Concurrency +- No unhandled promise rejections +- Proper race condition handling +- Concurrent operations use Promise.all where appropriate +- No floating promises (missing await) +- Proper cleanup on component unmount/request abort +- AbortController used for cancellable operations + +### 13. Dependency Management +- No unused dependencies in package.json +- No deprecated packages +- Security vulnerabilities addressed (npm audit) +- Peer dependency conflicts resolved +- Dependencies pinned to specific versions where needed + +### 14. 
Environment & Configuration Sync +- Every env var used in `src/config/env.ts` is documented in `.env.example` +- Every env var in `.env.example` is defined in the Zod schema (`src/config/env.ts`) +- Default values match between `.env.example` comments and Zod `.default()` calls +- Conditional requirements are documented (e.g., "only required when LLM_PROVIDER=openai") +- No env vars referenced directly via `process.env` outside of `src/config/env.ts` and `src/lib/logger.ts` +- `docker-compose.yml` service ports/URLs align with `.env.example` defaults +- `Dockerfile` exposes the correct `PORT` matching `.env.example` +- `docs/railway-deployment.md` env var list matches the Zod schema + +### 15. Template & Documentation Consistency +- Email templates in `docs/templates/` have all `{{variable}}` placeholders documented in their "Available Variables" table +- Template variable sources match actual database columns and service outputs +- Classification categories in `docs/classification-design.md` match the `EmailCategory` type in `src/types/email.ts` +- Confidence thresholds in docs match the actual thresholds implemented in code +- Sub-types in docs match the template trigger conditions +- Gmail label names in code (`GmailLabel` const) match labels documented in architecture docs +- API endpoint schemas in `docs/api-spec.md` match actual route handler request/response types +- Error handling strategies in `docs/error-handling.md` match actual retry/error class behavior (e.g., `isRetryable` flags) + +### 16. 
Error Messages & UX +- User-friendly error messages (no technical jargon) +- Loading states for async operations +- Empty states handled gracefully +- Graceful degradation when features fail +- Confirmation for destructive actions +- Success feedback for completed actions +- Error boundaries to prevent full app crashes +- Proper form validation with clear feedback + +## Output Format + +```markdown +## Code Review Report + +### Files Reviewed +- List of files + +### Issues Found + +#### 🔴 Critical +- [file:line] Description - Recommendation + +#### 🟡 Warning +- [file:line] Description - Recommendation + +#### 🔵 Suggestions +- [file:line] Description - Recommendation + +### Config & Template Sync +- .env.example ↔ env.ts schema: [in sync / N mismatches] +- docs/classification-design.md ↔ src/types/email.ts: [in sync / N mismatches] +- docs/templates/ ↔ template variables: [in sync / N mismatches] +- docs/error-handling.md ↔ src/lib/errors.ts: [in sync / N mismatches] + +### Test Coverage +- Files missing tests +- Coverage gaps + +### Summary +- Total issues count +- Action items +``` diff --git a/plugins/molecule-skill-cron-learnings/adapters/__init__.py b/plugins/molecule-skill-cron-learnings/adapters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/plugins/molecule-skill-cron-learnings/adapters/claude_code.py b/plugins/molecule-skill-cron-learnings/adapters/claude_code.py new file mode 100644 index 00000000..cc589931 --- /dev/null +++ b/plugins/molecule-skill-cron-learnings/adapters/claude_code.py @@ -0,0 +1,2 @@ +"""Claude Code adaptor — uses the generic rule+skill+hooks installer.""" +from plugins_registry.builtins import AgentskillsAdaptor as Adaptor # noqa: F401 diff --git a/plugins/molecule-skill-cron-learnings/plugin.yaml b/plugins/molecule-skill-cron-learnings/plugin.yaml new file mode 100644 index 00000000..b70de024 --- /dev/null +++ b/plugins/molecule-skill-cron-learnings/plugin.yaml @@ -0,0 +1,11 @@ +name: 
molecule-skill-cron-learnings +version: 1.0.0 +description: Defines the per-tick operational-memory JSONL format used by automated cron loops. End each cron tick by appending 1-3 learning lines; replay at next tick start. Pairs with molecule-session-context for auto-loading. +author: Molecule AI +tags: [molecule, guardrails, memory] + +runtimes: + - claude_code + +skills: + - cron-learnings diff --git a/plugins/molecule-skill-cron-learnings/skills/cron-learnings/SKILL.md b/plugins/molecule-skill-cron-learnings/skills/cron-learnings/SKILL.md new file mode 100644 index 00000000..bdbf9cda --- /dev/null +++ b/plugins/molecule-skill-cron-learnings/skills/cron-learnings/SKILL.md @@ -0,0 +1,60 @@ +--- +name: cron-learnings +description: At the end of every cron tick, append 1-3 lines of operational learnings (what worked, what surprised, what should change next tick) to a per-project JSONL. Replay at start of next tick. Inspired by gstack's /learn skill. +--- + +# cron-learnings + +Each tick, the cron does a lot of work. Half the lessons are forgotten by the next tick. This skill is the compounding layer. + +## Storage + +Per-project file at: +``` +~/.claude/projects/<project-slug>/memory/cron-learnings.jsonl +``` + +For molecule-monorepo, that's: +``` +~/.claude/projects/-Users-hongming-Documents-GitHub-molecule-monorepo/memory/cron-learnings.jsonl +``` + +One JSON object per line: +```json +{"ts": "2026-04-14T05:17:00Z", "tick_id": "5939aa3f-001", "category": "gate-fail", "summary": "Gate 4 (security) flagged token!=secret in PR #28; requireInternalAPISecret needs subtle.ConstantTimeCompare", "next_action": "When reviewing auth-gate code, grep for `subtle.ConstantTimeCompare`. 
Flag plain == on tokens."} +``` + +Categories: +- `gate-fail` — a verification gate caught something +- `mechanical-fix` — fixed a gate failure on-branch +- `false-positive` — a code-review finding turned out to be wrong; record so we don't keep flagging it +- `tool-error` — an MCP tool / CLI flaked; note the workaround +- `repo-state` — something about the repo's state that next tick should know +- `pattern` — a cross-PR pattern worth remembering (e.g., "every cron loop adds itself as `noreply@anthropic.com`; reviewers OK with it") + +## When to write + +End of every cron tick (Step 5 of the cron prompt). 1-3 lines max — be terse. + +## When to read + +Start of every cron tick. Read the last 20 lines (most recent first) before Step 1. Use them to: +- Skip false-positive paths the previous tick flagged +- Apply learned patterns (e.g., "PR #28 found INTERNAL_API_SECRET missing from .env.example — when reviewing future security PRs, always check .env.example sync as a first move") +- Avoid re-litigating decided design choices + +## Pruning + +Cap at 500 lines. When exceeded, the next write also drops the oldest 100 lines. The point is recent operational memory, not an audit log. + +## Format discipline + +- One line per event +- ASCII-only for grep-friendliness +- No PII, no tokens, no URLs with auth +- `summary` is what HAPPENED; `next_action` is what FUTURE-YOU should DO +- If you can't think of a concrete next_action, it's not worth logging + +## Why this exists + +gstack's `/learn` showed that AI sessions repeatedly make the same mistakes because the lessons live only in the conversation that produced them. Writing them to disk lets every tick start with the accumulated wisdom of every prior tick, at zero cost. The awareness MCP we have is fine for cross-session human/agent memory — this file is specifically for the cron's own automation. 
diff --git a/plugins/molecule-skill-cross-vendor-review/adapters/__init__.py b/plugins/molecule-skill-cross-vendor-review/adapters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/plugins/molecule-skill-cross-vendor-review/adapters/claude_code.py b/plugins/molecule-skill-cross-vendor-review/adapters/claude_code.py new file mode 100644 index 00000000..cc589931 --- /dev/null +++ b/plugins/molecule-skill-cross-vendor-review/adapters/claude_code.py @@ -0,0 +1,2 @@ +"""Claude Code adaptor — uses the generic rule+skill+hooks installer.""" +from plugins_registry.builtins import AgentskillsAdaptor as Adaptor # noqa: F401 diff --git a/plugins/molecule-skill-cross-vendor-review/plugin.yaml b/plugins/molecule-skill-cross-vendor-review/plugin.yaml new file mode 100644 index 00000000..0f131380 --- /dev/null +++ b/plugins/molecule-skill-cross-vendor-review/plugin.yaml @@ -0,0 +1,11 @@ +name: molecule-skill-cross-vendor-review +version: 1.0.0 +description: Run an adversarial code review against a non-Claude model (Codex / GPT / Gemini) and surface disagreements with Claude's own review. Use ONLY for noteworthy PRs (auth, billing, data-deletion, irreversible migration). +author: Molecule AI +tags: [molecule, guardrails, code-review, security] + +runtimes: + - claude_code + +skills: + - cross-vendor-review diff --git a/plugins/molecule-skill-cross-vendor-review/skills/cross-vendor-review/SKILL.md b/plugins/molecule-skill-cross-vendor-review/skills/cross-vendor-review/SKILL.md new file mode 100644 index 00000000..28ae30f7 --- /dev/null +++ b/plugins/molecule-skill-cross-vendor-review/skills/cross-vendor-review/SKILL.md @@ -0,0 +1,71 @@ +--- +name: cross-vendor-review +description: Run an adversarial code review against a non-Claude model (Codex / GPT / Gemini) and surface disagreements with Claude's own review. Use ONLY for noteworthy PRs (auth, billing, data-deletion, irreversible migration, large-blast-radius). Inspired by gstack's /codex command. 
+--- + +# cross-vendor-review + +Two LLMs catch bugs one doesn't. Claude has blind spots; so does GPT-5; so does Gemini. For high-stakes PRs the cost of a second model is dwarfed by the cost of a missed defect. + +## When to invoke + +ALWAYS for PRs touching: +- Authentication, authorization, session, or token handling +- Billing / payments / Stripe / metering +- Destructive operations (delete cascades, mass-update, drop) +- Database migrations (schema changes, data backfills) +- Cross-tenant isolation logic +- Cryptographic primitives + +OPTIONAL for: +- Large refactors (>500 LOC) +- Performance-sensitive changes +- Anything where the cron's standard code-review skill returned conflicting signals + +NEVER for: +- Docs, templates, CI tweaks, dependency bumps, test-only changes + +## How to invoke + +1. Pull the diff: `gh pr diff N --repo OWNER/REPO` +2. Run Claude's own code-review skill first; capture its findings +3. Send the SAME diff + the SAME rubric to a second model: + - Preferred order: GPT-5 (via Codex CLI or API), Gemini Pro 2.5, Llama 3.3 70B + - One-shot prompt; no conversation + - Instruct the second model to be ADVERSARIAL: assume the diff has at least one bug and find it +4. Compare the two reports. For each finding: + - Both flag it → real, must address + - Only Claude → likely real, address or justify dismissal + - Only second model → may be real, investigate + - Both clean → ok to merge + +## Output format + +``` +## Cross-vendor review for PR #N + +| Finding | Claude | <2nd model> | Verdict | +|---|---|---|---| +| Token compared with == not constant-time | 🔴 | 🔴 | MUST FIX | +| ctx not propagated through goroutine | 🟡 | — | SHOULD FIX | +| Stale jwt cache on revoke | — | 🟡 | INVESTIGATE | + +## Disagreements +- Claude said X; <2nd model> said Y. Resolution: ... + +## Verdict +- ☐ Merge (both clean) +- ☐ Address findings then re-review +- ☐ Escalate to CEO (irreconcilable models) +``` + +## Cost guard + +Cross-vendor calls cost real money. 
Cap: +- One pass per PR per session +- Skip if the noteworthy-flag is uncertain (default: no second model) +- Log per-tick spend in the cron telemetry channel + +## Why this exists + +gstack's `/codex` showed that single-model review misses ~15-30% of real findings catchable by a different vendor. Auth bugs are precisely the class where blind spots are catastrophic. This skill formalizes the pattern. diff --git a/plugins/molecule-skill-llm-judge/adapters/__init__.py b/plugins/molecule-skill-llm-judge/adapters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/plugins/molecule-skill-llm-judge/adapters/claude_code.py b/plugins/molecule-skill-llm-judge/adapters/claude_code.py new file mode 100644 index 00000000..cc589931 --- /dev/null +++ b/plugins/molecule-skill-llm-judge/adapters/claude_code.py @@ -0,0 +1,2 @@ +"""Claude Code adaptor — uses the generic rule+skill+hooks installer.""" +from plugins_registry.builtins import AgentskillsAdaptor as Adaptor # noqa: F401 diff --git a/plugins/molecule-skill-llm-judge/plugin.yaml b/plugins/molecule-skill-llm-judge/plugin.yaml new file mode 100644 index 00000000..063a18b0 --- /dev/null +++ b/plugins/molecule-skill-llm-judge/plugin.yaml @@ -0,0 +1,11 @@ +name: molecule-skill-llm-judge +version: 1.0.0 +description: Cheap LLM-as-judge gate that catches "agent shipped the wrong thing". Scores whether a deliverable (PR diff, A2A response, generated config) actually addresses the original request — the failure mode unit tests miss. 
+author: Molecule AI +tags: [molecule, guardrails, evaluation] + +runtimes: + - claude_code + +skills: + - llm-judge diff --git a/plugins/molecule-skill-llm-judge/skills/llm-judge/SKILL.md b/plugins/molecule-skill-llm-judge/skills/llm-judge/SKILL.md new file mode 100644 index 00000000..fca14b6d --- /dev/null +++ b/plugins/molecule-skill-llm-judge/skills/llm-judge/SKILL.md @@ -0,0 +1,75 @@ +--- +name: llm-judge +description: Evaluate whether a Molecule AI agent's output (a PR, a delegation result, a generated config) actually addresses the original request. Cheap LLM-as-judge gate that catches "wrong answer to right question" — the failure mode unit tests miss. Inspired by gstack's tier-3 LLM-as-judge test infra. +--- + +# llm-judge + +Unit tests verify the code RAN. They don't verify it did the RIGHT THING for the customer's actual request. This skill closes that gap. + +## When to invoke + +After a Molecule AI agent (PM, Dev Lead, QA, etc.) produces a deliverable: +- A PR they opened in response to an issue +- A delegation result (response to an A2A `message/send`) +- A generated config or template +- A code review they posted + +Specifically: when a worker agent comes back with "done", before we believe them. + +## Inputs + +1. The ORIGINAL request — the issue body, the user message, the delegation prompt +2. The DELIVERABLE — the diff, the response text, the generated artifact +3. ACCEPTANCE CRITERIA if explicit (often in the issue body) + +## How to evaluate + +Send to a small fast model (Haiku, GPT-mini, Gemini Flash): + +``` +You are an evaluator. Below is a customer request and the deliverable +the AI agent produced. Rate, on a 0-5 scale, how well the deliverable +addresses the original request. Then list the top 3 reasons for the score. 
+ +REQUEST: +<original request> + +DELIVERABLE: +<deliverable> + +ACCEPTANCE CRITERIA (if any): +<acceptance criteria, or "none"> + +Output JSON: +{ + "score": 0..5, + "addresses_request": true|false, + "missing": ["...", "..."], + "wrong": ["...", "..."], + "reasons": ["...", "...", "..."] +} +``` + +## Decision + +| Score | Action | +|---|---| +| 5 | Accept — log to telemetry | +| 4 | Accept with note — file a follow-up issue for the gap if material | +| 3 | Send back to the agent for revision with the judge's "missing" list | +| 0–2 | Reject. Escalate to CEO. Likely the agent misunderstood the task — fixing the prompt > fixing the deliverable | + +## Cost + +Tier-3 (Haiku-class): ~$0.001 per eval. Even at 100 evals/day = $0.10/day. Negligible. + +## Where to plug it in + +- **Cron Step 4 (issue pickup)**: after a draft PR is opened by a subagent, run llm-judge against the issue body. Mark the PR ready ONLY if score >= 4. +- **A2A delegation in workspaces**: optionally enable per-org. PM gets the worker's response, runs llm-judge, only forwards to the next stage if accepted. +- **Manual**: `npm run skill:llm-judge -- --request <request> --deliverable <deliverable>` + +## Why this exists + +gstack runs LLM-as-judge as a test-tier ($0.15 per eval, ~30s). Our worker agents produce many more deliverables per day than gstack's single-session model — making the eval cheaper and more frequent matches our scale. The failure mode this catches — "agent shipped the wrong thing" — is invisible to unit tests AND to code-review skills (both verify the code, not the intent). 
diff --git a/plugins/molecule-skill-update-docs/adapters/__init__.py b/plugins/molecule-skill-update-docs/adapters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/plugins/molecule-skill-update-docs/adapters/claude_code.py b/plugins/molecule-skill-update-docs/adapters/claude_code.py new file mode 100644 index 00000000..cc589931 --- /dev/null +++ b/plugins/molecule-skill-update-docs/adapters/claude_code.py @@ -0,0 +1,2 @@ +"""Claude Code adaptor — uses the generic rule+skill+hooks installer.""" +from plugins_registry.builtins import AgentskillsAdaptor as Adaptor # noqa: F401 diff --git a/plugins/molecule-skill-update-docs/plugin.yaml b/plugins/molecule-skill-update-docs/plugin.yaml new file mode 100644 index 00000000..15a6be0f --- /dev/null +++ b/plugins/molecule-skill-update-docs/plugin.yaml @@ -0,0 +1,11 @@ +name: molecule-skill-update-docs +version: 1.0.0 +description: Review recent edits and update all documentation (architecture docs, API specs, edit history, README mirrors). Creates missing docs for new implementations. +author: Molecule AI +tags: [molecule, guardrails, documentation] + +runtimes: + - claude_code + +skills: + - update-docs diff --git a/plugins/molecule-skill-update-docs/skills/update-docs/SKILL.md b/plugins/molecule-skill-update-docs/skills/update-docs/SKILL.md new file mode 100644 index 00000000..459b89f9 --- /dev/null +++ b/plugins/molecule-skill-update-docs/skills/update-docs/SKILL.md @@ -0,0 +1,89 @@ +--- +name: update-docs +description: "Review recent edits and update all documentation including architecture docs, API specs, and edit history. Creates missing docs for new implementations." +--- + +# Update Documentation + +Review recent code changes and update ALL relevant documentation in the `/docs` folder. + +## Steps + +1. **Read today's edit history** + + - Check `docs/edit-history/` for the current date's session file + - Identify all files that were modified + +2. 
**Analyze changes** + + - Read the modified files to understand what changed + - Categorize changes: new features, bug fixes, architecture changes, API changes, config changes + +3. **Update edit-history session file** + + - Add a summary section at the top describing what was accomplished + - Group related changes under descriptive headings + - Add any missing context about why changes were made + +4. **Update CLAUDE.md if needed** + + - New commands or scripts added + - Architecture or key modules changed + - New environment variables required + - New routes or endpoints added + - Test counts when new test files were added + +5. **Update PLAN.md (repo root) if needed** + + - When a planned phase ships, mark it complete and add any follow-ups + - When new architectural decisions are made, update the relevant phase + - Keep the current status / next steps section in sync with reality + - If a feature was reverted, document the reversal and reasoning + +6. **Update README.md (repo root) if needed** + + - New features visible to users (canvas tabs, deploy flows, etc.) + - Changed setup or quickstart instructions + - Updated tech stack list (when adding/removing major dependencies) + - Updated test counts in the status badges + - License or branding changes + +7. **Update README.zh-CN.md (repo root) if README.md was updated** + + - Mirror any user-visible changes from README.md + - Keep the Chinese translation in sync — don't let it drift + - Update the same sections in both files (status, features, setup, license) + +8. **Update .env.example (repo root) if needed** + + - Every new env var read by code must be documented in `.env.example` + - Include a comment describing the var and its expected format + - When removing an env var from code, remove from `.env.example` + - Keep default values consistent with code defaults + +9. **Update docs/README.md if needed** + + - New features or capabilities + - Changed setup instructions + - Updated project overview + +10. 
**Update docs/ files** + Review and update all architecture documentation to match current implementation + + **For each doc:** + + - Check if documented features match actual code implementation + - Update outdated sections to reflect current code + - Add NEW sections for features that are implemented but not documented + - Remove or mark deprecated features that no longer exist + - Ensure code examples match actual implementation + +11. **Create new docs if needed** + + - If a significant new feature or module was added but has no documentation, create appropriate documentation + - Follow existing documentation style and structure + +12. **Report summary** + - List all documentation files updated + - Note any new documentation files created + - Summarize key changes documented diff --git a/plugins/molecule-workflow-retro/adapters/__init__.py b/plugins/molecule-workflow-retro/adapters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/plugins/molecule-workflow-retro/adapters/claude_code.py b/plugins/molecule-workflow-retro/adapters/claude_code.py new file mode 100644 index 00000000..cc589931 --- /dev/null +++ b/plugins/molecule-workflow-retro/adapters/claude_code.py @@ -0,0 +1,2 @@ +"""Claude Code adaptor — uses the generic rule+skill+hooks installer.""" +from plugins_registry.builtins import AgentskillsAdaptor as Adaptor # noqa: F401 diff --git a/plugins/molecule-workflow-retro/commands/retro.md b/plugins/molecule-workflow-retro/commands/retro.md new file mode 100644 index 00000000..78f11bc6 --- /dev/null +++ b/plugins/molecule-workflow-retro/commands/retro.md @@ -0,0 +1,41 @@ +--- +name: retro +description: Generate a weekly retrospective digest — PRs merged, gate failures, code-review severity trend, time-to-merge, issues picked up. Posts as a GitHub issue. +--- + +# /retro + +Weekly retrospective on cron + agent activity. Default cadence: Sundays +23:00 local. Manual invocation on demand. + +## Steps + +1. 
Compute over the prior 7 days: + - Merged PR count (total + by category) + - Issues closed (with PR-link for each) + - Time-to-merge: median, p90, max — exclude docs PRs + - Gate failure breakdown (which gates, how often) + - Code-review findings: total 🔴/🟡/🔵, trend vs prior week + - Mechanical fixes pushed (count) + - Skips by reason: design-judgment / CI-down / scope-too-open / noteworthy-CEO-needed + - Code volume: net LOC added/removed + - Test count delta (Go + Python + Vitest + Jest) + - New runtime / library / tool added or removed + +2. Format per the `cron-retro` skill template. + +3. Post as a new GitHub issue titled + `Cron retro: <start-date> → <end-date> (week N)` with labels `meta`, `retro`. + +4. If trends are bad (gate failure rate up, 🔴 findings appearing, + time-to-merge >50% increase), flag prominently in the body and + @-mention the workspace owner. + +5. Skip new-issue creation if the prior 7 days had < 3 merged PRs; + post a one-liner in the latest weekly retro issue's comments instead. + +## Standing rules +- careful-mode applies — don't mass-close stale issues, don't delete + prior retros +- The retro is observational, not actionable — propose 2-3 follow-ups + for the user but never auto-create them diff --git a/plugins/molecule-workflow-retro/plugin.yaml b/plugins/molecule-workflow-retro/plugin.yaml new file mode 100644 index 00000000..58ad9933 --- /dev/null +++ b/plugins/molecule-workflow-retro/plugin.yaml @@ -0,0 +1,14 @@ +name: molecule-workflow-retro +version: 1.0.0 +description: Provides /retro slash command — weekly retrospective generator. Recommends installing molecule-skill-cron-learnings first. 
+author: Molecule AI +tags: [molecule, guardrails] + +runtimes: + - claude_code + +skills: + - cron-retro + +commands: + - retro diff --git a/plugins/molecule-workflow-retro/skills/cron-retro/SKILL.md b/plugins/molecule-workflow-retro/skills/cron-retro/SKILL.md new file mode 100644 index 00000000..579ae3ec --- /dev/null +++ b/plugins/molecule-workflow-retro/skills/cron-retro/SKILL.md @@ -0,0 +1,69 @@ +--- +name: cron-retro +description: Weekly retrospective digest of cron activity — PRs merged, gates failed, issues picked, code-review findings by severity, time-to-merge, regression trend. Posts to a dedicated GitHub issue. Inspired by gstack's /retro. +--- + +# cron-retro + +The cron runs hourly and ships a lot. Without a periodic summary, drift happens silently — Gate 4 starts failing more often, code-review noise climbs, time-to-merge balloons, and nobody notices for weeks. + +## When to run + +- Every Sunday at 23:00 local (`0 23 * * 0` cron expression) +- On-demand by the CEO + +## What to compute (over the prior 7 days) + +From `gh pr list --state merged --search "merged:>=YYYY-MM-DD"` and our local `cron-learnings.jsonl`: + +1. **Merged PR count** — total + by category (auth/security, refactor, feat, fix, docs, infra) +2. **Issues closed** — count, with PR-link for each +3. **Time-to-merge distribution** — median, p90, max. Excluding docs PRs (they merge instantly). +4. **Gate failure breakdown** — which gates failed how often. Patterns? +5. **Code-review findings** — total 🔴 / 🟡 / 🔵 across all PRs. Trend vs prior week. +6. **Mechanical fixes pushed** — how often did the cron fix a gate failure on-branch? +7. **Skips by reason** — categorize: design-judgment, CI-down, scope-too-open, noteworthy-CEO-needed +8. **Code volume** — net LOC added/removed (Garry Tan publishes these in his retros — keep us honest) +9. **Test count delta** — Go + Python + Vitest + Jest from start to end of week +10. 
**New runtime / library / tool added or removed** — anything strategic + +## Format + +Post a new GitHub issue titled `Cron retro: 2026-04-14 → 2026-04-21 (week N)` with body: + +```markdown +# Week summary +- Merged: X PRs (Y closed issues) +- Median TTM: 3h12m (excluding docs) +- Code-review findings: 0 🔴 / 4 🟡 / 18 🔵 (vs last week: 0 / 6 / 24) +- Mechanical fixes pushed: 5 +- Skips: 2 design-judgment, 1 CI-down + +# Trend signals +- ↑ Frontend test coverage (+12 vitest, +1 file) +- ↓ Time-to-merge for auth PRs (down from 8h median to 3h — likely + because Gate-4 doc-sync subagent now catches missing .env entries) +- ⚠ Gate 7 (Playwright) failed 3 times this week vs 0 last week — + probably the canvas dev-server stale-chunk issue. Action item. + +# Code volume +- 12,847 lines added, 8,213 removed across 23 commits + +# Notes +- Closed #6, #13, #17, #23 — 4 issues from the launch backlog +- 2 issues remain in the SaaS-launch Tier 1 list (multi-tenancy, Fly Machines) +- New skills added this week: cross-vendor-review, careful-mode, cron-learnings, cron-retro + +# Action items for next week +- [ ] Investigate Gate 7 flakes (likely fix: persistent canvas dev daemon) +- [ ] Pick up issue #19 (workspace restart context) +- [ ] PR #58 needs CEO review (configurable tier limits — behavior change) +``` + +## Why this exists + +What gets measured improves. gstack publishes weekly retros and credits them with knowing where to invest. We have no analog. This is the smallest viable analog: one issue per week, generated automatically, costs nothing to ignore, valuable when the metrics start drifting. + +## Implementation note + +This skill should be invoked from a separate cron job (not the hourly triage cron). Suggested cron expression: `7 23 * * 0` — Sunday 23:07 local. 
diff --git a/plugins/molecule-workflow-triage/adapters/__init__.py b/plugins/molecule-workflow-triage/adapters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/plugins/molecule-workflow-triage/adapters/claude_code.py b/plugins/molecule-workflow-triage/adapters/claude_code.py new file mode 100644 index 00000000..cc589931 --- /dev/null +++ b/plugins/molecule-workflow-triage/adapters/claude_code.py @@ -0,0 +1,2 @@ +"""Claude Code adaptor — uses the generic rule+skill+hooks installer.""" +from plugins_registry.builtins import AgentskillsAdaptor as Adaptor # noqa: F401 diff --git a/plugins/molecule-workflow-triage/commands/triage.md b/plugins/molecule-workflow-triage/commands/triage.md new file mode 100644 index 00000000..7f452998 --- /dev/null +++ b/plugins/molecule-workflow-triage/commands/triage.md @@ -0,0 +1,50 @@ +--- +name: triage +description: Run a full PR-triage cycle (gates 1-7 + code-review + merge if green). Equivalent to one cron tick, on demand. +--- + +# /triage + +Manual invocation of the hourly PR-triage flow. Use when: +- You want to clear backlog faster than the hourly cadence +- You're testing a change to the triage prompt itself +- A scheduled cron has died and the queue is backing up + +## Steps + +### Step 0 — Activate guards + replay learnings +1. `Skill careful-mode` — load REFUSE/WARN/ALLOW lists. +2. Read last 20 lines of cron-learnings JSONL (workspace memory dir). 
+ +### Step 1 — List +``` +gh pr list --state open --json number,title,author,isDraft,mergeable,statusCheckRollup +gh issue list --state open --json number,title,assignees,labels +``` + +### Step 2 — 7-gate verification per PR +- Gate 1 CI · Gate 2 build · Gate 3 tests · Gate 4 security · Gate 5 design · Gate 6 line review · Gate 7 Playwright if UI +- Supplement A: `Skill code-review` +- Supplement B: `Skill cross-vendor-review` on noteworthy PRs (auth/billing/data-deletion/migration/large-blast-radius) + +### Step 2a — Mechanical fixes only +Fix on-branch + commit `fix(gate-N): ...` + push + poll CI. NEVER fix logic / design / auth issues. + +### Step 2b — Merge +All gates pass + 0 🔴 from code-review + cross-vendor agreement → `gh pr merge N --merge --delete-branch`. Merge-commit only. + +### Step 3 — Docs sync after any merge +`Skill update-docs` — measure test counts, don't guess. + +### Step 4 — Issue pickup (cap 2) +For each candidate: gates I-1..I-6, self-assign, branch, implement, draft PR, run `Skill llm-judge` against issue body + PR diff. Mark ready only if score >= 4. + +### Step 5 — Status report + cron-learnings +Report includes every subsection ("none" if empty). Then append 1-3 lines to cron-learnings JSONL. + +## Standing rules (inviolable) +- Never push to main · Merge-commits only +- careful-mode REFUSE list ALWAYS blocks +- code-review 🔴 ALWAYS blocks merge +- cross-vendor disagreement on noteworthy PR escalates to user +- llm-judge < 4 blocks marking a draft PR ready diff --git a/plugins/molecule-workflow-triage/plugin.yaml b/plugins/molecule-workflow-triage/plugin.yaml new file mode 100644 index 00000000..76154dff --- /dev/null +++ b/plugins/molecule-workflow-triage/plugin.yaml @@ -0,0 +1,11 @@ +name: molecule-workflow-triage +version: 1.0.0 +description: Provides /triage slash command — full PR-triage cycle composing code-review, cross-vendor-review, cron-learnings.
Recommends installing molecule-skill-code-review and molecule-skill-cron-learnings first. +author: Molecule AI +tags: [molecule, guardrails] + +runtimes: + - claude_code + +commands: + - triage diff --git a/sdk/python/molecule_plugin/builtins.py b/sdk/python/molecule_plugin/builtins.py index 2b5d5ec0..eeb9a22e 100644 --- a/sdk/python/molecule_plugin/builtins.py +++ b/sdk/python/molecule_plugin/builtins.py @@ -108,6 +108,11 @@ class AgentskillsAdaptor: result.warnings.append("setup.sh timed out (120s)") ctx.logger.warning("%s: setup.sh timed out", self.plugin_name) + # Claude Code layer — hooks/, commands/, settings-fragment.json. + # Mirrors workspace-template/plugins_registry/builtins.py — drift + # guarded by tests/test_plugins_builtins_drift.py. + _install_claude_layer(ctx, result, self.plugin_name) + return result async def uninstall(self, ctx: InstallContext) -> None: @@ -125,3 +130,82 @@ class AgentskillsAdaptor: memory_path.write_text("".join(kept)) + + +# ---------------------------------------------------------------------- +# Claude Code layer — mirrors workspace-template/plugins_registry/builtins.py. +# Drift-guarded by workspace-template/tests/test_plugins_builtins_drift.py. 
# ----------------------------------------------------------------------

def _install_claude_layer(ctx: InstallContext, result: InstallResult, plugin_name: str) -> None:
    """Install a plugin's Claude Code files under ``<configs_dir>/.claude``.

    Copies ``hooks/*`` and ``commands/*.md`` from ``ctx.plugin_root`` and
    merges ``settings-fragment.json`` into ``.claude/settings.json``. Each
    step is a no-op when the plugin does not ship that directory or file.

    Args:
        ctx: Install context; ``configs_dir``, ``plugin_root`` and ``logger``
            are read.
        result: Install result; ``files_written`` and ``warnings`` are
            appended to.
        plugin_name: Name used in the log message.
    """
    claude_dir = ctx.configs_dir / ".claude"
    claude_dir.mkdir(parents=True, exist_ok=True)
    _copy_dir_files(ctx.plugin_root / "hooks", claude_dir / "hooks", result, executable_suffix=".sh")
    _copy_dir_files(ctx.plugin_root / "commands", claude_dir / "commands", result, only_suffix=".md")
    _merge_settings_fragment(ctx, claude_dir, result, plugin_name)


def _copy_dir_files(src: Path, dst: Path, result: InstallResult,
                    executable_suffix: str | None = None,
                    only_suffix: str | None = None) -> None:
    """Copy the regular files directly inside *src* into *dst*.

    Args:
        src: Source directory; nothing happens when it is not a directory.
        dst: Destination directory, created when missing.
        result: Install result; each copied path is appended to
            ``files_written``.
        executable_suffix: Files with this suffix are chmod-ed to 0o755.
        only_suffix: When set, files with any other suffix are skipped
            (``.py`` files are still let through when *executable_suffix*
            is also set).
    """
    if not src.is_dir():
        return
    dst.mkdir(parents=True, exist_ok=True)
    # Sorted so files_written is deterministic; iterdir() order is arbitrary.
    for f in sorted(src.iterdir()):
        if not f.is_file():
            continue
        if only_suffix and f.suffix != only_suffix:
            if not (executable_suffix and f.suffix == ".py"):
                continue
        target = dst / f.name
        shutil.copy2(f, target)
        if executable_suffix and f.suffix == executable_suffix:
            target.chmod(0o755)
        # parents[2] is the directory two levels above dst — the configs dir
        # when dst is <configs_dir>/.claude/<subdir>, as both callers pass.
        result.files_written.append(str(target.relative_to(target.parents[2])))


def _merge_settings_fragment(ctx: InstallContext, claude_dir: Path,
                             result: InstallResult, plugin_name: str) -> None:
    """Merge the plugin's ``settings-fragment.json`` into ``settings.json``.

    ``${CLAUDE_DIR}`` in hook commands is replaced with *claude_dir* before
    merging. Problems with either JSON file are reported through
    ``result.warnings`` rather than raised: an invalid fragment aborts the
    merge, an invalid existing ``settings.json`` is replaced.
    """
    fragment_path = ctx.plugin_root / "settings-fragment.json"
    if not fragment_path.is_file():
        return
    try:
        fragment = json.loads(fragment_path.read_text())
    except (OSError, ValueError) as e:
        result.warnings.append(f"settings-fragment.json invalid: {e}")
        return
    if not isinstance(fragment, dict):
        result.warnings.append("settings-fragment.json invalid: top level is not a JSON object")
        return

    settings_path = claude_dir / "settings.json"
    existing: dict = {}
    if settings_path.is_file():
        try:
            loaded = json.loads(settings_path.read_text())
        except (OSError, ValueError) as e:
            # Still best-effort, but no longer silent: the file gets replaced.
            result.warnings.append(
                f"existing settings.json unreadable, starting from empty settings: {e}"
            )
        else:
            if isinstance(loaded, dict):
                existing = loaded
            else:
                result.warnings.append(
                    "existing settings.json is not a JSON object, starting from empty settings"
                )

    rewritten = _rewrite_hook_paths(fragment, claude_dir)
    merged = _deep_merge_hooks(existing, rewritten)
    settings_path.write_text(json.dumps(merged, indent=2) + "\n")
    result.files_written.append(str(settings_path.relative_to(ctx.configs_dir)))
    ctx.logger.info("%s: merged hook config into %s", plugin_name, settings_path)


def _rewrite_hook_paths(fragment: dict, claude_dir: Path) -> dict:
    """Return a deep copy of *fragment* with ``${CLAUDE_DIR}`` expanded.

    Only string ``command`` values under ``hooks.<event>[].hooks[]`` are
    rewritten. Entries without a command are left untouched (no empty
    ``command`` key is added) and *fragment* itself is not modified.
    """
    out = json.loads(json.dumps(fragment))  # deep copy via JSON round-trip
    hooks = out.get("hooks")
    if not isinstance(hooks, dict):
        return out
    for handlers in hooks.values():
        if not isinstance(handlers, list):
            continue
        for handler in handlers:
            if not isinstance(handler, dict):
                continue
            for h in handler.get("hooks") or []:
                if isinstance(h, dict) and isinstance(h.get("command"), str):
                    h["command"] = h["command"].replace("${CLAUDE_DIR}", str(claude_dir))
    return out


def _deep_merge_hooks(existing: dict, fragment: dict) -> dict:
    """Merge *fragment* into *existing* and return the result as a new dict.

    Handlers under ``hooks.<event>`` are appended to the existing list for
    that event, skipping any handler already present, so merging the same
    fragment twice does not register its hooks twice. For every other
    top-level key the existing value wins. Neither argument is mutated.
    """
    out = dict(existing)
    current = existing.get("hooks")
    # Copy the per-event lists so appends below never touch the caller's data.
    hooks = (
        {event: list(handlers) for event, handlers in current.items()}
        if isinstance(current, dict)
        else {}
    )
    frag_hooks = fragment.get("hooks")
    if isinstance(frag_hooks, dict):
        for event, handlers in frag_hooks.items():
            bucket = hooks.setdefault(event, [])
            if not isinstance(handlers, list):
                continue
            for handler in handlers:
                if handler not in bucket:
                    bucket.append(handler)
    out["hooks"] = hooks
    for key, val in fragment.items():
        if key == "hooks":
            continue
        out.setdefault(key, val)
    return out
# ----------------------------------------------------------------------
# Claude Code layer — hooks, slash commands, settings.json fragments.
# Promoted from the molecule-guardrails plugin so any plugin can ship
# these by dropping the right files; no custom adapter needed.
# ----------------------------------------------------------------------

def _install_claude_layer(ctx: InstallContext, result: InstallResult, plugin_name: str) -> None:
    """Install a plugin's Claude Code files under ``<configs_dir>/.claude``.

    Copies ``hooks/*`` and ``commands/*.md`` from ``ctx.plugin_root`` and
    merges ``settings-fragment.json`` into ``.claude/settings.json``. Each
    step is a no-op when the plugin does not ship that directory or file.

    Args:
        ctx: Install context; ``configs_dir``, ``plugin_root`` and ``logger``
            are read.
        result: Install result; ``files_written`` and ``warnings`` are
            appended to.
        plugin_name: Name used in the log message.
    """
    claude_dir = ctx.configs_dir / ".claude"
    claude_dir.mkdir(parents=True, exist_ok=True)

    _copy_dir_files(
        ctx.plugin_root / "hooks",
        claude_dir / "hooks",
        result,
        executable_suffix=".sh",
    )
    _copy_dir_files(
        ctx.plugin_root / "commands",
        claude_dir / "commands",
        result,
        only_suffix=".md",
    )
    _merge_settings_fragment(ctx, claude_dir, result, plugin_name)


def _copy_dir_files(
    src: Path,
    dst: Path,
    result: InstallResult,
    executable_suffix: str | None = None,
    only_suffix: str | None = None,
) -> None:
    """Copy the regular files directly inside *src* into *dst*.

    Args:
        src: Source directory; nothing happens when it is not a directory.
        dst: Destination directory, created when missing.
        result: Install result; each copied path is appended to
            ``files_written``.
        executable_suffix: Files with this suffix are chmod-ed to 0o755.
        only_suffix: When set, files with any other suffix are skipped
            (``.py`` files are still let through when *executable_suffix*
            is also set).
    """
    if not src.is_dir():
        return
    dst.mkdir(parents=True, exist_ok=True)
    # Sorted so files_written is deterministic; iterdir() order is arbitrary.
    for f in sorted(src.iterdir()):
        if not f.is_file():
            continue
        if only_suffix and f.suffix != only_suffix:
            # When copying hooks, allow .py companion files alongside .sh
            if not (executable_suffix and f.suffix == ".py"):
                continue
        target = dst / f.name
        shutil.copy2(f, target)
        if executable_suffix and f.suffix == executable_suffix:
            target.chmod(0o755)
        # parents[2] is the directory two levels above dst — the configs dir
        # when dst is <configs_dir>/.claude/<subdir>, as both callers pass.
        result.files_written.append(str(target.relative_to(target.parents[2])))


def _merge_settings_fragment(
    ctx: InstallContext,
    claude_dir: Path,
    result: InstallResult,
    plugin_name: str,
) -> None:
    """Merge the plugin's ``settings-fragment.json`` into ``settings.json``.

    ``${CLAUDE_DIR}`` in hook commands is replaced with *claude_dir* before
    merging. Problems with either JSON file are reported through
    ``result.warnings`` rather than raised: an invalid fragment aborts the
    merge, an invalid existing ``settings.json`` is replaced.
    """
    fragment_path = ctx.plugin_root / "settings-fragment.json"
    if not fragment_path.is_file():
        return
    try:
        fragment = json.loads(fragment_path.read_text())
    except (OSError, ValueError) as e:
        result.warnings.append(f"settings-fragment.json invalid: {e}")
        return
    if not isinstance(fragment, dict):
        result.warnings.append("settings-fragment.json invalid: top level is not a JSON object")
        return

    settings_path = claude_dir / "settings.json"
    existing: dict = {}
    if settings_path.is_file():
        try:
            loaded = json.loads(settings_path.read_text())
        except (OSError, ValueError) as e:
            # Still best-effort, but no longer silent: the file gets replaced.
            result.warnings.append(
                f"existing settings.json unreadable, starting from empty settings: {e}"
            )
        else:
            if isinstance(loaded, dict):
                existing = loaded
            else:
                result.warnings.append(
                    "existing settings.json is not a JSON object, starting from empty settings"
                )

    rewritten = _rewrite_hook_paths(fragment, claude_dir)
    merged = _deep_merge_hooks(existing, rewritten)
    settings_path.write_text(json.dumps(merged, indent=2) + "\n")
    result.files_written.append(str(settings_path.relative_to(ctx.configs_dir)))
    ctx.logger.info("%s: merged hook config into %s", plugin_name, settings_path)


def _rewrite_hook_paths(fragment: dict, claude_dir: Path) -> dict:
    """Return a deep copy of *fragment* with ``${CLAUDE_DIR}`` expanded.

    Only string ``command`` values under ``hooks.<event>[].hooks[]`` are
    rewritten. Entries without a command are left untouched (no empty
    ``command`` key is added) and *fragment* itself is not modified.
    """
    out = json.loads(json.dumps(fragment))  # deep copy via roundtrip
    hooks = out.get("hooks")
    if not isinstance(hooks, dict):
        return out
    for handlers in hooks.values():
        if not isinstance(handlers, list):
            continue
        for handler in handlers:
            if not isinstance(handler, dict):
                continue
            for h in handler.get("hooks") or []:
                if isinstance(h, dict) and isinstance(h.get("command"), str):
                    h["command"] = h["command"].replace("${CLAUDE_DIR}", str(claude_dir))
    return out


def _deep_merge_hooks(existing: dict, fragment: dict) -> dict:
    """Merge *fragment* into *existing* and return the result as a new dict.

    Handlers under ``hooks.<event>`` are appended to the existing list for
    that event, skipping any handler already present, so merging the same
    fragment twice does not register its hooks twice. For every other
    top-level key the existing value wins. Neither argument is mutated.
    """
    out = dict(existing)
    current = existing.get("hooks")
    # Copy the per-event lists so appends below never touch the caller's data.
    hooks = (
        {event: list(handlers) for event, handlers in current.items()}
        if isinstance(current, dict)
        else {}
    )
    frag_hooks = fragment.get("hooks")
    if isinstance(frag_hooks, dict):
        for event, handlers in frag_hooks.items():
            bucket = hooks.setdefault(event, [])
            if not isinstance(handlers, list):
                continue
            for handler in handlers:
                if handler not in bucket:
                    bucket.append(handler)
    out["hooks"] = hooks
    for key, val in fragment.items():
        if key == "hooks":
            continue
        out.setdefault(key, val)
    return out