diff --git a/.claude/commands/triage.md b/.claude/commands/triage.md new file mode 100644 index 00000000..a78e2f5a --- /dev/null +++ b/.claude/commands/triage.md @@ -0,0 +1,64 @@ +--- +name: triage +description: Run the hourly PR-triage + issue-pickup + code-review + docs-sync loop. Equivalent to one tick of the c5074cd5 cron, on demand. +--- + +# /triage + +Manual invocation of the same prompt the hourly cron runs at :17 past each hour. Use when: +- You want to clear backlog faster than the hourly cadence +- You're testing a change to the cron prompt itself +- The cron is session-only and the session has ended + +## Steps + +Run the full c5074cd5 cron flow: + +### Step 0 — Activate guards + replay learnings +1. Invoke `Skill careful-mode` — load REFUSE/WARN/ALLOW lists. +2. Read last 20 lines of `~/.claude/projects/-Users-hongming-Documents-GitHub-molecule-monorepo/memory/cron-learnings.jsonl`. + +### Step 1 — List +``` +gh pr list --repo Molecule-AI/molecule-monorepo --state open --json number,title,author,isDraft,mergeable,statusCheckRollup,files +gh issue list --repo Molecule-AI/molecule-monorepo --state open --json number,title,assignees,labels,body +``` + +### Step 2 — 7-gate verification per PR +- Gate 1 CI · Gate 2 build · Gate 3 tests · Gate 4 security · Gate 5 design · Gate 6 line review · Gate 7 Playwright if canvas +- Supplement A: `Skill code-review` on every PR +- Supplement B: `Skill cross-vendor-review` on noteworthy PRs (auth/billing/data-deletion/migration/large-blast-radius) + +### Step 2a — Mechanical fixes only +Fix on-branch + commit `fix(gate-N): ...` + push + poll CI. NEVER fix logic / design / auth issues. + +### Step 2b — Merge +All gates pass + 0 🔴 from code-review + cross-vendor agreement → `gh pr merge N --merge --delete-branch`. Merge-commit only. + +### Step 3 — Docs sync after any merge +`Skill update-docs` — measure test counts, don't guess. Open `docs/sync-YYYY-MM-DD-tick-N` PR, don't merge. 
def read_input() -> dict:
    """Read the hook payload from stdin and decode it as a JSON object.

    Returns:
        The decoded object. Empty input, malformed JSON, and JSON values that
        are not objects (a list, string, number, ...) all yield ``{}`` so that
        callers can chain ``.get()`` without type checks.
    """
    raw = sys.stdin.read().strip()
    if not raw:
        return {}
    try:
        payload = json.loads(raw)
    except json.JSONDecodeError:
        return {}
    # The return type is a promise to callers: never hand back a non-mapping.
    return payload if isinstance(payload, dict) else {}


def emit(payload: dict) -> None:
    """Serialise ``payload`` as JSON and print it on stdout."""
    print(json.dumps(payload))


def deny_pretooluse(reason: str) -> None:
    """Emit a PreToolUse denial carrying ``reason``, then exit with status 0.

    This function never returns: it ends with ``sys.exit(0)``, which raises
    ``SystemExit``. Code placed after a call to it is unreachable.
    """
    emit({
        "hookSpecificOutput": {
            "hookEventName": "PreToolUse",
            "permissionDecision": "deny",
            "permissionDecisionReason": reason,
        }
    })
    sys.exit(0)


def add_context(text: str) -> None:
    """Emit ``text`` as ``additionalContext``; do nothing for empty/blank text."""
    if text and text.strip():
        emit({"additionalContext": text})


def warn_to_stderr(msg: str) -> None:
    """Print ``msg`` on stderr, leaving stdout free for the JSON payload."""
    print(msg, file=sys.stderr)
import re  # block-local: the matching helpers below need regular expressions

# Shell operators that end one simple command and start the next.
_COMMAND_SEPARATORS = r"&&|\|\||[;|&\n]"
_PROTECTED_BRANCHES = ("main", "master")
_SQL_DESTRUCTIVE = ("DROP TABLE", "DROP DATABASE", "TRUNCATE TABLE")
_SQL_ALLOW_MARKERS = ("_test", "sandbox", "/tmp/", "_dev", "test_")
_RM_SCRATCH_MARKERS = (
    "/tmp/", "node_modules", "dist", ".next",
    "__pycache__", ".pytest_cache", "coverage",
)
_RM_ROOTED_PREFIXES = ("/", "~", "$HOME", "${HOME}")


def _simple_commands(cmd: str) -> list:
    """Split a shell line into simple commands, each a list of bare tokens."""
    commands = []
    for segment in re.split(_COMMAND_SEPARATORS, cmd):
        # Whitespace tokenisation is a heuristic: quoted arguments containing
        # spaces are split, which is acceptable for flag/branch detection.
        tokens = [tok.strip("'\"()`") for tok in segment.split()]
        tokens = [tok for tok in tokens if tok]
        if tokens:
            commands.append(tokens)
    return commands


def _args_after(tokens: list, program: str, subcommand: str = "") -> "list | None":
    """Return the tokens following ``program [...] subcommand``, or None if absent."""
    index = next(
        (i for i, tok in enumerate(tokens)
         if tok == program or tok.endswith("/" + program)),
        None,
    )
    if index is None:
        return None
    rest = tokens[index + 1:]
    if not subcommand:
        return rest
    if subcommand not in rest:
        return None
    return rest[rest.index(subcommand) + 1:]


def _mentions_branch(text: str, branch: str) -> bool:
    """True if ``branch`` appears as a ref name, not inside a longer word."""
    pattern = r"(?<![\w.-])" + re.escape(branch) + r"(?![\w.-])"
    return re.search(pattern, text) is not None


def _force_flag(args: list) -> str:
    """Return "--force" or "-f" if ``args`` contain a force option, else ""."""
    for arg in args:
        # Covers --force, --force-with-lease[=...] and --force-if-includes.
        if arg.startswith("--force"):
            return "--force"
        # Short-option clusters such as -f, -fu, -uf.
        if re.fullmatch(r"-[A-Za-z]*f[A-Za-z]*", arg):
            return "-f"
    return ""


def _push_refusal(cmd: str, commands: list) -> str:
    """Return a refusal message for a forced push touching main/master, else ""."""
    for branch in _PROTECTED_BRANCHES:
        if not _mentions_branch(cmd, branch):
            continue
        for tokens in commands:
            args = _args_after(tokens, "git", "push")
            if args is None:
                continue
            flag = _force_flag(args)
            if flag:
                message = f"git push {flag} to {branch} is REFUSED."
                if flag == "--force" and branch == "main":
                    message += " Use --force-with-lease on a feature branch only."
                return message
            # A leading "+" on a refspec forces that ref without any flag.
            for arg in args:
                if arg.startswith("+") and _mentions_branch(arg, branch):
                    return f"git push with forced refspec '{arg}' to {branch} is REFUSED."
    return ""


def _reset_refusal(cmd: str, commands: list) -> str:
    """Return a refusal message for ``git reset --hard`` involving main, else ""."""
    if not _mentions_branch(cmd, "main"):
        return ""
    for tokens in commands:
        args = _args_after(tokens, "git", "reset")
        if args is not None and "--hard" in args:
            return "git reset --hard against main is REFUSED. Stash, branch, then reset."
    return ""


def _sql_refusal(cmd: str) -> str:
    """Return a refusal message for destructive SQL outside test/sandbox names."""
    if any(marker in cmd for marker in _SQL_ALLOW_MARKERS):
        return ""
    for statement in _SQL_DESTRUCTIVE:
        # SQL keywords are case-insensitive and may be separated by any whitespace.
        pattern = r"\b" + r"\s+".join(statement.split()) + r"\b"
        if re.search(pattern, cmd, re.IGNORECASE):
            return (
                f"'{statement}' against production-like schema is REFUSED. "
                "Use a migration with explicit review."
            )
    return ""


def _rm_refusal(cmd: str, commands: list) -> str:
    """Return a refusal message for a recursive forced rm of a scary target."""
    for tokens in commands:
        args = _args_after(tokens, "rm")
        if args is None:
            continue
        flags = [arg for arg in args if arg.startswith("-")]
        short = "".join(flag[1:] for flag in flags if not flag.startswith("--"))
        recursive = "r" in short or "R" in short or "--recursive" in flags
        forced = "f" in short or "--force" in flags
        if not (recursive and forced):
            continue
        # Each target is judged on its own, so a scratch path elsewhere in the
        # command cannot whitelist a dangerous one.
        for target in (arg for arg in args if not arg.startswith("-")):
            rooted = target.startswith(_RM_ROOTED_PREFIXES)
            scratch = any(marker in target for marker in _RM_SCRATCH_MARKERS)
            if rooted and not scratch:
                if "migrations" in target:
                    return "rm -rf inside a migrations dir is REFUSED."
                return (
                    "rm -rf at filesystem root, HOME, or .git is REFUSED. "
                    f"Command: {cmd[:200]}"
                )
            # Match ".git" as a whole path component: not .github/.gitignore.
            if re.search(r"(?:^|/)\.git(?:/|$)", target):
                return "rm -rf .git is REFUSED. Re-clone if you need a fresh repo."
    return ""


def _refusal_reason(cmd: str) -> str:
    """Return the first REFUSE-list message that applies to ``cmd``, else ""."""
    commands = _simple_commands(cmd)
    return (
        _push_refusal(cmd, commands)
        or _reset_refusal(cmd, commands)
        or _sql_refusal(cmd)
        or _rm_refusal(cmd, commands)
    )


def _warnings(cmd: str) -> list:
    """Return WARN-list notes for ``cmd``; these are logged but never block."""
    notes = []
    if "git push --force-with-lease" in cmd:
        notes.append("[careful-mode WARN] force-with-lease: safer than --force but still rewrites remote history.")
    if "gh pr close" in cmd or "gh issue close" in cmd:
        notes.append("[careful-mode WARN] closing a PR/issue is irreversible from this bot's standpoint. Confirm intent.")
    return notes


def main() -> None:
    """Check the Bash command in the hook payload against careful-mode rules.

    Reads ``tool_input.command`` from the payload. A REFUSE-list match is
    reported through ``deny_pretooluse`` (which exits the process); WARN-list
    matches are written to stderr and the command is allowed. A missing or
    non-string command is ignored.
    """
    data = read_input()
    cmd = (data.get("tool_input") or {}).get("command") or ""
    if not isinstance(cmd, str) or not cmd:
        return

    reason = _refusal_reason(cmd)
    if reason:
        deny_pretooluse(f"careful-mode: {reason}")

    for note in _warnings(cmd):
        warn_to_stderr(note)
def _components(path: str) -> list:
    """Return the non-empty components of ``path`` after normalisation."""
    normalised = os.path.normpath(path)
    return [part for part in normalised.split(os.sep) if part and part != "."]


def _contains_run(haystack: list, needle: list) -> bool:
    """True if ``needle`` occurs as a contiguous run of items in ``haystack``."""
    if not needle:
        # A scope of "." or "/" has no components and therefore restricts nothing.
        return True
    span = len(needle)
    return any(
        haystack[start:start + span] == needle
        for start in range(len(haystack) - span + 1)
    )


def main() -> None:
    """Deny Edit/Write calls that fall outside the scope named in ``.claude/freeze``.

    The first line of the freeze file is the allowed scope; a missing file or
    a blank first line means no freeze is active. The target path is taken
    from ``tool_input.file_path`` or ``tool_input.notebook_path``.

    Matching is done on whole path components rather than raw substrings:
    a scope of ``src`` allows ``.../src/app.py`` but not ``.../mysrc/app.py``,
    and only a real ``.claude`` directory component (not ``.claude-backup``)
    is exempt. Note this also means a scope can no longer be a partial
    file-name prefix.
    """
    if not os.path.isfile(FREEZE):
        return
    with open(FREEZE) as f:
        allowed = f.readline().strip()
    if not allowed:
        return

    data = read_input()
    tool_input = data.get("tool_input") or {}
    target = tool_input.get("file_path") or tool_input.get("notebook_path") or ""
    if not target:
        return

    target_parts = _components(target)

    # Always allow .claude/ writes (so unfreeze still works)
    if ".claude" in target_parts:
        return

    if _contains_run(target_parts, _components(allowed)):
        return

    deny_pretooluse(
        f"freeze: edit to {target} refused — scope locked to '{allowed}'. "
        f"Remove .claude/freeze to unlock."
    )
def main() -> None:
    """Assemble the SessionStart context and emit it via ``add_context``.

    Sections, in order: the last 20 lines of the cron-learnings file (when
    present and non-empty), a freeze banner, and open PR / issue counts.

    The freeze banner is shown only when the first line of ``.claude/freeze``
    is non-blank. The freeze hook treats a blank first line as "no freeze",
    so announcing a freeze with an empty scope would be misleading.
    """
    parts = []

    learnings = tail(LEARNINGS, 20)
    if learnings:
        parts.append(f"## Recent cron learnings (last 20)\n{learnings}")

    frozen = ""
    if os.path.isfile(FREEZE):
        try:
            with open(FREEZE) as f:
                frozen = f.readline().strip()
        except (OSError, UnicodeDecodeError):
            # Unreadable freeze file: skip the banner but keep the other sections.
            frozen = ""
    if frozen:
        parts.append(f"## ⚠ FREEZE ACTIVE\nEdits restricted to: {frozen}\nRemove .claude/freeze to unlock.")

    pr = gh_count(["pr", "list", "--repo", "Molecule-AI/molecule-monorepo", "--state", "open"])
    iss = gh_count(["issue", "list", "--repo", "Molecule-AI/molecule-monorepo", "--state", "open"])
    parts.append(f"## Repo state\nOpen PRs: {pr} · Open issues: {iss}")

    if parts:
        add_context("\n\n".join(parts))
def main() -> None:
    """Emit a ``block`` decision asking for a re-check of the subagent's reply.

    Does nothing unless the toggle file exists. Replies that are empty or
    shorter than 100 characters are skipped. Otherwise the first 400
    characters of the reply, with newlines flattened to spaces, are embedded
    in the reason text passed to ``emit``.
    """
    if not os.path.isfile(TOGGLE):
        return

    payload = read_input()
    message = payload.get("last_assistant_message", "")
    agent_name = payload.get("agent_type", "unknown")

    long_enough = bool(message) and len(message) >= 100
    if not long_enough:
        return

    preview = message[:400].replace("\n", " ")
    reason_parts = (
        f"subagent-judge: {agent_name} returned. Before proceeding, re-read its last message ",
        f"(snippet: {preview}...) and confirm: did it actually address the original task? ",
        "If unsure, re-spawn with a tighter prompt.",
    )
    emit({"decision": "block", "reason": "".join(reason_parts)})
def main() -> None:
    """Inject watchdog warnings when the submitted prompt mentions risky actions.

    The prompt is lower-cased and checked against each needle list in
    ``PATTERNS`` (substring match). A bulk-close warning is added when a
    ``CLOSE_BULK`` phrase appears together with a ``CLOSE_OBJ`` word. Object
    words are matched as whole words (optionally plural), so "pr" no longer
    fires on "prompt" or "production". All collected warnings are sent in a
    single ``add_context`` call; a missing, null, or non-string prompt is
    ignored.
    """
    import re  # function-scope import: only the bulk-close check needs it

    data = read_input()
    raw_prompt = data.get("prompt") or ""
    prompt = raw_prompt.lower() if isinstance(raw_prompt, str) else ""
    if not prompt:
        return

    warnings = [
        f"• {msg}"
        for needles, msg in PATTERNS
        if any(needle in prompt for needle in needles)
    ]

    bulk_requested = any(phrase in prompt for phrase in CLOSE_BULK)
    object_pattern = r"\b(?:" + "|".join(re.escape(obj) for obj in CLOSE_OBJ) + r")s?\b"
    if bulk_requested and re.search(object_pattern, prompt):
        warnings.append("• Bulk close requested. List the targets first; do NOT loop a close command.")

    if warnings:
        add_context(
            "## ⚠ Prompt-watchdog warnings\n\n"
            + "\n".join(warnings)
            + "\n\ncareful-mode applies — re-confirm scope before any destructive tool call."
        )
**Treat `docs/sync-*` PRs that touch CLAUDE.md or PLAN.md as ALWAYS + noteworthy.** Those two files are the agent-facing source of truth — a + bad merge there silently corrupts every future triage tick. Run code-review + skill at minimum, ideally cross-vendor-review too. + +3. **After any cron tick, write a 1-line reflection** to + `.claude/per-tick-reflections.md` (gitignored). Format: `2026-MM-DDTHH:MMZ + — what surprised me / what I'd do differently next tick`. This is for + YOUR future self; the cron-learnings JSONL is for the operational pattern + memory. They are distinct. + +### Hooks active in this repo + +The following ambient guardrails fire automatically (configured in +`.claude/settings.json`). When a hook blocks a tool call, the response will +include a `permissionDecisionReason` — read it carefully before retrying. + +| Hook | Event | Effect | +|------|-------|--------| +| `pre-bash-careful.sh` | PreToolUse:Bash | REFUSES `git push --force` to main, `rm -rf` at root/HOME, `DROP TABLE` against prod schema. WARNs on `--force-with-lease`, `gh pr close/issue close`. | +| `pre-edit-freeze.sh` | PreToolUse:Edit/Write | Blocks edits outside the path in `.claude/freeze` if that file exists. Use to lock scope while debugging. | +| `session-start-context.sh` | SessionStart | Auto-loads recent cron-learnings, freeze status, open PR/issue counts. | +| `post-edit-audit.sh` | PostToolUse:Edit/Write | Appends every edit to `.claude/audit.jsonl` (gitignored). | +| `user-prompt-tag.sh` | UserPromptSubmit | Injects warning into context when prompt mentions force-push / drop-table / "delete all" / etc. | +| `subagent-stop-judge.sh` | SubagentStop | Off by default (touch `.claude/judge-subagents` to enable). When on, prompts the orchestrator to verify the subagent's output addresses the original task. | + +### Skills active in this repo + +These are documented in `.claude/skills/*/SKILL.md`. Invoke explicitly via +the `Skill` tool — they are NOT auto-applied. 
The cron prompt invokes them +at fixed steps; for ad-hoc work, decide if the skill matches your situation: + +- `code-review` — full 16-criteria rubric on a diff +- `cross-vendor-review` — adversarial second-model review (use for noteworthy PRs) +- `careful-mode` — the doc backing the bash hook above +- `cron-learnings` — defines the JSONL format +- `cron-retro` — weekly retrospective generator +- `llm-judge` — score whether a deliverable addresses the request +- `update-docs` — sync repo docs after merges + +### Standing rules (inviolable) + +- Never push directly to main — use feat/fix/chore/docs branches +- Merge-commits only (`gh pr merge --merge`) — never `--squash` / `--rebase` +- Never commit without explicit user approval EXCEPT on: + - Open PR branches you're fixing for a gate + - Issue-pickup branches you opened a draft PR for + - Docs-sync branches + - Main is untouchable without a merge +- Dark theme only (no white/light CSS classes; pre-commit hook enforces) +- No native browser dialogs (`confirm`/`alert`/`prompt`) — use `ConfirmDialog` +- Delegate through PM, never bypass hierarchy +- Only PM mounts the repo (`workspace_dir` bind-mount); other agents get isolated Docker volumes + ## Architecture ```