From 6e685df1ee191c79e438e13ce7f5d5dc4aec77f1 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Wed, 6 May 2026 13:53:22 -0700 Subject: [PATCH] import from local vendored copy (2026-05-06) --- .github/workflows/ci.yml | 5 + .gitignore | 21 +++ .molecule-ci/scripts/requirements.txt | 1 + .molecule-ci/scripts/validate-plugin.py | 52 +++++++ CLAUDE.md | 183 ++++++++++++++++++++++++ README.md | 19 +++ known-issues.md | 54 +++++++ plugin.yaml | 16 +++ runbooks/local-dev-setup.md | 145 +++++++++++++++++++ skills/ai-act-audit-log/SKILL.md | 133 +++++++++++++++++ 10 files changed, 629 insertions(+) create mode 100644 .github/workflows/ci.yml create mode 100644 .gitignore create mode 100644 .molecule-ci/scripts/requirements.txt create mode 100644 .molecule-ci/scripts/validate-plugin.py create mode 100644 CLAUDE.md create mode 100644 README.md create mode 100644 known-issues.md create mode 100644 plugin.yaml create mode 100644 runbooks/local-dev-setup.md create mode 100644 skills/ai-act-audit-log/SKILL.md diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..c8fb9d3 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,5 @@ +name: CI +on: [push, pull_request] +jobs: + validate: + uses: Molecule-AI/molecule-ci/.github/workflows/validate-plugin.yml@main diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2af45b5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ +# Credentials — never commit. Use .env.example as the template. 
#!/usr/bin/env python3
"""Validate a Molecule AI plugin repo.

Run from the repository root.  Findings are printed as ``::error::`` /
``::warning::`` lines (GitHub Actions workflow-command syntax) and the
process exits with status 1 if any error was found, 0 otherwise.
"""
import os
import re
import sys

try:
    import yaml
except ImportError:  # PyYAML is listed in .molecule-ci/scripts/requirements.txt
    yaml = None

MANIFEST_PATH = "plugin.yaml"
SKILL_PATH = "SKILL.md"
REQUIRED_FIELDS = ("name", "version", "description")
CONTENT_PATHS = ("SKILL.md", "hooks", "skills", "rules")

# Dot-separated runs of ASCII digits ("1", "1.0", "1.0.0").  A plain
# character whitelist would also accept ".", "1..0" or "1.".
VERSION_RE = re.compile(r"[0-9]+(?:\.[0-9]+)*")


def validate_manifest(plugin):
    """Return the error messages for a parsed ``plugin.yaml``.

    Args:
        plugin: Whatever ``yaml.safe_load`` produced for the manifest.  An
            empty file yields ``None`` and a top-level list or scalar is
            also possible, so anything that is not a mapping is reported
            instead of being dereferenced.

    Returns:
        A list of messages in check order; empty when the manifest is valid.
    """
    if not isinstance(plugin, dict):
        kind = type(plugin).__name__
        return [f"plugin.yaml must contain a YAML mapping, got {kind}"]

    errors = [
        f"Missing required field: {field}"
        for field in REQUIRED_FIELDS
        if not plugin.get(field)
    ]

    # Only judge the format of a version that is actually present; a missing
    # or null version is already covered by the required-field check above.
    version = plugin.get("version")
    if version and not VERSION_RE.fullmatch(str(version)):
        errors.append(f"Invalid version format: {version}")

    runtimes = plugin.get("runtimes")
    if runtimes is not None:
        if not isinstance(runtimes, list):
            errors.append(
                f"runtimes must be a list, got {type(runtimes).__name__}"
            )
        elif not all(isinstance(item, str) for item in runtimes):
            # Non-string entries would otherwise crash the summary join.
            errors.append("runtimes entries must all be strings")

    return errors


def find_content(root="."):
    """Return the entries of ``CONTENT_PATHS`` that exist under *root*."""
    return [p for p in CONTENT_PATHS if os.path.exists(os.path.join(root, p))]


def skill_heading_warning(path=SKILL_PATH):
    """Return a warning if *path* does not open with a markdown heading.

    Returns ``None`` when the file is absent, its first line is blank, or
    the first line starts with ``#``.
    """
    if not os.path.isfile(path):
        return None
    # Advisory check on one line only: never let odd bytes abort the run.
    with open(path, encoding="utf-8", errors="replace") as f:
        first_line = f.readline().strip()
    if first_line and not first_line.startswith("#"):
        return (
            "SKILL.md should start with a markdown heading "
            "(e.g., # Plugin Name)"
        )
    return None


def main():
    """Validate the plugin in the current directory; return the exit status."""
    if yaml is None:
        print(
            "::error::PyYAML is not installed; run: "
            "pip install -r .molecule-ci/scripts/requirements.txt"
        )
        return 1

    if not os.path.isfile(MANIFEST_PATH):
        print("::error::plugin.yaml not found at repo root")
        return 1

    try:
        with open(MANIFEST_PATH, encoding="utf-8") as f:
            plugin = yaml.safe_load(f)
    except (yaml.YAMLError, UnicodeDecodeError) as exc:
        # Collapse the multi-line parser message onto the annotation line.
        detail = " ".join(str(exc).split())
        print(f"::error::plugin.yaml could not be parsed: {detail}")
        return 1

    errors = validate_manifest(plugin)

    found = find_content()
    if not found:
        errors.append(
            "Plugin must contain at least one of: "
            "SKILL.md, hooks/, skills/, rules/"
        )

    # The warning is emitted before the errors, and never fails the run.
    warning = skill_heading_warning()
    if warning:
        print(f"::warning::{warning}")

    if errors:
        for message in errors:
            print(f"::error::{message}")
        return 1

    print(f"✓ plugin.yaml valid: {plugin['name']} v{plugin['version']}")
    print(f" Content: {', '.join(found)}")
    runtimes = plugin.get("runtimes")
    if runtimes:
        print(f" Runtimes: {', '.join(runtimes)}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
Types + +- **Delegation** — A2A task delegation between workspaces +- **Approval** — Human-in-the-loop gate approval or rejection +- **RBAC** — Role-based access control decision +- **memory_read** — Agent read from persistent memory +- **memory_write** — Agent wrote to persistent memory + +### Anti-Patterns (never do these) + +- Do not write from multiple workspaces to the same log path — this + corrupts the JSONL stream and makes it unreadable. +- Do not truncate logs with `>` instead of `>>` — destroys the append-only + guarantee. +- Do not log raw PII. Scrub tokens, emails, and workspace IDs from + `detail` fields before emission. +- Do not skip OA-01 (bias audit) or OA-03 (explainability) detections. + +--- + +## Configuration + +In workspace `config.yaml`: + +```yaml +audit: + enabled: true + log_path: /workspace/audit/ai-act-log.jsonl + max_size_mb: 100 + retention_days: 90 +``` + +| Setting | Default | Description | +|---|---|---| +| `enabled` | `false` | Opt-in — audit is off by default | +| `log_path` | — | Required when enabled | +| `max_size_mb` | `100` | Rotate when file exceeds this | +| `retention_days` | `90` | Days to retain before archiving | + +--- + +## Log Rotation + +The plugin writes append-only. Use **external logrotate with `copytruncate`** +to avoid breaking the write stream: + +``` +/path/to/ai-act-log.jsonl { + daily + rotate 14 + compress + copytruncate + missingok + notifempty +} +``` + +**Never use `truncate` or `size 0`** — that destroys unwritten buffered events. 
+ +--- + +## SIEM Integration + +The JSONL format is compatible with: + +| SIEM | Ingestion | +|---|---| +| Splunk | `source = /path/to/ai-act-log.jsonl` | +| Elastic (ELK) | Filebeat with `json` codec | +| Datadog | `dd-agent` JSON log files | +| Grafana Loki | `json` label parser | + +--- + +## Development + +### Prerequisites + +- Node.js >= 18 (for markdownlint, if editing `.md` files) +- Python 3.11+ (for YAML validation) +- `gh` CLI authenticated +- Write access to `Molecule-AI/molecule-ai-plugin-molecule-audit` + +### Setup + +```bash +git clone https://github.com/Molecule-AI/molecule-ai-plugin-molecule-audit.git +cd molecule-ai-plugin-molecule-audit + +# Validate plugin.yaml +python3 -c "import yaml; yaml.safe_load(open('plugin.yaml'))" +``` + +### Pre-Commit Checklist + +```bash +# YAML structure +python3 -c "import yaml; yaml.safe_load(open('plugin.yaml'))" + +# Markdown lint (if any .md edited) +npx markdownlint '**/*.md' --ignore node_modules 2>/dev/null || true + +# No credentials in plugin.yaml +python3 -c " +import re, sys +with open('plugin.yaml') as f: + content = f.read() +patterns = [r'sk.ant', r'ghp.', r'AKIA[A-Z0-9]'] +if any(re.search(p, content) for p in patterns): + print('FAIL: possible credentials found') + sys.exit(1) +print('No credentials: OK') +" +``` + +--- + +## Release Process + +1. Review changes: `git log origin/main..HEAD --oneline` +2. Bump `version` in `plugin.yaml` (semver) +3. Update `**Version:**` in this CLAUDE.md to match, and revise this file if conventions changed +4. Commit: `chore: bump version to X.Y.Z` +5. Tag and push: `git tag vX.Y.Z && git push origin main --tags` +6. Create GitHub Release with changelog + +--- + +## Adding a New Event Type + +1. Define the event schema in `skills/ai-act-audit-log/SKILL.md` +2. Add it to the Event Types list above (and to the `event_type` row of the field table) +3. Ensure `builtin_tools/audit.py` handles the new type (harness-level change) +4. 
Update SIEM ingestion configs if field structure changes + +--- + +## Known Issues + +See `known-issues.md` at the repo root. diff --git a/README.md b/README.md new file mode 100644 index 0000000..6c46bea --- /dev/null +++ b/README.md @@ -0,0 +1,19 @@ +# molecule-audit + +Molecule AI plugin. Install via the Molecule AI platform plugin system. + +## Usage + +### In org template (org.yaml) +```yaml +plugins: + - molecule-audit +``` + +### From URL (community install) +``` +github://Molecule-AI/molecule-ai-plugin-molecule-audit +``` + +## License +Business Source License 1.1 — © Molecule AI. diff --git a/known-issues.md b/known-issues.md new file mode 100644 index 0000000..c551f3a --- /dev/null +++ b/known-issues.md @@ -0,0 +1,54 @@ +# Known Issues — molecule-audit + +--- + +## Active Issues + +*(None currently open. This section is updated when issues are filed.)* + +--- + +## Recently Resolved + +*(No recently resolved issues.)* + +--- + +## How to Update This File + +When a new issue is identified: +1. Add it under **Active Issues** using the template below +2. Include: symptom, cause (if known), workaround +3. When fixed, move to **Recently Resolved** and note the fix version + +### Issue Template + +```markdown +## [TICKET-NUMBER] + +**Severity:** P0 / P1 / P2 / P3 +**Status:** Workaround / Fix in progress / Fix available +**Affected versions:** All / vX.Y.Z+ + +**Symptoms:** +**Cause:** +**Workaround:** +**Fix (if available):** +``` + +--- + +## Severity Definitions + +| Level | Description | +|---|---| +| P0 | Audit log fails to write; compliance gap | +| P1 | Log corrupted or unreadable by SIEM | +| P2 | Non-critical event type missing | +| P3 | Cosmetic or documentation issue | + +--- + +## Reporting + +Use the Molecule-AI/internal issue tracker. Tag with `plugin-molecule-audit`. 
diff --git a/plugin.yaml b/plugin.yaml new file mode 100644 index 0000000..04675af --- /dev/null +++ b/plugin.yaml @@ -0,0 +1,16 @@ +name: molecule-audit +version: 1.0.0 +description: > + Immutable append-only audit log for EU AI Act compliance (Articles 12/13/17). + Wraps builtin_tools/audit.py — JSON Lines format, SIEM-friendly, write-only. + Opt-in per workspace; usually paired with molecule-compliance. +author: Molecule AI +tags: [audit, compliance, eu-ai-act, logging, siem] + +runtimes: + - langgraph + - claude_code + - deepagents + +skills: + - ai-act-audit-log diff --git a/runbooks/local-dev-setup.md b/runbooks/local-dev-setup.md new file mode 100644 index 0000000..328cba3 --- /dev/null +++ b/runbooks/local-dev-setup.md @@ -0,0 +1,145 @@ +# Local Development Setup + +This runbook covers setting up a local development environment for +`molecule-audit`. + +--- + +## Prerequisites + +- Python 3.11+ +- `gh` CLI authenticated +- Write access to `Molecule-AI/molecule-ai-plugin-molecule-audit` + +--- + +## Clone & Bootstrap + +```bash +git clone https://github.com/Molecule-AI/molecule-ai-plugin-molecule-audit.git +cd molecule-ai-plugin-molecule-audit +``` + +--- + +## Validating Plugin Structure + +```bash +# YAML structure validation +python3 -c "import yaml; yaml.safe_load(open('plugin.yaml'))" +echo "plugin.yaml OK" + +# Check all referenced skill paths exist +python3 -c " +import yaml, os +with open('plugin.yaml') as f: + data = yaml.safe_load(f) +for skill in data.get('skills', []): + path = f'skills/{skill}/SKILL.md' + exists = os.path.exists(path) + print(f'[{\"OK\" if exists else \"MISSING\"}] {path}') +" +``` + +--- + +## Testing the Audit Skill Locally + +The `builtin_tools/audit.py` harness wrapper is not in this repo — it is +provided by the Molecule AI platform at runtime. To test the skill locally: + +1. 
**Install the plugin in a test workspace** via the platform UI or + `molecule-cli`: + ```bash + mol workspace plugin install molecule-audit --workspace <workspace-id> + ``` + +2. **Trigger a delegation** in the test workspace and check the log file: + ```bash + cat /path/to/workspace/audit/ai-act-log.jsonl | jq . + ``` + +3. **Validate JSONL integrity**: + ```bash + # Check each line is valid JSON + while IFS= read -r line; do + echo "$line" | python3 -c "import json,sys; json.load(sys.stdin)" 2>/dev/null \ + || echo "INVALID: $line" + done < /path/to/ai-act-log.jsonl + echo "Integrity check complete" + ``` + +--- + +## Simulating a SIEM Export + +To verify your SIEM config is correct without a live SIEM: + +```bash +# Generate a sample log line +python3 -c " +import json, uuid, datetime +event = { + 'timestamp': datetime.datetime.utcnow().isoformat() + 'Z', + 'event_type': 'delegation', + 'workspace_id': str(uuid.uuid4()), + 'actor': 'test-agent', + 'action': 'delegate_task', + 'resource': 'ws-target', + 'outcome': 'pass', + 'trace_id': str(uuid.uuid4()), + 'detail': {'task': 'test', 'async': False} +} +print(json.dumps(event)) +" > /tmp/test-audit.jsonl + +# Verify Splunk/ELK-compatible parsing +python3 -c " +import json +with open('/tmp/test-audit.jsonl') as f: + for i, line in enumerate(f): + obj = json.loads(line) + assert 'timestamp' in obj + assert 'event_type' in obj + assert 'outcome' in obj + print(f'Line {i}: OK — {obj[\"event_type\"]}') +" +``` + +--- + +## Troubleshooting + +### plugin.yaml fails to load + +```bash +python3 -c "import yaml; yaml.safe_load(open('plugin.yaml'))" +# If this throws, your YAML is malformed +``` + +### Audit log file not created + +- Ensure `audit.enabled: true` is set in workspace `config.yaml` +- Check the workspace has write access to the `log_path` directory +- The harness must be providing `builtin_tools/audit.py` — verify + the platform version includes it + +### JSONL is corrupted (one line fails to parse) + +This usually means two 
workspaces are writing to the same `log_path`. +The append-only stream was opened for writing by more than one process. +Fix: assign each workspace a unique log path. + +### SIEM shows no events after ingestion + +- Confirm the Filebeat/Agent tail is reading the right path +- Check the SIEM has permission to read the log file +- Verify the JSON fields match the SIEM field-mapping config + +--- + +## Related + +- `builtin_tools/audit.py` — the platform-provided audit implementation +- `molecule-compliance` — runtime OWASP policy companion +- `skills/ai-act-audit-log/SKILL.md` — full skill documentation diff --git a/skills/ai-act-audit-log/SKILL.md b/skills/ai-act-audit-log/SKILL.md new file mode 100644 index 0000000..ba48088 --- /dev/null +++ b/skills/ai-act-audit-log/SKILL.md @@ -0,0 +1,133 @@ +--- +name: ai-act-audit-log +description: "Emit immutable audit events for EU AI Act compliance. Use when a workspace performs any action that needs to be legally reconstructable: delegations, approvals, RBAC decisions, memory read/write. JSON Lines, append-only, SIEM-friendly." +--- + +# EU AI Act Audit Log + +Opt-in plugin that activates `builtin_tools/audit.py` — an append-only +JSON Lines log satisfying the record-keeping and transparency obligations +of the EU AI Act (Articles 12, 13, 17) for high-risk AI systems. + +## When to install + +Install on any workspace that: +- Must satisfy EU AI Act conformity assessment +- Needs a tamper-evident trail of agent decisions for a legal discovery +- Pairs with `molecule-compliance` to record OWASP OA-01 detections and + OA-03 terminations + +Skip on disposable dev workspaces — the log fills disk over time and +isn't useful for throwaway agents. 
+ +## Event schema + +Every line is one JSON object: + +```json +{ + "timestamp": "2026-04-15T21:30:00.123Z", + "event_type": "delegation", + "workspace_id": "ws-acme-pm-a1b2c3d4", + "actor": "ws-acme-pm-a1b2c3d4", + "action": "delegate", + "resource": "ws-acme-dev-lead-e5f6g7h8", + "outcome": "allowed", + "trace_id": "5e8b2f3c-9a1d-4e7b-8c6f-1234567890ab" +} +``` + +Required fields: + +| Field | Meaning | +|---|---| +| `timestamp` | ISO-8601 timestamp in UTC, written with the `Z` designator — sort key + freshness indicator | +| `event_type` | `delegation` / `approval` / `memory` / `rbac` | +| `workspace_id` | Who generated the event | +| `actor` | Who triggered the action (defaults to workspace_id for automated events; human identity for approval decisions) | +| `action` | Verb: `delegate`, `approve`, `memory.read`, `memory.write`, `rbac.deny` | +| `resource` | Target of the action: another workspace id, memory scope, approval action string | +| `outcome` | `allowed` / `denied` / `success` / `failure` / `timeout` / `requested` / `granted` | +| `trace_id` | UUID v4 correlating related events across workspaces | + +## Usage + +Call `audit.log_event` from any tool or handler: + +```python +from builtin_tools.audit import log_event + +log_event( + event_type="delegation", + workspace_id=self.workspace_id, + actor=self.workspace_id, + action="delegate", + resource=target_workspace_id, + outcome="allowed", + trace_id=ctx.trace_id, +) +``` + +The function is synchronous and does the write inline — it opens the log file +in append mode, writes one line, closes. No buffering, no retry. If the +disk is full the call raises `IOError`; the caller decides whether to +surface that (usually yes — an audit gap is a compliance event itself). 
+ +## Configuration + +Add to `config.yaml`: + +```yaml +audit: + enabled: true + log_path: /var/log/molecule/audit.jsonl + max_size_mb: 100 # informational only; rotation is EXTERNAL + retention_days: 365 # informational only; the module never deletes +``` + +## Rotation (external) + +This module is **write-only by design**. It does not rotate, compress, +or delete log lines. Use the host's `logrotate` (Linux) or equivalent: + +``` +/var/log/molecule/audit.jsonl { + daily + rotate 365 + compress + copytruncate # NOT truncate — copytruncate leaves the file open + missingok + notifempty +} +``` + +`copytruncate` is load-bearing if the writer keeps a file descriptor open for append: a rename-based rotation would then send writes to the rotated-away file. +The Usage section describes an open-write-close per event instead, so confirm which behaviour `builtin_tools/audit.py` has before changing rotation modes. +Note that `logrotate` documents a short window between the copy and the truncate in which `copytruncate` can lose lines written at that moment. + +## SIEM ingestion + +The JSON Lines format is directly consumable by: +- Splunk (ingest via Universal Forwarder) +- Elastic (Filebeat + JSON decoder) +- Datadog (Agent in JSON mode) +- Self-hosted Loki + +One ingestion pipeline per workspace volume. No post-processing needed. + +## Anti-patterns + +- **Don't** write to the same log path from multiple workspaces on the + same host — races corrupt the JSONL newlines. Use per-workspace paths. +- **Don't** truncate or edit the log by hand; external rotation is the only sanctioned truncation. Tamper-evidence is the whole point. +- **Don't** log raw PII or secrets in the `resource` or `outcome` fields. + Use IDs or hashes; the audit story and the GDPR story have to coexist. +- **Don't** skip this on OA-01/OA-03 detections — they're exactly the + events an auditor wants to see. + +## Related + +- `builtin_tools/audit.py` — the implementation +- `molecule-compliance` — emits OWASP OA-01 / OA-03 events into this log +- `molecule-security-scan` — emits CVE-scan results into this log +- Issue #256 — the proposal that led to this plugin split